From 20a6cfe1a64ca53000ada28a5391739c6f00e067 Mon Sep 17 00:00:00 2001 From: Ralph Chang Date: Wed, 29 Apr 2026 16:56:47 +0800 Subject: [PATCH] chore(release): prepare v1.5.0 --- AGENTS.md | 14 ++--- CHANGELOG.md | 32 +++++++++++ README.md | 44 ++++++++++---- RELEASE_NOTES.md | 101 +++++++++++++++++++++++++++++++++ docs/architecture.md | 52 +++++++++++++---- docs/configuration.md | 59 +++++++++++++++---- package.json | 2 +- src/types.ts | 6 +- tests/workspace-memory.test.ts | 8 ++- 9 files changed, 274 insertions(+), 44 deletions(-) diff --git a/AGENTS.md b/AGENTS.md index 33d94c6..586ff7b 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -111,7 +111,7 @@ export type LongTermSource = "explicit" | "compaction" | "manual"; // ✅ USE: const assertions for limits export const LONG_TERM_LIMITS = { - maxRenderedChars: 5200, + maxRenderedChars: 3600, maxEntries: 28, } as const; ``` @@ -140,8 +140,8 @@ const maxEntries = 28; async function loadWorkspaceMemory() { } // ✅ REQUIRED: SCREAMING_SNAKE_CASE for constants -const LONG_TERM_LIMITS = { maxRenderedChars: 5200, maxEntries: 28 }; -const HOT_STATE_LIMITS = { maxRenderedChars: 1200 }; +const LONG_TERM_LIMITS = { maxRenderedChars: 3600, maxEntries: 28 }; +const HOT_STATE_LIMITS = { maxRenderedChars: 700 }; // ✅ REQUIRED: PascalCase for types type WorkspaceMemoryStore = { ... 
}; @@ -236,7 +236,7 @@ export default { - **Location**: `~/.local/share/opencode-working-memory/workspaces/{workspaceKey}/workspace-memory.json` - **Workspace Key**: First 16 chars of `sha256(realpath(workspaceRoot))` - **Schema**: See `src/types.ts:WorkspaceMemoryStore` -- **Limits**: 5200 chars, 28 entries max +- **Limits**: 3600 chars, 28 entries max ### Session State Files @@ -299,9 +299,9 @@ Extracts workspace memory candidates from conversation, applies quality gate and ## Performance Considerations -- **Workspace memory budget**: 5200 chars injected into system prompt -- **Session state budget**: 1200 chars injected into system prompt -- **Total overhead**: ~1500-6000 chars per message (minimal) +- **Workspace memory budget**: 3600 chars injected into system prompt +- **Session state budget**: 700 chars injected into system prompt +- **Total overhead**: typically well below configured maximums - **Storage footprint**: ~2-5 KB per workspace for memory, ~1-3 KB per session ## Contributing diff --git a/CHANGELOG.md b/CHANGELOG.md index c7c28d9..b06b52f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,38 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## [1.5.0] - 2026-04-29 + +### Added + +- Strength-based workspace memory retention using exponential decay instead of additive priority scoring. +- Per-type rendered caps for workspace memory candidates: feedback 10, decision 10, project 8, and reference 6. +- Safety-critical memory weighting and type-cap exemption so important entries survive type floods while still competing under the global rendered cap. +- Dormant-workspace effective age: after 14 days without activity, additional dormant time counts at 0.25x for retention decay. 
+- Reinforcement tracking for repeated memories, with same-session and one-hour guards to prevent accidental reinforcement spam. +- Memory health diagnostics for stored vs rendered counts, type caps, global cap overflow, dormancy, retention monitoring, and strength-ranked top/weakest entries. +- CLI smoke tests and regression fixtures covering retention decay, stale-prune removal, type caps, reinforcement, invalid timestamps, and diagnostics. + +### Changed + +- Workspace memory rendering now ranks entries by retention strength, not the previous priority/penalty model. +- Confidence is retained for compatibility but no longer affects retention scoring. +- Old or stale-marked memories are no longer hard-pruned; they remain stored and only fall out of rendered context through strength and cap competition. +- Existing duplicate promotion and dedupe paths now reinforce the surviving memory instead of only absorbing the duplicate. +- Health output now separates stored active memories from rendered candidates to make cap behavior easier to understand. +- Default prompt budgets are lower after calibration against observed rendered output: workspace memory is 3600 characters and hot session state is 700 characters. + +### Fixed + +- Invalid `updatedAt` or `retentionClock` values no longer produce `NaN` retention strength or unstable sorting. +- Dormant age calculation only discounts the dormant overlap since an entry was created, so new memories do not inherit old workspace dormancy. +- Type max totals above the global cap are handled correctly: the global rendered limit still wins. + +### Not Included Yet + +- Delete tombstones and explicit `supersedes` chain enforcement remain deferred follow-up work. +- Hot/warm/cold tiered storage remains a future v1.6 direction. + ## [1.4.0] - 2026-04-28 ### Added diff --git a/README.md b/README.md index 5b877d6..c7aa15e 100644 --- a/README.md +++ b/README.md @@ -5,9 +5,9 @@ Automatic memory for OpenCode agents. 
-OpenCode Working Memory helps your agent keep useful context across compactions and sessions: project decisions, preferences, important references, active files, and unresolved errors. +Working memory is context that **remembers what matters, fades what changes, and stays out of the way.** -It works automatically, without manual memory tools or extra LLM/API calls. +OpenCode Working Memory preserves project decisions, preferences, and references across compactions and sessions, while keeping active files and unresolved errors fresh for the current session — with no manual tools or extra LLM/API calls. ## Why This Exists @@ -31,6 +31,7 @@ Use it when you want your agent to remember things like: - **Compaction-based extraction** — memory extraction piggybacks on OpenCode’s existing compaction flow. - **No manual tools** — memory is injected automatically into the system prompt. - **Quality guards** — filters noisy memories, temporary progress snapshots, stack traces, raw errors, and credentials. +- **Retention decay** — keeps the strongest memories in prompt context while older or weaker memories fade out naturally; important and reinforced memories decay more slowly. ## Installation @@ -121,6 +122,27 @@ Memory types: - `decision` — important implementation or architecture decisions - `reference` — useful paths, commands, or configuration references +### Retention Decay + +> **Memory should fade, so the agent can keep learning.** +> +> Important memories decay more slowly, but every memory must leave room for newer project reality. + +Memories decay over time. The strongest stay visible in the prompt; weaker ones fade from context without being deleted. 
+ +```text + strength + │ + ██ │╲____ reinforced: slower decline + │ ╲______ + ▒▒ │ ╲__ ordinary memory + │ ╲ + ├ ─ ─ ─ ─ ─ ─ ─ ─╲─ dynamic cap competition zone + ░░ │ ╲ easier for new memories to replace + │ ↑ still stored, not deleted + └──────────────────────────────→ time / sessions +``` + ## Explicit Memory Triggers You can explicitly ask the agent to remember durable facts. @@ -167,13 +189,15 @@ It includes guards for: - Credential redaction - Duplicate memory cleanup -- Superseding older decisions with newer ones -- Consolidation accounting so promoted, absorbed, superseded, and rejected memories are handled differently +- Accounting for promoted, absorbed, superseded, and rejected memories +- Strength-based retention so useful memories stay visible without hard age pruning - Filtering stack traces, git hashes, raw errors, and noisy path-heavy facts - Rejecting temporary project progress snapshots The goal is to remember durable facts, not every detail. +**Good memory is selective memory.** + Historical cleanup is intentionally conservative: extraction-time filtering may reject more aggressively, but one-time migration cleanup only supersedes high-confidence garbage patterns. This protects existing durable memories written in declarative style, such as "API endpoint is X" or "Product branding is Y". For local development cleanup, use: @@ -191,21 +215,21 @@ OpenCode Working Memory works out of the box. Default behavior: -- Workspace memory budget: 5200 characters +- Workspace memory budget: 3600 characters (~900 tokens) - Workspace memory limit: 28 entries -- Hot session state budget: 1200 characters +- Hot session state budget: 700 characters (~175 tokens) - Active files shown: 8 - Open errors shown: 3 See [Configuration](docs/configuration.md) for customization options. -## Ongoing Work +## Roadmap Current focus: -- Improve memory recording quality so only durable, useful facts are kept. 
-- Strengthen deduplication and supersession so stale memories do not pile up. -- Add better forgetting behavior for obsolete decisions, preferences, and project facts. +- Add explicit delete tombstones so removed memories do not get re-extracted. +- Enforce explicit `supersedes` chains for safer replacement of obsolete memories. +- Explore tiered hot/warm/cold storage after the retention model has more real-world data. ## Documentation diff --git a/RELEASE_NOTES.md b/RELEASE_NOTES.md index 307b7f7..c38a71f 100644 --- a/RELEASE_NOTES.md +++ b/RELEASE_NOTES.md @@ -1,5 +1,106 @@ # Release Notes +## 1.5.0 (2026-04-29) + +### Retention Decay Model + +This release changes workspace memory retention from hard stale pruning and additive priority scoring to a strength-based decay model. + +Think of it like a forgetting curve: memories fade over time, but important, reinforced, and safety-critical memories decay slower. Weak entries fall out of rendered prompt context by cap competition, not hard deletion. + +> **Memory should fade, so the agent can keep learning.** +> Important memories decay slower, but every memory must leave room for newer project reality and avoid long-term memory pollution. + +```text + strength + │ + ██ │╲____ reinforced: slower decline + │ ╲______ + ▒▒ │ ╲__ ordinary memory + │ ╲ + ├ ─ ─ ─ ─ ─ ─ ─ ─╲─ dynamic cap competition zone + ░░ │ ╲ easier for new memories to replace + │ ↑ still stored, not deleted + └──────────────────────────────→ time / sessions +``` + +### What Changed + +- **Strength-based retention**: workspace memory now uses exponential decay: initial strength × age decay. +- **Better initial strength**: type, source, user importance, and safety-critical status now determine how strong a memory starts. +- **No confidence scoring**: confidence remains in stored data for compatibility, but it no longer affects retention ranking. 
+- **Type caps**: rendered workspace memory now caps feedback, decisions, project facts, and references separately so one type cannot monopolize all 28 slots. +- **Safety-critical protection**: safety-critical entries get stronger retention and are exempt from per-type caps, while still competing under the global rendered cap. +- **Dormant-aware age**: after 14 inactive days, additional dormant workspace time counts at 0.25x so paused projects do not forget too aggressively. +- **Reinforcement**: repeated matching memories reinforce the survivor and slow future decay, with same-session and one-hour guards to avoid accidental spam. +- **No hard stale pruning**: old or stale-marked memories are no longer automatically dropped by age; they lose rendered space only through cap competition. +- **Calibrated prompt budgets**: observed rendered output was typically under ~2000 characters for workspace memory and ~500 characters for hot session state, so defaults were reduced to 3600 and 700 characters to keep overhead lower while retaining buffer. +- **Clearer health output**: `memory-diag health` now reports stored vs rendered counts, type caps, global cap overflow, dormancy, retention monitoring, and strength-ranked top/weakest entries. + +### Why This Helps + +- User preferences and explicit memories are less likely to disappear just because inferred project facts are newer. +- Feedback, decisions, project facts, and references share prompt space more fairly. +- Returning to an old workspace is less punishing because memories decay more slowly while the workspace is dormant. +- Maintainers can see why memories are rendered or capped instead of guessing from a single active-memory count. +- Stale entries can fade out of prompt context without destructive cleanup. 
+ +### Diagnostics + +Maintainers can inspect retention behavior with: + +```bash +bun scripts/memory-diag.ts health +``` + +The health output now includes sections like: + +```txt +Stored active memories: 28 +Rendered candidates: 20 + +By type: + feedback stored=17 rendered=10 typeCap=10 + decision stored=11 rendered=10 typeCap=10 + +Retention caps: + type-capped entries: 8 + global-cap overflow: 0 + +Dormancy: + dormant discount active: no + +Retention monitoring: + high_importance_ratio: 0.0% (alert > 30%) +``` + +### Not Included Yet + +- Delete tombstones are not implemented in this release. +- Explicit `supersedes` chain enforcement is still deferred. +- Hot/warm/cold tiered storage remains future work. + +### Upgrade Notes + +- No configuration changes required. +- Existing workspace memory files remain compatible. +- Existing entries without a `retentionClock` fall back safely to existing timestamps. +- The OpenCode config entry stays the same: + +```json +{ + "plugin": ["opencode-working-memory"] +} +``` + +### Validation + +- `npm run typecheck` +- `npm test` — 237 tests passing +- `bun scripts/memory-diag.ts health` + +--- + ## 1.4.0 (2026-04-28) ### Memory Quality Cleanup diff --git a/docs/architecture.md b/docs/architecture.md index 6dee71f..0363c8a 100644 --- a/docs/architecture.md +++ b/docs/architecture.md @@ -10,7 +10,7 @@ OpenCode Working Memory implements a **three-layer memory architecture** designe │ • Persistent storage: ~/.local/share/opencode-working-... │ │ • Types: feedback | project | decision | reference │ │ • Sources: explicit | compaction | manual │ -│ • Limits: 5200 chars / 28 entries │ +│ • Render limits: 3600 chars / 28 entries │ │ • Survives: session reset, compaction (same workspace) │ └─────────────────────────────────────────────────────────────┘ ↓ @@ -48,8 +48,9 @@ Long-term memory that persists across sessions within the same workspace. 
Perfec { version: 1, workspace: { root: string, key: string }, - limits: { maxRenderedChars: 5200, maxEntries: 28 }, + limits: { maxRenderedChars: 3600, maxEntries: 28 }, entries: LongTermMemoryEntry[], + lastActivityAt?: string, updatedAt: string } ``` @@ -90,18 +91,47 @@ Memory candidates: - Path-heavy facts (>50% paths) - Very short text (<20 chars) -### Consolidation and Deduplication +### Consolidation, Deduplication, and Retention Memories are deduplicated and consolidated with accounting: 1. Normalize exact text: lowercase, strip punctuation, collapse whitespace. 2. Group project/reference entries by identity where possible. -3. Group decisions and feedback by topic where possible. -4. Keep the best surviving entry by source, confidence, type, and freshness rules. +3. Keep decision and feedback entries on exact canonical matching to avoid broad semantic merges. +4. Keep the best surviving entry by source, confidence, specificity, and freshness tie-breakers. 5. Emit accounting events so pending memories can be classified as promoted, absorbed, superseded, or rejected. This prevents absorbed or superseded pending memories from retrying forever while still preserving the active surviving memory. +Retention then decides which active memories are rendered into prompt context. It does not hard-delete old memories by age. + +```typescript +strength = initialStrength * 2 ** (-effectiveAgeDays / effectiveHalfLifeDays) +``` + +Initial strength is based on memory type, source, optional user importance, and safety-critical status. Confidence remains stored for compatibility but is not part of retention scoring. + +Rendered candidates are selected in this order: + +1. Exclude `status: "superseded"` entries. +2. Compute current retention strength. +3. Sort by strength descending. +4. Apply per-type caps, with safety-critical entries exempt from type caps. +5. Keep the top 28 rendered entries under the workspace memory character budget. 
+ +Default type caps: + +| Type | Rendered cap | +|------|--------------| +| `feedback` | 10 | +| `decision` | 10 | +| `project` | 8 | +| `reference` | 6 | + +The type-cap total is 34, intentionally above the global 28-entry cap. These are maximums, not quotas. + +Dormant workspaces age more slowly: after 14 inactive days, additional dormant time counts at 0.25x for retention decay. Repeated duplicate memories reinforce the surviving entry and slow future decay, but same-session and under-one-hour repeats do not stack reinforcement. + ### System Prompt Injection Workspace memory is injected at the top of every message: @@ -241,7 +271,7 @@ Applies quality gate, redaction, migration, consolidation accounting, deduplicat - `session.compacted`: Promote session decisions to workspace memory - `session.deleted`: Clean up session state files -Promotion uses accounting results from workspace memory normalization. Pending memories that are kept are promoted; duplicate memories are absorbed; obsolete same-topic memories are superseded; stale or over-capacity compaction memories are rejected. +Promotion uses accounting results from workspace memory normalization. Pending memories that are kept are promoted; duplicate memories are absorbed; exact decision replacements can be superseded; over-capacity compaction memories are rejected. Stale-marked memories are not hard-pruned by age; they lose rendered space through retention strength and cap competition. 
## Quality Guarantees @@ -319,14 +349,14 @@ const workspaceKey = sha256(realpath(workspaceRoot)).slice(0, 16) | Layer | Max Chars | Max Entries | |-------|-----------|-------------| -| Workspace Memory | 5200 | 28 | -| Hot Session State | 1200 | 8 files, 3 errors | +| Workspace Memory | 3600 | 28 | +| Hot Session State | 700 | 8 files, 3 errors | ### Injection Overhead -- Workspace memory: ~200-500 chars per message -- Hot session state: ~200-400 chars per message -- Total: ~400-900 chars per message (minimal) +- Workspace memory: usually under ~2000 chars in observed rendered output +- Hot session state: usually under ~500 chars in observed rendered output +- Total: typically well below the configured maximums ### Storage Footprint diff --git a/docs/configuration.md b/docs/configuration.md index 4835d61..6610705 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -8,8 +8,8 @@ OpenCode Working Memory works out-of-the-box with sensible defaults. Configurati ```typescript const LONG_TERM_LIMITS = { - maxRenderedChars: 5200, // Maximum characters in system prompt - targetRenderedChars: 4200, // Target characters (leave buffer) + maxRenderedChars: 3600, // Maximum characters in system prompt + targetRenderedChars: 3000, // Target characters (leave buffer) maxEntries: 28, // Maximum number of entries maxEntryTextChars: 260, // Maximum characters per entry text maxRationaleChars: 180, // Maximum characters per entry rationale @@ -18,14 +18,40 @@ const LONG_TERM_LIMITS = { **Recommendations**: - Keep `maxRenderedChars` under 5500 to avoid context bloat +- Defaults are calibrated from observed rendered usage that was typically under ~2000 characters - `maxEntries` of 28 provides good coverage without overwhelming - Entry text limits ensure entries stay concise +## Retention Model Defaults + +Workspace memory retention uses strength-based decay. 
These constants live in `src/workspace-memory.ts`: + +```typescript +const BASE_HALF_LIFE_DAYS = 45; +const REINFORCEMENT_HALFLIFE_FACTOR = 0.85; +const REINFORCEMENT_MAX_COUNT = 6; +const WORKSPACE_DORMANT_AFTER_DAYS = 14; +const DORMANT_DECAY_MULTIPLIER = 0.25; +``` + +Initial strength uses type, source, user importance, and safety-critical factors. Confidence is stored for compatibility but is not used for retention scoring. + +Rendered type caps prevent one type from filling all workspace memory slots: + +| Type | Rendered cap | +|------|--------------| +| `feedback` | 10 | +| `decision` | 10 | +| `project` | 8 | +| `reference` | 6 | + +Safety-critical memories are exempt from type caps but still compete under the global `maxEntries` limit. Old or stale-marked memories are not hard-pruned by age; they lose rendered space through strength and cap competition. + ## Hot Session State Limits ```typescript const HOT_STATE_LIMITS = { - maxRenderedChars: 1200, // Maximum characters in system prompt + maxRenderedChars: 700, // Maximum characters in system prompt maxActiveFilesStored: 20, // Maximum files tracked in state maxActiveFilesRendered: 8, // Maximum files shown in prompt maxOpenErrorsStored: 5, // Maximum errors tracked @@ -36,6 +62,7 @@ const HOT_STATE_LIMITS = { **Recommendations**: - Keep `maxRenderedChars` under 1500 for fast prompts +- Defaults are calibrated from observed rendered usage around ~500 characters or less - `maxActiveFilesRendered` of 8 provides good context coverage - `maxOpenErrorsRendered` of 3 avoids overwhelming error lists @@ -43,12 +70,12 @@ const HOT_STATE_LIMITS = { ### Long-Term Memory Types -| Type | Purpose | Stale After (days) | -|------|---------|---------------------| -| `feedback` | User preferences for workspace | 90 | -| `project` | Project-level information | 60 | -| `decision` | Important decisions | 45 | -| `reference` | Key references | 90 | +| Type | Purpose | Rendered cap | +|------|---------|--------------| +| 
`feedback` | User preferences for workspace | 10 | +| `project` | Project-level information | 8 | +| `decision` | Important decisions | 10 | +| `reference` | Key references | 6 | ### Memory Sources @@ -114,7 +141,7 @@ To customize limits, edit the constants in `src/types.ts`: ```typescript // Example: Increase workspace memory limit export const LONG_TERM_LIMITS = { - maxRenderedChars: 6000, // Increased from 5200 + maxRenderedChars: 6000, // Increased from 3600 maxEntries: 35, // Increased from 28 // ... }; @@ -144,7 +171,7 @@ const HOT_STATE_LIMITS = { // Preserve more context const LONG_TERM_LIMITS = { maxEntries: 40, // Increased - targetRenderedChars: 5000, // Increased + targetRenderedChars: 5000, // Increased; keep maxRenderedChars above this value }; ``` @@ -175,6 +202,16 @@ cat ~/.local/share/opencode-working-memory/workspaces/*/workspace-memory.json | cat ~/.local/share/opencode-working-memory/workspaces/*/sessions/*.json | jq ``` +### Inspect Retention Health + +From a source checkout, maintainers can inspect stored vs rendered memory behavior: + +```bash +bun scripts/memory-diag.ts health +``` + +The health output includes stored active memories, rendered candidates, type caps, global cap overflow, dormancy status, retention monitoring alerts, and strength-ranked top/weakest entries. 
+ ### Clear Workspace Memory ```bash diff --git a/package.json b/package.json index 7487aed..18ad99b 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "opencode-working-memory", - "version": "1.4.0", + "version": "1.5.0", "description": "Three-layer memory architecture for OpenCode with workspace memory and hot session state", "type": "module", "main": "index.ts", diff --git a/src/types.ts b/src/types.ts index cfb1a80..f24cc65 100644 --- a/src/types.ts +++ b/src/types.ts @@ -95,15 +95,15 @@ export type SessionState = { }; export const LONG_TERM_LIMITS = { - maxRenderedChars: 5200, - targetRenderedChars: 4200, + maxRenderedChars: 3600, + targetRenderedChars: 3000, maxEntries: 28, maxEntryTextChars: 260, maxRationaleChars: 180, } as const; export const HOT_STATE_LIMITS = { - maxRenderedChars: 1200, + maxRenderedChars: 700, maxActiveFilesStored: 20, maxActiveFilesRendered: 8, maxOpenErrorsStored: 5, diff --git a/tests/workspace-memory.test.ts b/tests/workspace-memory.test.ts index ee6df2f..38475cf 100644 --- a/tests/workspace-memory.test.ts +++ b/tests/workspace-memory.test.ts @@ -4,7 +4,7 @@ import { mkdir, mkdtemp, readFile, rm, stat, writeFile } from "node:fs/promises" import { join, dirname } from "node:path"; import { tmpdir } from "node:os"; import type { LongTermMemoryEntry, WorkspaceMemoryStore } from "../src/types.ts"; -import { LONG_TERM_LIMITS } from "../src/types.ts"; +import { HOT_STATE_LIMITS, LONG_TERM_LIMITS } from "../src/types.ts"; import { workspaceKey, workspaceMemoryPath } from "../src/paths.ts"; import { renderWorkspaceMemory, @@ -33,6 +33,12 @@ import { REAL_WORKSPACE_FIXTURES } from "./fixtures/real-workspaces-snapshot.ts" const DAY_MS = 24 * 60 * 60 * 1000; +test("default prompt budgets use calibrated conservative character caps", () => { + assert.equal(LONG_TERM_LIMITS.maxRenderedChars, 3600); + assert.equal(LONG_TERM_LIMITS.targetRenderedChars, 3000); + assert.equal(HOT_STATE_LIMITS.maxRenderedChars, 700); +}); + 
function entry(id: string, text: string, type: LongTermMemoryEntry["type"] = "decision"): LongTermMemoryEntry { const now = new Date().toISOString(); return {