chore(release): prepare v1.5.0

2026-06-01 22:11:08 +02:00 · 2026-04-29 16:56:47 +08:00
parent 36b78ea91c
commit 20a6cfe1a6
9 changed files with 274 additions and 44 deletions
@@ -111,7 +111,7 @@ export type LongTermSource = "explicit" | "compaction" | "manual";

 // ✅ USE: const assertions for limits
 export const LONG_TERM_LIMITS = {
-  maxRenderedChars: 5200,
+  maxRenderedChars: 3600,
  maxEntries: 28,
 } as const;
 ```
@@ -140,8 +140,8 @@ const maxEntries = 28;
 async function loadWorkspaceMemory() { }

 // ✅ REQUIRED: SCREAMING_SNAKE_CASE for constants
-const LONG_TERM_LIMITS = { maxRenderedChars: 5200, maxEntries: 28 };
-const HOT_STATE_LIMITS = { maxRenderedChars: 1200 };
+const LONG_TERM_LIMITS = { maxRenderedChars: 3600, maxEntries: 28 };
+const HOT_STATE_LIMITS = { maxRenderedChars: 700 };

 // ✅ REQUIRED: PascalCase for types
 type WorkspaceMemoryStore = { ... };
@@ -236,7 +236,7 @@ export default {
 - **Location**: `~/.local/share/opencode-working-memory/workspaces/{workspaceKey}/workspace-memory.json`
 - **Workspace Key**: First 16 chars of `sha256(realpath(workspaceRoot))`
 - **Schema**: See `src/types.ts:WorkspaceMemoryStore`
- **Limits**: 5200 chars, 28 entries max
+- **Limits**: 3600 chars, 28 entries max

 ### Session State Files

@@ -299,9 +299,9 @@ Extracts workspace memory candidates from conversation, applies quality gate and

 ## Performance Considerations

- **Workspace memory budget**: 5200 chars injected into system prompt
- **Session state budget**: 1200 chars injected into system prompt
- **Total overhead**: ~1500-6000 chars per message (minimal)
+- **Workspace memory budget**: 3600 chars injected into system prompt
+- **Session state budget**: 700 chars injected into system prompt
+- **Total overhead**: typically well below configured maximums
 - **Storage footprint**: ~2-5 KB per workspace for memory, ~1-3 KB per session

 ## Contributing
@@ -5,6 +5,38 @@ All notable changes to this project will be documented in this file.
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

+## [1.5.0] - 2026-04-29
+
+### Added
+
+- Strength-based workspace memory retention using exponential decay instead of additive priority scoring.
+- Per-type rendered caps for workspace memory candidates: feedback 10, decision 10, project 8, and reference 6.
+- Safety-critical memory weighting and type-cap exemption so important entries survive type floods while still competing under the global rendered cap.
+- Dormant-workspace effective age: after 14 days without activity, additional dormant time counts at 0.25x for retention decay.
+- Reinforcement tracking for repeated memories, with same-session and one-hour guards to prevent accidental reinforcement spam.
+- Memory health diagnostics for stored vs rendered counts, type caps, global cap overflow, dormancy, retention monitoring, and strength-ranked top/weakest entries.
+- CLI smoke tests and regression fixtures covering retention decay, stale-prune removal, type caps, reinforcement, invalid timestamps, and diagnostics.
+
+### Changed
+
+- Workspace memory rendering now ranks entries by retention strength, not the previous priority/penalty model.
+- Confidence is retained for compatibility but no longer affects retention scoring.
+- Old or stale-marked memories are no longer hard-pruned; they remain stored and only fall out of rendered context through strength and cap competition.
+- Existing duplicate promotion and dedupe paths now reinforce the surviving memory instead of only absorbing the duplicate.
+- Health output now separates stored active memories from rendered candidates to make cap behavior easier to understand.
+- Default prompt budgets are lower after calibration against observed rendered output: workspace memory is 3600 characters and hot session state is 700 characters.
+
+### Fixed
+
+- Invalid `updatedAt` or `retentionClock` values no longer produce `NaN` retention strength or unstable sorting.
+- Dormant age calculation only discounts the dormant overlap since an entry was created, so new memories do not inherit old workspace dormancy.
+- Type max totals above the global cap are handled correctly: the global rendered limit still wins.
+
+### Not Included Yet
+
+- Delete tombstones and explicit `supersedes` chain enforcement remain deferred follow-up work.
+- Hot/warm/cold tiered storage remains a future v1.6 direction.
+
 ## [1.4.0] - 2026-04-28

 ### Added
@@ -5,9 +5,9 @@

 Automatic memory for OpenCode agents.

-OpenCode Working Memory helps your agent keep useful context across compactions and sessions: project decisions, preferences, important references, active files, and unresolved errors.
+Working memory is context that **remembers what matters, fades what changes, and stays out of the way.**

-It works automatically, without manual memory tools or extra LLM/API calls.
+OpenCode Working Memory preserves project decisions, preferences, and references across compactions and sessions, while keeping active files and unresolved errors fresh for the current session — with no manual tools or extra LLM/API calls.

 ## Why This Exists

@@ -31,6 +31,7 @@ Use it when you want your agent to remember things like:
 - **Compaction-based extraction** — memory extraction piggybacks on OpenCode’s existing compaction flow.
 - **No manual tools** — memory is injected automatically into the system prompt.
 - **Quality guards** — filters noisy memories, temporary progress snapshots, stack traces, raw errors, and credentials.
+- **Retention decay** — keeps the strongest memories in prompt context while older or weaker memories fade out naturally; important and reinforced memories decay more slowly.

 ## Installation

@@ -121,6 +122,27 @@ Memory types:
 - `decision` — important implementation or architecture decisions
 - `reference` — useful paths, commands, or configuration references

+### Retention Decay
+
+> **Memory should fade, so the agent can keep learning.**
+>
+> Important memories decay more slowly, but every memory must leave room for newer project reality.
+
+Memories decay over time. The strongest stay visible in the prompt; weaker ones fade from context without being deleted.
+
+```text
+  strength
+    │
+ ██ │╲____   reinforced: slower decline
+    │     ╲______
+ ▒▒ │            ╲__ ordinary memory
+    │               ╲
+    ├ ─ ─ ─ ─ ─ ─ ─ ─╲─ dynamic cap competition zone
+ ░░ │                 ╲  easier for new memories to replace
+    │                  ↑ still stored, not deleted
+    └──────────────────────────────→ time / sessions
+```
+
 ## Explicit Memory Triggers

 You can explicitly ask the agent to remember durable facts.
@@ -167,13 +189,15 @@ It includes guards for:

 - Credential redaction
 - Duplicate memory cleanup
- Superseding older decisions with newer ones
- Consolidation accounting so promoted, absorbed, superseded, and rejected memories are handled differently
+- Accounting for promoted, absorbed, superseded, and rejected memories
+- Strength-based retention so useful memories stay visible without hard age pruning
 - Filtering stack traces, git hashes, raw errors, and noisy path-heavy facts
 - Rejecting temporary project progress snapshots

 The goal is to remember durable facts, not every detail.

+**Good memory is selective memory.**
+
 Historical cleanup is intentionally conservative: extraction-time filtering may reject more aggressively, but one-time migration cleanup only supersedes high-confidence garbage patterns. This protects existing durable memories written in declarative style, such as "API endpoint is X" or "Product branding is Y".

 For local development cleanup, use:
@@ -191,21 +215,21 @@ OpenCode Working Memory works out of the box.

 Default behavior:

- Workspace memory budget: 5200 characters
+- Workspace memory budget: 3600 characters (~900 tokens)
 - Workspace memory limit: 28 entries
- Hot session state budget: 1200 characters
+- Hot session state budget: 700 characters (~175 tokens)
 - Active files shown: 8
 - Open errors shown: 3

 See [Configuration](docs/configuration.md) for customization options.

-## Ongoing Work
+## Roadmap

 Current focus:

- Improve memory recording quality so only durable, useful facts are kept.
- Strengthen deduplication and supersession so stale memories do not pile up.
- Add better forgetting behavior for obsolete decisions, preferences, and project facts.
+- Add explicit delete tombstones so removed memories do not get re-extracted.
+- Enforce explicit `supersedes` chains for safer replacement of obsolete memories.
+- Explore tiered hot/warm/cold storage after the retention model has more real-world data.

 ## Documentation

@@ -1,5 +1,106 @@
 # Release Notes

+## 1.5.0 (2026-04-29)
+
+### Retention Decay Model
+
+This release changes workspace memory retention from hard stale pruning and additive priority scoring to a strength-based decay model.
+
+Think of it like a forgetting curve: memories fade over time, but important, reinforced, and safety-critical memories decay more slowly. Weak entries fall out of rendered prompt context by cap competition, not hard deletion.
+
+> **Memory should fade, so the agent can keep learning.**
+> Important memories decay more slowly, but every memory must leave room for newer project reality and avoid long-term memory pollution.
+
+```text
+  strength
+    │
+ ██ │╲____   reinforced: slower decline
+    │     ╲______
+ ▒▒ │            ╲__ ordinary memory
+    │               ╲
+    ├ ─ ─ ─ ─ ─ ─ ─ ─╲─ dynamic cap competition zone
+ ░░ │                 ╲  easier for new memories to replace
+    │                  ↑ still stored, not deleted
+    └──────────────────────────────→ time / sessions
+```
+
+### What Changed
+
+- **Strength-based retention**: workspace memory now uses exponential decay: initial strength × age decay.
+- **Better initial strength**: type, source, user importance, and safety-critical status now determine how strong a memory starts.
+- **No confidence scoring**: confidence remains in stored data for compatibility, but it no longer affects retention ranking.
+- **Type caps**: rendered workspace memory now caps feedback, decisions, project facts, and references separately so one type cannot monopolize all 28 slots.
+- **Safety-critical protection**: safety-critical entries get stronger retention and are exempt from per-type caps, while still competing under the global rendered cap.
+- **Dormant-aware age**: after 14 inactive days, additional dormant workspace time counts at 0.25x so paused projects do not forget too aggressively.
+- **Reinforcement**: repeated matching memories reinforce the survivor and slow future decay, with same-session and one-hour guards to avoid accidental spam.
+- **No hard stale pruning**: old or stale-marked memories are no longer automatically dropped by age; they lose rendered space only through cap competition.
+- **Calibrated prompt budgets**: observed rendered output was typically under ~2000 characters for workspace memory and ~500 characters for hot session state, so defaults were reduced to 3600 and 700 characters to keep overhead lower while retaining buffer.
+- **Clearer health output**: `memory-diag health` now reports stored vs rendered counts, type caps, global cap overflow, dormancy, retention monitoring, and strength-ranked top/weakest entries.
+
+### Why This Helps
+
+- User preferences and explicit memories are less likely to disappear just because inferred project facts are newer.
+- Feedback, decisions, project facts, and references share prompt space more fairly.
+- Returning to an old workspace is less punishing because memories decay more slowly while the workspace is dormant.
+- Maintainers can see why memories are rendered or capped instead of guessing from a single active-memory count.
+- Stale entries can fade out of prompt context without destructive cleanup.
+
+### Diagnostics
+
+Maintainers can inspect retention behavior with:
+
+```bash
+bun scripts/memory-diag.ts health
+```
+
+The health output now includes sections like:
+
+```txt
+Stored active memories: 28
+Rendered candidates: 20
+
+By type:
+  feedback  stored=17  rendered=10  typeCap=10
+  decision  stored=11  rendered=10  typeCap=10
+
+Retention caps:
+  type-capped entries: 8
+  global-cap overflow: 0
+
+Dormancy:
+  dormant discount active: no
+
+Retention monitoring:
+  high_importance_ratio: 0.0% (alert > 30%)
+```
+
+### Not Included Yet
+
+- Delete tombstones are not implemented in this release.
+- Explicit `supersedes` chain enforcement is still deferred.
+- Hot/warm/cold tiered storage remains future work.
+
+### Upgrade Notes
+
+- No configuration changes required.
+- Existing workspace memory files remain compatible.
+- Existing entries without a `retentionClock` fall back safely to existing timestamps.
+- The OpenCode config entry stays the same:
+
+```json
+{
+  "plugin": ["opencode-working-memory"]
+}
+```
+
+### Validation
+
+- `npm run typecheck`
+- `npm test` — 237 tests passing
+- `bun scripts/memory-diag.ts health`
+
+---
+
 ## 1.4.0 (2026-04-28)

 ### Memory Quality Cleanup
@@ -10,7 +10,7 @@ OpenCode Working Memory implements a **three-layer memory architecture** designe
 │  • Persistent storage: ~/.local/share/opencode-working-...  │
 │  • Types: feedback | project | decision | reference        │
 │  • Sources: explicit | compaction | manual                  │
-│  • Limits: 5200 chars / 28 entries                          │
+│  • Render limits: 3600 chars / 28 entries                    │
 │  • Survives: session reset, compaction (same workspace)    │
 └─────────────────────────────────────────────────────────────┘
                              ↓
@@ -48,8 +48,9 @@ Long-term memory that persists across sessions within the same workspace. Perfec
  {
    version: 1,
    workspace: { root: string, key: string },
-    limits: { maxRenderedChars: 5200, maxEntries: 28 },
+    limits: { maxRenderedChars: 3600, maxEntries: 28 },
    entries: LongTermMemoryEntry[],
+    lastActivityAt?: string,
    updatedAt: string
  }
  ```
@@ -90,18 +91,47 @@ Memory candidates:
 - Path-heavy facts (>50% paths)
 - Very short text (<20 chars)

-### Consolidation and Deduplication
+### Consolidation, Deduplication, and Retention

 Memories are deduplicated and consolidated with accounting:

 1. Normalize exact text: lowercase, strip punctuation, collapse whitespace.
 2. Group project/reference entries by identity where possible.
-3. Group decisions and feedback by topic where possible.
-4. Keep the best surviving entry by source, confidence, type, and freshness rules.
+3. Keep decision and feedback entries on exact canonical matching to avoid broad semantic merges.
+4. Keep the best surviving entry by source, confidence, specificity, and freshness tie-breakers.
 5. Emit accounting events so pending memories can be classified as promoted, absorbed, superseded, or rejected.

 This prevents absorbed or superseded pending memories from retrying forever while still preserving the active surviving memory.

+Retention then decides which active memories are rendered into prompt context. It does not hard-delete old memories by age.
+
+```typescript
+strength = initialStrength * 2 ** (-effectiveAgeDays / effectiveHalfLifeDays)
+```
+
+Initial strength is based on memory type, source, optional user importance, and safety-critical status. Confidence remains stored for compatibility but is not part of retention scoring.
+
+Rendered candidates are selected in this order:
+
+1. Exclude `status: "superseded"` entries.
+2. Compute current retention strength.
+3. Sort by strength descending.
+4. Apply per-type caps, with safety-critical entries exempt from type caps.
+5. Keep the top 28 rendered entries under the workspace memory character budget.
+
+Default type caps:
+
+| Type | Rendered cap |
+|------|--------------|
+| `feedback` | 10 |
+| `decision` | 10 |
+| `project` | 8 |
+| `reference` | 6 |
+
+The type-cap total is 34, intentionally above the global 28-entry cap. These are maximums, not quotas.
+
+Dormant workspaces age more slowly: after 14 inactive days, additional dormant time counts at 0.25x for retention decay. Repeated duplicate memories reinforce the surviving entry and slow future decay, but same-session and under-one-hour repeats do not stack reinforcement.
+
 ### System Prompt Injection

 Workspace memory is injected at the top of every message:
@@ -241,7 +271,7 @@ Applies quality gate, redaction, migration, consolidation accounting, deduplicat
 - `session.compacted`: Promote session decisions to workspace memory
 - `session.deleted`: Clean up session state files

-Promotion uses accounting results from workspace memory normalization. Pending memories that are kept are promoted; duplicate memories are absorbed; obsolete same-topic memories are superseded; stale or over-capacity compaction memories are rejected.
+Promotion uses accounting results from workspace memory normalization. Pending memories that are kept are promoted; duplicate memories are absorbed; exact decision replacements can be superseded; over-capacity compaction memories are rejected. Stale-marked memories are not hard-pruned by age; they lose rendered space through retention strength and cap competition.

 ## Quality Guarantees

@@ -319,14 +349,14 @@ const workspaceKey = sha256(realpath(workspaceRoot)).slice(0, 16)

 | Layer | Max Chars | Max Entries |
 |-------|-----------|-------------|
-| Workspace Memory | 5200 | 28 |
-| Hot Session State | 1200 | 8 files, 3 errors |
+| Workspace Memory | 3600 | 28 |
+| Hot Session State | 700 | 8 files, 3 errors |

 ### Injection Overhead

- Workspace memory: ~200-500 chars per message
- Hot session state: ~200-400 chars per message
- Total: ~400-900 chars per message (minimal)
+- Workspace memory: usually under ~2000 chars in observed rendered output
+- Hot session state: usually under ~500 chars in observed rendered output
+- Total: typically well below the configured maximums

 ### Storage Footprint

@@ -8,8 +8,8 @@ OpenCode Working Memory works out-of-the-box with sensible defaults. Configurati

 ```typescript
 const LONG_TERM_LIMITS = {
-  maxRenderedChars: 5200,    // Maximum characters in system prompt
-  targetRenderedChars: 4200, // Target characters (leave buffer)
+  maxRenderedChars: 3600,    // Maximum characters in system prompt
+  targetRenderedChars: 3000, // Target characters (leave buffer)
  maxEntries: 28,            // Maximum number of entries
  maxEntryTextChars: 260,    // Maximum characters per entry text
  maxRationaleChars: 180,    // Maximum characters per entry rationale
@@ -18,14 +18,40 @@ const LONG_TERM_LIMITS = {

 **Recommendations**:
 - Keep `maxRenderedChars` under 5500 to avoid context bloat
+- Defaults are calibrated from observed rendered usage that was typically under ~2000 characters
 - `maxEntries` of 28 provides good coverage without overwhelming
 - Entry text limits ensure entries stay concise

+## Retention Model Defaults
+
+Workspace memory retention uses strength-based decay. These constants live in `src/workspace-memory.ts`:
+
+```typescript
+const BASE_HALF_LIFE_DAYS = 45;
+const REINFORCEMENT_HALFLIFE_FACTOR = 0.85;
+const REINFORCEMENT_MAX_COUNT = 6;
+const WORKSPACE_DORMANT_AFTER_DAYS = 14;
+const DORMANT_DECAY_MULTIPLIER = 0.25;
+```
+
+Initial strength uses type, source, user importance, and safety-critical factors. Confidence is stored for compatibility but is not used for retention scoring.
+
+Rendered type caps prevent one type from filling all workspace memory slots:
+
+| Type | Rendered cap |
+|------|--------------|
+| `feedback` | 10 |
+| `decision` | 10 |
+| `project` | 8 |
+| `reference` | 6 |
+
+Safety-critical memories are exempt from type caps but still compete under the global `maxEntries` limit. Old or stale-marked memories are not hard-pruned by age; they lose rendered space through strength and cap competition.
+
 ## Hot Session State Limits

 ```typescript
 const HOT_STATE_LIMITS = {
-  maxRenderedChars: 1200,       // Maximum characters in system prompt
+  maxRenderedChars: 700,        // Maximum characters in system prompt
  maxActiveFilesStored: 20,    // Maximum files tracked in state
  maxActiveFilesRendered: 8,   // Maximum files shown in prompt
  maxOpenErrorsStored: 5,      // Maximum errors tracked
@@ -36,6 +62,7 @@ const HOT_STATE_LIMITS = {

 **Recommendations**:
 - Keep `maxRenderedChars` under 1500 for fast prompts
+- Defaults are calibrated from observed rendered usage that was typically under ~500 characters
 - `maxActiveFilesRendered` of 8 provides good context coverage
 - `maxOpenErrorsRendered` of 3 avoids overwhelming error lists

@@ -43,12 +70,12 @@ const HOT_STATE_LIMITS = {

 ### Long-Term Memory Types

-| Type | Purpose | Stale After (days) |
-|------|---------|---------------------|
-| `feedback` | User preferences for workspace | 90 |
-| `project` | Project-level information | 60 |
-| `decision` | Important decisions | 45 |
-| `reference` | Key references | 90 |
+| Type | Purpose | Rendered cap |
+|------|---------|--------------|
+| `feedback` | User preferences for workspace | 10 |
+| `project` | Project-level information | 8 |
+| `decision` | Important decisions | 10 |
+| `reference` | Key references | 6 |

 ### Memory Sources

@@ -114,7 +141,7 @@ To customize limits, edit the constants in `src/types.ts`:
 ```typescript
 // Example: Increase workspace memory limit
 export const LONG_TERM_LIMITS = {
-  maxRenderedChars: 6000,  // Increased from 5200
+  maxRenderedChars: 6000,  // Increased from 3600
  maxEntries: 35,          // Increased from 28
  // ...
 };
@@ -144,7 +171,7 @@ const HOT_STATE_LIMITS = {
 // Preserve more context
 const LONG_TERM_LIMITS = {
  maxEntries: 40,              // Increased
-  targetRenderedChars: 5000,    // Increased
+  targetRenderedChars: 5000,   // Increased
 };
 ```

@@ -175,6 +202,16 @@ cat ~/.local/share/opencode-working-memory/workspaces/*/workspace-memory.json |
 cat ~/.local/share/opencode-working-memory/workspaces/*/sessions/*.json | jq
 ```

+### Inspect Retention Health
+
+From a source checkout, maintainers can inspect stored vs rendered memory behavior:
+
+```bash
+bun scripts/memory-diag.ts health
+```
+
+The health output includes stored active memories, rendered candidates, type caps, global cap overflow, dormancy status, retention monitoring alerts, and strength-ranked top/weakest entries.
+
 ### Clear Workspace Memory

 ```bash
@@ -1,6 +1,6 @@
 {
  "name": "opencode-working-memory",
-  "version": "1.4.0",
+  "version": "1.5.0",
  "description": "Three-layer memory architecture for OpenCode with workspace memory and hot session state",
  "type": "module",
  "main": "index.ts",
@@ -95,15 +95,15 @@ export type SessionState = {
 };

 export const LONG_TERM_LIMITS = {
-  maxRenderedChars: 5200,
-  targetRenderedChars: 4200,
+  maxRenderedChars: 3600,
+  targetRenderedChars: 3000,
  maxEntries: 28,
  maxEntryTextChars: 260,
  maxRationaleChars: 180,
 } as const;

 export const HOT_STATE_LIMITS = {
-  maxRenderedChars: 1200,
+  maxRenderedChars: 700,
  maxActiveFilesStored: 20,
  maxActiveFilesRendered: 8,
  maxOpenErrorsStored: 5,
@@ -4,7 +4,7 @@ import { mkdir, mkdtemp, readFile, rm, stat, writeFile } from "node:fs/promises"
 import { join, dirname } from "node:path";
 import { tmpdir } from "node:os";
 import type { LongTermMemoryEntry, WorkspaceMemoryStore } from "../src/types.ts";
-import { LONG_TERM_LIMITS } from "../src/types.ts";
+import { HOT_STATE_LIMITS, LONG_TERM_LIMITS } from "../src/types.ts";
 import { workspaceKey, workspaceMemoryPath } from "../src/paths.ts";
 import {
  renderWorkspaceMemory,
@@ -33,6 +33,12 @@ import { REAL_WORKSPACE_FIXTURES } from "./fixtures/real-workspaces-snapshot.ts"

 const DAY_MS = 24 * 60 * 60 * 1000;

+test("default prompt budgets use calibrated conservative character caps", () => {
+  assert.equal(LONG_TERM_LIMITS.maxRenderedChars, 3600);
+  assert.equal(LONG_TERM_LIMITS.targetRenderedChars, 3000);
+  assert.equal(HOT_STATE_LIMITS.maxRenderedChars, 700);
+});
+
 function entry(id: string, text: string, type: LongTermMemoryEntry["type"] = "decision"): LongTermMemoryEntry {
  const now = new Date().toISOString();
  return {