commit 22aeab952d431e75e58e4956972dabc78d01059e Author: CodingInCarhartts Date: Sun Dec 14 12:31:01 2025 -0800 feat: Implement initial personal knowledge management system with vector search and MCP server integration. diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..efc5e03 --- /dev/null +++ b/.gitignore @@ -0,0 +1,21 @@ +# Dependencies +node_modules/ +bun.lock + +# Build output +dist/ + +# Runtime data +data/ + +# IDE +.vscode/ +.idea/ + +# OS +.DS_Store +Thumbs.db + +# Environment +.env +.env.local diff --git a/README.md b/README.md new file mode 100644 index 0000000..b7f2565 --- /dev/null +++ b/README.md @@ -0,0 +1,91 @@ +# opencode-personal-knowledge + +Personal knowledge MCP server with vector database for the Opencode ecosystem. + +## Features + +- **Semantic Search** — Find knowledge using vector embeddings +- **Text Search** — Keyword-based search fallback +- **Tag Organization** — Categorize entries with tags +- **Plug-and-Play** — No external services required (embeddings run locally) + +## Quick Start (Source installation - Testing) + +```bash +# Install dependencies +bun install + +# Run CLI +bun start add "Title" "Content" --tags "ai,mcp" +bun start search "query" + +# Run the MCP server manually - for testing only (not required for Opencode integration; the server starts automatically when Opencode loads) +bun run mcp +``` + +## Opencode Integration (Recommended) + +Add to `~/.config/opencode/opencode.jsonc`: + +```jsonc +{ + "mcp": { + "personal-knowledge": { + "type": "local", + "command": ["npx", "-y", "opencode-personal-knowledge"], + "enabled": true + } + } +} +``` + +## MCP Tools + +| Tool | Description | +| :---------------------- | :-------------------------- | +| `store_knowledge` | Store a new knowledge entry | +| `search_knowledge` | Semantic search | +| `search_knowledge_text` | Keyword search | +| `get_knowledge` | Get entry by ID | +| `update_knowledge` | Update entry | +| `delete_knowledge` | Delete entry | +| `list_knowledge` | List entries | +| 
`get_knowledge_stats` | Database stats | + +## Example Usage + +**User:** "store a knowledge entry about Opencode Features" + +**Agent:** Researches and compiles entry, then calls `store_knowledge`: + +``` +Tool: personal-knowledge_store_knowledge +Title: "Opencode Features" +Content: "Opencode is an open source AI coding agent that helps write code +in terminals, IDEs, or desktops. Key features include: LSP-enabled, +multi-session support, shareable session links, Claude Pro integration, +75+ LLM providers via Models.dev, and availability across terminal, +desktop app, and IDE extensions." +Tags: ["opencode", "features", "ai-coding-agent"] +``` + +**Result:** `✅ Stored knowledge entry #2: "Opencode Features" 📊 Indexed for semantic search` + +--- + +**User:** "@search_knowledge for opencode" + +**Agent:** Performs semantic search and returns matching entry: + +``` +Found 1 similar entry: + +### 1. Opencode Features (85% similar) +Opencode is an open source AI coding agent that helps write code in +terminals, IDEs, or desktops. Key features include: LSP-enabled, +multi-session support, shareable session links, Claude Pro integration... 
+``` + +## License + +MIT diff --git a/package.json b/package.json new file mode 100644 index 0000000..0a64ab3 --- /dev/null +++ b/package.json @@ -0,0 +1,59 @@ +{ + "name": "opencode-personal-knowledge", + "version": "1.0.0", + "description": "Personal knowledge MCP server with vector database for the Opencode ecosystem", + "type": "module", + "main": "dist/mcp-server.js", + "bin": { + "opencode-personal-knowledge": "dist/mcp-server.js" + }, + "files": [ + "dist", + "README.md" + ], + "scripts": { + "start": "bun run src/index.ts", + "dev": "bun --watch run src/index.ts", + "mcp": "bun run src/mcp-server.ts", + "build": "bun build src/mcp-server.ts --outdir=dist --target=node --format=esm && bun build src/index.ts --outdir=dist --target=node --format=esm", + "prepublishOnly": "npm run build", + "test": "bun test", + "test:coverage": "bun test --coverage", + "lint": "bunx @biomejs/biome check src/", + "format": "bunx @biomejs/biome format --write src/" + }, + "keywords": [ + "mcp", + "model-context-protocol", + "knowledge", + "vector", + "opencode", + "ai", + "embeddings", + "semantic-search" + ], + "author": "NocturnLabs - Yum (CodingInCarhartts)", + "license": "MIT", + "repository": { + "type": "git", + "url": "https://github.com/NocturnLabs/opencode-personal-knowledge" + }, + "engines": { + "node": ">=18.0.0" + }, + "dependencies": { + "@lancedb/lancedb": "^0.22.3", + "@modelcontextprotocol/sdk": "^1.24.3", + "@xenova/transformers": "^2.17.2", + "commander": "^14.0.2", + "zod": "^4.1.13" + }, + "devDependencies": { + "@biomejs/biome": "^1.9.4", + "@types/bun": "^1.1.14" + }, + "trustedDependencies": [ + "@biomejs/biome", + "protobufjs" + ] +} diff --git a/src/database/index.ts b/src/database/index.ts new file mode 100644 index 0000000..936ef19 --- /dev/null +++ b/src/database/index.ts @@ -0,0 +1,280 @@ +/** + * Database module for knowledge entries using Bun's native SQLite. 
/**
 * SQLite persistence for knowledge entries: storage location, row/entry
 * types, connection bootstrap, and single-row create / read / update.
 */
import { Database } from "bun:sqlite";
import { join } from "path";
import { existsSync, mkdirSync } from "fs";
import { homedir } from "os";

// Data lives in OPENCODE_PK_DATA_DIR when that variable is set, otherwise in
// ~/.local/share/opencode-personal-knowledge.
const DATA_DIR = process.env.OPENCODE_PK_DATA_DIR || join(homedir(), ".local", "share", "opencode-personal-knowledge");
const DB_PATH = join(DATA_DIR, "knowledge.db");

/** A knowledge entry as handed to callers; `tags` is a decoded string array. */
export interface KnowledgeEntry {
  id?: number;
  title: string;
  content: string;
  source?: string;
  tags?: string[];
  created_at: string;
  updated_at: string;
}

/** A raw `knowledge_entries` row; `tags` holds the JSON-encoded array text. */
export interface KnowledgeRecord {
  id: number;
  title: string;
  content: string;
  source: string | null;
  tags: string | null; // JSON string
  created_at: string;
  updated_at: string;
}

// Process-wide connection, opened lazily by initDatabase().
let db: Database | null = null;

/**
 * Ensure data directory exists.
 */
function ensureDataDir(): void {
  if (!existsSync(DATA_DIR)) {
    mkdirSync(DATA_DIR, { recursive: true });
  }
}

/**
 * Decode the JSON text stored in the `tags` column.
 *
 * @param raw Column value as read from SQLite.
 * @returns The string tags, or `undefined` when the column is NULL/empty or
 *          does not contain a JSON array (so one damaged row cannot throw
 *          out of a read).
 */
function decodeTagsColumn(raw: string | null): string[] | undefined {
  if (!raw) return undefined;
  try {
    const parsed: unknown = JSON.parse(raw);
    if (Array.isArray(parsed)) {
      return parsed.filter((tag): tag is string => typeof tag === "string");
    }
  } catch {
    // Not valid JSON: treated the same as "no tags" below.
  }
  return undefined;
}

/**
 * Initialize the database connection and create tables if needed.
 *
 * The connection is cached only after the schema statements have succeeded;
 * if any of them throws, the handle is closed and the next call starts over
 * instead of reusing a connection without tables.
 *
 * @returns The shared connection.
 */
export function initDatabase(): Database {
  if (db) return db;

  ensureDataDir();
  const connection = new Database(DB_PATH, { create: true });

  try {
    connection.run(`
      CREATE TABLE IF NOT EXISTS knowledge_entries (
        id INTEGER PRIMARY KEY AUTOINCREMENT,
        title TEXT NOT NULL,
        content TEXT NOT NULL,
        source TEXT,
        tags TEXT,
        created_at TEXT NOT NULL,
        updated_at TEXT NOT NULL
      )
    `);

    // Create indexes
    connection.run(`CREATE INDEX IF NOT EXISTS idx_tags ON knowledge_entries(tags)`);
    connection.run(`CREATE INDEX IF NOT EXISTS idx_created ON knowledge_entries(created_at)`);
  } catch (error) {
    connection.close();
    throw error;
  }

  db = connection;
  return connection;
}

/**
 * Save a new knowledge entry.
 *
 * `created_at` and `updated_at` are both set to the current time in ISO-8601
 * form; `tags` is stored as JSON text, or NULL when absent.
 *
 * @param entry Entry fields without id and timestamps.
 * @returns The id of the inserted row.
 */
export function saveKnowledgeEntry(entry: Omit<KnowledgeEntry, "id" | "created_at" | "updated_at">): number {
  const database = initDatabase();
  const now = new Date().toISOString();
  const tagsJson = entry.tags ? JSON.stringify(entry.tags) : null;

  const stmt = database.prepare(`
    INSERT INTO knowledge_entries (title, content, source, tags, created_at, updated_at)
    VALUES (?, ?, ?, ?, ?, ?)
  `);

  // An empty source string is stored as NULL, same as a missing one.
  const result = stmt.run(entry.title, entry.content, entry.source || null, tagsJson, now, now);
  return Number(result.lastInsertRowid);
}

/**
 * Get a knowledge entry by ID.
 *
 * @param id Row id.
 * @returns The decoded entry, or `null` when no row has that id.
 */
export function getKnowledgeEntry(id: number): KnowledgeEntry | null {
  const database = initDatabase();
  const record = database.prepare("SELECT * FROM knowledge_entries WHERE id = ?").get(id) as KnowledgeRecord | null | undefined;

  if (!record) return null;

  return {
    id: record.id,
    title: record.title,
    content: record.content,
    source: record.source || undefined,
    tags: decodeTagsColumn(record.tags),
    created_at: record.created_at,
    updated_at: record.updated_at,
  };
}

/**
 * Update an existing knowledge entry.
 *
 * Fields missing from `updates` keep their stored value; `updated_at` is
 * always refreshed.
 *
 * @param id Row id.
 * @param updates Fields to overwrite.
 * @returns `false` when no row has that id, otherwise `true`.
 */
export function updateKnowledgeEntry(
  id: number,
  updates: Partial<Pick<KnowledgeEntry, "title" | "content" | "source" | "tags">>
): boolean {
  const database = initDatabase();
  const existing = getKnowledgeEntry(id);
  if (!existing) return false;

  const now = new Date().toISOString();
  const newTitle = updates.title ?? existing.title;
  const newContent = updates.content ?? existing.content;
  const newSource = updates.source ?? existing.source ?? null;
  const tagsToStore = updates.tags ? updates.tags : existing.tags;
  const newTags = tagsToStore ? JSON.stringify(tagsToStore) : null;

  const stmt = database.prepare(`
    UPDATE knowledge_entries
    SET title = ?, content = ?, source = ?, tags = ?, updated_at = ?
    WHERE id = ?
  `);

  stmt.run(newTitle, newContent, newSource, newTags, now, id);
  return true;
}
/**
 * Delete a knowledge entry.
 *
 * @param id Row id.
 * @returns `true` when a row was removed, `false` when no row has that id.
 */
export function deleteKnowledgeEntry(id: number): boolean {
  const database = initDatabase();
  const result = database.prepare("DELETE FROM knowledge_entries WHERE id = ?").run(id);
  return result.changes > 0;
}

/**
 * Escape `text` so it matches literally inside a `LIKE ... ESCAPE '\'`
 * pattern: `%`, `_` and the escape character itself get a leading backslash.
 */
function escapeLikeLiteral(text: string): string {
  return text.replace(/[\\%_]/g, "\\$&");
}

/**
 * Decode the JSON text of a `tags` column; anything that is not a JSON array
 * (NULL, empty, malformed) yields `undefined` rather than throwing.
 */
function parseTagsJson(raw: string | null): string[] | undefined {
  if (!raw) return undefined;
  try {
    const parsed: unknown = JSON.parse(raw);
    if (Array.isArray(parsed)) {
      return parsed.filter((tag): tag is string => typeof tag === "string");
    }
  } catch {
    // Malformed JSON is reported as "no tags".
  }
  return undefined;
}

/** Convert a raw table row into the entry shape returned to callers. */
function rowToKnowledgeEntry(record: KnowledgeRecord): KnowledgeEntry {
  return {
    id: record.id,
    title: record.title,
    content: record.content,
    source: record.source || undefined,
    tags: parseTagsJson(record.tags),
    created_at: record.created_at,
    updated_at: record.updated_at,
  };
}

/**
 * List knowledge entries with optional filters.
 *
 * @param options.limit Maximum rows to return (default 20).
 * @param options.offset Rows to skip (default 0).
 * @param options.tags When non-empty, keep entries carrying at least one of
 *                     these tags.
 * @returns Entries ordered by `created_at`, newest first.
 */
export function listKnowledgeEntries(options: {
  limit?: number;
  offset?: number;
  tags?: string[];
}): KnowledgeEntry[] {
  const database = initDatabase();
  const { limit = 20, offset = 0, tags } = options;

  let sql = "SELECT * FROM knowledge_entries";
  const params: (string | number)[] = [];

  if (tags && tags.length > 0) {
    // Tags are stored as JSON array text, so each tag is looked up in its
    // JSON-encoded form ("tag", with inner quotes/backslashes escaped the
    // same way they were written) and LIKE wildcards are neutralized.
    const tagConditions = tags.map(() => "tags LIKE ? ESCAPE '\\'").join(" OR ");
    sql += ` WHERE (${tagConditions})`;
    params.push(...tags.map((t) => `%${escapeLikeLiteral(JSON.stringify(t))}%`));
  }

  sql += " ORDER BY created_at DESC LIMIT ? OFFSET ?";
  params.push(limit, offset);

  const records = database.prepare(sql).all(...params) as KnowledgeRecord[];
  return records.map(rowToKnowledgeEntry);
}

/**
 * Search knowledge entries by text.
 *
 * The query is lower-cased and split on whitespace; an entry matches when
 * any word occurs in its title or content. Words of one or two characters
 * are ignored when longer words are present, but are used when they are all
 * the query contains, so short queries such as "AI" still return results.
 *
 * @param query Free-text query.
 * @param limit Maximum rows to return (default 10).
 * @returns Matching entries ordered by `updated_at`, newest first; an empty
 *          array for a blank query.
 */
export function searchKnowledgeByText(query: string, limit = 10): KnowledgeEntry[] {
  const database = initDatabase();

  const tokens = query.toLowerCase().split(/\s+/).filter((w) => w.length > 0);
  const longTokens = tokens.filter((w) => w.length > 2);
  const words = longTokens.length > 0 ? longTokens : tokens;

  if (words.length === 0) {
    return [];
  }

  const conditions = words
    .map(() => "(LOWER(title) LIKE ? ESCAPE '\\' OR LOWER(content) LIKE ? ESCAPE '\\')")
    .join(" OR ");

  // Each word is bound twice (title, content) and matched literally.
  const params = words.flatMap((w) => {
    const pattern = `%${escapeLikeLiteral(w)}%`;
    return [pattern, pattern];
  });

  const records = database.prepare(`
    SELECT * FROM knowledge_entries
    WHERE ${conditions}
    ORDER BY updated_at DESC
    LIMIT ?
  `).all(...params, limit) as KnowledgeRecord[];

  return records.map(rowToKnowledgeEntry);
}

/**
 * Get all entries for vector conversion.
 *
 * @returns Every entry, ordered by id.
 */
export function getAllEntries(): KnowledgeEntry[] {
  const database = initDatabase();
  const records = database.prepare("SELECT * FROM knowledge_entries ORDER BY id").all() as KnowledgeRecord[];
  return records.map(rowToKnowledgeEntry);
}

/**
 * Get database statistics.
 *
 * @returns Total row count, per-tag usage counts, and the smallest/largest
 *          `created_at` values (`null` when the table is empty).
 */
export function getStats(): {
  totalEntries: number;
  tagCounts: Record<string, number>;
  oldestEntry: string | null;
  newestEntry: string | null;
} {
  const database = initDatabase();

  // One aggregate query instead of three separate scans.
  const summary = database.prepare(
    "SELECT COUNT(*) as count, MIN(created_at) as oldest, MAX(created_at) as newest FROM knowledge_entries"
  ).get() as { count: number; oldest: string | null; newest: string | null };

  // Count tags in a Map so tag names such as "constructor" cannot collide
  // with properties inherited from Object.prototype.
  const tagRows = database.prepare("SELECT tags FROM knowledge_entries WHERE tags IS NOT NULL").all() as { tags: string }[];
  const counts = new Map<string, number>();

  for (const row of tagRows) {
    for (const tag of parseTagsJson(row.tags) ?? []) {
      counts.set(tag, (counts.get(tag) ?? 0) + 1);
    }
  }

  return {
    totalEntries: summary.count,
    tagCounts: Object.fromEntries(counts),
    oldestEntry: summary.oldest,
    newestEntry: summary.newest,
  };
}
+ */ +export function closeDatabase(): void { + if (db) { + db.close(); + db = null; + } +} diff --git a/src/index.ts b/src/index.ts new file mode 100644 index 0000000..66c4260 --- /dev/null +++ b/src/index.ts @@ -0,0 +1,238 @@ +#!/usr/bin/env bun +/** + * Personal Knowledge CLI + * + * Command-line interface for managing personal knowledge entries. + */ +import { Command } from "commander"; +import { + addKnowledge, + searchKnowledge, + searchKnowledgeText, + getKnowledge, + updateKnowledge, + deleteKnowledge, + listKnowledge, + getKnowledgeStats, +} from "./services/knowledgeService.js"; +import { convertToVectorDB, getVectorStats, clearVectorDB } from "./services/vectorService.js"; + +const program = new Command(); + +program + .name("pk") + .description("Personal Knowledge CLI - Manage your knowledge base") + .version("1.0.0"); + +// Add command +program + .command("add") + .description("Add a new knowledge entry") + .argument("", "Entry title") + .argument("<content>", "Entry content") + .option("-s, --source <source>", "Source URL or reference") + .option("-t, --tags <tags>", "Comma-separated tags") + .action(async (title, content, options) => { + const tags = options.tags ? options.tags.split(",").map((t: string) => t.trim()) : undefined; + const result = await addKnowledge({ title, content, source: options.source, tags }); + console.log(`✅ Added entry #${result.id}: "${title}"`); + console.log(result.vectorized ? 
"📊 Indexed for semantic search" : "⚠️ Saved to database only"); + }); + +// Search command +program + .command("search") + .description("Search knowledge entries") + .argument("<query>", "Search query") + .option("-t, --text", "Use text search instead of semantic search") + .option("-l, --limit <limit>", "Maximum results", "5") + .action(async (query, options) => { + const limit = parseInt(options.limit); + + if (options.text) { + const results = searchKnowledgeText(query, limit); + if (results.length === 0) { + console.log("No results found."); + return; + } + console.log(`Found ${results.length} result(s):\n`); + for (const r of results) { + console.log(`[${r.id}] ${r.title}`); + console.log(` ${r.content.slice(0, 100)}...`); + if (r.tags) console.log(` Tags: ${r.tags.join(", ")}`); + console.log(); + } + } else { + try { + const results = await searchKnowledge(query, { limit }); + if (results.length === 0) { + console.log("No similar entries found."); + return; + } + console.log(`Found ${results.length} similar entries:\n`); + for (const r of results) { + const similarity = Math.round(r.score * 100); + console.log(`[${r.id}] ${r.title} (${similarity}% similar)`); + console.log(` ${r.content_preview.slice(0, 100)}...`); + if (r.tags.length > 0) console.log(` Tags: ${r.tags.join(", ")}`); + console.log(); + } + } catch (error) { + console.error("Semantic search failed. Try --text for keyword search."); + console.error(error instanceof Error ? 
error.message : error); + } + } + }); + +// Get command +program + .command("get") + .description("Get a knowledge entry by ID") + .argument("<id>", "Entry ID") + .action((id) => { + const entry = getKnowledge(parseInt(id)); + if (!entry) { + console.log(`No entry found with ID: ${id}`); + return; + } + console.log(`# ${entry.title}\n`); + console.log(`ID: ${entry.id}`); + console.log(`Created: ${entry.created_at}`); + console.log(`Updated: ${entry.updated_at}`); + if (entry.source) console.log(`Source: ${entry.source}`); + if (entry.tags) console.log(`Tags: ${entry.tags.join(", ")}`); + console.log(`\n${entry.content}`); + }); + +// Update command +program + .command("update") + .description("Update a knowledge entry") + .argument("<id>", "Entry ID") + .option("--title <title>", "New title") + .option("--content <content>", "New content") + .option("-s, --source <source>", "New source") + .option("-t, --tags <tags>", "New comma-separated tags") + .action(async (id, options) => { + const updates: Record<string, unknown> = {}; + if (options.title) updates.title = options.title; + if (options.content) updates.content = options.content; + if (options.source) updates.source = options.source; + if (options.tags) updates.tags = options.tags.split(",").map((t: string) => t.trim()); + + if (Object.keys(updates).length === 0) { + console.log("No updates provided."); + return; + } + + const result = await updateKnowledge(parseInt(id), updates); + if (!result.success) { + console.log(`No entry found with ID: ${id}`); + return; + } + console.log(`✅ Updated entry #${id}`); + console.log(result.vectorized ? 
"📊 Re-indexed" : "⚠️ Vector update failed"); + }); + +// Delete command +program + .command("delete") + .description("Delete a knowledge entry") + .argument("<id>", "Entry ID") + .action(async (id) => { + const success = await deleteKnowledge(parseInt(id)); + if (!success) { + console.log(`No entry found with ID: ${id}`); + return; + } + console.log(`✅ Deleted entry #${id}`); + }); + +// List command +program + .command("list") + .description("List knowledge entries") + .option("-l, --limit <limit>", "Maximum entries", "20") + .option("-o, --offset <offset>", "Offset for pagination", "0") + .option("-t, --tags <tags>", "Filter by comma-separated tags") + .action((options) => { + const limit = parseInt(options.limit); + const offset = parseInt(options.offset); + const tags = options.tags ? options.tags.split(",").map((t: string) => t.trim()) : undefined; + + const entries = listKnowledge({ limit, offset, tags }); + if (entries.length === 0) { + console.log("No entries found."); + return; + } + + console.log(`📚 Knowledge Entries (${entries.length}):\n`); + for (const e of entries) { + console.log(`[${e.id}] ${e.title}${e.tags ? 
` [${e.tags.join(", ")}]` : ""}`); + } + }); + +// Stats command +program + .command("stats") + .description("Get knowledge base statistics") + .action(async () => { + const stats = await getKnowledgeStats(); + + console.log("📊 Knowledge Base Stats\n"); + console.log(`Total Entries: ${stats.database.totalEntries}`); + console.log(`Vectors Indexed: ${stats.vectors.totalVectors}`); + console.log(`Oldest: ${stats.database.oldestEntry || "N/A"}`); + console.log(`Newest: ${stats.database.newestEntry || "N/A"}`); + + const tags = Object.entries(stats.database.tagCounts).sort((a, b) => b[1] - a[1]); + if (tags.length > 0) { + console.log("\nTop Tags:"); + for (const [tag, count] of tags.slice(0, 10)) { + console.log(` ${tag}: ${count}`); + } + } + }); + +// Vectors subcommand +const vectors = program.command("vectors").description("Manage vector database"); + +vectors + .command("convert") + .description("Convert all entries to vector database") + .action(async () => { + console.log("Converting entries to vectors..."); + const result = await convertToVectorDB({ + onProgress: (current, total) => { + process.stdout.write(`\rProgress: ${current}/${total}`); + }, + }); + console.log(`\n✅ Converted ${result.converted} entries (${result.skipped} already indexed)`); + }); + +vectors + .command("stats") + .description("Get vector database statistics") + .action(async () => { + const stats = await getVectorStats(); + console.log("📊 Vector Database Stats\n"); + console.log(`Total Vectors: ${stats.totalVectors}`); + + const tags = Object.entries(stats.tagCounts).sort((a, b) => b[1] - a[1]); + if (tags.length > 0) { + console.log("\nTags in vectors:"); + for (const [tag, count] of tags.slice(0, 10)) { + console.log(` ${tag}: ${count}`); + } + } + }); + +vectors + .command("clear") + .description("Clear the vector database") + .action(async () => { + await clearVectorDB(); + console.log("✅ Vector database cleared"); + }); + +program.parse(); diff --git a/src/mcp-server.ts 
b/src/mcp-server.ts new file mode 100644 index 0000000..25f4e14 --- /dev/null +++ b/src/mcp-server.ts @@ -0,0 +1,304 @@ +#!/usr/bin/env bun +/** + * Personal Knowledge MCP Server + * + * Exposes knowledge database via Model Context Protocol for use by AI agents. + * + * Tools provided: + * - store_knowledge: Store a new knowledge entry + * - search_knowledge: Semantic search using vector embeddings + * - search_knowledge_text: Keyword-based text search + * - get_knowledge: Get a specific entry by ID + * - update_knowledge: Update an existing entry + * - delete_knowledge: Delete an entry + * - list_knowledge: List entries with filters + * - get_knowledge_stats: Get database statistics + */ +import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js"; +import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js"; +import { z } from "zod"; +import { + addKnowledge, + searchKnowledge, + searchKnowledgeText, + getKnowledge, + updateKnowledge, + deleteKnowledge, + listKnowledge, + getKnowledgeStats, +} from "./services/knowledgeService.js"; + +// Create MCP server +const server = new McpServer({ + name: "personal-knowledge-mcp", + version: "1.0.0", +}); + +// Tool: Store knowledge +server.tool( + "store_knowledge", + "Store a new knowledge entry in your personal knowledge base. 
Use this to save important information, notes, or learnings for later retrieval.", + { + title: z.string().describe("Short descriptive title for the entry"), + content: z.string().describe("The full content/text of the knowledge entry"), + source: z.string().optional().describe("Optional source URL or reference"), + tags: z.array(z.string()).optional().describe("Optional tags for categorization (e.g., ['typescript', 'patterns'])"), + }, + async ({ title, content, source, tags }) => { + try { + const result = await addKnowledge({ title, content, source, tags }); + return { + content: [{ + type: "text", + text: `✅ Stored knowledge entry #${result.id}: "${title}"\n${result.vectorized ? "📊 Indexed for semantic search" : "⚠️ Saved to database only (vector indexing failed)"}`, + }], + }; + } catch (error) { + const message = error instanceof Error ? error.message : "Unknown error"; + return { + content: [{ type: "text", text: `❌ Failed to store knowledge: ${message}` }], + }; + } + } +); + +// Tool: Semantic search +server.tool( + "search_knowledge", + "Search your personal knowledge base using semantic similarity. Returns entries most similar in meaning to your query.", + { + query: z.string().describe("Search query to find similar knowledge entries"), + limit: z.number().optional().default(5).describe("Maximum number of results (default: 5)"), + }, + async ({ query, limit }) => { + try { + const results = await searchKnowledge(query, { limit, minScore: 0.3 }); + + if (results.length === 0) { + return { + content: [{ type: "text", text: "No similar knowledge entries found." }], + }; + } + + let output = `## Found ${results.length} similar entries:\n\n`; + for (let i = 0; i < results.length; i++) { + const r = results[i]; + const similarity = Math.round(r.score * 100); + output += `### ${i + 1}. 
${r.title} (${similarity}% similar)\n`; + output += `**ID:** ${r.id}\n`; + if (r.tags.length > 0) { + output += `**Tags:** ${r.tags.join(", ")}\n`; + } + output += `\n${r.content_preview}...\n\n---\n\n`; + } + + return { + content: [{ type: "text", text: output }], + }; + } catch (error) { + const message = error instanceof Error ? error.message : "Unknown error"; + if (message.includes("not initialized")) { + return { + content: [{ + type: "text", + text: "Vector database not initialized. Use search_knowledge_text for keyword search, or add some entries first." + }], + }; + } + return { + content: [{ type: "text", text: `Search error: ${message}` }], + }; + } + } +); + +// Tool: Text search +server.tool( + "search_knowledge_text", + "Search knowledge entries by keyword (text-based, no semantic similarity). Good for exact matches.", + { + query: z.string().describe("Keywords to search for in titles and content"), + limit: z.number().optional().default(10).describe("Maximum number of results"), + }, + async ({ query, limit }) => { + const results = searchKnowledgeText(query, limit); + + if (results.length === 0) { + return { + content: [{ type: "text", text: `No results found for: "${query}"` }], + }; + } + + const formatted = results.map((r) => + `**${r.title}** (ID: ${r.id})\n${r.content.slice(0, 200)}...${r.tags ? 
`\nTags: ${r.tags.join(", ")}` : ""}` + ).join("\n\n---\n\n"); + + return { + content: [{ + type: "text", + text: `Found ${results.length} result(s) for "${query}":\n\n${formatted}` + }], + }; + } +); + +// Tool: Get by ID +server.tool( + "get_knowledge", + "Get a specific knowledge entry by its ID", + { + id: z.number().describe("The ID of the knowledge entry"), + }, + async ({ id }) => { + const entry = getKnowledge(id); + + if (!entry) { + return { + content: [{ type: "text", text: `No entry found with ID: ${id}` }], + }; + } + + return { + content: [{ + type: "text", + text: `# ${entry.title}\n\n**ID:** ${entry.id}\n**Created:** ${entry.created_at}\n**Updated:** ${entry.updated_at}${entry.source ? `\n**Source:** ${entry.source}` : ""}${entry.tags ? `\n**Tags:** ${entry.tags.join(", ")}` : ""}\n\n---\n\n${entry.content}`, + }], + }; + } +); + +// Tool: Update +server.tool( + "update_knowledge", + "Update an existing knowledge entry", + { + id: z.number().describe("The ID of the entry to update"), + title: z.string().optional().describe("New title"), + content: z.string().optional().describe("New content"), + source: z.string().optional().describe("New source"), + tags: z.array(z.string()).optional().describe("New tags (replaces existing)"), + }, + async ({ id, title, content, source, tags }) => { + const updates: Record<string, unknown> = {}; + if (title !== undefined) updates.title = title; + if (content !== undefined) updates.content = content; + if (source !== undefined) updates.source = source; + if (tags !== undefined) updates.tags = tags; + + if (Object.keys(updates).length === 0) { + return { + content: [{ type: "text", text: "No updates provided" }], + }; + } + + const result = await updateKnowledge(id, updates); + + if (!result.success) { + return { + content: [{ type: "text", text: `No entry found with ID: ${id}` }], + }; + } + + return { + content: [{ + type: "text", + text: `✅ Updated entry #${id}\n${result.vectorized ? 
"📊 Re-indexed for semantic search" : "⚠️ Database updated (vector re-indexing failed)"}` + }], + }; + } +); + +// Tool: Delete +server.tool( + "delete_knowledge", + "Delete a knowledge entry from the database", + { + id: z.number().describe("The ID of the entry to delete"), + }, + async ({ id }) => { + const success = await deleteKnowledge(id); + + if (!success) { + return { + content: [{ type: "text", text: `No entry found with ID: ${id}` }], + }; + } + + return { + content: [{ type: "text", text: `✅ Deleted entry #${id}` }], + }; + } +); + +// Tool: List +server.tool( + "list_knowledge", + "List knowledge entries with optional filtering", + { + limit: z.number().optional().default(20).describe("Maximum entries to return"), + offset: z.number().optional().default(0).describe("Offset for pagination"), + tags: z.array(z.string()).optional().describe("Filter by tags"), + }, + async ({ limit, offset, tags }) => { + const entries = listKnowledge({ limit, offset, tags }); + + if (entries.length === 0) { + return { + content: [{ type: "text", text: "No knowledge entries found." }], + }; + } + + const formatted = entries.map((e) => + `- **${e.title}** (ID: ${e.id})${e.tags ? 
` [${e.tags.join(", ")}]` : ""}` + ).join("\n"); + + return { + content: [{ + type: "text", + text: `📚 Knowledge Entries (${entries.length}):\n\n${formatted}` + }], + }; + } +); + +// Tool: Stats +server.tool( + "get_knowledge_stats", + "Get statistics about your personal knowledge base", + {}, + async () => { + const stats = await getKnowledgeStats(); + + const tagList = Object.entries(stats.database.tagCounts) + .sort((a, b) => b[1] - a[1]) + .slice(0, 10) + .map(([tag, count]) => ` ${tag}: ${count}`) + .join("\n"); + + return { + content: [{ + type: "text", + text: `## Personal Knowledge Base Stats + +**Total Entries:** ${stats.database.totalEntries} +**Vectors Indexed:** ${stats.vectors.totalVectors} + +**Date Range:** + Oldest: ${stats.database.oldestEntry || "N/A"} + Newest: ${stats.database.newestEntry || "N/A"} + +**Top Tags:** +${tagList || " No tags yet"}`, + }], + }; + } +); + +// Start the server +async function main() { + const transport = new StdioServerTransport(); + await server.connect(transport); + console.error("Personal Knowledge MCP Server running on stdio"); +} + +main().catch(console.error); diff --git a/src/services/knowledgeService.ts b/src/services/knowledgeService.ts new file mode 100644 index 0000000..e2977de --- /dev/null +++ b/src/services/knowledgeService.ts @@ -0,0 +1,141 @@ +/** + * Knowledge Service + * + * Business logic coordinating database and vector operations. + */ +import { + saveKnowledgeEntry, + getKnowledgeEntry, + updateKnowledgeEntry, + deleteKnowledgeEntry, + listKnowledgeEntries, + searchKnowledgeByText, + getStats as getDbStats, + type KnowledgeEntry, +} from "../database/index.js"; +import { + queryVectors, + updateVector, + deleteVector, + getVectorStats, + type SearchResult, +} from "./vectorService.js"; + +export { type KnowledgeEntry }; + +/** + * Add a new knowledge entry with automatic vector indexing. 
/**
 * Re-read entry `id` from SQLite and hand it to `updateVector`.
 *
 * Failures are deliberately non-fatal (the SQLite write has already
 * happened), but the cause is written to stderr instead of being discarded.
 *
 * @param id Entry id to index.
 * @param failureMessage Prefix for the stderr line when indexing throws.
 * @returns `true` when `updateVector` completed, `false` when the entry
 *          could not be read back or indexing threw.
 */
async function indexEntry(id: number, failureMessage: string): Promise<boolean> {
  try {
    const entry = getKnowledgeEntry(id);
    if (!entry) return false;
    await updateVector(entry);
    return true;
  } catch (error: unknown) {
    const reason = error instanceof Error ? error.message : String(error);
    console.error(`${failureMessage}: ${reason}`);
    return false;
  }
}

/**
 * Add a new knowledge entry with automatic vector indexing.
 *
 * The entry is always saved to SQLite first; a vector-indexing failure is
 * reported through `vectorized: false` rather than thrown.
 *
 * @param entry Title, content and optional source/tags.
 * @returns The new entry id and whether vector indexing succeeded.
 */
export async function addKnowledge(entry: {
  title: string;
  content: string;
  source?: string;
  tags?: string[];
}): Promise<{ id: number; vectorized: boolean }> {
  // Save to SQLite
  const id = saveKnowledgeEntry(entry);

  // Index in vector DB
  const vectorized = await indexEntry(id, "Vector indexing failed, entry saved to database only");

  return { id, vectorized };
}

/**
 * Search knowledge using semantic similarity.
 *
 * @param query Text to search for.
 * @param options Passed through unchanged to `queryVectors`.
 * @returns Whatever `queryVectors` resolves to.
 */
export async function searchKnowledge(
  query: string,
  options: { limit?: number; minScore?: number } = {}
): Promise<SearchResult[]> {
  return queryVectors(query, options);
}

/**
 * Search knowledge using text matching.
 *
 * @param query Keywords to look for.
 * @param limit Maximum number of results (default 10).
 */
export function searchKnowledgeText(query: string, limit = 10): KnowledgeEntry[] {
  return searchKnowledgeByText(query, limit);
}

/**
 * Get a knowledge entry by ID.
 *
 * @returns The entry, or `null` when no entry has that id.
 */
export function getKnowledge(id: number): KnowledgeEntry | null {
  return getKnowledgeEntry(id);
}

/**
 * Update a knowledge entry with automatic vector re-indexing.
 *
 * @param id Entry id.
 * @param updates Fields to overwrite.
 * @returns `success: false` when no entry has that id; otherwise
 *          `success: true` plus whether re-indexing succeeded.
 */
export async function updateKnowledge(
  id: number,
  updates: Partial<Pick<KnowledgeEntry, "title" | "content" | "source" | "tags">>
): Promise<{ success: boolean; vectorized: boolean }> {
  const success = updateKnowledgeEntry(id, updates);

  if (!success) {
    return { success: false, vectorized: false };
  }

  // Re-index in vector DB
  const vectorized = await indexEntry(id, "Vector re-indexing failed");

  return { success, vectorized };
}
+ */ +export async function deleteKnowledge(id: number): Promise<boolean> { + // Delete vector first + try { + await deleteVector(id); + } catch { + // Continue even if vector deletion fails + } + + return deleteKnowledgeEntry(id); +} + +/** + * List knowledge entries with optional filters. + */ +export function listKnowledge(options: { + limit?: number; + offset?: number; + tags?: string[]; +}): KnowledgeEntry[] { + return listKnowledgeEntries(options); +} + +/** + * Get combined statistics. + */ +export async function getKnowledgeStats(): Promise<{ + database: ReturnType<typeof getDbStats>; + vectors: Awaited<ReturnType<typeof getVectorStats>>; +}> { + const database = getDbStats(); + const vectors = await getVectorStats(); + return { database, vectors }; +} diff --git a/src/services/vectorService.ts b/src/services/vectorService.ts new file mode 100644 index 0000000..0400d39 --- /dev/null +++ b/src/services/vectorService.ts @@ -0,0 +1,290 @@ +/** + * Vector Database Service + * + * Provides semantic search over knowledge entries using LanceDB and Transformers.js embeddings. + * Uses all-MiniLM-L6-v2 model which auto-downloads on first use (~22MB). + */ +import lancedb from "@lancedb/lancedb"; +import { pipeline, type FeatureExtractionPipeline } from "@xenova/transformers"; +import { join } from "path"; +import { existsSync, mkdirSync } from "fs"; +import { homedir } from "os"; +import { getAllEntries, type KnowledgeEntry } from "../database/index.js"; + +// Use persistent user data directory (XDG-compliant on Linux) +const DATA_DIR = process.env.OPENCODE_PK_DATA_DIR || join(homedir(), ".local", "share", "opencode-personal-knowledge"); +const VECTOR_DB_PATH = join(DATA_DIR, "vectors"); + +// Embedding model (auto-downloads on first use) +const EMBEDDING_MODEL = "Xenova/all-MiniLM-L6-v2"; + +// Singleton embedding pipeline +let embeddingPipeline: FeatureExtractionPipeline | null = null; + +/** + * Get or initialize the embedding pipeline. 
+ *
+ * The pipeline is created on the first call and kept in the module-level
+ * `embeddingPipeline` variable; later calls return the cached instance.
+ */
+async function getEmbeddingPipeline(): Promise<FeatureExtractionPipeline> {
+  // Fast path: the model has already been loaded.
+  if (embeddingPipeline) {
+    return embeddingPipeline;
+  }
+
+  console.error("Loading embedding model (first run may download ~22MB)...");
+  const loaded = await pipeline("feature-extraction", EMBEDDING_MODEL);
+  embeddingPipeline = loaded;
+  console.error("Embedding model loaded.");
+  return loaded;
+}
+
+/**
+ * Turn a piece of text into its embedding vector.
+ *
+ * The extractor is called with mean pooling and normalization enabled, and the
+ * resulting typed array is copied into a plain `number[]`.
+ */
+export async function embed(text: string): Promise<number[]> {
+  const model = await getEmbeddingPipeline();
+  const { data } = await model(text, { pooling: "mean", normalize: true });
+  return Array.from(data as Float32Array);
+}
+
+/**
+ * Row stored in the vector table: entry metadata plus its embedding.
+ */
+export interface VectorRecord {
+  // NOTE(review): presumably present so the record is accepted where a
+  // string-keyed row type is expected by the vector store; confirm.
+  [key: string]: unknown;
+  id: number;
+  title: string;
+  content_preview: string;
+  // Tags serialized as a JSON string, or null when the entry has none.
+  tags: string | null;
+  vector: number[];
+}
+
+/**
+ * One hit returned by a vector query.
+ */
+export interface SearchResult {
+  id: number;
+  title: string;
+  content_preview: string;
+  tags: string[];
+  score: number;
+}
+
+/**
+ * Create the vector database directory when it is missing.
+ */
+function ensureDataDir(): void {
+  if (existsSync(VECTOR_DB_PATH)) {
+    return;
+  }
+  mkdirSync(VECTOR_DB_PATH, { recursive: true });
+}
+
+/**
+ * Open a LanceDB connection on the vector database directory,
+ * creating the directory first if needed.
+ */
+async function getVectorDB() {
+  ensureDataDir();
+  const connection = await lancedb.connect(VECTOR_DB_PATH);
+  return connection;
+}
+
+/**
+ * Convert all knowledge entries to vector database.
+ *
+ * Entries whose id already has a row in the `knowledge_vectors` table, and
+ * entries without an id, are skipped.
+ *
+ * @param options.batchSize Number of entries embedded between `onProgress`
+ *   calls (default 50). Values that are not a finite number >= 1 fall back to 50.
+ * @param options.onProgress Called after each batch with
+ *   (entries processed so far, total entries to convert).
+ * @returns How many entries were embedded and how many were skipped.
+ */
+export async function convertToVectorDB(options: {
+  batchSize?: number;
+  onProgress?: (current: number, total: number) => void;
+} = {}): Promise<{ converted: number; skipped: number }> {
+  const { batchSize = 50, onProgress } = options;
+
+  // A zero, negative or non-finite step would never advance the batch loop below.
+  const step = Number.isFinite(batchSize) && batchSize >= 1 ? Math.floor(batchSize) : 50;
+
+  // Get all entries from SQLite
+  const entries = getAllEntries();
+
+  if (entries.length === 0) {
+    return { converted: 0, skipped: 0 };
+  }
+
+  const db = await getVectorDB();
+
+  // Check for existing table
+  const tables = await db.tableNames();
+  let existingIds = new Set<number>();
+
+  if (tables.includes("knowledge_vectors")) {
+    const table = await db.openTable("knowledge_vectors");
+    const existing = await table.query().select(["id"]).toArray();
+    existingIds = new Set(existing.map((r: { id: number }) => r.id));
+  }
+
+  // Filter out already converted entries
+  const toConvert = entries.filter((e) => e.id && !existingIds.has(e.id));
+
+  // Derive the count from the entries themselves so both return paths agree and
+  // vectors without a matching entry are not reported as skipped entries.
+  const skipped = entries.length - toConvert.length;
+
+  if (toConvert.length === 0) {
+    return { converted: 0, skipped };
+  }
+
+  // Process in batches; records are collected and written in one call at the end.
+  const vectorRecords: VectorRecord[] = [];
+
+  for (let i = 0; i < toConvert.length; i += step) {
+    const batch = toConvert.slice(i, i + step);
+
+    for (const entry of batch) {
+      // Combine title and content for embedding (content capped at 1000 characters)
+      const text = `${entry.title}\n${entry.content.slice(0, 1000)}`;
+      const vector = await embed(text);
+
+      vectorRecords.push({
+        id: entry.id!,
+        title: entry.title,
+        content_preview: entry.content.slice(0, 500),
+        tags: entry.tags ? JSON.stringify(entry.tags) : null,
+        vector,
+      });
+    }
+
+    onProgress?.(Math.min(i + step, toConvert.length), toConvert.length);
+  }
+
+  // Create or append to table
+  if (tables.includes("knowledge_vectors")) {
+    const table = await db.openTable("knowledge_vectors");
+    await table.add(vectorRecords);
+  } else {
+    await db.createTable("knowledge_vectors", vectorRecords);
+  }
+
+  return { converted: vectorRecords.length, skipped };
+}
+
+/**
+ * Query the vector database for similar entries.
+ *
+ * @param query Text to embed and search for.
+ * @param options.limit Maximum number of rows fetched from the table (default 5).
+ * @param options.minScore Rows scoring below this are dropped (default 0.3). The
+ *   filter runs after `limit`, so fewer than `limit` results may be returned.
+ * @throws Error when the `knowledge_vectors` table does not exist yet.
+ */
+export async function queryVectors(
+  query: string,
+  options: {
+    limit?: number;
+    minScore?: number;
+  } = {}
+): Promise<SearchResult[]> {
+  const { limit = 5, minScore = 0.3 } = options;
+
+  const db = await getVectorDB();
+  const tables = await db.tableNames();
+
+  if (!tables.includes("knowledge_vectors")) {
+    throw new Error("Vector database not initialized. Run 'bun start vectors convert' first.");
+  }
+
+  // Generate query embedding
+  const queryVector = await embed(query);
+
+  // Search
+  const table = await db.openTable("knowledge_vectors");
+  const results = await table
+    .vectorSearch(queryVector)
+    .limit(limit)
+    .toArray();
+
+  // Format and filter results
+  // NOTE(review): the score below treats `_distance` as a 0..1 dissimilarity.
+  // Confirm which distance metric this search uses; with a metric of a different
+  // range the score (and the `minScore` cut-off) would not be a 0..1 similarity.
+  return results
+    .map((r: Record<string, unknown>) => ({
+      id: r.id as number,
+      title: r.title as string,
+      content_preview: r.content_preview as string,
+      tags: r.tags ? JSON.parse(r.tags as string) : [],
+      score: 1 - (r._distance as number), // Convert distance to similarity score
+    }))
+    .filter((r) => r.score >= minScore);
+}
+
+/**
+ * Delete a vector by entry ID.
+ *
+ * @returns `false` when the `knowledge_vectors` table does not exist, otherwise
+ *   `true` once the delete has been issued.
+ * @throws TypeError when `id` is not a safe integer.
+ */
+export async function deleteVector(id: number): Promise<boolean> {
+  // The id is interpolated into a filter expression below. Types are erased at
+  // runtime, so refuse anything that is not a plain integer instead of letting
+  // arbitrary text become part of the predicate.
+  if (!Number.isSafeInteger(id)) {
+    throw new TypeError(`Invalid vector id: ${String(id)}`);
+  }
+
+  const db = await getVectorDB();
+  const tables = await db.tableNames();
+
+  if (!tables.includes("knowledge_vectors")) {
+    return false;
+  }
+
+  const table = await db.openTable("knowledge_vectors");
+  await table.delete(`id = ${id}`);
+  return true;
+}
+
+/**
+ * Update vector for a single entry.
+ *
+ * The new embedding is computed before the old row is removed, so a failure
+ * while embedding leaves the existing vector in place.
+ *
+ * @returns `false` when the entry has no id, otherwise `true` once the row is written.
+ */
+export async function updateVector(entry: KnowledgeEntry): Promise<boolean> {
+  if (!entry.id) return false;
+
+  // Build the replacement record first; embedding is the step that can fail
+  // before anything has been written.
+  const text = `${entry.title}\n${entry.content.slice(0, 1000)}`;
+  const vector = await embed(text);
+
+  const record: VectorRecord = {
+    id: entry.id,
+    title: entry.title,
+    content_preview: entry.content.slice(0, 500),
+    tags: entry.tags ? JSON.stringify(entry.tags) : null,
+    vector,
+  };
+
+  // Delete old vector
+  await deleteVector(entry.id);
+
+  // Create new vector
+  const db = await getVectorDB();
+  const tables = await db.tableNames();
+
+  if (tables.includes("knowledge_vectors")) {
+    const table = await db.openTable("knowledge_vectors");
+    await table.add([record]);
+  } else {
+    await db.createTable("knowledge_vectors", [record]);
+  }
+
+  return true;
+}
+
+/**
+ * Get vector database statistics.
+ *
+ * @returns The number of rows in the `knowledge_vectors` table and, for every
+ *   tag found in those rows, how many rows carry it. Both are empty/zero when
+ *   the table does not exist.
+ */
+export async function getVectorStats(): Promise<{
+  totalVectors: number;
+  tagCounts: Record<string, number>;
+}> {
+  const db = await getVectorDB();
+  const tables = await db.tableNames();
+
+  if (!tables.includes("knowledge_vectors")) {
+    return { totalVectors: 0, tagCounts: {} };
+  }
+
+  const table = await db.openTable("knowledge_vectors");
+  const rows = await table.query().select(["tags"]).toArray();
+
+  // Count in a Map: looking up a tag such as "constructor" on a plain object
+  // would read an inherited property instead of a missing count.
+  const counts = new Map<string, number>();
+
+  for (const row of rows) {
+    const { tags } = row as { tags: string | null };
+    if (!tags) continue;
+
+    // Tags are stored as a JSON-encoded string array.
+    for (const tag of JSON.parse(tags) as string[]) {
+      counts.set(tag, (counts.get(tag) ?? 0) + 1);
+    }
+  }
+
+  return {
+    totalVectors: rows.length,
+    tagCounts: Object.fromEntries(counts),
+  };
+}
+
+/**
+ * Clear the vector database.
+ *
+ * Drops the `knowledge_vectors` table when it exists; does nothing otherwise.
+ */
+export async function clearVectorDB(): Promise<void> {
+  const db = await getVectorDB();
+  const tables = await db.tableNames();
+
+  if (tables.includes("knowledge_vectors")) {
+    await db.dropTable("knowledge_vectors");
+  }
+}
diff --git a/tests/services/database.test.ts b/tests/services/database.test.ts
new file mode 100644
index 0000000..944f994
--- /dev/null
+++ b/tests/services/database.test.ts
@@ -0,0 +1,96 @@
+import { describe, test, expect, beforeAll, afterAll } from "bun:test";
+import { Database } from "bun:sqlite";
+import { join } from "path";
+import { existsSync, unlinkSync, mkdirSync } from "fs";
+
+// Test with a separate test database
+const TEST_DIR = join(import.meta.dir, "../../test-data");
+const TEST_DB_PATH = join(TEST_DIR, "test_knowledge.db");
+
+// The tests below share one database and depend on declaration order: the row
+// inserted by the first test (id 1) is read, updated and finally deleted by the rest.
+describe("database", () => {
+  let db: Database;
+
+  beforeAll(() => {
+    mkdirSync(TEST_DIR, { recursive: true });
+    // A database left behind by an interrupted run would keep its rows and its
+    // AUTOINCREMENT counter, breaking the `count === 1` and `id = 1` expectations.
+    if (existsSync(TEST_DB_PATH)) {
+      unlinkSync(TEST_DB_PATH);
+    }
+    db = new Database(TEST_DB_PATH, { create: true });
+    db.run(`
+      CREATE TABLE IF NOT EXISTS knowledge_entries (
+        id INTEGER PRIMARY KEY AUTOINCREMENT,
+        title TEXT NOT NULL,
+        content TEXT NOT NULL,
+        source TEXT,
+        tags TEXT,
+        created_at TEXT NOT NULL,
+        updated_at TEXT NOT NULL
+      )
+    `);
+  });
+
+  afterAll(() => {
+    db.close();
+    if (existsSync(TEST_DB_PATH)) {
+      unlinkSync(TEST_DB_PATH);
+    }
+  });
+
+  test("can insert a knowledge entry", () => {
+    const stmt = db.prepare(`
+      INSERT INTO knowledge_entries
+      (title, content, source, tags, created_at, updated_at)
+      VALUES (?, ?, ?, ?, ?, ?)
+    `);
+
+    const now = new Date().toISOString();
+    stmt.run("Test Title", "Test content here", "https://example.com", '["test","demo"]', now, now);
+
+    const count = db.prepare("SELECT COUNT(*) as count FROM knowledge_entries").get() as { count: number };
+    expect(count.count).toBe(1);
+  });
+
+  test("can query entries", () => {
+    const records = db.prepare("SELECT * FROM knowledge_entries").all();
+    expect(records.length).toBeGreaterThan(0);
+  });
+
+  test("can search by title", () => {
+    const records = db.prepare(
+      "SELECT * FROM knowledge_entries WHERE LOWER(title) LIKE ?"
+    ).all("%test%");
+    expect(records.length).toBe(1);
+  });
+
+  test("can search by content", () => {
+    const records = db.prepare(
+      "SELECT * FROM knowledge_entries WHERE LOWER(content) LIKE ?"
+    ).all("%content%");
+    expect(records.length).toBe(1);
+  });
+
+  test("can parse tags JSON", () => {
+    const record = db.prepare("SELECT tags FROM knowledge_entries WHERE id = 1").get() as { tags: string };
+    const tags = JSON.parse(record.tags);
+    expect(tags).toEqual(["test", "demo"]);
+  });
+
+  test("can update an entry", () => {
+    const now = new Date().toISOString();
+    db.prepare("UPDATE knowledge_entries SET title = ?, updated_at = ? WHERE id = ?").run("Updated Title", now, 1);
+
+    const record = db.prepare("SELECT title FROM knowledge_entries WHERE id = 1").get() as { title: string };
+    expect(record.title).toBe("Updated Title");
+  });
+
+  test("can delete an entry", () => {
+    db.prepare("DELETE FROM knowledge_entries WHERE id = ?").run(1);
+
+    const count = db.prepare("SELECT COUNT(*) as count FROM knowledge_entries").get() as { count: number };
+    expect(count.count).toBe(0);
+  });
+});
diff --git a/tsconfig.json b/tsconfig.json
new file mode 100644
index 0000000..1afbb53
--- /dev/null
+++ b/tsconfig.json
@@ -0,0 +1,17 @@
+{
+  "compilerOptions": {
+    "target": "ESNext",
+    "module": "ESNext",
+    "moduleResolution": "bundler",
+    "esModuleInterop": true,
+    "strict": true,
+    "skipLibCheck": true,
+    "resolveJsonModule": true,
+    "declaration": true,
+    "outDir": "./dist",
+    "rootDir": "./src",
+    "types": ["bun-types"]
+  },
+  "include": ["src/**/*"],
+  "exclude": ["node_modules", "dist", "tests"]
+}