feat: Implement initial personal knowledge management system with vector search and MCP server integration.

This commit is contained in:
CodingInCarhartts
2025-12-14 12:31:01 -08:00
commit 22aeab952d
10 changed files with 1532 additions and 0 deletions
+21
View File
@@ -0,0 +1,21 @@
# Dependencies
node_modules/
bun.lock
# Build output
dist/
# Runtime data
data/
# IDE
.vscode/
.idea/
# OS
.DS_Store
Thumbs.db
# Environment
.env
.env.local
+91
View File
@@ -0,0 +1,91 @@
# opencode-personal-knowledge
Personal knowledge MCP server with vector database for the Opencode ecosystem.
## Features
- **Semantic Search** — Find knowledge using vector embeddings
- **Text Search** — Keyword-based search fallback
- **Tag Organization** — Categorize entries with tags
- **Plug-and-Play** — No external services required (embeddings run locally)
## Quick Start (Source installation - Testing)
```bash
# Install dependencies
bun install
# Run CLI
bun start add "Title" "Content" --tags "ai,mcp"
bun start search "query"
# Run the MCP server manually (for testing only; not required for Opencode integration, which starts it automatically when Opencode loads)
bun run mcp
```
## Opencode Integration (Recommended)
Add to `~/.config/opencode/opencode.jsonc`:
```jsonc
{
"mcp": {
"personal-knowledge": {
"type": "local",
"command": ["npx", "-y", "opencode-personal-knowledge"],
"enabled": true
}
}
}
```
## MCP Tools
| Tool | Description |
| :---------------------- | :-------------------------- |
| `store_knowledge` | Store a new knowledge entry |
| `search_knowledge` | Semantic search |
| `search_knowledge_text` | Keyword search |
| `get_knowledge` | Get entry by ID |
| `update_knowledge` | Update entry |
| `delete_knowledge` | Delete entry |
| `list_knowledge` | List entries |
| `get_knowledge_stats` | Database stats |
## Example Usage
**User:** "store a knowledge entry about Opencode Features"
**Agent:** Researches and compiles entry, then calls `store_knowledge`:
```
Tool: personal-knowledge_store_knowledge
Title: "Opencode Features"
Content: "Opencode is an open source AI coding agent that helps write code
in terminals, IDEs, or desktops. Key features include: LSP-enabled,
multi-session support, shareable session links, Claude Pro integration,
75+ LLM providers via Models.dev, and availability across terminal,
desktop app, and IDE extensions."
Tags: ["opencode", "features", "ai-coding-agent"]
```
**Result:** `✅ Stored knowledge entry #2: "Opencode Features" 📊 Indexed for semantic search`
---
**User:** "@search_knowledge for opencode"
**Agent:** Performs semantic search and returns matching entry:
```
Found 1 similar entry:
### 1. Opencode Features (85% similar)
Opencode is an open source AI coding agent that helps write code in
terminals, IDEs, or desktops. Key features include: LSP-enabled,
multi-session support, shareable session links, Claude Pro integration...
```
## License
MIT
+59
View File
@@ -0,0 +1,59 @@
{
"name": "opencode-personal-knowledge",
"version": "1.0.0",
"description": "Personal knowledge MCP server with vector database for the Opencode ecosystem",
"type": "module",
"main": "dist/mcp-server.js",
"bin": {
"opencode-personal-knowledge": "dist/mcp-server.js"
},
"files": [
"dist",
"README.md"
],
"scripts": {
"start": "bun run src/index.ts",
"dev": "bun --watch run src/index.ts",
"mcp": "bun run src/mcp-server.ts",
"build": "bun build src/mcp-server.ts --outdir=dist --target=node --format=esm && bun build src/index.ts --outdir=dist --target=node --format=esm",
"prepublishOnly": "npm run build",
"test": "bun test",
"test:coverage": "bun test --coverage",
"lint": "bunx @biomejs/biome check src/",
"format": "bunx @biomejs/biome format --write src/"
},
"keywords": [
"mcp",
"model-context-protocol",
"knowledge",
"vector",
"opencode",
"ai",
"embeddings",
"semantic-search"
],
"author": "NocturnLabs - Yum (CodingInCarhartts)",
"license": "MIT",
"repository": {
"type": "git",
"url": "https://github.com/NocturnLabs/opencode-personal-knowledge"
},
"engines": {
"node": ">=18.0.0"
},
"dependencies": {
"@lancedb/lancedb": "^0.22.3",
"@modelcontextprotocol/sdk": "^1.24.3",
"@xenova/transformers": "^2.17.2",
"commander": "^14.0.2",
"zod": "^4.1.13"
},
"devDependencies": {
"@biomejs/biome": "^1.9.4",
"@types/bun": "^1.1.14"
},
"trustedDependencies": [
"@biomejs/biome",
"protobufjs"
]
}
+280
View File
@@ -0,0 +1,280 @@
/**
* Database module for knowledge entries using Bun's native SQLite.
*/
import { Database } from "bun:sqlite";
import { join } from "path";
import { existsSync, mkdirSync } from "fs";
import { homedir } from "os";
// Persistent per-user data directory. A non-empty OPENCODE_PK_DATA_DIR
// environment variable overrides the default, which is
// ~/.local/share/opencode-personal-knowledge (XDG-style on Linux).
const DATA_DIR = process.env.OPENCODE_PK_DATA_DIR || join(homedir(), ".local", "share", "opencode-personal-knowledge");
// Location of the SQLite database file inside the data directory.
const DB_PATH = join(DATA_DIR, "knowledge.db");
/**
 * A knowledge entry as exposed to callers of this module
 * (tags are a parsed array, absent values are `undefined`).
 */
export interface KnowledgeEntry {
  /** Row ID assigned by SQLite; absent until the entry has been saved. */
  id?: number;
  /** Title of the entry. */
  title: string;
  /** Full text of the entry. */
  content: string;
  /** Optional source (stored as NULL when absent). */
  source?: string;
  /** Optional tags; persisted as a JSON array string. */
  tags?: string[];
  /** Creation timestamp; written as an ISO 8601 string when the entry is saved. */
  created_at: string;
  /** Last-modification timestamp; refreshed as an ISO 8601 string on update. */
  updated_at: string;
}
/**
 * Raw row shape of the `knowledge_entries` table as returned by SQLite,
 * before tags are parsed and NULLs are mapped to `undefined`.
 */
export interface KnowledgeRecord {
  /** Primary key. */
  id: number;
  /** Title of the entry. */
  title: string;
  /** Full text of the entry. */
  content: string;
  /** Source column; NULL when no source was given. */
  source: string | null;
  /** Tags column; NULL when no tags were given. */
  tags: string | null; // JSON string
  /** Creation timestamp as stored (TEXT column). */
  created_at: string;
  /** Last-modification timestamp as stored (TEXT column). */
  updated_at: string;
}
// Module-level connection cache: populated by initDatabase() and reset to
// null by closeDatabase().
let db: Database | null = null;
/**
 * Create the data directory (including missing parents) unless it is
 * already present.
 */
function ensureDataDir(): void {
  if (existsSync(DATA_DIR)) {
    return;
  }
  mkdirSync(DATA_DIR, { recursive: true });
}
/**
 * Open the SQLite database (creating the file, table and indexes when
 * missing) and cache the connection for subsequent calls.
 *
 * The connection is only cached once the schema statements have succeeded;
 * if any of them throws, the connection is closed and the error is rethrown
 * so that a later call retries from scratch instead of returning a
 * half-initialised database.
 *
 * @returns The shared database connection.
 */
export function initDatabase(): Database {
  if (db) return db;

  ensureDataDir();
  const connection = new Database(DB_PATH, { create: true });

  try {
    connection.run(`
      CREATE TABLE IF NOT EXISTS knowledge_entries (
        id INTEGER PRIMARY KEY AUTOINCREMENT,
        title TEXT NOT NULL,
        content TEXT NOT NULL,
        source TEXT,
        tags TEXT,
        created_at TEXT NOT NULL,
        updated_at TEXT NOT NULL
      )
    `);
    // Create indexes
    connection.run(`CREATE INDEX IF NOT EXISTS idx_tags ON knowledge_entries(tags)`);
    connection.run(`CREATE INDEX IF NOT EXISTS idx_created ON knowledge_entries(created_at)`);
  } catch (error) {
    // Do not leak the handle or publish an unusable singleton.
    connection.close();
    throw error;
  }

  db = connection;
  return connection;
}
/**
 * Insert a new knowledge entry and return its row ID.
 *
 * Both timestamps are set to the current time; tags are serialised to a
 * JSON string and a missing source is stored as NULL.
 */
export function saveKnowledgeEntry(entry: Omit<KnowledgeEntry, "id" | "created_at" | "updated_at">): number {
  const insert = initDatabase().prepare(`
    INSERT INTO knowledge_entries (title, content, source, tags, created_at, updated_at)
    VALUES (?, ?, ?, ?, ?, ?)
  `);

  const timestamp = new Date().toISOString();
  const serializedTags = entry.tags ? JSON.stringify(entry.tags) : null;

  const { lastInsertRowid } = insert.run(
    entry.title,
    entry.content,
    entry.source || null,
    serializedTags,
    timestamp,
    timestamp,
  );
  return Number(lastInsertRowid);
}
/**
 * Look up a single knowledge entry by its ID.
 *
 * @returns The entry with parsed tags, or `null` when no row matches.
 */
export function getKnowledgeEntry(id: number): KnowledgeEntry | null {
  const row = initDatabase()
    .prepare("SELECT * FROM knowledge_entries WHERE id = ?")
    .get(id) as KnowledgeRecord | undefined;

  if (row) {
    const entry: KnowledgeEntry = {
      id: row.id,
      title: row.title,
      content: row.content,
      source: row.source || undefined,
      tags: row.tags ? JSON.parse(row.tags) : undefined,
      created_at: row.created_at,
      updated_at: row.updated_at,
    };
    return entry;
  }
  return null;
}
/**
 * Apply a partial update to an existing entry.
 *
 * Fields missing from `updates` keep their stored value and `updated_at`
 * is refreshed to the current time.
 *
 * @returns `false` when no entry has the given ID, `true` otherwise.
 */
export function updateKnowledgeEntry(
  id: number,
  updates: Partial<Pick<KnowledgeEntry, "title" | "content" | "source" | "tags">>
): boolean {
  const database = initDatabase();

  const current = getKnowledgeEntry(id);
  if (current === null) {
    return false;
  }

  // Prefer the supplied tags, fall back to the stored ones, else NULL.
  const effectiveTags = updates.tags || current.tags;
  const values = [
    updates.title ?? current.title,
    updates.content ?? current.content,
    updates.source ?? current.source ?? null,
    effectiveTags ? JSON.stringify(effectiveTags) : null,
    new Date().toISOString(),
    id,
  ] as const;

  database
    .prepare(`
      UPDATE knowledge_entries
      SET title = ?, content = ?, source = ?, tags = ?, updated_at = ?
      WHERE id = ?
    `)
    .run(...values);

  return true;
}
/**
 * Remove the entry with the given ID.
 *
 * @returns `true` when a row was deleted, `false` when nothing matched.
 */
export function deleteKnowledgeEntry(id: number): boolean {
  const { changes } = initDatabase()
    .prepare("DELETE FROM knowledge_entries WHERE id = ?")
    .run(id);
  return changes > 0;
}
/**
 * List knowledge entries, newest first, with optional tag filtering.
 *
 * @param options.limit  Maximum number of entries to return (default 20).
 * @param options.offset Number of entries to skip (default 0).
 * @param options.tags   When non-empty, only entries carrying at least one
 *                       of these tags are returned.
 * @returns The matching entries with parsed tags.
 */
export function listKnowledgeEntries(options: {
  limit?: number;
  offset?: number;
  tags?: string[];
}): KnowledgeEntry[] {
  const database = initDatabase();
  const { limit = 20, offset = 0, tags } = options;

  let sql = "SELECT * FROM knowledge_entries";
  const params: (string | number)[] = [];

  if (tags && tags.length > 0) {
    // Tags are stored as a JSON array string, so look for the JSON-encoded
    // tag (quotes included) anywhere in the column.
    const tagConditions = tags.map(() => "tags LIKE ? ESCAPE '\\'").join(" OR ");
    sql += ` WHERE (${tagConditions})`;
    for (const tag of tags) {
      // JSON.stringify yields exactly the text stored for this tag (including
      // escaped quotes); LIKE metacharacters are then escaped so that `%` and
      // `_` in a tag are matched literally instead of acting as wildcards.
      const needle = JSON.stringify(tag).replace(/[\\%_]/g, "\\$&");
      params.push(`%${needle}%`);
    }
  }

  sql += " ORDER BY created_at DESC LIMIT ? OFFSET ?";
  params.push(limit, offset);

  const records = database.prepare(sql).all(...params) as KnowledgeRecord[];
  return records.map((record) => ({
    id: record.id,
    title: record.title,
    content: record.content,
    source: record.source || undefined,
    tags: record.tags ? JSON.parse(record.tags) : undefined,
    created_at: record.created_at,
    updated_at: record.updated_at,
  }));
}
/**
 * Keyword search over titles and content.
 *
 * The query is lower-cased and split on whitespace; words of two characters
 * or fewer are ignored. An entry matches when any remaining word occurs in
 * its title or content. Results are ordered by most recently updated.
 *
 * @param query Free-text query.
 * @param limit Maximum number of entries to return (default 10).
 * @returns Matching entries, or an empty array when no usable word remains.
 */
export function searchKnowledgeByText(query: string, limit = 10): KnowledgeEntry[] {
  const database = initDatabase();

  // Split query into words for OR search
  const words = query.toLowerCase().split(/\s+/).filter((w) => w.length > 2);
  if (words.length === 0) {
    return [];
  }

  const conditions = words
    .map(() => "(LOWER(title) LIKE ? ESCAPE '\\' OR LOWER(content) LIKE ? ESCAPE '\\')")
    .join(" OR ");
  const params = words.flatMap((w) => {
    // Escape LIKE metacharacters so `%` and `_` in the query match literally.
    const pattern = `%${w.replace(/[\\%_]/g, "\\$&")}%`;
    return [pattern, pattern];
  });

  const records = database.prepare(`
    SELECT * FROM knowledge_entries
    WHERE ${conditions}
    ORDER BY updated_at DESC
    LIMIT ?
  `).all(...params, limit) as KnowledgeRecord[];

  return records.map((record) => ({
    id: record.id,
    title: record.title,
    content: record.content,
    source: record.source || undefined,
    tags: record.tags ? JSON.parse(record.tags) : undefined,
    created_at: record.created_at,
    updated_at: record.updated_at,
  }));
}
/**
 * Return every stored entry ordered by ID (used when building the vector
 * index).
 */
export function getAllEntries(): KnowledgeEntry[] {
  const rows = initDatabase()
    .prepare("SELECT * FROM knowledge_entries ORDER BY id")
    .all() as KnowledgeRecord[];

  const entries: KnowledgeEntry[] = [];
  for (const row of rows) {
    entries.push({
      id: row.id,
      title: row.title,
      content: row.content,
      source: row.source || undefined,
      tags: row.tags ? JSON.parse(row.tags) : undefined,
      created_at: row.created_at,
      updated_at: row.updated_at,
    });
  }
  return entries;
}
/**
 * Compute summary statistics for the knowledge table.
 *
 * @returns The total number of entries, how many entries carry each tag,
 *          and the earliest/latest `created_at` values (`null` when the
 *          table is empty).
 */
export function getStats(): {
  totalEntries: number;
  tagCounts: Record<string, number>;
  oldestEntry: string | null;
  newestEntry: string | null;
} {
  const database = initDatabase();

  // One pass over the table yields all three aggregates.
  const summary = database
    .prepare("SELECT COUNT(*) as count, MIN(created_at) as oldest, MAX(created_at) as newest FROM knowledge_entries")
    .get() as { count: number; oldest: string | null; newest: string | null };

  // Count tags. A Map is used rather than a plain object so that tags named
  // like Object.prototype members ("constructor", "toString", ...) start
  // from zero instead of picking up the inherited value.
  const tagRows = database
    .prepare("SELECT tags FROM knowledge_entries WHERE tags IS NOT NULL")
    .all() as { tags: string }[];
  const counts = new Map<string, number>();
  for (const row of tagRows) {
    const tags = JSON.parse(row.tags) as string[];
    for (const tag of tags) {
      counts.set(tag, (counts.get(tag) ?? 0) + 1);
    }
  }

  return {
    totalEntries: summary.count,
    tagCounts: Object.fromEntries(counts),
    oldestEntry: summary.oldest,
    newestEntry: summary.newest,
  };
}
/**
 * Close the cached connection, if any, and clear the cache so that the next
 * initDatabase() call opens a fresh one.
 */
export function closeDatabase(): void {
  if (!db) {
    return;
  }
  db.close();
  db = null;
}
+238
View File
@@ -0,0 +1,238 @@
#!/usr/bin/env bun
/**
* Personal Knowledge CLI
*
* Command-line interface for managing personal knowledge entries.
*/
import { Command } from "commander";
import {
addKnowledge,
searchKnowledge,
searchKnowledgeText,
getKnowledge,
updateKnowledge,
deleteKnowledge,
listKnowledge,
getKnowledgeStats,
} from "./services/knowledgeService.js";
import { convertToVectorDB, getVectorStats, clearVectorDB } from "./services/vectorService.js";
// Root command; sub-commands are registered on it below.
const program = new Command()
  .name("pk")
  .description("Personal Knowledge CLI - Manage your knowledge base")
  .version("1.0.0");
// Add command: stores a new entry and reports whether it was also indexed
// for semantic search.
program
  .command("add")
  .description("Add a new knowledge entry")
  .argument("<title>", "Entry title")
  .argument("<content>", "Entry content")
  .option("-s, --source <source>", "Source URL or reference")
  .option("-t, --tags <tags>", "Comma-separated tags")
  .action(async (title, content, options) => {
    // Drop blank items ("a,,b" or a trailing comma) so that empty-string
    // tags are never stored.
    const parsedTags: string[] = options.tags
      ? options.tags
          .split(",")
          .map((t: string) => t.trim())
          .filter((t: string) => t.length > 0)
      : [];
    const tags = parsedTags.length > 0 ? parsedTags : undefined;

    const result = await addKnowledge({ title, content, source: options.source, tags });
    console.log(`✅ Added entry #${result.id}: "${title}"`);
    console.log(result.vectorized ? "📊 Indexed for semantic search" : "⚠️ Saved to database only");
  });
// Search command: semantic search by default, keyword search with --text.
program
  .command("search")
  .description("Search knowledge entries")
  .argument("<query>", "Search query")
  .option("-t, --text", "Use text search instead of semantic search")
  .option("-l, --limit <limit>", "Maximum results", "5")
  .action(async (query, options) => {
    // Reject a non-numeric or non-positive limit up front instead of passing
    // NaN down to the query layer.
    const limit = Number.parseInt(options.limit, 10);
    if (!Number.isInteger(limit) || limit < 1) {
      console.error(`Invalid --limit value: ${options.limit} (expected a positive integer)`);
      process.exitCode = 1;
      return;
    }

    if (options.text) {
      const results = searchKnowledgeText(query, limit);
      if (results.length === 0) {
        console.log("No results found.");
        return;
      }
      console.log(`Found ${results.length} result(s):\n`);
      for (const r of results) {
        console.log(`[${r.id}] ${r.title}`);
        console.log(` ${r.content.slice(0, 100)}...`);
        if (r.tags) console.log(` Tags: ${r.tags.join(", ")}`);
        console.log();
      }
    } else {
      try {
        const results = await searchKnowledge(query, { limit });
        if (results.length === 0) {
          console.log("No similar entries found.");
          return;
        }
        console.log(`Found ${results.length} similar entries:\n`);
        for (const r of results) {
          const similarity = Math.round(r.score * 100);
          console.log(`[${r.id}] ${r.title} (${similarity}% similar)`);
          console.log(` ${r.content_preview.slice(0, 100)}...`);
          if (r.tags.length > 0) console.log(` Tags: ${r.tags.join(", ")}`);
          console.log();
        }
      } catch (error) {
        console.error("Semantic search failed. Try --text for keyword search.");
        console.error(error instanceof Error ? error.message : error);
      }
    }
  });
// Get command: prints one entry with its metadata followed by the content.
program
  .command("get")
  .description("Get a knowledge entry by ID")
  .argument("<id>", "Entry ID")
  .action((id) => {
    const found = getKnowledge(parseInt(id));
    if (found) {
      console.log(`# ${found.title}\n`);
      console.log(`ID: ${found.id}`);
      console.log(`Created: ${found.created_at}`);
      console.log(`Updated: ${found.updated_at}`);
      if (found.source) {
        console.log(`Source: ${found.source}`);
      }
      if (found.tags) {
        console.log(`Tags: ${found.tags.join(", ")}`);
      }
      console.log(`\n${found.content}`);
      return;
    }
    console.log(`No entry found with ID: ${id}`);
  });
// Update command: applies only the options that were supplied.
program
  .command("update")
  .description("Update a knowledge entry")
  .argument("<id>", "Entry ID")
  .option("--title <title>", "New title")
  .option("--content <content>", "New content")
  .option("-s, --source <source>", "New source")
  .option("-t, --tags <tags>", "New comma-separated tags")
  .action(async (id, options) => {
    // Typed from updateKnowledge's own parameter so misspelled or wrongly
    // typed fields are caught by the compiler.
    const updates: Parameters<typeof updateKnowledge>[1] = {};
    if (options.title) updates.title = options.title;
    if (options.content) updates.content = options.content;
    if (options.source) updates.source = options.source;
    if (options.tags) {
      // Ignore blank items so empty-string tags are never stored.
      const tags: string[] = options.tags
        .split(",")
        .map((t: string) => t.trim())
        .filter((t: string) => t.length > 0);
      if (tags.length > 0) updates.tags = tags;
    }

    if (Object.keys(updates).length === 0) {
      console.log("No updates provided.");
      return;
    }

    const result = await updateKnowledge(Number.parseInt(id, 10), updates);
    if (!result.success) {
      console.log(`No entry found with ID: ${id}`);
      return;
    }
    console.log(`✅ Updated entry #${id}`);
    console.log(result.vectorized ? "📊 Re-indexed" : "⚠️ Vector update failed");
  });
// Delete command: removes an entry and reports whether it existed.
program
  .command("delete")
  .description("Delete a knowledge entry")
  .argument("<id>", "Entry ID")
  .action(async (id) => {
    const removed = await deleteKnowledge(parseInt(id));
    console.log(removed ? `✅ Deleted entry #${id}` : `No entry found with ID: ${id}`);
  });
// List command: paginated listing with optional tag filter.
program
  .command("list")
  .description("List knowledge entries")
  .option("-l, --limit <limit>", "Maximum entries", "20")
  .option("-o, --offset <offset>", "Offset for pagination", "0")
  .option("-t, --tags <tags>", "Filter by comma-separated tags")
  .action((options) => {
    // Validate numeric options up front instead of passing NaN to the query.
    const limit = Number.parseInt(options.limit, 10);
    if (!Number.isInteger(limit) || limit < 1) {
      console.error(`Invalid --limit value: ${options.limit} (expected a positive integer)`);
      process.exitCode = 1;
      return;
    }
    const offset = Number.parseInt(options.offset, 10);
    if (!Number.isInteger(offset) || offset < 0) {
      console.error(`Invalid --offset value: ${options.offset} (expected a non-negative integer)`);
      process.exitCode = 1;
      return;
    }

    // Blank items would otherwise turn into an empty-string tag filter.
    const parsedTags: string[] = options.tags
      ? options.tags
          .split(",")
          .map((t: string) => t.trim())
          .filter((t: string) => t.length > 0)
      : [];
    const tags = parsedTags.length > 0 ? parsedTags : undefined;

    const entries = listKnowledge({ limit, offset, tags });
    if (entries.length === 0) {
      console.log("No entries found.");
      return;
    }
    console.log(`📚 Knowledge Entries (${entries.length}):\n`);
    for (const e of entries) {
      console.log(`[${e.id}] ${e.title}${e.tags ? ` [${e.tags.join(", ")}]` : ""}`);
    }
  });
// Stats command: totals, date range and the ten most used tags.
program
  .command("stats")
  .description("Get knowledge base statistics")
  .action(async () => {
    const { database, vectors: vectorStats } = await getKnowledgeStats();

    console.log("📊 Knowledge Base Stats\n");
    console.log(`Total Entries: ${database.totalEntries}`);
    console.log(`Vectors Indexed: ${vectorStats.totalVectors}`);
    console.log(`Oldest: ${database.oldestEntry || "N/A"}`);
    console.log(`Newest: ${database.newestEntry || "N/A"}`);

    // Most frequently used tags first.
    const ranked = Object.entries(database.tagCounts).sort(([, a], [, b]) => b - a);
    if (ranked.length === 0) {
      return;
    }
    console.log("\nTop Tags:");
    ranked.slice(0, 10).forEach(([tag, count]) => {
      console.log(` ${tag}: ${count}`);
    });
  });
// Vectors subcommand group
const vectors = program.command("vectors").description("Manage vector database");

// vectors convert: index entries that are not yet in the vector database.
vectors
  .command("convert")
  .description("Convert all entries to vector database")
  .action(async () => {
    console.log("Converting entries to vectors...");

    // Rewrites the same terminal line on each progress callback.
    const reportProgress = (current: number, total: number): void => {
      process.stdout.write(`\rProgress: ${current}/${total}`);
    };

    const { converted, skipped } = await convertToVectorDB({ onProgress: reportProgress });
    console.log(`\n✅ Converted ${converted} entries (${skipped} already indexed)`);
  });
// vectors stats: vector count and the ten most used tags in the index.
vectors
  .command("stats")
  .description("Get vector database statistics")
  .action(async () => {
    const { totalVectors, tagCounts } = await getVectorStats();

    console.log("📊 Vector Database Stats\n");
    console.log(`Total Vectors: ${totalVectors}`);

    const ranked = Object.entries(tagCounts).sort(([, a], [, b]) => b - a);
    if (ranked.length === 0) {
      return;
    }
    console.log("\nTags in vectors:");
    ranked.slice(0, 10).forEach(([tag, count]) => {
      console.log(` ${tag}: ${count}`);
    });
  });
// vectors clear: wipe the vector database.
vectors
  .command("clear")
  .description("Clear the vector database")
  .action(async (): Promise<void> => {
    await clearVectorDB();
    console.log("✅ Vector database cleared");
  });
// Most actions above are async, so parseAsync() is required for Commander to
// await them; with parse() a rejected action becomes an unhandled rejection.
program.parseAsync().catch((error: unknown) => {
  console.error(error instanceof Error ? error.message : error);
  process.exitCode = 1;
});
+304
View File
@@ -0,0 +1,304 @@
#!/usr/bin/env bun
/**
* Personal Knowledge MCP Server
*
* Exposes knowledge database via Model Context Protocol for use by AI agents.
*
* Tools provided:
* - store_knowledge: Store a new knowledge entry
* - search_knowledge: Semantic search using vector embeddings
* - search_knowledge_text: Keyword-based text search
* - get_knowledge: Get a specific entry by ID
* - update_knowledge: Update an existing entry
* - delete_knowledge: Delete an entry
* - list_knowledge: List entries with filters
* - get_knowledge_stats: Get database statistics
*/
import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js";
import { z } from "zod";
import {
addKnowledge,
searchKnowledge,
searchKnowledgeText,
getKnowledge,
updateKnowledge,
deleteKnowledge,
listKnowledge,
getKnowledgeStats,
} from "./services/knowledgeService.js";
// Create the MCP server instance; the tools below are registered on it and
// main() connects it to a stdio transport.
const server = new McpServer({
  name: "personal-knowledge-mcp",
  version: "1.0.0",
});
// Tool: Store knowledge
// Saves a new entry and reports whether it was also indexed for semantic
// search. Failures are returned as a tool result flagged with `isError` so
// the client can tell them apart from a successful call.
server.tool(
  "store_knowledge",
  "Store a new knowledge entry in your personal knowledge base. Use this to save important information, notes, or learnings for later retrieval.",
  {
    title: z.string().describe("Short descriptive title for the entry"),
    content: z.string().describe("The full content/text of the knowledge entry"),
    source: z.string().optional().describe("Optional source URL or reference"),
    tags: z.array(z.string()).optional().describe("Optional tags for categorization (e.g., ['typescript', 'patterns'])"),
  },
  async ({ title, content, source, tags }) => {
    try {
      const result = await addKnowledge({ title, content, source, tags });
      return {
        content: [{
          type: "text",
          text: `✅ Stored knowledge entry #${result.id}: "${title}"\n${result.vectorized ? "📊 Indexed for semantic search" : "⚠️ Saved to database only (vector indexing failed)"}`,
        }],
      };
    } catch (error) {
      const message = error instanceof Error ? error.message : "Unknown error";
      return {
        content: [{ type: "text", text: `❌ Failed to store knowledge: ${message}` }],
        isError: true,
      };
    }
  }
);
// Tool: Semantic search
// Returns entries whose similarity score is at least 0.3, formatted as
// markdown. An uninitialised vector database is reported as guidance to the
// caller; any other failure is returned as a result flagged with `isError`.
server.tool(
  "search_knowledge",
  "Search your personal knowledge base using semantic similarity. Returns entries most similar in meaning to your query.",
  {
    query: z.string().describe("Search query to find similar knowledge entries"),
    limit: z.number().optional().default(5).describe("Maximum number of results (default: 5)"),
  },
  async ({ query, limit }) => {
    try {
      const results = await searchKnowledge(query, { limit, minScore: 0.3 });
      if (results.length === 0) {
        return {
          content: [{ type: "text", text: "No similar knowledge entries found." }],
        };
      }

      let output = `## Found ${results.length} similar entries:\n\n`;
      for (const [index, r] of results.entries()) {
        const similarity = Math.round(r.score * 100);
        output += `### ${index + 1}. ${r.title} (${similarity}% similar)\n`;
        output += `**ID:** ${r.id}\n`;
        if (r.tags.length > 0) {
          output += `**Tags:** ${r.tags.join(", ")}\n`;
        }
        output += `\n${r.content_preview}...\n\n---\n\n`;
      }
      return {
        content: [{ type: "text", text: output }],
      };
    } catch (error) {
      const message = error instanceof Error ? error.message : "Unknown error";
      if (message.includes("not initialized")) {
        return {
          content: [{
            type: "text",
            text: "Vector database not initialized. Use search_knowledge_text for keyword search, or add some entries first."
          }],
        };
      }
      return {
        content: [{ type: "text", text: `Search error: ${message}` }],
        isError: true,
      };
    }
  }
);
// Tool: Text search
// Keyword lookup; each hit shows the title, ID, the first 200 characters of
// the content and, when present, its tags.
server.tool(
  "search_knowledge_text",
  "Search knowledge entries by keyword (text-based, no semantic similarity). Good for exact matches.",
  {
    query: z.string().describe("Keywords to search for in titles and content"),
    limit: z.number().optional().default(10).describe("Maximum number of results"),
  },
  async ({ query, limit }) => {
    const matches = searchKnowledgeText(query, limit);

    if (matches.length === 0) {
      return {
        content: [{ type: "text", text: `No results found for: "${query}"` }],
      };
    }

    const sections: string[] = [];
    for (const match of matches) {
      const tagLine = match.tags ? `\nTags: ${match.tags.join(", ")}` : "";
      sections.push(`**${match.title}** (ID: ${match.id})\n${match.content.slice(0, 200)}...${tagLine}`);
    }
    const body = sections.join("\n\n---\n\n");

    return {
      content: [{
        type: "text",
        text: `Found ${matches.length} result(s) for "${query}":\n\n${body}`
      }],
    };
  }
);
// Tool: Get by ID
// Renders one entry as markdown: a metadata header, a rule, then the content.
server.tool(
  "get_knowledge",
  "Get a specific knowledge entry by its ID",
  {
    id: z.number().describe("The ID of the knowledge entry"),
  },
  async ({ id }) => {
    const entry = getKnowledge(id);
    if (!entry) {
      return {
        content: [{ type: "text", text: `No entry found with ID: ${id}` }],
      };
    }

    // The empty element produces the blank line after the title.
    const header = [
      `# ${entry.title}`,
      "",
      `**ID:** ${entry.id}`,
      `**Created:** ${entry.created_at}`,
      `**Updated:** ${entry.updated_at}`,
    ];
    if (entry.source) {
      header.push(`**Source:** ${entry.source}`);
    }
    if (entry.tags) {
      header.push(`**Tags:** ${entry.tags.join(", ")}`);
    }

    return {
      content: [{
        type: "text",
        text: `${header.join("\n")}\n\n---\n\n${entry.content}`,
      }],
    };
  }
);
// Tool: Update
// Applies only the fields that were supplied and reports whether the entry
// was re-indexed for semantic search.
server.tool(
  "update_knowledge",
  "Update an existing knowledge entry",
  {
    id: z.number().describe("The ID of the entry to update"),
    title: z.string().optional().describe("New title"),
    content: z.string().optional().describe("New content"),
    source: z.string().optional().describe("New source"),
    tags: z.array(z.string()).optional().describe("New tags (replaces existing)"),
  },
  async ({ id, title, content, source, tags }) => {
    // Typed from updateKnowledge's own parameter so misspelled or wrongly
    // typed fields are caught by the compiler.
    const updates: Parameters<typeof updateKnowledge>[1] = {};
    if (title !== undefined) updates.title = title;
    if (content !== undefined) updates.content = content;
    if (source !== undefined) updates.source = source;
    if (tags !== undefined) updates.tags = tags;

    if (Object.keys(updates).length === 0) {
      return {
        content: [{ type: "text", text: "No updates provided" }],
      };
    }

    const result = await updateKnowledge(id, updates);
    if (!result.success) {
      return {
        content: [{ type: "text", text: `No entry found with ID: ${id}` }],
      };
    }
    return {
      content: [{
        type: "text",
        text: `✅ Updated entry #${id}\n${result.vectorized ? "📊 Re-indexed for semantic search" : "⚠️ Database updated (vector re-indexing failed)"}`
      }],
    };
  }
);
// Tool: Delete
// Removes an entry and reports whether it existed.
server.tool(
  "delete_knowledge",
  "Delete a knowledge entry from the database",
  {
    id: z.number().describe("The ID of the entry to delete"),
  },
  async ({ id }) => {
    const removed = await deleteKnowledge(id);
    const text = removed ? `✅ Deleted entry #${id}` : `No entry found with ID: ${id}`;
    return {
      content: [{ type: "text", text }],
    };
  }
);
// Tool: List
// Paginated listing rendered as a markdown bullet list.
server.tool(
  "list_knowledge",
  "List knowledge entries with optional filtering",
  {
    limit: z.number().optional().default(20).describe("Maximum entries to return"),
    offset: z.number().optional().default(0).describe("Offset for pagination"),
    tags: z.array(z.string()).optional().describe("Filter by tags"),
  },
  async ({ limit, offset, tags }) => {
    const entries = listKnowledge({ limit, offset, tags });

    if (entries.length === 0) {
      return {
        content: [{ type: "text", text: "No knowledge entries found." }],
      };
    }

    const bullets: string[] = [];
    for (const entry of entries) {
      const tagSuffix = entry.tags ? ` [${entry.tags.join(", ")}]` : "";
      bullets.push(`- **${entry.title}** (ID: ${entry.id})${tagSuffix}`);
    }

    return {
      content: [{
        type: "text",
        text: `📚 Knowledge Entries (${entries.length}):\n\n${bullets.join("\n")}`
      }],
    };
  }
);
// Tool: Stats
// Summarises entry and vector counts, the date range and the ten most used tags.
server.tool(
  "get_knowledge_stats",
  "Get statistics about your personal knowledge base",
  {},
  async () => {
    const stats = await getKnowledgeStats();

    // Most frequently used tags first, capped at ten lines.
    const ranked = Object.entries(stats.database.tagCounts).sort(([, a], [, b]) => b - a);
    const topTagLines: string[] = [];
    for (const [tag, count] of ranked.slice(0, 10)) {
      topTagLines.push(` ${tag}: ${count}`);
    }
    const tagList = topTagLines.join("\n");

    return {
      content: [{
        type: "text",
        text: `## Personal Knowledge Base Stats
**Total Entries:** ${stats.database.totalEntries}
**Vectors Indexed:** ${stats.vectors.totalVectors}
**Date Range:**
Oldest: ${stats.database.oldestEntry || "N/A"}
Newest: ${stats.database.newestEntry || "N/A"}
**Top Tags:**
${tagList || " No tags yet"}`,
      }],
    };
  }
);
// Start the server
/**
 * Connect the MCP server to a stdio transport.
 *
 * The status line goes to stderr because stdout carries the protocol stream.
 */
async function main(): Promise<void> {
  const transport = new StdioServerTransport();
  await server.connect(transport);
  console.error("Personal Knowledge MCP Server running on stdio");
}

// A startup failure must surface as a non-zero exit status so the launching
// client can detect that the server did not come up.
main().catch((error: unknown) => {
  console.error(error);
  process.exit(1);
});
+141
View File
@@ -0,0 +1,141 @@
/**
* Knowledge Service
*
* Business logic coordinating database and vector operations.
*/
import {
saveKnowledgeEntry,
getKnowledgeEntry,
updateKnowledgeEntry,
deleteKnowledgeEntry,
listKnowledgeEntries,
searchKnowledgeByText,
getStats as getDbStats,
type KnowledgeEntry,
} from "../database/index.js";
import {
queryVectors,
updateVector,
deleteVector,
getVectorStats,
type SearchResult,
} from "./vectorService.js";
export { type KnowledgeEntry };
/**
 * Add a new knowledge entry with automatic vector indexing.
 *
 * The entry is always saved to SQLite first. Vector indexing is best-effort:
 * when it fails, the reason is logged to stderr and the entry remains
 * available through text search.
 *
 * @param entry Title, content and optional source/tags of the new entry.
 * @returns The new entry's ID and whether it was indexed for semantic search.
 */
export async function addKnowledge(entry: {
  title: string;
  content: string;
  source?: string;
  tags?: string[];
}): Promise<{ id: number; vectorized: boolean }> {
  // Save to SQLite
  const id = saveKnowledgeEntry(entry);

  // Index in vector DB
  let vectorized = false;
  try {
    const savedEntry = getKnowledgeEntry(id);
    if (savedEntry) {
      await updateVector(savedEntry);
      vectorized = true;
    }
  } catch (error) {
    // Vector indexing failed, but entry is saved. Keep the cause in the log
    // so the failure can be diagnosed.
    const reason = error instanceof Error ? error.message : String(error);
    console.error(`Vector indexing failed, entry saved to database only: ${reason}`);
  }

  return { id, vectorized };
}
/**
 * Semantic search: forwards the query and options to the vector service.
 */
export async function searchKnowledge(
  query: string,
  options: { limit?: number; minScore?: number } = {}
): Promise<SearchResult[]> {
  const matches = await queryVectors(query, options);
  return matches;
}
/**
 * Keyword search: forwards to the database's text search.
 */
export function searchKnowledgeText(query: string, limit = 10): KnowledgeEntry[] {
  const matches = searchKnowledgeByText(query, limit);
  return matches;
}
/**
 * Fetch a single entry by ID; `null` when it does not exist.
 */
export function getKnowledge(id: number): KnowledgeEntry | null {
  const entry = getKnowledgeEntry(id);
  return entry;
}
/**
 * Update a knowledge entry with automatic vector re-indexing.
 *
 * The SQLite row is updated first. Re-indexing is best-effort: when it
 * fails, the reason is logged to stderr and `vectorized` is `false`.
 *
 * @param id      ID of the entry to update.
 * @param updates Fields to change; omitted fields keep their stored value.
 * @returns `success` is `false` when no entry has that ID; `vectorized`
 *          tells whether the vector index was refreshed.
 */
export async function updateKnowledge(
  id: number,
  updates: Partial<Pick<KnowledgeEntry, "title" | "content" | "source" | "tags">>
): Promise<{ success: boolean; vectorized: boolean }> {
  const success = updateKnowledgeEntry(id, updates);
  if (!success) {
    return { success: false, vectorized: false };
  }

  // Re-index in vector DB
  let vectorized = false;
  try {
    const updatedEntry = getKnowledgeEntry(id);
    if (updatedEntry) {
      await updateVector(updatedEntry);
      vectorized = true;
    }
  } catch (error) {
    // Keep the cause in the log so the failure can be diagnosed.
    const reason = error instanceof Error ? error.message : String(error);
    console.error(`Vector re-indexing failed: ${reason}`);
  }

  return { success, vectorized };
}
/**
 * Delete a knowledge entry and its vector.
 *
 * Vector deletion is best-effort: a failure is logged to stderr and the
 * SQLite row is deleted regardless.
 *
 * @param id ID of the entry to delete.
 * @returns `true` when a database row was deleted, `false` when none matched.
 */
export async function deleteKnowledge(id: number): Promise<boolean> {
  // Delete vector first
  try {
    await deleteVector(id);
  } catch (error) {
    // Continue even if vector deletion fails, but leave a trace: a vector
    // left behind would otherwise go unnoticed.
    const reason = error instanceof Error ? error.message : String(error);
    console.error(`Vector deletion failed for entry #${id}: ${reason}`);
  }

  return deleteKnowledgeEntry(id);
}
/**
 * List entries; limit, offset and tag filter are forwarded to the database.
 */
export function listKnowledge(options: {
  limit?: number;
  offset?: number;
  tags?: string[];
}): KnowledgeEntry[] {
  const entries = listKnowledgeEntries(options);
  return entries;
}
/**
 * Collect SQLite statistics and vector-index statistics into one object.
 */
export async function getKnowledgeStats(): Promise<{
  database: ReturnType<typeof getDbStats>;
  vectors: Awaited<ReturnType<typeof getVectorStats>>;
}> {
  const dbStats = getDbStats();
  const vectorStats = await getVectorStats();
  return {
    database: dbStats,
    vectors: vectorStats,
  };
}
+290
View File
@@ -0,0 +1,290 @@
/**
* Vector Database Service
*
* Provides semantic search over knowledge entries using LanceDB and Transformers.js embeddings.
* Uses all-MiniLM-L6-v2 model which auto-downloads on first use (~22MB).
*/
import lancedb from "@lancedb/lancedb";
import { pipeline, type FeatureExtractionPipeline } from "@xenova/transformers";
import { join } from "path";
import { existsSync, mkdirSync } from "fs";
import { homedir } from "os";
import { getAllEntries, type KnowledgeEntry } from "../database/index.js";
// Persistent per-user data directory. A non-empty OPENCODE_PK_DATA_DIR
// environment variable overrides the default, which is
// ~/.local/share/opencode-personal-knowledge (XDG-style on Linux).
const DATA_DIR = process.env.OPENCODE_PK_DATA_DIR || join(homedir(), ".local", "share", "opencode-personal-knowledge");
// Directory inside the data directory that LanceDB connects to.
const VECTOR_DB_PATH = join(DATA_DIR, "vectors");
// Embedding model (auto-downloads on first use)
const EMBEDDING_MODEL = "Xenova/all-MiniLM-L6-v2";
// Singleton embedding pipeline
let embeddingPipeline: FeatureExtractionPipeline | null = null;
// In-flight load, shared by concurrent callers so the model is loaded once.
let embeddingPipelineLoad: Promise<FeatureExtractionPipeline> | null = null;

/** Load the embedding model and publish it as the singleton. */
async function loadEmbeddingPipeline(): Promise<FeatureExtractionPipeline> {
  console.error("Loading embedding model (first run may download ~22MB)...");
  const loaded = await pipeline("feature-extraction", EMBEDDING_MODEL);
  embeddingPipeline = loaded;
  console.error("Embedding model loaded.");
  return loaded;
}

/**
 * Get or initialize the embedding pipeline.
 *
 * Callers that arrive while the model is still loading share the same
 * in-flight promise instead of each starting their own load. A failed load
 * is forgotten so that the next call retries.
 *
 * @returns The shared feature-extraction pipeline.
 */
async function getEmbeddingPipeline(): Promise<FeatureExtractionPipeline> {
  if (embeddingPipeline) return embeddingPipeline;

  if (!embeddingPipelineLoad) {
    const load = loadEmbeddingPipeline();
    embeddingPipelineLoad = load;
    // The rejection itself still reaches whoever awaits `load`; this handler
    // only clears the cached promise so a later call can try again.
    load.catch(() => {
      if (embeddingPipelineLoad === load) embeddingPipelineLoad = null;
    });
  }
  return embeddingPipelineLoad;
}
/**
 * Turn a piece of text into its embedding vector (mean-pooled and
 * normalised by the pipeline), returned as a plain number array.
 */
export async function embed(text: string): Promise<number[]> {
  const pipe = await getEmbeddingPipeline();
  const { data } = await pipe(text, { pooling: "mean", normalize: true });
  const values = data as Float32Array;
  return Array.from(values);
}
/**
 * Row stored in the `knowledge_vectors` table: entry metadata plus its
 * embedding.
 */
export interface VectorRecord {
  // Index signature so the record can be used where a generic
  // string-keyed object is expected — presumably for LanceDB's table
  // APIs; TODO confirm.
  [key: string]: unknown;
  /** ID of the knowledge entry this vector belongs to. */
  id: number;
  /** Title of the entry. */
  title: string;
  /** Leading part of the entry content (convertToVectorDB stores the first 500 characters). */
  content_preview: string;
  /** Tags as a JSON array string, or null when the entry has none. */
  tags: string | null;
  /** Embedding vector as produced by embed(). */
  vector: number[];
}
/**
 * Search result from vector query.
 */
export interface SearchResult {
  /** ID of the matching knowledge entry. */
  id: number;
  /** Title of the matching entry. */
  title: string;
  /** Preview of the entry content. */
  content_preview: string;
  /** Tags of the entry as an array (empty when it has none — presumably parsed from the stored JSON; TODO confirm). */
  tags: string[];
  /** Similarity score; callers display it as a percentage (score * 100). */
  score: number;
}
/**
 * Create the vector database directory (including missing parents) unless
 * it is already present.
 */
function ensureDataDir(): void {
  if (existsSync(VECTOR_DB_PATH)) {
    return;
  }
  mkdirSync(VECTOR_DB_PATH, { recursive: true });
}
/**
 * Open a LanceDB connection on the vector directory, creating the directory
 * first when needed.
 */
async function getVectorDB() {
  ensureDataDir();
  const connection = await lancedb.connect(VECTOR_DB_PATH);
  return connection;
}
/**
 * Convert all knowledge entries to vector database.
 *
 * Entries whose ID is already present in the `knowledge_vectors` table are
 * skipped. The rest are embedded (title plus the first 1000 characters of
 * content) and written in a single add/create call at the end.
 *
 * @param options.batchSize  Number of entries embedded between progress
 *                           callbacks (default 50; values below 1 are
 *                           treated as 1).
 * @param options.onProgress Called after each batch with the number of
 *                           entries processed so far and the total to process.
 * @returns `converted` — entries newly indexed; `skipped` — entries from the
 *          database that were not converted because they were already indexed.
 */
export async function convertToVectorDB(options: {
  batchSize?: number;
  onProgress?: (current: number, total: number) => void;
} = {}): Promise<{ converted: number; skipped: number }> {
  const { batchSize = 50, onProgress } = options;
  // A zero, negative or non-numeric batch size would never advance the loop below.
  const step = Number.isFinite(batchSize) ? Math.max(1, Math.floor(batchSize)) : 50;

  // Get all entries from SQLite
  const entries = getAllEntries();
  if (entries.length === 0) {
    return { converted: 0, skipped: 0 };
  }

  const db = await getVectorDB();

  // Check for existing table
  const tables = await db.tableNames();
  const hasTable = tables.includes("knowledge_vectors");
  let existingIds = new Set<number>();
  if (hasTable) {
    const table = await db.openTable("knowledge_vectors");
    const existing = await table.query().select(["id"]).toArray();
    existingIds = new Set(existing.map((r: { id: number }) => r.id));
  }

  // Filter out already converted entries. The type predicate records that
  // `id` is present, so no non-null assertion is needed further down.
  const toConvert = entries.filter(
    (e): e is KnowledgeEntry & { id: number } => !!e.id && !existingIds.has(e.id)
  );
  if (toConvert.length === 0) {
    return { converted: 0, skipped: entries.length };
  }

  // Process in batches
  const vectorRecords: VectorRecord[] = [];
  for (let i = 0; i < toConvert.length; i += step) {
    const batch = toConvert.slice(i, i + step);
    for (const entry of batch) {
      // Combine title and content for embedding
      const text = `${entry.title}\n${entry.content.slice(0, 1000)}`;
      const vector = await embed(text);
      vectorRecords.push({
        id: entry.id,
        title: entry.title,
        content_preview: entry.content.slice(0, 500),
        tags: entry.tags ? JSON.stringify(entry.tags) : null,
        vector,
      });
    }
    onProgress?.(Math.min(i + step, toConvert.length), toConvert.length);
  }

  // Create or append to table
  if (hasTable) {
    const table = await db.openTable("knowledge_vectors");
    await table.add(vectorRecords);
  } else {
    await db.createTable("knowledge_vectors", vectorRecords);
  }

  // Count skipped entries from the entry list itself: the vector table may
  // hold IDs of entries that no longer exist, which must not be reported.
  return { converted: vectorRecords.length, skipped: entries.length - toConvert.length };
}
/**
 * Query the vector database for entries similar to a text query.
 *
 * The query is embedded with embed() and compared against the stored vectors
 * using cosine distance, so the reported `score` (1 - distance) is the cosine
 * similarity between the query and the entry. Without an explicit distance
 * type LanceDB uses L2, for which `1 - distance` is not a similarity and makes
 * `minScore` far stricter than intended.
 *
 * @param query Free-text query.
 * @param options.limit Maximum number of nearest neighbours fetched (default 5).
 * @param options.minScore Hits scoring below this are dropped after the search
 *   (default 0.3), so fewer than `limit` results may be returned.
 * @returns Matching entries in the order returned by the vector search.
 * @throws Error when the "knowledge_vectors" table does not exist yet.
 */
export async function queryVectors(
  query: string,
  options: {
    limit?: number;
    minScore?: number;
  } = {}
): Promise<SearchResult[]> {
  const { limit = 5, minScore = 0.3 } = options;

  const db = await getVectorDB();
  const tables = await db.tableNames();
  if (!tables.includes("knowledge_vectors")) {
    throw new Error("Vector database not initialized. Run 'bun start vectors convert' first.");
  }

  // Generate query embedding
  const queryVector = await embed(query);

  // Search with cosine distance so that 1 - _distance is a cosine similarity
  const table = await db.openTable("knowledge_vectors");
  const results = await table
    .vectorSearch(queryVector)
    .distanceType("cosine")
    .limit(limit)
    .toArray();

  // Format and filter results
  return results
    .map((r: Record<string, unknown>) => ({
      id: r.id as number,
      title: r.title as string,
      content_preview: r.content_preview as string,
      tags: r.tags ? JSON.parse(r.tags as string) : [],
      score: 1 - (r._distance as number), // cosine distance -> cosine similarity
    }))
    .filter((r) => r.score >= minScore);
}
/**
 * Remove the vector row belonging to a knowledge entry.
 *
 * @param id Id of the entry whose vector should be removed.
 * @returns `false` when the "knowledge_vectors" table does not exist;
 *   `true` once the delete has been issued (whether or not a row matched).
 */
export async function deleteVector(id: number): Promise<boolean> {
  const db = await getVectorDB();
  const hasTable = (await db.tableNames()).includes("knowledge_vectors");
  if (hasTable) {
    const vectors = await db.openTable("knowledge_vectors");
    await vectors.delete(`id = ${id}`);
  }
  return hasTable;
}
/**
 * Replace the stored vector for a single entry.
 *
 * The new embedding is computed before anything is deleted, so a failure while
 * embedding leaves the previously stored vector in place. The old row is then
 * removed and the new one added through one database connection; if the table
 * does not exist yet it is created with this record.
 *
 * @param entry Entry to (re)index; its title and the first 1000 characters of
 *   its content are embedded.
 * @returns `false` when the entry has no id, `true` after the vector is stored.
 */
export async function updateVector(entry: KnowledgeEntry): Promise<boolean> {
  if (!entry.id) return false;

  // Build the replacement record first (this is the step most likely to fail)
  const text = `${entry.title}\n${entry.content.slice(0, 1000)}`;
  const vector = await embed(text);
  const record: VectorRecord = {
    id: entry.id,
    title: entry.title,
    content_preview: entry.content.slice(0, 500),
    tags: entry.tags ? JSON.stringify(entry.tags) : null,
    vector,
  };

  const db = await getVectorDB();
  const tables = await db.tableNames();
  if (tables.includes("knowledge_vectors")) {
    const table = await db.openTable("knowledge_vectors");
    // Drop the old vector (if any) only now that its replacement is ready
    await table.delete(`id = ${entry.id}`);
    await table.add([record]);
  } else {
    await db.createTable("knowledge_vectors", [record]);
  }
  return true;
}
/**
 * Summarize the contents of the vector table.
 *
 * @returns `totalVectors`: number of rows in "knowledge_vectors" (0 when the
 *   table does not exist); `tagCounts`: how many rows carry each tag, computed
 *   by decoding every row's JSON tag list.
 */
export async function getVectorStats(): Promise<{
  totalVectors: number;
  tagCounts: Record<string, number>;
}> {
  const db = await getVectorDB();
  const tableNames = await db.tableNames();
  if (!tableNames.includes("knowledge_vectors")) {
    return { totalVectors: 0, tagCounts: {} };
  }

  const table = await db.openTable("knowledge_vectors");
  // Only the tags column is needed for the statistics
  const rows = await table.query().select(["tags"]).toArray();

  const tagCounts: Record<string, number> = {};
  for (const row of rows) {
    const { tags: encodedTags } = row as { tags: string | null };
    if (!encodedTags) {
      continue;
    }
    const decoded = JSON.parse(encodedTags) as string[];
    for (const tag of decoded) {
      const seenSoFar = tagCounts[tag] || 0;
      tagCounts[tag] = seenSoFar + 1;
    }
  }

  return { totalVectors: rows.length, tagCounts };
}
/**
 * Drop the "knowledge_vectors" table if it exists; otherwise do nothing.
 */
export async function clearVectorDB(): Promise<void> {
  const db = await getVectorDB();
  const names = await db.tableNames();
  if (!names.includes("knowledge_vectors")) {
    return;
  }
  await db.dropTable("knowledge_vectors");
}
+91
View File
@@ -0,0 +1,91 @@
import { describe, test, expect, beforeAll, afterAll } from "bun:test";
import { Database } from "bun:sqlite";
import { join } from "path";
import { existsSync, unlinkSync, mkdirSync } from "fs";
// Use a dedicated on-disk database file so the tests never touch real data.
const TEST_DIR = join(import.meta.dir, "../../test-data");
const TEST_DB_PATH = join(TEST_DIR, "test_knowledge.db");

/** Delete the test database file if it is present. */
function removeTestDatabase(): void {
  if (existsSync(TEST_DB_PATH)) {
    unlinkSync(TEST_DB_PATH);
  }
}

// The tests below share one database and build on each other in order
// (insert -> query/search -> update -> delete).
describe("database", () => {
  let db: Database;

  beforeAll(() => {
    if (!existsSync(TEST_DIR)) {
      mkdirSync(TEST_DIR, { recursive: true });
    }
    // A file left behind by an aborted run would still contain rows and a
    // moved-on AUTOINCREMENT counter, breaking the count/id assertions below.
    removeTestDatabase();

    db = new Database(TEST_DB_PATH, { create: true });
    db.run(`
      CREATE TABLE IF NOT EXISTS knowledge_entries (
        id INTEGER PRIMARY KEY AUTOINCREMENT,
        title TEXT NOT NULL,
        content TEXT NOT NULL,
        source TEXT,
        tags TEXT,
        created_at TEXT NOT NULL,
        updated_at TEXT NOT NULL
      )
    `);
  });

  afterAll(() => {
    db.close();
    removeTestDatabase();
  });

  test("can insert a knowledge entry", () => {
    const stmt = db.prepare(`
      INSERT INTO knowledge_entries
      (title, content, source, tags, created_at, updated_at)
      VALUES (?, ?, ?, ?, ?, ?)
    `);
    const now = new Date().toISOString();
    stmt.run("Test Title", "Test content here", "https://example.com", '["test","demo"]', now, now);

    const count = db.prepare("SELECT COUNT(*) as count FROM knowledge_entries").get() as { count: number };
    expect(count.count).toBe(1);
  });

  test("can query entries", () => {
    const records = db.prepare("SELECT * FROM knowledge_entries").all();
    expect(records.length).toBeGreaterThan(0);
  });

  test("can search by title", () => {
    const records = db.prepare(
      "SELECT * FROM knowledge_entries WHERE LOWER(title) LIKE ?"
    ).all("%test%");
    expect(records.length).toBe(1);
  });

  test("can search by content", () => {
    const records = db.prepare(
      "SELECT * FROM knowledge_entries WHERE LOWER(content) LIKE ?"
    ).all("%content%");
    expect(records.length).toBe(1);
  });

  test("can parse tags JSON", () => {
    const record = db.prepare("SELECT tags FROM knowledge_entries WHERE id = 1").get() as { tags: string };
    const tags = JSON.parse(record.tags);
    expect(tags).toEqual(["test", "demo"]);
  });

  test("can update an entry", () => {
    const now = new Date().toISOString();
    db.prepare("UPDATE knowledge_entries SET title = ?, updated_at = ? WHERE id = ?").run("Updated Title", now, 1);

    const record = db.prepare("SELECT title FROM knowledge_entries WHERE id = 1").get() as { title: string };
    expect(record.title).toBe("Updated Title");
  });

  test("can delete an entry", () => {
    db.prepare("DELETE FROM knowledge_entries WHERE id = ?").run(1);

    const count = db.prepare("SELECT COUNT(*) as count FROM knowledge_entries").get() as { count: number };
    expect(count.count).toBe(0);
  });
});
+17
View File
@@ -0,0 +1,17 @@
{
"compilerOptions": {
"target": "ESNext",
"module": "ESNext",
"moduleResolution": "bundler",
"esModuleInterop": true,
"strict": true,
"skipLibCheck": true,
"resolveJsonModule": true,
"declaration": true,
"outDir": "./dist",
"rootDir": "./src",
"types": ["bun-types"]
},
"include": ["src/**/*"],
"exclude": ["node_modules", "dist", "tests"]
}