mirror of
https://github.com/NocturnLabs/opencode-personal-knowledge.git
synced 2026-06-02 06:03:47 +02:00
feat: Implement initial personal knowledge management system with vector search and MCP server integration.
This commit is contained in:
+21
@@ -0,0 +1,21 @@
|
||||
# Dependencies
|
||||
node_modules/
|
||||
bun.lock
|
||||
|
||||
# Build output
|
||||
dist/
|
||||
|
||||
# Runtime data
|
||||
data/
|
||||
|
||||
# IDE
|
||||
.vscode/
|
||||
.idea/
|
||||
|
||||
# OS
|
||||
.DS_Store
|
||||
Thumbs.db
|
||||
|
||||
# Environment
|
||||
.env
|
||||
.env.local
|
||||
@@ -0,0 +1,91 @@
|
||||
# opencode-personal-knowledge
|
||||
|
||||
Personal knowledge MCP server with vector database for the Opencode ecosystem.
|
||||
|
||||
## Features
|
||||
|
||||
- **Semantic Search** — Find knowledge using vector embeddings
|
||||
- **Text Search** — Keyword-based search fallback
|
||||
- **Tag Organization** — Categorize entries with tags
|
||||
- **Plug-and-Play** — No external services required (embeddings run locally)
|
||||
|
||||
## Quick Start (Source installation - Testing)
|
||||
|
||||
```bash
|
||||
# Install dependencies
|
||||
bun install
|
||||
|
||||
# Run CLI
|
||||
bun start add "Title" "Content" --tags "ai,mcp"
|
||||
bun start search "query"
|
||||
|
||||
# Run the MCP server manually (for testing only; with the Opencode integration it starts automatically when Opencode loads)
|
||||
bun run mcp
|
||||
```
|
||||
|
||||
## Opencode Integration (Recommended)
|
||||
|
||||
Add to `~/.config/opencode/opencode.jsonc`:
|
||||
|
||||
```jsonc
|
||||
{
|
||||
"mcp": {
|
||||
"personal-knowledge": {
|
||||
"type": "local",
|
||||
"command": ["npx", "-y", "opencode-personal-knowledge"],
|
||||
"enabled": true
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
## MCP Tools
|
||||
|
||||
| Tool | Description |
|
||||
| :---------------------- | :-------------------------- |
|
||||
| `store_knowledge` | Store a new knowledge entry |
|
||||
| `search_knowledge` | Semantic search |
|
||||
| `search_knowledge_text` | Keyword search |
|
||||
| `get_knowledge` | Get entry by ID |
|
||||
| `update_knowledge` | Update entry |
|
||||
| `delete_knowledge` | Delete entry |
|
||||
| `list_knowledge` | List entries |
|
||||
| `get_knowledge_stats` | Database stats |
|
||||
|
||||
## Example Usage
|
||||
|
||||
**User:** "store a knowledge entry about Opencode Features"
|
||||
|
||||
**Agent:** Researches and compiles entry, then calls `store_knowledge`:
|
||||
|
||||
```
|
||||
Tool: personal-knowledge_store_knowledge
|
||||
Title: "Opencode Features"
|
||||
Content: "Opencode is an open source AI coding agent that helps write code
|
||||
in terminals, IDEs, or desktops. Key features include: LSP-enabled,
|
||||
multi-session support, shareable session links, Claude Pro integration,
|
||||
75+ LLM providers via Models.dev, and availability across terminal,
|
||||
desktop app, and IDE extensions."
|
||||
Tags: ["opencode", "features", "ai-coding-agent"]
|
||||
```
|
||||
|
||||
**Result:** `✅ Stored knowledge entry #2: "Opencode Features" 📊 Indexed for semantic search`
|
||||
|
||||
---
|
||||
|
||||
**User:** "@search_knowledge for opencode"
|
||||
|
||||
**Agent:** Performs semantic search and returns matching entry:
|
||||
|
||||
```
|
||||
Found 1 similar entry:
|
||||
|
||||
### 1. Opencode Features (85% similar)
|
||||
Opencode is an open source AI coding agent that helps write code in
|
||||
terminals, IDEs, or desktops. Key features include: LSP-enabled,
|
||||
multi-session support, shareable session links, Claude Pro integration...
|
||||
```
|
||||
|
||||
## License
|
||||
|
||||
MIT
|
||||
@@ -0,0 +1,59 @@
|
||||
{
|
||||
"name": "opencode-personal-knowledge",
|
||||
"version": "1.0.0",
|
||||
"description": "Personal knowledge MCP server with vector database for the Opencode ecosystem",
|
||||
"type": "module",
|
||||
"main": "dist/mcp-server.js",
|
||||
"bin": {
|
||||
"opencode-personal-knowledge": "dist/mcp-server.js"
|
||||
},
|
||||
"files": [
|
||||
"dist",
|
||||
"README.md"
|
||||
],
|
||||
"scripts": {
|
||||
"start": "bun run src/index.ts",
|
||||
"dev": "bun --watch run src/index.ts",
|
||||
"mcp": "bun run src/mcp-server.ts",
|
||||
"build": "bun build src/mcp-server.ts --outdir=dist --target=node --format=esm && bun build src/index.ts --outdir=dist --target=node --format=esm",
|
||||
"prepublishOnly": "npm run build",
|
||||
"test": "bun test",
|
||||
"test:coverage": "bun test --coverage",
|
||||
"lint": "bunx @biomejs/biome check src/",
|
||||
"format": "bunx @biomejs/biome format --write src/"
|
||||
},
|
||||
"keywords": [
|
||||
"mcp",
|
||||
"model-context-protocol",
|
||||
"knowledge",
|
||||
"vector",
|
||||
"opencode",
|
||||
"ai",
|
||||
"embeddings",
|
||||
"semantic-search"
|
||||
],
|
||||
"author": "NocturnLabs - Yum (CodingInCarhartts)",
|
||||
"license": "MIT",
|
||||
"repository": {
|
||||
"type": "git",
|
||||
"url": "https://github.com/NocturnLabs/opencode-personal-knowledge"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=18.0.0"
|
||||
},
|
||||
"dependencies": {
|
||||
"@lancedb/lancedb": "^0.22.3",
|
||||
"@modelcontextprotocol/sdk": "^1.24.3",
|
||||
"@xenova/transformers": "^2.17.2",
|
||||
"commander": "^14.0.2",
|
||||
"zod": "^4.1.13"
|
||||
},
|
||||
"devDependencies": {
|
||||
"@biomejs/biome": "^1.9.4",
|
||||
"@types/bun": "^1.1.14"
|
||||
},
|
||||
"trustedDependencies": [
|
||||
"@biomejs/biome",
|
||||
"protobufjs"
|
||||
]
|
||||
}
|
||||
@@ -0,0 +1,280 @@
|
||||
/**
|
||||
* Database module for knowledge entries using Bun's native SQLite.
|
||||
*/
|
||||
import { Database } from "bun:sqlite";
|
||||
import { join } from "path";
|
||||
import { existsSync, mkdirSync } from "fs";
|
||||
import { homedir } from "os";
|
||||
|
||||
// Where the knowledge base lives on disk. A non-empty OPENCODE_PK_DATA_DIR
// takes precedence; otherwise use ~/.local/share/opencode-personal-knowledge.
const DATA_DIR = process.env.OPENCODE_PK_DATA_DIR
  ? process.env.OPENCODE_PK_DATA_DIR
  : join(homedir(), ".local", "share", "opencode-personal-knowledge");

// SQLite file inside the data directory.
const DB_PATH = join(DATA_DIR, "knowledge.db");
|
||||
|
||||
/**
 * A knowledge entry in the shape handed to callers: tags are a decoded
 * array and absent optional columns are `undefined` rather than `null`.
 */
export interface KnowledgeEntry {
  /** Row identifier; optional on this type. */
  id?: number;
  /** Entry title. */
  title: string;
  /** Full entry text. */
  content: string;
  /** Optional source URL or reference. */
  source?: string;
  /** Optional list of tags. */
  tags?: Array<string>;
  /** Creation timestamp string. */
  created_at: string;
  /** Last-modification timestamp string. */
  updated_at: string;
}
|
||||
|
||||
/**
 * A raw row of the `knowledge_entries` table, exactly as SQLite returns it.
 * Nullable columns are `null` and tags are still JSON-encoded text.
 */
export interface KnowledgeRecord {
  /** Primary key. */
  id: number;
  /** Entry title. */
  title: string;
  /** Full entry text. */
  content: string;
  /** Source URL or reference, or null when none was stored. */
  source: null | string;
  /** JSON string holding the tag array, or null when none were stored. */
  tags: null | string;
  /** Creation timestamp string. */
  created_at: string;
  /** Last-modification timestamp string. */
  updated_at: string;
}
|
||||
|
||||
// Shared connection for this module: null until initDatabase() opens it, and reset to null by closeDatabase().
let db: Database | null = null;
|
||||
|
||||
/**
 * Create the data directory, including any missing parents, if it does
 * not exist yet.
 */
function ensureDataDir(): void {
  const alreadyPresent = existsSync(DATA_DIR);
  if (alreadyPresent) {
    return;
  }
  mkdirSync(DATA_DIR, { recursive: true });
}
|
||||
|
||||
/**
 * Return the shared database connection, opening it on first use.
 *
 * The first call makes sure the data directory exists, opens (creating if
 * necessary) the SQLite file at DB_PATH, and ensures the `knowledge_entries`
 * table and its indexes exist. Subsequent calls return the cached connection.
 *
 * @returns The open database connection.
 */
export function initDatabase(): Database {
  if (db !== null) {
    return db;
  }

  ensureDataDir();
  const connection = new Database(DB_PATH, { create: true });
  // Cache before running the schema statements, as callers share this handle.
  db = connection;

  const schemaStatements = [
    `
    CREATE TABLE IF NOT EXISTS knowledge_entries (
      id INTEGER PRIMARY KEY AUTOINCREMENT,
      title TEXT NOT NULL,
      content TEXT NOT NULL,
      source TEXT,
      tags TEXT,
      created_at TEXT NOT NULL,
      updated_at TEXT NOT NULL
    )
  `,
    `CREATE INDEX IF NOT EXISTS idx_tags ON knowledge_entries(tags)`,
    `CREATE INDEX IF NOT EXISTS idx_created ON knowledge_entries(created_at)`,
  ];
  for (const statement of schemaStatements) {
    connection.run(statement);
  }

  return connection;
}
|
||||
|
||||
/**
 * Insert a new knowledge entry.
 *
 * Both timestamps are set to the current time in ISO format. Tags are
 * stored as a JSON string; a missing tag list and a missing or empty
 * source are stored as NULL.
 *
 * @param entry - Title, content, and optional source and tags.
 * @returns The row ID of the inserted entry.
 */
export function saveKnowledgeEntry(entry: Omit<KnowledgeEntry, "id" | "created_at" | "updated_at">): number {
  const connection = initDatabase();
  const timestamp = new Date().toISOString();
  const { title, content, source, tags } = entry;
  const serializedTags = tags ? JSON.stringify(tags) : null;

  const insert = connection.prepare(`
    INSERT INTO knowledge_entries (title, content, source, tags, created_at, updated_at)
    VALUES (?, ?, ?, ?, ?, ?)
  `);

  const { lastInsertRowid } = insert.run(title, content, source || null, serializedTags, timestamp, timestamp);
  return Number(lastInsertRowid);
}
|
||||
|
||||
/**
 * Look up a single knowledge entry.
 *
 * @param id - Row ID of the entry.
 * @returns The entry with tags decoded from JSON, or null when no row has that ID.
 */
export function getKnowledgeEntry(id: number): KnowledgeEntry | null {
  const lookup = initDatabase().prepare("SELECT * FROM knowledge_entries WHERE id = ?");
  const row = lookup.get(id) as KnowledgeRecord | undefined;

  if (!row) {
    return null;
  }

  const entry: KnowledgeEntry = {
    id: row.id,
    title: row.title,
    content: row.content,
    // NULL (and empty) column values surface as undefined.
    source: row.source ? row.source : undefined,
    tags: row.tags ? JSON.parse(row.tags) : undefined,
    created_at: row.created_at,
    updated_at: row.updated_at,
  };
  return entry;
}
|
||||
|
||||
/**
 * Apply a partial update to an existing entry.
 *
 * Fields missing from `updates` keep their stored value, and `updated_at`
 * is refreshed to the current time.
 *
 * @param id - Row ID of the entry to change.
 * @param updates - New values for any of title, content, source, tags.
 * @returns false when no entry has that ID, true once the UPDATE has run.
 */
export function updateKnowledgeEntry(
  id: number,
  updates: Partial<Pick<KnowledgeEntry, "title" | "content" | "source" | "tags">>
): boolean {
  const connection = initDatabase();
  const current = getKnowledgeEntry(id);
  if (current === null) {
    return false;
  }

  const timestamp = new Date().toISOString();
  const title = updates.title ?? current.title;
  const content = updates.content ?? current.content;
  const source = updates.source ?? current.source ?? null;
  // Prefer the supplied tag list, otherwise fall back to the stored one.
  const tagsToStore = updates.tags ? updates.tags : current.tags;
  const tagsJson = tagsToStore ? JSON.stringify(tagsToStore) : null;

  connection
    .prepare(`
    UPDATE knowledge_entries
    SET title = ?, content = ?, source = ?, tags = ?, updated_at = ?
    WHERE id = ?
  `)
    .run(title, content, source, tagsJson, timestamp, id);

  return true;
}
|
||||
|
||||
/**
 * Remove a knowledge entry.
 *
 * @param id - Row ID of the entry to delete.
 * @returns true when a row was deleted, false when none matched.
 */
export function deleteKnowledgeEntry(id: number): boolean {
  const removal = initDatabase().prepare("DELETE FROM knowledge_entries WHERE id = ?");
  const { changes } = removal.run(id);
  return changes > 0;
}
|
||||
|
||||
/**
 * List knowledge entries, newest first, with optional tag filtering.
 *
 * An entry matches the tag filter when it carries at least one of the
 * requested tags. Tags are stored as a JSON array string, so each requested
 * tag is matched as its JSON-encoded form (`"tag"`) inside that string.
 *
 * @param options.limit - Maximum number of rows to return (default 20).
 * @param options.offset - Number of rows to skip (default 0).
 * @param options.tags - Return only entries having any of these tags.
 * @returns Matching entries ordered by `created_at` descending.
 */
export function listKnowledgeEntries(options: {
  limit?: number;
  offset?: number;
  tags?: string[];
}): KnowledgeEntry[] {
  const database = initDatabase();
  const { limit = 20, offset = 0, tags } = options;

  let sql = "SELECT * FROM knowledge_entries";
  const params: (string | number)[] = [];

  if (tags && tags.length > 0) {
    // ESCAPE '\' lets us match %, _ and \ in a tag literally instead of
    // having LIKE treat them as wildcards.
    const tagConditions = tags.map(() => "tags LIKE ? ESCAPE '\\'").join(" OR ");
    sql += ` WHERE (${tagConditions})`;
    params.push(
      ...tags.map((tag) => {
        // JSON.stringify reproduces the stored encoding (quotes and
        // backslashes escaped); then neutralise LIKE metacharacters.
        const needle = JSON.stringify(tag).replace(/[\\%_]/g, "\\$&");
        return `%${needle}%`;
      })
    );
  }

  sql += " ORDER BY created_at DESC LIMIT ? OFFSET ?";
  params.push(limit, offset);

  const records = database.prepare(sql).all(...params) as KnowledgeRecord[];

  return records.map((record) => ({
    id: record.id,
    title: record.title,
    content: record.content,
    source: record.source || undefined,
    tags: record.tags ? JSON.parse(record.tags) : undefined,
    created_at: record.created_at,
    updated_at: record.updated_at,
  }));
}
|
||||
|
||||
/**
 * Keyword search over entry titles and content.
 *
 * The query is lower-cased and split on whitespace; words of two characters
 * or fewer are ignored. An entry matches when any remaining word occurs as a
 * substring of its title or content. Results are ordered by most recently
 * updated.
 *
 * @param query - Free-text query.
 * @param limit - Maximum number of rows to return (default 10).
 * @returns Matching entries, or an empty array when the query has no usable words.
 */
export function searchKnowledgeByText(query: string, limit = 10): KnowledgeEntry[] {
  const database = initDatabase();

  // De-duplicate so a repeated word does not add redundant conditions.
  const words = [...new Set(query.toLowerCase().split(/\s+/).filter((w) => w.length > 2))];

  if (words.length === 0) {
    return [];
  }

  // ESCAPE '\' makes %, _ and \ in the user's words match literally rather
  // than acting as LIKE wildcards.
  const conditions = words
    .map(() => "(LOWER(title) LIKE ? ESCAPE '\\' OR LOWER(content) LIKE ? ESCAPE '\\')")
    .join(" OR ");

  const params = words.flatMap((w) => {
    const pattern = `%${w.replace(/[\\%_]/g, "\\$&")}%`;
    return [pattern, pattern];
  });

  const records = database.prepare(`
    SELECT * FROM knowledge_entries
    WHERE ${conditions}
    ORDER BY updated_at DESC
    LIMIT ?
  `).all(...params, limit) as KnowledgeRecord[];

  return records.map((record) => ({
    id: record.id,
    title: record.title,
    content: record.content,
    source: record.source || undefined,
    tags: record.tags ? JSON.parse(record.tags) : undefined,
    created_at: record.created_at,
    updated_at: record.updated_at,
  }));
}
|
||||
|
||||
/**
 * Fetch every knowledge entry in ascending ID order (used for vector conversion).
 *
 * @returns All entries with tags decoded from JSON.
 */
export function getAllEntries(): KnowledgeEntry[] {
  const everything = initDatabase().prepare("SELECT * FROM knowledge_entries ORDER BY id");
  const rows = everything.all() as KnowledgeRecord[];

  const entries: KnowledgeEntry[] = [];
  for (const row of rows) {
    entries.push({
      id: row.id,
      title: row.title,
      content: row.content,
      source: row.source || undefined,
      tags: row.tags ? JSON.parse(row.tags) : undefined,
      created_at: row.created_at,
      updated_at: row.updated_at,
    });
  }
  return entries;
}
|
||||
|
||||
/**
 * Compute summary statistics for the knowledge base.
 *
 * @returns The total number of entries, how many entries carry each tag,
 *   and the earliest and latest `created_at` values (null when the table
 *   is empty).
 */
export function getStats(): {
  totalEntries: number;
  tagCounts: Record<string, number>;
  oldestEntry: string | null;
  newestEntry: string | null;
} {
  const database = initDatabase();

  // An aggregate query without GROUP BY always yields exactly one row.
  const summary = database
    .prepare("SELECT COUNT(*) as count, MIN(created_at) as oldest, MAX(created_at) as newest FROM knowledge_entries")
    .get() as { count: number; oldest: string | null; newest: string | null };

  const taggedRows = database
    .prepare("SELECT tags FROM knowledge_entries WHERE tags IS NOT NULL")
    .all() as { tags: string }[];

  // Count in a Map: on a plain object, tags such as "constructor" or
  // "toString" would read inherited members and corrupt the count.
  const counts = new Map<string, number>();
  for (const row of taggedRows) {
    const tags = JSON.parse(row.tags) as string[];
    for (const tag of tags) {
      counts.set(tag, (counts.get(tag) ?? 0) + 1);
    }
  }

  return {
    totalEntries: summary.count,
    tagCounts: Object.fromEntries(counts),
    oldestEntry: summary.oldest,
    newestEntry: summary.newest,
  };
}
|
||||
|
||||
/**
 * Close the shared connection, if one is open, and clear the cached handle
 * so the next initDatabase() call opens a new one.
 */
export function closeDatabase(): void {
  if (db === null) {
    return;
  }
  db.close();
  db = null;
}
|
||||
+238
@@ -0,0 +1,238 @@
|
||||
#!/usr/bin/env bun
|
||||
/**
|
||||
* Personal Knowledge CLI
|
||||
*
|
||||
* Command-line interface for managing personal knowledge entries.
|
||||
*/
|
||||
import { Command } from "commander";
|
||||
import {
|
||||
addKnowledge,
|
||||
searchKnowledge,
|
||||
searchKnowledgeText,
|
||||
getKnowledge,
|
||||
updateKnowledge,
|
||||
deleteKnowledge,
|
||||
listKnowledge,
|
||||
getKnowledgeStats,
|
||||
} from "./services/knowledgeService.js";
|
||||
import { convertToVectorDB, getVectorStats, clearVectorDB } from "./services/vectorService.js";
|
||||
|
||||
// Root `pk` command; subcommands are registered on it below.
const program = new Command();

program.name("pk");
program.description("Personal Knowledge CLI - Manage your knowledge base");
program.version("1.0.0");
|
||||
|
||||
// Add command: store a new entry and report whether it was vector-indexed.
program
  .command("add")
  .description("Add a new knowledge entry")
  .argument("<title>", "Entry title")
  .argument("<content>", "Entry content")
  .option("-s, --source <source>", "Source URL or reference")
  .option("-t, --tags <tags>", "Comma-separated tags")
  .action(async (title, content, options) => {
    // Drop empty items so input such as "ai,,mcp," does not store blank tags.
    const tags = options.tags
      ? options.tags
          .split(",")
          .map((t: string) => t.trim())
          .filter((t: string) => t.length > 0)
      : undefined;
    const result = await addKnowledge({ title, content, source: options.source, tags });
    console.log(`✅ Added entry #${result.id}: "${title}"`);
    console.log(result.vectorized ? "📊 Indexed for semantic search" : "⚠️ Saved to database only");
  });
|
||||
|
||||
// Search command: semantic search by default, keyword search with --text.
program
  .command("search")
  .description("Search knowledge entries")
  .argument("<query>", "Search query")
  .option("-t, --text", "Use text search instead of semantic search")
  .option("-l, --limit <limit>", "Maximum results", "5")
  .action(async (query, options) => {
    // Reject non-numeric or non-positive limits up front instead of letting
    // NaN reach the query layer.
    const limit = Number.parseInt(options.limit, 10);
    if (!Number.isInteger(limit) || limit < 1) {
      console.error(`Invalid --limit "${options.limit}": expected a positive integer.`);
      process.exitCode = 1;
      return;
    }

    if (options.text) {
      const results = searchKnowledgeText(query, limit);
      if (results.length === 0) {
        console.log("No results found.");
        return;
      }
      console.log(`Found ${results.length} result(s):\n`);
      for (const r of results) {
        console.log(`[${r.id}] ${r.title}`);
        // Only show an ellipsis when the content was actually cut.
        const snippet = r.content.length > 100 ? `${r.content.slice(0, 100)}...` : r.content;
        console.log(` ${snippet}`);
        if (r.tags) console.log(` Tags: ${r.tags.join(", ")}`);
        console.log();
      }
    } else {
      try {
        const results = await searchKnowledge(query, { limit });
        if (results.length === 0) {
          console.log("No similar entries found.");
          return;
        }
        console.log(`Found ${results.length} similar entries:\n`);
        for (const r of results) {
          const similarity = Math.round(r.score * 100);
          console.log(`[${r.id}] ${r.title} (${similarity}% similar)`);
          console.log(` ${r.content_preview.slice(0, 100)}...`);
          if (r.tags.length > 0) console.log(` Tags: ${r.tags.join(", ")}`);
          console.log();
        }
      } catch (error) {
        console.error("Semantic search failed. Try --text for keyword search.");
        console.error(error instanceof Error ? error.message : error);
        // Signal the failure to calling scripts.
        process.exitCode = 1;
      }
    }
  });
|
||||
|
||||
// Get command: print one entry with its metadata and full content.
program
  .command("get")
  .description("Get a knowledge entry by ID")
  .argument("<id>", "Entry ID")
  .action((id) => {
    const entry = getKnowledge(parseInt(id));
    if (!entry) {
      console.log(`No entry found with ID: ${id}`);
      return;
    }

    // Collect the output lines first, then print them in order.
    const lines = [
      `# ${entry.title}\n`,
      `ID: ${entry.id}`,
      `Created: ${entry.created_at}`,
      `Updated: ${entry.updated_at}`,
    ];
    if (entry.source) lines.push(`Source: ${entry.source}`);
    if (entry.tags) lines.push(`Tags: ${entry.tags.join(", ")}`);
    lines.push(`\n${entry.content}`);

    for (const line of lines) {
      console.log(line);
    }
  });
|
||||
|
||||
// Update command: change any subset of an entry's fields.
program
  .command("update")
  .description("Update a knowledge entry")
  .argument("<id>", "Entry ID")
  .option("--title <title>", "New title")
  .option("--content <content>", "New content")
  .option("-s, --source <source>", "New source")
  .option("-t, --tags <tags>", "New comma-separated tags")
  .action(async (id, options) => {
    const updates: Record<string, unknown> = {};
    if (options.title) updates.title = options.title;
    if (options.content) updates.content = options.content;
    if (options.source) updates.source = options.source;
    if (options.tags) {
      // Drop empty items so input such as "ai,,mcp," does not store blank tags.
      updates.tags = options.tags
        .split(",")
        .map((t: string) => t.trim())
        .filter((t: string) => t.length > 0);
    }

    if (Object.keys(updates).length === 0) {
      console.log("No updates provided.");
      return;
    }

    const result = await updateKnowledge(parseInt(id), updates);
    if (!result.success) {
      console.log(`No entry found with ID: ${id}`);
      return;
    }
    console.log(`✅ Updated entry #${id}`);
    console.log(result.vectorized ? "📊 Re-indexed" : "⚠️ Vector update failed");
  });
|
||||
|
||||
// Delete command: remove an entry by ID and report the outcome.
program
  .command("delete")
  .description("Delete a knowledge entry")
  .argument("<id>", "Entry ID")
  .action(async (id) => {
    const removed = await deleteKnowledge(parseInt(id));
    if (removed) {
      console.log(`✅ Deleted entry #${id}`);
    } else {
      console.log(`No entry found with ID: ${id}`);
    }
  });
|
||||
|
||||
// List command: paginated listing with optional tag filter.
program
  .command("list")
  .description("List knowledge entries")
  .option("-l, --limit <limit>", "Maximum entries", "20")
  .option("-o, --offset <offset>", "Offset for pagination", "0")
  .option("-t, --tags <tags>", "Filter by comma-separated tags")
  .action((options) => {
    // Validate pagination values up front so NaN never reaches the query layer.
    const limit = Number.parseInt(options.limit, 10);
    if (!Number.isInteger(limit) || limit < 1) {
      console.error(`Invalid --limit "${options.limit}": expected a positive integer.`);
      process.exitCode = 1;
      return;
    }
    const offset = Number.parseInt(options.offset, 10);
    if (!Number.isInteger(offset) || offset < 0) {
      console.error(`Invalid --offset "${options.offset}": expected a non-negative integer.`);
      process.exitCode = 1;
      return;
    }

    // Ignore empty items produced by stray commas.
    const tags = options.tags
      ? options.tags
          .split(",")
          .map((t: string) => t.trim())
          .filter((t: string) => t.length > 0)
      : undefined;

    const entries = listKnowledge({ limit, offset, tags });
    if (entries.length === 0) {
      console.log("No entries found.");
      return;
    }

    console.log(`📚 Knowledge Entries (${entries.length}):\n`);
    for (const e of entries) {
      console.log(`[${e.id}] ${e.title}${e.tags ? ` [${e.tags.join(", ")}]` : ""}`);
    }
  });
|
||||
|
||||
// Stats command: entry and vector totals, date range, and the ten most used tags.
program
  .command("stats")
  .description("Get knowledge base statistics")
  .action(async () => {
    const { database, vectors: vectorStats } = await getKnowledgeStats();

    console.log("📊 Knowledge Base Stats\n");
    console.log(`Total Entries: ${database.totalEntries}`);
    console.log(`Vectors Indexed: ${vectorStats.totalVectors}`);
    console.log(`Oldest: ${database.oldestEntry || "N/A"}`);
    console.log(`Newest: ${database.newestEntry || "N/A"}`);

    // Most frequently used tags first.
    const ranked = Object.entries(database.tagCounts);
    ranked.sort((a, b) => b[1] - a[1]);
    if (ranked.length === 0) {
      return;
    }

    console.log("\nTop Tags:");
    ranked.slice(0, 10).forEach(([tag, count]) => {
      console.log(` ${tag}: ${count}`);
    });
  });
|
||||
|
||||
// `vectors` command group for managing the vector database.
const vectors = program.command("vectors");
vectors.description("Manage vector database");

// vectors convert: index all entries, printing progress on a single line.
vectors
  .command("convert")
  .description("Convert all entries to vector database")
  .action(async () => {
    console.log("Converting entries to vectors...");
    const { converted, skipped } = await convertToVectorDB({
      onProgress: (current, total) => {
        // Carriage return rewrites the same terminal line on each update.
        process.stdout.write(`\rProgress: ${current}/${total}`);
      },
    });
    console.log(`\n✅ Converted ${converted} entries (${skipped} already indexed)`);
  });
|
||||
|
||||
// vectors stats: total vector count and the ten most frequent tags.
vectors
  .command("stats")
  .description("Get vector database statistics")
  .action(async () => {
    const { totalVectors, tagCounts } = await getVectorStats();
    console.log("📊 Vector Database Stats\n");
    console.log(`Total Vectors: ${totalVectors}`);

    // Most frequently used tags first.
    const ranked = Object.entries(tagCounts);
    ranked.sort((a, b) => b[1] - a[1]);
    if (ranked.length === 0) {
      return;
    }

    console.log("\nTags in vectors:");
    ranked.slice(0, 10).forEach(([tag, count]) => {
      console.log(` ${tag}: ${count}`);
    });
  });
|
||||
|
||||
// vectors clear: empty the vector database and confirm.
const clearCommand = vectors.command("clear");
clearCommand.description("Clear the vector database");
clearCommand.action(async () => {
  await clearVectorDB();
  console.log("✅ Vector database cleared");
});
|
||||
|
||||
// The action handlers are async, so use parseAsync() to wait for them and to
// turn a rejected handler into an error message plus a non-zero exit code.
program.parseAsync().catch((error: unknown) => {
  console.error(error instanceof Error ? error.message : error);
  process.exitCode = 1;
});
|
||||
@@ -0,0 +1,304 @@
|
||||
#!/usr/bin/env bun
|
||||
/**
|
||||
* Personal Knowledge MCP Server
|
||||
*
|
||||
* Exposes knowledge database via Model Context Protocol for use by AI agents.
|
||||
*
|
||||
* Tools provided:
|
||||
* - store_knowledge: Store a new knowledge entry
|
||||
* - search_knowledge: Semantic search using vector embeddings
|
||||
* - search_knowledge_text: Keyword-based text search
|
||||
* - get_knowledge: Get a specific entry by ID
|
||||
* - update_knowledge: Update an existing entry
|
||||
* - delete_knowledge: Delete an entry
|
||||
* - list_knowledge: List entries with filters
|
||||
* - get_knowledge_stats: Get database statistics
|
||||
*/
|
||||
import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
|
||||
import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js";
|
||||
import { z } from "zod";
|
||||
import {
|
||||
addKnowledge,
|
||||
searchKnowledge,
|
||||
searchKnowledgeText,
|
||||
getKnowledge,
|
||||
updateKnowledge,
|
||||
deleteKnowledge,
|
||||
listKnowledge,
|
||||
getKnowledgeStats,
|
||||
} from "./services/knowledgeService.js";
|
||||
|
||||
// MCP server instance; the tools below are registered on it.
const serverInfo = {
  name: "personal-knowledge-mcp",
  version: "1.0.0",
};
const server = new McpServer(serverInfo);
|
||||
|
||||
// Tool: Store knowledge
// Saves a new entry and reports whether it was also indexed for semantic
// search. Failures are returned as a tool result flagged with isError so
// the client can tell them apart from a successful store.
server.tool(
  "store_knowledge",
  "Store a new knowledge entry in your personal knowledge base. Use this to save important information, notes, or learnings for later retrieval.",
  {
    title: z.string().describe("Short descriptive title for the entry"),
    content: z.string().describe("The full content/text of the knowledge entry"),
    source: z.string().optional().describe("Optional source URL or reference"),
    tags: z.array(z.string()).optional().describe("Optional tags for categorization (e.g., ['typescript', 'patterns'])"),
  },
  async ({ title, content, source, tags }) => {
    try {
      const result = await addKnowledge({ title, content, source, tags });
      return {
        content: [{
          type: "text",
          text: `✅ Stored knowledge entry #${result.id}: "${title}"\n${result.vectorized ? "📊 Indexed for semantic search" : "⚠️ Saved to database only (vector indexing failed)"}`,
        }],
      };
    } catch (error) {
      const message = error instanceof Error ? error.message : "Unknown error";
      return {
        content: [{ type: "text", text: `❌ Failed to store knowledge: ${message}` }],
        isError: true,
      };
    }
  }
);
|
||||
|
||||
// Tool: Semantic search
// Returns entries whose similarity score is at least 0.3, formatted as
// Markdown. An uninitialized vector database yields a hint pointing at the
// keyword search tool; any other failure is returned flagged with isError.
server.tool(
  "search_knowledge",
  "Search your personal knowledge base using semantic similarity. Returns entries most similar in meaning to your query.",
  {
    query: z.string().describe("Search query to find similar knowledge entries"),
    limit: z.number().optional().default(5).describe("Maximum number of results (default: 5)"),
  },
  async ({ query, limit }) => {
    try {
      const results = await searchKnowledge(query, { limit, minScore: 0.3 });

      if (results.length === 0) {
        return {
          content: [{ type: "text", text: "No similar knowledge entries found." }],
        };
      }

      let output = `## Found ${results.length} similar entries:\n\n`;
      for (let i = 0; i < results.length; i++) {
        const r = results[i];
        const similarity = Math.round(r.score * 100);
        output += `### ${i + 1}. ${r.title} (${similarity}% similar)\n`;
        output += `**ID:** ${r.id}\n`;
        if (r.tags.length > 0) {
          output += `**Tags:** ${r.tags.join(", ")}\n`;
        }
        output += `\n${r.content_preview}...\n\n---\n\n`;
      }

      return {
        content: [{ type: "text", text: output }],
      };
    } catch (error) {
      const message = error instanceof Error ? error.message : "Unknown error";
      if (message.includes("not initialized")) {
        // Guidance rather than a hard failure: the caller can fall back to
        // keyword search or add entries first.
        return {
          content: [{
            type: "text",
            text: "Vector database not initialized. Use search_knowledge_text for keyword search, or add some entries first."
          }],
        };
      }
      return {
        content: [{ type: "text", text: `Search error: ${message}` }],
        isError: true,
      };
    }
  }
);
|
||||
|
||||
// Tool: Text search
// Keyword search over titles and content. Each hit shows at most the first
// 200 characters of its content; failures are returned flagged with isError,
// matching the other search tool.
server.tool(
  "search_knowledge_text",
  "Search knowledge entries by keyword (text-based, no semantic similarity). Good for exact matches.",
  {
    query: z.string().describe("Keywords to search for in titles and content"),
    limit: z.number().optional().default(10).describe("Maximum number of results"),
  },
  async ({ query, limit }) => {
    try {
      const results = searchKnowledgeText(query, limit);

      if (results.length === 0) {
        return {
          content: [{ type: "text", text: `No results found for: "${query}"` }],
        };
      }

      const formatted = results.map((r) => {
        // Only add an ellipsis when the content was actually cut.
        const snippet = r.content.length > 200 ? `${r.content.slice(0, 200)}...` : r.content;
        return `**${r.title}** (ID: ${r.id})\n${snippet}${r.tags ? `\nTags: ${r.tags.join(", ")}` : ""}`;
      }).join("\n\n---\n\n");

      return {
        content: [{
          type: "text",
          text: `Found ${results.length} result(s) for "${query}":\n\n${formatted}`
        }],
      };
    } catch (error) {
      const message = error instanceof Error ? error.message : "Unknown error";
      return {
        content: [{ type: "text", text: `Search error: ${message}` }],
        isError: true,
      };
    }
  }
);
|
||||
|
||||
// Tool: Get by ID
// Returns one entry rendered as Markdown: title, metadata lines, a rule,
// then the full content.
server.tool(
  "get_knowledge",
  "Get a specific knowledge entry by its ID",
  {
    id: z.number().describe("The ID of the knowledge entry"),
  },
  async ({ id }) => {
    const entry = getKnowledge(id);

    if (!entry) {
      return {
        content: [{ type: "text", text: `No entry found with ID: ${id}` }],
      };
    }

    // Metadata lines; source and tags appear only when present.
    const metadata = [
      `**ID:** ${entry.id}`,
      `**Created:** ${entry.created_at}`,
      `**Updated:** ${entry.updated_at}`,
    ];
    if (entry.source) metadata.push(`**Source:** ${entry.source}`);
    if (entry.tags) metadata.push(`**Tags:** ${entry.tags.join(", ")}`);

    const text = `# ${entry.title}\n\n${metadata.join("\n")}\n\n---\n\n${entry.content}`;

    return {
      content: [{ type: "text", text }],
    };
  }
);
|
||||
|
||||
// Tool: Update
// Applies whichever of title/content/source/tags were supplied and reports
// whether the entry was re-indexed for semantic search.
server.tool(
  "update_knowledge",
  "Update an existing knowledge entry",
  {
    id: z.number().describe("The ID of the entry to update"),
    title: z.string().optional().describe("New title"),
    content: z.string().optional().describe("New content"),
    source: z.string().optional().describe("New source"),
    tags: z.array(z.string()).optional().describe("New tags (replaces existing)"),
  },
  async ({ id, title, content, source, tags }) => {
    // Keep only the fields the caller actually provided.
    const updates: Record<string, unknown> = Object.fromEntries(
      Object.entries({ title, content, source, tags }).filter(([, value]) => value !== undefined)
    );

    if (Object.keys(updates).length === 0) {
      return {
        content: [{ type: "text", text: "No updates provided" }],
      };
    }

    const result = await updateKnowledge(id, updates);

    if (!result.success) {
      return {
        content: [{ type: "text", text: `No entry found with ID: ${id}` }],
      };
    }

    const indexNote = result.vectorized
      ? "📊 Re-indexed for semantic search"
      : "⚠️ Database updated (vector re-indexing failed)";

    return {
      content: [{
        type: "text",
        text: `✅ Updated entry #${id}\n${indexNote}`
      }],
    };
  }
);
|
||||
|
||||
// Tool: Delete
// Removes an entry by ID and reports whether anything was deleted.
server.tool(
  "delete_knowledge",
  "Delete a knowledge entry from the database",
  {
    id: z.number().describe("The ID of the entry to delete"),
  },
  async ({ id }) => {
    const removed = await deleteKnowledge(id);
    const text = removed ? `✅ Deleted entry #${id}` : `No entry found with ID: ${id}`;

    return {
      content: [{ type: "text", text }],
    };
  }
);
|
||||
|
||||
// Tool: List
// Returns a Markdown bullet list of entries (title, ID, tags) with
// pagination and an optional tag filter.
server.tool(
  "list_knowledge",
  "List knowledge entries with optional filtering",
  {
    limit: z.number().optional().default(20).describe("Maximum entries to return"),
    offset: z.number().optional().default(0).describe("Offset for pagination"),
    tags: z.array(z.string()).optional().describe("Filter by tags"),
  },
  async ({ limit, offset, tags }) => {
    const entries = listKnowledge({ limit, offset, tags });

    if (entries.length === 0) {
      return {
        content: [{ type: "text", text: "No knowledge entries found." }],
      };
    }

    const bullets: string[] = [];
    for (const e of entries) {
      const tagSuffix = e.tags ? ` [${e.tags.join(", ")}]` : "";
      bullets.push(`- **${e.title}** (ID: ${e.id})${tagSuffix}`);
    }

    return {
      content: [{
        type: "text",
        text: `📚 Knowledge Entries (${entries.length}):\n\n${bullets.join("\n")}`
      }],
    };
  }
);
|
||||
|
||||
// Tool: Stats
// Renders a short markdown report: entry and vector totals, date range, and
// the ten most frequently used tags.
server.tool(
  "get_knowledge_stats",
  "Get statistics about your personal knowledge base",
  {},
  async () => {
    const { database, vectors } = await getKnowledgeStats();

    // Rank tags by usage count, highest first, and keep the top ten.
    const rankedTags = Object.entries(database.tagCounts);
    rankedTags.sort((a, b) => b[1] - a[1]);
    const tagLines = rankedTags.slice(0, 10).map(([tag, count]) => ` ${tag}: ${count}`);
    const tagList = tagLines.join("\n");

    const report = `## Personal Knowledge Base Stats

**Total Entries:** ${database.totalEntries}
**Vectors Indexed:** ${vectors.totalVectors}

**Date Range:**
Oldest: ${database.oldestEntry || "N/A"}
Newest: ${database.newestEntry || "N/A"}

**Top Tags:**
${tagList || " No tags yet"}`;

    return {
      content: [{ type: "text", text: report }],
    };
  }
);
|
||||
|
||||
// Start the server
/**
 * Connect the MCP server to a stdio transport and announce readiness.
 *
 * The status line is written with console.error (stderr); with a stdio
 * transport stdout is presumably reserved for protocol traffic.
 */
async function main(): Promise<void> {
  const transport = new StdioServerTransport();
  await server.connect(transport);
  console.error("Personal Knowledge MCP Server running on stdio");
}

main().catch((error: unknown) => {
  // A failed start must be visible to whoever launched the process: log the
  // cause and exit non-zero instead of ending with a success status.
  console.error("Personal Knowledge MCP Server failed to start:", error);
  process.exit(1);
});
|
||||
@@ -0,0 +1,141 @@
|
||||
/**
|
||||
* Knowledge Service
|
||||
*
|
||||
* Business logic coordinating database and vector operations.
|
||||
*/
|
||||
import {
|
||||
saveKnowledgeEntry,
|
||||
getKnowledgeEntry,
|
||||
updateKnowledgeEntry,
|
||||
deleteKnowledgeEntry,
|
||||
listKnowledgeEntries,
|
||||
searchKnowledgeByText,
|
||||
getStats as getDbStats,
|
||||
type KnowledgeEntry,
|
||||
} from "../database/index.js";
|
||||
import {
|
||||
queryVectors,
|
||||
updateVector,
|
||||
deleteVector,
|
||||
getVectorStats,
|
||||
type SearchResult,
|
||||
} from "./vectorService.js";
|
||||
|
||||
export { type KnowledgeEntry };
|
||||
|
||||
/**
 * Add a new knowledge entry with automatic vector indexing.
 *
 * The entry is saved to the database first. Vector indexing is best-effort:
 * if it fails, the error is logged and the saved entry is still returned.
 *
 * @param entry - Title and content of the entry, plus optional source and tags.
 * @returns The ID of the saved entry and whether a vector was stored for it.
 */
export async function addKnowledge(entry: {
  title: string;
  content: string;
  source?: string;
  tags?: string[];
}): Promise<{ id: number; vectorized: boolean }> {
  // Save to SQLite
  const id = saveKnowledgeEntry(entry);

  // Index in vector DB
  let vectorized = false;
  try {
    const savedEntry = getKnowledgeEntry(id);
    if (savedEntry) {
      // updateVector returns false when it declines to index the entry, so
      // report its result rather than assuming success.
      vectorized = await updateVector(savedEntry);
    }
  } catch (error: unknown) {
    // Vector indexing failed, but entry is saved; keep the cause in the log.
    console.error("Vector indexing failed, entry saved to database only", error);
  }

  return { id, vectorized };
}
|
||||
|
||||
/**
 * Search knowledge using semantic similarity.
 *
 * Thin wrapper that forwards the query and options to the vector service.
 *
 * @param query - Free-text query to embed and compare against stored vectors.
 * @param options - Optional result limit and minimum similarity score.
 * @returns Matching entries as produced by `queryVectors`.
 */
export async function searchKnowledge(
  query: string,
  options: { limit?: number; minScore?: number } = {}
): Promise<SearchResult[]> {
  const matches = await queryVectors(query, options);
  return matches;
}
|
||||
|
||||
/**
 * Search knowledge using text matching.
 *
 * @param query - Text to look for.
 * @param limit - Maximum number of entries to return (defaults to 10).
 * @returns Entries returned by the database layer's text search.
 */
export function searchKnowledgeText(query: string, limit = 10): KnowledgeEntry[] {
  const matches = searchKnowledgeByText(query, limit);
  return matches;
}
|
||||
|
||||
/**
 * Get a knowledge entry by ID.
 *
 * @param id - ID of the entry to fetch.
 * @returns The entry, or `null` (presumably when no entry has that ID).
 */
export function getKnowledge(id: number): KnowledgeEntry | null {
  const entry = getKnowledgeEntry(id);
  return entry;
}
|
||||
|
||||
/**
 * Update a knowledge entry with automatic vector re-indexing.
 *
 * The database row is updated first. Re-indexing is best-effort: if it fails,
 * the error is logged and the database update still stands.
 *
 * @param id - ID of the entry to update.
 * @param updates - Any subset of title, content, source and tags.
 * @returns `success` is false when the database layer reports that nothing was
 *   updated; `vectorized` tells whether the vector was refreshed.
 */
export async function updateKnowledge(
  id: number,
  updates: Partial<Pick<KnowledgeEntry, "title" | "content" | "source" | "tags">>
): Promise<{ success: boolean; vectorized: boolean }> {
  const success = updateKnowledgeEntry(id, updates);

  if (!success) {
    return { success: false, vectorized: false };
  }

  // Re-index in vector DB
  let vectorized = false;
  try {
    const updatedEntry = getKnowledgeEntry(id);
    if (updatedEntry) {
      // updateVector returns false when it declines to index the entry, so
      // report its result rather than assuming success.
      vectorized = await updateVector(updatedEntry);
    }
  } catch (error: unknown) {
    // Keep the cause in the log so a failed re-index can be diagnosed.
    console.error("Vector re-indexing failed", error);
  }

  return { success, vectorized };
}
|
||||
|
||||
/**
 * Delete a knowledge entry and its vector.
 *
 * The vector is removed first on a best-effort basis; the database row is
 * deleted even if that step fails.
 *
 * @param id - ID of the entry to delete.
 * @returns The result of the database delete.
 */
export async function deleteKnowledge(id: number): Promise<boolean> {
  // Delete vector first
  try {
    await deleteVector(id);
  } catch (error: unknown) {
    // Continue even if vector deletion fails, but log it: a vector left
    // behind for a deleted entry would otherwise go unnoticed.
    console.error(`Vector deletion failed for entry ${id}`, error);
  }

  return deleteKnowledgeEntry(id);
}
|
||||
|
||||
/**
 * List knowledge entries with optional filters.
 *
 * @param options - Optional page size (`limit`), start position (`offset`)
 *   and tag filter, passed through to the database layer unchanged.
 * @returns The entries returned by the database layer.
 */
export function listKnowledge(options: {
  limit?: number;
  offset?: number;
  tags?: string[];
}): KnowledgeEntry[] {
  const entries = listKnowledgeEntries(options);
  return entries;
}
|
||||
|
||||
/**
 * Get combined statistics.
 *
 * @returns Database statistics under `database` and vector-store statistics
 *   under `vectors`.
 */
export async function getKnowledgeStats(): Promise<{
  database: ReturnType<typeof getDbStats>;
  vectors: Awaited<ReturnType<typeof getVectorStats>>;
}> {
  // Database stats are read first, then the vector stats are awaited.
  return {
    database: getDbStats(),
    vectors: await getVectorStats(),
  };
}
|
||||
@@ -0,0 +1,290 @@
|
||||
/**
|
||||
* Vector Database Service
|
||||
*
|
||||
* Provides semantic search over knowledge entries using LanceDB and Transformers.js embeddings.
|
||||
* Uses all-MiniLM-L6-v2 model which auto-downloads on first use (~22MB).
|
||||
*/
|
||||
import lancedb from "@lancedb/lancedb";
|
||||
import { pipeline, type FeatureExtractionPipeline } from "@xenova/transformers";
|
||||
import { join } from "path";
|
||||
import { existsSync, mkdirSync } from "fs";
|
||||
import { homedir } from "os";
|
||||
import { getAllEntries, type KnowledgeEntry } from "../database/index.js";
|
||||
|
||||
// Root of the persisted data. A non-empty OPENCODE_PK_DATA_DIR wins; otherwise
// fall back to ~/.local/share/opencode-personal-knowledge.
const dataDirOverride = process.env.OPENCODE_PK_DATA_DIR;
const DATA_DIR = dataDirOverride
  ? dataDirOverride
  : join(homedir(), ".local", "share", "opencode-personal-knowledge");

// The vector store is kept in its own sub-directory of the data root.
const VECTOR_DB_PATH = join(DATA_DIR, "vectors");

// Embedding model (auto-downloads on first use)
const EMBEDDING_MODEL = "Xenova/all-MiniLM-L6-v2";
|
||||
|
||||
// Singleton embedding pipeline
let embeddingPipeline: FeatureExtractionPipeline | null = null;

// Load currently in progress, shared by all callers that arrive before it
// finishes so the model is only loaded once.
let embeddingPipelineLoading: Promise<FeatureExtractionPipeline> | null = null;

/**
 * Get or initialize the embedding pipeline.
 *
 * The first call starts loading the model; calls made while that load is
 * still running wait for the same load instead of starting another one.
 * If loading fails the error propagates to every waiter and a later call
 * will try again.
 *
 * @returns The shared feature-extraction pipeline.
 */
async function getEmbeddingPipeline(): Promise<FeatureExtractionPipeline> {
  if (embeddingPipeline) {
    return embeddingPipeline;
  }

  if (!embeddingPipelineLoading) {
    console.error("Loading embedding model (first run may download ~22MB)...");
    embeddingPipelineLoading = pipeline("feature-extraction", EMBEDDING_MODEL)
      .then((loaded) => {
        embeddingPipeline = loaded;
        console.error("Embedding model loaded.");
        return loaded;
      })
      .finally(() => {
        // Clear the in-flight marker whether the load succeeded or failed, so
        // a failed load does not leave a permanently rejected promise cached.
        embeddingPipelineLoading = null;
      });
  }

  return embeddingPipelineLoading;
}
|
||||
|
||||
/**
 * Generate embedding for text.
 *
 * Runs the shared pipeline with mean pooling and normalization enabled and
 * copies the result into a plain array.
 *
 * @param text - Text to embed.
 * @returns The embedding as an array of numbers.
 */
export async function embed(text: string): Promise<number[]> {
  const model = await getEmbeddingPipeline();
  const { data } = await model(text, { pooling: "mean", normalize: true });
  return [...(data as Float32Array)];
}
|
||||
|
||||
/**
 * Vector record with embedding.
 *
 * One row of the `knowledge_vectors` table.
 */
export interface VectorRecord {
  /** Allows additional columns beyond the named ones. */
  [key: string]: unknown;
  /** ID of the knowledge entry this vector belongs to. */
  id: number;
  /** Title of the entry. */
  title: string;
  /** Leading slice of the entry's content. */
  content_preview: string;
  /** Tags as a JSON-encoded string array, or null when the entry has none. */
  tags: string | null;
  /** Embedding of the entry's title and content. */
  vector: number[];
}
|
||||
|
||||
/**
 * Search result from vector query.
 */
export interface SearchResult {
  /** ID of the matching knowledge entry. */
  id: number;
  /** Title of the matching entry. */
  title: string;
  /** Stored content preview of the matching entry. */
  content_preview: string;
  /** Decoded tag list; empty when the entry has no tags. */
  tags: string[];
  /** Similarity score derived from the vector distance; higher is closer. */
  score: number;
}
|
||||
|
||||
/**
 * Ensure data directory exists.
 *
 * Creates the vector database directory, including missing parents, when it
 * is not already present.
 */
function ensureDataDir(): void {
  if (existsSync(VECTOR_DB_PATH)) {
    return;
  }
  mkdirSync(VECTOR_DB_PATH, { recursive: true });
}
|
||||
|
||||
/**
 * Get or create LanceDB connection.
 *
 * Makes sure the storage directory exists, then connects to it. A new
 * connection is opened on every call.
 */
async function getVectorDB() {
  ensureDataDir();
  const connection = await lancedb.connect(VECTOR_DB_PATH);
  return connection;
}
|
||||
|
||||
/**
 * Convert all knowledge entries to vector database.
 *
 * Entries that already have a vector (matched by ID) and entries without an
 * ID are skipped. The remaining entries are embedded one at a time and
 * written to the `knowledge_vectors` table in a single operation at the end.
 *
 * @param options.batchSize - Number of entries processed between progress
 *   callbacks (default 50). Values that are not a finite number of at least 1
 *   fall back to 1.
 * @param options.onProgress - Called after each batch with the number of
 *   entries processed so far and the total to process.
 * @returns How many entries were converted and how many were skipped.
 */
export async function convertToVectorDB(options: {
  batchSize?: number;
  onProgress?: (current: number, total: number) => void;
} = {}): Promise<{ converted: number; skipped: number }> {
  const { batchSize = 50, onProgress } = options;

  // The loop below advances by this step; zero, negative or NaN would make it
  // never progress or silently convert nothing.
  const step = Number.isFinite(batchSize) && batchSize >= 1 ? Math.floor(batchSize) : 1;

  // Get all entries from SQLite
  const entries = getAllEntries();

  if (entries.length === 0) {
    return { converted: 0, skipped: 0 };
  }

  const db = await getVectorDB();

  // Check for existing table
  const tables = await db.tableNames();
  const tableExists = tables.includes("knowledge_vectors");
  let existingIds = new Set<number>();

  if (tableExists) {
    const table = await db.openTable("knowledge_vectors");
    const existing = await table.query().select(["id"]).toArray();
    existingIds = new Set(existing.map((r: { id: number }) => r.id));
  }

  // Filter out already converted entries
  const toConvert = entries.filter((e) => e.id && !existingIds.has(e.id));

  // Count skipped entries the same way on every return path: everything in
  // the database that is not converted by this run. (Counting the IDs in the
  // vector table instead would include vectors whose entry no longer exists
  // and miss entries that have no ID.)
  const skipped = entries.length - toConvert.length;

  if (toConvert.length === 0) {
    return { converted: 0, skipped };
  }

  // Process in batches
  const vectorRecords: VectorRecord[] = [];

  for (let i = 0; i < toConvert.length; i += step) {
    const batch = toConvert.slice(i, i + step);

    for (const entry of batch) {
      // Combine title and content for embedding
      const text = `${entry.title}\n${entry.content.slice(0, 1000)}`;
      const vector = await embed(text);

      vectorRecords.push({
        // Non-null: the filter above only keeps entries with a truthy id.
        id: entry.id!,
        title: entry.title,
        content_preview: entry.content.slice(0, 500),
        tags: entry.tags ? JSON.stringify(entry.tags) : null,
        vector,
      });
    }

    onProgress?.(Math.min(i + step, toConvert.length), toConvert.length);
  }

  // Create or append to table
  if (tableExists) {
    const table = await db.openTable("knowledge_vectors");
    await table.add(vectorRecords);
  } else {
    await db.createTable("knowledge_vectors", vectorRecords);
  }

  return { converted: vectorRecords.length, skipped };
}
|
||||
|
||||
/**
 * Query the vector database for similar entries.
 *
 * The query text is embedded and compared with the stored vectors using
 * cosine distance, so `score = 1 - distance` is the cosine similarity of the
 * two embeddings. The nearest `limit` rows are fetched first and then
 * filtered by `minScore`, so fewer than `limit` results may be returned.
 *
 * @param query - Free-text query.
 * @param options.limit - Maximum number of nearest rows to fetch (default 5).
 * @param options.minScore - Minimum score a result must reach (default 0.3).
 * @returns Matching entries with their similarity score.
 * @throws Error when the `knowledge_vectors` table does not exist yet.
 */
export async function queryVectors(
  query: string,
  options: {
    limit?: number;
    minScore?: number;
  } = {}
): Promise<SearchResult[]> {
  const { limit = 5, minScore = 0.3 } = options;

  const db = await getVectorDB();
  const tables = await db.tableNames();

  if (!tables.includes("knowledge_vectors")) {
    throw new Error("Vector database not initialized. Run 'bun start vectors convert' first.");
  }

  // Generate query embedding
  const queryVector = await embed(query);

  // Search. Ask for cosine distance explicitly: with the default metric the
  // value in _distance is not "1 - similarity", which made the score below
  // something other than a similarity.
  const table = await db.openTable("knowledge_vectors");
  const results = await table
    .vectorSearch(queryVector)
    .distanceType("cosine")
    .limit(limit)
    .toArray();

  // Format and filter results
  return results
    .map((r: Record<string, unknown>) => ({
      id: r.id as number,
      title: r.title as string,
      content_preview: r.content_preview as string,
      // Tags are stored as a JSON-encoded array, or null when absent.
      tags: r.tags ? JSON.parse(r.tags as string) : [],
      score: 1 - (r._distance as number), // Cosine distance -> cosine similarity
    }))
    .filter((r) => r.score >= minScore);
}
|
||||
|
||||
/**
 * Delete a vector by entry ID.
 *
 * @param id - ID of the entry whose vector should be removed.
 * @returns `false` when the vector table does not exist; `true` once the
 *   delete has been issued (whether or not a row matched).
 */
export async function deleteVector(id: number): Promise<boolean> {
  const connection = await getVectorDB();
  const tableNames = await connection.tableNames();

  if (tableNames.includes("knowledge_vectors")) {
    const vectors = await connection.openTable("knowledge_vectors");
    await vectors.delete(`id = ${id}`);
    return true;
  }

  return false;
}
|
||||
|
||||
/**
 * Update vector for a single entry.
 *
 * Builds a fresh embedding from the entry's title and the start of its
 * content, then replaces any existing vector for the same ID. The table is
 * created if it does not exist yet.
 *
 * @param entry - Entry to index.
 * @returns `false` when the entry has no ID (nothing is indexed), otherwise
 *   `true`.
 */
export async function updateVector(entry: KnowledgeEntry): Promise<boolean> {
  if (!entry.id) return false;

  // Build the new record before touching the stored one: if embedding fails,
  // the existing vector is left in place instead of being deleted with
  // nothing to replace it.
  const text = `${entry.title}\n${entry.content.slice(0, 1000)}`;
  const vector = await embed(text);

  const record: VectorRecord = {
    id: entry.id,
    title: entry.title,
    content_preview: entry.content.slice(0, 500),
    tags: entry.tags ? JSON.stringify(entry.tags) : null,
    vector,
  };

  // Delete old vector
  await deleteVector(entry.id);

  // Store new vector
  const db = await getVectorDB();
  const tables = await db.tableNames();

  if (tables.includes("knowledge_vectors")) {
    const table = await db.openTable("knowledge_vectors");
    await table.add([record]);
  } else {
    await db.createTable("knowledge_vectors", [record]);
  }

  return true;
}
|
||||
|
||||
/**
 * Get vector database statistics.
 *
 * @returns The number of stored vectors and, per tag, how many vectors carry
 *   it. Both are empty/zero when the vector table does not exist.
 */
export async function getVectorStats(): Promise<{
  totalVectors: number;
  tagCounts: Record<string, number>;
}> {
  const connection = await getVectorDB();
  const hasTable = (await connection.tableNames()).includes("knowledge_vectors");

  if (!hasTable) {
    return { totalVectors: 0, tagCounts: {} };
  }

  const vectors = await connection.openTable("knowledge_vectors");
  const rows = await vectors.query().select(["tags"]).toArray();

  const tagCounts: Record<string, number> = {};

  for (const row of rows) {
    const { tags: encodedTags } = row as { tags: string | null };
    if (!encodedTags) {
      continue;
    }
    // Tags are stored as a JSON-encoded string array.
    for (const tag of JSON.parse(encodedTags) as string[]) {
      tagCounts[tag] = (tagCounts[tag] || 0) + 1;
    }
  }

  return { totalVectors: rows.length, tagCounts };
}
|
||||
|
||||
/**
 * Clear the vector database.
 *
 * Drops the `knowledge_vectors` table when it exists; otherwise does nothing.
 */
export async function clearVectorDB(): Promise<void> {
  const connection = await getVectorDB();
  const tableNames = await connection.tableNames();

  if (!tableNames.includes("knowledge_vectors")) {
    return;
  }

  await connection.dropTable("knowledge_vectors");
}
|
||||
@@ -0,0 +1,91 @@
|
||||
import { describe, test, expect, beforeAll, afterAll } from "bun:test";
|
||||
import { Database } from "bun:sqlite";
|
||||
import { join } from "path";
|
||||
import { existsSync, unlinkSync, mkdirSync } from "fs";
|
||||
|
||||
// Test with a separate test database
const TEST_DIR = join(import.meta.dir, "../../test-data");
const TEST_DB_PATH = join(TEST_DIR, "test_knowledge.db");

// NOTE: the tests below share one database and run in order: the insert test
// creates the row (id 1) that the later tests read, update and delete.
describe("database", () => {
  let db: Database;

  beforeAll(() => {
    if (!existsSync(TEST_DIR)) {
      mkdirSync(TEST_DIR, { recursive: true });
    }

    // Start from an empty file. A database left behind by an interrupted run
    // would already contain rows and break the count and "id = 1" checks.
    if (existsSync(TEST_DB_PATH)) {
      unlinkSync(TEST_DB_PATH);
    }

    db = new Database(TEST_DB_PATH, { create: true });
    db.run(`
      CREATE TABLE IF NOT EXISTS knowledge_entries (
        id INTEGER PRIMARY KEY AUTOINCREMENT,
        title TEXT NOT NULL,
        content TEXT NOT NULL,
        source TEXT,
        tags TEXT,
        created_at TEXT NOT NULL,
        updated_at TEXT NOT NULL
      )
    `);
  });

  afterAll(() => {
    db.close();
    if (existsSync(TEST_DB_PATH)) {
      unlinkSync(TEST_DB_PATH);
    }
  });

  test("can insert a knowledge entry", () => {
    const stmt = db.prepare(`
      INSERT INTO knowledge_entries
      (title, content, source, tags, created_at, updated_at)
      VALUES (?, ?, ?, ?, ?, ?)
    `);

    const now = new Date().toISOString();
    stmt.run("Test Title", "Test content here", "https://example.com", '["test","demo"]', now, now);

    const count = db.prepare("SELECT COUNT(*) as count FROM knowledge_entries").get() as { count: number };
    expect(count.count).toBe(1);
  });

  test("can query entries", () => {
    const records = db.prepare("SELECT * FROM knowledge_entries").all();
    expect(records.length).toBeGreaterThan(0);
  });

  test("can search by title", () => {
    const records = db.prepare(
      "SELECT * FROM knowledge_entries WHERE LOWER(title) LIKE ?"
    ).all("%test%");
    expect(records.length).toBe(1);
  });

  test("can search by content", () => {
    const records = db.prepare(
      "SELECT * FROM knowledge_entries WHERE LOWER(content) LIKE ?"
    ).all("%content%");
    expect(records.length).toBe(1);
  });

  test("can parse tags JSON", () => {
    const record = db.prepare("SELECT tags FROM knowledge_entries WHERE id = 1").get() as { tags: string };
    const tags = JSON.parse(record.tags);
    expect(tags).toEqual(["test", "demo"]);
  });

  test("can update an entry", () => {
    const now = new Date().toISOString();
    db.prepare("UPDATE knowledge_entries SET title = ?, updated_at = ? WHERE id = ?").run("Updated Title", now, 1);

    const record = db.prepare("SELECT title FROM knowledge_entries WHERE id = 1").get() as { title: string };
    expect(record.title).toBe("Updated Title");
  });

  test("can delete an entry", () => {
    db.prepare("DELETE FROM knowledge_entries WHERE id = ?").run(1);

    const count = db.prepare("SELECT COUNT(*) as count FROM knowledge_entries").get() as { count: number };
    expect(count.count).toBe(0);
  });
});
|
||||
@@ -0,0 +1,17 @@
|
||||
{
|
||||
"compilerOptions": {
|
||||
"target": "ESNext",
|
||||
"module": "ESNext",
|
||||
"moduleResolution": "bundler",
|
||||
"esModuleInterop": true,
|
||||
"strict": true,
|
||||
"skipLibCheck": true,
|
||||
"resolveJsonModule": true,
|
||||
"declaration": true,
|
||||
"outDir": "./dist",
|
||||
"rootDir": "./src",
|
||||
"types": ["bun-types"]
|
||||
},
|
||||
"include": ["src/**/*"],
|
||||
"exclude": ["node_modules", "dist", "tests"]
|
||||
}
|
||||
Reference in New Issue
Block a user