mirror of
https://github.com/obra/superpowers.git
synced 2026-04-26 19:49:08 +08:00
Initial commit: Superpowers plugin v1.0.0
Core skills library as Claude Code plugin: - Testing skills: TDD, async testing, anti-patterns - Debugging skills: Systematic debugging, root cause tracing - Collaboration skills: Brainstorming, planning, code review - Meta skills: Creating and testing skills Features: - SessionStart hook for context injection - Skills-search tool for discovery - Commands: /brainstorm, /write-plan, /execute-plan - Data directory at ~/.superpowers/
This commit is contained in:
@@ -0,0 +1,112 @@
|
||||
import { describe, it, expect, beforeEach, afterEach } from 'vitest';
|
||||
import { initDatabase, migrateSchema, insertExchange } from './db.js';
|
||||
import { ConversationExchange } from './types.js';
|
||||
import fs from 'fs';
|
||||
import path from 'path';
|
||||
import os from 'os';
|
||||
import Database from 'better-sqlite3';
|
||||
|
||||
describe('database migration', () => {
|
||||
const testDir = path.join(os.tmpdir(), 'db-migration-test-' + Date.now());
|
||||
const dbPath = path.join(testDir, 'test.db');
|
||||
|
||||
beforeEach(() => {
|
||||
fs.mkdirSync(testDir, { recursive: true });
|
||||
process.env.TEST_DB_PATH = dbPath;
|
||||
});
|
||||
|
||||
afterEach(() => {
|
||||
delete process.env.TEST_DB_PATH;
|
||||
fs.rmSync(testDir, { recursive: true, force: true });
|
||||
});
|
||||
|
||||
it('adds last_indexed column to existing database', () => {
|
||||
// Create a database with old schema (no last_indexed)
|
||||
const db = new Database(dbPath);
|
||||
db.exec(`
|
||||
CREATE TABLE exchanges (
|
||||
id TEXT PRIMARY KEY,
|
||||
project TEXT NOT NULL,
|
||||
timestamp TEXT NOT NULL,
|
||||
user_message TEXT NOT NULL,
|
||||
assistant_message TEXT NOT NULL,
|
||||
archive_path TEXT NOT NULL,
|
||||
line_start INTEGER NOT NULL,
|
||||
line_end INTEGER NOT NULL,
|
||||
embedding BLOB
|
||||
)
|
||||
`);
|
||||
|
||||
// Verify column doesn't exist
|
||||
const columnsBefore = db.prepare(`PRAGMA table_info(exchanges)`).all();
|
||||
const hasLastIndexedBefore = columnsBefore.some((col: any) => col.name === 'last_indexed');
|
||||
expect(hasLastIndexedBefore).toBe(false);
|
||||
|
||||
db.close();
|
||||
|
||||
// Run migration
|
||||
const migratedDb = initDatabase();
|
||||
|
||||
// Verify column now exists
|
||||
const columnsAfter = migratedDb.prepare(`PRAGMA table_info(exchanges)`).all();
|
||||
const hasLastIndexedAfter = columnsAfter.some((col: any) => col.name === 'last_indexed');
|
||||
expect(hasLastIndexedAfter).toBe(true);
|
||||
|
||||
migratedDb.close();
|
||||
});
|
||||
|
||||
it('handles existing last_indexed column gracefully', () => {
|
||||
// Create database with migration already applied
|
||||
const db = initDatabase();
|
||||
|
||||
// Run migration again - should not error
|
||||
expect(() => migrateSchema(db)).not.toThrow();
|
||||
|
||||
db.close();
|
||||
});
|
||||
});
|
||||
|
||||
describe('insertExchange with last_indexed', () => {
|
||||
const testDir = path.join(os.tmpdir(), 'insert-test-' + Date.now());
|
||||
const dbPath = path.join(testDir, 'test.db');
|
||||
|
||||
beforeEach(() => {
|
||||
fs.mkdirSync(testDir, { recursive: true });
|
||||
process.env.TEST_DB_PATH = dbPath;
|
||||
});
|
||||
|
||||
afterEach(() => {
|
||||
delete process.env.TEST_DB_PATH;
|
||||
fs.rmSync(testDir, { recursive: true, force: true });
|
||||
});
|
||||
|
||||
it('sets last_indexed timestamp when inserting exchange', () => {
|
||||
const db = initDatabase();
|
||||
|
||||
const exchange: ConversationExchange = {
|
||||
id: 'test-id-1',
|
||||
project: 'test-project',
|
||||
timestamp: '2024-01-01T00:00:00Z',
|
||||
userMessage: 'Hello',
|
||||
assistantMessage: 'Hi there!',
|
||||
archivePath: '/test/path.jsonl',
|
||||
lineStart: 1,
|
||||
lineEnd: 2
|
||||
};
|
||||
|
||||
const beforeInsert = Date.now();
|
||||
// Create proper 384-dimensional embedding
|
||||
const embedding = new Array(384).fill(0.1);
|
||||
insertExchange(db, exchange, embedding);
|
||||
const afterInsert = Date.now();
|
||||
|
||||
// Query the exchange
|
||||
const row = db.prepare(`SELECT last_indexed FROM exchanges WHERE id = ?`).get('test-id-1') as any;
|
||||
|
||||
expect(row.last_indexed).toBeDefined();
|
||||
expect(row.last_indexed).toBeGreaterThanOrEqual(beforeInsert);
|
||||
expect(row.last_indexed).toBeLessThanOrEqual(afterInsert);
|
||||
|
||||
db.close();
|
||||
});
|
||||
});
|
||||
134
skills/collaboration/remembering-conversations/tool/src/db.ts
Normal file
134
skills/collaboration/remembering-conversations/tool/src/db.ts
Normal file
@@ -0,0 +1,134 @@
|
||||
import Database from 'better-sqlite3';
|
||||
import { ConversationExchange } from './types.js';
|
||||
import path from 'path';
|
||||
import os from 'os';
|
||||
import fs from 'fs';
|
||||
import * as sqliteVec from 'sqlite-vec';
|
||||
|
||||
function getDbPath(): string {
|
||||
return process.env.TEST_DB_PATH || path.join(os.homedir(), '.clank', 'conversation-index', 'db.sqlite');
|
||||
}
|
||||
|
||||
export function migrateSchema(db: Database.Database): void {
|
||||
const hasColumn = db.prepare(`
|
||||
SELECT COUNT(*) as count FROM pragma_table_info('exchanges')
|
||||
WHERE name='last_indexed'
|
||||
`).get() as { count: number };
|
||||
|
||||
if (hasColumn.count === 0) {
|
||||
console.log('Migrating schema: adding last_indexed column...');
|
||||
db.prepare('ALTER TABLE exchanges ADD COLUMN last_indexed INTEGER').run();
|
||||
console.log('Migration complete.');
|
||||
}
|
||||
}
|
||||
|
||||
export function initDatabase(): Database.Database {
|
||||
const dbPath = getDbPath();
|
||||
|
||||
// Ensure directory exists
|
||||
const dbDir = path.dirname(dbPath);
|
||||
if (!fs.existsSync(dbDir)) {
|
||||
fs.mkdirSync(dbDir, { recursive: true });
|
||||
}
|
||||
|
||||
const db = new Database(dbPath);
|
||||
|
||||
// Load sqlite-vec extension
|
||||
sqliteVec.load(db);
|
||||
|
||||
// Enable WAL mode for better concurrency
|
||||
db.pragma('journal_mode = WAL');
|
||||
|
||||
// Create exchanges table
|
||||
db.exec(`
|
||||
CREATE TABLE IF NOT EXISTS exchanges (
|
||||
id TEXT PRIMARY KEY,
|
||||
project TEXT NOT NULL,
|
||||
timestamp TEXT NOT NULL,
|
||||
user_message TEXT NOT NULL,
|
||||
assistant_message TEXT NOT NULL,
|
||||
archive_path TEXT NOT NULL,
|
||||
line_start INTEGER NOT NULL,
|
||||
line_end INTEGER NOT NULL,
|
||||
embedding BLOB
|
||||
)
|
||||
`);
|
||||
|
||||
// Create vector search index
|
||||
db.exec(`
|
||||
CREATE VIRTUAL TABLE IF NOT EXISTS vec_exchanges USING vec0(
|
||||
id TEXT PRIMARY KEY,
|
||||
embedding FLOAT[384]
|
||||
)
|
||||
`);
|
||||
|
||||
// Create index on timestamp for sorting
|
||||
db.exec(`
|
||||
CREATE INDEX IF NOT EXISTS idx_timestamp ON exchanges(timestamp DESC)
|
||||
`);
|
||||
|
||||
// Run migrations
|
||||
migrateSchema(db);
|
||||
|
||||
return db;
|
||||
}
|
||||
|
||||
export function insertExchange(
|
||||
db: Database.Database,
|
||||
exchange: ConversationExchange,
|
||||
embedding: number[]
|
||||
): void {
|
||||
const now = Date.now();
|
||||
|
||||
const stmt = db.prepare(`
|
||||
INSERT OR REPLACE INTO exchanges
|
||||
(id, project, timestamp, user_message, assistant_message, archive_path, line_start, line_end, last_indexed)
|
||||
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
|
||||
`);
|
||||
|
||||
stmt.run(
|
||||
exchange.id,
|
||||
exchange.project,
|
||||
exchange.timestamp,
|
||||
exchange.userMessage,
|
||||
exchange.assistantMessage,
|
||||
exchange.archivePath,
|
||||
exchange.lineStart,
|
||||
exchange.lineEnd,
|
||||
now
|
||||
);
|
||||
|
||||
// Insert into vector table (delete first since virtual tables don't support REPLACE)
|
||||
const delStmt = db.prepare(`DELETE FROM vec_exchanges WHERE id = ?`);
|
||||
delStmt.run(exchange.id);
|
||||
|
||||
const vecStmt = db.prepare(`
|
||||
INSERT INTO vec_exchanges (id, embedding)
|
||||
VALUES (?, ?)
|
||||
`);
|
||||
|
||||
vecStmt.run(exchange.id, Buffer.from(new Float32Array(embedding).buffer));
|
||||
}
|
||||
|
||||
export function getAllExchanges(db: Database.Database): Array<{ id: string; archivePath: string }> {
|
||||
const stmt = db.prepare(`SELECT id, archive_path as archivePath FROM exchanges`);
|
||||
return stmt.all() as Array<{ id: string; archivePath: string }>;
|
||||
}
|
||||
|
||||
export function getFileLastIndexed(db: Database.Database, archivePath: string): number | null {
|
||||
const stmt = db.prepare(`
|
||||
SELECT MAX(last_indexed) as lastIndexed
|
||||
FROM exchanges
|
||||
WHERE archive_path = ?
|
||||
`);
|
||||
const row = stmt.get(archivePath) as { lastIndexed: number | null };
|
||||
return row.lastIndexed;
|
||||
}
|
||||
|
||||
export function deleteExchange(db: Database.Database, id: string): void {
|
||||
// Delete from vector table
|
||||
db.prepare(`DELETE FROM vec_exchanges WHERE id = ?`).run(id);
|
||||
|
||||
// Delete from main table
|
||||
db.prepare(`DELETE FROM exchanges WHERE id = ?`).run(id);
|
||||
}
|
||||
@@ -0,0 +1,39 @@
|
||||
import { pipeline, Pipeline } from '@xenova/transformers';
|
||||
|
||||
let embeddingPipeline: Pipeline | null = null;
|
||||
|
||||
export async function initEmbeddings(): Promise<void> {
|
||||
if (!embeddingPipeline) {
|
||||
console.log('Loading embedding model (first run may take time)...');
|
||||
embeddingPipeline = await pipeline(
|
||||
'feature-extraction',
|
||||
'Xenova/all-MiniLM-L6-v2'
|
||||
);
|
||||
console.log('Embedding model loaded');
|
||||
}
|
||||
}
|
||||
|
||||
export async function generateEmbedding(text: string): Promise<number[]> {
|
||||
if (!embeddingPipeline) {
|
||||
await initEmbeddings();
|
||||
}
|
||||
|
||||
// Truncate text to avoid token limits (512 tokens max for this model)
|
||||
const truncated = text.substring(0, 2000);
|
||||
|
||||
const output = await embeddingPipeline!(truncated, {
|
||||
pooling: 'mean',
|
||||
normalize: true
|
||||
});
|
||||
|
||||
return Array.from(output.data);
|
||||
}
|
||||
|
||||
export async function generateExchangeEmbedding(
|
||||
userMessage: string,
|
||||
assistantMessage: string
|
||||
): Promise<number[]> {
|
||||
// Combine user question and assistant answer for better searchability
|
||||
const combined = `User: ${userMessage}\n\nAssistant: ${assistantMessage}`;
|
||||
return generateEmbedding(combined);
|
||||
}
|
||||
@@ -0,0 +1,115 @@
|
||||
#!/usr/bin/env node
|
||||
import { verifyIndex, repairIndex } from './verify.js';
|
||||
import { indexSession, indexUnprocessed, indexConversations } from './indexer.js';
|
||||
import { initDatabase } from './db.js';
|
||||
import fs from 'fs';
|
||||
import path from 'path';
|
||||
import os from 'os';
|
||||
|
||||
const command = process.argv[2];
|
||||
|
||||
// Parse --concurrency flag from remaining args
|
||||
function getConcurrency(): number {
|
||||
const concurrencyIndex = process.argv.findIndex(arg => arg === '--concurrency' || arg === '-c');
|
||||
if (concurrencyIndex !== -1 && process.argv[concurrencyIndex + 1]) {
|
||||
const value = parseInt(process.argv[concurrencyIndex + 1], 10);
|
||||
if (value >= 1 && value <= 16) return value;
|
||||
}
|
||||
return 1; // default
|
||||
}
|
||||
|
||||
const concurrency = getConcurrency();
|
||||
|
||||
async function main() {
|
||||
try {
|
||||
switch (command) {
|
||||
case 'index-session':
|
||||
const sessionId = process.argv[3];
|
||||
if (!sessionId) {
|
||||
console.error('Usage: index-cli index-session <session-id>');
|
||||
process.exit(1);
|
||||
}
|
||||
await indexSession(sessionId, concurrency);
|
||||
break;
|
||||
|
||||
case 'index-cleanup':
|
||||
await indexUnprocessed(concurrency);
|
||||
break;
|
||||
|
||||
case 'verify':
|
||||
console.log('Verifying conversation index...');
|
||||
const issues = await verifyIndex();
|
||||
|
||||
console.log('\n=== Verification Results ===');
|
||||
console.log(`Missing summaries: ${issues.missing.length}`);
|
||||
console.log(`Orphaned entries: ${issues.orphaned.length}`);
|
||||
console.log(`Outdated files: ${issues.outdated.length}`);
|
||||
console.log(`Corrupted files: ${issues.corrupted.length}`);
|
||||
|
||||
if (issues.missing.length > 0) {
|
||||
console.log('\nMissing summaries:');
|
||||
issues.missing.forEach(m => console.log(` ${m.path}`));
|
||||
}
|
||||
|
||||
if (issues.missing.length + issues.orphaned.length + issues.outdated.length + issues.corrupted.length > 0) {
|
||||
console.log('\nRun with --repair to fix these issues.');
|
||||
process.exit(1);
|
||||
} else {
|
||||
console.log('\n✅ Index is healthy!');
|
||||
}
|
||||
break;
|
||||
|
||||
case 'repair':
|
||||
console.log('Verifying conversation index...');
|
||||
const repairIssues = await verifyIndex();
|
||||
|
||||
if (repairIssues.missing.length + repairIssues.orphaned.length + repairIssues.outdated.length > 0) {
|
||||
await repairIndex(repairIssues);
|
||||
} else {
|
||||
console.log('✅ No issues to repair!');
|
||||
}
|
||||
break;
|
||||
|
||||
case 'rebuild':
|
||||
console.log('Rebuilding entire index...');
|
||||
|
||||
// Delete database
|
||||
const dbPath = path.join(os.homedir(), '.clank', 'conversation-index', 'db.sqlite');
|
||||
if (fs.existsSync(dbPath)) {
|
||||
fs.unlinkSync(dbPath);
|
||||
console.log('Deleted existing database');
|
||||
}
|
||||
|
||||
// Delete all summary files
|
||||
const archiveDir = path.join(os.homedir(), '.clank', 'conversation-archive');
|
||||
if (fs.existsSync(archiveDir)) {
|
||||
const projects = fs.readdirSync(archiveDir);
|
||||
for (const project of projects) {
|
||||
const projectPath = path.join(archiveDir, project);
|
||||
if (!fs.statSync(projectPath).isDirectory()) continue;
|
||||
|
||||
const summaries = fs.readdirSync(projectPath).filter(f => f.endsWith('-summary.txt'));
|
||||
for (const summary of summaries) {
|
||||
fs.unlinkSync(path.join(projectPath, summary));
|
||||
}
|
||||
}
|
||||
console.log('Deleted all summary files');
|
||||
}
|
||||
|
||||
// Re-index everything
|
||||
console.log('Re-indexing all conversations...');
|
||||
await indexConversations(undefined, undefined, concurrency);
|
||||
break;
|
||||
|
||||
case 'index-all':
|
||||
default:
|
||||
await indexConversations(undefined, undefined, concurrency);
|
||||
break;
|
||||
}
|
||||
} catch (error) {
|
||||
console.error('Error:', error);
|
||||
process.exit(1);
|
||||
}
|
||||
}
|
||||
|
||||
main();
|
||||
@@ -0,0 +1,356 @@
|
||||
import fs from 'fs';
|
||||
import path from 'path';
|
||||
import os from 'os';
|
||||
import { initDatabase, insertExchange } from './db.js';
|
||||
import { parseConversation } from './parser.js';
|
||||
import { initEmbeddings, generateExchangeEmbedding } from './embeddings.js';
|
||||
import { summarizeConversation } from './summarizer.js';
|
||||
import { ConversationExchange } from './types.js';
|
||||
|
||||
// Set max output tokens for Claude SDK (used by summarizer)
process.env.CLAUDE_CODE_MAX_OUTPUT_TOKENS = '20000';

// Increase max listeners for concurrent API calls — processBatch can hold
// many in-flight summarizer requests at once; presumably 20 was chosen to
// stay above the CLI's concurrency cap and avoid listener warnings (verify).
import { EventEmitter } from 'events';
EventEmitter.defaultMaxListeners = 20;
|
||||
|
||||
// Allow overriding paths for testing
|
||||
function getProjectsDir(): string {
|
||||
return process.env.TEST_PROJECTS_DIR || path.join(os.homedir(), '.claude', 'projects');
|
||||
}
|
||||
|
||||
function getArchiveDir(): string {
|
||||
return process.env.TEST_ARCHIVE_DIR || path.join(os.homedir(), '.clank', 'conversation-archive');
|
||||
}
|
||||
|
||||
// Projects to exclude from indexing (configurable via env or config file)
|
||||
function getExcludedProjects(): string[] {
|
||||
// Check env variable first
|
||||
if (process.env.CONVERSATION_SEARCH_EXCLUDE_PROJECTS) {
|
||||
return process.env.CONVERSATION_SEARCH_EXCLUDE_PROJECTS.split(',').map(p => p.trim());
|
||||
}
|
||||
|
||||
// Check for config file
|
||||
const configPath = path.join(os.homedir(), '.clank', 'conversation-index', 'exclude.txt');
|
||||
if (fs.existsSync(configPath)) {
|
||||
const content = fs.readFileSync(configPath, 'utf-8');
|
||||
return content.split('\n').map(line => line.trim()).filter(line => line && !line.startsWith('#'));
|
||||
}
|
||||
|
||||
// Default: no exclusions
|
||||
return [];
|
||||
}
|
||||
|
||||
// Process items in batches with limited concurrency
|
||||
async function processBatch<T, R>(
|
||||
items: T[],
|
||||
processor: (item: T) => Promise<R>,
|
||||
concurrency: number
|
||||
): Promise<R[]> {
|
||||
const results: R[] = [];
|
||||
|
||||
for (let i = 0; i < items.length; i += concurrency) {
|
||||
const batch = items.slice(i, i + concurrency);
|
||||
const batchResults = await Promise.all(batch.map(processor));
|
||||
results.push(...batchResults);
|
||||
}
|
||||
|
||||
return results;
|
||||
}
|
||||
|
||||
/**
 * Index conversations from the projects directory into the search database.
 *
 * Per project: copy each .jsonl into the archive, parse it into exchanges,
 * generate any missing summaries (`concurrency` at a time), then embed and
 * insert every exchange sequentially.
 *
 * @param limitToProject   only process this project directory, if given
 * @param maxConversations stop after this many conversations, if given
 * @param concurrency      parallelism for summary generation (default 1)
 */
export async function indexConversations(
  limitToProject?: string,
  maxConversations?: number,
  concurrency: number = 1
): Promise<void> {
  console.log('Initializing database...');
  const db = initDatabase();

  console.log('Loading embedding model...');
  await initEmbeddings();

  console.log('Scanning for conversation files...');
  const PROJECTS_DIR = getProjectsDir();
  const ARCHIVE_DIR = getArchiveDir();
  const projects = fs.readdirSync(PROJECTS_DIR);

  let totalExchanges = 0;
  let conversationsProcessed = 0;

  const excludedProjects = getExcludedProjects();

  for (const project of projects) {
    // Skip excluded projects
    if (excludedProjects.includes(project)) {
      console.log(`\nSkipping excluded project: ${project}`);
      continue;
    }

    // Skip if limiting to specific project
    if (limitToProject && project !== limitToProject) continue;
    const projectPath = path.join(PROJECTS_DIR, project);
    const stat = fs.statSync(projectPath);

    if (!stat.isDirectory()) continue;

    const files = fs.readdirSync(projectPath).filter(f => f.endsWith('.jsonl'));

    if (files.length === 0) continue;

    console.log(`\nProcessing project: ${project} (${files.length} conversations)`);
    if (concurrency > 1) console.log(`  Concurrency: ${concurrency}`);

    // Create archive directory for this project
    const projectArchive = path.join(ARCHIVE_DIR, project);
    fs.mkdirSync(projectArchive, { recursive: true });

    // Prepare all conversations first: one record per parseable .jsonl file.
    type ConvToProcess = {
      file: string;
      sourcePath: string;
      archivePath: string;
      summaryPath: string;
      exchanges: ConversationExchange[];
    };

    const toProcess: ConvToProcess[] = [];

    for (const file of files) {
      const sourcePath = path.join(projectPath, file);
      const archivePath = path.join(projectArchive, file);

      // Copy to archive (only once; existing archives are left untouched)
      if (!fs.existsSync(archivePath)) {
        fs.copyFileSync(sourcePath, archivePath);
        console.log(`  Archived: ${file}`);
      }

      // Parse conversation
      const exchanges = await parseConversation(sourcePath, project, archivePath);

      if (exchanges.length === 0) {
        console.log(`  Skipped ${file} (no exchanges)`);
        continue;
      }

      toProcess.push({
        file,
        sourcePath,
        archivePath,
        summaryPath: archivePath.replace('.jsonl', '-summary.txt'),
        exchanges
      });
    }

    // Batch summarize conversations in parallel; a conversation is
    // considered summarized when its -summary.txt exists.
    const needsSummary = toProcess.filter(c => !fs.existsSync(c.summaryPath));

    if (needsSummary.length > 0) {
      console.log(`  Generating ${needsSummary.length} summaries (concurrency: ${concurrency})...`);

      await processBatch(needsSummary, async (conv) => {
        try {
          const summary = await summarizeConversation(conv.exchanges);
          fs.writeFileSync(conv.summaryPath, summary, 'utf-8');
          const wordCount = summary.split(/\s+/).length;
          console.log(`  ✓ ${conv.file}: ${wordCount} words`);
          return summary;
        } catch (error) {
          // Summary failures are logged and skipped; indexing continues.
          console.log(`  ✗ ${conv.file}: ${error}`);
          return null;
        }
      }, concurrency);
    }

    // Now process embeddings and DB inserts (fast, sequential is fine)
    for (const conv of toProcess) {
      for (const exchange of conv.exchanges) {
        const embedding = await generateExchangeEmbedding(
          exchange.userMessage,
          exchange.assistantMessage
        );

        insertExchange(db, exchange, embedding);
      }

      totalExchanges += conv.exchanges.length;
      conversationsProcessed++;

      // Check if we hit the limit; close and report before returning early.
      if (maxConversations && conversationsProcessed >= maxConversations) {
        console.log(`\nReached limit of ${maxConversations} conversations`);
        db.close();
        console.log(`✅ Indexing complete! Conversations: ${conversationsProcessed}, Exchanges: ${totalExchanges}`);
        return;
      }
    }
  }

  db.close();
  console.log(`\n✅ Indexing complete! Conversations: ${conversationsProcessed}, Exchanges: ${totalExchanges}`);
}
|
||||
|
||||
/**
 * Index a single session's conversation, identified by session id.
 *
 * Scans every non-excluded project directory for a .jsonl whose filename
 * contains the session id, archives it, generates a summary if one is
 * missing, and inserts embeddings for each exchange. Only the first
 * matching file is processed (the loop breaks after it).
 *
 * @param sessionId   substring to look for in conversation filenames
 * @param concurrency accepted for CLI symmetry — unused here; indexing of
 *                    a single session is sequential
 */
export async function indexSession(sessionId: string, concurrency: number = 1): Promise<void> {
  console.log(`Indexing session: ${sessionId}`);

  // Find the conversation file for this session
  const PROJECTS_DIR = getProjectsDir();
  const ARCHIVE_DIR = getArchiveDir();
  const projects = fs.readdirSync(PROJECTS_DIR);
  const excludedProjects = getExcludedProjects();
  let found = false;

  for (const project of projects) {
    if (excludedProjects.includes(project)) continue;

    const projectPath = path.join(PROJECTS_DIR, project);
    if (!fs.statSync(projectPath).isDirectory()) continue;

    const files = fs.readdirSync(projectPath).filter(f => f.includes(sessionId) && f.endsWith('.jsonl'));

    if (files.length > 0) {
      found = true;
      const file = files[0];
      const sourcePath = path.join(projectPath, file);

      // Database and model are only opened once a match is found.
      const db = initDatabase();
      await initEmbeddings();

      const projectArchive = path.join(ARCHIVE_DIR, project);
      fs.mkdirSync(projectArchive, { recursive: true });

      const archivePath = path.join(projectArchive, file);

      // Archive (first time only)
      if (!fs.existsSync(archivePath)) {
        fs.copyFileSync(sourcePath, archivePath);
      }

      // Parse and summarize
      const exchanges = await parseConversation(sourcePath, project, archivePath);

      if (exchanges.length > 0) {
        // Generate summary (skipped when one already exists)
        const summaryPath = archivePath.replace('.jsonl', '-summary.txt');
        if (!fs.existsSync(summaryPath)) {
          const summary = await summarizeConversation(exchanges);
          fs.writeFileSync(summaryPath, summary, 'utf-8');
          console.log(`Summary: ${summary.split(/\s+/).length} words`);
        }

        // Index every exchange: embed, then upsert into the database.
        for (const exchange of exchanges) {
          const embedding = await generateExchangeEmbedding(
            exchange.userMessage,
            exchange.assistantMessage
          );
          insertExchange(db, exchange, embedding);
        }

        console.log(`✅ Indexed session ${sessionId}: ${exchanges.length} exchanges`);
      }

      db.close();
      break;
    }
  }

  if (!found) {
    console.log(`Session ${sessionId} not found`);
  }
}
|
||||
|
||||
/**
 * Find conversations that have no summary file yet and process them:
 * archive, summarize (`concurrency` at a time), then embed and index every
 * exchange.
 *
 * A conversation counts as "processed" when its -summary.txt exists next to
 * the archived .jsonl, so summary failures are retried on the next run.
 */
export async function indexUnprocessed(concurrency: number = 1): Promise<void> {
  console.log('Finding unprocessed conversations...');
  if (concurrency > 1) console.log(`Concurrency: ${concurrency}`);

  const db = initDatabase();
  await initEmbeddings();

  const PROJECTS_DIR = getProjectsDir();
  const ARCHIVE_DIR = getArchiveDir();
  const projects = fs.readdirSync(PROJECTS_DIR);
  const excludedProjects = getExcludedProjects();

  // One unit of work: an archived conversation awaiting summary + indexing.
  type UnprocessedConv = {
    project: string;
    file: string;
    sourcePath: string;
    archivePath: string;
    summaryPath: string;
    exchanges: ConversationExchange[];
  };

  const unprocessed: UnprocessedConv[] = [];

  // Collect all unprocessed conversations
  for (const project of projects) {
    if (excludedProjects.includes(project)) continue;

    const projectPath = path.join(PROJECTS_DIR, project);
    if (!fs.statSync(projectPath).isDirectory()) continue;

    const files = fs.readdirSync(projectPath).filter(f => f.endsWith('.jsonl'));

    for (const file of files) {
      const sourcePath = path.join(projectPath, file);
      const projectArchive = path.join(ARCHIVE_DIR, project);
      const archivePath = path.join(projectArchive, file);
      const summaryPath = archivePath.replace('.jsonl', '-summary.txt');

      // Skip if already has summary
      if (fs.existsSync(summaryPath)) continue;

      fs.mkdirSync(projectArchive, { recursive: true });

      // Archive if needed
      if (!fs.existsSync(archivePath)) {
        fs.copyFileSync(sourcePath, archivePath);
      }

      // Parse and check; conversations with no exchanges are ignored.
      const exchanges = await parseConversation(sourcePath, project, archivePath);
      if (exchanges.length === 0) continue;

      unprocessed.push({ project, file, sourcePath, archivePath, summaryPath, exchanges });
    }
  }

  if (unprocessed.length === 0) {
    console.log('✅ All conversations are already processed!');
    db.close();
    return;
  }

  console.log(`Found ${unprocessed.length} unprocessed conversations`);
  console.log(`Generating summaries (concurrency: ${concurrency})...\n`);

  // Batch process summaries
  await processBatch(unprocessed, async (conv) => {
    try {
      const summary = await summarizeConversation(conv.exchanges);
      fs.writeFileSync(conv.summaryPath, summary, 'utf-8');
      const wordCount = summary.split(/\s+/).length;
      console.log(`  ✓ ${conv.project}/${conv.file}: ${wordCount} words`);
      return summary;
    } catch (error) {
      // Failure leaves the summary missing, so this conversation will be
      // retried by a later run.
      console.log(`  ✗ ${conv.project}/${conv.file}: ${error}`);
      return null;
    }
  }, concurrency);

  // Now index embeddings
  // NOTE(review): exchanges are embedded and inserted even for
  // conversations whose summarization failed above — confirm intended.
  console.log(`\nIndexing embeddings...`);
  for (const conv of unprocessed) {
    for (const exchange of conv.exchanges) {
      const embedding = await generateExchangeEmbedding(
        exchange.userMessage,
        exchange.assistantMessage
      );
      insertExchange(db, exchange, embedding);
    }
  }

  db.close();
  console.log(`\n✅ Processed ${unprocessed.length} conversations`);
}
|
||||
@@ -0,0 +1,118 @@
|
||||
import fs from 'fs';
|
||||
import readline from 'readline';
|
||||
import { ConversationExchange } from './types.js';
|
||||
import crypto from 'crypto';
|
||||
|
||||
/**
 * Shape of one line in a .jsonl conversation log, reduced to the fields the
 * parser reads. Everything except `type` is optional or loosely typed so
 * malformed lines can be inspected without throwing.
 */
interface JSONLMessage {
  // Only 'user' and 'assistant' lines are kept by the parser.
  type: string;
  message?: {
    role: 'user' | 'assistant';
    // Either plain text or a list of content blocks; the parser extracts
    // only blocks of type 'text'.
    content: string | Array<{ type: string; text?: string }>;
  };
  // Message timestamp — presumably ISO-8601; confirm against the log format.
  timestamp?: string;
  uuid?: string;
}
|
||||
|
||||
export async function parseConversation(
|
||||
filePath: string,
|
||||
projectName: string,
|
||||
archivePath: string
|
||||
): Promise<ConversationExchange[]> {
|
||||
const exchanges: ConversationExchange[] = [];
|
||||
const fileStream = fs.createReadStream(filePath);
|
||||
const rl = readline.createInterface({
|
||||
input: fileStream,
|
||||
crlfDelay: Infinity
|
||||
});
|
||||
|
||||
let lineNumber = 0;
|
||||
let currentExchange: {
|
||||
userMessage: string;
|
||||
userLine: number;
|
||||
assistantMessages: string[];
|
||||
lastAssistantLine: number;
|
||||
timestamp: string;
|
||||
} | null = null;
|
||||
|
||||
const finalizeExchange = () => {
|
||||
if (currentExchange && currentExchange.assistantMessages.length > 0) {
|
||||
const exchange: ConversationExchange = {
|
||||
id: crypto
|
||||
.createHash('md5')
|
||||
.update(`${archivePath}:${currentExchange.userLine}-${currentExchange.lastAssistantLine}`)
|
||||
.digest('hex'),
|
||||
project: projectName,
|
||||
timestamp: currentExchange.timestamp,
|
||||
userMessage: currentExchange.userMessage,
|
||||
assistantMessage: currentExchange.assistantMessages.join('\n\n'),
|
||||
archivePath,
|
||||
lineStart: currentExchange.userLine,
|
||||
lineEnd: currentExchange.lastAssistantLine
|
||||
};
|
||||
exchanges.push(exchange);
|
||||
}
|
||||
};
|
||||
|
||||
for await (const line of rl) {
|
||||
lineNumber++;
|
||||
|
||||
try {
|
||||
const parsed: JSONLMessage = JSON.parse(line);
|
||||
|
||||
// Skip non-message types
|
||||
if (parsed.type !== 'user' && parsed.type !== 'assistant') {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (!parsed.message) {
|
||||
continue;
|
||||
}
|
||||
|
||||
// Extract text from message content
|
||||
let text = '';
|
||||
if (typeof parsed.message.content === 'string') {
|
||||
text = parsed.message.content;
|
||||
} else if (Array.isArray(parsed.message.content)) {
|
||||
text = parsed.message.content
|
||||
.filter(block => block.type === 'text' && block.text)
|
||||
.map(block => block.text)
|
||||
.join('\n');
|
||||
}
|
||||
|
||||
// Skip empty messages
|
||||
if (!text.trim()) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (parsed.message.role === 'user') {
|
||||
// Finalize previous exchange before starting new one
|
||||
finalizeExchange();
|
||||
|
||||
// Start new exchange
|
||||
currentExchange = {
|
||||
userMessage: text,
|
||||
userLine: lineNumber,
|
||||
assistantMessages: [],
|
||||
lastAssistantLine: lineNumber,
|
||||
timestamp: parsed.timestamp || new Date().toISOString()
|
||||
};
|
||||
} else if (parsed.message.role === 'assistant' && currentExchange) {
|
||||
// Accumulate assistant messages
|
||||
currentExchange.assistantMessages.push(text);
|
||||
currentExchange.lastAssistantLine = lineNumber;
|
||||
// Update timestamp to last assistant message
|
||||
if (parsed.timestamp) {
|
||||
currentExchange.timestamp = parsed.timestamp;
|
||||
}
|
||||
}
|
||||
} catch (error) {
|
||||
// Skip malformed JSON lines
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
// Finalize last exchange
|
||||
finalizeExchange();
|
||||
|
||||
return exchanges;
|
||||
}
|
||||
@@ -0,0 +1,109 @@
|
||||
import { describe, it, expect } from 'vitest';
|
||||
import fs from 'fs';
|
||||
import path from 'path';
|
||||
import { fileURLToPath } from 'url';
|
||||
|
||||
const __filename = fileURLToPath(import.meta.url);
|
||||
const __dirname = path.dirname(__filename);
|
||||
|
||||
describe('search-agent template', () => {
|
||||
const templatePath = path.join(__dirname, '..', 'prompts', 'search-agent.md');
|
||||
|
||||
it('exists at expected location', () => {
|
||||
expect(fs.existsSync(templatePath)).toBe(true);
|
||||
});
|
||||
|
||||
it('contains required placeholders', () => {
|
||||
const content = fs.readFileSync(templatePath, 'utf-8');
|
||||
|
||||
// Check for all required placeholders
|
||||
expect(content).toContain('{TOPIC}');
|
||||
expect(content).toContain('{SEARCH_QUERY}');
|
||||
expect(content).toContain('{FOCUS_AREAS}');
|
||||
});
|
||||
|
||||
it('contains required output sections', () => {
|
||||
const content = fs.readFileSync(templatePath, 'utf-8');
|
||||
|
||||
// Check for required output format sections
|
||||
expect(content).toContain('### Summary');
|
||||
expect(content).toContain('### Sources');
|
||||
expect(content).toContain('### For Follow-Up');
|
||||
});
|
||||
|
||||
it('specifies word count requirements', () => {
|
||||
const content = fs.readFileSync(templatePath, 'utf-8');
|
||||
|
||||
// Should specify 200-1000 words for synthesis
|
||||
expect(content).toMatch(/200-1000 words/);
|
||||
expect(content).toMatch(/max 1000 words/);
|
||||
});
|
||||
|
||||
it('includes source metadata requirements', () => {
|
||||
const content = fs.readFileSync(templatePath, 'utf-8');
|
||||
|
||||
// Check for source metadata fields
|
||||
expect(content).toContain('project-name');
|
||||
expect(content).toContain('YYYY-MM-DD');
|
||||
expect(content).toContain('% match');
|
||||
expect(content).toContain('Conversation summary:');
|
||||
expect(content).toContain('File:');
|
||||
expect(content).toContain('Status:');
|
||||
expect(content).toContain('Read in detail');
|
||||
expect(content).toContain('Reviewed summary only');
|
||||
expect(content).toContain('Skimmed');
|
||||
});
|
||||
|
||||
it('provides search command', () => {
|
||||
const content = fs.readFileSync(templatePath, 'utf-8');
|
||||
|
||||
// Should include the search command
|
||||
expect(content).toContain('~/.claude/skills/collaboration/remembering-conversations/tool/search-conversations');
|
||||
});
|
||||
|
||||
it('includes critical rules', () => {
|
||||
const content = fs.readFileSync(templatePath, 'utf-8');
|
||||
|
||||
// Check for DO and DO NOT sections
|
||||
expect(content).toContain('## Critical Rules');
|
||||
expect(content).toContain('**DO:**');
|
||||
expect(content).toContain('**DO NOT:**');
|
||||
});
|
||||
|
||||
it('includes complete example output', () => {
|
||||
const content = fs.readFileSync(templatePath, 'utf-8');
|
||||
|
||||
// Check example has all required components
|
||||
expect(content).toContain('## Example Output');
|
||||
|
||||
// Example should show Summary, Sources, and For Follow-Up
|
||||
const exampleSection = content.substring(content.indexOf('## Example Output'));
|
||||
expect(exampleSection).toContain('### Summary');
|
||||
expect(exampleSection).toContain('### Sources');
|
||||
expect(exampleSection).toContain('### For Follow-Up');
|
||||
|
||||
// Example should show specific details
|
||||
expect(exampleSection).toContain('react-router-7-starter');
|
||||
expect(exampleSection).toContain('92% match');
|
||||
expect(exampleSection).toContain('.jsonl');
|
||||
});
|
||||
|
||||
it('emphasizes synthesis over raw excerpts', () => {
|
||||
const content = fs.readFileSync(templatePath, 'utf-8');
|
||||
|
||||
// Should explicitly discourage raw conversation excerpts
|
||||
expect(content).toContain('synthesize');
|
||||
expect(content).toContain('raw conversation excerpts');
|
||||
expect(content).toContain('synthesize instead');
|
||||
});
|
||||
|
||||
it('provides follow-up options', () => {
|
||||
const content = fs.readFileSync(templatePath, 'utf-8');
|
||||
|
||||
// Should explain how main agent can follow up
|
||||
expect(content).toContain('Main agent can:');
|
||||
expect(content).toContain('dig deeper');
|
||||
expect(content).toContain('refined query');
|
||||
expect(content).toContain('context bloat');
|
||||
});
|
||||
});
|
||||
@@ -0,0 +1,28 @@
|
||||
import { searchConversations, formatResults, SearchOptions } from './search.js';
|
||||
|
||||
const query = process.argv[2];
|
||||
const mode = (process.argv[3] || 'vector') as 'vector' | 'text' | 'both';
|
||||
const limit = parseInt(process.argv[4] || '10');
|
||||
const after = process.argv[5] || undefined;
|
||||
const before = process.argv[6] || undefined;
|
||||
|
||||
if (!query) {
|
||||
console.error('Usage: search-conversations <query> [mode] [limit] [after] [before]');
|
||||
process.exit(1);
|
||||
}
|
||||
|
||||
const options: SearchOptions = {
|
||||
mode,
|
||||
limit,
|
||||
after,
|
||||
before
|
||||
};
|
||||
|
||||
searchConversations(query, options)
|
||||
.then(results => {
|
||||
console.log(formatResults(results));
|
||||
})
|
||||
.catch(error => {
|
||||
console.error('Error searching:', error);
|
||||
process.exit(1);
|
||||
});
|
||||
@@ -0,0 +1,173 @@
|
||||
import Database from 'better-sqlite3';
|
||||
import { initDatabase } from './db.js';
|
||||
import { initEmbeddings, generateEmbedding } from './embeddings.js';
|
||||
import { SearchResult, ConversationExchange } from './types.js';
|
||||
import fs from 'fs';
|
||||
|
||||
/** Options accepted by searchConversations(). */
export interface SearchOptions {
  // Maximum number of results to return (default 10 in searchConversations).
  limit?: number;
  // 'vector' = semantic KNN search, 'text' = LIKE substring match,
  // 'both' = vector results first, then deduplicated text matches.
  mode?: 'vector' | 'text' | 'both';
  after?: string; // ISO date string (YYYY-MM-DD), inclusive lower bound
  before?: string; // ISO date string (YYYY-MM-DD), inclusive upper bound
}
|
||||
|
||||
function validateISODate(dateStr: string, paramName: string): void {
|
||||
const isoDateRegex = /^\d{4}-\d{2}-\d{2}$/;
|
||||
if (!isoDateRegex.test(dateStr)) {
|
||||
throw new Error(`Invalid ${paramName} date: "${dateStr}". Expected YYYY-MM-DD format (e.g., 2025-10-01)`);
|
||||
}
|
||||
// Verify it's actually a valid date
|
||||
const date = new Date(dateStr);
|
||||
if (isNaN(date.getTime())) {
|
||||
throw new Error(`Invalid ${paramName} date: "${dateStr}". Not a valid calendar date.`);
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Search archived conversation exchanges.
 *
 * Modes:
 * - 'vector': KNN over sqlite-vec embeddings of the query.
 * - 'text':   LIKE substring match over user/assistant messages.
 * - 'both':   vector results first, then text matches not already seen.
 *
 * @param query   Free-text search query.
 * @param options See SearchOptions; after/before are YYYY-MM-DD bounds.
 * @returns Results with a snippet and (for vector modes) a similarity score;
 *          a '-summary.txt' sidecar file is attached as 'summary' when present.
 * @throws Error when after/before are not valid YYYY-MM-DD dates.
 */
export async function searchConversations(
  query: string,
  options: SearchOptions = {}
): Promise<SearchResult[]> {
  const { limit = 10, mode = 'vector', after, before } = options;

  // Validate date parameters
  if (after) validateISODate(after, '--after');
  if (before) validateISODate(before, '--before');

  const db = initDatabase();

  let results: any[] = [];

  // Build time filter clause.
  // NOTE(review): the dates are interpolated directly into SQL rather than
  // bound as parameters. This is only safe because validateISODate()
  // restricts them to \d{4}-\d{2}-\d{2}; keep that invariant if validation
  // ever changes.
  const timeFilter = [];
  if (after) timeFilter.push(`e.timestamp >= '${after}'`);
  if (before) timeFilter.push(`e.timestamp <= '${before}'`);
  const timeClause = timeFilter.length > 0 ? `AND ${timeFilter.join(' AND ')}` : '';

  if (mode === 'vector' || mode === 'both') {
    // Vector similarity search: 'embedding MATCH ?' with 'k = ?' is the
    // sqlite-vec KNN form; each row carries its distance to the query.
    await initEmbeddings();
    const queryEmbedding = await generateEmbedding(query);

    const stmt = db.prepare(`
      SELECT
        e.id,
        e.project,
        e.timestamp,
        e.user_message,
        e.assistant_message,
        e.archive_path,
        e.line_start,
        e.line_end,
        vec.distance
      FROM vec_exchanges AS vec
      JOIN exchanges AS e ON vec.id = e.id
      WHERE vec.embedding MATCH ?
      AND k = ?
      ${timeClause}
      ORDER BY vec.distance ASC
    `);

    // The query embedding is bound as raw Float32 bytes.
    results = stmt.all(
      Buffer.from(new Float32Array(queryEmbedding).buffer),
      limit
    );
  }

  if (mode === 'text' || mode === 'both') {
    // Text search (distance fixed to 0 so rows share the vector-row shape)
    const textStmt = db.prepare(`
      SELECT
        e.id,
        e.project,
        e.timestamp,
        e.user_message,
        e.assistant_message,
        e.archive_path,
        e.line_start,
        e.line_end,
        0 as distance
      FROM exchanges AS e
      WHERE (e.user_message LIKE ? OR e.assistant_message LIKE ?)
      ${timeClause}
      ORDER BY e.timestamp DESC
      LIMIT ?
    `);

    const textResults = textStmt.all(`%${query}%`, `%${query}%`, limit);

    if (mode === 'both') {
      // Merge and deduplicate by ID (vector hits take precedence)
      const seenIds = new Set(results.map(r => r.id));
      for (const textResult of textResults) {
        if (!seenIds.has(textResult.id)) {
          results.push(textResult);
        }
      }
    } else {
      results = textResults;
    }
  }

  db.close();

  return results.map((row: any) => {
    const exchange: ConversationExchange = {
      id: row.id,
      project: row.project,
      timestamp: row.timestamp,
      userMessage: row.user_message,
      assistantMessage: row.assistant_message,
      archivePath: row.archive_path,
      lineStart: row.line_start,
      lineEnd: row.line_end
    };

    // Try to load summary if available (sidecar file next to the archive)
    const summaryPath = row.archive_path.replace('.jsonl', '-summary.txt');
    let summary: string | undefined;
    if (fs.existsSync(summaryPath)) {
      summary = fs.readFileSync(summaryPath, 'utf-8').trim();
    }

    // Create snippet (first 200 chars of the user message)
    const snippet = exchange.userMessage.substring(0, 200) +
      (exchange.userMessage.length > 200 ? '...' : '');

    // NOTE(review): in 'both' mode, text-only rows carry distance 0 and so
    // report similarity 1 (100%) here — confirm this is intended.
    return {
      exchange,
      similarity: mode === 'text' ? undefined : 1 - row.distance,
      snippet,
      summary
    } as SearchResult & { summary?: string };
  });
}
|
||||
|
||||
export function formatResults(results: Array<SearchResult & { summary?: string }>): string {
|
||||
if (results.length === 0) {
|
||||
return 'No results found.';
|
||||
}
|
||||
|
||||
let output = `Found ${results.length} relevant conversations:\n\n`;
|
||||
|
||||
results.forEach((result, index) => {
|
||||
const date = new Date(result.exchange.timestamp).toISOString().split('T')[0];
|
||||
output += `${index + 1}. [${result.exchange.project}, ${date}]\n`;
|
||||
|
||||
// Show conversation summary if available
|
||||
if (result.summary) {
|
||||
output += ` ${result.summary}\n\n`;
|
||||
}
|
||||
|
||||
// Show match with similarity percentage
|
||||
if (result.similarity !== undefined) {
|
||||
const pct = Math.round(result.similarity * 100);
|
||||
output += ` ${pct}% match: "${result.snippet}"\n`;
|
||||
} else {
|
||||
output += ` Match: "${result.snippet}"\n`;
|
||||
}
|
||||
|
||||
output += ` ${result.exchange.archivePath}:${result.exchange.lineStart}-${result.exchange.lineEnd}\n\n`;
|
||||
});
|
||||
|
||||
return output;
|
||||
}
|
||||
@@ -0,0 +1,155 @@
|
||||
import { ConversationExchange } from './types.js';
|
||||
import { query } from '@anthropic-ai/claude-agent-sdk';
|
||||
|
||||
export function formatConversationText(exchanges: ConversationExchange[]): string {
|
||||
return exchanges.map(ex => {
|
||||
return `User: ${ex.userMessage}\n\nAgent: ${ex.assistantMessage}`;
|
||||
}).join('\n\n---\n\n');
|
||||
}
|
||||
|
||||
function extractSummary(text: string): string {
|
||||
const match = text.match(/<summary>(.*?)<\/summary>/s);
|
||||
if (match) {
|
||||
return match[1].trim();
|
||||
}
|
||||
// Fallback if no tags found
|
||||
return text.trim();
|
||||
}
|
||||
|
||||
/**
 * Send one prompt through the Claude Agent SDK and return the final result
 * string. Uses Haiku by default; retries once on Sonnet when Haiku rejects
 * the request with a thinking-budget API error.
 *
 * @param prompt    Full prompt text to send.
 * @param useSonnet When true, use the 'sonnet' model (set by the retry path).
 * @returns The SDK's result string, or '' if the stream ends without a
 *          'result' message.
 */
async function callClaude(prompt: string, useSonnet = false): Promise<string> {
  const model = useSonnet ? 'sonnet' : 'haiku';

  // The SDK streams messages; only the terminal 'result' message matters here.
  for await (const message of query({
    prompt,
    options: {
      model,
      maxTokens: 4096,
      maxThinkingTokens: 0, // Disable extended thinking
      systemPrompt: 'Write concise, factual summaries. Output ONLY the summary - no preamble, no "Here is", no "I will". Your output will be indexed directly.'
    }
  })) {
    if (message && typeof message === 'object' && 'type' in message && message.type === 'result') {
      const result = (message as any).result;

      // Check if result is an API error (SDK returns errors as result strings)
      if (typeof result === 'string' && result.includes('API Error') && result.includes('thinking.budget_tokens')) {
        if (!useSonnet) {
          console.log(` Haiku hit thinking budget error, retrying with Sonnet`);
          return await callClaude(prompt, true);
        }
        // If Sonnet also fails, return error message
        return result;
      }

      return result;
    }
  }
  // Stream ended without a result message.
  return '';
}
|
||||
|
||||
function chunkExchanges(exchanges: ConversationExchange[], chunkSize: number): ConversationExchange[][] {
|
||||
const chunks: ConversationExchange[][] = [];
|
||||
for (let i = 0; i < exchanges.length; i += chunkSize) {
|
||||
chunks.push(exchanges.slice(i, i + chunkSize));
|
||||
}
|
||||
return chunks;
|
||||
}
|
||||
|
||||
/**
 * Produce a short indexable summary for a parsed conversation.
 *
 * - Empty or trivial conversations get a canned "trivial" summary.
 * - Conversations of <=15 exchanges are summarized in one model call.
 * - Longer conversations are summarized hierarchically: 8-exchange chunks
 *   are summarized independently, then the chunk summaries are synthesized
 *   into one paragraph.
 *
 * @param exchanges Parsed user/assistant exchanges, in conversation order.
 * @returns Plain-text summary; falls back to joined chunk summaries (or an
 *          error string) instead of throwing when model calls fail.
 */
export async function summarizeConversation(exchanges: ConversationExchange[]): Promise<string> {
  // Handle trivial conversations
  if (exchanges.length === 0) {
    return 'Trivial conversation with no substantive content.';
  }

  if (exchanges.length === 1) {
    const text = formatConversationText(exchanges);
    // Very short single exchanges (or a bare '/exit') aren't worth a model call.
    if (text.length < 100 || exchanges[0].userMessage.trim() === '/exit') {
      return 'Trivial conversation with no substantive content.';
    }
  }

  // For short conversations (≤15 exchanges), summarize directly
  if (exchanges.length <= 15) {
    const conversationText = formatConversationText(exchanges);
    const prompt = `Context: This summary will be shown in a list to help users and Claude choose which conversations are relevant to a future activity.

Summarize what happened in 2-4 sentences. Be factual and specific. Output in <summary></summary> tags.

Include:
- What was built/changed/discussed (be specific)
- Key technical decisions or approaches
- Problems solved or current state

Exclude:
- Apologies, meta-commentary, or your questions
- Raw logs or debug output
- Generic descriptions - focus on what makes THIS conversation unique

Good:
<summary>Built JWT authentication for React app with refresh tokens and protected routes. Fixed token expiration bug by implementing refresh-during-request logic.</summary>

Bad:
<summary>I apologize. The conversation discussed authentication and various approaches were considered...</summary>

${conversationText}`;

    const result = await callClaude(prompt);
    return extractSummary(result);
  }

  // For long conversations, use hierarchical summarization
  console.log(` Long conversation (${exchanges.length} exchanges) - using hierarchical summarization`);

  // Chunk into groups of 8 exchanges
  const chunks = chunkExchanges(exchanges, 8);
  console.log(` Split into ${chunks.length} chunks`);

  // Summarize each chunk
  const chunkSummaries: string[] = [];
  for (let i = 0; i < chunks.length; i++) {
    const chunkText = formatConversationText(chunks[i]);
    const prompt = `Summarize this part of a conversation in 2-3 sentences. What happened, what was built/discussed. Use <summary></summary> tags.

${chunkText}

Example: <summary>Implemented HID keyboard functionality for ESP32. Hit Bluetooth controller initialization error, fixed by adjusting memory allocation.</summary>`;

    try {
      const summary = await callClaude(prompt);
      const extracted = extractSummary(summary);
      chunkSummaries.push(extracted);
      console.log(` Chunk ${i + 1}/${chunks.length}: ${extracted.split(/\s+/).length} words`);
    } catch (error) {
      // Best-effort: a failed chunk is dropped rather than failing the whole run.
      console.log(` Chunk ${i + 1} failed, skipping`);
    }
  }

  if (chunkSummaries.length === 0) {
    return 'Error: Unable to summarize conversation.';
  }

  // Synthesize chunks into final summary
  const synthesisPrompt = `Context: This summary will be shown in a list to help users and Claude choose which past conversations are relevant to a future activity.

Synthesize these part-summaries into one cohesive paragraph. Focus on what was accomplished and any notable technical decisions or challenges. Output in <summary></summary> tags.

Part summaries:
${chunkSummaries.map((s, i) => `${i + 1}. ${s}`).join('\n')}

Good:
<summary>Built conversation search system with JavaScript, sqlite-vec, and local embeddings. Implemented hierarchical summarization for long conversations. System archives conversations permanently and provides semantic search via CLI.</summary>

Bad:
<summary>This conversation synthesizes several topics discussed across multiple parts...</summary>

Your summary (max 200 words):`;

  console.log(` Synthesizing final summary...`);
  try {
    const result = await callClaude(synthesisPrompt);
    return extractSummary(result);
  } catch (error) {
    // Fall back to concatenated chunk summaries when synthesis fails.
    console.log(` Synthesis failed, using chunk summaries`);
    return chunkSummaries.join(' ');
  }
}
|
||||
@@ -0,0 +1,16 @@
|
||||
/** One user→assistant exchange extracted from an archived conversation. */
export interface ConversationExchange {
  // Unique id for the exchange (used as the database primary key).
  id: string;
  // Project the conversation belongs to (archive subdirectory name).
  project: string;
  // ISO timestamp of the exchange.
  timestamp: string;
  userMessage: string;
  assistantMessage: string;
  // Absolute path to the archived .jsonl conversation file.
  archivePath: string;
  // Line span of this exchange within the archive file.
  lineStart: number;
  lineEnd: number;
}
|
||||
|
||||
export interface SearchResult {
|
||||
exchange: ConversationExchange;
|
||||
similarity: number;
|
||||
snippet: string;
|
||||
}
|
||||
@@ -0,0 +1,278 @@
|
||||
import { describe, it, expect, beforeEach, afterEach } from 'vitest';
|
||||
import { verifyIndex, repairIndex, VerificationResult } from './verify.js';
|
||||
import fs from 'fs';
|
||||
import path from 'path';
|
||||
import os from 'os';
|
||||
import { initDatabase, insertExchange } from './db.js';
|
||||
import { ConversationExchange } from './types.js';
|
||||
|
||||
describe('verifyIndex', () => {
  // Unique temp tree per run so concurrent invocations cannot collide.
  const testDir = path.join(os.tmpdir(), 'conversation-search-test-' + Date.now());
  const projectsDir = path.join(testDir, '.claude', 'projects');
  const archiveDir = path.join(testDir, '.clank', 'conversation-archive');
  const dbPath = path.join(testDir, '.clank', 'conversation-index', 'db.sqlite');

  beforeEach(() => {
    // Create test directories
    fs.mkdirSync(path.join(testDir, '.clank', 'conversation-index'), { recursive: true });
    fs.mkdirSync(projectsDir, { recursive: true });
    fs.mkdirSync(archiveDir, { recursive: true });

    // Override environment paths for testing (read by verify/db helpers)
    process.env.TEST_PROJECTS_DIR = projectsDir;
    process.env.TEST_ARCHIVE_DIR = archiveDir;
    process.env.TEST_DB_PATH = dbPath;
  });

  afterEach(() => {
    // Clean up test directory and env overrides
    fs.rmSync(testDir, { recursive: true, force: true });
    delete process.env.TEST_PROJECTS_DIR;
    delete process.env.TEST_ARCHIVE_DIR;
    delete process.env.TEST_DB_PATH;
  });

  it('detects missing summaries', async () => {
    // Create a test conversation file without a summary sidecar
    const projectArchive = path.join(archiveDir, 'test-project');
    fs.mkdirSync(projectArchive, { recursive: true });

    const conversationPath = path.join(projectArchive, 'test-conversation.jsonl');

    // Create proper JSONL format (one JSON object per line)
    const messages = [
      JSON.stringify({ type: 'user', message: { role: 'user', content: 'Hello' }, timestamp: '2024-01-01T00:00:00Z' }),
      JSON.stringify({ type: 'assistant', message: { role: 'assistant', content: 'Hi there!' }, timestamp: '2024-01-01T00:00:01Z' })
    ];
    fs.writeFileSync(conversationPath, messages.join('\n'));

    const result = await verifyIndex();

    expect(result.missing.length).toBe(1);
    expect(result.missing[0].path).toBe(conversationPath);
    expect(result.missing[0].reason).toBe('No summary file');
  });

  it('detects orphaned database entries', async () => {
    // Initialize database
    const db = initDatabase();

    // Create an exchange in the database whose archive file never existed
    const exchange: ConversationExchange = {
      id: 'orphan-id-1',
      project: 'deleted-project',
      timestamp: '2024-01-01T00:00:00Z',
      userMessage: 'This conversation was deleted',
      assistantMessage: 'But still in database',
      archivePath: path.join(archiveDir, 'deleted-project', 'deleted.jsonl'),
      lineStart: 1,
      lineEnd: 2
    };

    // Dummy 384-dimension embedding vector
    const embedding = new Array(384).fill(0.1);
    insertExchange(db, exchange, embedding);
    db.close();

    // Verify detects orphaned entry (file doesn't exist)
    const result = await verifyIndex();

    expect(result.orphaned.length).toBe(1);
    expect(result.orphaned[0].uuid).toBe('orphan-id-1');
    expect(result.orphaned[0].path).toBe(exchange.archivePath);
  });

  it('detects outdated files (file modified after last_indexed)', async () => {
    // Create conversation file with summary
    const projectArchive = path.join(archiveDir, 'test-project');
    fs.mkdirSync(projectArchive, { recursive: true });

    const conversationPath = path.join(projectArchive, 'updated-conversation.jsonl');
    const summaryPath = conversationPath.replace('.jsonl', '-summary.txt');

    // Create initial conversation
    const messages = [
      JSON.stringify({ type: 'user', message: { role: 'user', content: 'Hello' }, timestamp: '2024-01-01T00:00:00Z' }),
      JSON.stringify({ type: 'assistant', message: { role: 'assistant', content: 'Hi there!' }, timestamp: '2024-01-01T00:00:01Z' })
    ];
    fs.writeFileSync(conversationPath, messages.join('\n'));
    fs.writeFileSync(summaryPath, 'Test summary');

    // Index it
    const db = initDatabase();
    const exchange: ConversationExchange = {
      id: 'updated-id-1',
      project: 'test-project',
      timestamp: '2024-01-01T00:00:00Z',
      userMessage: 'Hello',
      assistantMessage: 'Hi there!',
      archivePath: conversationPath,
      lineStart: 1,
      lineEnd: 2
    };

    const embedding = new Array(384).fill(0.1);
    insertExchange(db, exchange, embedding);

    // Get the last_indexed timestamp
    const row = db.prepare(`SELECT last_indexed FROM exchanges WHERE id = ?`).get('updated-id-1') as any;
    const lastIndexed = row.last_indexed;
    db.close();

    // Wait a bit so the rewrite below gets a strictly later mtime
    await new Promise(resolve => setTimeout(resolve, 10));

    // Update the conversation file
    const updatedMessages = [
      ...messages,
      JSON.stringify({ type: 'user', message: { role: 'user', content: 'New message' }, timestamp: '2024-01-01T00:00:02Z' })
    ];
    fs.writeFileSync(conversationPath, updatedMessages.join('\n'));

    // Verify detects outdated file
    const result = await verifyIndex();

    expect(result.outdated.length).toBe(1);
    expect(result.outdated[0].path).toBe(conversationPath);
    expect(result.outdated[0].dbTime).toBe(lastIndexed);
    expect(result.outdated[0].fileTime).toBeGreaterThan(lastIndexed);
  });

  // Note: Parser is resilient to malformed JSON - it skips bad lines
  // Corruption detection would require file system errors or permission issues
  // which are harder to test. Skipping for now as missing summaries is the
  // primary use case for verification.
});
|
||||
|
||||
describe('repairIndex', () => {
  // Unique temp tree per run so concurrent invocations cannot collide.
  const testDir = path.join(os.tmpdir(), 'conversation-repair-test-' + Date.now());
  const projectsDir = path.join(testDir, '.claude', 'projects');
  const archiveDir = path.join(testDir, '.clank', 'conversation-archive');
  const dbPath = path.join(testDir, '.clank', 'conversation-index', 'db.sqlite');

  beforeEach(() => {
    // Create test directories
    fs.mkdirSync(path.join(testDir, '.clank', 'conversation-index'), { recursive: true });
    fs.mkdirSync(projectsDir, { recursive: true });
    fs.mkdirSync(archiveDir, { recursive: true });

    // Override environment paths for testing (read by verify/db helpers)
    process.env.TEST_PROJECTS_DIR = projectsDir;
    process.env.TEST_ARCHIVE_DIR = archiveDir;
    process.env.TEST_DB_PATH = dbPath;
  });

  afterEach(() => {
    // Clean up test directory and env overrides
    fs.rmSync(testDir, { recursive: true, force: true });
    delete process.env.TEST_PROJECTS_DIR;
    delete process.env.TEST_ARCHIVE_DIR;
    delete process.env.TEST_DB_PATH;
  });

  it('deletes orphaned database entries during repair', async () => {
    // Initialize database with orphaned entry (archive file never exists)
    const db = initDatabase();

    const exchange: ConversationExchange = {
      id: 'orphan-repair-1',
      project: 'deleted-project',
      timestamp: '2024-01-01T00:00:00Z',
      userMessage: 'This conversation was deleted',
      assistantMessage: 'But still in database',
      archivePath: path.join(archiveDir, 'deleted-project', 'deleted.jsonl'),
      lineStart: 1,
      lineEnd: 2
    };

    const embedding = new Array(384).fill(0.1);
    insertExchange(db, exchange, embedding);
    db.close();

    // Verify it's there before repairing
    const dbBefore = initDatabase();
    const beforeCount = dbBefore.prepare(`SELECT COUNT(*) as count FROM exchanges WHERE id = ?`).get('orphan-repair-1') as { count: number };
    expect(beforeCount.count).toBe(1);
    dbBefore.close();

    // Run repair
    const issues = await verifyIndex();
    expect(issues.orphaned.length).toBe(1);
    await repairIndex(issues);

    // Verify it's gone
    const dbAfter = initDatabase();
    const afterCount = dbAfter.prepare(`SELECT COUNT(*) as count FROM exchanges WHERE id = ?`).get('orphan-repair-1') as { count: number };
    expect(afterCount.count).toBe(0);
    dbAfter.close();
  });

  // Longer timeout: repair re-summarizes and re-embeds the conversation.
  it('re-indexes outdated files during repair', { timeout: 30000 }, async () => {
    // Create conversation file with summary
    const projectArchive = path.join(archiveDir, 'test-project');
    fs.mkdirSync(projectArchive, { recursive: true });

    const conversationPath = path.join(projectArchive, 'outdated-repair.jsonl');
    const summaryPath = conversationPath.replace('.jsonl', '-summary.txt');

    // Create initial conversation
    const messages = [
      JSON.stringify({ type: 'user', message: { role: 'user', content: 'Hello' }, timestamp: '2024-01-01T00:00:00Z' }),
      JSON.stringify({ type: 'assistant', message: { role: 'assistant', content: 'Hi there!' }, timestamp: '2024-01-01T00:00:01Z' })
    ];
    fs.writeFileSync(conversationPath, messages.join('\n'));
    fs.writeFileSync(summaryPath, 'Old summary');

    // Index it
    const db = initDatabase();
    const exchange: ConversationExchange = {
      id: 'outdated-repair-1',
      project: 'test-project',
      timestamp: '2024-01-01T00:00:00Z',
      userMessage: 'Hello',
      assistantMessage: 'Hi there!',
      archivePath: conversationPath,
      lineStart: 1,
      lineEnd: 2
    };

    const embedding = new Array(384).fill(0.1);
    insertExchange(db, exchange, embedding);

    // Get the last_indexed timestamp
    const beforeRow = db.prepare(`SELECT last_indexed FROM exchanges WHERE id = ?`).get('outdated-repair-1') as any;
    const beforeIndexed = beforeRow.last_indexed;
    db.close();

    // Wait a bit so the rewrite below gets a strictly later mtime
    await new Promise(resolve => setTimeout(resolve, 10));

    // Update the conversation file (add new exchange)
    const updatedMessages = [
      ...messages,
      JSON.stringify({ type: 'user', message: { role: 'user', content: 'New message' }, timestamp: '2024-01-01T00:00:02Z' }),
      JSON.stringify({ type: 'assistant', message: { role: 'assistant', content: 'New response' }, timestamp: '2024-01-01T00:00:03Z' })
    ];
    fs.writeFileSync(conversationPath, updatedMessages.join('\n'));

    // Verify detects outdated
    const issues = await verifyIndex();
    expect(issues.outdated.length).toBe(1);

    // Wait a bit to ensure the repair writes a different last_indexed value
    await new Promise(resolve => setTimeout(resolve, 10));

    // Run repair
    await repairIndex(issues);

    // Verify it was re-indexed with new timestamp
    const dbAfter = initDatabase();
    const afterRow = dbAfter.prepare(`SELECT MAX(last_indexed) as last_indexed FROM exchanges WHERE archive_path = ?`).get(conversationPath) as any;
    expect(afterRow.last_indexed).toBeGreaterThan(beforeIndexed);

    // Verify no longer outdated
    const verifyAfter = await verifyIndex();
    expect(verifyAfter.outdated.length).toBe(0);

    dbAfter.close();
  });
});
|
||||
@@ -0,0 +1,182 @@
|
||||
import fs from 'fs';
|
||||
import path from 'path';
|
||||
import os from 'os';
|
||||
import { parseConversation } from './parser.js';
|
||||
import { initDatabase, getAllExchanges, getFileLastIndexed } from './db.js';
|
||||
|
||||
/** Findings produced by verifyIndex() and consumed by repairIndex(). */
export interface VerificationResult {
  // Archived conversations with no '-summary.txt' sidecar file.
  missing: Array<{ path: string; reason: string }>;
  // Database entries whose archive file no longer exists on disk.
  orphaned: Array<{ uuid: string; path: string }>;
  // Files modified (mtimeMs) after their recorded last_indexed time.
  outdated: Array<{ path: string; fileTime: number; dbTime: number }>;
  // Files whose parse attempt threw.
  corrupted: Array<{ path: string; error: string }>;
}
|
||||
|
||||
// Allow overriding paths for testing
|
||||
function getArchiveDir(): string {
|
||||
return process.env.TEST_ARCHIVE_DIR || path.join(os.homedir(), '.clank', 'conversation-archive');
|
||||
}
|
||||
|
||||
/**
 * Scan the conversation archive and cross-check it against the SQLite index.
 *
 * Walks every <project>/<file>.jsonl under the archive root, recording:
 * - missing:   archives with no '-summary.txt' sidecar (these are skipped
 *              for the remaining checks);
 * - outdated:  archives whose mtime is newer than their last_indexed time;
 * - corrupted: archives whose parse attempt threw;
 * - orphaned:  database rows whose archive file was not found on disk.
 *
 * @returns The collected findings; empty result if the archive dir is absent.
 */
export async function verifyIndex(): Promise<VerificationResult> {
  const result: VerificationResult = {
    missing: [],
    orphaned: [],
    outdated: [],
    corrupted: []
  };

  const archiveDir = getArchiveDir();

  // Track all files we find, for the orphan check at the end
  const foundFiles = new Set<string>();

  // Nothing to verify if the archive was never created
  if (!fs.existsSync(archiveDir)) {
    return result;
  }

  // Initialize database once for all checks
  const db = initDatabase();

  const projects = fs.readdirSync(archiveDir);
  let totalChecked = 0;

  for (const project of projects) {
    const projectPath = path.join(archiveDir, project);
    const stat = fs.statSync(projectPath);

    // Skip stray non-directory entries at the archive root
    if (!stat.isDirectory()) continue;

    const files = fs.readdirSync(projectPath).filter(f => f.endsWith('.jsonl'));

    for (const file of files) {
      totalChecked++;

      // Periodic progress output for large archives
      if (totalChecked % 100 === 0) {
        console.log(` Checked ${totalChecked} conversations...`);
      }

      const conversationPath = path.join(projectPath, file);
      foundFiles.add(conversationPath);

      const summaryPath = conversationPath.replace('.jsonl', '-summary.txt');

      // Check for missing summary; skip further checks for such files
      if (!fs.existsSync(summaryPath)) {
        result.missing.push({ path: conversationPath, reason: 'No summary file' });
        continue;
      }

      // Check if file is outdated (modified after last_indexed)
      const lastIndexed = getFileLastIndexed(db, conversationPath);
      if (lastIndexed !== null) {
        const fileStat = fs.statSync(conversationPath);
        if (fileStat.mtimeMs > lastIndexed) {
          result.outdated.push({
            path: conversationPath,
            fileTime: fileStat.mtimeMs,
            dbTime: lastIndexed
          });
        }
      }

      // Try parsing to detect corruption
      try {
        await parseConversation(conversationPath, project, conversationPath);
      } catch (error) {
        result.corrupted.push({
          path: conversationPath,
          error: error instanceof Error ? error.message : String(error)
        });
      }
    }
  }

  console.log(`Verified ${totalChecked} conversations.`);

  // Check for orphaned database entries (rows pointing at deleted files)
  const dbExchanges = getAllExchanges(db);
  db.close();

  for (const exchange of dbExchanges) {
    if (!foundFiles.has(exchange.archivePath)) {
      result.orphaned.push({
        uuid: exchange.id,
        path: exchange.archivePath
      });
    }
  }

  return result;
}
|
||||
|
||||
export async function repairIndex(issues: VerificationResult): Promise<void> {
|
||||
console.log('Repairing index...');
|
||||
|
||||
// To avoid circular dependencies, we import the indexer functions dynamically
|
||||
const { initDatabase, insertExchange, deleteExchange } = await import('./db.js');
|
||||
const { parseConversation } = await import('./parser.js');
|
||||
const { initEmbeddings, generateExchangeEmbedding } = await import('./embeddings.js');
|
||||
const { summarizeConversation } = await import('./summarizer.js');
|
||||
|
||||
const db = initDatabase();
|
||||
await initEmbeddings();
|
||||
|
||||
// Remove orphaned entries first
|
||||
for (const orphan of issues.orphaned) {
|
||||
console.log(`Removing orphaned entry: ${orphan.uuid}`);
|
||||
deleteExchange(db, orphan.uuid);
|
||||
}
|
||||
|
||||
// Re-index missing and outdated conversations
|
||||
const toReindex = [
|
||||
...issues.missing.map(m => m.path),
|
||||
...issues.outdated.map(o => o.path)
|
||||
];
|
||||
|
||||
for (const conversationPath of toReindex) {
|
||||
console.log(`Re-indexing: ${conversationPath}`);
|
||||
try {
|
||||
// Extract project name from path
|
||||
const archiveDir = getArchiveDir();
|
||||
const relativePath = conversationPath.replace(archiveDir + path.sep, '');
|
||||
const project = relativePath.split(path.sep)[0];
|
||||
|
||||
// Parse conversation
|
||||
const exchanges = await parseConversation(conversationPath, project, conversationPath);
|
||||
|
||||
if (exchanges.length === 0) {
|
||||
console.log(` Skipped (no exchanges)`);
|
||||
continue;
|
||||
}
|
||||
|
||||
// Generate/update summary
|
||||
const summaryPath = conversationPath.replace('.jsonl', '-summary.txt');
|
||||
const summary = await summarizeConversation(exchanges);
|
||||
fs.writeFileSync(summaryPath, summary, 'utf-8');
|
||||
console.log(` Created summary: ${summary.split(/\s+/).length} words`);
|
||||
|
||||
// Index exchanges
|
||||
for (const exchange of exchanges) {
|
||||
const embedding = await generateExchangeEmbedding(
|
||||
exchange.userMessage,
|
||||
exchange.assistantMessage
|
||||
);
|
||||
insertExchange(db, exchange, embedding);
|
||||
}
|
||||
|
||||
console.log(` Indexed ${exchanges.length} exchanges`);
|
||||
} catch (error) {
|
||||
console.error(`Failed to re-index ${conversationPath}:`, error);
|
||||
}
|
||||
}
|
||||
|
||||
db.close();
|
||||
|
||||
// Report corrupted files (manual intervention needed)
|
||||
if (issues.corrupted.length > 0) {
|
||||
console.log('\n⚠️ Corrupted files (manual review needed):');
|
||||
issues.corrupted.forEach(c => console.log(` ${c.path}: ${c.error}`));
|
||||
}
|
||||
|
||||
console.log('✅ Repair complete.');
|
||||
}
|
||||
Reference in New Issue
Block a user