From 6690ea61f45a789bba6bbadd91d8e31618a3e832 Mon Sep 17 00:00:00 2001 From: koloxarto <106516707+web3gh@users.noreply.github.com> Date: Sun, 12 Jan 2025 06:52:49 +0100 Subject: [PATCH] fix: Koloxarto/fix ragknowledge for postgres (#2153) * fix formatting out of the way * fix postgress chunk uuid handling for ragKnowledge --------- Co-authored-by: Odilitime --- packages/adapter-postgres/src/index.ts | 226 +++++++++++++----- packages/core/src/ragknowledge.ts | 303 ++++++++++++++++--------- packages/core/src/runtime.ts | 138 ++++++----- 3 files changed, 449 insertions(+), 218 deletions(-) diff --git a/packages/adapter-postgres/src/index.ts b/packages/adapter-postgres/src/index.ts index 2a774b53f2..5f257bb719 100644 --- a/packages/adapter-postgres/src/index.ts +++ b/packages/adapter-postgres/src/index.ts @@ -4,31 +4,31 @@ import { v4 } from "uuid"; import pg from "pg"; type Pool = pg.Pool; -import { - QueryConfig, - QueryConfigValues, - QueryResult, - QueryResultRow, -} from "pg"; import { Account, Actor, + DatabaseAdapter, + EmbeddingProvider, GoalStatus, + Participant, + RAGKnowledgeItem, + elizaLogger, + getEmbeddingConfig, type Goal, + type IDatabaseCacheAdapter, type Memory, type Relationship, type UUID, - type IDatabaseCacheAdapter, - Participant, - elizaLogger, - getEmbeddingConfig, - DatabaseAdapter, - EmbeddingProvider, - RAGKnowledgeItem } from "@elizaos/core"; import fs from "fs"; -import { fileURLToPath } from "url"; import path from "path"; +import { + QueryConfig, + QueryConfigValues, + QueryResult, + QueryResultRow, +} from "pg"; +import { fileURLToPath } from "url"; const __filename = fileURLToPath(import.meta.url); // get the resolved path to the file const __dirname = path.dirname(__filename); // get the name of the directory @@ -199,7 +199,7 @@ export class PostgresDatabaseAdapter return true; } catch (error) { elizaLogger.error("Failed to validate vector extension:", { - error: error instanceof Error ? error.message : String(error) + error: error instanceof Error ? error.message : String(error), }); return false; } @@ -239,8 +239,10 @@ export class PostgresDatabaseAdapter ); `); - if (!rows[0].exists || !await this.validateVectorSetup()) { - elizaLogger.info("Applying database schema - tables or vector extension missing"); + if (!rows[0].exists || !(await this.validateVectorSetup())) { + elizaLogger.info( + "Applying database schema - tables or vector extension missing" + ); const schema = fs.readFileSync( path.resolve(__dirname, "../schema.sql"), "utf8" @@ -1523,12 +1525,17 @@ export class PostgresDatabaseAdapter const { rows } = await this.pool.query(sql, queryParams); - return rows.map(row => ({ + return rows.map((row) => ({ id: row.id, agentId: row.agentId, - content: typeof row.content === 'string' ? JSON.parse(row.content) : row.content, - embedding: row.embedding ? new Float32Array(row.embedding) : undefined, - createdAt: row.createdAt.getTime() + content: + typeof row.content === "string" + ? JSON.parse(row.content) + : row.content, + embedding: row.embedding + ? new Float32Array(row.embedding) + : undefined, + createdAt: row.createdAt.getTime(), })); }, "getKnowledge"); } @@ -1544,7 +1551,7 @@ export class PostgresDatabaseAdapter const cacheKey = `embedding_${params.agentId}_${params.searchText}`; const cachedResult = await this.getCache({ key: cacheKey, - agentId: params.agentId + agentId: params.agentId, }); if (cachedResult) { @@ -1594,24 +1601,29 @@ export class PostgresDatabaseAdapter const { rows } = await this.pool.query(sql, [ vectorStr, params.agentId, - `%${params.searchText || ''}%`, + `%${params.searchText || ""}%`, params.match_threshold, - params.match_count + params.match_count, ]); - const results = rows.map(row => ({ + const results = rows.map((row) => ({ id: row.id, agentId: row.agentId, - content: typeof row.content === 'string' ? JSON.parse(row.content) : row.content, - embedding: row.embedding ? new Float32Array(row.embedding) : undefined, + content: + typeof row.content === "string" + ? JSON.parse(row.content) + : row.content, + embedding: row.embedding + ? new Float32Array(row.embedding) + : undefined, createdAt: row.createdAt.getTime(), - similarity: row.combined_score + similarity: row.combined_score, })); await this.setCache({ key: cacheKey, agentId: params.agentId, - value: JSON.stringify(results) + value: JSON.stringify(results), }); return results; @@ -1622,35 +1634,52 @@ export class PostgresDatabaseAdapter return this.withDatabase(async () => { const client = await this.pool.connect(); try { - await client.query('BEGIN'); - - const sql = ` - INSERT INTO knowledge ( - id, "agentId", content, embedding, "createdAt", - "isMain", "originalId", "chunkIndex", "isShared" - ) VALUES ($1, $2, $3, $4, to_timestamp($5/1000.0), $6, $7, $8, $9) - ON CONFLICT (id) DO NOTHING - `; + await client.query("BEGIN"); const metadata = knowledge.content.metadata || {}; - const vectorStr = knowledge.embedding ? - `[${Array.from(knowledge.embedding).join(",")}]` : null; - - await client.query(sql, [ - knowledge.id, - metadata.isShared ? null : knowledge.agentId, - knowledge.content, - vectorStr, - knowledge.createdAt || Date.now(), - metadata.isMain || false, - metadata.originalId || null, - metadata.chunkIndex || null, - metadata.isShared || false - ]); + const vectorStr = knowledge.embedding + ? `[${Array.from(knowledge.embedding).join(",")}]` + : null; + + // If this is a chunk, use createKnowledgeChunk + if (metadata.isChunk && metadata.originalId) { + await this.createKnowledgeChunk({ + id: knowledge.id, + originalId: metadata.originalId, + agentId: metadata.isShared ? null : knowledge.agentId, + content: knowledge.content, + embedding: knowledge.embedding, + chunkIndex: metadata.chunkIndex || 0, + isShared: metadata.isShared || false, + createdAt: knowledge.createdAt || Date.now(), + }); + } else { + // This is a main knowledge item + await client.query( + ` + INSERT INTO knowledge ( + id, "agentId", content, embedding, "createdAt", + "isMain", "originalId", "chunkIndex", "isShared" + ) VALUES ($1, $2, $3, $4, to_timestamp($5/1000.0), $6, $7, $8, $9) + ON CONFLICT (id) DO NOTHING + `, + [ + knowledge.id, + metadata.isShared ? null : knowledge.agentId, + knowledge.content, + vectorStr, + knowledge.createdAt || Date.now(), + true, + null, + null, + metadata.isShared || false, + ] + ); + } - await client.query('COMMIT'); + await client.query("COMMIT"); } catch (error) { - await client.query('ROLLBACK'); + await client.query("ROLLBACK"); throw error; } finally { client.release(); @@ -1660,19 +1689,100 @@ export class PostgresDatabaseAdapter async removeKnowledge(id: UUID): Promise { return this.withDatabase(async () => { - await this.pool.query('DELETE FROM knowledge WHERE id = $1', [id]); + const client = await this.pool.connect(); + try { + await client.query("BEGIN"); + + // Check if this is a pattern-based chunk deletion (e.g., "id-chunk-*") + if (typeof id === "string" && id.includes("-chunk-*")) { + const mainId = id.split("-chunk-")[0]; + // Delete chunks for this main ID + await client.query( + 'DELETE FROM knowledge WHERE "originalId" = $1', + [mainId] + ); + } else { + // First delete all chunks associated with this knowledge item + await client.query( + 'DELETE FROM knowledge WHERE "originalId" = $1', + [id] + ); + // Then delete the main knowledge item + await client.query("DELETE FROM knowledge WHERE id = $1", [ + id, + ]); + } + + await client.query("COMMIT"); + } catch (error) { + await client.query("ROLLBACK"); + elizaLogger.error("Error removing knowledge", { + error: + error instanceof Error ? error.message : String(error), + id, + }); + throw error; + } finally { + client.release(); + } }, "removeKnowledge"); } async clearKnowledge(agentId: UUID, shared?: boolean): Promise { return this.withDatabase(async () => { - const sql = shared ? - 'DELETE FROM knowledge WHERE ("agentId" = $1 OR "isShared" = true)' : - 'DELETE FROM knowledge WHERE "agentId" = $1'; + const sql = shared + ? 'DELETE FROM knowledge WHERE ("agentId" = $1 OR "isShared" = true)' + : 'DELETE FROM knowledge WHERE "agentId" = $1'; await this.pool.query(sql, [agentId]); }, "clearKnowledge"); } + + private async createKnowledgeChunk(params: { + id: UUID; + originalId: UUID; + agentId: UUID | null; + content: any; + embedding: Float32Array | undefined | null; + chunkIndex: number; + isShared: boolean; + createdAt: number; + }): Promise { + const vectorStr = params.embedding + ? `[${Array.from(params.embedding).join(",")}]` + : null; + + // Store the pattern-based ID in the content metadata for compatibility + const patternId = `${params.originalId}-chunk-${params.chunkIndex}`; + const contentWithPatternId = { + ...params.content, + metadata: { + ...params.content.metadata, + patternId, + }, + }; + + await this.pool.query( + ` + INSERT INTO knowledge ( + id, "agentId", content, embedding, "createdAt", + "isMain", "originalId", "chunkIndex", "isShared" + ) VALUES ($1, $2, $3, $4, to_timestamp($5/1000.0), $6, $7, $8, $9) + ON CONFLICT (id) DO NOTHING + `, + [ + v4(), // Generate a proper UUID for PostgreSQL + params.agentId, + contentWithPatternId, // Store the pattern ID in metadata + vectorStr, + params.createdAt, + false, + params.originalId, + params.chunkIndex, + params.isShared, + ] + ); + } } export default PostgresDatabaseAdapter; diff --git a/packages/core/src/ragknowledge.ts b/packages/core/src/ragknowledge.ts index 5c91309703..4ccc56c8e1 100644 --- a/packages/core/src/ragknowledge.ts +++ b/packages/core/src/ragknowledge.ts @@ -1,12 +1,12 @@ import { embed } from "./embedding.ts"; +import { splitChunks } from "./generation.ts"; import elizaLogger from "./logger.ts"; import { + IAgentRuntime, IRAGKnowledgeManager, RAGKnowledgeItem, UUID, - IAgentRuntime } from "./types.ts"; -import { splitChunks } from "./generation.ts"; import { stringToUuid } from "./uuid.ts"; /** @@ -41,20 +41,62 @@ export class RAGKnowledgeManager implements IRAGKnowledgeManager { * Common English stop words to filter out from query analysis */ private readonly stopWords = new Set([ - 'a', 'an', 'and', 'are', 'as', 'at', 'be', 'by', 'does', 'for', 'from', 'had', - 'has', 'have', 'he', 'her', 'his', 'how', 'hey', 'i', 'in', 'is', 'it', 'its', - 'of', 'on', 'or', 'that', 'the', 'this', 'to', 'was', 'what', 'when', 'where', - 'which', 'who', 'will', 'with', 'would', 'there', 'their', 'they', 'your', 'you' + "a", + "an", + "and", + "are", + "as", + "at", + "be", + "by", + "does", + "for", + "from", + "had", + "has", + "have", + "he", + "her", + "his", + "how", + "hey", + "i", + "in", + "is", + "it", + "its", + "of", + "on", + "or", + "that", + "the", + "this", + "to", + "was", + "what", + "when", + "where", + "which", + "who", + "will", + "with", + "would", + "there", + "their", + "they", + "your", + "you", ]); /** * Filters out stop words and returns meaningful terms */ private getQueryTerms(query: string): string[] { - return query.toLowerCase() - .split(' ') - .filter(term => term.length > 3) // Filter very short words - .filter(term => !this.stopWords.has(term)); // Filter stop words + return query + .toLowerCase() + .split(" ") + .filter((term) => term.length > 3) // Filter very short words + .filter((term) => !this.stopWords.has(term)); // Filter stop words } /** @@ -89,9 +131,10 @@ export class RAGKnowledgeManager implements IRAGKnowledgeManager { } private hasProximityMatch(text: string, terms: string[]): boolean { - const words = text.toLowerCase().split(' '); - const positions = terms.map(term => words.findIndex(w => w.includes(term))) - .filter(pos => pos !== -1); + const words = text.toLowerCase().split(" "); + const positions = terms + .map((term) => words.findIndex((w) => w.includes(term))) + .filter((pos) => pos !== -1); if (positions.length < 2) return false; @@ -115,10 +158,11 @@ export class RAGKnowledgeManager implements IRAGKnowledgeManager { // If id is provided, do direct lookup first if (params.id) { - const directResults = await this.runtime.databaseAdapter.getKnowledge({ - id: params.id, - agentId: agentId - }); + const directResults = + await this.runtime.databaseAdapter.getKnowledge({ + id: params.id, + agentId: agentId, + }); if (directResults.length > 0) { return directResults; @@ -133,7 +177,9 @@ export class RAGKnowledgeManager implements IRAGKnowledgeManager { // Build search text with optional context let searchText = processedQuery; if (params.conversationContext) { - const relevantContext = this.preprocess(params.conversationContext); + const relevantContext = this.preprocess( + params.conversationContext + ); searchText = `${relevantContext} ${processedQuery}`; } @@ -142,51 +188,65 @@ export class RAGKnowledgeManager implements IRAGKnowledgeManager { const embedding = new Float32Array(embeddingArray); // Get results with single query - const results = await this.runtime.databaseAdapter.searchKnowledge({ - agentId: this.runtime.agentId, - embedding: embedding, - match_threshold: this.defaultRAGMatchThreshold, - match_count: (params.limit || this.defaultRAGMatchCount) * 2, - searchText: processedQuery - }); + const results = + await this.runtime.databaseAdapter.searchKnowledge({ + agentId: this.runtime.agentId, + embedding: embedding, + match_threshold: this.defaultRAGMatchThreshold, + match_count: + (params.limit || this.defaultRAGMatchCount) * 2, + searchText: processedQuery, + }); // Enhanced reranking with sophisticated scoring - const rerankedResults = results.map(result => { - let score = result.similarity; - - // Check for direct query term matches - const queryTerms = this.getQueryTerms(processedQuery); - - const matchingTerms = queryTerms.filter(term => - result.content.text.toLowerCase().includes(term)); - - if (matchingTerms.length > 0) { - // Much stronger boost for matches - score *= (1 + (matchingTerms.length / queryTerms.length) * 2); // Double the boost - - if (this.hasProximityMatch(result.content.text, matchingTerms)) { - score *= 1.5; // Stronger proximity boost - } - } else { - // More aggressive penalty - if (!params.conversationContext) { - score *= 0.3; // Stronger penalty + const rerankedResults = results + .map((result) => { + let score = result.similarity; + + // Check for direct query term matches + const queryTerms = this.getQueryTerms(processedQuery); + + const matchingTerms = queryTerms.filter((term) => + result.content.text.toLowerCase().includes(term) + ); + + if (matchingTerms.length > 0) { + // Much stronger boost for matches + score *= + 1 + + (matchingTerms.length / queryTerms.length) * 2; // Double the boost + + if ( + this.hasProximityMatch( + result.content.text, + matchingTerms + ) + ) { + score *= 1.5; // Stronger proximity boost + } + } else { + // More aggressive penalty + if (!params.conversationContext) { + score *= 0.3; // Stronger penalty + } } - } - return { - ...result, - score, - matchedTerms: matchingTerms // Add for debugging - }; - }).sort((a, b) => b.score - a.score); + return { + ...result, + score, + matchedTerms: matchingTerms, // Add for debugging + }; + }) + .sort((a, b) => b.score - a.score); // Filter and return results return rerankedResults - .filter(result => result.score >= this.defaultRAGMatchThreshold) + .filter( + (result) => + result.score >= this.defaultRAGMatchThreshold + ) .slice(0, params.limit || this.defaultRAGMatchCount); - - } catch(error) { + } catch (error) { console.log(`[RAG Search Error] ${error}`); return []; } @@ -205,7 +265,10 @@ export class RAGKnowledgeManager implements IRAGKnowledgeManager { try { // Process main document const processedContent = this.preprocess(item.content.text); - const mainEmbeddingArray = await embed(this.runtime, processedContent); + const mainEmbeddingArray = await embed( + this.runtime, + processedContent + ); const mainEmbedding = new Float32Array(mainEmbeddingArray); @@ -217,11 +280,11 @@ export class RAGKnowledgeManager implements IRAGKnowledgeManager { text: item.content.text, metadata: { ...item.content.metadata, - isMain: true - } + isMain: true, + }, }, embedding: mainEmbedding, - createdAt: Date.now() + createdAt: Date.now(), }); // Generate and store chunks @@ -241,11 +304,11 @@ export class RAGKnowledgeManager implements IRAGKnowledgeManager { ...item.content.metadata, isChunk: true, originalId: item.id, - chunkIndex: index - } + chunkIndex: index, + }, }, embedding: chunkEmbedding, - createdAt: Date.now() + createdAt: Date.now(), }); } } catch (error) { @@ -265,17 +328,19 @@ export class RAGKnowledgeManager implements IRAGKnowledgeManager { match_threshold = this.defaultRAGMatchThreshold, match_count = this.defaultRAGMatchCount, embedding, - searchText + searchText, } = params; - const float32Embedding = Array.isArray(embedding) ? new Float32Array(embedding) : embedding; + const float32Embedding = Array.isArray(embedding) + ? new Float32Array(embedding) + : embedding; return await this.runtime.databaseAdapter.searchKnowledge({ agentId: params.agentId || this.runtime.agentId, embedding: float32Embedding, match_threshold, match_count, - searchText + searchText, }); } @@ -284,14 +349,17 @@ export class RAGKnowledgeManager implements IRAGKnowledgeManager { } async clearKnowledge(shared?: boolean): Promise { - await this.runtime.databaseAdapter.clearKnowledge(this.runtime.agentId, shared ? shared : false); + await this.runtime.databaseAdapter.clearKnowledge( + this.runtime.agentId, + shared ? shared : false + ); } async processFile(file: { path: string; content: string; - type: 'pdf' | 'md' | 'txt'; - isShared?: boolean + type: "pdf" | "md" | "txt"; + isShared?: boolean; }): Promise { const timeMarker = (label: string) => { const time = (Date.now() - startTime) / 1000; @@ -302,18 +370,23 @@ export class RAGKnowledgeManager implements IRAGKnowledgeManager { const content = file.content; try { - const fileSizeKB = (new TextEncoder().encode(content)).length / 1024; - elizaLogger.info(`[File Progress] Starting ${file.path} (${fileSizeKB.toFixed(2)} KB)`); + const fileSizeKB = new TextEncoder().encode(content).length / 1024; + elizaLogger.info( + `[File Progress] Starting ${file.path} (${fileSizeKB.toFixed(2)} KB)` + ); // Step 1: Preprocessing //const preprocessStart = Date.now(); const processedContent = this.preprocess(content); - timeMarker('Preprocessing'); + timeMarker("Preprocessing"); // Step 2: Main document embedding - const mainEmbeddingArray = await embed(this.runtime, processedContent); + const mainEmbeddingArray = await embed( + this.runtime, + processedContent + ); const mainEmbedding = new Float32Array(mainEmbeddingArray); - timeMarker('Main embedding'); + timeMarker("Main embedding"); // Step 3: Create main document await this.runtime.databaseAdapter.createKnowledge({ @@ -324,19 +397,19 @@ export class RAGKnowledgeManager implements IRAGKnowledgeManager { metadata: { source: file.path, type: file.type, - isShared: file.isShared || false - } + isShared: file.isShared || false, + }, }, embedding: mainEmbedding, - createdAt: Date.now() + createdAt: Date.now(), }); - timeMarker('Main document storage'); + timeMarker("Main document storage"); // Step 4: Generate chunks const chunks = await splitChunks(processedContent, 512, 20); const totalChunks = chunks.length; elizaLogger.info(`Generated ${totalChunks} chunks`); - timeMarker('Chunk generation'); + timeMarker("Chunk generation"); // Step 5: Process chunks with larger batches const BATCH_SIZE = 10; // Increased batch size @@ -344,52 +417,66 @@ export class RAGKnowledgeManager implements IRAGKnowledgeManager { for (let i = 0; i < chunks.length; i += BATCH_SIZE) { const batchStart = Date.now(); - const batch = chunks.slice(i, Math.min(i + BATCH_SIZE, chunks.length)); + const batch = chunks.slice( + i, + Math.min(i + BATCH_SIZE, chunks.length) + ); // Process embeddings in parallel const embeddings = await Promise.all( - batch.map(chunk => embed(this.runtime, chunk)) + batch.map((chunk) => embed(this.runtime, chunk)) ); // Batch database operations - await Promise.all(embeddings.map(async (embeddingArray, index) => { - const chunkId = `${stringToUuid(file.path)}-chunk-${i + index}` as UUID; - const chunkEmbedding = new Float32Array(embeddingArray); - - await this.runtime.databaseAdapter.createKnowledge({ - id: chunkId, - agentId: this.runtime.agentId, - content: { - text: batch[index], - metadata: { - source: file.path, - type: file.type, - isShared: file.isShared || false, - isChunk: true, - originalId: stringToUuid(file.path), - chunkIndex: i + index - } - }, - embedding: chunkEmbedding, - createdAt: Date.now() - }); - })); + await Promise.all( + embeddings.map(async (embeddingArray, index) => { + const chunkId = + `${stringToUuid(file.path)}-chunk-${i + index}` as UUID; + const chunkEmbedding = new Float32Array(embeddingArray); + + await this.runtime.databaseAdapter.createKnowledge({ + id: chunkId, + agentId: this.runtime.agentId, + content: { + text: batch[index], + metadata: { + source: file.path, + type: file.type, + isShared: file.isShared || false, + isChunk: true, + originalId: stringToUuid(file.path), + chunkIndex: i + index, + }, + }, + embedding: chunkEmbedding, + createdAt: Date.now(), + }); + }) + ); processedChunks += batch.length; const batchTime = (Date.now() - batchStart) / 1000; - elizaLogger.info(`[Batch Progress] Processed ${processedChunks}/${totalChunks} chunks (${batchTime.toFixed(2)}s for batch)`); + elizaLogger.info( + `[Batch Progress] Processed ${processedChunks}/${totalChunks} chunks (${batchTime.toFixed(2)}s for batch)` + ); } const totalTime = (Date.now() - startTime) / 1000; - elizaLogger.info(`[Complete] Processed ${file.path} in ${totalTime.toFixed(2)}s`); - + elizaLogger.info( + `[Complete] Processed ${file.path} in ${totalTime.toFixed(2)}s` + ); } catch (error) { - if (file.isShared && error?.code === 'SQLITE_CONSTRAINT_PRIMARYKEY') { - elizaLogger.info(`Shared knowledge ${file.path} already exists in database, skipping creation`); + if ( + file.isShared && + error?.code === "SQLITE_CONSTRAINT_PRIMARYKEY" + ) { + elizaLogger.info( + `Shared knowledge ${file.path} already exists in database, skipping creation` + ); return; } elizaLogger.error(`Error processing file ${file.path}:`, error); throw error; } } -} \ No newline at end of file +} diff --git a/packages/core/src/runtime.ts b/packages/core/src/runtime.ts index bb33b78796..646dc2b0ed 100644 --- a/packages/core/src/runtime.ts +++ b/packages/core/src/runtime.ts @@ -1,3 +1,5 @@ +import { readFile } from "fs/promises"; +import { join } from "path"; import { names, uniqueNamesGenerator } from "unique-names-generator"; import { v4 as uuidv4 } from "uuid"; import { @@ -17,12 +19,12 @@ import { generateText } from "./generation.ts"; import { formatGoalsAsString, getGoals } from "./goals.ts"; import { elizaLogger } from "./index.ts"; import knowledge from "./knowledge.ts"; -import { RAGKnowledgeManager } from "./ragknowledge.ts"; import { MemoryManager } from "./memory.ts"; import { formatActors, formatMessages, getActorDetails } from "./messages.ts"; import { parseJsonArrayFromText } from "./parsing.ts"; import { formatPosts } from "./posts.ts"; import { getProviders } from "./providers.ts"; +import { RAGKnowledgeManager } from "./ragknowledge.ts"; import settings from "./settings.ts"; import { Character, @@ -31,8 +33,9 @@ import { IAgentRuntime, ICacheManager, IDatabaseAdapter, - IRAGKnowledgeManager, IMemoryManager, + IRAGKnowledgeManager, + IVerifiableInferenceAdapter, KnowledgeItem, //RAGKnowledgeItem, //Media, @@ -48,11 +51,8 @@ import { type Actor, type Evaluator, type Memory, - IVerifiableInferenceAdapter, } from "./types.ts"; import { stringToUuid } from "./uuid.ts"; -import { readFile } from 'fs/promises'; -import { join } from 'path'; /** * Represents the runtime environment for an agent, handling message processing, @@ -308,7 +308,7 @@ export class AgentRuntime implements IAgentRuntime { this.ragKnowledgeManager = new RAGKnowledgeManager({ runtime: this, - tableName: 'knowledge' + tableName: "knowledge", }); (opts.managers ?? []).forEach((manager: IMemoryManager) => { @@ -438,11 +438,13 @@ export class AgentRuntime implements IAgentRuntime { this.character.knowledge && this.character.knowledge.length > 0 ) { - if(this.character.settings.ragKnowledge) { - await this.processCharacterRAGKnowledge(this.character.knowledge); + if (this.character.settings.ragKnowledge) { + await this.processCharacterRAGKnowledge( + this.character.knowledge + ); } else { - const stringKnowledge = this.character.knowledge.filter((item): item is string => - typeof item === 'string' + const stringKnowledge = this.character.knowledge.filter( + (item): item is string => typeof item === "string" ); await this.processCharacterKnowledge(stringKnowledge); @@ -511,19 +513,21 @@ export class AgentRuntime implements IAgentRuntime { * then chunks the content into fragments, embeds each fragment, and creates fragment knowledge. * An array of knowledge items or objects containing id, path, and content. */ - private async processCharacterRAGKnowledge(items: (string | { path: string; shared?: boolean })[]) { + private async processCharacterRAGKnowledge( + items: (string | { path: string; shared?: boolean })[] + ) { let hasError = false; for (const item of items) { if (!item) continue; try { - // Check if item is marked as shared + // Check if item is marked as shared let isShared = false; let contentItem = item; // Only treat as shared if explicitly marked - if (typeof item === 'object' && 'path' in item) { + if (typeof item === "object" && "path" in item) { isShared = item.shared === true; contentItem = item.path; } else { @@ -531,22 +535,40 @@ export class AgentRuntime implements IAgentRuntime { } const knowledgeId = stringToUuid(contentItem); - const fileExtension = contentItem.split('.').pop()?.toLowerCase(); + const fileExtension = contentItem + .split(".") + .pop() + ?.toLowerCase(); // Check if it's a file or direct knowledge - if (fileExtension && ['md', 'txt', 'pdf'].includes(fileExtension)) { + if ( + fileExtension && + ["md", "txt", "pdf"].includes(fileExtension) + ) { try { - const rootPath = join(process.cwd(), '..'); - const filePath = join(rootPath, 'characters', 'knowledge', contentItem); - elizaLogger.info("Attempting to read file from:", filePath); + const rootPath = join(process.cwd(), ".."); + const filePath = join( + rootPath, + "characters", + "knowledge", + contentItem + ); + elizaLogger.info( + "Attempting to read file from:", + filePath + ); // Get existing knowledge first - const existingKnowledge = await this.ragKnowledgeManager.getKnowledge({ - id: knowledgeId, - agentId: this.agentId - }); + const existingKnowledge = + await this.ragKnowledgeManager.getKnowledge({ + id: knowledgeId, + agentId: this.agentId, + }); - const content: string = await readFile(filePath, 'utf8'); + const content: string = await readFile( + filePath, + "utf8" + ); if (!content) { hasError = true; continue; @@ -554,15 +576,23 @@ export class AgentRuntime implements IAgentRuntime { // If the file exists in DB, check if content has changed if (existingKnowledge.length > 0) { - const existingContent = existingKnowledge[0].content.text; + const existingContent = + existingKnowledge[0].content.text; if (existingContent === content) { - elizaLogger.info(`File ${contentItem} unchanged, skipping`); + elizaLogger.info( + `File ${contentItem} unchanged, skipping` + ); continue; } else { // If content changed, remove old knowledge before adding new - await this.ragKnowledgeManager.removeKnowledge(knowledgeId); - // Also remove any associated chunks - await this.ragKnowledgeManager.removeKnowledge(`${knowledgeId}-chunk-*` as UUID); + await this.ragKnowledgeManager.removeKnowledge( + knowledgeId + ); + // Also remove any associated chunks - this is needed for non-PostgreSQL adapters + // PostgreSQL adapter handles chunks internally via foreign keys + await this.ragKnowledgeManager.removeKnowledge( + `${knowledgeId}-chunk-*` as UUID + ); } } @@ -576,15 +606,14 @@ export class AgentRuntime implements IAgentRuntime { await this.ragKnowledgeManager.processFile({ path: contentItem, content: content, - type: fileExtension as 'pdf' | 'md' | 'txt', - isShared: isShared + type: fileExtension as "pdf" | "md" | "txt", + isShared: isShared, }); - } catch (error: any) { hasError = true; elizaLogger.error( `Failed to read knowledge file ${contentItem}. Error details:`, - error?.message || error || 'Unknown error' + error?.message || error || "Unknown error" ); continue; // Continue to next item even if this one fails } @@ -597,13 +626,16 @@ export class AgentRuntime implements IAgentRuntime { contentItem.slice(0, 100) ); - const existingKnowledge = await this.ragKnowledgeManager.getKnowledge({ - id: knowledgeId, - agentId: this.agentId - }); + const existingKnowledge = + await this.ragKnowledgeManager.getKnowledge({ + id: knowledgeId, + agentId: this.agentId, + }); if (existingKnowledge.length > 0) { - elizaLogger.info(`Direct knowledge ${knowledgeId} already exists, skipping`); + elizaLogger.info( + `Direct knowledge ${knowledgeId} already exists, skipping` + ); continue; } @@ -613,23 +645,25 @@ export class AgentRuntime implements IAgentRuntime { content: { text: contentItem, metadata: { - type: 'direct' - } - } + type: "direct", + }, + }, }); } } catch (error: any) { hasError = true; elizaLogger.error( `Error processing knowledge item ${item}:`, - error?.message || error || 'Unknown error' + error?.message || error || "Unknown error" ); continue; // Continue to next item even if this one fails } } if (hasError) { - elizaLogger.warn('Some knowledge items failed to process, but continuing with available knowledge'); + elizaLogger.warn( + "Some knowledge items failed to process, but continuing with available knowledge" + ); } } @@ -1100,10 +1134,10 @@ Text: ${attachment.text} // Check the existing memories in the database return this.messageManager.getMemoriesByRoomIds({ - // filter out the current room id from rooms - roomIds: rooms.filter((room) => room !== roomId), - limit: 20 - }); + // filter out the current room id from rooms + roomIds: rooms.filter((room) => room !== roomId), + limit: 20, + }); }; const recentInteractions = @@ -1167,18 +1201,18 @@ Text: ${attachment.text} } let knowledgeData = []; - let formattedKnowledge = ''; + let formattedKnowledge = ""; - if(this.character.settings?.ragKnowledge) { + if (this.character.settings?.ragKnowledge) { const recentContext = recentMessagesData - .slice(-3) // Last 3 messages - .map(msg => msg.content.text) - .join(' '); + .slice(-3) // Last 3 messages + .map((msg) => msg.content.text) + .join(" "); knowledgeData = await this.ragKnowledgeManager.getKnowledge({ query: message.content.text, conversationContext: recentContext, - limit: 5 + limit: 5, }); formattedKnowledge = formatKnowledge(knowledgeData);