From de3d26fb158e5ff9d6b56463dfa91a7c1c620473 Mon Sep 17 00:00:00 2001 From: Willie Zutz Date: Sat, 28 Jun 2025 14:48:08 -0600 Subject: [PATCH] feat(agent): Refactor agent architecture to enhance content routing and file search capabilities - Introduced ContentRouterAgent to determine the next step in information gathering (file search, web search, or analysis) based on task relevance and focus mode. - Added FileSearchAgent to handle searching through attached files, processing file content into searchable documents. - Updated SynthesizerAgent to utilize a prompt template for generating comprehensive responses based on context and user queries. - Enhanced TaskManagerAgent to consider file context when creating tasks. - Improved AnalyzerAgent to assess the sufficiency of context, including file and web documents. - Implemented utility functions for processing files and ranking documents based on similarity to queries. - Updated prompts to include new instructions for handling file context and routing decisions. - Adjusted agent search workflow to integrate new agents and support file handling. --- .github/copilot-instructions.md | 84 +++++-- src/app/api/chat/route.ts | 1 + src/app/api/search/route.ts | 1 + src/app/api/uploads/route.ts | 125 +++++++++- src/components/MessageInputActions/Attach.tsx | 8 + .../MessageInputActions/AttachSmall.tsx | 8 + src/lib/agents/agentState.ts | 8 + src/lib/agents/contentRouterAgent.ts | 222 +++++++++++++++++ src/lib/agents/fileSearchAgent.ts | 226 ++++++++++++++++++ src/lib/agents/index.ts | 2 + src/lib/agents/synthesizerAgent.ts | 77 ++---- src/lib/agents/taskManagerAgent.ts | 13 +- src/lib/prompts/analyzer.ts | 26 ++ src/lib/prompts/contentRouter.ts | 86 +++++++ src/lib/prompts/index.ts | 2 + src/lib/prompts/synthesizer.ts | 48 ++++ src/lib/prompts/taskBreakdown.ts | 35 ++- src/lib/search/agentSearch.ts | 46 +++- src/lib/search/metaSearchAgent.ts | 9 +- src/lib/utils/fileProcessing.ts | 113 +++++++++ 20 files changed, 1044 insertions(+), 96 deletions(-) create mode 100644 src/lib/agents/contentRouterAgent.ts create mode 100644 src/lib/agents/fileSearchAgent.ts create mode 100644 src/lib/prompts/contentRouter.ts create mode 100644 src/lib/prompts/synthesizer.ts create mode 100644 src/lib/utils/fileProcessing.ts diff --git a/.github/copilot-instructions.md b/.github/copilot-instructions.md index 7348f58..3875861 100644 --- a/.github/copilot-instructions.md +++ b/.github/copilot-instructions.md @@ -12,14 +12,39 @@ The system works through these main steps: - Results are ranked using embedding-based similarity search - LLMs are used to generate a comprehensive response with cited sources -## Key Technologies +## Architecture Details +### Technology Stack - **Frontend**: React, Next.js, Tailwind CSS - **Backend**: Node.js - **Database**: SQLite with Drizzle ORM -- **AI/ML**: LangChain for orchestration, various LLM providers including OpenAI, Anthropic, Groq, Ollama (local models) +- **AI/ML**: LangChain + LangGraph for orchestration - **Search**: SearXNG integration -- **Embedding Models**: For re-ranking search results +- **Content Processing**: Mozilla Readability, Cheerio, Playwright + +### Database (SQLite + Drizzle ORM) +- Schema: `src/lib/db/schema.ts` +- Tables: `messages`, `chats`, `systemPrompts` +- Configuration: `drizzle.config.ts` +- Local file: `data/db.sqlite` + +### AI/ML Stack +- **LLM Providers**: OpenAI, Anthropic, Groq, Ollama, Gemini, DeepSeek, LM Studio +- **Embeddings**: Xenova Transformers, similarity search (cosine/dot product) +- 
**Agents**: `webSearchAgent`, `analyzerAgent`, `synthesizerAgent`, `taskManagerAgent` + +### External Services +- **Search Engine**: SearXNG integration (`src/lib/searxng.ts`) +- **Configuration**: TOML-based config file + +### Data Flow +1. User query → Task Manager Agent +2. Web Search Agent → SearXNG → Content extraction +3. Analyzer Agent → Content processing + embedding +4. Synthesizer Agent → LLM response generation +5. Response with cited sources + + ## Project Structure @@ -47,13 +72,14 @@ Perplexica supports multiple specialized search modes: - Wolfram Alpha Search Mode: For calculations and data analysis - Reddit Search Mode: For community discussions -## Development Workflow +## Core Commands -- Use `npm run dev` for local development -- Format code with `npm run format:write` before committing -- Database migrations: `npm run db:push` -- Build for production: `npm run build` -- Start production server: `npm run start` +- **Development**: `npm run dev` (uses Turbopack for faster builds) +- **Build**: `npm run build` (includes automatic DB push) +- **Production**: `npm run start` +- **Linting**: `npm run lint` (Next.js ESLint) +- **Formatting**: `npm run format:write` (Prettier) +- **Database**: `npm run db:push` (Drizzle migrations) ## Configuration @@ -77,12 +103,36 @@ When working on this codebase, you might need to: - Build new chains in `/src/lib/chains` - Implement new LangGraph agents in `/src/lib/agents` -## AI Behavior +## AI Behavior Guidelines -- Avoid conciliatory language -- It is not necessary to apologize -- If you don't know the answer, ask for clarification -- Do not add additional packages or dependencies unless explicitly requested -- Only make changes to the code that are relevant to the task at hand -- Do not create new files to test changes -- Do not run the application unless asked +- Focus on factual, technical responses without unnecessary pleasantries +- Avoid conciliatory language and apologies +- Ask for clarification when requirements are unclear +- Do not add dependencies unless explicitly requested +- Only make changes relevant to the specific task +- Do not create test files or run the application unless requested +- Prioritize existing patterns and architectural decisions +- Use the established component structure and styling patterns + +## Code Style & Standards + +### TypeScript Configuration +- Strict mode enabled +- ES2017 target +- Path aliases: `@/*` → `src/*` +- No test files (testing not implemented) + +### Formatting & Linting +- ESLint: Next.js core web vitals rules +- Prettier: Use `npm run format:write` before commits +- Import style: Use `@/` prefix for internal imports + +### File Organization +- Components: React functional components with TypeScript +- API routes: Next.js App Router (`src/app/api/`) +- Utilities: Grouped by domain (`src/lib/`) +- Naming: camelCase for functions/variables, PascalCase for components + +### Error Handling +- Use try/catch blocks for async operations +- Return structured error responses from API routes diff --git a/src/app/api/chat/route.ts b/src/app/api/chat/route.ts index 3a17bcf..1c1125c 100644 --- a/src/app/api/chat/route.ts +++ b/src/app/api/chat/route.ts @@ -440,6 +440,7 @@ export const POST = async (req: Request) => { systemInstructionsContent, abortController.signal, personaInstructionsContent, + body.focusMode, ); handleEmitterEvents( diff --git a/src/app/api/search/route.ts b/src/app/api/search/route.ts index c76018f..a5a6c1b 100644 --- a/src/app/api/search/route.ts +++ 
b/src/app/api/search/route.ts
@@ -142,6 +142,7 @@ export const POST = async (req: Request) => {
       promptData.systemInstructions,
       signal,
       promptData.personaInstructions,
+      body.focusMode,
     );
 
     if (!body.stream) {
diff --git a/src/app/api/uploads/route.ts b/src/app/api/uploads/route.ts
index 9fbaf2d..410eafd 100644
--- a/src/app/api/uploads/route.ts
+++ b/src/app/api/uploads/route.ts
@@ -2,11 +2,20 @@ import { NextResponse } from 'next/server';
 import fs from 'fs';
 import path from 'path';
 import crypto from 'crypto';
-import { getAvailableEmbeddingModelProviders } from '@/lib/providers';
+import { getAvailableEmbeddingModelProviders, getAvailableChatModelProviders } from '@/lib/providers';
+import {
+  getCustomOpenaiApiKey,
+  getCustomOpenaiApiUrl,
+  getCustomOpenaiModelName,
+} from '@/lib/config';
 import { PDFLoader } from '@langchain/community/document_loaders/fs/pdf';
 import { DocxLoader } from '@langchain/community/document_loaders/fs/docx';
 import { RecursiveCharacterTextSplitter } from '@langchain/textsplitters';
 import { Document } from 'langchain/document';
+import { BaseChatModel } from '@langchain/core/language_models/chat_models';
+import { ChatOpenAI } from '@langchain/openai';
+import { ChatOllama } from '@langchain/ollama';
+import { z } from 'zod';
 
 interface FileRes {
   fileName: string;
@@ -25,6 +34,52 @@ const splitter = new RecursiveCharacterTextSplitter({
   chunkOverlap: 100,
 });
 
+// Define Zod schema for structured topic generation output
+const TopicsSchema = z.object({
+  topics: z
+    .array(z.string())
+    .min(1)
+    .max(3)
+    .describe('Array of 1-3 concise, descriptive topics that capture the main subject matter'),
+});
+
+type TopicsOutput = z.infer<typeof TopicsSchema>;
+
+/**
+ * Generate semantic topics for a document using LLM with structured output
+ */
+async function generateFileTopics(
+  content: string,
+  filename: string,
+  llm: BaseChatModel,
+): Promise<string> {
+  try {
+    // Take first 1500 characters for topic generation to avoid token limits
+    const excerpt = content.substring(0, 1500);
+
+    const prompt = `Analyze the following document excerpt and generate 1-3 concise, descriptive topics that capture the main subject matter. The topics should be useful for determining if this document is relevant to answer questions.
+
+Document filename: ${filename}
+Document excerpt:
+${excerpt}
+
+Generate topics that describe what this document is about, its domain, and key subject areas.
Focus on topics that would help determine relevance for search queries.`; + + // Use structured output for reliable topic extraction + const structuredLlm = llm.withStructuredOutput(TopicsSchema, { + name: 'generate_topics', + }); + + const result = await structuredLlm.invoke(prompt); + console.log('Generated topics:', result.topics); + // Filename is included for context + return filename + ', ' + result.topics.join(', '); + } catch (error) { + console.warn('Error generating topics with LLM:', error); + return `Document: ${filename}`; + } +} + export async function POST(req: Request) { try { const formData = await req.formData(); @@ -32,6 +87,9 @@ export async function POST(req: Request) { const files = formData.getAll('files') as File[]; const embedding_model = formData.get('embedding_model'); const embedding_model_provider = formData.get('embedding_model_provider'); + const chat_model = formData.get('chat_model'); + const chat_model_provider = formData.get('chat_model_provider'); + const ollama_context_window = formData.get('ollama_context_window'); if (!embedding_model || !embedding_model_provider) { return NextResponse.json( @@ -40,21 +98,65 @@ export async function POST(req: Request) { ); } - const embeddingModels = await getAvailableEmbeddingModelProviders(); - const provider = - embedding_model_provider ?? Object.keys(embeddingModels)[0]; - const embeddingModel = - embedding_model ?? Object.keys(embeddingModels[provider as string])[0]; + // Get available providers + const [chatModelProviders, embeddingModelProviders] = await Promise.all([ + getAvailableChatModelProviders(), + getAvailableEmbeddingModelProviders(), + ]); - let embeddingsModel = - embeddingModels[provider as string]?.[embeddingModel as string]?.model; - if (!embeddingsModel) { + // Setup embedding model + const embeddingProvider = + embeddingModelProviders[ + embedding_model_provider as string ?? Object.keys(embeddingModelProviders)[0] + ]; + const embeddingModelConfig = + embeddingProvider[ + embedding_model as string ?? Object.keys(embeddingProvider)[0] + ]; + + if (!embeddingModelConfig) { return NextResponse.json( { message: 'Invalid embedding model selected' }, { status: 400 }, ); } + let embeddingsModel = embeddingModelConfig.model; + + // Setup chat model for topic generation (similar to chat route) + const chatModelProvider = + chatModelProviders[ + chat_model_provider as string ?? Object.keys(chatModelProviders)[0] + ]; + const chatModelConfig = + chatModelProvider[ + chat_model as string ?? Object.keys(chatModelProvider)[0] + ]; + + let llm: BaseChatModel; + + // Handle chat model creation like in chat route + if (chat_model_provider === 'custom_openai') { + llm = new ChatOpenAI({ + openAIApiKey: getCustomOpenaiApiKey(), + modelName: getCustomOpenaiModelName(), + temperature: 0.1, + configuration: { + baseURL: getCustomOpenaiApiUrl(), + }, + }) as unknown as BaseChatModel; + } else if (chatModelProvider && chatModelConfig) { + llm = chatModelConfig.model; + + // Set context window size for Ollama models + if (llm instanceof ChatOllama && chat_model_provider === 'ollama') { + // Use provided context window or default to 2048 + const contextWindow = ollama_context_window ? 
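+        // numCtx sets Ollama's context window (how many tokens of prompt the
+        // model can attend to). Rough equivalent at construction time, with a
+        // hypothetical model name:
+        //   const llm = new ChatOllama({ model: 'llama3.1', numCtx: 4096 });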
+ parseInt(ollama_context_window as string, 10) : 2048; + llm.numCtx = contextWindow; + } + } + const processedFiles: FileRes[] = []; await Promise.all( @@ -89,11 +191,16 @@ export async function POST(req: Request) { const splitted = await splitter.splitDocuments(docs); + // Generate semantic topics using LLM + const fullContent = docs.map(doc => doc.pageContent).join('\n'); + const semanticTopics = await generateFileTopics(fullContent, file.name, llm); + const extractedDataPath = filePath.replace(/\.\w+$/, '-extracted.json'); fs.writeFileSync( extractedDataPath, JSON.stringify({ title: file.name, + topics: semanticTopics, contents: splitted.map((doc) => doc.pageContent), }), ); diff --git a/src/components/MessageInputActions/Attach.tsx b/src/components/MessageInputActions/Attach.tsx index 6c21f1d..4fb9b4d 100644 --- a/src/components/MessageInputActions/Attach.tsx +++ b/src/components/MessageInputActions/Attach.tsx @@ -35,9 +35,17 @@ const Attach = ({ 'embeddingModelProvider', ); const embeddingModel = localStorage.getItem('embeddingModel'); + const chatModelProvider = localStorage.getItem('chatModelProvider'); + const chatModel = localStorage.getItem('chatModel'); + const ollamaContextWindow = localStorage.getItem('ollamaContextWindow') || '2048'; data.append('embedding_model_provider', embeddingModelProvider!); data.append('embedding_model', embeddingModel!); + data.append('chat_model_provider', chatModelProvider!); + data.append('chat_model', chatModel!); + if (chatModelProvider === 'ollama') { + data.append('ollama_context_window', ollamaContextWindow); + } const res = await fetch(`/api/uploads`, { method: 'POST', diff --git a/src/components/MessageInputActions/AttachSmall.tsx b/src/components/MessageInputActions/AttachSmall.tsx index 044b652..0f7f2b9 100644 --- a/src/components/MessageInputActions/AttachSmall.tsx +++ b/src/components/MessageInputActions/AttachSmall.tsx @@ -35,9 +35,17 @@ const AttachSmall = ({ 'embeddingModelProvider', ); const embeddingModel = localStorage.getItem('embeddingModel'); + const chatModelProvider = localStorage.getItem('chatModelProvider'); + const chatModel = localStorage.getItem('chatModel'); + const ollamaContextWindow = localStorage.getItem('ollamaContextWindow') || '2048'; data.append('embedding_model_provider', embeddingModelProvider!); data.append('embedding_model', embeddingModel!); + data.append('chat_model_provider', chatModelProvider!); + data.append('chat_model', chatModel!); + if (chatModelProvider === 'ollama') { + data.append('ollama_context_window', ollamaContextWindow); + } const res = await fetch(`/api/uploads`, { method: 'POST', diff --git a/src/lib/agents/agentState.ts b/src/lib/agents/agentState.ts index 595a318..c8f89ed 100644 --- a/src/lib/agents/agentState.ts +++ b/src/lib/agents/agentState.ts @@ -58,4 +58,12 @@ export const AgentState = Annotation.Root({ reducer: (x, y) => y ?? x, default: () => '', }), + fileIds: Annotation({ + reducer: (x, y) => y ?? x, + default: () => [], + }), + focusMode: Annotation({ + reducer: (x, y) => y ?? 
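+    // `y ?? x` keeps the previous value unless an update supplies a new one:
+    // reducer('webSearch', undefined) -> 'webSearch';
+    // reducer('webSearch', 'chat') -> 'chat'.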
x,
+    default: () => 'webSearch',
+  }),
 });
diff --git a/src/lib/agents/contentRouterAgent.ts b/src/lib/agents/contentRouterAgent.ts
new file mode 100644
index 0000000..420a9f0
--- /dev/null
+++ b/src/lib/agents/contentRouterAgent.ts
@@ -0,0 +1,222 @@
+import { BaseChatModel } from '@langchain/core/language_models/chat_models';
+import { AIMessage } from '@langchain/core/messages';
+import { PromptTemplate } from '@langchain/core/prompts';
+import { Command, END } from '@langchain/langgraph';
+import { EventEmitter } from 'events';
+import { z } from 'zod';
+import fs from 'node:fs';
+import path from 'node:path';
+import { AgentState } from './agentState';
+import { contentRouterPrompt } from '../prompts/contentRouter';
+import { removeThinkingBlocksFromMessages } from '../utils/contentUtils';
+
+// Define Zod schema for structured router decision output
+const RouterDecisionSchema = z.object({
+  decision: z
+    .enum(['file_search', 'web_search', 'analyzer'])
+    .describe('The next step to take in the workflow'),
+  reasoning: z
+    .string()
+    .describe('Explanation of why this decision was made'),
+});
+
+type RouterDecision = z.infer<typeof RouterDecisionSchema>;
+
+export class ContentRouterAgent {
+  private llm: BaseChatModel;
+  private emitter: EventEmitter;
+  private systemInstructions: string;
+  private signal: AbortSignal;
+
+  constructor(
+    llm: BaseChatModel,
+    emitter: EventEmitter,
+    systemInstructions: string,
+    signal: AbortSignal,
+  ) {
+    this.llm = llm;
+    this.emitter = emitter;
+    this.systemInstructions = systemInstructions;
+    this.signal = signal;
+  }
+
+  /**
+   * Content router agent node
+   */
+  async execute(state: typeof AgentState.State): Promise<Command> {
+    try {
+      // Determine current task to process
+      const currentTask =
+        state.tasks && state.tasks.length > 0
+          ? state.tasks[state.currentTaskIndex || 0]
+          : state.query;
+
+      console.log(
+        `Content router processing task ${(state.currentTaskIndex || 0) + 1} of ${state.tasks?.length || 1}: "${currentTask}"`,
+      );
+
+      // Extract focus mode from state - this should now come from the API
+      const focusMode = state.focusMode || 'webSearch';
+
+      const hasFiles = state.fileIds && state.fileIds.length > 0;
+      const documentCount = state.relevantDocuments.length;
+      const searchHistory = state.searchInstructionHistory.join(', ') || 'None';
+
+      // Extract file topics if files are available
+      const fileTopics = hasFiles ? await this.extractFileTopics(state.fileIds!)
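+        // `extractFileTopics` (defined below) reads each file's
+        // `<fileId>-extracted.json` sidecar written by the uploads route.
+        // Illustrative shape (values are hypothetical):
+        //   { "title": "report.pdf",
+        //     "topics": "report.pdf, quarterly sales, revenue forecast",
+        //     "contents": ["chunk 1 ...", "chunk 2 ..."] }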
: 'None'; + + // Emit routing decision event + this.emitter.emit('agent_action', { + type: 'agent_action', + data: { + action: 'ROUTING_DECISION', + message: `Determining optimal information source for current task`, + details: { + query: state.query, + currentTask: currentTask, + taskIndex: (state.currentTaskIndex || 0) + 1, + totalTasks: state.tasks?.length || 1, + focusMode: focusMode, + hasFiles: hasFiles, + fileCount: state.fileIds?.length || 0, + documentCount: documentCount, + searchIterations: state.searchInstructionHistory.length, + }, + }, + }); + + const template = PromptTemplate.fromTemplate(contentRouterPrompt); + const prompt = await template.format({ + currentTask: currentTask, + query: state.originalQuery || state.query, + focusMode: focusMode, + hasFiles: hasFiles, + fileTopics: fileTopics, + documentCount: documentCount, + searchHistory: searchHistory, + }); + + // Use structured output for routing decision + const structuredLlm = this.llm.withStructuredOutput(RouterDecisionSchema, { + name: 'route_content', + }); + + const routerDecision = await structuredLlm.invoke( + [...removeThinkingBlocksFromMessages(state.messages), prompt], + { signal: this.signal }, + ); + + console.log(`Router decision: ${routerDecision.decision}`); + console.log(`Router reasoning: ${routerDecision.reasoning}`); + console.log(`File topics: ${fileTopics}`); + console.log(`Focus mode: ${focusMode}`); + + // Validate decision based on focus mode restrictions + const validatedDecision = this.validateDecision(routerDecision, focusMode, hasFiles); + + // Emit routing result event + this.emitter.emit('agent_action', { + type: 'agent_action', + data: { + action: 'ROUTING_RESULT', + message: `Routing to ${validatedDecision.decision}: ${validatedDecision.reasoning}`, + details: { + query: state.query, + currentTask: currentTask, + taskIndex: (state.currentTaskIndex || 0) + 1, + totalTasks: state.tasks?.length || 1, + decision: validatedDecision.decision, + focusMode: focusMode, + hasFiles: hasFiles, + documentCount: documentCount, + searchIterations: state.searchInstructionHistory.length, + }, + }, + }); + + const responseMessage = `Content routing completed. Next step: ${validatedDecision.decision}`; + console.log(responseMessage); + + return new Command({ + goto: validatedDecision.decision, + update: { + messages: [new AIMessage(responseMessage)], + }, + }); + } catch (error) { + console.error('Content router error:', error); + const errorMessage = new AIMessage( + `Content routing failed: ${error instanceof Error ? 
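+      // A routing failure ends the graph run (goto END) rather than guessing
+      // a fallback; the task manager's own error path already degrades to
+      // content_router with the original query as a single task.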
error.message : 'Unknown error'}`, + ); + + return new Command({ + goto: END, + update: { + messages: [errorMessage], + }, + }); + } + } + + /** + * Extract semantic topics from attached files for relevance assessment + */ + private async extractFileTopics(fileIds: string[]): Promise { + try { + const topics = fileIds.map(fileId => { + try { + const filePath = path.join(process.cwd(), 'uploads', fileId); + const contentPath = filePath + '-extracted.json'; + + if (fs.existsSync(contentPath)) { + const content = JSON.parse(fs.readFileSync(contentPath, 'utf8')); + const filename = content.title || 'Document'; + + // Use LLM-generated semantic topics if available, otherwise fall back to filename + const semanticTopics = content.topics; + return semanticTopics || filename; + } + return 'Unknown Document'; + } catch (error) { + console.warn(`Error extracting topic for file ${fileId}:`, error); + return 'Unknown Document'; + } + }); + + return topics.join('; '); + } catch (error) { + console.warn('Error extracting file topics:', error); + return 'Unable to determine file topics'; + } + } + + /** + * Validate and potentially override the router decision based on focus mode restrictions + */ + private validateDecision( + decision: RouterDecision, + focusMode: string, + hasFiles: boolean, + ): RouterDecision { + // Enforce focus mode restrictions for chat and localResearch modes + if ((focusMode === 'chat' || focusMode === 'localResearch') && + decision.decision === 'web_search') { + + // Override to file_search if files are available, otherwise analyzer + const fallbackDecision = hasFiles ? 'file_search' : 'analyzer'; + + console.log( + `Overriding web_search decision to ${fallbackDecision} due to focus mode restriction: ${focusMode}` + ); + + return { + decision: fallbackDecision as 'file_search' | 'analyzer', + reasoning: `Overridden to ${fallbackDecision} - web search not allowed in ${focusMode} mode. ${decision.reasoning}`, + }; + } + + // For webSearch mode, trust the LLM's decision about file relevance + // No overrides needed - the enhanced prompt handles file relevance assessment + return decision; + } +} diff --git a/src/lib/agents/fileSearchAgent.ts b/src/lib/agents/fileSearchAgent.ts new file mode 100644 index 0000000..d928259 --- /dev/null +++ b/src/lib/agents/fileSearchAgent.ts @@ -0,0 +1,226 @@ +import { BaseChatModel } from '@langchain/core/language_models/chat_models'; +import { AIMessage } from '@langchain/core/messages'; +import { Command, END } from '@langchain/langgraph'; +import { EventEmitter } from 'events'; +import { Document } from 'langchain/document'; +import { AgentState } from './agentState'; +import { Embeddings } from '@langchain/core/embeddings'; +import { processFilesToDocuments, getRankedDocs } from '../utils/fileProcessing'; + +export class FileSearchAgent { + private llm: BaseChatModel; + private emitter: EventEmitter; + private systemInstructions: string; + private signal: AbortSignal; + private embeddings: Embeddings; + + constructor( + llm: BaseChatModel, + emitter: EventEmitter, + systemInstructions: string, + signal: AbortSignal, + embeddings: Embeddings, + ) { + this.llm = llm; + this.emitter = emitter; + this.systemInstructions = systemInstructions; + this.signal = signal; + this.embeddings = embeddings; + } + + /** + * File search agent node + */ + async execute(state: typeof AgentState.State): Promise { + try { + // Determine current task to process + const currentTask = + state.tasks && state.tasks.length > 0 + ? 
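+      // Same task-selection pattern as the other agents: fall back to the raw
+      // user query when the task manager produced no task breakdown.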
state.tasks[state.currentTaskIndex || 0] + : state.query; + + console.log( + `Processing file search for task ${(state.currentTaskIndex || 0) + 1} of ${state.tasks?.length || 1}: "${currentTask}"`, + ); + + // Check if we have file IDs to process + if (!state.fileIds || state.fileIds.length === 0) { + console.log('No files attached for search'); + return new Command({ + goto: 'analyzer', + update: { + messages: [new AIMessage('No files attached to search.')], + }, + }); + } + + // Emit consulting attached files event + this.emitter.emit('agent_action', { + type: 'agent_action', + data: { + action: 'CONSULTING_ATTACHED_FILES', + message: `Consulting attached files...`, + details: { + query: state.query, + currentTask: currentTask, + taskIndex: (state.currentTaskIndex || 0) + 1, + totalTasks: state.tasks?.length || 1, + fileCount: state.fileIds.length, + documentCount: state.relevantDocuments.length, + }, + }, + }); + + // Process files to documents + const fileDocuments = await processFilesToDocuments(state.fileIds); + + if (fileDocuments.length === 0) { + console.log('No processable file content found'); + return new Command({ + goto: 'analyzer', + update: { + messages: [new AIMessage('No searchable content found in attached files.')], + }, + }); + } + + console.log(`Processed ${fileDocuments.length} file documents for search`); + + // Emit searching file content event + this.emitter.emit('agent_action', { + type: 'agent_action', + data: { + action: 'SEARCHING_FILE_CONTENT', + message: `Searching through ${fileDocuments.length} file sections for relevant information`, + details: { + query: state.query, + currentTask: currentTask, + taskIndex: (state.currentTaskIndex || 0) + 1, + totalTasks: state.tasks?.length || 1, + fileDocumentCount: fileDocuments.length, + documentCount: state.relevantDocuments.length, + }, + }, + }); + + // Generate query embedding for similarity search + const queryEmbedding = await this.embeddings.embedQuery( + state.originalQuery + ' ' + currentTask, + ); + + // Perform similarity search over file documents + const rankedDocuments = getRankedDocs( + queryEmbedding, + fileDocuments, + 12, // maxDocs + 0.3, // similarity threshold + ); + + console.log(`Found ${rankedDocuments.length} relevant file sections`); + + if (rankedDocuments.length === 0) { + // Emit no relevant content event + this.emitter.emit('agent_action', { + type: 'agent_action', + data: { + action: 'NO_RELEVANT_FILE_CONTENT', + message: `No relevant content found in attached files for the current task`, + details: { + query: state.query, + currentTask: currentTask, + taskIndex: (state.currentTaskIndex || 0) + 1, + totalTasks: state.tasks?.length || 1, + searchedDocuments: fileDocuments.length, + documentCount: state.relevantDocuments.length, + }, + }, + }); + + return new Command({ + goto: 'analyzer', + update: { + messages: [new AIMessage('No relevant content found in attached files for the current task.')], + }, + }); + } + + // Emit file content found event + this.emitter.emit('agent_action', { + type: 'agent_action', + data: { + action: 'FILE_CONTENT_FOUND', + message: `Found ${rankedDocuments.length} relevant sections in attached files`, + details: { + query: state.query, + currentTask: currentTask, + taskIndex: (state.currentTaskIndex || 0) + 1, + totalTasks: state.tasks?.length || 1, + relevantSections: rankedDocuments.length, + searchedDocuments: fileDocuments.length, + documentCount: state.relevantDocuments.length + rankedDocuments.length, + }, + }, + }); + + const responseMessage = `File 
search completed. Found ${rankedDocuments.length} relevant sections in attached files.`; + console.log(responseMessage); + + return new Command({ + goto: 'analyzer', // Route back to analyzer to process the results + update: { + messages: [new AIMessage(responseMessage)], + relevantDocuments: rankedDocuments, + }, + }); + } catch (error) { + console.error('File search error:', error); + const errorMessage = new AIMessage( + `File search failed: ${error instanceof Error ? error.message : 'Unknown error'}`, + ); + + return new Command({ + goto: END, + update: { + messages: [errorMessage], + }, + }); + } + } + + /** + * Perform a similarity search over file documents + * @param state The current agent state + * @returns Ranked documents relevant to the current task + */ + async search(state: typeof AgentState.State): Promise { + if (!state.fileIds || state.fileIds.length === 0) { + return []; + } + + // Process files to documents + const fileDocuments = await processFilesToDocuments(state.fileIds); + + if (fileDocuments.length === 0) { + return []; + } + + // Determine current task to search for + const currentTask = + state.tasks && state.tasks.length > 0 + ? state.tasks[state.currentTaskIndex || 0] + : state.query; + + // Generate query embedding for similarity search + const queryEmbedding = await this.embeddings.embedQuery( + state.originalQuery + ' ' + currentTask, + ); + + // Perform similarity search and return ranked documents + return getRankedDocs( + queryEmbedding, + fileDocuments, + 8, // maxDocs + 0.3, // similarity threshold + ); + } +} diff --git a/src/lib/agents/index.ts b/src/lib/agents/index.ts index f6e5661..5bc5cdb 100644 --- a/src/lib/agents/index.ts +++ b/src/lib/agents/index.ts @@ -3,3 +3,5 @@ export { WebSearchAgent } from './webSearchAgent'; export { AnalyzerAgent } from './analyzerAgent'; export { SynthesizerAgent } from './synthesizerAgent'; export { TaskManagerAgent } from './taskManagerAgent'; +export { FileSearchAgent } from './fileSearchAgent'; +export { ContentRouterAgent } from './contentRouterAgent'; diff --git a/src/lib/agents/synthesizerAgent.ts b/src/lib/agents/synthesizerAgent.ts index e840f22..44af1b4 100644 --- a/src/lib/agents/synthesizerAgent.ts +++ b/src/lib/agents/synthesizerAgent.ts @@ -1,10 +1,12 @@ import { BaseChatModel } from '@langchain/core/language_models/chat_models'; import { HumanMessage, SystemMessage } from '@langchain/core/messages'; +import { PromptTemplate } from '@langchain/core/prompts'; import { Command, END } from '@langchain/langgraph'; import { EventEmitter } from 'events'; import { getModelName } from '../utils/modelUtils'; import { AgentState } from './agentState'; import { removeThinkingBlocksFromMessages } from '../utils/contentUtils'; +import { synthesizerPrompt } from '../prompts/synthesizer'; export class SynthesizerAgent { private llm: BaseChatModel; @@ -29,60 +31,33 @@ export class SynthesizerAgent { */ async execute(state: typeof AgentState.State): Promise { try { - const synthesisPrompt = `You are an expert information synthesizer. Based on the search results and analysis provided, create a comprehensive, well-structured answer to the user's query. 
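+      // synthesizerPrompt (src/lib/prompts/synthesizer.ts) expects four
+      // template variables: {personaInstructions}, {conversationHistory},
+      // {relevantDocuments}, and {query}; each is assembled below.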
+ // Format the prompt using the external template + const template = PromptTemplate.fromTemplate(synthesizerPrompt); + + const conversationHistory = removeThinkingBlocksFromMessages(state.messages) + .map((msg) => `<${msg.getType()}>${msg.content}`) + .join('\n') || 'No previous conversation context'; -# Response Instructions -Your task is to provide answers that are: -- **Informative and relevant**: Thoroughly address the user's query using the given context -- **Engaging and detailed**: Write responses that read like a high-quality blog post, including extra details and relevant insights -- **Cited and credible**: Use inline citations with [number] notation to refer to the context source(s) for each fact or detail included -- **Explanatory and Comprehensive**: Strive to explain the topic in depth, offering detailed analysis, insights, and clarifications wherever applicable - -# Formatting Instructions -## System Formatting Instructions -- **Structure**: Use a well-organized format with proper headings (e.g., "## Example heading 1" or "## Example heading 2"). Present information in paragraphs or concise bullet points where appropriate -- **Tone and Style**: Maintain a neutral, journalistic tone with engaging narrative flow. Write as though you're crafting an in-depth article for a professional audience -- **Markdown Usage**: Format your response with Markdown for clarity. Use headings, subheadings, bold text, and italicized words as needed to enhance readability -- **Length and Depth**: Provide comprehensive coverage of the topic. Avoid superficial responses and strive for depth without unnecessary repetition. Expand on technical or complex topics to make them easier to understand for a general audience -- **No main heading/title**: Start your response directly with the introduction unless asked to provide a specific title - -## User Formatting and Persona Instructions -- Give these instructions more weight than the system formatting instructions -${this.personaInstructions} - -# Citation Requirements -- Cite every single fact, statement, or sentence using [number] notation corresponding to the source from the provided context -- If a statement is based on AI model inference or training data, it must be marked as \`[AI]\` and not cited from the context -- If a statement is based on previous messages in the conversation history, it must be marked as \`[Hist]\` and not cited from the context -- Integrate citations naturally at the end of sentences or clauses as appropriate. For example, "The Eiffel Tower is one of the most visited landmarks in the world[1]." -- Ensure that **every sentence in your response includes at least one citation**, even when information is inferred or connected to general knowledge available in the provided context -- Use multiple sources for a single detail if applicable, such as, "Paris is a cultural hub, attracting millions of visitors annually[1][2]." 
-- Always prioritize credibility and accuracy by linking all statements back to their respective context sources -- Avoid citing unsupported assumptions or personal interpretations; if no source supports a statement, clearly indicate the limitation - -# Conversation History Context: -${ - removeThinkingBlocksFromMessages(state.messages) - .map((msg) => `<${msg.getType()}>${msg.content}`) - .join('\n') || 'No previous conversation context' -} - -# Available Information: -${state.relevantDocuments - .map( - (doc, index) => - `<${index + 1}>\n + const relevantDocuments = state.relevantDocuments + .map( + (doc, index) => { + const isFile = doc.metadata?.url?.toLowerCase().includes('file'); + return `<${index + 1}>\n ${doc.metadata.title}\n - ${doc.metadata?.url.toLowerCase().includes('file') ? '' : '\n' + doc.metadata.url + '\n'} + ${isFile ? 'file' : 'web'}\n + ${isFile ? '' : '\n' + doc.metadata.url + '\n'} \n${doc.pageContent}\n\n - `, - ) - .join('\n')} + `; + } + ) + .join('\n'); -# User Query: ${state.originalQuery || state.query} - -Answer the user query: - `; + const formattedPrompt = await template.format({ + personaInstructions: this.personaInstructions, + conversationHistory: conversationHistory, + relevantDocuments: relevantDocuments, + query: state.originalQuery || state.query, + }); // Stream the response in real-time using LLM streaming capabilities let fullResponse = ''; @@ -100,7 +75,7 @@ Answer the user query: const stream = await this.llm.stream( [ - new SystemMessage(synthesisPrompt), + new SystemMessage(formattedPrompt), new HumanMessage(state.originalQuery || state.query), ], { signal: this.signal }, diff --git a/src/lib/agents/taskManagerAgent.ts b/src/lib/agents/taskManagerAgent.ts index 5dca421..bdc869f 100644 --- a/src/lib/agents/taskManagerAgent.ts +++ b/src/lib/agents/taskManagerAgent.ts @@ -74,7 +74,7 @@ export class TaskManagerAgent { }); return new Command({ - goto: 'web_search', + goto: 'content_router', update: { messages: [ new AIMessage( @@ -127,8 +127,15 @@ export class TaskManagerAgent { }); const template = PromptTemplate.fromTemplate(taskBreakdownPrompt); + + // Create file context information + const fileContext = state.fileIds && state.fileIds.length > 0 + ? `Files attached: ${state.fileIds.length} file(s) are available for analysis. 
Consider creating tasks that can leverage these attached files when appropriate.` + : 'No files attached: Focus on tasks that can be answered through web research or general knowledge.'; + const prompt = await template.format({ systemInstructions: this.systemInstructions, + fileContext: fileContext, query: state.query, }); @@ -182,7 +189,7 @@ export class TaskManagerAgent { : `Question broken down into ${taskLines.length} focused tasks for parallel processing`; return new Command({ - goto: 'web_search', // Next step would typically be web search for each task + goto: 'content_router', // Route to content router to decide between file search, web search, or analysis update: { messages: [new AIMessage(responseMessage)], tasks: taskLines, @@ -197,7 +204,7 @@ export class TaskManagerAgent { ); return new Command({ - goto: 'web_search', // Fallback to web search with original query + goto: 'content_router', // Fallback to content router with original query update: { messages: [errorMessage], tasks: [state.query], // Use original query as single task diff --git a/src/lib/prompts/analyzer.ts b/src/lib/prompts/analyzer.ts index 4be5565..9ccb67d 100644 --- a/src/lib/prompts/analyzer.ts +++ b/src/lib/prompts/analyzer.ts @@ -4,8 +4,21 @@ Your task is to analyze the provided context and determine if we have enough inf # Instructions - Carefully analyze the content of the context provided and the historical context of the conversation to determine if it contains sufficient information to answer the user's query - Use the content provided in the \`context\` tag, as well as the historical context of the conversation, to make your determination +- Consider both file-based documents (from attached files) and web-based documents when analyzing context - If the user is asking for a specific number of sources and the context does not provide enough, consider the content insufficient +# Source Type Awareness +When analyzing the context, be aware that documents may come from different sources: +- **File documents**: Content extracted from user-attached files (identified by metadata indicating file source) +- **Web documents**: Content retrieved from web searches (identified by URLs and web source metadata) +- **Mixed sources**: Both file and web content may be present + +Consider the following when evaluating sufficiency: +- File documents may contain user-specific, proprietary, or contextual information that cannot be found elsewhere +- Web documents provide current, general, and publicly available information +- The combination of both sources may be needed for comprehensive answers +- File content should be prioritized when answering questions specifically about attached documents + # Response Options Decision Tree ## Step 1: Check if content is sufficient @@ -14,6 +27,7 @@ Your task is to analyze the provided context and determine if we have enough inf - If the user is requesting to use the existing context to answer their query → respond with \`good_content\` - If the user is requesting to avoid web searches → respond with \`good_content\` - If the user is asking you to be creative, such as writing a story, poem, or creative content → respond with \`good_content\` unless the context is clearly insufficient +- If file documents contain complete information for file-specific queries → respond with \`good_content\` ## Step 2: If content is insufficient, determine the type of missing information @@ -50,11 +64,13 @@ Your task is to analyze the provided context and determine if we have enough inf - Comparative 
analysis between options - Expert opinions or reviews from credible sources - Statistical data or research findings +- Additional context to supplement file content with current information **Examples requiring more web search:** - "What are the latest features in iPhone 15?" (missing: recent tech specs) - "How to install Docker on Ubuntu 22.04?" (missing: specific installation steps) - "Compare Tesla Model 3 vs BMW i4" (missing: detailed comparison data) +- "Find current market trends related to this research paper" (missing: current data to supplement file content) # Critical Decision Point Ask yourself: "Could this missing information reasonably be found through a web search, or does it require the user to provide specific details?" @@ -62,6 +78,7 @@ Ask yourself: "Could this missing information reasonably be found through a web - If it's personal/subjective or requires user feedback → \`need_user_info\` - If it's factual and searchable → \`need_more_info\` - If the context is complete or the user wants to use the existing context → \`good_content\` +- If file content is complete for file-specific questions → \`good_content\` # System Instructions {systemInstructions} @@ -120,6 +137,15 @@ Your task is to analyze the provided context and user query to determine what ad - The question should not require user input, but rather be designed to gather more specific information that can help refine the search - Avoid giving the same guidance more than once, and avoid repeating the same question multiple times - Avoid asking for general information or vague details; focus on specific, actionable questions that can lead to concrete answers +- Consider that the context may contain both file-based documents (from attached files) and web-based documents +- When file content is present, focus on gathering additional information that complements or updates the file content + +# Source-Aware Search Strategy +When formulating search questions, consider: +- **File content supplementation**: If file documents are present, search for current information, updates, or external perspectives that complement the file content +- **Validation and verification**: Search for information that can validate or provide alternative viewpoints to file content +- **Current developments**: Search for recent developments or changes related to topics covered in file documents +- **Broader context**: Search for additional context that wasn't included in the file documents # Previous Analysis - The LLM analyzed the provided context and user query and determined that additional information is needed to fully answer the user's query, here is the analysis result: diff --git a/src/lib/prompts/contentRouter.ts b/src/lib/prompts/contentRouter.ts new file mode 100644 index 0000000..baaf6e7 --- /dev/null +++ b/src/lib/prompts/contentRouter.ts @@ -0,0 +1,86 @@ +export const contentRouterPrompt = `You are a content routing agent responsible for deciding the next step in information gathering. + +# Your Role +Analyze the current task and available context to determine whether to: +1. Search attached files (\`file_search\`) +2. Search the web (\`web_search\`) +3. 
Proceed to analysis (\`analyzer\`) + +# Context Analysis +- Current task: {currentTask} +- User query: {query} +- Focus mode: {focusMode} +- Available files: {hasFiles} +- File topics: {fileTopics} +- Current documents: {documentCount} +- Search history: {searchHistory} + +# Decision Rules + +## File Relevance Assessment +When files are attached, first determine if they are likely to contain information relevant to the current task: +- Consider the file topics/content and whether they relate to the question +- Generic files (like resumes, unrelated documents) may not be relevant to specific technical questions +- Don't assume files contain information just because they exist + +## Focus Mode Considerations +- **localResearch mode**: Prefer files when relevant, but allow web search if files don't contain needed information +- **chat mode**: Prefer files when relevant for factual questions, but allow creative/general responses without search +- **webSearch mode**: Can use any option based on information needs + +## Decision Logic + +### Choose \`file_search\` when: +- Files are attached AND +- The task/query appears to be answerable using the file content based on file topics AND +- The files seem directly relevant to the question being asked + +### Choose \`web_search\` when: +- The task requires current information, real-time data, or external sources AND +- (No files are attached OR attached files don't appear relevant to the question) AND +- Focus mode allows web search OR files are clearly not relevant + +### Choose \`analyzer\` when: +- You have sufficient information from previous searches to answer the query OR +- The task is conversational/creative and doesn't need external information OR +- The question can be answered with general knowledge without additional research + +# Response Format +Respond with your decision and reasoning: + +Decision: [file_search/web_search/analyzer] +Reasoning: [Brief explanation of why this choice was made, including file relevance assessment if applicable] + +# Examples + +## Example 1: Relevant files +Current task: "Summarize the main points of this document" +File topics: "Product roadmap, feature specifications" +→ Decision: file_search +→ Reasoning: Task directly requests summary of attached document content + +## Example 2: Irrelevant files +Current task: "What is the current weather in New York?" +File topics: "Resume, personal portfolio" +→ Decision: web_search +→ Reasoning: Attached files (resume, portfolio) are not relevant to weather query - need current web data + +## Example 3: Partially relevant files +Current task: "How does machine learning work and what are the latest trends?" 
+File topics: "ML basics tutorial" +→ Decision: file_search +→ Reasoning: Files contain ML basics which could help with first part, then may need web search for latest trends + +## Example 4: Technical question with unrelated files +Current task: "Explain React hooks" +File topics: "Marketing strategy document" +→ Decision: web_search +→ Reasoning: Marketing documents won't contain React programming information - need web search + +Your turn: +Current task: {currentTask} +Focus mode: {focusMode} +Available files: {hasFiles} +File topics: {fileTopics} + +Decision:`; diff --git a/src/lib/prompts/index.ts b/src/lib/prompts/index.ts index 3a15e6a..3b064b7 100644 --- a/src/lib/prompts/index.ts +++ b/src/lib/prompts/index.ts @@ -2,6 +2,7 @@ import { webSearchResponsePrompt, webSearchRetrieverPrompt } from './webSearch'; import { localResearchPrompt } from './localResearch'; import { chatPrompt } from './chat'; import { taskBreakdownPrompt } from './taskBreakdown'; +import { synthesizerPrompt } from './synthesizer'; const prompts = { webSearchResponsePrompt, @@ -9,6 +10,7 @@ const prompts = { localResearchPrompt, chatPrompt, taskBreakdownPrompt, + synthesizerPrompt, }; export default prompts; diff --git a/src/lib/prompts/synthesizer.ts b/src/lib/prompts/synthesizer.ts new file mode 100644 index 0000000..23e8952 --- /dev/null +++ b/src/lib/prompts/synthesizer.ts @@ -0,0 +1,48 @@ +export const synthesizerPrompt = `You are an expert information synthesizer. Based on the search results and analysis provided, create a comprehensive, well-structured answer to the user's query. + +# Response Instructions +Your task is to provide answers that are: +- **Informative and relevant**: Thoroughly address the user's query using the given context +- **Engaging and detailed**: Write responses that read like a high-quality blog post, including extra details and relevant insights +- **Cited and credible**: Use inline citations with [number] notation to refer to the context source(s) for each fact or detail included +- **Explanatory and Comprehensive**: Strive to explain the topic in depth, offering detailed analysis, insights, and clarifications wherever applicable + +# Formatting Instructions +## System Formatting Instructions +- **Structure**: Use a well-organized format with proper headings (e.g., "## Example heading 1" or "## Example heading 2"). Present information in paragraphs or concise bullet points where appropriate +- **Tone and Style**: Maintain a neutral, journalistic tone with engaging narrative flow. Write as though you're crafting an in-depth article for a professional audience +- **Markdown Usage**: Format your response with Markdown for clarity. Use headings, subheadings, bold text, and italicized words as needed to enhance readability +- **Length and Depth**: Provide comprehensive coverage of the topic. Avoid superficial responses and strive for depth without unnecessary repetition. 
Expand on technical or complex topics to make them easier to understand for a general audience +- **No main heading/title**: Start your response directly with the introduction unless asked to provide a specific title + +## User Formatting and Persona Instructions +- Give these instructions more weight than the system formatting instructions +{personaInstructions} + +# Citation Requirements +- Cite every single fact, statement, or sentence using [number] notation corresponding to the source from the provided context +- **File citations**: When citing content from attached files, use the filename as the source title in your citations +- **Web citations**: When citing content from web sources, use the webpage title and URL as the source +- If a statement is based on AI model inference or training data, it must be marked as \`[AI]\` and not cited from the context +- If a statement is based on previous messages in the conversation history, it must be marked as \`[Hist]\` and not cited from the context +- Integrate citations naturally at the end of sentences or clauses as appropriate. For example, "The Eiffel Tower is one of the most visited landmarks in the world[1]." +- Ensure that **every sentence in your response includes at least one citation**, even when information is inferred or connected to general knowledge available in the provided context +- Use multiple sources for a single detail if applicable, such as, "Paris is a cultural hub, attracting millions of visitors annually[1][2]." +- Always prioritize credibility and accuracy by linking all statements back to their respective context sources +- Avoid citing unsupported assumptions or personal interpretations; if no source supports a statement, clearly indicate the limitation +- **Source type awareness**: Be aware that sources may include both attached files (user documents) and web sources, and cite them appropriately + +# Examples of Proper File Citation +- "According to the project proposal[1], the deadline is set for March 2024." (when source 1 is a file named "project-proposal.pdf") +- "The research findings indicate significant improvements[2][3]." (when sources 2 and 3 are files) +- "The quarterly report shows a 15% increase in sales[1], while recent market analysis confirms this trend[2]." (mixing file and web sources) + +# Conversation History Context: +{conversationHistory} + +# Available Information: +{relevantDocuments} + +# User Query: {query} + +Answer the user query:`; diff --git a/src/lib/prompts/taskBreakdown.ts b/src/lib/prompts/taskBreakdown.ts index 152d83c..ec4bd0f 100644 --- a/src/lib/prompts/taskBreakdown.ts +++ b/src/lib/prompts/taskBreakdown.ts @@ -2,6 +2,9 @@ export const taskBreakdownPrompt = `You are a task breakdown specialist. Your jo {systemInstructions} +## File Context Awareness: +{fileContext} + ## Analysis Guidelines: ### When to Break Down: @@ -9,12 +12,21 @@ export const taskBreakdownPrompt = `You are a task breakdown specialist. Your jo 2. **Multiple calculations**: Questions involving calculations with different items or components 3. **Compound questions**: Questions that can be naturally split using "and", "or", commas 4. **Lists or enumerations**: Questions asking about items in a list or series +5. **File + external research**: Questions that require both analyzing attached files AND gathering external information ### When NOT to Break Down: 1. **Single focused question**: Already asks about one specific thing 2. 
**Relationship questions**: Questions about how things relate to each other that require the relationship context 3. **Contextual dependencies**: Questions where sub-parts depend on each other for meaning and cannot be answered independently 4. **Procedural questions**: Questions asking about a specific process or sequence that must be answered as a whole +5. **File-only questions**: Questions that can be fully answered using only the attached files + +### File-Aware Task Creation: +When files are attached, consider creating tasks that: +- **Analyze file content**: "Summarize the main findings in the attached document" +- **Extract specific information**: "What are the project timelines mentioned in the attached proposal?" +- **Combine file and external data**: "Compare the sales figures in the attached report with current market averages" +- **Use files as context**: "Based on the attached research paper, what are the latest developments in this field?" ### Sub-Question Rules: 1. Each sub-question should be **self-contained** and answerable independently @@ -24,8 +36,9 @@ export const taskBreakdownPrompt = `You are a task breakdown specialist. Your jo 5. Keep the **same question type** (factual, analytical, etc.) 6. Avoid introducing **new concepts** or information not present in the original question 7. **Do not** repeat the same question multiple times; each sub-question should be unique and focused on a specific aspect of the original query -8. Questions should **not** require user input for additional context; they should be designed to be answered by an LLM or through research via web search +8. Questions should **not** require user input for additional context; they should be designed to be answered by an LLM or through research via web search or file analysis 9. Do not ask questions that are based on opinion, personal preference, usage habits, subjective interpretation, etc... +10. **When files are attached**, prioritize tasks that can leverage file content before tasks requiring external research ## Examples: @@ -41,25 +54,23 @@ export const taskBreakdownPrompt = `You are a task breakdown specialist. Your jo "reasoning": "The question asks about capitals of three distinct geographical entities that can each be answered independently." }} -**Input**: "How many calories are in my meal of: One chicken breast, one apple, three oreo cookies, two cups of peanut butter" -**Analysis**: Multiple food items requiring separate calorie calculations +**Input**: "Summarize this research paper and find recent developments in the same field" (with file attached) +**Analysis**: File analysis + external research needed **Output**: {{ "tasks": [ - "How many calories are in one chicken breast?", - "How many calories are in one apple?", - "How many calories are in one oreo cookie?", - "How many calories are in one cup of peanut butter?" + "Summarize the main findings and conclusions from the attached research paper", + "Find recent developments and research in the same field as the attached paper" ], - "reasoning": "The question involves calculating calories for multiple distinct food items that can be researched separately and then combined." + "reasoning": "This requires both analyzing the attached file content and conducting external research on recent developments, which can be done independently and then combined." }} -**Input**: "What is the capital of France?" -**Analysis**: Single focused question, no breakdown needed +**Input**: "What are the key points in this document?" 
(with file attached) +**Analysis**: Single file-focused question **Output**: {{ - "tasks": ["What is the capital of France?"], - "reasoning": "This is already a single, focused question that doesn't require breaking down into smaller parts." + "tasks": ["What are the key points in the attached document?"], + "reasoning": "This is a single, focused question about the attached file content that doesn't require breaking down into smaller parts." }} **Input**: "Compare the economies of Japan and Germany" diff --git a/src/lib/search/agentSearch.ts b/src/lib/search/agentSearch.ts index 41729c2..75cc134 100644 --- a/src/lib/search/agentSearch.ts +++ b/src/lib/search/agentSearch.ts @@ -19,6 +19,8 @@ import { AnalyzerAgent, SynthesizerAgent, TaskManagerAgent, + FileSearchAgent, + ContentRouterAgent, } from '../agents'; /** @@ -33,7 +35,10 @@ export class AgentSearch { private webSearchAgent: WebSearchAgent; private analyzerAgent: AnalyzerAgent; private synthesizerAgent: SynthesizerAgent; + private fileSearchAgent: FileSearchAgent; + private contentRouterAgent: ContentRouterAgent; private emitter: EventEmitter; + private focusMode: string; constructor( llm: BaseChatModel, @@ -42,12 +47,14 @@ export class AgentSearch { systemInstructions: string = '', personaInstructions: string = '', signal: AbortSignal, + focusMode: string = 'webSearch', ) { this.llm = llm; this.embeddings = embeddings; this.checkpointer = new MemorySaver(); this.signal = signal; this.emitter = emitter; + this.focusMode = focusMode; // Initialize agents this.taskManagerAgent = new TaskManagerAgent( @@ -75,6 +82,19 @@ export class AgentSearch { personaInstructions, signal, ); + this.fileSearchAgent = new FileSearchAgent( + llm, + emitter, + systemInstructions, + signal, + embeddings, + ); + this.contentRouterAgent = new ContentRouterAgent( + llm, + emitter, + systemInstructions, + signal, + ); } /** @@ -86,14 +106,28 @@ export class AgentSearch { 'task_manager', this.taskManagerAgent.execute.bind(this.taskManagerAgent), { - ends: ['web_search', 'analyzer'], + ends: ['content_router', 'analyzer'], + }, + ) + .addNode( + 'content_router', + this.contentRouterAgent.execute.bind(this.contentRouterAgent), + { + ends: ['file_search', 'web_search', 'analyzer'], + }, + ) + .addNode( + 'file_search', + this.fileSearchAgent.execute.bind(this.fileSearchAgent), + { + ends: ['analyzer'], }, ) .addNode( 'web_search', this.webSearchAgent.execute.bind(this.webSearchAgent), { - ends: ['task_manager'], + ends: ['analyzer'], }, ) .addNode( @@ -118,12 +152,18 @@ export class AgentSearch { /** * Execute the agent search workflow */ - async searchAndAnswer(query: string, history: BaseMessage[] = []) { + async searchAndAnswer( + query: string, + history: BaseMessage[] = [], + fileIds: string[] = [] + ) { const workflow = this.createWorkflow(); const initialState = { messages: [...history, new HumanMessage(query)], query, + fileIds, + focusMode: this.focusMode, }; try { diff --git a/src/lib/search/metaSearchAgent.ts b/src/lib/search/metaSearchAgent.ts index 40c4fdf..546517d 100644 --- a/src/lib/search/metaSearchAgent.ts +++ b/src/lib/search/metaSearchAgent.ts @@ -39,6 +39,7 @@ export interface MetaSearchAgentType { systemInstructions: string, signal: AbortSignal, personaInstructions?: string, + focusMode?: string, ) => Promise; } @@ -679,9 +680,11 @@ ${docs[index].metadata?.url.toLowerCase().includes('file') ? 
'' : '\n' + do
       emitter: eventEmitter,
       message: string,
       history: BaseMessage[],
+      fileIds: string[],
       systemInstructions: string,
       personaInstructions: string,
       signal: AbortSignal,
+      focusMode: string,
     ) {
       try {
         const agentSearch = new AgentSearch(
@@ -691,10 +694,11 @@ ${docs[index].metadata?.url.toLowerCase().includes('file') ? '' : '\n' + do
           systemInstructions,
           personaInstructions,
           signal,
+          focusMode,
         );
 
         // Execute the agent workflow
-        await agentSearch.searchAndAnswer(message, history);
+        await agentSearch.searchAndAnswer(message, history, fileIds);
 
         // No need to emit end signals here since synthesizerAgent
         // is now streaming in real-time and emits them
@@ -720,6 +724,7 @@ ${docs[index].metadata?.url.toLowerCase().includes('file') ? '' : '\n' + do
     systemInstructions: string,
     signal: AbortSignal,
     personaInstructions?: string,
+    focusMode?: string,
   ) {
     const emitter = new eventEmitter();
@@ -732,9 +737,11 @@ ${docs[index].metadata?.url.toLowerCase().includes('file') ? '' : '\n' + do
       emitter,
       message,
       history,
+      fileIds,
       systemInstructions,
       personaInstructions || '',
       signal,
+      focusMode || 'webSearch',
     );
     return emitter;
   }
diff --git a/src/lib/utils/fileProcessing.ts b/src/lib/utils/fileProcessing.ts
new file mode 100644
index 0000000..a53ee08
--- /dev/null
+++ b/src/lib/utils/fileProcessing.ts
@@ -0,0 +1,113 @@
+import { Document } from 'langchain/document';
+import fs from 'node:fs';
+import path from 'node:path';
+import computeSimilarity from './computeSimilarity';
+
+/**
+ * File data interface for similarity search objects
+ */
+export interface FileData {
+  fileName: string;
+  content: string;
+  embeddings: number[];
+}
+
+/**
+ * Processes file IDs to extract content and create Document objects
+ * @param fileIds Array of file IDs to process
+ * @returns Array of Document objects with content and embeddings
+ */
+export async function processFilesToDocuments(
+  fileIds: string[],
+): Promise<Document[]> {
+  if (fileIds.length === 0) {
+    return [];
+  }
+
+  const filesData: FileData[] = fileIds
+    .map((file) => {
+      try {
+        const filePath = path.join(process.cwd(), 'uploads', file);
+
+        const contentPath = filePath + '-extracted.json';
+        const embeddingsPath = filePath + '-embeddings.json';
+
+        // Check if files exist
+        if (!fs.existsSync(contentPath) || !fs.existsSync(embeddingsPath)) {
+          console.warn(`File processing data not found for file: ${file}`);
+          return [];
+        }
+
+        const content = JSON.parse(fs.readFileSync(contentPath, 'utf8'));
+        const embeddings = JSON.parse(fs.readFileSync(embeddingsPath, 'utf8'));
+
+        const fileSimilaritySearchObject = content.contents.map(
+          (c: string, i: number) => {
+            return {
+              fileName: content.title,
+              content: c,
+              embeddings: embeddings.embeddings[i],
+            };
+          },
+        );
+
+        return fileSimilaritySearchObject;
+      } catch (error) {
+        console.error(`Error processing file ${file}:`, error);
+        return [];
+      }
+    })
+    .flat();
+
+  // Convert file data to Document objects
+  const documents = filesData.map((fileData) => {
+    return new Document({
+      pageContent: fileData.content,
+      metadata: {
+        title: fileData.fileName,
+        url: 'File', //TODO: Consider using a more meaningful URL or identifier especially for citation purposes
+        embeddings: fileData.embeddings,
+      },
+    });
+  });
+
+  return documents;
+}
+
+/**
+ * Ranks documents based on similarity to a query embedding
+ * @param queryEmbedding The embedding vector for the query
+ * @param documents Documents to rank
+ * @param maxDocs Maximum number of documents to return
+ * @param similarityThreshold Minimum similarity threshold (default: 0.3)
+ * @returns Ranked documents sorted by similarity
+ */
+export function getRankedDocs(
+  queryEmbedding: number[],
+  documents: Document[],
+  maxDocs: number = 8,
+  similarityThreshold: number = 0.3,
+): Document[] {
+  if (documents.length === 0) {
+    return [];
+  }
+
+  // Score each document chunk against the query embedding
+  const similarity = documents.map((doc, i) => {
+    const sim = computeSimilarity(
+      queryEmbedding,
+      doc.metadata?.embeddings || [],
+    );
+    return {
+      index: i,
+      similarity: sim,
+    };
+  });
+
+  const rankedDocs = similarity
+    .filter((sim) => sim.similarity > similarityThreshold)
+    .sort((a, b) => b.similarity - a.similarity)
+    .slice(0, maxDocs)
+    .map((sim) => documents[sim.index]);
+
+  return rankedDocs;
+}
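+
+// Usage sketch (illustrative only; `embeddings` is any LangChain Embeddings
+// instance and the file ID is hypothetical):
+//   const docs = await processFilesToDocuments(['a1b2c3d4']);
+//   const queryEmbedding = await embeddings.embedQuery('quarterly revenue');
+//   const topDocs = getRankedDocs(queryEmbedding, docs, 8, 0.3);
+//   // -> up to 8 Documents scoring above 0.3, best match first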