From f6eab5a95ae12ce11df29550972d69284c653adf Mon Sep 17 00:00:00 2001 From: Willie Zutz Date: Tue, 29 Jul 2025 10:18:11 -0600 Subject: [PATCH] feat(agent): Implement simplified chat agent tools and state management --- .github/copilot-instructions.md | 6 + src/app/globals.css | 5 +- .../dashboard/WidgetConfigModal.tsx | 3 +- src/components/dashboard/WidgetDisplay.tsx | 9 +- src/lib/agents/agentState.ts | 81 --- src/lib/agents/analyzerAgent.ts | 360 ---------- src/lib/agents/contentRouterAgent.ts | 233 ------- src/lib/agents/fileSearchAgent.ts | 238 ------- src/lib/agents/index.ts | 8 - src/lib/agents/synthesizerAgent.ts | 165 ----- src/lib/agents/taskManagerAgent.ts | 225 ------- src/lib/agents/urlSummarizationAgent.ts | 300 --------- src/lib/agents/webSearchAgent.ts | 461 ------------- src/lib/constants/dashboard.ts | 16 +- src/lib/hooks/useDashboard.ts | 182 ++--- src/lib/search/agentSearch.ts | 299 +-------- src/lib/search/simplifiedAgent.ts | 634 ++++++++++++++++++ src/lib/state/chatAgentState.ts | 72 ++ src/lib/tools/agents/fileSearchTool.ts | 148 ++++ src/lib/tools/agents/index.ts | 51 ++ src/lib/tools/agents/simpleWebSearchTool.ts | 228 +++++++ src/lib/tools/agents/taskManagerTool.ts | 112 ++++ src/lib/tools/agents/urlSummarizationTool.ts | 200 ++++++ src/lib/tools/agents/webSearchTool.ts | 314 +++++++++ src/lib/tools/index.ts | 6 + src/lib/utils/analyzePreviewContent.ts | 2 +- src/lib/utils/summarizeWebContent.ts | 4 +- 27 files changed, 1929 insertions(+), 2433 deletions(-) delete mode 100644 src/lib/agents/agentState.ts delete mode 100644 src/lib/agents/analyzerAgent.ts delete mode 100644 src/lib/agents/contentRouterAgent.ts delete mode 100644 src/lib/agents/fileSearchAgent.ts delete mode 100644 src/lib/agents/index.ts delete mode 100644 src/lib/agents/synthesizerAgent.ts delete mode 100644 src/lib/agents/taskManagerAgent.ts delete mode 100644 src/lib/agents/urlSummarizationAgent.ts delete mode 100644 src/lib/agents/webSearchAgent.ts create mode 100644 src/lib/search/simplifiedAgent.ts create mode 100644 src/lib/state/chatAgentState.ts create mode 100644 src/lib/tools/agents/fileSearchTool.ts create mode 100644 src/lib/tools/agents/index.ts create mode 100644 src/lib/tools/agents/simpleWebSearchTool.ts create mode 100644 src/lib/tools/agents/taskManagerTool.ts create mode 100644 src/lib/tools/agents/urlSummarizationTool.ts create mode 100644 src/lib/tools/agents/webSearchTool.ts diff --git a/.github/copilot-instructions.md b/.github/copilot-instructions.md index 1cf1e63..65acac0 100644 --- a/.github/copilot-instructions.md +++ b/.github/copilot-instructions.md @@ -143,3 +143,9 @@ When working on this codebase, you might need to: - Use try/catch blocks for async operations - Return structured error responses from API routes + +## Available Tools and Help + +- You can use the context7 tool to get help using the following identifiers for libraries used in this project + - `/langchain-ai/langchainjs` for LangChain + - `/langchain-ai/langgraph` for LangGraph diff --git a/src/app/globals.css b/src/app/globals.css index 64516f4..f1832da 100644 --- a/src/app/globals.css +++ b/src/app/globals.css @@ -1,12 +1,11 @@ /* React Grid Layout styles */ -@import "react-grid-layout/css/styles.css"; -@import "react-resizable/css/styles.css"; +@import 'react-grid-layout/css/styles.css'; +@import 'react-resizable/css/styles.css'; @tailwind base; @tailwind components; @tailwind utilities; - @layer base { .overflow-hidden-scrollable { -ms-overflow-style: none; diff --git 
a/src/components/dashboard/WidgetConfigModal.tsx b/src/components/dashboard/WidgetConfigModal.tsx
index 1f47ddf..a19af6a 100644
--- a/src/components/dashboard/WidgetConfigModal.tsx
+++ b/src/components/dashboard/WidgetConfigModal.tsx
@@ -435,7 +435,8 @@ const WidgetConfigModal = ({
 ) : (
- Click "Run Preview" to see how your widget will look
+ Click "Run Preview" to see how your widget
+ will look
 )}
diff --git a/src/components/dashboard/WidgetDisplay.tsx b/src/components/dashboard/WidgetDisplay.tsx
index 5a8255d..4410970 100644
--- a/src/components/dashboard/WidgetDisplay.tsx
+++ b/src/components/dashboard/WidgetDisplay.tsx
@@ -55,13 +55,16 @@ const WidgetDisplay = ({
{/* Drag Handle */} -
- +
- + {widget.title} diff --git a/src/lib/agents/agentState.ts b/src/lib/agents/agentState.ts deleted file mode 100644 index 7d32c02..0000000 --- a/src/lib/agents/agentState.ts +++ /dev/null @@ -1,81 +0,0 @@ -import { BaseMessage } from '@langchain/core/messages'; -import { Annotation, END } from '@langchain/langgraph'; -import { Document } from 'langchain/document'; - -/** - * State interface for the agent supervisor workflow - */ -export const AgentState = Annotation.Root({ - messages: Annotation({ - reducer: (x, y) => x.concat(y), - default: () => [], - }), - query: Annotation({ - reducer: (x, y) => y ?? x, - default: () => '', - }), - relevantDocuments: Annotation({ - reducer: (x, y) => x.concat(y), - default: () => [], - }), - bannedSummaryUrls: Annotation({ - reducer: (x, y) => x.concat(y), - default: () => [], - }), - bannedPreviewUrls: Annotation({ - reducer: (x, y) => x.concat(y), - default: () => [], - }), - searchInstructionHistory: Annotation({ - reducer: (x, y) => x.concat(y), - default: () => [], - }), - searchInstructions: Annotation({ - reducer: (x, y) => y ?? x, - default: () => '', - }), - next: Annotation({ - reducer: (x, y) => y ?? x ?? END, - default: () => END, - }), - analysis: Annotation({ - reducer: (x, y) => y ?? x, - default: () => '', - }), - fullAnalysisAttempts: Annotation({ - reducer: (x, y) => (y ?? 0) + x, - default: () => 0, - }), - tasks: Annotation({ - reducer: (x, y) => y ?? x, - default: () => [], - }), - currentTaskIndex: Annotation({ - reducer: (x, y) => y ?? x, - default: () => 0, - }), - originalQuery: Annotation({ - reducer: (x, y) => y ?? x, - default: () => '', - }), - fileIds: Annotation({ - reducer: (x, y) => y ?? x, - default: () => [], - }), - focusMode: Annotation({ - reducer: (x, y) => y ?? x, - default: () => 'webSearch', - }), - urlsToSummarize: Annotation({ - reducer: (x, y) => y ?? x, - default: () => [], - }), - summarizationIntent: Annotation({ - reducer: (x, y) => y ?? x, - default: () => '', - }), - recursionLimitReached: Annotation({ - reducer: (x, y) => y ?? 
x, - default: () => false, - }), -}); diff --git a/src/lib/agents/analyzerAgent.ts b/src/lib/agents/analyzerAgent.ts deleted file mode 100644 index 085ed1e..0000000 --- a/src/lib/agents/analyzerAgent.ts +++ /dev/null @@ -1,360 +0,0 @@ -import { BaseChatModel } from '@langchain/core/language_models/chat_models'; -import { - AIMessage, - HumanMessage, - SystemMessage, -} from '@langchain/core/messages'; -import { ChatPromptTemplate } from '@langchain/core/prompts'; -import { Command, END } from '@langchain/langgraph'; -import { EventEmitter } from 'events'; -import { z } from 'zod'; -import LineOutputParser from '../outputParsers/lineOutputParser'; -import { formatDateForLLM } from '../utils'; -import { AgentState } from './agentState'; -import { setTemperature } from '../utils/modelUtils'; -import { - additionalUserInputPrompt, - additionalWebSearchPrompt, - decideNextActionPrompt, -} from '../prompts/analyzer'; -import { - removeThinkingBlocks, - removeThinkingBlocksFromMessages, -} from '../utils/contentUtils'; -import { withStructuredOutput } from '../utils/structuredOutput'; -import next from 'next'; - -// Define Zod schemas for structured output -const NextActionSchema = z.object({ - action: z - .enum(['good_content', 'need_user_info', 'need_more_info']) - .describe('The next action to take based on content analysis'), - reasoning: z - .string() - .describe('Brief explanation of why this action was chosen'), -}); - -const UserInfoRequestSchema = z.object({ - question: z - .string() - .describe('A detailed question to ask the user for additional information'), - reasoning: z - .string() - .describe('Explanation of why this information is needed'), -}); - -const SearchRefinementSchema = z.object({ - question: z - .string() - .describe('A refined search question to gather more specific information'), - reasoning: z - .string() - .describe( - 'Explanation of what information is missing and why this search will help', - ), -}); - -export class AnalyzerAgent { - private llm: BaseChatModel; - private emitter: EventEmitter; - private systemInstructions: string; - private signal: AbortSignal; - - constructor( - llm: BaseChatModel, - emitter: EventEmitter, - systemInstructions: string, - signal: AbortSignal, - ) { - this.llm = llm; - this.emitter = emitter; - this.systemInstructions = systemInstructions; - this.signal = signal; - } - - async execute(state: typeof AgentState.State): Promise { - try { - //setTemperature(this.llm, 0.0); - - // Initialize originalQuery if not set - if (!state.originalQuery) { - state.originalQuery = state.query; - } - - // Check for URLs first - if found and not yet processed, route to URL summarization - if (!state.urlsToSummarize || state.urlsToSummarize.length === 0) { - const urlRegex = /https?:\/\/[^\s]+/gi; - const urls = [...new Set(state.query.match(urlRegex) || [])]; - - if (urls.length > 0) { - console.log( - 'URLs detected in initial query, routing to URL summarization', - ); - console.log(`URLs found: ${urls.join(', ')}`); - - // Emit URL detection event - this.emitter.emit('agent_action', { - type: 'agent_action', - data: { - action: 'URLS_DETECTED_ROUTING', - message: `Detected ${urls.length} URL(s) in query - processing content first`, - details: { - query: state.query, - urls: urls, - }, - }, - }); - - return new Command({ - goto: 'url_summarization', - update: { - urlsToSummarize: urls, - summarizationIntent: `Process the content from the provided URLs to help answer: ${state.query}`, - }, - }); - } - } - - // Skip full analysis if this is the first 
run. - //if (state.fullAnalysisAttempts > 0) { - // Emit initial analysis event - this.emitter.emit('agent_action', { - type: 'agent_action', - data: { - action: 'ANALYZING_CONTEXT', - message: - 'Analyzing the context to see if we have enough information to answer the query', - details: { - documentCount: state.relevantDocuments.length, - query: state.query, - searchIterations: state.searchInstructionHistory.length, - }, - }, - }); - - console.log( - `Analyzing ${state.relevantDocuments.length} documents for relevance...`, - ); - - const nextActionPrompt = await ChatPromptTemplate.fromTemplate( - decideNextActionPrompt, - ).format({ - systemInstructions: this.systemInstructions, - context: state.relevantDocuments - .map( - (doc, index) => - `${doc?.metadata?.title ? `${doc?.metadata?.title}` : ''}${doc?.metadata.url ? `${doc?.metadata?.url}` : ''}${doc.pageContent}`, - ) - .join('\n\n'), - date: formatDateForLLM(new Date()), - searchInstructionHistory: state.searchInstructionHistory - .map((question) => `- ${question}`) - .join('\n'), - query: state.originalQuery || state.query, // Use original query for analysis context - }); - - const thinkingBlocksRemovedMessages = removeThinkingBlocksFromMessages( - state.messages, - ); - - // Use structured output for next action decision - const structuredLlm = withStructuredOutput(this.llm, NextActionSchema, { - name: 'analyze_content', - }); - - const nextActionResponse = await structuredLlm.invoke( - [...thinkingBlocksRemovedMessages, new HumanMessage(nextActionPrompt)], - { signal: this.signal }, - ); - - console.log('Next action response:', nextActionResponse); - - if (nextActionResponse.action !== 'good_content') { - // If we don't have enough information, but we still have available tasks, proceed with the next task - - if (state.tasks && state.tasks.length > 0) { - const hasMoreTasks = state.currentTaskIndex < state.tasks.length - 1; - - if (hasMoreTasks) { - return new Command({ - goto: 'task_manager', - }); - } - } - - if (nextActionResponse.action === 'need_user_info') { - // Use structured output for user info request - const userInfoLlm = withStructuredOutput( - this.llm, - UserInfoRequestSchema, - { - name: 'request_user_info', - }, - ); - - const moreUserInfoPrompt = await ChatPromptTemplate.fromTemplate( - additionalUserInputPrompt, - ).format({ - systemInstructions: this.systemInstructions, - context: state.relevantDocuments - .map( - (doc, index) => - `${doc?.metadata?.title ? 
`${doc?.metadata?.title}` : ''}${doc.pageContent}`, - ) - .join('\n\n'), - date: formatDateForLLM(new Date()), - searchInstructionHistory: state.searchInstructionHistory - .map((question) => `- ${question}`) - .join('\n'), - query: state.originalQuery || state.query, // Use original query for user info context - previousAnalysis: nextActionResponse.reasoning, // Include reasoning from previous analysis - }); - - const userInfoRequest = await userInfoLlm.invoke( - [ - ...removeThinkingBlocksFromMessages(state.messages), - new HumanMessage(moreUserInfoPrompt), - ], - { signal: this.signal }, - ); - - // Emit the complete question to the user - this.emitter.emit( - 'data', - JSON.stringify({ - type: 'response', - data: userInfoRequest.question, - }), - ); - - this.emitter.emit('end'); - - // Create the final response message with the complete content - const response = new SystemMessage(userInfoRequest.question); - - return new Command({ - goto: END, - update: { - messages: [response], - }, - }); - } - - // If we need more information from the LLM, generate a more specific search query - // Use structured output for search refinement - const searchRefinementLlm = withStructuredOutput( - this.llm, - SearchRefinementSchema, - { - name: 'refine_search', - }, - ); - - const moreInfoPrompt = await ChatPromptTemplate.fromTemplate( - additionalWebSearchPrompt, - ).format({ - systemInstructions: this.systemInstructions, - context: state.relevantDocuments - .map( - (doc, index) => - `${doc?.metadata?.title ? `\n${doc?.metadata?.title}` : ''}\n${doc.pageContent}\n`, - ) - .join('\n\n'), - date: formatDateForLLM(new Date()), - searchInstructionHistory: state.searchInstructionHistory - .map((question) => `- ${question}`) - .join('\n'), - query: state.originalQuery || state.query, // Use original query for more info context - previousAnalysis: nextActionResponse.reasoning, // Include reasoning from previous analysis - }); - - const searchRefinement = await searchRefinementLlm.invoke( - [ - ...removeThinkingBlocksFromMessages(state.messages), - new HumanMessage(moreInfoPrompt), - ], - { signal: this.signal }, - ); - - // Emit reanalyzing event when we need more information - this.emitter.emit('agent_action', { - type: 'agent_action', - data: { - action: 'MORE_DATA_NEEDED', - message: - 'Current context is insufficient - analyzing search requirements', - details: { - nextSearchQuery: searchRefinement.question, - documentCount: state.relevantDocuments.length, - searchIterations: state.searchInstructionHistory.length, - query: state.originalQuery || state.query, // Show original query in details - currentSearchFocus: searchRefinement.question, - }, - }, - }); - - return new Command({ - goto: 'task_manager', - update: { - // messages: [ - // new AIMessage( - // `The following question can help refine the search: ${searchRefinement.question}`, - // ), - // ], - query: searchRefinement.question, // Use the refined question for TaskManager to analyze - searchInstructions: searchRefinement.question, - searchInstructionHistory: [ - ...(state.searchInstructionHistory || []), - searchRefinement.question, - ], - fullAnalysisAttempts: 1, - originalQuery: state.originalQuery || state.query, // Preserve the original user query - // Reset task list so TaskManager can break down the search requirements again - tasks: [], - currentTaskIndex: 0, - }, - }); - } - - // Emit information gathering complete event when we have sufficient information - this.emitter.emit('agent_action', { - type: 'agent_action', - data: { - action: 
'INFORMATION_GATHERING_COMPLETE', - message: 'Ready to respond.', - details: { - documentCount: state.relevantDocuments.length, - searchIterations: state.searchInstructionHistory.length, - totalTasks: state.tasks?.length || 1, - query: state.originalQuery || state.query, - }, - }, - }); - - return new Command({ - goto: 'synthesizer', - // update: { - // messages: [ - // new AIMessage( - // `Analysis completed. We have sufficient information to answer the query.`, - // ), - // ], - // }, - }); - } catch (error) { - console.error('Analysis error:', error); - const errorMessage = new AIMessage( - `Analysis failed: ${error instanceof Error ? error.message : 'Unknown error'}`, - ); - - return new Command({ - goto: END, - update: { - messages: [errorMessage], - }, - }); - } finally { - setTemperature(this.llm); // Reset temperature for subsequent actions - } - } -} diff --git a/src/lib/agents/contentRouterAgent.ts b/src/lib/agents/contentRouterAgent.ts deleted file mode 100644 index 732af3a..0000000 --- a/src/lib/agents/contentRouterAgent.ts +++ /dev/null @@ -1,233 +0,0 @@ -import { BaseChatModel } from '@langchain/core/language_models/chat_models'; -import { AIMessage } from '@langchain/core/messages'; -import { PromptTemplate } from '@langchain/core/prompts'; -import { Command, END } from '@langchain/langgraph'; -import { EventEmitter } from 'events'; -import { z } from 'zod'; -import fs from 'node:fs'; -import path from 'node:path'; -import { AgentState } from './agentState'; -import { contentRouterPrompt } from '../prompts/contentRouter'; -import { removeThinkingBlocksFromMessages } from '../utils/contentUtils'; -import { withStructuredOutput } from '../utils/structuredOutput'; - -// Define Zod schema for structured router decision output -const RouterDecisionSchema = z.object({ - decision: z - .enum(['file_search', 'web_search', 'analyzer']) - .describe('The next step to take in the workflow'), - reasoning: z.string().describe('Explanation of why this decision was made'), -}); - -type RouterDecision = z.infer; - -export class ContentRouterAgent { - private llm: BaseChatModel; - private emitter: EventEmitter; - private systemInstructions: string; - private signal: AbortSignal; - - constructor( - llm: BaseChatModel, - emitter: EventEmitter, - systemInstructions: string, - signal: AbortSignal, - ) { - this.llm = llm; - this.emitter = emitter; - this.systemInstructions = systemInstructions; - this.signal = signal; - } - - /** - * Content router agent node - */ - async execute(state: typeof AgentState.State): Promise { - try { - // Determine current task to process - const currentTask = - state.tasks && state.tasks.length > 0 - ? state.tasks[state.currentTaskIndex || 0] - : state.query; - - console.log( - `Content router processing task ${(state.currentTaskIndex || 0) + 1} of ${state.tasks?.length || 1}: "${currentTask}"`, - ); - - // Extract focus mode from state - this should now come from the API - const focusMode = state.focusMode || 'webSearch'; - - const hasFiles = state.fileIds && state.fileIds.length > 0; - const documentCount = state.relevantDocuments.length; - const searchHistory = state.searchInstructionHistory.join(', ') || 'None'; - - // Extract file topics if files are available - const fileTopics = hasFiles - ? await this.extractFileTopics(state.fileIds!) 
- : 'None'; - - // Emit routing decision event - this.emitter.emit('agent_action', { - type: 'agent_action', - data: { - action: 'ROUTING_DECISION', - message: `Determining optimal information source for current task`, - details: { - query: state.query, - currentTask: currentTask, - taskIndex: (state.currentTaskIndex || 0) + 1, - totalTasks: state.tasks?.length || 1, - focusMode: focusMode, - hasFiles: hasFiles, - fileCount: state.fileIds?.length || 0, - documentCount: documentCount, - searchIterations: state.searchInstructionHistory.length, - }, - }, - }); - - const template = PromptTemplate.fromTemplate(contentRouterPrompt); - const prompt = await template.format({ - systemInstructions: this.systemInstructions, - currentTask: currentTask, - query: state.originalQuery || state.query, - focusMode: focusMode, - hasFiles: hasFiles, - fileTopics: fileTopics, - documentCount: documentCount, - searchHistory: searchHistory, - }); - - // Use structured output for routing decision - const structuredLlm = withStructuredOutput( - this.llm, - RouterDecisionSchema, - { - name: 'route_content', - }, - ); - - const routerDecision = (await structuredLlm.invoke( - [...removeThinkingBlocksFromMessages(state.messages), prompt], - { signal: this.signal }, - )) as RouterDecision; - - console.log(`Router decision: ${routerDecision.decision}`); - console.log(`Router reasoning: ${routerDecision.reasoning}`); - console.log(`File topics: ${fileTopics}`); - console.log(`Focus mode: ${focusMode}`); - - // Validate decision based on focus mode restrictions - const validatedDecision = this.validateDecision( - routerDecision, - focusMode, - hasFiles, - ); - - // Emit routing result event - this.emitter.emit('agent_action', { - type: 'agent_action', - data: { - action: 'ROUTING_RESULT', - message: `Routing to ${validatedDecision.decision}: ${validatedDecision.reasoning}`, - details: { - query: state.query, - currentTask: currentTask, - taskIndex: (state.currentTaskIndex || 0) + 1, - totalTasks: state.tasks?.length || 1, - decision: validatedDecision.decision, - focusMode: focusMode, - hasFiles: hasFiles, - documentCount: documentCount, - searchIterations: state.searchInstructionHistory.length, - }, - }, - }); - - const responseMessage = `Content routing completed. Next step: ${validatedDecision.decision}`; - console.log(responseMessage); - - return new Command({ - goto: validatedDecision.decision, - // update: { - // messages: [new AIMessage(responseMessage)], - // }, - }); - } catch (error) { - console.error('Content router error:', error); - const errorMessage = new AIMessage( - `Content routing failed: ${error instanceof Error ? 
error.message : 'Unknown error'}`, - ); - - return new Command({ - goto: END, - update: { - messages: [errorMessage], - }, - }); - } - } - - /** - * Extract semantic topics from attached files for relevance assessment - */ - private async extractFileTopics(fileIds: string[]): Promise { - try { - const topics = fileIds.map((fileId) => { - try { - const filePath = path.join(process.cwd(), 'uploads', fileId); - const contentPath = filePath + '-extracted.json'; - - if (fs.existsSync(contentPath)) { - const content = JSON.parse(fs.readFileSync(contentPath, 'utf8')); - const filename = content.title || 'Document'; - - // Use LLM-generated semantic topics if available, otherwise fall back to filename - const semanticTopics = content.topics; - return semanticTopics || filename; - } - return 'Unknown Document'; - } catch (error) { - console.warn(`Error extracting topic for file ${fileId}:`, error); - return 'Unknown Document'; - } - }); - - return topics.join('; '); - } catch (error) { - console.warn('Error extracting file topics:', error); - return 'Unable to determine file topics'; - } - } - - /** - * Validate and potentially override the router decision based on focus mode restrictions - */ - private validateDecision( - decision: RouterDecision, - focusMode: string, - hasFiles: boolean, - ): RouterDecision { - // Enforce focus mode restrictions for chat and localResearch modes - if ( - (focusMode === 'chat' || focusMode === 'localResearch') && - decision.decision === 'web_search' - ) { - // Override to file_search if files are available, otherwise analyzer - const fallbackDecision = hasFiles ? 'file_search' : 'analyzer'; - - console.log( - `Overriding web_search decision to ${fallbackDecision} due to focus mode restriction: ${focusMode}`, - ); - - return { - decision: fallbackDecision as 'file_search' | 'analyzer', - reasoning: `Overridden to ${fallbackDecision} - web search not allowed in ${focusMode} mode. ${decision.reasoning}`, - }; - } - - // For webSearch mode, trust the LLM's decision about file relevance - // No overrides needed - the enhanced prompt handles file relevance assessment - return decision; - } -} diff --git a/src/lib/agents/fileSearchAgent.ts b/src/lib/agents/fileSearchAgent.ts deleted file mode 100644 index 578dd24..0000000 --- a/src/lib/agents/fileSearchAgent.ts +++ /dev/null @@ -1,238 +0,0 @@ -import { BaseChatModel } from '@langchain/core/language_models/chat_models'; -import { AIMessage } from '@langchain/core/messages'; -import { Command, END } from '@langchain/langgraph'; -import { EventEmitter } from 'events'; -import { Document } from 'langchain/document'; -import { AgentState } from './agentState'; -import { Embeddings } from '@langchain/core/embeddings'; -import { - processFilesToDocuments, - getRankedDocs, -} from '../utils/fileProcessing'; - -export class FileSearchAgent { - private llm: BaseChatModel; - private emitter: EventEmitter; - private systemInstructions: string; - private signal: AbortSignal; - private embeddings: Embeddings; - - constructor( - llm: BaseChatModel, - emitter: EventEmitter, - systemInstructions: string, - signal: AbortSignal, - embeddings: Embeddings, - ) { - this.llm = llm; - this.emitter = emitter; - this.systemInstructions = systemInstructions; - this.signal = signal; - this.embeddings = embeddings; - } - - /** - * File search agent node - */ - async execute(state: typeof AgentState.State): Promise { - try { - // Determine current task to process - const currentTask = - state.tasks && state.tasks.length > 0 - ? 
state.tasks[state.currentTaskIndex || 0] - : state.query; - - console.log( - `Processing file search for task ${(state.currentTaskIndex || 0) + 1} of ${state.tasks?.length || 1}: "${currentTask}"`, - ); - - // Check if we have file IDs to process - if (!state.fileIds || state.fileIds.length === 0) { - console.log('No files attached for search'); - return new Command({ - goto: 'analyzer', - update: { - messages: [new AIMessage('No files attached to search.')], - }, - }); - } - - // Emit consulting attached files event - this.emitter.emit('agent_action', { - type: 'agent_action', - data: { - action: 'CONSULTING_ATTACHED_FILES', - message: `Consulting attached files...`, - details: { - query: state.query, - currentTask: currentTask, - taskIndex: (state.currentTaskIndex || 0) + 1, - totalTasks: state.tasks?.length || 1, - fileCount: state.fileIds.length, - documentCount: state.relevantDocuments.length, - }, - }, - }); - - // Process files to documents - const fileDocuments = await processFilesToDocuments(state.fileIds); - - if (fileDocuments.length === 0) { - console.log('No processable file content found'); - return new Command({ - goto: 'analyzer', - // update: { - // messages: [ - // new AIMessage('No searchable content found in attached files.'), - // ], - // }, - }); - } - - console.log( - `Processed ${fileDocuments.length} file documents for search`, - ); - - // Emit searching file content event - this.emitter.emit('agent_action', { - type: 'agent_action', - data: { - action: 'SEARCHING_FILE_CONTENT', - message: `Searching through ${fileDocuments.length} file sections for relevant information`, - details: { - query: state.query, - currentTask: currentTask, - taskIndex: (state.currentTaskIndex || 0) + 1, - totalTasks: state.tasks?.length || 1, - fileDocumentCount: fileDocuments.length, - documentCount: state.relevantDocuments.length, - }, - }, - }); - - // Generate query embedding for similarity search - const queryEmbedding = await this.embeddings.embedQuery( - state.originalQuery + ' ' + currentTask, - ); - - // Perform similarity search over file documents - const rankedDocuments = getRankedDocs( - queryEmbedding, - fileDocuments, - 12, // maxDocs - 0.3, // similarity threshold - ); - - console.log(`Found ${rankedDocuments.length} relevant file sections`); - - if (rankedDocuments.length === 0) { - // Emit no relevant content event - this.emitter.emit('agent_action', { - type: 'agent_action', - data: { - action: 'NO_RELEVANT_FILE_CONTENT', - message: `No relevant content found in attached files for the current task`, - details: { - query: state.query, - currentTask: currentTask, - taskIndex: (state.currentTaskIndex || 0) + 1, - totalTasks: state.tasks?.length || 1, - searchedDocuments: fileDocuments.length, - documentCount: state.relevantDocuments.length, - }, - }, - }); - - return new Command({ - goto: 'analyzer', - // update: { - // messages: [ - // new AIMessage( - // 'No relevant content found in attached files for the current task.', - // ), - // ], - // }, - }); - } - - // Emit file content found event - this.emitter.emit('agent_action', { - type: 'agent_action', - data: { - action: 'FILE_CONTENT_FOUND', - message: `Found ${rankedDocuments.length} relevant sections in attached files`, - details: { - query: state.query, - currentTask: currentTask, - taskIndex: (state.currentTaskIndex || 0) + 1, - totalTasks: state.tasks?.length || 1, - relevantSections: rankedDocuments.length, - searchedDocuments: fileDocuments.length, - documentCount: - state.relevantDocuments.length + 
rankedDocuments.length, - }, - }, - }); - - const responseMessage = `File search completed. Found ${rankedDocuments.length} relevant sections in attached files.`; - console.log(responseMessage); - - return new Command({ - goto: 'analyzer', // Route back to analyzer to process the results - update: { - // messages: [new AIMessage(responseMessage)], - relevantDocuments: rankedDocuments, - }, - }); - } catch (error) { - console.error('File search error:', error); - const errorMessage = new AIMessage( - `File search failed: ${error instanceof Error ? error.message : 'Unknown error'}`, - ); - - return new Command({ - goto: END, - update: { - messages: [errorMessage], - }, - }); - } - } - - /** - * Perform a similarity search over file documents - * @param state The current agent state - * @returns Ranked documents relevant to the current task - */ - async search(state: typeof AgentState.State): Promise { - if (!state.fileIds || state.fileIds.length === 0) { - return []; - } - - // Process files to documents - const fileDocuments = await processFilesToDocuments(state.fileIds); - - if (fileDocuments.length === 0) { - return []; - } - - // Determine current task to search for - const currentTask = - state.tasks && state.tasks.length > 0 - ? state.tasks[state.currentTaskIndex || 0] - : state.query; - - // Generate query embedding for similarity search - const queryEmbedding = await this.embeddings.embedQuery( - state.originalQuery + ' ' + currentTask, - ); - - // Perform similarity search and return ranked documents - return getRankedDocs( - queryEmbedding, - fileDocuments, - 8, // maxDocs - 0.3, // similarity threshold - ); - } -} diff --git a/src/lib/agents/index.ts b/src/lib/agents/index.ts deleted file mode 100644 index b6e5aff..0000000 --- a/src/lib/agents/index.ts +++ /dev/null @@ -1,8 +0,0 @@ -export { AgentState } from './agentState'; -export { WebSearchAgent } from './webSearchAgent'; -export { AnalyzerAgent } from './analyzerAgent'; -export { SynthesizerAgent } from './synthesizerAgent'; -export { TaskManagerAgent } from './taskManagerAgent'; -export { FileSearchAgent } from './fileSearchAgent'; -export { ContentRouterAgent } from './contentRouterAgent'; -export { URLSummarizationAgent } from './urlSummarizationAgent'; diff --git a/src/lib/agents/synthesizerAgent.ts b/src/lib/agents/synthesizerAgent.ts deleted file mode 100644 index 7342fe0..0000000 --- a/src/lib/agents/synthesizerAgent.ts +++ /dev/null @@ -1,165 +0,0 @@ -import { BaseChatModel } from '@langchain/core/language_models/chat_models'; -import { HumanMessage, SystemMessage } from '@langchain/core/messages'; -import { PromptTemplate } from '@langchain/core/prompts'; -import { Command, END } from '@langchain/langgraph'; -import { EventEmitter } from 'events'; -import { getModelName } from '../utils/modelUtils'; -import { AgentState } from './agentState'; -import { removeThinkingBlocksFromMessages } from '../utils/contentUtils'; -import { synthesizerPrompt } from '../prompts/synthesizer'; - -export class SynthesizerAgent { - private llm: BaseChatModel; - private emitter: EventEmitter; - private personaInstructions: string; - private signal: AbortSignal; - - constructor( - llm: BaseChatModel, - emitter: EventEmitter, - personaInstructions: string, - signal: AbortSignal, - ) { - this.llm = llm; - this.emitter = emitter; - this.personaInstructions = personaInstructions; - this.signal = signal; - } - - /** - * Synthesizer agent node that combines information to answer the query - */ - async execute(state: typeof AgentState.State): 
Promise { - try { - // Format the prompt using the external template - const template = PromptTemplate.fromTemplate(synthesizerPrompt); - - const conversationHistory = - removeThinkingBlocksFromMessages(state.messages) - .map((msg) => `<${msg.getType()}>${msg.content}`) - .join('\n') || 'No previous conversation context'; - - const relevantDocuments = state.relevantDocuments - .map((doc, index) => { - const isFile = doc.metadata?.url?.toLowerCase().includes('file'); - return `<${index + 1}>\n - ${doc.metadata.title} - ${isFile ? 'file' : 'web'} - ${isFile ? '' : '\n' + doc.metadata.url + ''} - \n${doc.pageContent}\n -`; - }) - .join('\n'); - - const recursionLimitMessage = state.recursionLimitReached - ? `# ⚠️ IMPORTANT NOTICE - LIMITED INFORMATION -**The search process was interrupted due to complexity limits. You MUST start your response with a warning about incomplete information and qualify all statements appropriately.** -## ⚠️ CRITICAL: Incomplete Information Response Requirements -**You MUST:** -1. **Start your response** with a clear warning that the information may be incomplete or conflicting -2. **Acknowledge limitations** throughout your response where information gaps exist -3. **Be transparent** about what you cannot determine from the available sources -4. **Suggest follow-up actions** for the user to get more complete information -5. **Qualify your statements** with phrases like "based on available information" or "from the limited sources gathered" - -**Example opening for incomplete information responses:** -"⚠️ **Please note:** This response is based on incomplete information due to search complexity limits. The findings below may be missing important details or conflicting perspectives. I recommend verifying this information through additional research or rephrasing your query for better results. - -` - : ''; - - // If we have limited documents due to recursion limit, acknowledge this - const documentsAvailable = state.relevantDocuments?.length || 0; - const limitedInfoNote = - state.recursionLimitReached && documentsAvailable === 0 - ? '**CRITICAL: No source documents were gathered due to search limitations.**\n\n' - : state.recursionLimitReached - ? 
`**NOTICE: Search was interrupted with ${documentsAvailable} documents gathered.**\n\n` - : ''; - - const formattedPrompt = await template.format({ - personaInstructions: this.personaInstructions, - conversationHistory: conversationHistory, - relevantDocuments: relevantDocuments, - query: state.originalQuery || state.query, - recursionLimitReached: recursionLimitMessage + limitedInfoNote, - }); - - // Stream the response in real-time using LLM streaming capabilities - let fullResponse = ''; - - // Emit the sources as a data response - this.emitter.emit( - 'data', - JSON.stringify({ - type: 'sources', - data: state.relevantDocuments, - searchQuery: '', - searchUrl: '', - }), - ); - - const stream = await this.llm.stream( - [ - new SystemMessage(formattedPrompt), - new HumanMessage(state.originalQuery || state.query), - ], - { signal: this.signal }, - ); - - for await (const chunk of stream) { - if (this.signal.aborted) { - break; - } - - const content = chunk.content; - if (typeof content === 'string' && content.length > 0) { - fullResponse += content; - - // Emit each chunk as a data response in real-time - this.emitter.emit( - 'data', - JSON.stringify({ - type: 'response', - data: content, - }), - ); - } - } - - // Emit model stats and end signal after streaming is complete - const modelName = getModelName(this.llm); - this.emitter.emit( - 'stats', - JSON.stringify({ - type: 'modelStats', - data: { modelName }, - }), - ); - - this.emitter.emit('end'); - - // Create the final response message with the complete content - const response = new SystemMessage(fullResponse); - - return new Command({ - goto: END, - update: { - messages: [response], - }, - }); - } catch (error) { - console.error('Synthesis error:', error); - const errorMessage = new SystemMessage( - `Failed to synthesize answer: ${error instanceof Error ? 
error.message : 'Unknown error'}`, - ); - - return new Command({ - goto: END, - update: { - messages: [errorMessage], - }, - }); - } - } -} diff --git a/src/lib/agents/taskManagerAgent.ts b/src/lib/agents/taskManagerAgent.ts deleted file mode 100644 index cd11374..0000000 --- a/src/lib/agents/taskManagerAgent.ts +++ /dev/null @@ -1,225 +0,0 @@ -import { BaseChatModel } from '@langchain/core/language_models/chat_models'; -import { AIMessage } from '@langchain/core/messages'; -import { PromptTemplate } from '@langchain/core/prompts'; -import { Command } from '@langchain/langgraph'; -import { EventEmitter } from 'events'; -import { z } from 'zod'; -import { taskBreakdownPrompt } from '../prompts/taskBreakdown'; -import { AgentState } from './agentState'; -import { setTemperature } from '../utils/modelUtils'; -import { withStructuredOutput } from '../utils/structuredOutput'; - -// Define Zod schema for structured task breakdown output -const TaskBreakdownSchema = z.object({ - tasks: z - .array(z.string()) - .describe( - 'Array of specific, focused tasks broken down from the original query', - ), - reasoning: z - .string() - .describe( - 'Explanation of how and why the query was broken down into these tasks', - ), -}); - -type TaskBreakdown = z.infer; - -export class TaskManagerAgent { - private llm: BaseChatModel; - private emitter: EventEmitter; - private systemInstructions: string; - private signal: AbortSignal; - - constructor( - llm: BaseChatModel, - emitter: EventEmitter, - systemInstructions: string, - signal: AbortSignal, - ) { - this.llm = llm; - this.emitter = emitter; - this.systemInstructions = systemInstructions; - this.signal = signal; - } - - /** - * Task manager agent node - breaks down complex questions into smaller tasks - */ - async execute(state: typeof AgentState.State): Promise { - try { - //setTemperature(this.llm, 0); // Set temperature to 0 for deterministic output - - // Check if we're in task progression mode (tasks already exist and we're processing them) - if (state.tasks && state.tasks.length > 0) { - const currentTaskIndex = state.currentTaskIndex || 0; - const hasMoreTasks = currentTaskIndex < state.tasks.length - 1; - - if (hasMoreTasks) { - // Move to next task - const nextTaskIndex = currentTaskIndex + 1; - this.emitter.emit('agent_action', { - type: 'agent_action', - data: { - action: 'PROCEEDING_TO_NEXT_TASK', - message: `Task ${currentTaskIndex + 1} completed. Moving to task ${nextTaskIndex + 1} of ${state.tasks.length}.`, - details: { - completedTask: state.tasks[currentTaskIndex], - nextTask: state.tasks[nextTaskIndex], - taskIndex: nextTaskIndex + 1, - totalTasks: state.tasks.length, - documentCount: state.relevantDocuments.length, - query: state.originalQuery || state.query, - }, - }, - }); - - return new Command({ - goto: 'content_router', - update: { - // messages: [ - // new AIMessage( - // `Task ${currentTaskIndex + 1} completed. Processing task ${nextTaskIndex + 1} of ${state.tasks.length}: "${state.tasks[nextTaskIndex]}"`, - // ), - // ], - currentTaskIndex: nextTaskIndex, - }, - }); - } else { - // All tasks completed, move to analysis - this.emitter.emit('agent_action', { - type: 'agent_action', - data: { - action: 'ALL_TASKS_COMPLETED', - message: `All ${state.tasks.length} tasks completed. 
Ready for analysis.`, - details: { - totalTasks: state.tasks.length, - documentCount: state.relevantDocuments.length, - query: state.originalQuery || state.query, - }, - }, - }); - - return new Command({ - goto: 'analyzer', - // update: { - // messages: [ - // new AIMessage( - // `All ${state.tasks.length} tasks completed. Moving to analysis phase.`, - // ), - // ], - // }, - }); - } - } - - // Original task breakdown logic for new queries - // Emit task analysis event - this.emitter.emit('agent_action', { - type: 'agent_action', - data: { - action: 'ANALYZING_TASK_COMPLEXITY', - message: `Analyzing question to determine if it needs to be broken down into smaller tasks`, - details: { - query: state.query, - currentTasks: state.tasks?.length || 0, - }, - }, - }); - - const template = PromptTemplate.fromTemplate(taskBreakdownPrompt); - - // Create file context information - const fileContext = - state.fileIds && state.fileIds.length > 0 - ? `Files attached: ${state.fileIds.length} file(s) are available for analysis. Consider creating tasks that can leverage these attached files when appropriate.` - : 'No files attached: Focus on tasks that can be answered through web research or general knowledge.'; - - const prompt = await template.format({ - systemInstructions: this.systemInstructions, - fileContext: fileContext, - query: state.query, - }); - - // Use structured output for task breakdown - const structuredLlm = withStructuredOutput( - this.llm, - TaskBreakdownSchema, - { - name: 'break_down_tasks', - }, - ); - - const taskBreakdownResult = (await structuredLlm.invoke([prompt], { - signal: this.signal, - })) as TaskBreakdown; - - console.log('Task breakdown response:', taskBreakdownResult); - - // Extract tasks from structured response - const taskLines = taskBreakdownResult.tasks.filter( - (task) => task.trim().length > 0, - ); - - if (taskLines.length === 0) { - // Fallback: if no tasks found, use the original query - taskLines.push(state.query); - } - - console.log( - `Task breakdown completed: ${taskLines.length} tasks identified`, - ); - console.log('Reasoning:', taskBreakdownResult.reasoning); - taskLines.forEach((task, index) => { - console.log(`Task ${index + 1}: ${task}`); - }); - - // Emit task breakdown completion event - this.emitter.emit('agent_action', { - type: 'agent_action', - data: { - action: 'TASK_BREAKDOWN_COMPLETED', - message: `Question broken down into ${taskLines.length} focused ${taskLines.length === 1 ? 'task' : 'tasks'}`, - details: { - query: state.query, - taskCount: taskLines.length, - tasks: taskLines, - reasoning: taskBreakdownResult.reasoning, - }, - }, - }); - - const responseMessage = - taskLines.length === 1 - ? 'Question is already focused and ready for processing' - : `Question broken down into ${taskLines.length} focused tasks for parallel processing`; - - return new Command({ - goto: 'content_router', // Route to content router to decide between file search, web search, or analysis - update: { - // messages: [new AIMessage(responseMessage)], - tasks: taskLines, - currentTaskIndex: 0, - originalQuery: state.originalQuery || state.query, // Preserve original if not already set - }, - }); - } catch (error) { - console.error('Task breakdown error:', error); - const errorMessage = new AIMessage( - `Task breakdown failed: ${error instanceof Error ? 
error.message : 'Unknown error'}`, - ); - - return new Command({ - goto: 'content_router', // Fallback to content router with original query - update: { - messages: [errorMessage], - tasks: [state.query], // Use original query as single task - currentTaskIndex: 0, - originalQuery: state.originalQuery || state.query, // Preserve original if not already set - }, - }); - } finally { - setTemperature(this.llm, undefined); // Reset temperature to default - } - } -} diff --git a/src/lib/agents/urlSummarizationAgent.ts b/src/lib/agents/urlSummarizationAgent.ts deleted file mode 100644 index ab3aa7e..0000000 --- a/src/lib/agents/urlSummarizationAgent.ts +++ /dev/null @@ -1,300 +0,0 @@ -import { BaseChatModel } from '@langchain/core/language_models/chat_models'; -import { AIMessage } from '@langchain/core/messages'; -import { Command, END } from '@langchain/langgraph'; -import { EventEmitter } from 'events'; -import { Document } from 'langchain/document'; -import { AgentState } from './agentState'; -import { getWebContent } from '../utils/documents'; -import { removeThinkingBlocks } from '../utils/contentUtils'; -import { setTemperature } from '../utils/modelUtils'; - -export class URLSummarizationAgent { - private llm: BaseChatModel; - private emitter: EventEmitter; - private systemInstructions: string; - private signal: AbortSignal; - - constructor( - llm: BaseChatModel, - emitter: EventEmitter, - systemInstructions: string, - signal: AbortSignal, - ) { - this.llm = llm; - this.emitter = emitter; - this.systemInstructions = systemInstructions; - this.signal = signal; - } - - /** - * URL processing agent node - */ - async execute(state: typeof AgentState.State): Promise { - try { - setTemperature(this.llm, 0); // Set temperature to 0 for deterministic output - - // Use pre-analyzed URLs from ContentRouterAgent - const urlsToProcess = state.urlsToSummarize || []; - const summarizationIntent = - state.summarizationIntent || - 'process content to help answer the user query'; - - if (urlsToProcess.length === 0) { - console.log( - 'No URLs found for processing, routing back to content router', - ); - return new Command({ - goto: 'content_router', - // update: { - // messages: [ - // new AIMessage( - // 'No URLs found for processing, routing to content router', - // ), - // ], - // }, - }); - } - - console.log(`URL processing detected. 
URLs: ${urlsToProcess.join(', ')}`); - console.log(`Processing intent: ${summarizationIntent}`); - - // Emit URL detection event - this.emitter.emit('agent_action', { - type: 'agent_action', - data: { - action: 'URL_PROCESSING_DETECTED', - message: `Processing ${urlsToProcess.length} URL(s) to extract content for analysis`, - details: { - query: state.query, - urls: urlsToProcess, - intent: summarizationIntent, - }, - }, - }); - - const documents: Document[] = []; - - // Process each URL - for (const url of urlsToProcess) { - if (this.signal.aborted) { - console.warn('URL summarization operation aborted by signal'); - break; - } - - try { - // Emit URL processing event - this.emitter.emit('agent_action', { - type: 'agent_action', - data: { - action: 'PROCESSING_URL', - message: `Retrieving and processing content from: ${url}`, - details: { - query: state.query, - sourceUrl: url, - intent: summarizationIntent, - }, - }, - }); - - // Fetch full content using the enhanced web content retrieval - const webContent = await getWebContent(url, true); - - if (!webContent || !webContent.pageContent) { - console.warn(`No content retrieved from URL: ${url}`); - - // Emit URL processing failure event - this.emitter.emit('agent_action', { - type: 'agent_action', - data: { - action: 'URL_PROCESSING_FAILED', - message: `Failed to retrieve content from: ${url}`, - details: { - query: state.query, - sourceUrl: url, - reason: 'No content retrieved', - }, - }, - }); - continue; - } - - const contentLength = webContent.pageContent.length; - let finalContent: string; - let processingType: string; - - // If content is short (< 4000 chars), use it directly; otherwise summarize - if (contentLength < 4000) { - finalContent = webContent.pageContent; - processingType = 'url-direct-content'; - - console.log( - `Content is short (${contentLength} chars), using directly without summarization`, - ); - - // Emit direct content usage event - this.emitter.emit('agent_action', { - type: 'agent_action', - data: { - action: 'URL_DIRECT_CONTENT', - message: `Content is short (${contentLength} chars), using directly from: ${url}`, - details: { - query: state.query, - sourceUrl: url, - sourceTitle: webContent.metadata.title || 'Web Page', - contentLength: contentLength, - intent: summarizationIntent, - }, - }, - }); - } else { - // Content is long, summarize using LLM - console.log( - `Content is long (${contentLength} chars), generating summary`, - ); - - const systemPrompt = this.systemInstructions - ? `${this.systemInstructions}\n\n` - : ''; - - const summarizationPrompt = `${systemPrompt}You are a web content processor. Extract and summarize ONLY the information from the provided web page content that is relevant to the user's query. 
- -# Critical Instructions -- Output ONLY a summary of the web page content provided below -- Focus on information that relates to or helps answer the user's query -- Do NOT add pleasantries, greetings, or conversational elements -- Do NOT mention missing URLs, other pages, or content not provided -- Do NOT ask follow-up questions or suggest additional actions -- Do NOT add commentary about the user's request or query -- Present the information in a clear, well-structured format with key facts and details -- Include all relevant details that could help answer the user's question - -# User's Query: ${state.query} - -# Content Title: ${webContent.metadata.title || 'Web Page'} -# Content URL: ${url} - -# Web Page Content to Summarize: -${webContent.pageContent} - -Provide a comprehensive summary of the above web page content, focusing on information relevant to the user's query:`; - - const result = await this.llm.invoke(summarizationPrompt, { - signal: this.signal, - }); - - finalContent = removeThinkingBlocks(result.content as string); - processingType = 'url-content-extraction'; - } - - if (finalContent && finalContent.trim().length > 0) { - const document = new Document({ - pageContent: finalContent, - metadata: { - title: webContent.metadata.title || 'URL Content', - url: url, - source: url, - processingType: processingType, - processingIntent: summarizationIntent, - originalContentLength: contentLength, - }, - }); - - documents.push(document); - - // Emit successful URL processing event - this.emitter.emit('agent_action', { - type: 'agent_action', - data: { - action: 'URL_CONTENT_EXTRACTED', - message: `Successfully processed content from: ${url}`, - details: { - query: state.query, - sourceUrl: url, - sourceTitle: webContent.metadata.title || 'Web Page', - contentLength: finalContent.length, - originalContentLength: contentLength, - processingType: processingType, - intent: summarizationIntent, - }, - }, - }); - - console.log( - `Successfully processed content from ${url} (${finalContent.length} characters, ${processingType})`, - ); - } else { - console.warn(`No valid content generated for URL: ${url}`); - } - } catch (error) { - console.error(`Error processing URL ${url}:`, error); - - // Emit URL processing error event - this.emitter.emit('agent_action', { - type: 'agent_action', - data: { - action: 'URL_PROCESSING_ERROR', - message: `Error processing URL: ${url}`, - details: { - query: state.query, - sourceUrl: url, - error: error instanceof Error ? error.message : 'Unknown error', - }, - }, - }); - } - } - - if (documents.length === 0) { - const errorMessage = `No content could be retrieved or summarized from the provided URL(s): ${urlsToProcess.join(', ')}`; - console.error(errorMessage); - - return new Command({ - goto: 'analyzer', - // update: { - // messages: [new AIMessage(errorMessage)], - // }, - }); - } - - // Emit completion event - this.emitter.emit('agent_action', { - type: 'agent_action', - data: { - action: 'URL_PROCESSING_COMPLETED', - message: `Successfully processed ${documents.length} URL(s) and extracted content`, - details: { - query: state.query, - processedUrls: urlsToProcess.length, - successfulExtractions: documents.length, - intent: summarizationIntent, - }, - }, - }); - - const responseMessage = `URL processing completed. 
Successfully processed ${documents.length} out of ${urlsToProcess.length} URLs.`; - console.log(responseMessage); - - return new Command({ - goto: 'analyzer', // Route to analyzer to continue with normal workflow after URL processing - update: { - // messages: [new AIMessage(responseMessage)], - relevantDocuments: documents, - }, - }); - } catch (error) { - console.error('URL summarization error:', error); - const errorMessage = new AIMessage( - `URL summarization failed: ${error instanceof Error ? error.message : 'Unknown error'}`, - ); - - return new Command({ - goto: END, - update: { - messages: [errorMessage], - }, - }); - } finally { - setTemperature(this.llm, undefined); // Reset temperature to default - } - } -} diff --git a/src/lib/agents/webSearchAgent.ts b/src/lib/agents/webSearchAgent.ts deleted file mode 100644 index f9b7aca..0000000 --- a/src/lib/agents/webSearchAgent.ts +++ /dev/null @@ -1,461 +0,0 @@ -import { BaseChatModel } from '@langchain/core/language_models/chat_models'; -import { AIMessage } from '@langchain/core/messages'; -import { PromptTemplate } from '@langchain/core/prompts'; -import { Command, END } from '@langchain/langgraph'; -import { EventEmitter } from 'events'; -import { Document } from 'langchain/document'; -import { z } from 'zod'; -import LineOutputParser from '../outputParsers/lineOutputParser'; -import { webSearchRetrieverAgentPrompt } from '../prompts/webSearch'; -import { searchSearxng } from '../searxng'; -import { formatDateForLLM } from '../utils'; -import { summarizeWebContent } from '../utils/summarizeWebContent'; -import { - analyzePreviewContent, - PreviewContent, -} from '../utils/analyzePreviewContent'; -import { AgentState } from './agentState'; -import { setTemperature } from '../utils/modelUtils'; -import { Embeddings } from '@langchain/core/embeddings'; -import { removeThinkingBlocksFromMessages } from '../utils/contentUtils'; -import computeSimilarity from '../utils/computeSimilarity'; -import { withStructuredOutput } from '../utils/structuredOutput'; - -// Define Zod schema for structured search query output -const SearchQuerySchema = z.object({ - searchQuery: z - .string() - .describe('The optimized search query to use for web search'), - reasoning: z - .string() - .describe( - 'Explanation of how the search query was optimized for better results', - ), -}); - -type SearchQuery = z.infer; - -export class WebSearchAgent { - private llm: BaseChatModel; - private emitter: EventEmitter; - private systemInstructions: string; - private signal: AbortSignal; - private embeddings: Embeddings; - - constructor( - llm: BaseChatModel, - emitter: EventEmitter, - systemInstructions: string, - signal: AbortSignal, - embeddings: Embeddings, - ) { - this.llm = llm; - this.emitter = emitter; - this.systemInstructions = systemInstructions; - this.signal = signal; - this.embeddings = embeddings; - } - - /** - * Web search agent node - */ - async execute(state: typeof AgentState.State): Promise { - try { - //setTemperature(this.llm, 0); // Set temperature to 0 for deterministic output - - // Determine current task to process - const currentTask = - state.tasks && state.tasks.length > 0 - ? 
state.tasks[state.currentTaskIndex || 0] - : state.query; - - console.log( - `Processing task ${(state.currentTaskIndex || 0) + 1} of ${state.tasks?.length || 1}: "${currentTask}"`, - ); - - // Emit preparing web search event - this.emitter.emit('agent_action', { - type: 'agent_action', - data: { - action: 'PREPARING_SEARCH_QUERY', - // message: `Preparing search query`, - details: { - query: state.query, - currentTask: currentTask, - taskIndex: (state.currentTaskIndex || 0) + 1, - totalTasks: state.tasks?.length || 1, - searchInstructions: state.searchInstructions || currentTask, - documentCount: state.relevantDocuments.length, - searchIterations: state.searchInstructionHistory.length, - }, - }, - }); - - const template = PromptTemplate.fromTemplate( - webSearchRetrieverAgentPrompt, - ); - const prompt = await template.format({ - systemInstructions: this.systemInstructions, - query: currentTask, // Use current task instead of main query - date: formatDateForLLM(new Date()), - supervisor: state.searchInstructions, - }); - - // Use structured output for search query generation - const structuredLlm = withStructuredOutput(this.llm, SearchQuerySchema, { - name: 'generate_search_query', - }); - - const searchQueryResult = await structuredLlm.invoke( - [...removeThinkingBlocksFromMessages(state.messages), prompt], - { signal: this.signal }, - ); - - const searchQuery = searchQueryResult.searchQuery; - - console.log(`Performing web search for query: "${searchQuery}"`); - console.log('Search query reasoning:', searchQueryResult.reasoning); - - // Emit executing web search event - this.emitter.emit('agent_action', { - type: 'agent_action', - data: { - action: 'EXECUTING_WEB_SEARCH', - // message: `Searching the web for: '${searchQuery}'`, - details: { - query: state.query, - currentTask: currentTask, - taskIndex: (state.currentTaskIndex || 0) + 1, - totalTasks: state.tasks?.length || 1, - searchQuery: searchQuery, - documentCount: state.relevantDocuments.length, - searchIterations: state.searchInstructionHistory.length, - }, - }, - }); - - const searchResults = await searchSearxng(searchQuery, { - language: 'en', - engines: [], - }); - - // Emit web sources identified event - this.emitter.emit('agent_action', { - type: 'agent_action', - data: { - action: 'WEB_SOURCES_IDENTIFIED', - message: `Found ${searchResults.results.length} potential web sources`, - details: { - query: state.query, - currentTask: currentTask, - taskIndex: (state.currentTaskIndex || 0) + 1, - totalTasks: state.tasks?.length || 1, - searchQuery: searchQuery, - sourcesFound: searchResults.results.length, - documentCount: state.relevantDocuments.length, - searchIterations: state.searchInstructionHistory.length, - }, - }, - }); - - let bannedSummaryUrls = state.bannedSummaryUrls || []; - let bannedPreviewUrls = state.bannedPreviewUrls || []; - const queryVector = await this.embeddings.embedQuery( - state.originalQuery + ' ' + currentTask, - ); - - // Filter out banned URLs first - const filteredResults = searchResults.results.filter( - (result) => - !bannedSummaryUrls.includes(result.url) && - !bannedPreviewUrls.includes(result.url), - ); - - // Calculate similarities for all filtered results - const resultsWithSimilarity = await Promise.all( - filteredResults.map(async (result) => { - const vector = await this.embeddings.embedQuery( - result.title + ' ' + result.content || '', - ); - const similarity = computeSimilarity(vector, queryVector); - return { result, similarity }; - }), - ); - - let previewContents: PreviewContent[] = 
[]; - // Always take the top 3 results for preview content - previewContents.push( - ...filteredResults.slice(0, 3).map((result) => ({ - title: result.title || 'Untitled', - snippet: result.content || '', - url: result.url, - })), - ); - - // Sort by relevance score and take top 12 results for a total of 15 - previewContents.push( - ...resultsWithSimilarity - .slice(3) - .sort((a, b) => b.similarity - a.similarity) - .slice(0, 12) - .map(({ result }) => ({ - title: result.title || 'Untitled', - snippet: result.content || '', - url: result.url, - })), - ); - - console.log( - `Extracted preview content from ${previewContents.length} search results for analysis`, - ); - - // Perform preview analysis to determine if full content retrieval is needed - let previewAnalysisResult = null; - if (previewContents.length > 0) { - console.log( - 'Starting preview content analysis to determine if full processing is needed', - ); - - // Emit preview analysis event - this.emitter.emit('agent_action', { - type: 'agent_action', - data: { - action: 'ANALYZING_PREVIEW_CONTENT', - message: `Analyzing ${previewContents.length} search result previews to determine processing approach`, - details: { - query: currentTask, - previewCount: previewContents.length, - documentCount: state.relevantDocuments.length, - searchIterations: state.searchInstructionHistory.length, - }, - }, - }); - - previewAnalysisResult = await analyzePreviewContent( - previewContents, - state.query, - currentTask, - removeThinkingBlocksFromMessages(state.messages), - this.llm, - this.systemInstructions, - this.signal, - ); - - console.log( - `Preview analysis result: ${previewAnalysisResult.isSufficient ? 'SUFFICIENT' : 'INSUFFICIENT'}${previewAnalysisResult.reason ? ` - ${previewAnalysisResult.reason}` : ''}`, - ); - } - - let documents: Document[] = []; - let attemptedUrlCount = 0; // Declare outside conditional blocks - - // Conditional workflow based on preview analysis result - if (previewAnalysisResult && previewAnalysisResult.isSufficient) { - // Preview content is sufficient - create documents from preview content - console.log( - 'Preview content determined sufficient - skipping full content retrieval', - ); - - // Emit preview processing event - this.emitter.emit('agent_action', { - type: 'agent_action', - data: { - action: 'PROCESSING_PREVIEW_CONTENT', - message: `Using preview content from ${previewContents.length} sources - no full content retrieval needed`, - details: { - query: currentTask, - previewCount: previewContents.length, - documentCount: state.relevantDocuments.length, - searchIterations: state.searchInstructionHistory.length, - processingType: 'preview-only', - }, - }, - }); - - // Create documents from preview content - documents = previewContents.map( - (content, index) => - new Document({ - pageContent: `# ${content.title}\n\n${content.snippet}`, - metadata: { - title: content.title, - url: content.url, - source: content.url, - processingType: 'preview-only', - snippet: content.snippet, - }, - }), - ); - - previewContents.forEach((content) => { - bannedPreviewUrls.push(content.url); // Add to banned preview URLs to avoid duplicates - }); - - console.log( - `Created ${documents.length} documents from preview content`, - ); - } else { - // Preview content is insufficient - proceed with full content processing - const insufficiencyReason = - previewAnalysisResult?.reason || - 'Preview content not available or insufficient'; - console.log( - `Preview content insufficient: ${insufficiencyReason} - proceeding with full 
content retrieval`, - ); - - // Emit full processing event - this.emitter.emit('agent_action', { - type: 'agent_action', - data: { - action: 'PROCEEDING_WITH_FULL_ANALYSIS', - message: `Preview content insufficient - proceeding with detailed content analysis`, - details: { - query: currentTask, - insufficiencyReason: insufficiencyReason, - documentCount: state.relevantDocuments.length, - searchIterations: state.searchInstructionHistory.length, - processingType: 'full-content', - }, - }, - }); - - // Summarize the top 2 search results - for (const result of previewContents) { - if (this.signal.aborted) { - console.warn('Search operation aborted by signal'); - break; // Exit if the operation is aborted - } - - if (bannedSummaryUrls.includes(result.url)) { - console.log(`Skipping banned URL: ${result.url}`); - // Note: We don't emit an agent_action event for banned URLs as this is an internal - // optimization that should be transparent to the user - continue; // Skip banned URLs - } - // if (attemptedUrlCount >= 5) { - // console.warn( - // 'Too many attempts to summarize URLs, stopping further attempts.', - // ); - // break; // Limit the number of attempts to summarize URLs - // } - attemptedUrlCount++; - - bannedSummaryUrls.push(result.url); // Add to banned URLs to avoid duplicates - - if (documents.length >= 2) { - break; // Limit to top 1 document - } - - // Emit analyzing source event - this.emitter.emit('agent_action', { - type: 'agent_action', - data: { - action: 'ANALYZING_SOURCE', - message: `Analyzing and summarizing content from: ${result.title || result.url}`, - details: { - query: currentTask, - sourceUrl: result.url, - sourceTitle: result.title || 'Untitled', - documentCount: state.relevantDocuments.length, - searchIterations: state.searchInstructionHistory.length, - }, - }, - }); - - const summaryResult = await summarizeWebContent( - result.url, - currentTask, - this.llm, - this.systemInstructions, - this.signal, - ); - - if (summaryResult.document) { - documents.push(summaryResult.document); - - // Emit context updated event - this.emitter.emit('agent_action', { - type: 'agent_action', - data: { - action: 'CONTEXT_UPDATED', - message: `Added information from ${summaryResult.document.metadata.title || result.url} to context`, - details: { - query: currentTask, - sourceUrl: result.url, - sourceTitle: - summaryResult.document.metadata.title || 'Untitled', - contentLength: summaryResult.document.pageContent.length, - documentCount: - state.relevantDocuments.length + documents.length, - searchIterations: state.searchInstructionHistory.length, - }, - }, - }); - - console.log( - `Summarized content from ${result.url} to ${summaryResult.document.pageContent.length} characters. 
Content: ${summaryResult.document.pageContent}`, - ); - } else { - console.warn(`No relevant content found for URL: ${result.url}`); - - // Emit skipping irrelevant source event for non-relevant content - this.emitter.emit('agent_action', { - type: 'agent_action', - data: { - action: 'SKIPPING_IRRELEVANT_SOURCE', - message: `Source ${result.title || result.url} was not relevant - trying next`, - details: { - query: state.query, - sourceUrl: result.url, - sourceTitle: result.title || 'Untitled', - skipReason: - summaryResult.notRelevantReason || - 'Content was not relevant to the query', - documentCount: - state.relevantDocuments.length + documents.length, - searchIterations: state.searchInstructionHistory.length, - }, - }, - }); - } - } - } // Close the else block for full content processing - - if (documents.length === 0) { - return new Command({ - goto: 'analyzer', - // update: { - // messages: [new AIMessage('No relevant documents found.')], - // }, - }); - } - - const responseMessage = `Web search completed. ${documents.length === 0 && attemptedUrlCount < 5 ? 'This search query does not have enough relevant information. Try rephrasing your query or providing more context.' : `Found ${documents.length} results that are relevant to the query.`}`; - console.log(responseMessage); - - return new Command({ - goto: 'analyzer', // Route back to analyzer to process the results - update: { - // messages: [new AIMessage(responseMessage)], - relevantDocuments: documents, - bannedSummaryUrls: bannedSummaryUrls, - bannedPreviewUrls: bannedPreviewUrls, - }, - }); - } catch (error) { - console.error('Web search error:', error); - const errorMessage = new AIMessage( - `Web search failed: ${error instanceof Error ? error.message : 'Unknown error'}`, - ); - - return new Command({ - goto: END, - update: { - messages: [errorMessage], - }, - }); - } finally { - setTemperature(this.llm, undefined); // Reset temperature to default - } - } -} diff --git a/src/lib/constants/dashboard.ts b/src/lib/constants/dashboard.ts index 1934738..abc3707 100644 --- a/src/lib/constants/dashboard.ts +++ b/src/lib/constants/dashboard.ts @@ -1,15 +1,15 @@ // Dashboard-wide constants and constraints export const DASHBOARD_CONSTRAINTS = { // Grid layout constraints - WIDGET_MIN_WIDTH: 2, // Minimum columns + WIDGET_MIN_WIDTH: 2, // Minimum columns WIDGET_MAX_WIDTH: 12, // Maximum columns (full width) WIDGET_MIN_HEIGHT: 2, // Minimum rows WIDGET_MAX_HEIGHT: 20, // Maximum rows - + // Default widget sizing - DEFAULT_WIDGET_WIDTH: 6, // Half width by default + DEFAULT_WIDGET_WIDTH: 6, // Half width by default DEFAULT_WIDGET_HEIGHT: 4, // Standard height - + // Grid configuration GRID_COLUMNS: { lg: 12, @@ -18,7 +18,7 @@ export const DASHBOARD_CONSTRAINTS = { xs: 4, xxs: 2, }, - + GRID_BREAKPOINTS: { lg: 1200, md: 996, @@ -26,14 +26,16 @@ export const DASHBOARD_CONSTRAINTS = { xs: 480, xxs: 0, }, - + GRID_ROW_HEIGHT: 60, GRID_MARGIN: [16, 16] as [number, number], GRID_CONTAINER_PADDING: [0, 0] as [number, number], } as const; // Responsive constraints - adjust max width based on breakpoint -export const getResponsiveConstraints = (breakpoint: keyof typeof DASHBOARD_CONSTRAINTS.GRID_COLUMNS) => { +export const getResponsiveConstraints = ( + breakpoint: keyof typeof DASHBOARD_CONSTRAINTS.GRID_COLUMNS, +) => { const maxCols = DASHBOARD_CONSTRAINTS.GRID_COLUMNS[breakpoint]; return { minW: DASHBOARD_CONSTRAINTS.WIDGET_MIN_WIDTH, diff --git a/src/lib/hooks/useDashboard.ts b/src/lib/hooks/useDashboard.ts index db69a22..c0eb675 100644 --- 
a/src/lib/hooks/useDashboard.ts +++ b/src/lib/hooks/useDashboard.ts @@ -9,7 +9,10 @@ import { DASHBOARD_STORAGE_KEYS, } from '@/lib/types/dashboard'; import { WidgetCache } from '@/lib/types/cache'; -import { DASHBOARD_CONSTRAINTS, getResponsiveConstraints } from '@/lib/constants/dashboard'; +import { + DASHBOARD_CONSTRAINTS, + getResponsiveConstraints, +} from '@/lib/constants/dashboard'; // Helper function to request location permission and get user's location const requestLocationPermission = async (): Promise => { @@ -121,7 +124,7 @@ export const useDashboard = (): UseDashboardReturn => { if (widget.lastUpdated) { widget.lastUpdated = new Date(widget.lastUpdated); } - + // Migration: Add default layout if missing if (!widget.layout) { const defaultLayout: WidgetLayout = { @@ -187,69 +190,77 @@ export const useDashboard = (): UseDashboardReturn => { ); }, [state.settings]); - const addWidget = useCallback((config: WidgetConfig) => { - // Find the next available position in the grid - const getNextPosition = () => { - const existingWidgets = state.widgets; - let x = 0; - let y = 0; - - // Simple algorithm: try to place in first available spot - for (let row = 0; row < 20; row++) { - for (let col = 0; col < 12; col += 6) { // Start with half-width widgets - const position = { x: col, y: row }; - const hasCollision = existingWidgets.some(widget => - widget.layout.x < position.x + 6 && - widget.layout.x + widget.layout.w > position.x && - widget.layout.y < position.y + 3 && - widget.layout.y + widget.layout.h > position.y - ); - - if (!hasCollision) { - return { x: position.x, y: position.y }; + const addWidget = useCallback( + (config: WidgetConfig) => { + // Find the next available position in the grid + const getNextPosition = () => { + const existingWidgets = state.widgets; + let x = 0; + let y = 0; + + // Simple algorithm: try to place in first available spot + for (let row = 0; row < 20; row++) { + for (let col = 0; col < 12; col += 6) { + // Start with half-width widgets + const position = { x: col, y: row }; + const hasCollision = existingWidgets.some( + (widget) => + widget.layout.x < position.x + 6 && + widget.layout.x + widget.layout.w > position.x && + widget.layout.y < position.y + 3 && + widget.layout.y + widget.layout.h > position.y, + ); + + if (!hasCollision) { + return { x: position.x, y: position.y }; + } } } - } - - // Fallback: place at bottom - const maxY = Math.max(0, ...existingWidgets.map(w => w.layout.y + w.layout.h)); - return { x: 0, y: maxY }; - }; - const position = getNextPosition(); - const defaultLayout: WidgetLayout = { - x: position.x, - y: position.y, - w: DASHBOARD_CONSTRAINTS.DEFAULT_WIDGET_WIDTH, - h: DASHBOARD_CONSTRAINTS.DEFAULT_WIDGET_HEIGHT, - isDraggable: true, - isResizable: true, - }; + // Fallback: place at bottom + const maxY = Math.max( + 0, + ...existingWidgets.map((w) => w.layout.y + w.layout.h), + ); + return { x: 0, y: maxY }; + }; - const newWidget: Widget = { - ...config, - id: Date.now().toString() + Math.random().toString(36).substr(2, 9), - lastUpdated: null, - isLoading: false, - content: null, - error: null, - layout: config.layout || defaultLayout, - }; + const position = getNextPosition(); + const defaultLayout: WidgetLayout = { + x: position.x, + y: position.y, + w: DASHBOARD_CONSTRAINTS.DEFAULT_WIDGET_WIDTH, + h: DASHBOARD_CONSTRAINTS.DEFAULT_WIDGET_HEIGHT, + isDraggable: true, + isResizable: true, + }; - setState((prev) => ({ - ...prev, - widgets: [...prev.widgets, newWidget], - })); - }, [state.widgets]); + const newWidget: 
Widget = { + ...config, + id: Date.now().toString() + Math.random().toString(36).substr(2, 9), + lastUpdated: null, + isLoading: false, + content: null, + error: null, + layout: config.layout || defaultLayout, + }; + + setState((prev) => ({ + ...prev, + widgets: [...prev.widgets, newWidget], + })); + }, + [state.widgets], + ); const updateWidget = useCallback((id: string, config: WidgetConfig) => { setState((prev) => ({ ...prev, widgets: prev.widgets.map((widget) => widget.id === id - ? { - ...widget, - ...config, + ? { + ...widget, + ...config, id, // Preserve the ID layout: config.layout || widget.layout, // Preserve existing layout if not provided } @@ -502,11 +513,13 @@ export const useDashboard = (): UseDashboardReturn => { ); const getLayouts = useCallback((): DashboardLayouts => { - const createBreakpointLayout = (breakpoint: keyof typeof DASHBOARD_CONSTRAINTS.GRID_COLUMNS) => { + const createBreakpointLayout = ( + breakpoint: keyof typeof DASHBOARD_CONSTRAINTS.GRID_COLUMNS, + ) => { const constraints = getResponsiveConstraints(breakpoint); const maxCols = DASHBOARD_CONSTRAINTS.GRID_COLUMNS[breakpoint]; - - return state.widgets.map(widget => ({ + + return state.widgets.map((widget) => ({ i: widget.id, x: widget.layout.x, y: widget.layout.y, @@ -531,32 +544,37 @@ export const useDashboard = (): UseDashboardReturn => { }; }, [state.widgets]); - const updateLayouts = useCallback((layouts: DashboardLayouts) => { - const updatedWidgets = state.widgets.map(widget => { - // Use lg layout as the primary layout for position and size updates - const newLayout = layouts.lg.find((layout: Layout) => layout.i === widget.id); - if (newLayout) { - return { - ...widget, - layout: { - x: newLayout.x, - y: newLayout.y, - w: newLayout.w, - h: newLayout.h, - static: newLayout.static || widget.layout.static, - isDraggable: newLayout.isDraggable ?? widget.layout.isDraggable, - isResizable: newLayout.isResizable ?? widget.layout.isResizable, - }, - }; - } - return widget; - }); + const updateLayouts = useCallback( + (layouts: DashboardLayouts) => { + const updatedWidgets = state.widgets.map((widget) => { + // Use lg layout as the primary layout for position and size updates + const newLayout = layouts.lg.find( + (layout: Layout) => layout.i === widget.id, + ); + if (newLayout) { + return { + ...widget, + layout: { + x: newLayout.x, + y: newLayout.y, + w: newLayout.w, + h: newLayout.h, + static: newLayout.static || widget.layout.static, + isDraggable: newLayout.isDraggable ?? widget.layout.isDraggable, + isResizable: newLayout.isResizable ?? 
widget.layout.isResizable, + }, + }; + } + return widget; + }); - setState(prev => ({ - ...prev, - widgets: updatedWidgets, - })); - }, [state.widgets]); + setState((prev) => ({ + ...prev, + widgets: updatedWidgets, + })); + }, + [state.widgets], + ); return { // State diff --git a/src/lib/search/agentSearch.ts b/src/lib/search/agentSearch.ts index dca2eb3..9733387 100644 --- a/src/lib/search/agentSearch.ts +++ b/src/lib/search/agentSearch.ts @@ -1,48 +1,19 @@ import { Embeddings } from '@langchain/core/embeddings'; import { BaseChatModel } from '@langchain/core/language_models/chat_models'; -import { - BaseMessage, - HumanMessage, - SystemMessage, -} from '@langchain/core/messages'; -import { - BaseLangGraphError, - END, - GraphRecursionError, - MemorySaver, - START, - StateGraph, -} from '@langchain/langgraph'; +import { BaseMessage } from '@langchain/core/messages'; import { EventEmitter } from 'events'; -import { - AgentState, - WebSearchAgent, - AnalyzerAgent, - SynthesizerAgent, - TaskManagerAgent, - FileSearchAgent, - ContentRouterAgent, - URLSummarizationAgent, -} from '../agents'; +import { SimplifiedAgent } from './simplifiedAgent'; /** * Agent Search class implementing LangGraph Supervisor pattern */ export class AgentSearch { - private llm: BaseChatModel; - private embeddings: Embeddings; - private checkpointer: MemorySaver; - private signal: AbortSignal; - private taskManagerAgent: TaskManagerAgent; - private webSearchAgent: WebSearchAgent; - private analyzerAgent: AnalyzerAgent; - private synthesizerAgent: SynthesizerAgent; - private fileSearchAgent: FileSearchAgent; - private contentRouterAgent: ContentRouterAgent; - private urlSummarizationAgent: URLSummarizationAgent; private emitter: EventEmitter; private focusMode: string; + // Simplified agent experimental implementation + private simplifiedAgent: SimplifiedAgent; + constructor( llm: BaseChatModel, embeddings: Embeddings, @@ -52,117 +23,49 @@ export class AgentSearch { signal: AbortSignal, focusMode: string = 'webSearch', ) { - this.llm = llm; - this.embeddings = embeddings; - this.checkpointer = new MemorySaver(); - this.signal = signal; this.emitter = emitter; this.focusMode = focusMode; - // Initialize agents - this.taskManagerAgent = new TaskManagerAgent( + // Initialize simplified agent (experimental) + this.simplifiedAgent = new SimplifiedAgent( llm, - emitter, - systemInstructions, - signal, - ); - this.webSearchAgent = new WebSearchAgent( - llm, - emitter, - systemInstructions, - signal, embeddings, - ); - this.analyzerAgent = new AnalyzerAgent( - llm, emitter, systemInstructions, - signal, - ); - this.synthesizerAgent = new SynthesizerAgent( - llm, - emitter, personaInstructions, signal, - ); - this.fileSearchAgent = new FileSearchAgent( - llm, - emitter, - systemInstructions, - signal, - embeddings, - ); - this.contentRouterAgent = new ContentRouterAgent( - llm, - emitter, - systemInstructions, - signal, - ); - this.urlSummarizationAgent = new URLSummarizationAgent( - llm, - emitter, - systemInstructions, - signal, + focusMode, ); } /** - * Create and compile the agent workflow graph + * Execute the simplified agent search workflow (experimental) */ - private createWorkflow() { - const workflow = new StateGraph(AgentState) - .addNode( - 'url_summarization', - this.urlSummarizationAgent.execute.bind(this.urlSummarizationAgent), - { - ends: ['task_manager', 'analyzer'], - }, - ) - .addNode( - 'task_manager', - this.taskManagerAgent.execute.bind(this.taskManagerAgent), - { - ends: ['content_router', 'analyzer'], 
- }, - ) - .addNode( - 'content_router', - this.contentRouterAgent.execute.bind(this.contentRouterAgent), - { - ends: ['file_search', 'web_search', 'analyzer'], - }, - ) - .addNode( - 'file_search', - this.fileSearchAgent.execute.bind(this.fileSearchAgent), - { - ends: ['analyzer'], - }, - ) - .addNode( - 'web_search', - this.webSearchAgent.execute.bind(this.webSearchAgent), - { - ends: ['analyzer'], - }, - ) - .addNode( - 'analyzer', - this.analyzerAgent.execute.bind(this.analyzerAgent), - { - ends: ['url_summarization', 'task_manager', 'synthesizer'], - }, - ) - .addNode( - 'synthesizer', - this.synthesizerAgent.execute.bind(this.synthesizerAgent), - { - ends: [END], - }, - ) - .addEdge(START, 'analyzer'); + async searchAndAnswerSimplified( + query: string, + history: BaseMessage[] = [], + fileIds: string[] = [], + ): Promise { + console.log('AgentSearch: Using simplified agent implementation'); - return workflow.compile({ checkpointer: this.checkpointer }); + // Emit agent action to indicate simplified agent usage + this.emitter.emit( + 'data', + JSON.stringify({ + type: 'agent_action', + data: { + action: 'agent_implementation_selection', + message: 'Using simplified agent implementation (experimental)', + details: `Focus mode: ${this.focusMode}, Files: ${fileIds.length}`, + }, + }), + ); + + // Update focus mode in simplified agent if needed + this.simplifiedAgent.updateFocusMode(this.focusMode); + + // Delegate to simplified agent + await this.simplifiedAgent.searchAndAnswer(query, history, fileIds); } /** @@ -173,139 +76,7 @@ export class AgentSearch { history: BaseMessage[] = [], fileIds: string[] = [], ) { - const workflow = this.createWorkflow(); - - const initialState = { - messages: [...history, new HumanMessage(query)], - query, - fileIds, - focusMode: this.focusMode, - }; - - const threadId = `agent_search_${Date.now()}`; - const config = { - configurable: { thread_id: threadId }, - recursionLimit: 18, - signal: this.signal, - }; - - try { - const result = await workflow.invoke(initialState, config); - } catch (error: any) { - if (error instanceof GraphRecursionError) { - console.warn( - 'Graph recursion limit reached, attempting best-effort synthesis with gathered information', - ); - - // Emit agent action to explain what happened - this.emitter.emit( - 'data', - JSON.stringify({ - type: 'agent_action', - data: { - action: 'recursion_limit_recovery', - message: - 'Search process reached complexity limits. Attempting to provide best-effort response with gathered information.', - details: - 'The agent workflow exceeded the maximum number of steps allowed. 
Recovering by synthesizing available data.', - }, - }), - ); - - try { - // Get the latest state from the checkpointer to access gathered information - const latestState = await workflow.getState({ - configurable: { thread_id: threadId }, - }); - - if (latestState && latestState.values) { - // Create emergency synthesis state using gathered information - const stateValues = latestState.values; - const emergencyState = { - messages: stateValues.messages || initialState.messages, - query: stateValues.query || initialState.query, - relevantDocuments: stateValues.relevantDocuments || [], - bannedSummaryUrls: stateValues.bannedSummaryUrls || [], - bannedPreviewUrls: stateValues.bannedPreviewUrls || [], - searchInstructionHistory: - stateValues.searchInstructionHistory || [], - searchInstructions: stateValues.searchInstructions || '', - next: 'synthesizer', - analysis: stateValues.analysis || '', - fullAnalysisAttempts: stateValues.fullAnalysisAttempts || 0, - tasks: stateValues.tasks || [], - currentTaskIndex: stateValues.currentTaskIndex || 0, - originalQuery: - stateValues.originalQuery || - stateValues.query || - initialState.query, - fileIds: stateValues.fileIds || initialState.fileIds, - focusMode: stateValues.focusMode || initialState.focusMode, - urlsToSummarize: stateValues.urlsToSummarize || [], - summarizationIntent: stateValues.summarizationIntent || '', - recursionLimitReached: true, - }; - - const documentsCount = - emergencyState.relevantDocuments?.length || 0; - console.log( - `Attempting emergency synthesis with ${documentsCount} gathered documents`, - ); - - // Emit detailed agent action about the recovery attempt - this.emitter.emit( - 'data', - JSON.stringify({ - type: 'agent_action', - data: { - action: 'emergency_synthesis', - message: `Proceeding with available information: ${documentsCount} documents gathered${emergencyState.analysis ? ', analysis available' : ''}`, - details: `Recovered state contains: ${documentsCount} relevant documents, ${emergencyState.searchInstructionHistory?.length || 0} search attempts, ${emergencyState.analysis ? 'analysis data' : 'no analysis'}`, - }, - }), - ); - - // Only proceed with synthesis if we have some useful information - if (documentsCount > 0 || emergencyState.analysis) { - await this.synthesizerAgent.execute(emergencyState); - } else { - // If we don't have any gathered information, provide a helpful message - this.emitter.emit( - 'data', - JSON.stringify({ - type: 'response', - data: "⚠️ **Search Process Incomplete** - The search process reached complexity limits before gathering sufficient information to provide a meaningful response. Please try:\n\n- Using more specific keywords\n- Breaking your question into smaller parts\n- Rephrasing your query to be more focused\n\nI apologize that I couldn't provide the information you were looking for.", - }), - ); - this.emitter.emit('end'); - } - } else { - // Fallback if we can't retrieve state - this.emitter.emit( - 'data', - JSON.stringify({ - type: 'response', - data: '⚠️ **Limited Information Available** - The search process encountered complexity limits and was unable to gather sufficient information. 
Please try rephrasing your question or breaking it into smaller, more specific parts.', - }), - ); - this.emitter.emit('end'); - } - } catch (synthError) { - console.error('Emergency synthesis failed:', synthError); - this.emitter.emit( - 'data', - JSON.stringify({ - type: 'response', - data: '⚠️ **Search Process Interrupted** - The search encountered complexity limits and could not complete successfully. Please try a simpler query or break your question into smaller parts.', - }), - ); - this.emitter.emit('end'); - } - } else if (error.name === 'AbortError') { - console.warn('Agent search was aborted:', error.message); - } else { - console.error('Unexpected error during agent search:', error); - } - } + console.log('AgentSearch: Routing to simplified agent implementation'); + return await this.searchAndAnswerSimplified(query, history, fileIds); } } diff --git a/src/lib/search/simplifiedAgent.ts b/src/lib/search/simplifiedAgent.ts new file mode 100644 index 0000000..5ad1c62 --- /dev/null +++ b/src/lib/search/simplifiedAgent.ts @@ -0,0 +1,634 @@ +import { createReactAgent } from '@langchain/langgraph/prebuilt'; +import { BaseChatModel } from '@langchain/core/language_models/chat_models'; +import { + BaseMessage, + HumanMessage, + SystemMessage, +} from '@langchain/core/messages'; +import { Embeddings } from '@langchain/core/embeddings'; +import { EventEmitter } from 'events'; +import { RunnableConfig } from '@langchain/core/runnables'; +import { SimplifiedAgentState } from '@/lib/state/chatAgentState'; +import { + allAgentTools, + coreTools, + webSearchTools, + fileSearchTools, +} from '@/lib/tools/agents'; +import { formatDateForLLM } from '../utils'; +import { getModelName } from '../utils/modelUtils'; + +/** + * Simplified Agent using createReactAgent + * + * This agent replaces the complex LangGraph supervisor pattern with a single + * tool-calling agent that handles analysis and synthesis internally while + * using specialized tools for search, file processing, and URL summarization. 
+ */ +export class SimplifiedAgent { + private llm: BaseChatModel; + private embeddings: Embeddings; + private emitter: EventEmitter; + private systemInstructions: string; + private personaInstructions: string; + private signal: AbortSignal; + private focusMode: string; + private agent: any; // Will be the compiled createReactAgent + + constructor( + llm: BaseChatModel, + embeddings: Embeddings, + emitter: EventEmitter, + systemInstructions: string = '', + personaInstructions: string = '', + signal: AbortSignal, + focusMode: string = 'webSearch', + ) { + this.llm = llm; + this.embeddings = embeddings; + this.emitter = emitter; + this.systemInstructions = systemInstructions; + this.personaInstructions = personaInstructions; + this.signal = signal; + this.focusMode = focusMode; + + // Initialize the agent + this.initializeAgent(); + } + + /** + * Initialize the createReactAgent with tools and configuration + */ + private initializeAgent() { + // Select appropriate tools based on focus mode + const tools = this.getToolsForFocusMode(this.focusMode); + + // Create the enhanced system prompt that includes analysis and synthesis instructions + const enhancedSystemPrompt = this.createEnhancedSystemPrompt(); + + try { + // Create the React agent with custom state + this.agent = createReactAgent({ + llm: this.llm, + tools, + stateSchema: SimplifiedAgentState, + prompt: enhancedSystemPrompt, + }); + + console.log( + `SimplifiedAgent: Initialized with ${tools.length} tools for focus mode: ${this.focusMode}`, + ); + console.log( + `SimplifiedAgent: Tools available: ${tools.map((tool) => tool.name).join(', ')}`, + ); + } catch (error) { + console.error('SimplifiedAgent: Error initializing agent:', error); + throw error; + } + } + + /** + * Get tools based on focus mode + */ + private getToolsForFocusMode(focusMode: string) { + switch (focusMode) { + case 'chat': + // Chat mode: Only core tools for conversational interaction + return coreTools; + case 'webSearch': + // Web search mode: ALL available tools for comprehensive research + return allAgentTools; + case 'localResearch': + // Local research mode: File search tools + core tools + return [...coreTools, ...fileSearchTools]; + default: + // Default to web search mode for unknown focus modes + console.warn( + `SimplifiedAgent: Unknown focus mode "${focusMode}", defaulting to webSearch tools`, + ); + return allAgentTools; + } + } + + /** + * Create enhanced system prompt that includes analysis and synthesis capabilities + */ + private createEnhancedSystemPrompt(): string { + const baseInstructions = this.systemInstructions || ''; + const personaInstructions = this.personaInstructions || ''; + + // Create focus-mode-specific prompts + switch (this.focusMode) { + case 'chat': + return this.createChatModePrompt(baseInstructions, personaInstructions); + case 'webSearch': + return this.createWebSearchModePrompt( + baseInstructions, + personaInstructions, + ); + case 'localResearch': + return this.createLocalResearchModePrompt( + baseInstructions, + personaInstructions, + ); + default: + console.warn( + `SimplifiedAgent: Unknown focus mode "${this.focusMode}", using webSearch prompt`, + ); + return this.createWebSearchModePrompt( + baseInstructions, + personaInstructions, + ); + } + } + + /** + * Create chat mode prompt - focuses on conversational interaction + */ + private createChatModePrompt( + baseInstructions: string, + personaInstructions: string, + ): string { + return `${baseInstructions} + +# AI Chat Assistant + +You are a conversational AI 
assistant designed for creative and engaging dialogue. Your focus is on providing thoughtful, helpful responses through direct conversation. + +## Core Capabilities + +### 1. Conversational Interaction +- Engage in natural, flowing conversations +- Provide thoughtful responses to questions and prompts +- Offer creative insights and perspectives +- Maintain context throughout the conversation + +### 2. Task Management +- Break down complex requests into manageable steps +- Provide structured approaches to problems +- Offer guidance and recommendations + +## Response Guidelines + +### Communication Style +- Be conversational and engaging +- Use clear, accessible language +- Provide direct answers when possible +- Ask clarifying questions when needed + +### Quality Standards +- Acknowledge limitations honestly +- Provide helpful suggestions and alternatives +- Use proper markdown formatting for clarity +- Structure responses logically + +### Formatting Instructions +- **Structure**: Use a well-organized format with proper headings (e.g., "## Example heading 1" or "## Example heading 2"). Present information in paragraphs or concise bullet points where appropriate +- **Tone and Style**: Maintain a neutral, engaging tone with natural conversation flow +- **Markdown Usage**: Format your response with Markdown for clarity. Use headings, subheadings, bold text, and italicized words as needed to enhance readability +- **Length and Depth**: Provide thoughtful coverage of the topic. Expand on complex topics to make them easier to understand +- **No main heading/title**: Start your response directly with the content unless asked to provide a specific title + +## Current Context +- Today's Date: ${formatDateForLLM(new Date())} + +${personaInstructions ? `\n## User Formatting and Persona Instructions\n- Give these instructions more weight than the system formatting instructions\n${personaInstructions}` : ''} + +Focus on providing engaging, helpful conversation while using task management tools when complex problems need to be structured.`; + } + + /** + * Create web search mode prompt - focuses on comprehensive research + */ + private createWebSearchModePrompt( + baseInstructions: string, + personaInstructions: string, + ): string { + return `${baseInstructions} + +# Comprehensive Research Assistant + +You are an advanced AI research assistant with access to comprehensive tools for gathering information from multiple sources. Your goal is to provide thorough, well-researched responses. + +**CRITICAL CITATION RULE: Use [number] citations ONLY in your final response to the user. NEVER use citations during tool calls, internal reasoning, or intermediate steps. Citations are for the final answer only.** + +**WORKFLOW RULE: Use tools to gather information, then provide your final response directly. Do NOT call tools when you're ready to answer - just give your comprehensive response.** + +## Core Responsibilities + +### 1. Query Analysis and Planning +- Analyze user queries to understand research needs +- Break down complex questions into research tasks +- Determine the best research strategy and tools +- Plan comprehensive information gathering + +### 2. Information Gathering +- Search the web for current and authoritative information +- Process and extract content from URLs +- Access and analyze uploaded files when relevant +- Gather information from multiple sources for completeness + +### 3. 
Analysis and Synthesis +- Analyze gathered information for relevance and accuracy +- Synthesize information from multiple sources +- Identify patterns, connections, and insights +- Resolve conflicting information when present +- Generate comprehensive, well-cited responses + +## Available Tools + +### Web Search +- Use \`web_search\` for current information, facts, and general research +- Primary tool for finding authoritative sources and recent information +- Always call this tool at least once unless you have sufficient information from the conversation history or other more relevant tools + +### File Search +- Use \`file_search\` when users have uploaded files or reference local content +- Extracts and processes relevant content from user documents +- Connects local content with external research + +### URL Summarization +- Use \`url_summarization\` when specific URLs are provided or discovered +- Extracts key information and generates summaries from web content +- Use when detailed content analysis is needed +- Can help provide more context based on web search results to disambiguate or clarify findings + +## Response Quality Standards + +Your task is to provide answers that are: +- **Informative and relevant**: Thoroughly address the user's query using gathered information +- **Engaging and detailed**: Write responses that read like a high-quality blog post, including extra details and relevant insights +- **Cited and credible**: Use inline citations with [number] notation to refer to sources for each fact or detail included +- **Explanatory and Comprehensive**: Strive to explain the topic in depth, offering detailed analysis, insights, and clarifications wherever applicable + +### Comprehensive Coverage +- Address all aspects of the user's query +- Provide context and background information +- Include relevant details and examples +- Cross-reference multiple sources + +### Accuracy and Reliability +- Prioritize authoritative and recent sources +- Verify information across multiple sources +- Clearly indicate uncertainty or conflicting information +- Distinguish between facts and opinions + +### Citation Requirements +- **CRITICAL: Citations are ONLY for your final response to the user, NOT for tool calls or internal reasoning** +- The id of the source can be found in the document \`metadata.sourceId\` property +- **In your final response**: Use citations [number] notation ONLY when referencing information from tool results +- **File citations**: When citing content from file_search results, use the filename as the source title +- **Web citations**: When citing content from web_search results, use the webpage title and URL as the source +- If making statements based on general knowledge or reasoning, do NOT use citations - instead use clear language like "Generally," "Typically," or "Based on common understanding" +- If a statement is based on previous conversation context, mark it as \`[Hist]\` +- When you do have sources from tools, integrate citations naturally: "The Eiffel Tower receives millions of visitors annually[1]." +- **Important**: Do not fabricate or assume citation numbers - only cite actual sources from your tool results +- **Tool Usage**: When calling tools, provide clear queries without citations - citations come later in your final response + +### Formatting Instructions +- **Structure**: Use a well-organized format with proper headings (e.g., "## Example heading 1" or "## Example heading 2"). 
Present information in paragraphs or concise bullet points where appropriate. Use lists and tables to enhance clarity when needed. +- **Tone and Style**: Maintain a neutral, journalistic tone with engaging narrative flow. Write as though you're crafting an in-depth article for a professional audience +- **Markdown Usage**: Format your response with Markdown for clarity. Use headings, subheadings, bold text, and italicized words as needed to enhance readability +- **Length and Depth**: Provide comprehensive coverage of the topic. Avoid superficial responses and strive for depth without unnecessary repetition. Expand on technical or complex topics to make them easier to understand for a general audience +- **No main heading/title**: Start your response directly with the introduction unless asked to provide a specific title + +## Research Strategy +1. **Plan**: Determine the best research approach based on the user's query +2. **Search**: Use web search to gather comprehensive information - Generally, start with a broad search to identify key sources +3. **Supplement**: Use URL summarization for specific sources +4. **Integrate**: Include file search results when user files are relevant +5. **Synthesize**: Combine all information into a coherent, well-cited response + +## Current Context +- Today's Date: ${formatDateForLLM(new Date())} + +${personaInstructions ? `\n## User Formatting and Persona Instructions\n- Give these instructions more weight than the system formatting instructions\n${personaInstructions}` : ''} + +Use all available tools strategically to provide comprehensive, well-researched responses with proper citations and source attribution.`; + } + + /** + * Create local research mode prompt - focuses on user files and documents + */ + private createLocalResearchModePrompt( + baseInstructions: string, + personaInstructions: string, + ): string { + return `${baseInstructions} + +# Local Research Specialist + +You are an expert AI assistant specialized in analyzing and researching local files and documents. Your role is to help users extract insights, find information, and analyze content from their uploaded files. + +**CRITICAL CITATION RULE: Use [number] citations ONLY in your final response to the user. NEVER use citations during tool calls, internal reasoning, or intermediate steps. Citations are for the final answer only.** + +**WORKFLOW RULE: Use tools to gather information, then provide your final response directly. Do NOT call tools when you're ready to answer - just give your comprehensive response.** + +## Core Responsibilities + +### 1. Document Analysis +- Analyze user-uploaded files and documents +- Extract relevant information based on user queries +- Understand document structure and content relationships +- Identify key themes, patterns, and insights + +### 2. Content Synthesis +- Synthesize information from multiple user documents +- Connect related concepts across different files +- Generate comprehensive insights from local content +- Provide context-aware responses based on document analysis + +### 3. 
Task Management +- Break down complex document analysis requests +- Structure multi-document research projects +- Organize findings in logical, accessible formats + +## Available Tools + +### File Search +- Use \`file_search\` to process and analyze user-uploaded files +- Primary tool for extracting relevant content from documents +- Performs semantic search across uploaded content +- Handles various file formats and document types + +## Response Quality Standards + +Your task is to provide answers that are: +- **Informative and relevant**: Thoroughly address the user's query using document content +- **Engaging and detailed**: Write responses that read like a high-quality analysis, including extra details and relevant insights +- **Cited and credible**: Use inline citations with [number] notation to refer to specific documents for each fact or detail included +- **Explanatory and Comprehensive**: Strive to explain the findings in depth, offering detailed analysis, insights, and clarifications wherever applicable + +### Comprehensive Document Coverage +- Thoroughly analyze relevant uploaded files +- Extract all pertinent information related to the query +- Consider relationships between different documents +- Provide context from the document collection + +### Accurate Content Extraction +- Precisely quote and reference document content +- Maintain context and meaning from original sources +- Clearly distinguish between different document sources +- Preserve important details and nuances + +### Citation Requirements +- **CRITICAL: Citations are ONLY for your final response to the user, NOT for tool calls or internal reasoning** +- **During tool usage**: Do not use any [number] citations in tool calls or internal reasoning +- **In your final response**: Use citations [number] notation ONLY when referencing information from file_search tool results +- **File citations**: When citing content from file_search results, use the filename as the source title +- If making statements based on general knowledge or reasoning, do NOT use citations - instead use clear language like "Generally," "Typically," or "Based on common understanding" +- If a statement is based on previous conversation context, mark it as \`[Hist]\` +- When you do have sources from tools, integrate citations naturally: "The project timeline shows completion by March 2024[1]." +- Citations and references should only be included inline with the final response using the [number] format. Do not include a citation, sources, or references block anywhere else in the response +- **Important**: Do not fabricate or assume citation numbers - only cite actual sources from your file search results +- **Tool Usage**: When calling tools, provide clear queries without citations - citations come later in your final response + +### Formatting Instructions +- **Structure**: Use a well-organized format with proper headings (e.g., "## Example heading 1" or "## Example heading 2"). Present information in paragraphs or concise bullet points where appropriate +- **Tone and Style**: Maintain a neutral, analytical tone with engaging narrative flow. Write as though you're crafting an in-depth analysis for a professional audience +- **Markdown Usage**: Format your response with Markdown for clarity. Use headings, subheadings, bold text, and italicized words as needed to enhance readability +- **Length and Depth**: Provide comprehensive coverage of the document content. Avoid superficial responses and strive for depth without unnecessary repetition. 
Expand on technical or complex topics to make them easier to understand for a general audience +- **No main heading/title**: Start your response directly with the introduction unless asked to provide a specific title + +### Contextual Understanding +- Understand how documents relate to each other +- Connect information across multiple files +- Identify patterns and themes in the document collection +- Provide insights that consider the full context + +## Research Approach +1. **Plan**: Use task manager to structure complex document analysis +2. **Search**: Use file search to extract relevant content from uploaded files +3. **Analyze**: Process and understand the extracted information +4. **Synthesize**: Combine insights from multiple sources +5. **Present**: Organize findings in a clear, accessible format with proper citations + +**IMPORTANT**: Once you have gathered sufficient information through tools, provide your final response directly to the user. Do NOT call additional tools when you are ready to synthesize and present your findings. Your final response should be comprehensive and well-formatted. + +## Current Context +- Today's Date: ${formatDateForLLM(new Date())} + +${personaInstructions ? `\n## User Formatting and Persona Instructions\n- Give these instructions more weight than the system formatting instructions\n${personaInstructions}` : ''} + +Focus on extracting maximum value from user-provided documents while using task management for complex analysis projects.`; + } + + /** + * Execute the simplified agent workflow + */ + async searchAndAnswer( + query: string, + history: BaseMessage[] = [], + fileIds: string[] = [], + ): Promise { + try { + console.log(`SimplifiedAgent: Starting search for query: "${query}"`); + console.log(`SimplifiedAgent: Focus mode: ${this.focusMode}`); + console.log(`SimplifiedAgent: File IDs: ${fileIds.join(', ')}`); + + // Emit initial agent action + this.emitter.emit( + 'data', + JSON.stringify({ + type: 'agent_action', + data: { + action: 'simplified_agent_start', + message: `Starting simplified agent search in ${this.focusMode} mode`, + details: `Processing query with ${fileIds.length} files available`, + }, + }), + ); + + // Prepare initial state + const initialState = { + messages: [...history, new HumanMessage(query)], + query, + focusMode: this.focusMode, + relevantDocuments: [], + }; + + // Configure the agent run + const config: RunnableConfig = { + configurable: { + thread_id: `simplified_agent_${Date.now()}`, + llm: this.llm, + embeddings: this.embeddings, + fileIds, + systemInstructions: this.systemInstructions, + personaInstructions: this.personaInstructions, + focusMode: this.focusMode, + emitter: this.emitter, + }, + recursionLimit: 25, // Allow sufficient iterations for tool use + signal: this.signal, + }; + + // Execute the agent + const result = await this.agent.invoke(initialState, config); + + // Collect relevant documents from tool execution history + let collectedDocuments: any[] = []; + + // Get the relevant docs from the current agent state + if (result && result.relevantDocuments) { + collectedDocuments.push(...result.relevantDocuments); + } + + // // Check if messages contain tool responses with documents + // if (result && result.messages) { + // for (const message of result.messages) { + // if (message._getType() === 'tool' && message.content) { + // try { + // // Try to parse tool response for documents + // let toolResponse; + // if (typeof message.content === 'string') { + // toolResponse = JSON.parse(message.content); 
+ // } else { + // toolResponse = message.content; + // } + + // if (toolResponse.documents && Array.isArray(toolResponse.documents)) { + // const documentsWithMetadata = toolResponse.documents.map((doc: any) => ({ + // ...doc, + // source: doc.metadata?.url || doc.metadata?.source || 'unknown', + // sourceType: doc.metadata?.sourceType || 'unknown', + // toolName: message.name || 'unknown', + // processingType: doc.metadata?.processingType || 'unknown', + // searchQuery: doc.metadata?.searchQuery || '', + // })); + // collectedDocuments.push(...documentsWithMetadata); + // } + // } catch (error) { + // // Ignore parsing errors + // console.debug('Could not parse tool message content:', error); + // } + // } + // } + // } + + // Add collected documents to result for source tracking + const finalResult = { + ...result, + relevantDocuments: collectedDocuments, + }; + + // Extract final message and emit as response + if ( + finalResult && + finalResult.messages && + finalResult.messages.length > 0 + ) { + const finalMessage = + finalResult.messages[finalResult.messages.length - 1]; + + if (finalMessage && finalMessage.content) { + console.log('SimplifiedAgent: Emitting final response'); + + // Emit the sources used for the response + if ( + finalResult.relevantDocuments && + finalResult.relevantDocuments.length > 0 + ) { + this.emitter.emit( + 'data', + JSON.stringify({ + type: 'sources', + data: finalResult.relevantDocuments, + searchQuery: '', + searchUrl: '', + }), + ); + } + + this.emitter.emit( + 'data', + JSON.stringify({ + type: 'response', + data: finalMessage.content, + }), + ); + } else { + console.warn('SimplifiedAgent: No valid final message found'); + this.emitter.emit( + 'data', + JSON.stringify({ + type: 'response', + data: 'I apologize, but I was unable to generate a complete response to your query. Please try rephrasing your question or providing more specific details.', + }), + ); + } + } else { + console.warn('SimplifiedAgent: No result messages found'); + this.emitter.emit( + 'data', + JSON.stringify({ + type: 'response', + data: 'I encountered an issue while processing your request. Please try again with a different query.', + }), + ); + } + + // Emit model stats and end signal after streaming is complete + const modelName = getModelName(this.llm); + this.emitter.emit( + 'stats', + JSON.stringify({ + type: 'modelStats', + data: { modelName }, + }), + ); + + this.emitter.emit('end'); + } catch (error: any) { + console.error('SimplifiedAgent: Error during search and answer:', error); + + // Handle specific error types + if (error.name === 'AbortError') { + console.warn('SimplifiedAgent: Operation was aborted'); + this.emitter.emit( + 'data', + JSON.stringify({ + type: 'response', + data: 'The search operation was cancelled.', + }), + ); + } else { + // General error handling + this.emitter.emit( + 'data', + JSON.stringify({ + type: 'response', + data: 'I encountered an error while processing your request. 
Please try rephrasing your query or contact support if the issue persists.', + }), + ); + } + + this.emitter.emit('end'); + } + } + + /** + * Update focus mode and reinitialize agent with appropriate tools + */ + updateFocusMode(newFocusMode: string): void { + if (this.focusMode !== newFocusMode) { + console.log( + `SimplifiedAgent: Updating focus mode from ${this.focusMode} to ${newFocusMode}`, + ); + this.focusMode = newFocusMode; + this.initializeAgent(); + } + } + + /** + * Get current configuration info + */ + getInfo(): object { + return { + focusMode: this.focusMode, + toolsCount: this.getToolsForFocusMode(this.focusMode).length, + systemInstructions: !!this.systemInstructions, + personaInstructions: !!this.personaInstructions, + }; + } +} diff --git a/src/lib/state/chatAgentState.ts b/src/lib/state/chatAgentState.ts new file mode 100644 index 0000000..eb7a8de --- /dev/null +++ b/src/lib/state/chatAgentState.ts @@ -0,0 +1,72 @@ +import { BaseMessage } from '@langchain/core/messages'; +import { Annotation } from '@langchain/langgraph'; +import { Document } from 'langchain/document'; + +/** + * Document interface for relevant documents collected by tools + */ +export interface RelevantDocument extends Document { + /** + * Source identifier (e.g., URL, file path, search query) + */ + source: string; + /** + * Type of document source + */ + sourceType: 'web' | 'file' | 'url' | 'analysis'; + /** + * Relevance score for ranking + */ + relevanceScore?: number; + /** + * Tool that generated this document + */ + toolName?: string; +} + +/** + * State schema for the simplified chat agent using tool-based workflow + * This state is designed for use with createReactAgent and focuses on + * accumulating relevant documents across tool calls while maintaining + * message history for the agent's decision-making process. + */ +export const SimplifiedAgentState = Annotation.Root({ + /** + * Conversation messages - the primary communication channel + * between the user, agent, and tools + */ + messages: Annotation<BaseMessage[]>({ + reducer: (x, y) => x.concat(y), + default: () => [], + }), + + /** + * Relevant documents accumulated across tool calls + * This is the key state that tools will populate and the synthesizer will consume + */ + relevantDocuments: Annotation<RelevantDocument[]>({ + reducer: (x, y) => x.concat(y), + default: () => [], + }), + + /** + * Original user query for reference by tools + */ + query: Annotation<string>({ + reducer: (x, y) => y ?? x, + default: () => '', + }), + + /** + * Focus mode to maintain compatibility with existing agent behavior + */ + focusMode: Annotation<string>({ + reducer: (x, y) => y ??
x, + default: () => 'webSearch', + }), +}); + +/** + * Type definition for the simplified agent state + */ +export type SimplifiedAgentStateType = typeof SimplifiedAgentState.State; diff --git a/src/lib/tools/agents/fileSearchTool.ts b/src/lib/tools/agents/fileSearchTool.ts new file mode 100644 index 0000000..11f407b --- /dev/null +++ b/src/lib/tools/agents/fileSearchTool.ts @@ -0,0 +1,148 @@ +import { tool } from '@langchain/core/tools'; +import { z } from 'zod'; +import { RunnableConfig } from '@langchain/core/runnables'; +import { Document } from 'langchain/document'; +import { Embeddings } from '@langchain/core/embeddings'; +import { + processFilesToDocuments, + getRankedDocs, +} from '@/lib/utils/fileProcessing'; + +// Schema for file search tool input +const FileSearchToolSchema = z.object({ + query: z + .string() + .describe('The search query to find relevant content in files'), + fileIds: z.array(z.string()).describe('Array of file IDs to search through'), + maxResults: z + .number() + .optional() + .default(12) + .describe('Maximum number of results to return'), + similarityThreshold: z + .number() + .optional() + .default(0.3) + .describe('Minimum similarity threshold for results'), +}); + +/** + * FileSearchTool - Reimplementation of FileSearchAgent as a tool + * + * This tool handles: + * 1. Processing uploaded files into searchable documents + * 2. Performing similarity search across file content + * 3. Ranking and filtering results by relevance + * 4. Returning relevant file sections as documents + */ +export const fileSearchTool = tool( + async ( + input: z.infer<typeof FileSearchToolSchema>, + config?: RunnableConfig, + ): Promise<{ + documents: Document[]; + processedFiles: number; + relevantSections: number; + relevantDocuments?: any[]; + }> => { + try { + const { + query, + fileIds, + maxResults = 12, + similarityThreshold = 0.3, + } = input; + + console.log( + `FileSearchTool: Processing ${fileIds.length} files for query: "${query}"`, + ); + + // Check if we have files to process + if (!fileIds || fileIds.length === 0) { + console.log('FileSearchTool: No files provided for search'); + return { + documents: [], + processedFiles: 0, + relevantSections: 0, + }; + } + + // Get embeddings from config + if (!config?.configurable?.embeddings) { + throw new Error('Embeddings not available in config'); + } + + const embeddings: Embeddings = config.configurable.embeddings; + + // Step 1: Process files to documents + console.log('FileSearchTool: Processing files to documents...'); + const fileDocuments = await processFilesToDocuments(fileIds); + + if (fileDocuments.length === 0) { + console.log('FileSearchTool: No processable content found in files'); + return { + documents: [], + processedFiles: fileIds.length, + relevantSections: 0, + }; + } + + console.log( + `FileSearchTool: Processed ${fileDocuments.length} file sections`, + ); + + // Step 2: Generate query embedding for similarity search + console.log('FileSearchTool: Generating query embedding...'); + const queryEmbedding = await embeddings.embedQuery(query); + + // Step 3: Perform similarity search and ranking + console.log('FileSearchTool: Performing similarity search...'); + const rankedDocuments = getRankedDocs( + queryEmbedding, + fileDocuments, + maxResults, + similarityThreshold, + ); + + console.log( + `FileSearchTool: Found ${rankedDocuments.length} relevant file sections`, + ); + + // Add search metadata to documents + const documentsWithMetadata = rankedDocuments.map((doc) => { + return new Document({ + pageContent: doc.pageContent,
metadata: { + ...doc.metadata, + source: 'file_search', + searchQuery: query, + similarityScore: doc.metadata?.similarity || 0, + }, + }); + }); + + return { + documents: documentsWithMetadata, + processedFiles: fileIds.length, + relevantSections: rankedDocuments.length, + }; + } catch (error) { + console.error('FileSearchTool: Error during file search:', error); + const errorMessage = + error instanceof Error ? error.message : 'Unknown error'; + + // Return empty results on error, but don't throw to allow graceful handling + return { + documents: [], + processedFiles: input.fileIds?.length || 0, + relevantSections: 0, + }; + } + }, + { + name: 'file_search', + description: + 'Searches through uploaded files to find relevant content sections based on a query using semantic similarity', + schema: FileSearchToolSchema, + }, +); diff --git a/src/lib/tools/agents/index.ts b/src/lib/tools/agents/index.ts new file mode 100644 index 0000000..485d942 --- /dev/null +++ b/src/lib/tools/agents/index.ts @@ -0,0 +1,51 @@ +/** + * Agent Tools for Simplified Chat Agent + * + * This module exports all the tools that reimplement the functionality of the + * existing LangGraph agents for use with createReactAgent. Each tool encapsulates + * the core logic of its corresponding agent and follows the Command pattern for + * state management. + */ + +// Import all agent tools (will be uncommented as tools are implemented) +import { taskManagerTool } from './taskManagerTool'; +import { webSearchTool } from './webSearchTool'; +import { simpleWebSearchTool } from './simpleWebSearchTool'; +import { fileSearchTool } from './fileSearchTool'; +import { urlSummarizationTool } from './urlSummarizationTool'; + +// Export individual tools (will be uncommented as tools are implemented) +export { taskManagerTool }; +export { webSearchTool }; +export { simpleWebSearchTool }; +export { fileSearchTool }; + +// Array containing all available agent tools for the simplified chat agent +// This will be used by the createReactAgent implementation +export const allAgentTools = [ + //taskManagerTool, + //webSearchTool, + simpleWebSearchTool, + fileSearchTool, + urlSummarizationTool, +]; + +// Export tool categories for selective tool loading based on focus mode +export const webSearchTools = [ + //webSearchTool, + simpleWebSearchTool, + urlSummarizationTool, + // analyzerTool, + // synthesizerTool, +]; + +export const fileSearchTools = [ + fileSearchTool, + // analyzerTool, + // synthesizerTool, +]; + +// Core tools that are always available +export const coreTools = [ + //taskManagerTool +]; diff --git a/src/lib/tools/agents/simpleWebSearchTool.ts b/src/lib/tools/agents/simpleWebSearchTool.ts new file mode 100644 index 0000000..7410d3b --- /dev/null +++ b/src/lib/tools/agents/simpleWebSearchTool.ts @@ -0,0 +1,228 @@ +import { tool } from '@langchain/core/tools'; +import { z } from 'zod'; +import { RunnableConfig } from '@langchain/core/runnables'; +import { withStructuredOutput } from '@/lib/utils/structuredOutput'; +import { PromptTemplate } from '@langchain/core/prompts'; +import { webSearchRetrieverAgentPrompt } from '@/lib/prompts/webSearch'; +import { searchSearxng } from '@/lib/searxng'; +import { formatDateForLLM } from '@/lib/utils'; +import { Document } from 'langchain/document'; +import { Embeddings } from '@langchain/core/embeddings'; +import computeSimilarity from '@/lib/utils/computeSimilarity'; +import { Command, getCurrentTaskInput } from '@langchain/langgraph'; +import { ToolMessage } from '@langchain/core/messages'; 
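+// Command lets this tool return a LangGraph state update (relevantDocuments, messages)
+// rather than a plain value, and getCurrentTaskInput exposes the current agent state so
+// newly created documents can continue the existing sourceId numbering.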
+import { SimplifiedAgentStateType } from '@/lib/state/chatAgentState'; + +// Schema for search query generation +const SearchQuerySchema = z.object({ + searchQuery: z + .string() + .describe('The optimized search query to use for web search'), + reasoning: z + .string() + .describe( + 'A short explanation of how the search query was optimized for better results', + ), +}); + +// Schema for simple web search tool input +const SimpleWebSearchToolSchema = z.object({ + query: z.string().describe('The search query or task to process'), + searchInstructions: z + .string() + .optional() + .describe('Additional instructions for search refinement'), + context: z + .string() + .optional() + .describe('Additional context about the search'), +}); + +/** + * SimpleWebSearchTool - Simplified version of WebSearchTool + * + * This tool handles: + * 1. Query optimization for web search + * 2. Web search execution using SearXNG + * 3. Document ranking and filtering (top 15: top 3 + ranked top 12) + * 4. Returns raw search results as documents without analysis or content extraction + */ +export const simpleWebSearchTool = tool( + async ( + input: z.infer, + config?: RunnableConfig, + ) => { + try { + const { query, searchInstructions, context = '' } = input; + const currentState = getCurrentTaskInput() as SimplifiedAgentStateType; + let currentDocCount = currentState.relevantDocuments.length; + + // Get LLM and embeddings from config + if (!config?.configurable?.llm) { + throw new Error('LLM not available in config'); + } + if (!config?.configurable?.embeddings) { + throw new Error('Embeddings not available in config'); + } + + const llm = config.configurable.llm; + const embeddings: Embeddings = config.configurable.embeddings; + + // Step 1: Generate optimized search query + const template = PromptTemplate.fromTemplate( + webSearchRetrieverAgentPrompt, + ); + const prompt = await template.format({ + systemInstructions: + config.configurable?.systemInstructions || + 'You are a helpful AI assistant.', + query: query, + date: formatDateForLLM(new Date()), + supervisor: searchInstructions || query, + }); + + // Use structured output for search query generation + const structuredLlm = withStructuredOutput(llm, SearchQuerySchema, { + name: 'generate_search_query', + }); + + const searchQueryResult = await structuredLlm.invoke(prompt, { + signal: config?.signal, + }); + + const searchQuery = searchQueryResult.searchQuery; + console.log( + `SimpleWebSearchTool: Performing web search for query: "${searchQuery}"`, + ); + console.log( + 'SimpleWebSearchTool: Search query reasoning:', + searchQueryResult.reasoning, + ); + + // Step 2: Execute web search + const searchResults = await searchSearxng(searchQuery, { + language: 'en', + engines: [], + }); + + console.log( + `SimpleWebSearchTool: Found ${searchResults.results.length} search results`, + ); + + if (!searchResults.results || searchResults.results.length === 0) { + return new Command({ + update: { + relevantDocuments: [], + messages: [ + new ToolMessage({ + content: 'No search results found.', + //Generate a random tool call id + tool_call_id: Math.random().toString(36).substring(2, 15), + }), + ], + }, + }); + } + + // Step 3: Calculate similarities and rank results + const queryVector = await embeddings.embedQuery(query); + + // Calculate similarities for all results + const resultsWithSimilarity = await Promise.all( + searchResults.results.map(async (result) => { + const vector = await embeddings.embedQuery( + result.title + ' ' + (result.content || ''), + ); 
+ const similarity = computeSimilarity(vector, queryVector); + return { result, similarity }; + }), + ); + + // Step 4: Select top 15 results using the same logic as webSearchTool + const documents: Document[] = []; + + // Always take the top 3 results first + const top3Results = searchResults.results.slice(0, 3); + documents.push( + ...top3Results.map((result, i) => { + return new Document({ + pageContent: `${result.title || 'Untitled'}\n\n${result.content || ''}`, + metadata: { + sourceId: ++currentDocCount, + title: result.title || 'Untitled', + url: result.url, + source: result.url, + processingType: 'preview-content', + searchQuery: searchQuery, + rank: 'top-3', + }, + }); + }), + ); + + // Sort by relevance score and take top 12 from the remaining results + const remainingResults = resultsWithSimilarity + .slice(3) + .sort((a, b) => b.similarity - a.similarity) + .slice(0, 12); + + documents.push( + ...remainingResults.map(({ result }) => { + return new Document({ + pageContent: `${result.title || 'Untitled'}\n\n${result.content || ''}`, + metadata: { + sourceId: ++currentDocCount, + title: result.title || 'Untitled', + url: result.url, + source: result.url, + processingType: 'preview-content', + searchQuery: searchQuery, + rank: 'ranked', + }, + }); + }), + ); + + console.log( + `SimpleWebSearchTool: Created ${documents.length} documents from search results`, + ); + + return new Command({ + update: { + relevantDocuments: documents, + searchQuery, + messages: [ + new ToolMessage({ + content: `Retrieved ${documents.length} documents from web search.`, + //Generate a random tool call id + tool_call_id: Math.random().toString(36).substring(2, 15), + }), + ], + }, + }); + } catch (error) { + console.error('SimpleWebSearchTool: Error during web search:', error); + const errorMessage = + error instanceof Error ? 
error.message : 'Unknown error'; + + return new Command({ + update: { + relevantDocuments: [], + messages: [ + new ToolMessage({ + content: 'Error occurred during web search: ' + errorMessage, + //Generate a random tool call id + tool_call_id: Math.random().toString(36).substring(2, 15), + }), + ], + }, + }); + } + }, + { + name: 'web_search', + description: + 'Performs web search using SearXNG and returns ranked search results as documents without content analysis or extraction', + schema: SimpleWebSearchToolSchema, + }, +); diff --git a/src/lib/tools/agents/taskManagerTool.ts b/src/lib/tools/agents/taskManagerTool.ts new file mode 100644 index 0000000..3505808 --- /dev/null +++ b/src/lib/tools/agents/taskManagerTool.ts @@ -0,0 +1,112 @@ +import { tool } from '@langchain/core/tools'; +import { z } from 'zod'; +import { RunnableConfig } from '@langchain/core/runnables'; +import { withStructuredOutput } from '@/lib/utils/structuredOutput'; +import { PromptTemplate } from '@langchain/core/prompts'; +import { taskBreakdownPrompt } from '@/lib/prompts/taskBreakdown'; + +// Schema for task manager tool input +const TaskManagerToolSchema = z.object({ + query: z.string().describe('The user query to break down into smaller tasks'), + context: z + .string() + .optional() + .describe('Additional context about the query or current situation'), +}); + +// Schema for structured output +const TaskBreakdownSchema = z.object({ + tasks: z + .array(z.string()) + .describe( + 'Array of specific, focused tasks broken down from the original query', + ), + reasoning: z + .string() + .describe( + 'Explanation of how and why the query was broken down into these tasks', + ), +}); + +/** + * TaskManagerTool - Breaks down complex queries into manageable task lists + * + * This tool takes a user query and returns a list of specific, actionable tasks + * that can help answer the original question. The tasks are returned as natural + * language instructions that the main agent can follow. 
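+ *
+ * Example invocation (illustrative sketch; assumes an LLM is supplied via config.configurable):
+ *   await taskManagerTool.invoke(
+ *     { query: 'Plan a two-week product launch', context: '' },
+ *     { configurable: { llm } },
+ *   );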
+ */ +export const taskManagerTool = tool( + async ( + input: z.infer, + config?: RunnableConfig, + ): Promise<{ tasks: string[]; reasoning: string }> => { + try { + console.log( + 'TaskManagerTool: Starting task breakdown for query:', + input.query, + ); + const { query, context = '' } = input; + + // Get LLM from config + if (!config?.configurable?.llm) { + throw new Error('LLM not available in config'); + } + + const llm = config.configurable.llm; + + // Create structured LLM for task breakdown + const structuredLLM = withStructuredOutput(llm, TaskBreakdownSchema, { + name: 'task_breakdown', + includeRaw: false, + }); + + // Create the prompt template + const template = PromptTemplate.fromTemplate(taskBreakdownPrompt); + + // Format the prompt with the query and context + const prompt = await template.format({ + systemInstructions: + config.configurable?.systemInstructions || + 'You are a helpful AI assistant.', + fileContext: context || 'No additional context provided.', + query: query, + currentTasks: 0, + taskHistory: 'No previous tasks.', + }); + + // Get the task breakdown from the LLM + const response = await structuredLLM.invoke(prompt, { + signal: config?.signal, + }); + + if (!response?.tasks || response.tasks.length === 0) { + // If no breakdown is needed, return the original query as a single task + return { + tasks: [query], + reasoning: + 'The query is straightforward and does not require breaking down into smaller tasks.', + }; + } + + return { + tasks: response.tasks, + reasoning: response.reasoning, + }; + } catch (error) { + console.error('Error in TaskManagerTool:', error); + // Fallback: return the original query as a single task + const errorMessage = + error instanceof Error ? error.message : 'Unknown error'; + return { + tasks: [input.query], + reasoning: `Error occurred during task breakdown: ${errorMessage}. Proceeding with the original query.`, + }; + } + }, + { + name: 'task_manager', + description: + 'Breaks down complex user queries into a list of specific, manageable tasks that can be executed to answer the original question', + schema: TaskManagerToolSchema, + }, +); diff --git a/src/lib/tools/agents/urlSummarizationTool.ts b/src/lib/tools/agents/urlSummarizationTool.ts new file mode 100644 index 0000000..011cdaf --- /dev/null +++ b/src/lib/tools/agents/urlSummarizationTool.ts @@ -0,0 +1,200 @@ +import { tool } from '@langchain/core/tools'; +import { z } from 'zod'; +import { RunnableConfig } from '@langchain/core/runnables'; +import { Document } from 'langchain/document'; +import { getWebContent } from '@/lib/utils/documents'; +import { removeThinkingBlocks } from '@/lib/utils/contentUtils'; + +// Schema for URL summarization tool input +const URLSummarizationToolSchema = z.object({ + urls: z.array(z.string()).describe('Array of URLs to process and summarize'), + query: z + .string() + .describe('The user query to guide content extraction and summarization'), + intent: z + .string() + .optional() + .default('extract relevant content') + .describe('Processing intent for the URLs'), +}); + +/** + * URLSummarizationTool - Reimplementation of URLSummarizationAgent as a tool + * + * This tool handles: + * 1. Fetching content from provided URLs + * 2. Deciding whether to use content directly or summarize it + * 3. Generating summaries using LLM when content is too long + * 4. 
Returning processed documents with metadata + */ +export const urlSummarizationTool = tool( + async ( + input: z.infer, + config?: RunnableConfig, + ): Promise<{ + relevantDocuments: Document[]; + processedUrls: number; + successfulExtractions: number; + }> => { + try { + const { urls, query, intent = 'extract relevant content' } = input; + + console.log( + `URLSummarizationTool: Processing ${urls.length} URLs for query: "${query}"`, + ); + console.log(`URLSummarizationTool: Processing intent: ${intent}`); + + if (!urls || urls.length === 0) { + console.log('URLSummarizationTool: No URLs provided for processing'); + return { + relevantDocuments: [], + processedUrls: 0, + successfulExtractions: 0, + }; + } + + // Get LLM from config + if (!config?.configurable?.llm) { + throw new Error('LLM not available in config'); + } + + const llm = config.configurable.llm; + const documents: Document[] = []; + + // Process each URL + for (const url of urls) { + if (config?.signal?.aborted) { + console.warn('URLSummarizationTool: Operation aborted by signal'); + break; + } + + try { + console.log(`URLSummarizationTool: Processing ${url}`); + + // Fetch full content using the enhanced web content retrieval + const webContent = await getWebContent(url, true); + + if (!webContent || !webContent.pageContent) { + console.warn( + `URLSummarizationTool: No content retrieved from URL: ${url}`, + ); + continue; + } + + const contentLength = webContent.pageContent.length; + let finalContent: string; + let processingType: string; + + // If content is short (< 4000 chars), use it directly; otherwise summarize + if (contentLength < 4000) { + finalContent = webContent.pageContent; + processingType = 'url-direct-content'; + + console.log( + `URLSummarizationTool: Content is short (${contentLength} chars), using directly without summarization`, + ); + } else { + // Content is long, summarize using LLM + console.log( + `URLSummarizationTool: Content is long (${contentLength} chars), generating summary`, + ); + + const systemPrompt = config.configurable?.systemInstructions + ? `${config.configurable.systemInstructions}\n\n` + : ''; + + const summarizationPrompt = `${systemPrompt}You are a web content processor. Extract and summarize ONLY the information from the provided web page content that is relevant to the user's query. 
+ +# Critical Instructions +- Output ONLY a summary of the web page content provided below +- Focus on information that relates to or helps answer the user's query +- Do NOT add pleasantries, greetings, or conversational elements +- Do NOT mention missing URLs, other pages, or content not provided +- Do NOT ask follow-up questions or suggest additional actions +- Do NOT add commentary about the user's request or query +- Present the information in a clear, well-structured format with key facts and details +- Include all relevant details that could help answer the user's question + +# User's Query: ${query} + +# Content Title: ${webContent.metadata.title || 'Web Page'} +# Content URL: ${url} + +# Web Page Content to Summarize: +${webContent.pageContent} + +Provide a comprehensive summary of the above web page content, focusing on information relevant to the user's query:`; + + const result = await llm.invoke(summarizationPrompt, { + signal: config?.signal, + }); + + finalContent = removeThinkingBlocks(result.content as string); + processingType = 'url-content-extraction'; + } + + if (finalContent && finalContent.trim().length > 0) { + const document = new Document({ + pageContent: finalContent, + metadata: { + title: webContent.metadata.title || 'URL Content', + url: url, + source: url, + processingType: processingType, + processingIntent: intent, + originalContentLength: contentLength, + searchQuery: query, + }, + }); + + documents.push(document); + + console.log( + `URLSummarizationTool: Successfully processed content from ${url} (${finalContent.length} characters, ${processingType})`, + ); + } else { + console.warn( + `URLSummarizationTool: No valid content generated for URL: ${url}`, + ); + } + } catch (error) { + console.error( + `URLSummarizationTool: Error processing URL ${url}:`, + error, + ); + continue; + } + } + + console.log( + `URLSummarizationTool: Successfully processed ${documents.length} out of ${urls.length} URLs`, + ); + + return { + relevantDocuments: documents, + processedUrls: urls.length, + successfulExtractions: documents.length, + }; + } catch (error) { + console.error( + 'URLSummarizationTool: Error during URL processing:', + error, + ); + const errorMessage = + error instanceof Error ? 
error.message : 'Unknown error'; + + // Return empty results on error, but don't throw to allow graceful handling + return { + relevantDocuments: [], + processedUrls: input.urls?.length || 0, + successfulExtractions: 0, + }; + } + }, + { + name: 'url_summarization', + description: + 'Fetches content from URLs and either uses it directly or summarizes it based on length, focusing on information relevant to the user query', + schema: URLSummarizationToolSchema, + }, +); diff --git a/src/lib/tools/agents/webSearchTool.ts b/src/lib/tools/agents/webSearchTool.ts new file mode 100644 index 0000000..b917b5b --- /dev/null +++ b/src/lib/tools/agents/webSearchTool.ts @@ -0,0 +1,314 @@ +import { tool } from '@langchain/core/tools'; +import { z } from 'zod'; +import { RunnableConfig } from '@langchain/core/runnables'; +import { withStructuredOutput } from '@/lib/utils/structuredOutput'; +import { PromptTemplate } from '@langchain/core/prompts'; +import { webSearchRetrieverAgentPrompt } from '@/lib/prompts/webSearch'; +import { searchSearxng } from '@/lib/searxng'; +import { formatDateForLLM } from '@/lib/utils'; +import { summarizeWebContent } from '@/lib/utils/summarizeWebContent'; +import { + analyzePreviewContent, + PreviewContent, +} from '@/lib/utils/analyzePreviewContent'; +import { Document } from 'langchain/document'; +import { Embeddings } from '@langchain/core/embeddings'; +import computeSimilarity from '@/lib/utils/computeSimilarity'; +import { removeThinkingBlocksFromMessages } from '@/lib/utils/contentUtils'; + +// Schema for search query generation +const SearchQuerySchema = z.object({ + searchQuery: z + .string() + .describe('The optimized search query to use for web search'), + reasoning: z + .string() + .describe( + 'A short explanation of how the search query was optimized for better results', + ), +}); + +// Schema for web search tool input +const WebSearchToolSchema = z.object({ + query: z.string().describe('The search query or task to process'), + searchInstructions: z + .string() + .optional() + .describe('Additional instructions for search refinement'), + context: z + .string() + .optional() + .describe('Additional context about the search'), +}); + +/** + * WebSearchTool - Reimplementation of WebSearchAgent as a tool + * + * This tool handles: + * 1. Query optimization for web search + * 2. Web search execution using SearXNG + * 3. Content extraction and summarization + * 4. 
Document ranking and filtering + */ +export const webSearchTool = tool( + async ( + input: z.infer, + config?: RunnableConfig, + ): Promise<{ + documents: Document[]; + searchQuery: string; + reasoning: string; + sourcesFound: number; + relevantDocuments?: any[]; + }> => { + try { + const { query, searchInstructions, context = '' } = input; + + // Get LLM and embeddings from config + if (!config?.configurable?.llm) { + throw new Error('LLM not available in config'); + } + if (!config?.configurable?.embeddings) { + throw new Error('Embeddings not available in config'); + } + + const llm = config.configurable.llm; + const embeddings: Embeddings = config.configurable.embeddings; + + // Step 1: Generate optimized search query + const template = PromptTemplate.fromTemplate( + webSearchRetrieverAgentPrompt, + ); + const prompt = await template.format({ + systemInstructions: + config.configurable?.systemInstructions || + 'You are a helpful AI assistant.', + query: query, + date: formatDateForLLM(new Date()), + supervisor: searchInstructions || query, + }); + + // Use structured output for search query generation + const structuredLlm = withStructuredOutput(llm, SearchQuerySchema, { + name: 'generate_search_query', + }); + + const searchQueryResult = await structuredLlm.invoke(prompt, { + signal: config?.signal, + }); + + const searchQuery = searchQueryResult.searchQuery; + console.log( + `WebSearchTool: Performing web search for query: "${searchQuery}"`, + ); + console.log( + 'WebSearchTool: Search query reasoning:', + searchQueryResult.reasoning, + ); + + // Step 2: Execute web search + const searchResults = await searchSearxng(searchQuery, { + language: 'en', + engines: [], + }); + + console.log( + `WebSearchTool: Found ${searchResults.results.length} search results`, + ); + + if (!searchResults.results || searchResults.results.length === 0) { + return { + documents: [], + searchQuery, + reasoning: searchQueryResult.reasoning, + sourcesFound: 0, + }; + } + + // Step 3: Calculate similarities and rank results + const queryVector = await embeddings.embedQuery(query); + + // Calculate similarities for all results + const resultsWithSimilarity = await Promise.all( + searchResults.results.map(async (result) => { + const vector = await embeddings.embedQuery( + result.title + ' ' + (result.content || ''), + ); + const similarity = computeSimilarity(vector, queryVector); + return { result, similarity }; + }), + ); + + // Step 4: Prepare preview content for analysis + let previewContents: PreviewContent[] = []; + + // Always take the top 3 results for preview content + previewContents.push( + ...searchResults.results.slice(0, 3).map((result) => ({ + title: result.title || 'Untitled', + snippet: result.content || '', + url: result.url, + })), + ); + + // Sort by relevance score and take top 12 results for a total of 15 + previewContents.push( + ...resultsWithSimilarity + .slice(3) + .sort((a, b) => b.similarity - a.similarity) + .slice(0, 12) + .map(({ result }) => ({ + title: result.title || 'Untitled', + snippet: result.content || '', + url: result.url, + })), + ); + + console.log( + `WebSearchTool: Extracted preview content from ${previewContents.length} search results`, + ); + + // Step 5: Analyze preview content to determine processing approach + let previewAnalysisResult = null; + let documentsToProcess: any[] = []; + + if (previewContents.length > 0) { + console.log( + 'WebSearchTool: Analyzing preview content to determine processing approach', + ); + + previewAnalysisResult = await 
analyzePreviewContent( + previewContents, + query, + query, // taskQuery same as query for tools + [], // no chat history for tools + llm, + config.configurable?.systemInstructions || + 'You are a helpful AI assistant.', + config?.signal || new AbortController().signal, + ); + + console.log( + 'WebSearchTool: Preview analysis result:', + previewAnalysisResult.isSufficient ? 'SUFFICIENT' : 'INSUFFICIENT', + ); + + if (!previewAnalysisResult.isSufficient) { + // Need full content retrieval - process top similarity results + documentsToProcess = resultsWithSimilarity + .sort((a, b) => b.similarity - a.similarity) + .slice(0, 5) + .map(({ result }) => result); + } else { + // Preview content is sufficient - no need for full content retrieval + console.log( + 'WebSearchTool: Preview content is sufficient, skipping full content retrieval', + ); + documentsToProcess = []; + } + } else { + // No preview content, process top results + documentsToProcess = searchResults.results.slice(0, 5); + } + + console.log( + `WebSearchTool: Processing ${documentsToProcess.length} URLs for content extraction`, + ); + + // Step 6: Extract content - either from full URLs or preview content + const documents: Document[] = []; + let processedCount = 0; + + if (previewAnalysisResult?.isSufficient) { + // Create documents from preview content since it's sufficient + console.log( + 'WebSearchTool: Creating documents from preview content (sufficient for answer)', + ); + + documents.push( + ...previewContents.map((previewContent) => { + return new Document({ + pageContent: `${previewContent.title}\n\n${previewContent.snippet}`, + metadata: { + title: previewContent.title, + url: previewContent.url, + source: previewContent.url, + processingType: 'preview-content', + searchQuery: searchQuery, + }, + }); + }), + ); + + console.log( + `WebSearchTool: Created ${documents.length} documents from preview content`, + ); + } else { + // Extract and summarize content from selected URLs + for (const result of documentsToProcess) { + if (processedCount >= 5) break; // Limit processing + + try { + console.log(`WebSearchTool: Processing ${result.url}`); + + const summaryResult = await summarizeWebContent( + result.url, + query, + llm, + config.configurable?.systemInstructions || + 'You are a helpful AI assistant.', + config?.signal || new AbortController().signal, + ); + + if (summaryResult.document) { + documents.push(summaryResult.document); + console.log( + `WebSearchTool: Successfully extracted content from ${result.url}`, + ); + } else { + console.log( + `WebSearchTool: No relevant content found for ${result.url}: ${summaryResult.notRelevantReason}`, + ); + } + + processedCount++; + } catch (error) { + console.error( + `WebSearchTool: Error processing ${result.url}:`, + error, + ); + continue; + } + } + + console.log( + `WebSearchTool: Successfully extracted ${documents.length} documents from ${processedCount} processed URLs`, + ); + } + + return { + documents, + searchQuery, + reasoning: searchQueryResult.reasoning, + sourcesFound: searchResults.results.length, + }; + } catch (error) { + console.error('WebSearchTool: Error during web search:', error); + const errorMessage = + error instanceof Error ? 
error.message : 'Unknown error'; + + return { + documents: [], + searchQuery: input.query, + reasoning: `Error occurred during web search: ${errorMessage}`, + sourcesFound: 0, + }; + } + }, + { + name: 'web_search', + description: + 'Performs web search using SearXNG, analyzes results, and extracts relevant content from top sources', + schema: WebSearchToolSchema, + }, +); diff --git a/src/lib/tools/index.ts b/src/lib/tools/index.ts index 65217f3..687178a 100644 --- a/src/lib/tools/index.ts +++ b/src/lib/tools/index.ts @@ -1,7 +1,13 @@ import { timezoneConverterTool } from './timezoneConverter'; import { dateDifferenceTool } from './dateDifference'; +// Agent tools for simplified chat agent (will be uncommented as implemented) +// import { allAgentTools } from './agents'; + export { timezoneConverterTool, dateDifferenceTool }; +// Export agent tools module (will be uncommented as implemented) +// export * from './agents'; + // Array containing all available tools export const allTools = [timezoneConverterTool, dateDifferenceTool]; diff --git a/src/lib/utils/analyzePreviewContent.ts b/src/lib/utils/analyzePreviewContent.ts index 8881525..bab34f8 100644 --- a/src/lib/utils/analyzePreviewContent.ts +++ b/src/lib/utils/analyzePreviewContent.ts @@ -88,7 +88,7 @@ Snippet: ${content.snippet} - Analyze the provided search result previews (titles + snippets), and chat history context to determine if they collectively contain enough information to provide a complete and accurate answer to the Task Query - If the preview content can provide a complete answer to the Task Query, consider it sufficient - If the preview content lacks important details, requires deeper analysis, or cannot fully answer the Task Query, consider it insufficient -- Be specific in your reasoning when the content is not sufficient +- Be specific in your reasoning when the content is not sufficient but keep the answer under 35 words - The original query is provided for additional context, only use it for clarification of overall expectations and intent. You do **not** need to answer the original query directly or completely # System Instructions diff --git a/src/lib/utils/summarizeWebContent.ts b/src/lib/utils/summarizeWebContent.ts index 2eda339..af0f9c6 100644 --- a/src/lib/utils/summarizeWebContent.ts +++ b/src/lib/utils/summarizeWebContent.ts @@ -19,7 +19,9 @@ const RelevanceCheckSchema = z.object({ .describe('Whether the content is relevant to the user query'), reason: z .string() - .describe("Brief explanation of why content is or isn't relevant"), + .describe( + "Brief explanation of why content is or isn't relevant. 20 words or less.", + ), }); export const summarizeWebContent = async (
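
For orientation, the sketch below is not taken from this patch: it shows one way the exported tool groups and SimplifiedAgentState could be wired into LangGraph's prebuilt createReactAgent. The stateSchema option and the runSimplifiedAgent helper are assumptions for illustration, not code introduced by this change.

import { HumanMessage } from '@langchain/core/messages';
import { createReactAgent } from '@langchain/langgraph/prebuilt';
import type { BaseChatModel } from '@langchain/core/language_models/chat_models';
import type { Embeddings } from '@langchain/core/embeddings';
import { webSearchTools, fileSearchTools } from '@/lib/tools/agents';
import { SimplifiedAgentState } from '@/lib/state/chatAgentState';

// Hypothetical helper: pick a tool group by focus mode, build the prebuilt react
// agent over the shared state, and run a single query through it.
async function runSimplifiedAgent(
  llm: BaseChatModel,
  embeddings: Embeddings,
  query: string,
  focusMode: 'webSearch' | 'fileSearch',
) {
  const tools = focusMode === 'webSearch' ? webSearchTools : fileSearchTools;
  const agent = createReactAgent({
    llm,
    tools,
    stateSchema: SimplifiedAgentState, // lets tools append to relevantDocuments via Command
  });
  return agent.invoke(
    { messages: [new HumanMessage(query)], query, focusMode },
    { configurable: { llm, embeddings } }, // tools read llm/embeddings from config
  );
}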