feat(agent): Implement simplified chat agent tools and state management

Willie Zutz 2025-07-29 10:18:11 -06:00
parent 755e1de28b
commit f6eab5a95a
27 changed files with 1929 additions and 2433 deletions

@ -143,3 +143,9 @@ When working on this codebase, you might need to:
 - Use try/catch blocks for async operations
 - Return structured error responses from API routes
+## Available Tools and Help
+- You can use the context7 tool to get help using the following identifiers for libraries used in this project
+  - `/langchain-ai/langchainjs` for LangChain
+  - `/langchain-ai/langgraph` for LangGraph

@ -1,12 +1,11 @@
 /* React Grid Layout styles */
-@import "react-grid-layout/css/styles.css";
-@import "react-resizable/css/styles.css";
+@import 'react-grid-layout/css/styles.css';
+@import 'react-resizable/css/styles.css';
 @tailwind base;
 @tailwind components;
 @tailwind utilities;
 @layer base {
   .overflow-hidden-scrollable {
     -ms-overflow-style: none;
@ -435,7 +435,8 @@ const WidgetConfigModal = ({
         </div>
       ) : (
         <div className="text-sm text-black/50 dark:text-white/50 italic">
-          Click &quot;Run Preview&quot; to see how your widget will look
+          Click &quot;Run Preview&quot; to see how your widget
+          will look
         </div>
       )}
     </div>

@ -59,7 +59,10 @@ const WidgetDisplay = ({
className="widget-drag-handle flex-shrink-0 p-1 rounded hover:bg-light-secondary dark:hover:bg-dark-secondary cursor-move transition-colors" className="widget-drag-handle flex-shrink-0 p-1 rounded hover:bg-light-secondary dark:hover:bg-dark-secondary cursor-move transition-colors"
title="Drag to move widget" title="Drag to move widget"
> >
<GripVertical size={16} className="text-gray-400 dark:text-gray-500" /> <GripVertical
size={16}
className="text-gray-400 dark:text-gray-500"
/>
</div> </div>
<CardTitle className="text-lg font-medium truncate"> <CardTitle className="text-lg font-medium truncate">

@ -1,81 +0,0 @@
import { BaseMessage } from '@langchain/core/messages';
import { Annotation, END } from '@langchain/langgraph';
import { Document } from 'langchain/document';

/**
 * State interface for the agent supervisor workflow
 */
export const AgentState = Annotation.Root({
  messages: Annotation<BaseMessage[]>({
    reducer: (x, y) => x.concat(y),
    default: () => [],
  }),
  query: Annotation<string>({
    reducer: (x, y) => y ?? x,
    default: () => '',
  }),
  relevantDocuments: Annotation<Document[]>({
    reducer: (x, y) => x.concat(y),
    default: () => [],
  }),
  bannedSummaryUrls: Annotation<string[]>({
    reducer: (x, y) => x.concat(y),
    default: () => [],
  }),
  bannedPreviewUrls: Annotation<string[]>({
    reducer: (x, y) => x.concat(y),
    default: () => [],
  }),
  searchInstructionHistory: Annotation<string[]>({
    reducer: (x, y) => x.concat(y),
    default: () => [],
  }),
  searchInstructions: Annotation<string>({
    reducer: (x, y) => y ?? x,
    default: () => '',
  }),
  next: Annotation<string>({
    reducer: (x, y) => y ?? x ?? END,
    default: () => END,
  }),
  analysis: Annotation<string>({
    reducer: (x, y) => y ?? x,
    default: () => '',
  }),
  fullAnalysisAttempts: Annotation<number>({
    reducer: (x, y) => (y ?? 0) + x,
    default: () => 0,
  }),
  tasks: Annotation<string[]>({
    reducer: (x, y) => y ?? x,
    default: () => [],
  }),
  currentTaskIndex: Annotation<number>({
    reducer: (x, y) => y ?? x,
    default: () => 0,
  }),
  originalQuery: Annotation<string>({
    reducer: (x, y) => y ?? x,
    default: () => '',
  }),
  fileIds: Annotation<string[]>({
    reducer: (x, y) => y ?? x,
    default: () => [],
  }),
  focusMode: Annotation<string>({
    reducer: (x, y) => y ?? x,
    default: () => 'webSearch',
  }),
  urlsToSummarize: Annotation<string[]>({
    reducer: (x, y) => y ?? x,
    default: () => [],
  }),
  summarizationIntent: Annotation<string>({
    reducer: (x, y) => y ?? x,
    default: () => '',
  }),
  recursionLimitReached: Annotation<boolean>({
    reducer: (x, y) => y ?? x,
    default: () => false,
  }),
});
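
For illustration, a minimal standalone sketch (not part of this commit) of how channel reducers like the ones above combine successive node updates, assuming LangGraph's reducer(previousValue, incomingUpdate) semantics: concat-style channels such as messages accumulate across nodes, while y ?? x channels such as query keep the most recent non-undefined write. All names below are invented for the example.

// Hypothetical illustration of the reducer behavior above.
function concatReducer<T>(x: T[], y: T[]): T[] {
  return x.concat(y);
}

function lastWrite<T>(x: T, y: T | undefined): T {
  return y ?? x;
}

let messages: string[] = []; // default: () => []
messages = concatReducer(messages, ['node A output']); // ['node A output']
messages = concatReducer(messages, ['node B output']); // ['node A output', 'node B output']

let query = ''; // default: () => ''
query = lastWrite(query, 'refined question'); // 'refined question'
query = lastWrite(query, undefined); // unchanged: 'refined question'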

@ -1,360 +0,0 @@
import { BaseChatModel } from '@langchain/core/language_models/chat_models';
import {
AIMessage,
HumanMessage,
SystemMessage,
} from '@langchain/core/messages';
import { ChatPromptTemplate } from '@langchain/core/prompts';
import { Command, END } from '@langchain/langgraph';
import { EventEmitter } from 'events';
import { z } from 'zod';
import LineOutputParser from '../outputParsers/lineOutputParser';
import { formatDateForLLM } from '../utils';
import { AgentState } from './agentState';
import { setTemperature } from '../utils/modelUtils';
import {
additionalUserInputPrompt,
additionalWebSearchPrompt,
decideNextActionPrompt,
} from '../prompts/analyzer';
import {
removeThinkingBlocks,
removeThinkingBlocksFromMessages,
} from '../utils/contentUtils';
import { withStructuredOutput } from '../utils/structuredOutput';
// Define Zod schemas for structured output
const NextActionSchema = z.object({
action: z
.enum(['good_content', 'need_user_info', 'need_more_info'])
.describe('The next action to take based on content analysis'),
reasoning: z
.string()
.describe('Brief explanation of why this action was chosen'),
});
const UserInfoRequestSchema = z.object({
question: z
.string()
.describe('A detailed question to ask the user for additional information'),
reasoning: z
.string()
.describe('Explanation of why this information is needed'),
});
const SearchRefinementSchema = z.object({
question: z
.string()
.describe('A refined search question to gather more specific information'),
reasoning: z
.string()
.describe(
'Explanation of what information is missing and why this search will help',
),
});
export class AnalyzerAgent {
private llm: BaseChatModel;
private emitter: EventEmitter;
private systemInstructions: string;
private signal: AbortSignal;
constructor(
llm: BaseChatModel,
emitter: EventEmitter,
systemInstructions: string,
signal: AbortSignal,
) {
this.llm = llm;
this.emitter = emitter;
this.systemInstructions = systemInstructions;
this.signal = signal;
}
async execute(state: typeof AgentState.State): Promise<Command> {
try {
//setTemperature(this.llm, 0.0);
// Initialize originalQuery if not set
if (!state.originalQuery) {
state.originalQuery = state.query;
}
// Check for URLs first - if found and not yet processed, route to URL summarization
if (!state.urlsToSummarize || state.urlsToSummarize.length === 0) {
const urlRegex = /https?:\/\/[^\s]+/gi;
const urls = [...new Set(state.query.match(urlRegex) || [])];
if (urls.length > 0) {
console.log(
'URLs detected in initial query, routing to URL summarization',
);
console.log(`URLs found: ${urls.join(', ')}`);
// Emit URL detection event
this.emitter.emit('agent_action', {
type: 'agent_action',
data: {
action: 'URLS_DETECTED_ROUTING',
message: `Detected ${urls.length} URL(s) in query - processing content first`,
details: {
query: state.query,
urls: urls,
},
},
});
return new Command({
goto: 'url_summarization',
update: {
urlsToSummarize: urls,
summarizationIntent: `Process the content from the provided URLs to help answer: ${state.query}`,
},
});
}
}
// Skip full analysis if this is the first run.
//if (state.fullAnalysisAttempts > 0) {
// Emit initial analysis event
this.emitter.emit('agent_action', {
type: 'agent_action',
data: {
action: 'ANALYZING_CONTEXT',
message:
'Analyzing the context to see if we have enough information to answer the query',
details: {
documentCount: state.relevantDocuments.length,
query: state.query,
searchIterations: state.searchInstructionHistory.length,
},
},
});
console.log(
`Analyzing ${state.relevantDocuments.length} documents for relevance...`,
);
const nextActionPrompt = await ChatPromptTemplate.fromTemplate(
decideNextActionPrompt,
).format({
systemInstructions: this.systemInstructions,
context: state.relevantDocuments
.map(
(doc, index) =>
`<source${index + 1}>${doc?.metadata?.title ? `<title>${doc?.metadata?.title}</title>` : ''}${doc?.metadata.url ? `<url>${doc?.metadata?.url}</url>` : ''}<content>${doc.pageContent}</content></source${index + 1}>`,
)
.join('\n\n'),
date: formatDateForLLM(new Date()),
searchInstructionHistory: state.searchInstructionHistory
.map((question) => `- ${question}`)
.join('\n'),
query: state.originalQuery || state.query, // Use original query for analysis context
});
const thinkingBlocksRemovedMessages = removeThinkingBlocksFromMessages(
state.messages,
);
// Use structured output for next action decision
const structuredLlm = withStructuredOutput(this.llm, NextActionSchema, {
name: 'analyze_content',
});
const nextActionResponse = await structuredLlm.invoke(
[...thinkingBlocksRemovedMessages, new HumanMessage(nextActionPrompt)],
{ signal: this.signal },
);
console.log('Next action response:', nextActionResponse);
if (nextActionResponse.action !== 'good_content') {
// If we don't have enough information, but we still have available tasks, proceed with the next task
if (state.tasks && state.tasks.length > 0) {
const hasMoreTasks = state.currentTaskIndex < state.tasks.length - 1;
if (hasMoreTasks) {
return new Command({
goto: 'task_manager',
});
}
}
if (nextActionResponse.action === 'need_user_info') {
// Use structured output for user info request
const userInfoLlm = withStructuredOutput(
this.llm,
UserInfoRequestSchema,
{
name: 'request_user_info',
},
);
const moreUserInfoPrompt = await ChatPromptTemplate.fromTemplate(
additionalUserInputPrompt,
).format({
systemInstructions: this.systemInstructions,
context: state.relevantDocuments
.map(
(doc, index) =>
`<source${index + 1}>${doc?.metadata?.title ? `<title>${doc?.metadata?.title}</title>` : ''}<content>${doc.pageContent}</content></source${index + 1}>`,
)
.join('\n\n'),
date: formatDateForLLM(new Date()),
searchInstructionHistory: state.searchInstructionHistory
.map((question) => `- ${question}`)
.join('\n'),
query: state.originalQuery || state.query, // Use original query for user info context
previousAnalysis: nextActionResponse.reasoning, // Include reasoning from previous analysis
});
const userInfoRequest = await userInfoLlm.invoke(
[
...removeThinkingBlocksFromMessages(state.messages),
new HumanMessage(moreUserInfoPrompt),
],
{ signal: this.signal },
);
// Emit the complete question to the user
this.emitter.emit(
'data',
JSON.stringify({
type: 'response',
data: userInfoRequest.question,
}),
);
this.emitter.emit('end');
// Create the final response message with the complete content
const response = new SystemMessage(userInfoRequest.question);
return new Command({
goto: END,
update: {
messages: [response],
},
});
}
// If we need more information from the LLM, generate a more specific search query
// Use structured output for search refinement
const searchRefinementLlm = withStructuredOutput(
this.llm,
SearchRefinementSchema,
{
name: 'refine_search',
},
);
const moreInfoPrompt = await ChatPromptTemplate.fromTemplate(
additionalWebSearchPrompt,
).format({
systemInstructions: this.systemInstructions,
context: state.relevantDocuments
.map(
(doc, index) =>
`<source${index + 1}>${doc?.metadata?.title ? `\n<title>${doc?.metadata?.title}</title>` : ''}\n<content>${doc.pageContent}</content>\n</source${index + 1}>`,
)
.join('\n\n'),
date: formatDateForLLM(new Date()),
searchInstructionHistory: state.searchInstructionHistory
.map((question) => `- ${question}`)
.join('\n'),
query: state.originalQuery || state.query, // Use original query for more info context
previousAnalysis: nextActionResponse.reasoning, // Include reasoning from previous analysis
});
const searchRefinement = await searchRefinementLlm.invoke(
[
...removeThinkingBlocksFromMessages(state.messages),
new HumanMessage(moreInfoPrompt),
],
{ signal: this.signal },
);
// Emit reanalyzing event when we need more information
this.emitter.emit('agent_action', {
type: 'agent_action',
data: {
action: 'MORE_DATA_NEEDED',
message:
'Current context is insufficient - analyzing search requirements',
details: {
nextSearchQuery: searchRefinement.question,
documentCount: state.relevantDocuments.length,
searchIterations: state.searchInstructionHistory.length,
query: state.originalQuery || state.query, // Show original query in details
currentSearchFocus: searchRefinement.question,
},
},
});
return new Command({
goto: 'task_manager',
update: {
// messages: [
// new AIMessage(
// `The following question can help refine the search: ${searchRefinement.question}`,
// ),
// ],
query: searchRefinement.question, // Use the refined question for TaskManager to analyze
searchInstructions: searchRefinement.question,
searchInstructionHistory: [
...(state.searchInstructionHistory || []),
searchRefinement.question,
],
fullAnalysisAttempts: 1,
originalQuery: state.originalQuery || state.query, // Preserve the original user query
// Reset task list so TaskManager can break down the search requirements again
tasks: [],
currentTaskIndex: 0,
},
});
}
// Emit information gathering complete event when we have sufficient information
this.emitter.emit('agent_action', {
type: 'agent_action',
data: {
action: 'INFORMATION_GATHERING_COMPLETE',
message: 'Ready to respond.',
details: {
documentCount: state.relevantDocuments.length,
searchIterations: state.searchInstructionHistory.length,
totalTasks: state.tasks?.length || 1,
query: state.originalQuery || state.query,
},
},
});
return new Command({
goto: 'synthesizer',
// update: {
// messages: [
// new AIMessage(
// `Analysis completed. We have sufficient information to answer the query.`,
// ),
// ],
// },
});
} catch (error) {
console.error('Analysis error:', error);
const errorMessage = new AIMessage(
`Analysis failed: ${error instanceof Error ? error.message : 'Unknown error'}`,
);
return new Command({
goto: END,
update: {
messages: [errorMessage],
},
});
} finally {
setTemperature(this.llm); // Reset temperature for subsequent actions
}
}
}
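
The control flow above can be condensed into a small routing table. Below is a hypothetical standalone mirror (not part of this commit) of the analyzer's decision logic, using the node names seen elsewhere in this graph; routeAfterAnalysis is an invented name, and 'end' stands in for LangGraph's END.

// Hypothetical condensed view of AnalyzerAgent.execute routing.
type NextAction = 'good_content' | 'need_user_info' | 'need_more_info';

function routeAfterAnalysis(action: NextAction, hasMoreTasks: boolean): string {
  // Any insufficient-content outcome first drains the remaining task queue.
  if (action !== 'good_content' && hasMoreTasks) {
    return 'task_manager';
  }
  switch (action) {
    case 'good_content':
      return 'synthesizer'; // enough context gathered: stream the final answer
    case 'need_user_info':
      return 'end'; // ask the user a clarifying question and stop
    case 'need_more_info':
      return 'task_manager'; // refine the search and re-plan tasks
  }
}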

@ -1,233 +0,0 @@
import { BaseChatModel } from '@langchain/core/language_models/chat_models';
import { AIMessage } from '@langchain/core/messages';
import { PromptTemplate } from '@langchain/core/prompts';
import { Command, END } from '@langchain/langgraph';
import { EventEmitter } from 'events';
import { z } from 'zod';
import fs from 'node:fs';
import path from 'node:path';
import { AgentState } from './agentState';
import { contentRouterPrompt } from '../prompts/contentRouter';
import { removeThinkingBlocksFromMessages } from '../utils/contentUtils';
import { withStructuredOutput } from '../utils/structuredOutput';
// Define Zod schema for structured router decision output
const RouterDecisionSchema = z.object({
decision: z
.enum(['file_search', 'web_search', 'analyzer'])
.describe('The next step to take in the workflow'),
reasoning: z.string().describe('Explanation of why this decision was made'),
});
type RouterDecision = z.infer<typeof RouterDecisionSchema>;
export class ContentRouterAgent {
private llm: BaseChatModel;
private emitter: EventEmitter;
private systemInstructions: string;
private signal: AbortSignal;
constructor(
llm: BaseChatModel,
emitter: EventEmitter,
systemInstructions: string,
signal: AbortSignal,
) {
this.llm = llm;
this.emitter = emitter;
this.systemInstructions = systemInstructions;
this.signal = signal;
}
/**
* Content router agent node
*/
async execute(state: typeof AgentState.State): Promise<Command> {
try {
// Determine current task to process
const currentTask =
state.tasks && state.tasks.length > 0
? state.tasks[state.currentTaskIndex || 0]
: state.query;
console.log(
`Content router processing task ${(state.currentTaskIndex || 0) + 1} of ${state.tasks?.length || 1}: "${currentTask}"`,
);
// Extract focus mode from state - this should now come from the API
const focusMode = state.focusMode || 'webSearch';
const hasFiles = state.fileIds && state.fileIds.length > 0;
const documentCount = state.relevantDocuments.length;
const searchHistory = state.searchInstructionHistory.join(', ') || 'None';
// Extract file topics if files are available
const fileTopics = hasFiles
? await this.extractFileTopics(state.fileIds!)
: 'None';
// Emit routing decision event
this.emitter.emit('agent_action', {
type: 'agent_action',
data: {
action: 'ROUTING_DECISION',
message: `Determining optimal information source for current task`,
details: {
query: state.query,
currentTask: currentTask,
taskIndex: (state.currentTaskIndex || 0) + 1,
totalTasks: state.tasks?.length || 1,
focusMode: focusMode,
hasFiles: hasFiles,
fileCount: state.fileIds?.length || 0,
documentCount: documentCount,
searchIterations: state.searchInstructionHistory.length,
},
},
});
const template = PromptTemplate.fromTemplate(contentRouterPrompt);
const prompt = await template.format({
systemInstructions: this.systemInstructions,
currentTask: currentTask,
query: state.originalQuery || state.query,
focusMode: focusMode,
hasFiles: hasFiles,
fileTopics: fileTopics,
documentCount: documentCount,
searchHistory: searchHistory,
});
// Use structured output for routing decision
const structuredLlm = withStructuredOutput(
this.llm,
RouterDecisionSchema,
{
name: 'route_content',
},
);
const routerDecision = (await structuredLlm.invoke(
[...removeThinkingBlocksFromMessages(state.messages), prompt],
{ signal: this.signal },
)) as RouterDecision;
console.log(`Router decision: ${routerDecision.decision}`);
console.log(`Router reasoning: ${routerDecision.reasoning}`);
console.log(`File topics: ${fileTopics}`);
console.log(`Focus mode: ${focusMode}`);
// Validate decision based on focus mode restrictions
const validatedDecision = this.validateDecision(
routerDecision,
focusMode,
hasFiles,
);
// Emit routing result event
this.emitter.emit('agent_action', {
type: 'agent_action',
data: {
action: 'ROUTING_RESULT',
message: `Routing to ${validatedDecision.decision}: ${validatedDecision.reasoning}`,
details: {
query: state.query,
currentTask: currentTask,
taskIndex: (state.currentTaskIndex || 0) + 1,
totalTasks: state.tasks?.length || 1,
decision: validatedDecision.decision,
focusMode: focusMode,
hasFiles: hasFiles,
documentCount: documentCount,
searchIterations: state.searchInstructionHistory.length,
},
},
});
const responseMessage = `Content routing completed. Next step: ${validatedDecision.decision}`;
console.log(responseMessage);
return new Command({
goto: validatedDecision.decision,
// update: {
// messages: [new AIMessage(responseMessage)],
// },
});
} catch (error) {
console.error('Content router error:', error);
const errorMessage = new AIMessage(
`Content routing failed: ${error instanceof Error ? error.message : 'Unknown error'}`,
);
return new Command({
goto: END,
update: {
messages: [errorMessage],
},
});
}
}
/**
* Extract semantic topics from attached files for relevance assessment
*/
private async extractFileTopics(fileIds: string[]): Promise<string> {
try {
const topics = fileIds.map((fileId) => {
try {
const filePath = path.join(process.cwd(), 'uploads', fileId);
const contentPath = filePath + '-extracted.json';
if (fs.existsSync(contentPath)) {
const content = JSON.parse(fs.readFileSync(contentPath, 'utf8'));
const filename = content.title || 'Document';
// Use LLM-generated semantic topics if available, otherwise fall back to filename
const semanticTopics = content.topics;
return semanticTopics || filename;
}
return 'Unknown Document';
} catch (error) {
console.warn(`Error extracting topic for file ${fileId}:`, error);
return 'Unknown Document';
}
});
return topics.join('; ');
} catch (error) {
console.warn('Error extracting file topics:', error);
return 'Unable to determine file topics';
}
}
/**
* Validate and potentially override the router decision based on focus mode restrictions
*/
private validateDecision(
decision: RouterDecision,
focusMode: string,
hasFiles: boolean,
): RouterDecision {
// Enforce focus mode restrictions for chat and localResearch modes
if (
(focusMode === 'chat' || focusMode === 'localResearch') &&
decision.decision === 'web_search'
) {
// Override to file_search if files are available, otherwise analyzer
const fallbackDecision = hasFiles ? 'file_search' : 'analyzer';
console.log(
`Overriding web_search decision to ${fallbackDecision} due to focus mode restriction: ${focusMode}`,
);
return {
decision: fallbackDecision as 'file_search' | 'analyzer',
reasoning: `Overridden to ${fallbackDecision} - web search not allowed in ${focusMode} mode. ${decision.reasoning}`,
};
}
// For webSearch mode, trust the LLM's decision about file relevance
// No overrides needed - the enhanced prompt handles file relevance assessment
return decision;
}
}
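
A quick way to see the override rule in isolation: a hypothetical standalone mirror (not part of this commit) of what the private validateDecision method above turns a web_search decision into; resolveWebSearchDecision is an invented name.

// Hypothetical mirror of the focus-mode restriction above.
function resolveWebSearchDecision(focusMode: string, hasFiles: boolean): string {
  if (focusMode === 'chat' || focusMode === 'localResearch') {
    // Web search is not allowed in these modes.
    return hasFiles ? 'file_search' : 'analyzer';
  }
  return 'web_search'; // other modes keep the router's choice
}

// resolveWebSearchDecision('chat', false) === 'analyzer'
// resolveWebSearchDecision('localResearch', true) === 'file_search'
// resolveWebSearchDecision('webSearch', true) === 'web_search'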

@ -1,238 +0,0 @@
import { BaseChatModel } from '@langchain/core/language_models/chat_models';
import { AIMessage } from '@langchain/core/messages';
import { Command, END } from '@langchain/langgraph';
import { EventEmitter } from 'events';
import { Document } from 'langchain/document';
import { AgentState } from './agentState';
import { Embeddings } from '@langchain/core/embeddings';
import {
processFilesToDocuments,
getRankedDocs,
} from '../utils/fileProcessing';
export class FileSearchAgent {
private llm: BaseChatModel;
private emitter: EventEmitter;
private systemInstructions: string;
private signal: AbortSignal;
private embeddings: Embeddings;
constructor(
llm: BaseChatModel,
emitter: EventEmitter,
systemInstructions: string,
signal: AbortSignal,
embeddings: Embeddings,
) {
this.llm = llm;
this.emitter = emitter;
this.systemInstructions = systemInstructions;
this.signal = signal;
this.embeddings = embeddings;
}
/**
* File search agent node
*/
async execute(state: typeof AgentState.State): Promise<Command> {
try {
// Determine current task to process
const currentTask =
state.tasks && state.tasks.length > 0
? state.tasks[state.currentTaskIndex || 0]
: state.query;
console.log(
`Processing file search for task ${(state.currentTaskIndex || 0) + 1} of ${state.tasks?.length || 1}: "${currentTask}"`,
);
// Check if we have file IDs to process
if (!state.fileIds || state.fileIds.length === 0) {
console.log('No files attached for search');
return new Command({
goto: 'analyzer',
update: {
messages: [new AIMessage('No files attached to search.')],
},
});
}
// Emit consulting attached files event
this.emitter.emit('agent_action', {
type: 'agent_action',
data: {
action: 'CONSULTING_ATTACHED_FILES',
message: `Consulting attached files...`,
details: {
query: state.query,
currentTask: currentTask,
taskIndex: (state.currentTaskIndex || 0) + 1,
totalTasks: state.tasks?.length || 1,
fileCount: state.fileIds.length,
documentCount: state.relevantDocuments.length,
},
},
});
// Process files to documents
const fileDocuments = await processFilesToDocuments(state.fileIds);
if (fileDocuments.length === 0) {
console.log('No processable file content found');
return new Command({
goto: 'analyzer',
// update: {
// messages: [
// new AIMessage('No searchable content found in attached files.'),
// ],
// },
});
}
console.log(
`Processed ${fileDocuments.length} file documents for search`,
);
// Emit searching file content event
this.emitter.emit('agent_action', {
type: 'agent_action',
data: {
action: 'SEARCHING_FILE_CONTENT',
message: `Searching through ${fileDocuments.length} file sections for relevant information`,
details: {
query: state.query,
currentTask: currentTask,
taskIndex: (state.currentTaskIndex || 0) + 1,
totalTasks: state.tasks?.length || 1,
fileDocumentCount: fileDocuments.length,
documentCount: state.relevantDocuments.length,
},
},
});
// Generate query embedding for similarity search
const queryEmbedding = await this.embeddings.embedQuery(
state.originalQuery + ' ' + currentTask,
);
// Perform similarity search over file documents
const rankedDocuments = getRankedDocs(
queryEmbedding,
fileDocuments,
12, // maxDocs
0.3, // similarity threshold
);
console.log(`Found ${rankedDocuments.length} relevant file sections`);
if (rankedDocuments.length === 0) {
// Emit no relevant content event
this.emitter.emit('agent_action', {
type: 'agent_action',
data: {
action: 'NO_RELEVANT_FILE_CONTENT',
message: `No relevant content found in attached files for the current task`,
details: {
query: state.query,
currentTask: currentTask,
taskIndex: (state.currentTaskIndex || 0) + 1,
totalTasks: state.tasks?.length || 1,
searchedDocuments: fileDocuments.length,
documentCount: state.relevantDocuments.length,
},
},
});
return new Command({
goto: 'analyzer',
// update: {
// messages: [
// new AIMessage(
// 'No relevant content found in attached files for the current task.',
// ),
// ],
// },
});
}
// Emit file content found event
this.emitter.emit('agent_action', {
type: 'agent_action',
data: {
action: 'FILE_CONTENT_FOUND',
message: `Found ${rankedDocuments.length} relevant sections in attached files`,
details: {
query: state.query,
currentTask: currentTask,
taskIndex: (state.currentTaskIndex || 0) + 1,
totalTasks: state.tasks?.length || 1,
relevantSections: rankedDocuments.length,
searchedDocuments: fileDocuments.length,
documentCount:
state.relevantDocuments.length + rankedDocuments.length,
},
},
});
const responseMessage = `File search completed. Found ${rankedDocuments.length} relevant sections in attached files.`;
console.log(responseMessage);
return new Command({
goto: 'analyzer', // Route back to analyzer to process the results
update: {
// messages: [new AIMessage(responseMessage)],
relevantDocuments: rankedDocuments,
},
});
} catch (error) {
console.error('File search error:', error);
const errorMessage = new AIMessage(
`File search failed: ${error instanceof Error ? error.message : 'Unknown error'}`,
);
return new Command({
goto: END,
update: {
messages: [errorMessage],
},
});
}
}
/**
* Perform a similarity search over file documents
* @param state The current agent state
* @returns Ranked documents relevant to the current task
*/
async search(state: typeof AgentState.State): Promise<Document[]> {
if (!state.fileIds || state.fileIds.length === 0) {
return [];
}
// Process files to documents
const fileDocuments = await processFilesToDocuments(state.fileIds);
if (fileDocuments.length === 0) {
return [];
}
// Determine current task to search for
const currentTask =
state.tasks && state.tasks.length > 0
? state.tasks[state.currentTaskIndex || 0]
: state.query;
// Generate query embedding for similarity search
const queryEmbedding = await this.embeddings.embedQuery(
state.originalQuery + ' ' + currentTask,
);
// Perform similarity search and return ranked documents
return getRankedDocs(
queryEmbedding,
fileDocuments,
8, // maxDocs
0.3, // similarity threshold
);
}
}
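
getRankedDocs is imported from ../utils/fileProcessing and only appears here through its call signature (queryEmbedding, documents, maxDocs, similarityThreshold). Below is a hypothetical sketch (not part of this commit) of what such a cosine-similarity ranking helper could look like; it assumes each Document carries a precomputed embedding in its metadata, which may not match the real implementation.

import { Document } from 'langchain/document';

// Plain cosine similarity between two equal-length vectors.
function cosineSimilarity(a: number[], b: number[]): number {
  let dot = 0;
  let normA = 0;
  let normB = 0;
  for (let i = 0; i < a.length; i++) {
    dot += a[i] * b[i];
    normA += a[i] * a[i];
    normB += b[i] * b[i];
  }
  return dot / (Math.sqrt(normA) * Math.sqrt(normB) || 1);
}

// Hypothetical ranking helper: filter by threshold, sort by similarity, cap at maxDocs.
function rankDocsSketch(
  queryEmbedding: number[],
  docs: Document[],
  maxDocs: number,
  threshold: number,
): Document[] {
  return docs
    .map((doc) => ({
      doc,
      score: cosineSimilarity(queryEmbedding, doc.metadata.embedding ?? []),
    }))
    .filter(({ score }) => score >= threshold)
    .sort((a, b) => b.score - a.score)
    .slice(0, maxDocs)
    .map(({ doc }) => doc);
}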

@ -1,8 +0,0 @@
export { AgentState } from './agentState';
export { WebSearchAgent } from './webSearchAgent';
export { AnalyzerAgent } from './analyzerAgent';
export { SynthesizerAgent } from './synthesizerAgent';
export { TaskManagerAgent } from './taskManagerAgent';
export { FileSearchAgent } from './fileSearchAgent';
export { ContentRouterAgent } from './contentRouterAgent';
export { URLSummarizationAgent } from './urlSummarizationAgent';

@ -1,165 +0,0 @@
import { BaseChatModel } from '@langchain/core/language_models/chat_models';
import { HumanMessage, SystemMessage } from '@langchain/core/messages';
import { PromptTemplate } from '@langchain/core/prompts';
import { Command, END } from '@langchain/langgraph';
import { EventEmitter } from 'events';
import { getModelName } from '../utils/modelUtils';
import { AgentState } from './agentState';
import { removeThinkingBlocksFromMessages } from '../utils/contentUtils';
import { synthesizerPrompt } from '../prompts/synthesizer';
export class SynthesizerAgent {
private llm: BaseChatModel;
private emitter: EventEmitter;
private personaInstructions: string;
private signal: AbortSignal;
constructor(
llm: BaseChatModel,
emitter: EventEmitter,
personaInstructions: string,
signal: AbortSignal,
) {
this.llm = llm;
this.emitter = emitter;
this.personaInstructions = personaInstructions;
this.signal = signal;
}
/**
* Synthesizer agent node that combines information to answer the query
*/
async execute(state: typeof AgentState.State): Promise<Command> {
try {
// Format the prompt using the external template
const template = PromptTemplate.fromTemplate(synthesizerPrompt);
const conversationHistory =
removeThinkingBlocksFromMessages(state.messages)
.map((msg) => `<${msg.getType()}>${msg.content}</${msg.getType()}>`)
.join('\n') || 'No previous conversation context';
const relevantDocuments = state.relevantDocuments
.map((doc, index) => {
const isFile = doc.metadata?.url?.toLowerCase().includes('file');
return `<${index + 1}>\n
<title>${doc.metadata.title}</title>
<source_type>${isFile ? 'file' : 'web'}</source_type>
${isFile ? '' : '\n<url>' + doc.metadata.url + '</url>'}
<content>\n${doc.pageContent}\n </content>
</${index + 1}>`;
})
.join('\n');
const recursionLimitMessage = state.recursionLimitReached
? `# ⚠️ IMPORTANT NOTICE - LIMITED INFORMATION
**The search process was interrupted due to complexity limits. You MUST start your response with a warning about incomplete information and qualify all statements appropriately.**
## CRITICAL: Incomplete Information Response Requirements
**You MUST:**
1. **Start your response** with a clear warning that the information may be incomplete or conflicting
2. **Acknowledge limitations** throughout your response where information gaps exist
3. **Be transparent** about what you cannot determine from the available sources
4. **Suggest follow-up actions** for the user to get more complete information
5. **Qualify your statements** with phrases like "based on available information" or "from the limited sources gathered"
**Example opening for incomplete information responses:**
" **Please note:** This response is based on incomplete information due to search complexity limits. The findings below may be missing important details or conflicting perspectives. I recommend verifying this information through additional research or rephrasing your query for better results.
`
: '';
// If we have limited documents due to recursion limit, acknowledge this
const documentsAvailable = state.relevantDocuments?.length || 0;
const limitedInfoNote =
state.recursionLimitReached && documentsAvailable === 0
? '**CRITICAL: No source documents were gathered due to search limitations.**\n\n'
: state.recursionLimitReached
? `**NOTICE: Search was interrupted with ${documentsAvailable} documents gathered.**\n\n`
: '';
const formattedPrompt = await template.format({
personaInstructions: this.personaInstructions,
conversationHistory: conversationHistory,
relevantDocuments: relevantDocuments,
query: state.originalQuery || state.query,
recursionLimitReached: recursionLimitMessage + limitedInfoNote,
});
// Stream the response in real-time using LLM streaming capabilities
let fullResponse = '';
// Emit the sources as a data response
this.emitter.emit(
'data',
JSON.stringify({
type: 'sources',
data: state.relevantDocuments,
searchQuery: '',
searchUrl: '',
}),
);
const stream = await this.llm.stream(
[
new SystemMessage(formattedPrompt),
new HumanMessage(state.originalQuery || state.query),
],
{ signal: this.signal },
);
for await (const chunk of stream) {
if (this.signal.aborted) {
break;
}
const content = chunk.content;
if (typeof content === 'string' && content.length > 0) {
fullResponse += content;
// Emit each chunk as a data response in real-time
this.emitter.emit(
'data',
JSON.stringify({
type: 'response',
data: content,
}),
);
}
}
// Emit model stats and end signal after streaming is complete
const modelName = getModelName(this.llm);
this.emitter.emit(
'stats',
JSON.stringify({
type: 'modelStats',
data: { modelName },
}),
);
this.emitter.emit('end');
// Create the final response message with the complete content
const response = new SystemMessage(fullResponse);
return new Command({
goto: END,
update: {
messages: [response],
},
});
} catch (error) {
console.error('Synthesis error:', error);
const errorMessage = new SystemMessage(
`Failed to synthesize answer: ${error instanceof Error ? error.message : 'Unknown error'}`,
);
return new Command({
goto: END,
update: {
messages: [errorMessage],
},
});
}
}
}
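
The synthesizer talks to the transport layer purely through the emitter protocol visible above: 'data' events carrying JSON-encoded { type, data } payloads, a 'stats' event, and a final 'end'. A hypothetical consumer sketch (not part of this commit) showing how a client could subscribe; attachClient is an invented name.

import { EventEmitter } from 'events';

// Hypothetical subscriber for the emitter protocol used by these agents.
function attachClient(emitter: EventEmitter): void {
  emitter.on('data', (raw: string) => {
    const event = JSON.parse(raw);
    if (event.type === 'response') {
      process.stdout.write(event.data); // streamed answer chunks
    } else if (event.type === 'sources') {
      console.log(`\n[received ${event.data.length} source documents]`);
    }
  });
  emitter.on('stats', (raw: string) => {
    console.log(`\n[model: ${JSON.parse(raw).data.modelName}]`);
  });
  emitter.on('end', () => process.stdout.write('\n'));
}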

@ -1,225 +0,0 @@
import { BaseChatModel } from '@langchain/core/language_models/chat_models';
import { AIMessage } from '@langchain/core/messages';
import { PromptTemplate } from '@langchain/core/prompts';
import { Command } from '@langchain/langgraph';
import { EventEmitter } from 'events';
import { z } from 'zod';
import { taskBreakdownPrompt } from '../prompts/taskBreakdown';
import { AgentState } from './agentState';
import { setTemperature } from '../utils/modelUtils';
import { withStructuredOutput } from '../utils/structuredOutput';
// Define Zod schema for structured task breakdown output
const TaskBreakdownSchema = z.object({
tasks: z
.array(z.string())
.describe(
'Array of specific, focused tasks broken down from the original query',
),
reasoning: z
.string()
.describe(
'Explanation of how and why the query was broken down into these tasks',
),
});
type TaskBreakdown = z.infer<typeof TaskBreakdownSchema>;
export class TaskManagerAgent {
private llm: BaseChatModel;
private emitter: EventEmitter;
private systemInstructions: string;
private signal: AbortSignal;
constructor(
llm: BaseChatModel,
emitter: EventEmitter,
systemInstructions: string,
signal: AbortSignal,
) {
this.llm = llm;
this.emitter = emitter;
this.systemInstructions = systemInstructions;
this.signal = signal;
}
/**
* Task manager agent node - breaks down complex questions into smaller tasks
*/
async execute(state: typeof AgentState.State): Promise<Command> {
try {
//setTemperature(this.llm, 0); // Set temperature to 0 for deterministic output
// Check if we're in task progression mode (tasks already exist and we're processing them)
if (state.tasks && state.tasks.length > 0) {
const currentTaskIndex = state.currentTaskIndex || 0;
const hasMoreTasks = currentTaskIndex < state.tasks.length - 1;
if (hasMoreTasks) {
// Move to next task
const nextTaskIndex = currentTaskIndex + 1;
this.emitter.emit('agent_action', {
type: 'agent_action',
data: {
action: 'PROCEEDING_TO_NEXT_TASK',
message: `Task ${currentTaskIndex + 1} completed. Moving to task ${nextTaskIndex + 1} of ${state.tasks.length}.`,
details: {
completedTask: state.tasks[currentTaskIndex],
nextTask: state.tasks[nextTaskIndex],
taskIndex: nextTaskIndex + 1,
totalTasks: state.tasks.length,
documentCount: state.relevantDocuments.length,
query: state.originalQuery || state.query,
},
},
});
return new Command({
goto: 'content_router',
update: {
// messages: [
// new AIMessage(
// `Task ${currentTaskIndex + 1} completed. Processing task ${nextTaskIndex + 1} of ${state.tasks.length}: "${state.tasks[nextTaskIndex]}"`,
// ),
// ],
currentTaskIndex: nextTaskIndex,
},
});
} else {
// All tasks completed, move to analysis
this.emitter.emit('agent_action', {
type: 'agent_action',
data: {
action: 'ALL_TASKS_COMPLETED',
message: `All ${state.tasks.length} tasks completed. Ready for analysis.`,
details: {
totalTasks: state.tasks.length,
documentCount: state.relevantDocuments.length,
query: state.originalQuery || state.query,
},
},
});
return new Command({
goto: 'analyzer',
// update: {
// messages: [
// new AIMessage(
// `All ${state.tasks.length} tasks completed. Moving to analysis phase.`,
// ),
// ],
// },
});
}
}
// Original task breakdown logic for new queries
// Emit task analysis event
this.emitter.emit('agent_action', {
type: 'agent_action',
data: {
action: 'ANALYZING_TASK_COMPLEXITY',
message: `Analyzing question to determine if it needs to be broken down into smaller tasks`,
details: {
query: state.query,
currentTasks: state.tasks?.length || 0,
},
},
});
const template = PromptTemplate.fromTemplate(taskBreakdownPrompt);
// Create file context information
const fileContext =
state.fileIds && state.fileIds.length > 0
? `Files attached: ${state.fileIds.length} file(s) are available for analysis. Consider creating tasks that can leverage these attached files when appropriate.`
: 'No files attached: Focus on tasks that can be answered through web research or general knowledge.';
const prompt = await template.format({
systemInstructions: this.systemInstructions,
fileContext: fileContext,
query: state.query,
});
// Use structured output for task breakdown
const structuredLlm = withStructuredOutput(
this.llm,
TaskBreakdownSchema,
{
name: 'break_down_tasks',
},
);
const taskBreakdownResult = (await structuredLlm.invoke([prompt], {
signal: this.signal,
})) as TaskBreakdown;
console.log('Task breakdown response:', taskBreakdownResult);
// Extract tasks from structured response
const taskLines = taskBreakdownResult.tasks.filter(
(task) => task.trim().length > 0,
);
if (taskLines.length === 0) {
// Fallback: if no tasks found, use the original query
taskLines.push(state.query);
}
console.log(
`Task breakdown completed: ${taskLines.length} tasks identified`,
);
console.log('Reasoning:', taskBreakdownResult.reasoning);
taskLines.forEach((task, index) => {
console.log(`Task ${index + 1}: ${task}`);
});
// Emit task breakdown completion event
this.emitter.emit('agent_action', {
type: 'agent_action',
data: {
action: 'TASK_BREAKDOWN_COMPLETED',
message: `Question broken down into ${taskLines.length} focused ${taskLines.length === 1 ? 'task' : 'tasks'}`,
details: {
query: state.query,
taskCount: taskLines.length,
tasks: taskLines,
reasoning: taskBreakdownResult.reasoning,
},
},
});
const responseMessage =
taskLines.length === 1
? 'Question is already focused and ready for processing'
: `Question broken down into ${taskLines.length} focused tasks for parallel processing`;
return new Command({
goto: 'content_router', // Route to content router to decide between file search, web search, or analysis
update: {
// messages: [new AIMessage(responseMessage)],
tasks: taskLines,
currentTaskIndex: 0,
originalQuery: state.originalQuery || state.query, // Preserve original if not already set
},
});
} catch (error) {
console.error('Task breakdown error:', error);
const errorMessage = new AIMessage(
`Task breakdown failed: ${error instanceof Error ? error.message : 'Unknown error'}`,
);
return new Command({
goto: 'content_router', // Fallback to content router with original query
update: {
messages: [errorMessage],
tasks: [state.query], // Use original query as single task
currentTaskIndex: 0,
originalQuery: state.originalQuery || state.query, // Preserve original if not already set
},
});
} finally {
setTemperature(this.llm, undefined); // Reset temperature to default
}
}
}
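
The task-progression rule at the top of execute can be stated compactly. A hypothetical condensed mirror (not part of this commit) of that logic; nextStep is an invented name.

// Hypothetical mirror of TaskManagerAgent's progression decision.
function nextStep(
  tasks: string[],
  currentTaskIndex: number,
): { goto: string; nextIndex?: number } {
  if (tasks.length === 0) {
    return { goto: 'content_router' }; // fresh query: break it down first
  }
  const hasMoreTasks = currentTaskIndex < tasks.length - 1;
  return hasMoreTasks
    ? { goto: 'content_router', nextIndex: currentTaskIndex + 1 } // advance to next task
    : { goto: 'analyzer' }; // all tasks done: analyze gathered context
}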

@ -1,300 +0,0 @@
import { BaseChatModel } from '@langchain/core/language_models/chat_models';
import { AIMessage } from '@langchain/core/messages';
import { Command, END } from '@langchain/langgraph';
import { EventEmitter } from 'events';
import { Document } from 'langchain/document';
import { AgentState } from './agentState';
import { getWebContent } from '../utils/documents';
import { removeThinkingBlocks } from '../utils/contentUtils';
import { setTemperature } from '../utils/modelUtils';
export class URLSummarizationAgent {
private llm: BaseChatModel;
private emitter: EventEmitter;
private systemInstructions: string;
private signal: AbortSignal;
constructor(
llm: BaseChatModel,
emitter: EventEmitter,
systemInstructions: string,
signal: AbortSignal,
) {
this.llm = llm;
this.emitter = emitter;
this.systemInstructions = systemInstructions;
this.signal = signal;
}
/**
* URL processing agent node
*/
async execute(state: typeof AgentState.State): Promise<Command> {
try {
setTemperature(this.llm, 0); // Set temperature to 0 for deterministic output
// Use pre-analyzed URLs from ContentRouterAgent
const urlsToProcess = state.urlsToSummarize || [];
const summarizationIntent =
state.summarizationIntent ||
'process content to help answer the user query';
if (urlsToProcess.length === 0) {
console.log(
'No URLs found for processing, routing back to content router',
);
return new Command({
goto: 'content_router',
// update: {
// messages: [
// new AIMessage(
// 'No URLs found for processing, routing to content router',
// ),
// ],
// },
});
}
console.log(`URL processing detected. URLs: ${urlsToProcess.join(', ')}`);
console.log(`Processing intent: ${summarizationIntent}`);
// Emit URL detection event
this.emitter.emit('agent_action', {
type: 'agent_action',
data: {
action: 'URL_PROCESSING_DETECTED',
message: `Processing ${urlsToProcess.length} URL(s) to extract content for analysis`,
details: {
query: state.query,
urls: urlsToProcess,
intent: summarizationIntent,
},
},
});
const documents: Document[] = [];
// Process each URL
for (const url of urlsToProcess) {
if (this.signal.aborted) {
console.warn('URL summarization operation aborted by signal');
break;
}
try {
// Emit URL processing event
this.emitter.emit('agent_action', {
type: 'agent_action',
data: {
action: 'PROCESSING_URL',
message: `Retrieving and processing content from: ${url}`,
details: {
query: state.query,
sourceUrl: url,
intent: summarizationIntent,
},
},
});
// Fetch full content using the enhanced web content retrieval
const webContent = await getWebContent(url, true);
if (!webContent || !webContent.pageContent) {
console.warn(`No content retrieved from URL: ${url}`);
// Emit URL processing failure event
this.emitter.emit('agent_action', {
type: 'agent_action',
data: {
action: 'URL_PROCESSING_FAILED',
message: `Failed to retrieve content from: ${url}`,
details: {
query: state.query,
sourceUrl: url,
reason: 'No content retrieved',
},
},
});
continue;
}
const contentLength = webContent.pageContent.length;
let finalContent: string;
let processingType: string;
// If content is short (< 4000 chars), use it directly; otherwise summarize
if (contentLength < 4000) {
finalContent = webContent.pageContent;
processingType = 'url-direct-content';
console.log(
`Content is short (${contentLength} chars), using directly without summarization`,
);
// Emit direct content usage event
this.emitter.emit('agent_action', {
type: 'agent_action',
data: {
action: 'URL_DIRECT_CONTENT',
message: `Content is short (${contentLength} chars), using directly from: ${url}`,
details: {
query: state.query,
sourceUrl: url,
sourceTitle: webContent.metadata.title || 'Web Page',
contentLength: contentLength,
intent: summarizationIntent,
},
},
});
} else {
// Content is long, summarize using LLM
console.log(
`Content is long (${contentLength} chars), generating summary`,
);
const systemPrompt = this.systemInstructions
? `${this.systemInstructions}\n\n`
: '';
const summarizationPrompt = `${systemPrompt}You are a web content processor. Extract and summarize ONLY the information from the provided web page content that is relevant to the user's query.
# Critical Instructions
- Output ONLY a summary of the web page content provided below
- Focus on information that relates to or helps answer the user's query
- Do NOT add pleasantries, greetings, or conversational elements
- Do NOT mention missing URLs, other pages, or content not provided
- Do NOT ask follow-up questions or suggest additional actions
- Do NOT add commentary about the user's request or query
- Present the information in a clear, well-structured format with key facts and details
- Include all relevant details that could help answer the user's question
# User's Query: ${state.query}
# Content Title: ${webContent.metadata.title || 'Web Page'}
# Content URL: ${url}
# Web Page Content to Summarize:
${webContent.pageContent}
Provide a comprehensive summary of the above web page content, focusing on information relevant to the user's query:`;
const result = await this.llm.invoke(summarizationPrompt, {
signal: this.signal,
});
finalContent = removeThinkingBlocks(result.content as string);
processingType = 'url-content-extraction';
}
if (finalContent && finalContent.trim().length > 0) {
const document = new Document({
pageContent: finalContent,
metadata: {
title: webContent.metadata.title || 'URL Content',
url: url,
source: url,
processingType: processingType,
processingIntent: summarizationIntent,
originalContentLength: contentLength,
},
});
documents.push(document);
// Emit successful URL processing event
this.emitter.emit('agent_action', {
type: 'agent_action',
data: {
action: 'URL_CONTENT_EXTRACTED',
message: `Successfully processed content from: ${url}`,
details: {
query: state.query,
sourceUrl: url,
sourceTitle: webContent.metadata.title || 'Web Page',
contentLength: finalContent.length,
originalContentLength: contentLength,
processingType: processingType,
intent: summarizationIntent,
},
},
});
console.log(
`Successfully processed content from ${url} (${finalContent.length} characters, ${processingType})`,
);
} else {
console.warn(`No valid content generated for URL: ${url}`);
}
} catch (error) {
console.error(`Error processing URL ${url}:`, error);
// Emit URL processing error event
this.emitter.emit('agent_action', {
type: 'agent_action',
data: {
action: 'URL_PROCESSING_ERROR',
message: `Error processing URL: ${url}`,
details: {
query: state.query,
sourceUrl: url,
error: error instanceof Error ? error.message : 'Unknown error',
},
},
});
}
}
if (documents.length === 0) {
const errorMessage = `No content could be retrieved or summarized from the provided URL(s): ${urlsToProcess.join(', ')}`;
console.error(errorMessage);
return new Command({
goto: 'analyzer',
// update: {
// messages: [new AIMessage(errorMessage)],
// },
});
}
// Emit completion event
this.emitter.emit('agent_action', {
type: 'agent_action',
data: {
action: 'URL_PROCESSING_COMPLETED',
message: `Successfully processed ${documents.length} URL(s) and extracted content`,
details: {
query: state.query,
processedUrls: urlsToProcess.length,
successfulExtractions: documents.length,
intent: summarizationIntent,
},
},
});
const responseMessage = `URL processing completed. Successfully processed ${documents.length} out of ${urlsToProcess.length} URLs.`;
console.log(responseMessage);
return new Command({
goto: 'analyzer', // Route to analyzer to continue with normal workflow after URL processing
update: {
// messages: [new AIMessage(responseMessage)],
relevantDocuments: documents,
},
});
} catch (error) {
console.error('URL summarization error:', error);
const errorMessage = new AIMessage(
`URL summarization failed: ${error instanceof Error ? error.message : 'Unknown error'}`,
);
return new Command({
goto: END,
update: {
messages: [errorMessage],
},
});
} finally {
setTemperature(this.llm, undefined); // Reset temperature to default
}
}
}
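
The length cutoff that decides between direct use and LLM summarization is the one behavioral constant in this agent. A hypothetical restatement (not part of this commit) for clarity:

// Hypothetical restatement of the 4000-character rule above.
function decideUrlProcessing(
  contentLength: number,
): 'url-direct-content' | 'url-content-extraction' {
  // Short pages go into context verbatim; long pages are summarized first.
  return contentLength < 4000 ? 'url-direct-content' : 'url-content-extraction';
}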

@ -1,461 +0,0 @@
import { BaseChatModel } from '@langchain/core/language_models/chat_models';
import { AIMessage } from '@langchain/core/messages';
import { PromptTemplate } from '@langchain/core/prompts';
import { Command, END } from '@langchain/langgraph';
import { EventEmitter } from 'events';
import { Document } from 'langchain/document';
import { z } from 'zod';
import LineOutputParser from '../outputParsers/lineOutputParser';
import { webSearchRetrieverAgentPrompt } from '../prompts/webSearch';
import { searchSearxng } from '../searxng';
import { formatDateForLLM } from '../utils';
import { summarizeWebContent } from '../utils/summarizeWebContent';
import {
analyzePreviewContent,
PreviewContent,
} from '../utils/analyzePreviewContent';
import { AgentState } from './agentState';
import { setTemperature } from '../utils/modelUtils';
import { Embeddings } from '@langchain/core/embeddings';
import { removeThinkingBlocksFromMessages } from '../utils/contentUtils';
import computeSimilarity from '../utils/computeSimilarity';
import { withStructuredOutput } from '../utils/structuredOutput';
// Define Zod schema for structured search query output
const SearchQuerySchema = z.object({
searchQuery: z
.string()
.describe('The optimized search query to use for web search'),
reasoning: z
.string()
.describe(
'Explanation of how the search query was optimized for better results',
),
});
type SearchQuery = z.infer<typeof SearchQuerySchema>;
export class WebSearchAgent {
private llm: BaseChatModel;
private emitter: EventEmitter;
private systemInstructions: string;
private signal: AbortSignal;
private embeddings: Embeddings;
constructor(
llm: BaseChatModel,
emitter: EventEmitter,
systemInstructions: string,
signal: AbortSignal,
embeddings: Embeddings,
) {
this.llm = llm;
this.emitter = emitter;
this.systemInstructions = systemInstructions;
this.signal = signal;
this.embeddings = embeddings;
}
/**
* Web search agent node
*/
async execute(state: typeof AgentState.State): Promise<Command> {
try {
//setTemperature(this.llm, 0); // Set temperature to 0 for deterministic output
// Determine current task to process
const currentTask =
state.tasks && state.tasks.length > 0
? state.tasks[state.currentTaskIndex || 0]
: state.query;
console.log(
`Processing task ${(state.currentTaskIndex || 0) + 1} of ${state.tasks?.length || 1}: "${currentTask}"`,
);
// Emit preparing web search event
this.emitter.emit('agent_action', {
type: 'agent_action',
data: {
action: 'PREPARING_SEARCH_QUERY',
// message: `Preparing search query`,
details: {
query: state.query,
currentTask: currentTask,
taskIndex: (state.currentTaskIndex || 0) + 1,
totalTasks: state.tasks?.length || 1,
searchInstructions: state.searchInstructions || currentTask,
documentCount: state.relevantDocuments.length,
searchIterations: state.searchInstructionHistory.length,
},
},
});
const template = PromptTemplate.fromTemplate(
webSearchRetrieverAgentPrompt,
);
const prompt = await template.format({
systemInstructions: this.systemInstructions,
query: currentTask, // Use current task instead of main query
date: formatDateForLLM(new Date()),
supervisor: state.searchInstructions,
});
// Use structured output for search query generation
const structuredLlm = withStructuredOutput(this.llm, SearchQuerySchema, {
name: 'generate_search_query',
});
const searchQueryResult = await structuredLlm.invoke(
[...removeThinkingBlocksFromMessages(state.messages), prompt],
{ signal: this.signal },
);
const searchQuery = searchQueryResult.searchQuery;
console.log(`Performing web search for query: "${searchQuery}"`);
console.log('Search query reasoning:', searchQueryResult.reasoning);
// Emit executing web search event
this.emitter.emit('agent_action', {
type: 'agent_action',
data: {
action: 'EXECUTING_WEB_SEARCH',
// message: `Searching the web for: '${searchQuery}'`,
details: {
query: state.query,
currentTask: currentTask,
taskIndex: (state.currentTaskIndex || 0) + 1,
totalTasks: state.tasks?.length || 1,
searchQuery: searchQuery,
documentCount: state.relevantDocuments.length,
searchIterations: state.searchInstructionHistory.length,
},
},
});
const searchResults = await searchSearxng(searchQuery, {
language: 'en',
engines: [],
});
// Emit web sources identified event
this.emitter.emit('agent_action', {
type: 'agent_action',
data: {
action: 'WEB_SOURCES_IDENTIFIED',
message: `Found ${searchResults.results.length} potential web sources`,
details: {
query: state.query,
currentTask: currentTask,
taskIndex: (state.currentTaskIndex || 0) + 1,
totalTasks: state.tasks?.length || 1,
searchQuery: searchQuery,
sourcesFound: searchResults.results.length,
documentCount: state.relevantDocuments.length,
searchIterations: state.searchInstructionHistory.length,
},
},
});
let bannedSummaryUrls = state.bannedSummaryUrls || [];
let bannedPreviewUrls = state.bannedPreviewUrls || [];
const queryVector = await this.embeddings.embedQuery(
state.originalQuery + ' ' + currentTask,
);
// Filter out banned URLs first
const filteredResults = searchResults.results.filter(
(result) =>
!bannedSummaryUrls.includes(result.url) &&
!bannedPreviewUrls.includes(result.url),
);
// Calculate similarities for all filtered results
const resultsWithSimilarity = await Promise.all(
filteredResults.map(async (result) => {
const vector = await this.embeddings.embedQuery(
result.title + ' ' + (result.content || ''),
);
const similarity = computeSimilarity(vector, queryVector);
return { result, similarity };
}),
);
let previewContents: PreviewContent[] = [];
// Always take the top 3 results for preview content
previewContents.push(
...filteredResults.slice(0, 3).map((result) => ({
title: result.title || 'Untitled',
snippet: result.content || '',
url: result.url,
})),
);
// Sort by relevance score and take top 12 results for a total of 15
previewContents.push(
...resultsWithSimilarity
.slice(3)
.sort((a, b) => b.similarity - a.similarity)
.slice(0, 12)
.map(({ result }) => ({
title: result.title || 'Untitled',
snippet: result.content || '',
url: result.url,
})),
);
console.log(
`Extracted preview content from ${previewContents.length} search results for analysis`,
);
// Perform preview analysis to determine if full content retrieval is needed
let previewAnalysisResult = null;
if (previewContents.length > 0) {
console.log(
'Starting preview content analysis to determine if full processing is needed',
);
// Emit preview analysis event
this.emitter.emit('agent_action', {
type: 'agent_action',
data: {
action: 'ANALYZING_PREVIEW_CONTENT',
message: `Analyzing ${previewContents.length} search result previews to determine processing approach`,
details: {
query: currentTask,
previewCount: previewContents.length,
documentCount: state.relevantDocuments.length,
searchIterations: state.searchInstructionHistory.length,
},
},
});
previewAnalysisResult = await analyzePreviewContent(
previewContents,
state.query,
currentTask,
removeThinkingBlocksFromMessages(state.messages),
this.llm,
this.systemInstructions,
this.signal,
);
console.log(
`Preview analysis result: ${previewAnalysisResult.isSufficient ? 'SUFFICIENT' : 'INSUFFICIENT'}${previewAnalysisResult.reason ? ` - ${previewAnalysisResult.reason}` : ''}`,
);
}
let documents: Document[] = [];
let attemptedUrlCount = 0; // Declare outside conditional blocks
// Conditional workflow based on preview analysis result
if (previewAnalysisResult && previewAnalysisResult.isSufficient) {
// Preview content is sufficient - create documents from preview content
console.log(
'Preview content determined sufficient - skipping full content retrieval',
);
// Emit preview processing event
this.emitter.emit('agent_action', {
type: 'agent_action',
data: {
action: 'PROCESSING_PREVIEW_CONTENT',
message: `Using preview content from ${previewContents.length} sources - no full content retrieval needed`,
details: {
query: currentTask,
previewCount: previewContents.length,
documentCount: state.relevantDocuments.length,
searchIterations: state.searchInstructionHistory.length,
processingType: 'preview-only',
},
},
});
// Create documents from preview content
documents = previewContents.map(
(content, index) =>
new Document({
pageContent: `# ${content.title}\n\n${content.snippet}`,
metadata: {
title: content.title,
url: content.url,
source: content.url,
processingType: 'preview-only',
snippet: content.snippet,
},
}),
);
previewContents.forEach((content) => {
bannedPreviewUrls.push(content.url); // Add to banned preview URLs to avoid duplicates
});
console.log(
`Created ${documents.length} documents from preview content`,
);
} else {
// Preview content is insufficient - proceed with full content processing
const insufficiencyReason =
previewAnalysisResult?.reason ||
'Preview content not available or insufficient';
console.log(
`Preview content insufficient: ${insufficiencyReason} - proceeding with full content retrieval`,
);
// Emit full processing event
this.emitter.emit('agent_action', {
type: 'agent_action',
data: {
action: 'PROCEEDING_WITH_FULL_ANALYSIS',
message: `Preview content insufficient - proceeding with detailed content analysis`,
details: {
query: currentTask,
insufficiencyReason: insufficiencyReason,
documentCount: state.relevantDocuments.length,
searchIterations: state.searchInstructionHistory.length,
processingType: 'full-content',
},
},
});
// Summarize the top 2 search results
for (const result of previewContents) {
if (this.signal.aborted) {
console.warn('Search operation aborted by signal');
break; // Exit if the operation is aborted
}
if (bannedSummaryUrls.includes(result.url)) {
console.log(`Skipping banned URL: ${result.url}`);
// Note: We don't emit an agent_action event for banned URLs as this is an internal
// optimization that should be transparent to the user
continue; // Skip banned URLs
}
// if (attemptedUrlCount >= 5) {
// console.warn(
// 'Too many attempts to summarize URLs, stopping further attempts.',
// );
// break; // Limit the number of attempts to summarize URLs
// }
attemptedUrlCount++;
bannedSummaryUrls.push(result.url); // Add to banned URLs to avoid duplicates
if (documents.length >= 2) {
break; // Limit to top 2 documents
}
// Emit analyzing source event
this.emitter.emit('agent_action', {
type: 'agent_action',
data: {
action: 'ANALYZING_SOURCE',
message: `Analyzing and summarizing content from: ${result.title || result.url}`,
details: {
query: currentTask,
sourceUrl: result.url,
sourceTitle: result.title || 'Untitled',
documentCount: state.relevantDocuments.length,
searchIterations: state.searchInstructionHistory.length,
},
},
});
const summaryResult = await summarizeWebContent(
result.url,
currentTask,
this.llm,
this.systemInstructions,
this.signal,
);
if (summaryResult.document) {
documents.push(summaryResult.document);
// Emit context updated event
this.emitter.emit('agent_action', {
type: 'agent_action',
data: {
action: 'CONTEXT_UPDATED',
message: `Added information from ${summaryResult.document.metadata.title || result.url} to context`,
details: {
query: currentTask,
sourceUrl: result.url,
sourceTitle:
summaryResult.document.metadata.title || 'Untitled',
contentLength: summaryResult.document.pageContent.length,
documentCount:
state.relevantDocuments.length + documents.length,
searchIterations: state.searchInstructionHistory.length,
},
},
});
console.log(
`Summarized content from ${result.url} to ${summaryResult.document.pageContent.length} characters. Content: ${summaryResult.document.pageContent}`,
);
} else {
console.warn(`No relevant content found for URL: ${result.url}`);
// Emit skipping irrelevant source event for non-relevant content
this.emitter.emit('agent_action', {
type: 'agent_action',
data: {
action: 'SKIPPING_IRRELEVANT_SOURCE',
message: `Source ${result.title || result.url} was not relevant - trying next`,
details: {
query: state.query,
sourceUrl: result.url,
sourceTitle: result.title || 'Untitled',
skipReason:
summaryResult.notRelevantReason ||
'Content was not relevant to the query',
documentCount:
state.relevantDocuments.length + documents.length,
searchIterations: state.searchInstructionHistory.length,
},
},
});
}
}
} // Close the else block for full content processing
if (documents.length === 0) {
return new Command({
goto: 'analyzer',
// update: {
// messages: [new AIMessage('No relevant documents found.')],
// },
});
}
// Note: the zero-document case already returned to the analyzer above
const responseMessage = `Web search completed. Found ${documents.length} results that are relevant to the query (${attemptedUrlCount} URLs attempted).`;
console.log(responseMessage);
return new Command({
goto: 'analyzer', // Route back to analyzer to process the results
update: {
// messages: [new AIMessage(responseMessage)],
relevantDocuments: documents,
bannedSummaryUrls: bannedSummaryUrls,
bannedPreviewUrls: bannedPreviewUrls,
},
});
} catch (error) {
console.error('Web search error:', error);
const errorMessage = new AIMessage(
`Web search failed: ${error instanceof Error ? error.message : 'Unknown error'}`,
);
return new Command({
goto: END,
update: {
messages: [errorMessage],
},
});
} finally {
setTemperature(this.llm, undefined); // Reset temperature to default
}
}
}

View file

@@ -1,13 +1,13 @@
// Dashboard-wide constants and constraints
export const DASHBOARD_CONSTRAINTS = {
  // Grid layout constraints
  WIDGET_MIN_WIDTH: 2, // Minimum columns
  WIDGET_MAX_WIDTH: 12, // Maximum columns (full width)
  WIDGET_MIN_HEIGHT: 2, // Minimum rows
  WIDGET_MAX_HEIGHT: 20, // Maximum rows
  // Default widget sizing
  DEFAULT_WIDGET_WIDTH: 6, // Half width by default
  DEFAULT_WIDGET_HEIGHT: 4, // Standard height
  // Grid configuration
@@ -33,7 +33,9 @@ export const DASHBOARD_CONSTRAINTS = {
} as const;
// Responsive constraints - adjust max width based on breakpoint
export const getResponsiveConstraints = (
  breakpoint: keyof typeof DASHBOARD_CONSTRAINTS.GRID_COLUMNS,
) => {
  const maxCols = DASHBOARD_CONSTRAINTS.GRID_COLUMNS[breakpoint];
  return {
    minW: DASHBOARD_CONSTRAINTS.WIDGET_MIN_WIDTH,

View file

@@ -9,7 +9,10 @@ import {
  DASHBOARD_STORAGE_KEYS,
} from '@/lib/types/dashboard';
import { WidgetCache } from '@/lib/types/cache';
import {
  DASHBOARD_CONSTRAINTS,
  getResponsiveConstraints,
} from '@/lib/constants/dashboard';
// Helper function to request location permission and get user's location
const requestLocationPermission = async (): Promise<string | undefined> => {
@@ -187,60 +190,68 @@ export const useDashboard = (): UseDashboardReturn => {
    );
  }, [state.settings]);
  const addWidget = useCallback(
    (config: WidgetConfig) => {
      // Find the next available position in the grid
      const getNextPosition = () => {
        const existingWidgets = state.widgets;
        let x = 0;
        let y = 0;
        // Simple algorithm: try to place in first available spot
        for (let row = 0; row < 20; row++) {
          for (let col = 0; col < 12; col += 6) {
            // Start with half-width widgets
            const position = { x: col, y: row };
            const hasCollision = existingWidgets.some(
              (widget) =>
                widget.layout.x < position.x + 6 &&
                widget.layout.x + widget.layout.w > position.x &&
                widget.layout.y < position.y + 3 &&
                widget.layout.y + widget.layout.h > position.y,
            );
            if (!hasCollision) {
              return { x: position.x, y: position.y };
            }
          }
        }
        // Fallback: place at bottom
        const maxY = Math.max(
          0,
          ...existingWidgets.map((w) => w.layout.y + w.layout.h),
        );
        return { x: 0, y: maxY };
      };
      const position = getNextPosition();
      const defaultLayout: WidgetLayout = {
        x: position.x,
        y: position.y,
        w: DASHBOARD_CONSTRAINTS.DEFAULT_WIDGET_WIDTH,
        h: DASHBOARD_CONSTRAINTS.DEFAULT_WIDGET_HEIGHT,
        isDraggable: true,
        isResizable: true,
      };
      const newWidget: Widget = {
        ...config,
        id: Date.now().toString() + Math.random().toString(36).substr(2, 9),
        lastUpdated: null,
        isLoading: false,
        content: null,
        error: null,
        layout: config.layout || defaultLayout,
      };
      setState((prev) => ({
        ...prev,
        widgets: [...prev.widgets, newWidget],
      }));
    },
    [state.widgets],
  );
  const updateWidget = useCallback((id: string, config: WidgetConfig) => {
    setState((prev) => ({
@@ -502,11 +513,13 @@ export const useDashboard = (): UseDashboardReturn => {
  );
  const getLayouts = useCallback((): DashboardLayouts => {
    const createBreakpointLayout = (
      breakpoint: keyof typeof DASHBOARD_CONSTRAINTS.GRID_COLUMNS,
    ) => {
      const constraints = getResponsiveConstraints(breakpoint);
      const maxCols = DASHBOARD_CONSTRAINTS.GRID_COLUMNS[breakpoint];
      return state.widgets.map((widget) => ({
        i: widget.id,
        x: widget.layout.x,
        y: widget.layout.y,
@@ -531,32 +544,37 @@ export const useDashboard = (): UseDashboardReturn => {
    };
  }, [state.widgets]);
  const updateLayouts = useCallback(
    (layouts: DashboardLayouts) => {
      const updatedWidgets = state.widgets.map((widget) => {
        // Use lg layout as the primary layout for position and size updates
        const newLayout = layouts.lg.find(
          (layout: Layout) => layout.i === widget.id,
        );
        if (newLayout) {
          return {
            ...widget,
            layout: {
              x: newLayout.x,
              y: newLayout.y,
              w: newLayout.w,
              h: newLayout.h,
              static: newLayout.static || widget.layout.static,
              isDraggable: newLayout.isDraggable ?? widget.layout.isDraggable,
              isResizable: newLayout.isResizable ?? widget.layout.isResizable,
            },
          };
        }
        return widget;
      });
      setState((prev) => ({
        ...prev,
        widgets: updatedWidgets,
      }));
    },
    [state.widgets],
  );
  return {
    // State

View file

@@ -1,48 +1,19 @@
 import { Embeddings } from '@langchain/core/embeddings';
 import { BaseChatModel } from '@langchain/core/language_models/chat_models';
-import {
-  BaseMessage,
-  HumanMessage,
-  SystemMessage,
-} from '@langchain/core/messages';
-import {
-  BaseLangGraphError,
-  END,
-  GraphRecursionError,
-  MemorySaver,
-  START,
-  StateGraph,
-} from '@langchain/langgraph';
+import { BaseMessage } from '@langchain/core/messages';
 import { EventEmitter } from 'events';
-import {
-  AgentState,
-  WebSearchAgent,
-  AnalyzerAgent,
-  SynthesizerAgent,
-  TaskManagerAgent,
-  FileSearchAgent,
-  ContentRouterAgent,
-  URLSummarizationAgent,
-} from '../agents';
+import { SimplifiedAgent } from './simplifiedAgent';
 /**
  * Agent Search class implementing LangGraph Supervisor pattern
  */
 export class AgentSearch {
-  private llm: BaseChatModel;
-  private embeddings: Embeddings;
-  private checkpointer: MemorySaver;
-  private signal: AbortSignal;
-  private taskManagerAgent: TaskManagerAgent;
-  private webSearchAgent: WebSearchAgent;
-  private analyzerAgent: AnalyzerAgent;
-  private synthesizerAgent: SynthesizerAgent;
-  private fileSearchAgent: FileSearchAgent;
-  private contentRouterAgent: ContentRouterAgent;
-  private urlSummarizationAgent: URLSummarizationAgent;
   private emitter: EventEmitter;
   private focusMode: string;
+  // Simplified agent experimental implementation
+  private simplifiedAgent: SimplifiedAgent;
   constructor(
     llm: BaseChatModel,
     embeddings: Embeddings,
@@ -52,117 +23,49 @@ export class AgentSearch {
     signal: AbortSignal,
     focusMode: string = 'webSearch',
   ) {
-    this.llm = llm;
-    this.embeddings = embeddings;
-    this.checkpointer = new MemorySaver();
-    this.signal = signal;
     this.emitter = emitter;
     this.focusMode = focusMode;
-    // Initialize agents
-    this.taskManagerAgent = new TaskManagerAgent(
-      llm,
-      emitter,
-      systemInstructions,
-      signal,
-    );
-    this.webSearchAgent = new WebSearchAgent(
-      llm,
-      emitter,
-      systemInstructions,
-      signal,
-      embeddings,
-    );
-    this.analyzerAgent = new AnalyzerAgent(
-      llm,
-      emitter,
-      systemInstructions,
-      signal,
-    );
-    this.synthesizerAgent = new SynthesizerAgent(
-      llm,
-      emitter,
-      personaInstructions,
-      signal,
-    );
-    this.fileSearchAgent = new FileSearchAgent(
-      llm,
-      emitter,
-      systemInstructions,
-      signal,
-      embeddings,
-    );
-    this.contentRouterAgent = new ContentRouterAgent(
-      llm,
-      emitter,
-      systemInstructions,
-      signal,
-    );
-    this.urlSummarizationAgent = new URLSummarizationAgent(
-      llm,
-      emitter,
-      systemInstructions,
-      signal,
-    );
+    // Initialize simplified agent (experimental)
+    this.simplifiedAgent = new SimplifiedAgent(
+      llm,
+      embeddings,
+      emitter,
+      systemInstructions,
+      personaInstructions,
+      signal,
+      focusMode,
+    );
   }
   /**
-   * Create and compile the agent workflow graph
+   * Execute the simplified agent search workflow (experimental)
    */
-  private createWorkflow() {
-    const workflow = new StateGraph(AgentState)
-      .addNode(
-        'url_summarization',
-        this.urlSummarizationAgent.execute.bind(this.urlSummarizationAgent),
-        {
-          ends: ['task_manager', 'analyzer'],
-        },
-      )
-      .addNode(
-        'task_manager',
-        this.taskManagerAgent.execute.bind(this.taskManagerAgent),
-        {
-          ends: ['content_router', 'analyzer'],
-        },
-      )
-      .addNode(
-        'content_router',
-        this.contentRouterAgent.execute.bind(this.contentRouterAgent),
-        {
-          ends: ['file_search', 'web_search', 'analyzer'],
-        },
-      )
-      .addNode(
-        'file_search',
-        this.fileSearchAgent.execute.bind(this.fileSearchAgent),
-        {
-          ends: ['analyzer'],
-        },
-      )
-      .addNode(
-        'web_search',
-        this.webSearchAgent.execute.bind(this.webSearchAgent),
-        {
-          ends: ['analyzer'],
-        },
-      )
-      .addNode(
-        'analyzer',
-        this.analyzerAgent.execute.bind(this.analyzerAgent),
-        {
-          ends: ['url_summarization', 'task_manager', 'synthesizer'],
-        },
-      )
-      .addNode(
-        'synthesizer',
-        this.synthesizerAgent.execute.bind(this.synthesizerAgent),
-        {
-          ends: [END],
-        },
-      )
-      .addEdge(START, 'analyzer');
-    return workflow.compile({ checkpointer: this.checkpointer });
+  async searchAndAnswerSimplified(
+    query: string,
+    history: BaseMessage[] = [],
+    fileIds: string[] = [],
+  ): Promise<void> {
+    console.log('AgentSearch: Using simplified agent implementation');
+    // Emit agent action to indicate simplified agent usage
+    this.emitter.emit(
+      'data',
+      JSON.stringify({
+        type: 'agent_action',
+        data: {
+          action: 'agent_implementation_selection',
+          message: 'Using simplified agent implementation (experimental)',
+          details: `Focus mode: ${this.focusMode}, Files: ${fileIds.length}`,
+        },
+      }),
+    );
+    // Update focus mode in simplified agent if needed
+    this.simplifiedAgent.updateFocusMode(this.focusMode);
+    // Delegate to simplified agent
+    await this.simplifiedAgent.searchAndAnswer(query, history, fileIds);
   }
   /**
@@ -173,139 +76,7 @@
     history: BaseMessage[] = [],
     fileIds: string[] = [],
   ) {
-    const workflow = this.createWorkflow();
-    const initialState = {
-      messages: [...history, new HumanMessage(query)],
-      query,
-      fileIds,
-      focusMode: this.focusMode,
-    };
-    const threadId = `agent_search_${Date.now()}`;
-    const config = {
-      configurable: { thread_id: threadId },
-      recursionLimit: 18,
-      signal: this.signal,
-    };
-    try {
-      const result = await workflow.invoke(initialState, config);
-    } catch (error: any) {
-      if (error instanceof GraphRecursionError) {
-        console.warn(
-          'Graph recursion limit reached, attempting best-effort synthesis with gathered information',
-        );
-        // Emit agent action to explain what happened
-        this.emitter.emit(
-          'data',
-          JSON.stringify({
-            type: 'agent_action',
-            data: {
-              action: 'recursion_limit_recovery',
-              message:
-                'Search process reached complexity limits. Attempting to provide best-effort response with gathered information.',
-              details:
-                'The agent workflow exceeded the maximum number of steps allowed. Recovering by synthesizing available data.',
-            },
-          }),
-        );
-        try {
-          // Get the latest state from the checkpointer to access gathered information
-          const latestState = await workflow.getState({
-            configurable: { thread_id: threadId },
-          });
-          if (latestState && latestState.values) {
-            // Create emergency synthesis state using gathered information
-            const stateValues = latestState.values;
-            const emergencyState = {
-              messages: stateValues.messages || initialState.messages,
-              query: stateValues.query || initialState.query,
-              relevantDocuments: stateValues.relevantDocuments || [],
-              bannedSummaryUrls: stateValues.bannedSummaryUrls || [],
-              bannedPreviewUrls: stateValues.bannedPreviewUrls || [],
-              searchInstructionHistory:
-                stateValues.searchInstructionHistory || [],
-              searchInstructions: stateValues.searchInstructions || '',
-              next: 'synthesizer',
-              analysis: stateValues.analysis || '',
-              fullAnalysisAttempts: stateValues.fullAnalysisAttempts || 0,
-              tasks: stateValues.tasks || [],
-              currentTaskIndex: stateValues.currentTaskIndex || 0,
-              originalQuery:
-                stateValues.originalQuery ||
-                stateValues.query ||
-                initialState.query,
-              fileIds: stateValues.fileIds || initialState.fileIds,
-              focusMode: stateValues.focusMode || initialState.focusMode,
-              urlsToSummarize: stateValues.urlsToSummarize || [],
-              summarizationIntent: stateValues.summarizationIntent || '',
-              recursionLimitReached: true,
-            };
-            const documentsCount =
-              emergencyState.relevantDocuments?.length || 0;
-            console.log(
-              `Attempting emergency synthesis with ${documentsCount} gathered documents`,
-            );
-            // Emit detailed agent action about the recovery attempt
-            this.emitter.emit(
-              'data',
-              JSON.stringify({
-                type: 'agent_action',
-                data: {
-                  action: 'emergency_synthesis',
-                  message: `Proceeding with available information: ${documentsCount} documents gathered${emergencyState.analysis ? ', analysis available' : ''}`,
-                  details: `Recovered state contains: ${documentsCount} relevant documents, ${emergencyState.searchInstructionHistory?.length || 0} search attempts, ${emergencyState.analysis ? 'analysis data' : 'no analysis'}`,
-                },
-              }),
-            );
-            // Only proceed with synthesis if we have some useful information
-            if (documentsCount > 0 || emergencyState.analysis) {
-              await this.synthesizerAgent.execute(emergencyState);
-            } else {
-              // If we don't have any gathered information, provide a helpful message
-              this.emitter.emit(
-                'data',
-                JSON.stringify({
-                  type: 'response',
-                  data: "⚠️ **Search Process Incomplete** - The search process reached complexity limits before gathering sufficient information to provide a meaningful response. Please try:\n\n- Using more specific keywords\n- Breaking your question into smaller parts\n- Rephrasing your query to be more focused\n\nI apologize that I couldn't provide the information you were looking for.",
-                }),
-              );
-              this.emitter.emit('end');
-            }
-          } else {
-            // Fallback if we can't retrieve state
-            this.emitter.emit(
-              'data',
-              JSON.stringify({
-                type: 'response',
-                data: '⚠️ **Limited Information Available** - The search process encountered complexity limits and was unable to gather sufficient information. Please try rephrasing your question or breaking it into smaller, more specific parts.',
-              }),
-            );
-            this.emitter.emit('end');
-          }
-        } catch (synthError) {
-          console.error('Emergency synthesis failed:', synthError);
-          this.emitter.emit(
-            'data',
-            JSON.stringify({
-              type: 'response',
-              data: '⚠️ **Search Process Interrupted** - The search encountered complexity limits and could not complete successfully. Please try a simpler query or break your question into smaller parts.',
-            }),
-          );
-          this.emitter.emit('end');
-        }
-      } else if (error.name === 'AbortError') {
-        console.warn('Agent search was aborted:', error.message);
-      } else {
-        console.error('Unexpected error during agent search:', error);
-      }
-    }
+    console.log('AgentSearch: Routing to simplified agent implementation');
+    return await this.searchAndAnswerSimplified(query, history, fileIds);
   }
 }

View file

@@ -0,0 +1,634 @@
import { createReactAgent } from '@langchain/langgraph/prebuilt';
import { BaseChatModel } from '@langchain/core/language_models/chat_models';
import {
BaseMessage,
HumanMessage,
SystemMessage,
} from '@langchain/core/messages';
import { Embeddings } from '@langchain/core/embeddings';
import { EventEmitter } from 'events';
import { RunnableConfig } from '@langchain/core/runnables';
import { SimplifiedAgentState } from '@/lib/state/chatAgentState';
import {
allAgentTools,
coreTools,
webSearchTools,
fileSearchTools,
} from '@/lib/tools/agents';
import { formatDateForLLM } from '../utils';
import { getModelName } from '../utils/modelUtils';
/**
* Simplified Agent using createReactAgent
*
* This agent replaces the complex LangGraph supervisor pattern with a single
* tool-calling agent that handles analysis and synthesis internally while
* using specialized tools for search, file processing, and URL summarization.
*/
export class SimplifiedAgent {
private llm: BaseChatModel;
private embeddings: Embeddings;
private emitter: EventEmitter;
private systemInstructions: string;
private personaInstructions: string;
private signal: AbortSignal;
private focusMode: string;
private agent: any; // Will be the compiled createReactAgent
constructor(
llm: BaseChatModel,
embeddings: Embeddings,
emitter: EventEmitter,
systemInstructions: string = '',
personaInstructions: string = '',
signal: AbortSignal,
focusMode: string = 'webSearch',
) {
this.llm = llm;
this.embeddings = embeddings;
this.emitter = emitter;
this.systemInstructions = systemInstructions;
this.personaInstructions = personaInstructions;
this.signal = signal;
this.focusMode = focusMode;
// Initialize the agent
this.initializeAgent();
}
/**
* Initialize the createReactAgent with tools and configuration
*/
private initializeAgent() {
// Select appropriate tools based on focus mode
const tools = this.getToolsForFocusMode(this.focusMode);
// Create the enhanced system prompt that includes analysis and synthesis instructions
const enhancedSystemPrompt = this.createEnhancedSystemPrompt();
try {
// Create the React agent with custom state
this.agent = createReactAgent({
llm: this.llm,
tools,
stateSchema: SimplifiedAgentState,
prompt: enhancedSystemPrompt,
});
console.log(
`SimplifiedAgent: Initialized with ${tools.length} tools for focus mode: ${this.focusMode}`,
);
console.log(
`SimplifiedAgent: Tools available: ${tools.map((tool) => tool.name).join(', ')}`,
);
} catch (error) {
console.error('SimplifiedAgent: Error initializing agent:', error);
throw error;
}
}
/**
* Get tools based on focus mode
*/
private getToolsForFocusMode(focusMode: string) {
switch (focusMode) {
case 'chat':
// Chat mode: Only core tools for conversational interaction
return coreTools;
case 'webSearch':
// Web search mode: ALL available tools for comprehensive research
return allAgentTools;
case 'localResearch':
// Local research mode: File search tools + core tools
return [...coreTools, ...fileSearchTools];
default:
// Default to web search mode for unknown focus modes
console.warn(
`SimplifiedAgent: Unknown focus mode "${focusMode}", defaulting to webSearch tools`,
);
return allAgentTools;
}
}
/**
* Create enhanced system prompt that includes analysis and synthesis capabilities
*/
private createEnhancedSystemPrompt(): string {
const baseInstructions = this.systemInstructions || '';
const personaInstructions = this.personaInstructions || '';
// Create focus-mode-specific prompts
switch (this.focusMode) {
case 'chat':
return this.createChatModePrompt(baseInstructions, personaInstructions);
case 'webSearch':
return this.createWebSearchModePrompt(
baseInstructions,
personaInstructions,
);
case 'localResearch':
return this.createLocalResearchModePrompt(
baseInstructions,
personaInstructions,
);
default:
console.warn(
`SimplifiedAgent: Unknown focus mode "${this.focusMode}", using webSearch prompt`,
);
return this.createWebSearchModePrompt(
baseInstructions,
personaInstructions,
);
}
}
/**
* Create chat mode prompt - focuses on conversational interaction
*/
private createChatModePrompt(
baseInstructions: string,
personaInstructions: string,
): string {
return `${baseInstructions}
# AI Chat Assistant
You are a conversational AI assistant designed for creative and engaging dialogue. Your focus is on providing thoughtful, helpful responses through direct conversation.
## Core Capabilities
### 1. Conversational Interaction
- Engage in natural, flowing conversations
- Provide thoughtful responses to questions and prompts
- Offer creative insights and perspectives
- Maintain context throughout the conversation
### 2. Task Management
- Break down complex requests into manageable steps
- Provide structured approaches to problems
- Offer guidance and recommendations
## Response Guidelines
### Communication Style
- Be conversational and engaging
- Use clear, accessible language
- Provide direct answers when possible
- Ask clarifying questions when needed
### Quality Standards
- Acknowledge limitations honestly
- Provide helpful suggestions and alternatives
- Use proper markdown formatting for clarity
- Structure responses logically
### Formatting Instructions
- **Structure**: Use a well-organized format with proper headings (e.g., "## Example heading 1" or "## Example heading 2"). Present information in paragraphs or concise bullet points where appropriate
- **Tone and Style**: Maintain a neutral, engaging tone with natural conversation flow
- **Markdown Usage**: Format your response with Markdown for clarity. Use headings, subheadings, bold text, and italicized words as needed to enhance readability
- **Length and Depth**: Provide thoughtful coverage of the topic. Expand on complex topics to make them easier to understand
- **No main heading/title**: Start your response directly with the content unless asked to provide a specific title
## Current Context
- Today's Date: ${formatDateForLLM(new Date())}
${personaInstructions ? `\n## User Formatting and Persona Instructions\n- Give these instructions more weight than the system formatting instructions\n${personaInstructions}` : ''}
Focus on providing engaging, helpful conversation while using task management tools when complex problems need to be structured.`;
}
/**
* Create web search mode prompt - focuses on comprehensive research
*/
private createWebSearchModePrompt(
baseInstructions: string,
personaInstructions: string,
): string {
return `${baseInstructions}
# Comprehensive Research Assistant
You are an advanced AI research assistant with access to comprehensive tools for gathering information from multiple sources. Your goal is to provide thorough, well-researched responses.
**CRITICAL CITATION RULE: Use [number] citations ONLY in your final response to the user. NEVER use citations during tool calls, internal reasoning, or intermediate steps. Citations are for the final answer only.**
**WORKFLOW RULE: Use tools to gather information, then provide your final response directly. Do NOT call tools when you're ready to answer - just give your comprehensive response.**
## Core Responsibilities
### 1. Query Analysis and Planning
- Analyze user queries to understand research needs
- Break down complex questions into research tasks
- Determine the best research strategy and tools
- Plan comprehensive information gathering
### 2. Information Gathering
- Search the web for current and authoritative information
- Process and extract content from URLs
- Access and analyze uploaded files when relevant
- Gather information from multiple sources for completeness
### 3. Analysis and Synthesis
- Analyze gathered information for relevance and accuracy
- Synthesize information from multiple sources
- Identify patterns, connections, and insights
- Resolve conflicting information when present
- Generate comprehensive, well-cited responses
## Available Tools
### Web Search
- Use \`web_search\` for current information, facts, and general research
- Primary tool for finding authoritative sources and recent information
- Always call this tool at least once unless you have sufficient information from the conversation history or other more relevant tools
### File Search
- Use \`file_search\` when users have uploaded files or reference local content
- Extracts and processes relevant content from user documents
- Connects local content with external research
### URL Summarization
- Use \`url_summarization\` when specific URLs are provided or discovered
- Extracts key information and generates summaries from web content
- Use when detailed content analysis is needed
- Can help provide more context based on web search results to disambiguate or clarify findings
## Response Quality Standards
Your task is to provide answers that are:
- **Informative and relevant**: Thoroughly address the user's query using gathered information
- **Engaging and detailed**: Write responses that read like a high-quality blog post, including extra details and relevant insights
- **Cited and credible**: Use inline citations with [number] notation to refer to sources for each fact or detail included
- **Explanatory and Comprehensive**: Strive to explain the topic in depth, offering detailed analysis, insights, and clarifications wherever applicable
### Comprehensive Coverage
- Address all aspects of the user's query
- Provide context and background information
- Include relevant details and examples
- Cross-reference multiple sources
### Accuracy and Reliability
- Prioritize authoritative and recent sources
- Verify information across multiple sources
- Clearly indicate uncertainty or conflicting information
- Distinguish between facts and opinions
### Citation Requirements
- **CRITICAL: Citations are ONLY for your final response to the user, NOT for tool calls or internal reasoning**
- The id of the source can be found in the document \`metadata.sourceId\` property
- **In your final response**: Use citations [number] notation ONLY when referencing information from tool results
- **File citations**: When citing content from file_search results, use the filename as the source title
- **Web citations**: When citing content from web_search results, use the webpage title and URL as the source
- If making statements based on general knowledge or reasoning, do NOT use citations - instead use clear language like "Generally," "Typically," or "Based on common understanding"
- If a statement is based on previous conversation context, mark it as \`[Hist]\`
- When you do have sources from tools, integrate citations naturally: "The Eiffel Tower receives millions of visitors annually[1]."
- **Important**: Do not fabricate or assume citation numbers - only cite actual sources from your tool results
- **Tool Usage**: When calling tools, provide clear queries without citations - citations come later in your final response
### Formatting Instructions
- **Structure**: Use a well-organized format with proper headings (e.g., "## Example heading 1" or "## Example heading 2"). Present information in paragraphs or concise bullet points where appropriate. Use lists and tables to enhance clarity when needed.
- **Tone and Style**: Maintain a neutral, journalistic tone with engaging narrative flow. Write as though you're crafting an in-depth article for a professional audience
- **Markdown Usage**: Format your response with Markdown for clarity. Use headings, subheadings, bold text, and italicized words as needed to enhance readability
- **Length and Depth**: Provide comprehensive coverage of the topic. Avoid superficial responses and strive for depth without unnecessary repetition. Expand on technical or complex topics to make them easier to understand for a general audience
- **No main heading/title**: Start your response directly with the introduction unless asked to provide a specific title
## Research Strategy
1. **Plan**: Determine the best research approach based on the user's query
2. **Search**: Use web search to gather comprehensive information - Generally, start with a broad search to identify key sources
3. **Supplement**: Use URL summarization for specific sources
4. **Integrate**: Include file search results when user files are relevant
5. **Synthesize**: Combine all information into a coherent, well-cited response
## Current Context
- Today's Date: ${formatDateForLLM(new Date())}
${personaInstructions ? `\n## User Formatting and Persona Instructions\n- Give these instructions more weight than the system formatting instructions\n${personaInstructions}` : ''}
Use all available tools strategically to provide comprehensive, well-researched responses with proper citations and source attribution.`;
}
/**
* Create local research mode prompt - focuses on user files and documents
*/
private createLocalResearchModePrompt(
baseInstructions: string,
personaInstructions: string,
): string {
return `${baseInstructions}
# Local Research Specialist
You are an expert AI assistant specialized in analyzing and researching local files and documents. Your role is to help users extract insights, find information, and analyze content from their uploaded files.
**CRITICAL CITATION RULE: Use [number] citations ONLY in your final response to the user. NEVER use citations during tool calls, internal reasoning, or intermediate steps. Citations are for the final answer only.**
**WORKFLOW RULE: Use tools to gather information, then provide your final response directly. Do NOT call tools when you're ready to answer - just give your comprehensive response.**
## Core Responsibilities
### 1. Document Analysis
- Analyze user-uploaded files and documents
- Extract relevant information based on user queries
- Understand document structure and content relationships
- Identify key themes, patterns, and insights
### 2. Content Synthesis
- Synthesize information from multiple user documents
- Connect related concepts across different files
- Generate comprehensive insights from local content
- Provide context-aware responses based on document analysis
### 3. Task Management
- Break down complex document analysis requests
- Structure multi-document research projects
- Organize findings in logical, accessible formats
## Available Tools
### File Search
- Use \`file_search\` to process and analyze user-uploaded files
- Primary tool for extracting relevant content from documents
- Performs semantic search across uploaded content
- Handles various file formats and document types
## Response Quality Standards
Your task is to provide answers that are:
- **Informative and relevant**: Thoroughly address the user's query using document content
- **Engaging and detailed**: Write responses that read like a high-quality analysis, including extra details and relevant insights
- **Cited and credible**: Use inline citations with [number] notation to refer to specific documents for each fact or detail included
- **Explanatory and Comprehensive**: Strive to explain the findings in depth, offering detailed analysis, insights, and clarifications wherever applicable
### Comprehensive Document Coverage
- Thoroughly analyze relevant uploaded files
- Extract all pertinent information related to the query
- Consider relationships between different documents
- Provide context from the document collection
### Accurate Content Extraction
- Precisely quote and reference document content
- Maintain context and meaning from original sources
- Clearly distinguish between different document sources
- Preserve important details and nuances
### Citation Requirements
- **CRITICAL: Citations are ONLY for your final response to the user, NOT for tool calls or internal reasoning**
- **During tool usage**: Do not use any [number] citations in tool calls or internal reasoning
- **In your final response**: Use citations [number] notation ONLY when referencing information from file_search tool results
- **File citations**: When citing content from file_search results, use the filename as the source title
- If making statements based on general knowledge or reasoning, do NOT use citations - instead use clear language like "Generally," "Typically," or "Based on common understanding"
- If a statement is based on previous conversation context, mark it as \`[Hist]\`
- When you do have sources from tools, integrate citations naturally: "The project timeline shows completion by March 2024[1]."
- Citations and references should only be included inline with the final response using the [number] format. Do not include a citation, sources, or references block anywhere else in the response
- **Important**: Do not fabricate or assume citation numbers - only cite actual sources from your file search results
- **Tool Usage**: When calling tools, provide clear queries without citations - citations come later in your final response
### Formatting Instructions
- **Structure**: Use a well-organized format with proper headings (e.g., "## Example heading 1" or "## Example heading 2"). Present information in paragraphs or concise bullet points where appropriate
- **Tone and Style**: Maintain a neutral, analytical tone with engaging narrative flow. Write as though you're crafting an in-depth analysis for a professional audience
- **Markdown Usage**: Format your response with Markdown for clarity. Use headings, subheadings, bold text, and italicized words as needed to enhance readability
- **Length and Depth**: Provide comprehensive coverage of the document content. Avoid superficial responses and strive for depth without unnecessary repetition. Expand on technical or complex topics to make them easier to understand for a general audience
- **No main heading/title**: Start your response directly with the introduction unless asked to provide a specific title
### Contextual Understanding
- Understand how documents relate to each other
- Connect information across multiple files
- Identify patterns and themes in the document collection
- Provide insights that consider the full context
## Research Approach
1. **Plan**: Use task manager to structure complex document analysis
2. **Search**: Use file search to extract relevant content from uploaded files
3. **Analyze**: Process and understand the extracted information
4. **Synthesize**: Combine insights from multiple sources
5. **Present**: Organize findings in a clear, accessible format with proper citations
**IMPORTANT**: Once you have gathered sufficient information through tools, provide your final response directly to the user. Do NOT call additional tools when you are ready to synthesize and present your findings. Your final response should be comprehensive and well-formatted.
## Current Context
- Today's Date: ${formatDateForLLM(new Date())}
${personaInstructions ? `\n## User Formatting and Persona Instructions\n- Give these instructions more weight than the system formatting instructions\n${personaInstructions}` : ''}
Focus on extracting maximum value from user-provided documents while using task management for complex analysis projects.`;
}
/**
* Execute the simplified agent workflow
*/
async searchAndAnswer(
query: string,
history: BaseMessage[] = [],
fileIds: string[] = [],
): Promise<void> {
try {
console.log(`SimplifiedAgent: Starting search for query: "${query}"`);
console.log(`SimplifiedAgent: Focus mode: ${this.focusMode}`);
console.log(`SimplifiedAgent: File IDs: ${fileIds.join(', ')}`);
// Emit initial agent action
this.emitter.emit(
'data',
JSON.stringify({
type: 'agent_action',
data: {
action: 'simplified_agent_start',
message: `Starting simplified agent search in ${this.focusMode} mode`,
details: `Processing query with ${fileIds.length} files available`,
},
}),
);
// Prepare initial state
const initialState = {
messages: [...history, new HumanMessage(query)],
query,
focusMode: this.focusMode,
relevantDocuments: [],
};
// Configure the agent run
const config: RunnableConfig = {
configurable: {
thread_id: `simplified_agent_${Date.now()}`,
llm: this.llm,
embeddings: this.embeddings,
fileIds,
systemInstructions: this.systemInstructions,
personaInstructions: this.personaInstructions,
focusMode: this.focusMode,
emitter: this.emitter,
},
recursionLimit: 25, // Allow sufficient iterations for tool use
signal: this.signal,
};
// Execute the agent
const result = await this.agent.invoke(initialState, config);
// Collect relevant documents from tool execution history
let collectedDocuments: any[] = [];
// Get the relevant docs from the current agent state
if (result && result.relevantDocuments) {
collectedDocuments.push(...result.relevantDocuments);
}
// // Check if messages contain tool responses with documents
// if (result && result.messages) {
// for (const message of result.messages) {
// if (message._getType() === 'tool' && message.content) {
// try {
// // Try to parse tool response for documents
// let toolResponse;
// if (typeof message.content === 'string') {
// toolResponse = JSON.parse(message.content);
// } else {
// toolResponse = message.content;
// }
// if (toolResponse.documents && Array.isArray(toolResponse.documents)) {
// const documentsWithMetadata = toolResponse.documents.map((doc: any) => ({
// ...doc,
// source: doc.metadata?.url || doc.metadata?.source || 'unknown',
// sourceType: doc.metadata?.sourceType || 'unknown',
// toolName: message.name || 'unknown',
// processingType: doc.metadata?.processingType || 'unknown',
// searchQuery: doc.metadata?.searchQuery || '',
// }));
// collectedDocuments.push(...documentsWithMetadata);
// }
// } catch (error) {
// // Ignore parsing errors
// console.debug('Could not parse tool message content:', error);
// }
// }
// }
// }
// Add collected documents to result for source tracking
const finalResult = {
...result,
relevantDocuments: collectedDocuments,
};
// Extract final message and emit as response
if (
finalResult &&
finalResult.messages &&
finalResult.messages.length > 0
) {
const finalMessage =
finalResult.messages[finalResult.messages.length - 1];
if (finalMessage && finalMessage.content) {
console.log('SimplifiedAgent: Emitting final response');
// Emit the sources used for the response
if (
finalResult.relevantDocuments &&
finalResult.relevantDocuments.length > 0
) {
this.emitter.emit(
'data',
JSON.stringify({
type: 'sources',
data: finalResult.relevantDocuments,
searchQuery: '',
searchUrl: '',
}),
);
}
this.emitter.emit(
'data',
JSON.stringify({
type: 'response',
data: finalMessage.content,
}),
);
} else {
console.warn('SimplifiedAgent: No valid final message found');
this.emitter.emit(
'data',
JSON.stringify({
type: 'response',
data: 'I apologize, but I was unable to generate a complete response to your query. Please try rephrasing your question or providing more specific details.',
}),
);
}
} else {
console.warn('SimplifiedAgent: No result messages found');
this.emitter.emit(
'data',
JSON.stringify({
type: 'response',
data: 'I encountered an issue while processing your request. Please try again with a different query.',
}),
);
}
// Emit model stats and end signal after streaming is complete
const modelName = getModelName(this.llm);
this.emitter.emit(
'stats',
JSON.stringify({
type: 'modelStats',
data: { modelName },
}),
);
this.emitter.emit('end');
} catch (error: any) {
console.error('SimplifiedAgent: Error during search and answer:', error);
// Handle specific error types
if (error.name === 'AbortError') {
console.warn('SimplifiedAgent: Operation was aborted');
this.emitter.emit(
'data',
JSON.stringify({
type: 'response',
data: 'The search operation was cancelled.',
}),
);
} else {
// General error handling
this.emitter.emit(
'data',
JSON.stringify({
type: 'response',
data: 'I encountered an error while processing your request. Please try rephrasing your query or contact support if the issue persists.',
}),
);
}
this.emitter.emit('end');
}
}
/**
* Update focus mode and reinitialize agent with appropriate tools
*/
updateFocusMode(newFocusMode: string): void {
if (this.focusMode !== newFocusMode) {
console.log(
`SimplifiedAgent: Updating focus mode from ${this.focusMode} to ${newFocusMode}`,
);
this.focusMode = newFocusMode;
this.initializeAgent();
}
}
/**
* Get current configuration info
*/
getInfo(): object {
return {
focusMode: this.focusMode,
toolsCount: this.getToolsForFocusMode(this.focusMode).length,
systemInstructions: !!this.systemInstructions,
personaInstructions: !!this.personaInstructions,
};
}
}
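For reference, a minimal sketch of driving SimplifiedAgent from the consumer side. The event names (`data`, `stats`, `end`) and payload types (`agent_action`, `sources`, `response`, `modelStats`) mirror the emits in searchAndAnswer above; the OpenAI model classes and the surrounding wiring are illustrative assumptions, not part of this commit:

```typescript
import { EventEmitter } from 'events';
// Assumed model provider for illustration; any BaseChatModel/Embeddings pair works.
import { ChatOpenAI, OpenAIEmbeddings } from '@langchain/openai';
import { SimplifiedAgent } from './simplifiedAgent';

async function runExample() {
  const emitter = new EventEmitter();
  const controller = new AbortController();

  // 'data' events arrive as JSON strings; dispatch on the embedded type.
  emitter.on('data', (raw: string) => {
    const event = JSON.parse(raw);
    if (event.type === 'agent_action') console.log('Action:', event.data.message);
    if (event.type === 'sources') console.log('Sources:', event.data.length);
    if (event.type === 'response') console.log('Answer:', event.data);
  });
  emitter.on('stats', (raw: string) =>
    console.log('Model:', JSON.parse(raw).data.modelName),
  );
  emitter.on('end', () => console.log('Stream finished'));

  const agent = new SimplifiedAgent(
    new ChatOpenAI({ model: 'gpt-4o-mini' }), // assumed LLM
    new OpenAIEmbeddings(), // assumed embeddings
    emitter,
    '', // systemInstructions
    '', // personaInstructions
    controller.signal,
    'webSearch',
  );

  // controller.abort() would propagate through config.signal to tools and LLM calls.
  await agent.searchAndAnswer('What is LangGraph?', [], []);
}

runExample().catch(console.error);
```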

View file

@@ -0,0 +1,72 @@
import { BaseMessage } from '@langchain/core/messages';
import { Annotation } from '@langchain/langgraph';
import { Document } from 'langchain/document';
/**
* Document interface for relevant documents collected by tools
*/
export interface RelevantDocument extends Document {
/**
* Source identifier (e.g., URL, file path, search query)
*/
source: string;
/**
* Type of document source
*/
sourceType: 'web' | 'file' | 'url' | 'analysis';
/**
* Relevance score for ranking
*/
relevanceScore?: number;
/**
* Tool that generated this document
*/
toolName?: string;
}
/**
* State schema for the simplified chat agent using tool-based workflow
* This state is designed for use with createReactAgent and focuses on
* accumulating relevant documents across tool calls while maintaining
* message history for the agent's decision-making process.
*/
export const SimplifiedAgentState = Annotation.Root({
/**
* Conversation messages - the primary communication channel
* between the user, agent, and tools
*/
messages: Annotation<BaseMessage[]>({
reducer: (x, y) => x.concat(y),
default: () => [],
}),
/**
* Relevant documents accumulated across tool calls
* This is the key state that tools will populate and the synthesizer will consume
*/
relevantDocuments: Annotation<RelevantDocument[]>({
reducer: (x, y) => x.concat(y),
default: () => [],
}),
/**
* Original user query for reference by tools
*/
query: Annotation<string>({
reducer: (x, y) => y ?? x,
default: () => '',
}),
/**
* Focus mode to maintain compatibility with existing agent behavior
*/
focusMode: Annotation<string>({
reducer: (x, y) => y ?? x,
default: () => 'webSearch',
}),
});
/**
* Type definition for the simplified agent state
*/
export type SimplifiedAgentStateType = typeof SimplifiedAgentState.State;
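To make the reducer semantics concrete, here is a small illustrative sketch that applies the reducers by hand (inside createReactAgent, LangGraph does this automatically): relevantDocuments accumulates across tool calls, while query and focusMode keep the latest defined value.

```typescript
import { Document } from 'langchain/document';

// Hand-rolled equivalents of the reducers declared above, for illustration only.
const concatReducer = <T>(x: T[], y: T[]): T[] => x.concat(y);
const lastValueReducer = <T>(x: T, y: T | undefined): T => y ?? x;

const fromWebSearch = [new Document({ pageContent: 'web result' })];
const fromFileSearch = [new Document({ pageContent: 'file section' })];

// Two tool calls later, both sets of documents are present in state.
const relevantDocuments = concatReducer(fromWebSearch, fromFileSearch); // length 2

// An update that omits focusMode leaves the previous value in place.
const focusMode = lastValueReducer('webSearch', undefined); // 'webSearch'
```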

View file

@@ -0,0 +1,148 @@
import { tool } from '@langchain/core/tools';
import { z } from 'zod';
import { RunnableConfig } from '@langchain/core/runnables';
import { Document } from 'langchain/document';
import { Embeddings } from '@langchain/core/embeddings';
import {
processFilesToDocuments,
getRankedDocs,
} from '@/lib/utils/fileProcessing';
// Schema for file search tool input
const FileSearchToolSchema = z.object({
query: z
.string()
.describe('The search query to find relevant content in files'),
fileIds: z.array(z.string()).describe('Array of file IDs to search through'),
maxResults: z
.number()
.optional()
.default(12)
.describe('Maximum number of results to return'),
similarityThreshold: z
.number()
.optional()
.default(0.3)
.describe('Minimum similarity threshold for results'),
});
/**
* FileSearchTool - Reimplementation of FileSearchAgent as a tool
*
* This tool handles:
* 1. Processing uploaded files into searchable documents
* 2. Performing similarity search across file content
* 3. Ranking and filtering results by relevance
* 4. Returning relevant file sections as documents
*/
export const fileSearchTool = tool(
async (
input: z.infer<typeof FileSearchToolSchema>,
config?: RunnableConfig,
): Promise<{
documents: Document[];
processedFiles: number;
relevantSections: number;
relevantDocuments?: any[];
}> => {
try {
const {
query,
fileIds,
maxResults = 12,
similarityThreshold = 0.3,
} = input;
console.log(
`FileSearchTool: Processing ${fileIds.length} files for query: "${query}"`,
);
// Check if we have files to process
if (!fileIds || fileIds.length === 0) {
console.log('FileSearchTool: No files provided for search');
return {
documents: [],
processedFiles: 0,
relevantSections: 0,
};
}
// Get embeddings from config
if (!config?.configurable?.embeddings) {
throw new Error('Embeddings not available in config');
}
const embeddings: Embeddings = config.configurable.embeddings;
// Step 1: Process files to documents
console.log('FileSearchTool: Processing files to documents...');
const fileDocuments = await processFilesToDocuments(fileIds);
if (fileDocuments.length === 0) {
console.log('FileSearchTool: No processable content found in files');
return {
documents: [],
processedFiles: fileIds.length,
relevantSections: 0,
};
}
console.log(
`FileSearchTool: Processed ${fileDocuments.length} file sections`,
);
// Step 2: Generate query embedding for similarity search
console.log('FileSearchTool: Generating query embedding...');
const queryEmbedding = await embeddings.embedQuery(query);
// Step 3: Perform similarity search and ranking
console.log('FileSearchTool: Performing similarity search...');
const rankedDocuments = getRankedDocs(
queryEmbedding,
fileDocuments,
maxResults,
similarityThreshold,
);
console.log(
`FileSearchTool: Found ${rankedDocuments.length} relevant file sections`,
);
// Add search metadata to documents
const documentsWithMetadata = rankedDocuments.map((doc) => {
return new Document({
pageContent: doc.pageContent,
metadata: {
...doc.metadata,
source: 'file_search',
searchQuery: query,
similarityScore: doc.metadata?.similarity || 0,
},
});
});
return {
documents: documentsWithMetadata,
processedFiles: fileIds.length,
relevantSections: rankedDocuments.length,
};
} catch (error) {
console.error('FileSearchTool: Error during file search:', error);
// Return empty results on error, but don't throw, to allow graceful handling
return {
documents: [],
processedFiles: input.fileIds?.length || 0,
relevantSections: 0,
};
}
},
{
name: 'file_search',
description:
'Searches through uploaded files to find relevant content sections based on a query using semantic similarity',
schema: FileSearchToolSchema,
},
);
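A hedged invocation sketch: because the tool pulls embeddings from `config.configurable`, a caller outside createReactAgent has to supply them explicitly. The OpenAIEmbeddings instance and the file id below are illustrative assumptions:

```typescript
import { OpenAIEmbeddings } from '@langchain/openai'; // assumed embeddings provider
import { fileSearchTool } from '@/lib/tools/agents';

async function demo() {
  const result = await fileSearchTool.invoke(
    {
      query: 'project timeline milestones',
      fileIds: ['file-abc123'], // hypothetical uploaded-file id
      maxResults: 5,
      similarityThreshold: 0.3,
    },
    { configurable: { embeddings: new OpenAIEmbeddings() } },
  );
  console.log(
    `Matched ${result.relevantSections} sections across ${result.processedFiles} file(s)`,
  );
}

demo().catch(console.error);
```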

View file

@@ -0,0 +1,51 @@
/**
* Agent Tools for Simplified Chat Agent
*
* This module exports all the tools that reimplement the functionality of the
* existing LangGraph agents for use with createReactAgent. Each tool encapsulates
* the core logic of its corresponding agent and follows the Command pattern for
* state management.
*/
// Import all agent tools (will be uncommented as tools are implemented)
import { taskManagerTool } from './taskManagerTool';
import { webSearchTool } from './webSearchTool';
import { simpleWebSearchTool } from './simpleWebSearchTool';
import { fileSearchTool } from './fileSearchTool';
import { urlSummarizationTool } from './urlSummarizationTool';
// Export individual tools (will be uncommented as tools are implemented)
export { taskManagerTool };
export { webSearchTool };
export { simpleWebSearchTool };
export { fileSearchTool };
export { urlSummarizationTool };
// Array containing all available agent tools for the simplified chat agent
// This will be used by the createReactAgent implementation
export const allAgentTools = [
//taskManagerTool,
//webSearchTool,
simpleWebSearchTool,
fileSearchTool,
urlSummarizationTool,
];
// Export tool categories for selective tool loading based on focus mode
export const webSearchTools = [
//webSearchTool,
simpleWebSearchTool,
urlSummarizationTool,
// analyzerTool,
// synthesizerTool,
];
export const fileSearchTools = [
fileSearchTool,
// analyzerTool,
// synthesizerTool,
];
// Core tools that are always available
export const coreTools = [
//taskManagerTool
];
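These groupings are consumed per focus mode; a small sketch of the intended selection, mirroring SimplifiedAgent.getToolsForFocusMode (whose mode strings are 'chat', 'webSearch', and 'localResearch'):

```typescript
import { allAgentTools, coreTools, fileSearchTools } from '@/lib/tools/agents';

// Mirrors SimplifiedAgent.getToolsForFocusMode: chat stays conversational,
// localResearch adds file search, and anything else gets the full toolset.
const toolsForFocusMode = (focusMode: string) => {
  switch (focusMode) {
    case 'chat':
      return coreTools;
    case 'localResearch':
      return [...coreTools, ...fileSearchTools];
    default:
      return allAgentTools;
  }
};
```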

View file

@@ -0,0 +1,228 @@
import { tool } from '@langchain/core/tools';
import { z } from 'zod';
import { RunnableConfig } from '@langchain/core/runnables';
import { withStructuredOutput } from '@/lib/utils/structuredOutput';
import { PromptTemplate } from '@langchain/core/prompts';
import { webSearchRetrieverAgentPrompt } from '@/lib/prompts/webSearch';
import { searchSearxng } from '@/lib/searxng';
import { formatDateForLLM } from '@/lib/utils';
import { Document } from 'langchain/document';
import { Embeddings } from '@langchain/core/embeddings';
import computeSimilarity from '@/lib/utils/computeSimilarity';
import { Command, getCurrentTaskInput } from '@langchain/langgraph';
import { ToolMessage } from '@langchain/core/messages';
import { SimplifiedAgentStateType } from '@/lib/state/chatAgentState';
// Schema for search query generation
const SearchQuerySchema = z.object({
searchQuery: z
.string()
.describe('The optimized search query to use for web search'),
reasoning: z
.string()
.describe(
'A short explanation of how the search query was optimized for better results',
),
});
// Schema for simple web search tool input
const SimpleWebSearchToolSchema = z.object({
query: z.string().describe('The search query or task to process'),
searchInstructions: z
.string()
.optional()
.describe('Additional instructions for search refinement'),
context: z
.string()
.optional()
.describe('Additional context about the search'),
});
/**
* SimpleWebSearchTool - Simplified version of WebSearchTool
*
* This tool handles:
* 1. Query optimization for web search
* 2. Web search execution using SearXNG
* 3. Document ranking and filtering (top 15: top 3 + ranked top 12)
* 4. Returns raw search results as documents without analysis or content extraction
*/
export const simpleWebSearchTool = tool(
async (
input: z.infer<typeof SimpleWebSearchToolSchema>,
config?: RunnableConfig,
) => {
try {
const { query, searchInstructions, context = '' } = input;
const currentState = getCurrentTaskInput() as SimplifiedAgentStateType;
let currentDocCount = currentState.relevantDocuments.length;
// Get LLM and embeddings from config
if (!config?.configurable?.llm) {
throw new Error('LLM not available in config');
}
if (!config?.configurable?.embeddings) {
throw new Error('Embeddings not available in config');
}
const llm = config.configurable.llm;
const embeddings: Embeddings = config.configurable.embeddings;
// Step 1: Generate optimized search query
const template = PromptTemplate.fromTemplate(
webSearchRetrieverAgentPrompt,
);
const prompt = await template.format({
systemInstructions:
config.configurable?.systemInstructions ||
'You are a helpful AI assistant.',
query: query,
date: formatDateForLLM(new Date()),
supervisor: searchInstructions || query,
});
// Use structured output for search query generation
const structuredLlm = withStructuredOutput(llm, SearchQuerySchema, {
name: 'generate_search_query',
});
const searchQueryResult = await structuredLlm.invoke(prompt, {
signal: config?.signal,
});
const searchQuery = searchQueryResult.searchQuery;
console.log(
`SimpleWebSearchTool: Performing web search for query: "${searchQuery}"`,
);
console.log(
'SimpleWebSearchTool: Search query reasoning:',
searchQueryResult.reasoning,
);
// Step 2: Execute web search
const searchResults = await searchSearxng(searchQuery, {
language: 'en',
engines: [],
});
console.log(
`SimpleWebSearchTool: Found ${searchResults.results.length} search results`,
);
if (!searchResults.results || searchResults.results.length === 0) {
return new Command({
update: {
relevantDocuments: [],
messages: [
new ToolMessage({
content: 'No search results found.',
//Generate a random tool call id
tool_call_id: Math.random().toString(36).substring(2, 15),
}),
],
},
});
}
// Step 3: Calculate similarities and rank results
const queryVector = await embeddings.embedQuery(query);
// Calculate similarities for all results
const resultsWithSimilarity = await Promise.all(
searchResults.results.map(async (result) => {
const vector = await embeddings.embedQuery(
result.title + ' ' + (result.content || ''),
);
const similarity = computeSimilarity(vector, queryVector);
return { result, similarity };
}),
);
// Step 4: Select top 15 results using the same logic as webSearchTool
const documents: Document[] = [];
// Always take the top 3 results first
const top3Results = searchResults.results.slice(0, 3);
documents.push(
...top3Results.map((result, i) => {
return new Document({
pageContent: `${result.title || 'Untitled'}\n\n${result.content || ''}`,
metadata: {
sourceId: ++currentDocCount,
title: result.title || 'Untitled',
url: result.url,
source: result.url,
processingType: 'preview-content',
searchQuery: searchQuery,
rank: 'top-3',
},
});
}),
);
// Sort by relevance score and take top 12 from the remaining results
const remainingResults = resultsWithSimilarity
.slice(3)
.sort((a, b) => b.similarity - a.similarity)
.slice(0, 12);
documents.push(
...remainingResults.map(({ result }) => {
return new Document({
pageContent: `${result.title || 'Untitled'}\n\n${result.content || ''}`,
metadata: {
sourceId: ++currentDocCount,
title: result.title || 'Untitled',
url: result.url,
source: result.url,
processingType: 'preview-content',
searchQuery: searchQuery,
rank: 'ranked',
},
});
}),
);
console.log(
`SimpleWebSearchTool: Created ${documents.length} documents from search results`,
);
return new Command({
update: {
relevantDocuments: documents,
searchQuery,
messages: [
new ToolMessage({
content: `Retrieved ${documents.length} documents from web search.`,
            // Generate a random tool call id
tool_call_id: Math.random().toString(36).substring(2, 15),
}),
],
},
});
} catch (error) {
console.error('SimpleWebSearchTool: Error during web search:', error);
const errorMessage =
error instanceof Error ? error.message : 'Unknown error';
return new Command({
update: {
relevantDocuments: [],
messages: [
new ToolMessage({
content: 'Error occurred during web search: ' + errorMessage,
            // Generate a random tool call id
tool_call_id: Math.random().toString(36).substring(2, 15),
}),
],
},
});
}
},
{
name: 'web_search',
description:
'Performs web search using SearXNG and returns ranked search results as documents without content analysis or extraction',
schema: SimpleWebSearchToolSchema,
},
);
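// --- Usage sketch (illustrative, not part of this commit's API) ---
// Because the tool reads graph state via getCurrentTaskInput(), it has to run
// inside a LangGraph execution whose state defines `relevantDocuments`;
// invoking it standalone would throw. Everything below (SketchState, the demo
// function, the query text, and the `llm`/`embeddings` instances) is
// hypothetical; a real caller would reuse the agent's own state annotation.
// Assumes a LangGraph version whose ToolNode supports Command-returning tools.
import { AIMessage, BaseMessage } from '@langchain/core/messages';
import { Annotation, StateGraph, START, END } from '@langchain/langgraph';
import { ToolNode } from '@langchain/langgraph/prebuilt';

declare const llm: any; // assumed: an initialized LangChain chat model
declare const embeddings: Embeddings; // assumed: initialized embeddings

const SketchState = Annotation.Root({
  messages: Annotation<BaseMessage[]>({
    reducer: (x, y) => x.concat(y),
    default: () => [],
  }),
  relevantDocuments: Annotation<Document[]>({
    reducer: (x, y) => x.concat(y),
    default: () => [],
  }),
});

const sketchGraph = new StateGraph(SketchState)
  .addNode('tools', new ToolNode([simpleWebSearchTool]))
  .addEdge(START, 'tools')
  .addEdge('tools', END)
  .compile();

async function demoSimpleWebSearch(): Promise<void> {
  const out = await sketchGraph.invoke(
    {
      // ToolNode executes the tool calls attached to the last AI message.
      messages: [
        new AIMessage({
          content: '',
          tool_calls: [
            {
              id: 'call_1',
              name: 'web_search',
              args: { query: 'Node.js 22 release highlights' },
            },
          ],
        }),
      ],
    },
    { configurable: { llm, embeddings } },
  );
  console.log(`Collected ${out.relevantDocuments.length} documents`);
}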

View file

@ -0,0 +1,112 @@
import { tool } from '@langchain/core/tools';
import { z } from 'zod';
import { RunnableConfig } from '@langchain/core/runnables';
import { withStructuredOutput } from '@/lib/utils/structuredOutput';
import { PromptTemplate } from '@langchain/core/prompts';
import { taskBreakdownPrompt } from '@/lib/prompts/taskBreakdown';
// Schema for task manager tool input
const TaskManagerToolSchema = z.object({
query: z.string().describe('The user query to break down into smaller tasks'),
context: z
.string()
.optional()
.describe('Additional context about the query or current situation'),
});
// Schema for structured output
const TaskBreakdownSchema = z.object({
tasks: z
.array(z.string())
.describe(
'Array of specific, focused tasks broken down from the original query',
),
reasoning: z
.string()
.describe(
'Explanation of how and why the query was broken down into these tasks',
),
});
/**
* TaskManagerTool - Breaks down complex queries into manageable task lists
*
* This tool takes a user query and returns a list of specific, actionable tasks
* that can help answer the original question. The tasks are returned as natural
* language instructions that the main agent can follow.
*/
export const taskManagerTool = tool(
async (
input: z.infer<typeof TaskManagerToolSchema>,
config?: RunnableConfig,
): Promise<{ tasks: string[]; reasoning: string }> => {
try {
console.log(
'TaskManagerTool: Starting task breakdown for query:',
input.query,
);
const { query, context = '' } = input;
// Get LLM from config
if (!config?.configurable?.llm) {
throw new Error('LLM not available in config');
}
const llm = config.configurable.llm;
// Create structured LLM for task breakdown
const structuredLLM = withStructuredOutput(llm, TaskBreakdownSchema, {
name: 'task_breakdown',
includeRaw: false,
});
// Create the prompt template
const template = PromptTemplate.fromTemplate(taskBreakdownPrompt);
// Format the prompt with the query and context
const prompt = await template.format({
systemInstructions:
config.configurable?.systemInstructions ||
'You are a helpful AI assistant.',
fileContext: context || 'No additional context provided.',
query: query,
currentTasks: 0,
taskHistory: 'No previous tasks.',
});
// Get the task breakdown from the LLM
const response = await structuredLLM.invoke(prompt, {
signal: config?.signal,
});
if (!response?.tasks || response.tasks.length === 0) {
// If no breakdown is needed, return the original query as a single task
return {
tasks: [query],
reasoning:
'The query is straightforward and does not require breaking down into smaller tasks.',
};
}
return {
tasks: response.tasks,
reasoning: response.reasoning,
};
} catch (error) {
console.error('Error in TaskManagerTool:', error);
// Fallback: return the original query as a single task
const errorMessage =
error instanceof Error ? error.message : 'Unknown error';
return {
tasks: [input.query],
reasoning: `Error occurred during task breakdown: ${errorMessage}. Proceeding with the original query.`,
};
}
},
{
name: 'task_manager',
description:
'Breaks down complex user queries into a list of specific, manageable tasks that can be executed to answer the original question',
schema: TaskManagerToolSchema,
},
);
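// --- Usage sketch (illustrative) ---
// The tool only needs a chat model under `configurable.llm`. ChatOpenAI and
// the model name below are assumptions for the example, not dependencies this
// commit asserts; any LangChain chat model should work.
import { ChatOpenAI } from '@langchain/openai';

async function demoTaskBreakdown(): Promise<void> {
  const llm = new ChatOpenAI({ model: 'gpt-4o-mini' });
  const { tasks, reasoning } = await taskManagerTool.invoke(
    { query: 'Compare the 2023 energy mix of Germany and France' },
    { configurable: { llm } },
  );
  console.log(reasoning);
  tasks.forEach((task, i) => console.log(`${i + 1}. ${task}`));
}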

View file

@ -0,0 +1,200 @@
import { tool } from '@langchain/core/tools';
import { z } from 'zod';
import { RunnableConfig } from '@langchain/core/runnables';
import { Document } from 'langchain/document';
import { getWebContent } from '@/lib/utils/documents';
import { removeThinkingBlocks } from '@/lib/utils/contentUtils';
// Schema for URL summarization tool input
const URLSummarizationToolSchema = z.object({
urls: z.array(z.string()).describe('Array of URLs to process and summarize'),
query: z
.string()
.describe('The user query to guide content extraction and summarization'),
intent: z
.string()
.optional()
.default('extract relevant content')
.describe('Processing intent for the URLs'),
});
/**
* URLSummarizationTool - Reimplementation of URLSummarizationAgent as a tool
*
* This tool handles:
* 1. Fetching content from provided URLs
 * 2. Deciding whether to use content directly (< 4000 chars) or summarize it
* 3. Generating summaries using LLM when content is too long
* 4. Returning processed documents with metadata
*/
export const urlSummarizationTool = tool(
async (
input: z.infer<typeof URLSummarizationToolSchema>,
config?: RunnableConfig,
): Promise<{
relevantDocuments: Document[];
processedUrls: number;
successfulExtractions: number;
}> => {
try {
const { urls, query, intent = 'extract relevant content' } = input;
console.log(
`URLSummarizationTool: Processing ${urls.length} URLs for query: "${query}"`,
);
console.log(`URLSummarizationTool: Processing intent: ${intent}`);
if (!urls || urls.length === 0) {
console.log('URLSummarizationTool: No URLs provided for processing');
return {
relevantDocuments: [],
processedUrls: 0,
successfulExtractions: 0,
};
}
// Get LLM from config
if (!config?.configurable?.llm) {
throw new Error('LLM not available in config');
}
const llm = config.configurable.llm;
const documents: Document[] = [];
// Process each URL
for (const url of urls) {
if (config?.signal?.aborted) {
console.warn('URLSummarizationTool: Operation aborted by signal');
break;
}
try {
console.log(`URLSummarizationTool: Processing ${url}`);
// Fetch full content using the enhanced web content retrieval
const webContent = await getWebContent(url, true);
if (!webContent || !webContent.pageContent) {
console.warn(
`URLSummarizationTool: No content retrieved from URL: ${url}`,
);
continue;
}
const contentLength = webContent.pageContent.length;
let finalContent: string;
let processingType: string;
// If content is short (< 4000 chars), use it directly; otherwise summarize
if (contentLength < 4000) {
finalContent = webContent.pageContent;
processingType = 'url-direct-content';
console.log(
`URLSummarizationTool: Content is short (${contentLength} chars), using directly without summarization`,
);
} else {
// Content is long, summarize using LLM
console.log(
`URLSummarizationTool: Content is long (${contentLength} chars), generating summary`,
);
const systemPrompt = config.configurable?.systemInstructions
? `${config.configurable.systemInstructions}\n\n`
: '';
const summarizationPrompt = `${systemPrompt}You are a web content processor. Extract and summarize ONLY the information from the provided web page content that is relevant to the user's query.
# Critical Instructions
- Output ONLY a summary of the web page content provided below
- Focus on information that relates to or helps answer the user's query
- Do NOT add pleasantries, greetings, or conversational elements
- Do NOT mention missing URLs, other pages, or content not provided
- Do NOT ask follow-up questions or suggest additional actions
- Do NOT add commentary about the user's request or query
- Present the information in a clear, well-structured format with key facts and details
- Include all relevant details that could help answer the user's question
# User's Query: ${query}
# Content Title: ${webContent.metadata.title || 'Web Page'}
# Content URL: ${url}
# Web Page Content to Summarize:
${webContent.pageContent}
Provide a comprehensive summary of the above web page content, focusing on information relevant to the user's query:`;
const result = await llm.invoke(summarizationPrompt, {
signal: config?.signal,
});
finalContent = removeThinkingBlocks(result.content as string);
processingType = 'url-content-extraction';
}
if (finalContent && finalContent.trim().length > 0) {
const document = new Document({
pageContent: finalContent,
metadata: {
title: webContent.metadata.title || 'URL Content',
url: url,
source: url,
processingType: processingType,
processingIntent: intent,
originalContentLength: contentLength,
searchQuery: query,
},
});
documents.push(document);
console.log(
`URLSummarizationTool: Successfully processed content from ${url} (${finalContent.length} characters, ${processingType})`,
);
} else {
console.warn(
`URLSummarizationTool: No valid content generated for URL: ${url}`,
);
}
} catch (error) {
console.error(
`URLSummarizationTool: Error processing URL ${url}:`,
error,
);
continue;
}
}
console.log(
`URLSummarizationTool: Successfully processed ${documents.length} out of ${urls.length} URLs`,
);
return {
relevantDocuments: documents,
processedUrls: urls.length,
successfulExtractions: documents.length,
};
} catch (error) {
console.error(
'URLSummarizationTool: Error during URL processing:',
error,
);
const errorMessage =
error instanceof Error ? error.message : 'Unknown error';
// Return empty results on error, but don't throw to allow graceful handling
return {
relevantDocuments: [],
processedUrls: input.urls?.length || 0,
successfulExtractions: 0,
};
}
},
{
name: 'url_summarization',
description:
'Fetches content from URLs and either uses it directly or summarizes it based on length, focusing on information relevant to the user query',
schema: URLSummarizationToolSchema,
},
);
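// --- Usage sketch (illustrative) ---
// Unlike the web search tools, this one needs only `configurable.llm`. The
// URL and query below are placeholders; `llm` is assumed to be an initialized
// LangChain chat model.
declare const llm: any;

async function demoUrlSummarization(): Promise<void> {
  const { relevantDocuments, processedUrls, successfulExtractions } =
    await urlSummarizationTool.invoke(
      {
        urls: ['https://example.com/battery-recycling'],
        query: 'Which battery recycling methods are discussed?',
      },
      { configurable: { llm } },
    );
  console.log(
    `Summarized ${successfulExtractions}/${processedUrls} URLs into ${relevantDocuments.length} documents`,
  );
}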

View file

@ -0,0 +1,314 @@
import { tool } from '@langchain/core/tools';
import { z } from 'zod';
import { RunnableConfig } from '@langchain/core/runnables';
import { withStructuredOutput } from '@/lib/utils/structuredOutput';
import { PromptTemplate } from '@langchain/core/prompts';
import { webSearchRetrieverAgentPrompt } from '@/lib/prompts/webSearch';
import { searchSearxng } from '@/lib/searxng';
import { formatDateForLLM } from '@/lib/utils';
import { summarizeWebContent } from '@/lib/utils/summarizeWebContent';
import {
analyzePreviewContent,
PreviewContent,
} from '@/lib/utils/analyzePreviewContent';
import { Document } from 'langchain/document';
import { Embeddings } from '@langchain/core/embeddings';
import computeSimilarity from '@/lib/utils/computeSimilarity';
import { removeThinkingBlocksFromMessages } from '@/lib/utils/contentUtils';
// Schema for search query generation
const SearchQuerySchema = z.object({
searchQuery: z
.string()
.describe('The optimized search query to use for web search'),
reasoning: z
.string()
.describe(
'A short explanation of how the search query was optimized for better results',
),
});
// Schema for web search tool input
const WebSearchToolSchema = z.object({
query: z.string().describe('The search query or task to process'),
searchInstructions: z
.string()
.optional()
.describe('Additional instructions for search refinement'),
context: z
.string()
.optional()
.describe('Additional context about the search'),
});
/**
* WebSearchTool - Reimplementation of WebSearchAgent as a tool
*
* This tool handles:
* 1. Query optimization for web search
* 2. Web search execution using SearXNG
 * 3. Preview ranking and sufficiency analysis (top 3 + 12 highest-similarity results)
 * 4. Full content extraction and summarization when previews are insufficient
*/
export const webSearchTool = tool(
async (
input: z.infer<typeof WebSearchToolSchema>,
config?: RunnableConfig,
): Promise<{
documents: Document[];
searchQuery: string;
reasoning: string;
sourcesFound: number;
relevantDocuments?: any[];
}> => {
try {
const { query, searchInstructions, context = '' } = input;
// Get LLM and embeddings from config
if (!config?.configurable?.llm) {
throw new Error('LLM not available in config');
}
if (!config?.configurable?.embeddings) {
throw new Error('Embeddings not available in config');
}
const llm = config.configurable.llm;
const embeddings: Embeddings = config.configurable.embeddings;
// Step 1: Generate optimized search query
const template = PromptTemplate.fromTemplate(
webSearchRetrieverAgentPrompt,
);
const prompt = await template.format({
systemInstructions:
config.configurable?.systemInstructions ||
'You are a helpful AI assistant.',
query: query,
date: formatDateForLLM(new Date()),
supervisor: searchInstructions || query,
});
// Use structured output for search query generation
const structuredLlm = withStructuredOutput(llm, SearchQuerySchema, {
name: 'generate_search_query',
});
const searchQueryResult = await structuredLlm.invoke(prompt, {
signal: config?.signal,
});
const searchQuery = searchQueryResult.searchQuery;
console.log(
`WebSearchTool: Performing web search for query: "${searchQuery}"`,
);
console.log(
'WebSearchTool: Search query reasoning:',
searchQueryResult.reasoning,
);
// Step 2: Execute web search
const searchResults = await searchSearxng(searchQuery, {
language: 'en',
engines: [],
});
console.log(
`WebSearchTool: Found ${searchResults.results.length} search results`,
);
if (!searchResults.results || searchResults.results.length === 0) {
return {
documents: [],
searchQuery,
reasoning: searchQueryResult.reasoning,
sourcesFound: 0,
};
}
// Step 3: Calculate similarities and rank results
const queryVector = await embeddings.embedQuery(query);
// Calculate similarities for all results
const resultsWithSimilarity = await Promise.all(
searchResults.results.map(async (result) => {
const vector = await embeddings.embedQuery(
result.title + ' ' + (result.content || ''),
);
const similarity = computeSimilarity(vector, queryVector);
return { result, similarity };
}),
);
// Step 4: Prepare preview content for analysis
      const previewContents: PreviewContent[] = [];
// Always take the top 3 results for preview content
previewContents.push(
...searchResults.results.slice(0, 3).map((result) => ({
title: result.title || 'Untitled',
snippet: result.content || '',
url: result.url,
})),
);
// Sort by relevance score and take top 12 results for a total of 15
previewContents.push(
...resultsWithSimilarity
.slice(3)
.sort((a, b) => b.similarity - a.similarity)
.slice(0, 12)
.map(({ result }) => ({
title: result.title || 'Untitled',
snippet: result.content || '',
url: result.url,
})),
);
console.log(
`WebSearchTool: Extracted preview content from ${previewContents.length} search results`,
);
// Step 5: Analyze preview content to determine processing approach
let previewAnalysisResult = null;
let documentsToProcess: any[] = [];
if (previewContents.length > 0) {
console.log(
'WebSearchTool: Analyzing preview content to determine processing approach',
);
previewAnalysisResult = await analyzePreviewContent(
previewContents,
query,
query, // taskQuery same as query for tools
[], // no chat history for tools
llm,
config.configurable?.systemInstructions ||
'You are a helpful AI assistant.',
config?.signal || new AbortController().signal,
);
console.log(
'WebSearchTool: Preview analysis result:',
previewAnalysisResult.isSufficient ? 'SUFFICIENT' : 'INSUFFICIENT',
);
if (!previewAnalysisResult.isSufficient) {
// Need full content retrieval - process top similarity results
documentsToProcess = resultsWithSimilarity
.sort((a, b) => b.similarity - a.similarity)
.slice(0, 5)
.map(({ result }) => result);
} else {
// Preview content is sufficient - no need for full content retrieval
console.log(
'WebSearchTool: Preview content is sufficient, skipping full content retrieval',
);
documentsToProcess = [];
}
} else {
// No preview content, process top results
documentsToProcess = searchResults.results.slice(0, 5);
}
console.log(
`WebSearchTool: Processing ${documentsToProcess.length} URLs for content extraction`,
);
// Step 6: Extract content - either from full URLs or preview content
const documents: Document[] = [];
let processedCount = 0;
if (previewAnalysisResult?.isSufficient) {
// Create documents from preview content since it's sufficient
console.log(
'WebSearchTool: Creating documents from preview content (sufficient for answer)',
);
documents.push(
...previewContents.map((previewContent) => {
return new Document({
pageContent: `${previewContent.title}\n\n${previewContent.snippet}`,
metadata: {
title: previewContent.title,
url: previewContent.url,
source: previewContent.url,
processingType: 'preview-content',
searchQuery: searchQuery,
},
});
}),
);
console.log(
`WebSearchTool: Created ${documents.length} documents from preview content`,
);
} else {
// Extract and summarize content from selected URLs
for (const result of documentsToProcess) {
if (processedCount >= 5) break; // Limit processing
try {
console.log(`WebSearchTool: Processing ${result.url}`);
const summaryResult = await summarizeWebContent(
result.url,
query,
llm,
config.configurable?.systemInstructions ||
'You are a helpful AI assistant.',
config?.signal || new AbortController().signal,
);
if (summaryResult.document) {
documents.push(summaryResult.document);
console.log(
`WebSearchTool: Successfully extracted content from ${result.url}`,
);
} else {
console.log(
`WebSearchTool: No relevant content found for ${result.url}: ${summaryResult.notRelevantReason}`,
);
}
processedCount++;
} catch (error) {
console.error(
`WebSearchTool: Error processing ${result.url}:`,
error,
);
continue;
}
}
console.log(
`WebSearchTool: Successfully extracted ${documents.length} documents from ${processedCount} processed URLs`,
);
}
return {
documents,
searchQuery,
reasoning: searchQueryResult.reasoning,
sourcesFound: searchResults.results.length,
};
} catch (error) {
console.error('WebSearchTool: Error during web search:', error);
const errorMessage =
error instanceof Error ? error.message : 'Unknown error';
return {
documents: [],
searchQuery: input.query,
reasoning: `Error occurred during web search: ${errorMessage}`,
sourcesFound: 0,
};
}
},
{
name: 'web_search',
description:
'Performs web search using SearXNG, analyzes results, and extracts relevant content from top sources',
schema: WebSearchToolSchema,
},
);
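// --- Usage sketch (illustrative) ---
// Unlike simpleWebSearchTool, this tool returns a plain result object rather
// than a state Command, so it can be invoked directly. `llm` and `embeddings`
// are assumed to be initialized LangChain instances; the query and
// instructions are placeholders.
declare const llm: any;
declare const embeddings: Embeddings;

async function demoWebSearch(): Promise<void> {
  const { documents, searchQuery, sourcesFound } = await webSearchTool.invoke(
    {
      query: 'current state of WebGPU support in major browsers',
      searchInstructions: 'Prefer primary sources and official release notes',
    },
    { configurable: { llm, embeddings } },
  );
  console.log(
    `"${searchQuery}" -> ${sourcesFound} results, ${documents.length} documents kept`,
  );
}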

View file

@ -1,7 +1,13 @@
import { timezoneConverterTool } from './timezoneConverter';
import { dateDifferenceTool } from './dateDifference';

// Agent tools for simplified chat agent (will be uncommented as implemented)
// import { allAgentTools } from './agents';

export { timezoneConverterTool, dateDifferenceTool };

// Export agent tools module (will be uncommented as implemented)
// export * from './agents';

// Array containing all available tools
export const allTools = [timezoneConverterTool, dateDifferenceTool];

View file

@ -88,7 +88,7 @@ Snippet: ${content.snippet}
- Analyze the provided search result previews (titles + snippets) and chat history context to determine if they collectively contain enough information to provide a complete and accurate answer to the Task Query
- If the preview content can provide a complete answer to the Task Query, consider it sufficient
- If the preview content lacks important details, requires deeper analysis, or cannot fully answer the Task Query, consider it insufficient
- Be specific in your reasoning when the content is not sufficient, but keep the answer under 35 words
- The original query is provided for additional context; only use it for clarification of overall expectations and intent. You do **not** need to answer the original query directly or completely

# System Instructions

View file

@ -19,7 +19,9 @@ const RelevanceCheckSchema = z.object({
    .describe('Whether the content is relevant to the user query'),
  reason: z
    .string()
    .describe(
      "Brief explanation of why content is or isn't relevant. 20 words or less.",
    ),
});

export const summarizeWebContent = async (