feat(agent): Add task manager. Agent feels pretty useful now?

parent 60d36ab8f4 · commit 7b127e5635
14 changed files with 634 additions and 55 deletions
@@ -176,6 +176,42 @@ const AgentActionDisplay = ({
           <span>{event.details.reason}</span>
         </div>
       )}
+      {event.details.taskCount !== undefined && (
+        <div className="flex space-x-1">
+          <span className="font-bold">Tasks:</span>
+          <span>{event.details.taskCount}</span>
+        </div>
+      )}
+      {event.details.currentTask && (
+        <div className="flex space-x-1">
+          <span className="font-bold">Current Task:</span>
+          <span className="italic">"{event.details.currentTask}"</span>
+        </div>
+      )}
+      {event.details.taskIndex !== undefined && event.details.totalTasks !== undefined && (
+        <div className="flex space-x-1">
+          <span className="font-bold">Progress:</span>
+          <span>Task {event.details.taskIndex} of {event.details.totalTasks}</span>
+        </div>
+      )}
+      {event.details.completedTask && (
+        <div className="flex space-x-1">
+          <span className="font-bold">Completed:</span>
+          <span className="italic">"{event.details.completedTask}"</span>
+        </div>
+      )}
+      {event.details.nextTask && (
+        <div className="flex space-x-1">
+          <span className="font-bold">Next:</span>
+          <span className="italic">"{event.details.nextTask}"</span>
+        </div>
+      )}
+      {event.details.currentSearchFocus && (
+        <div className="flex space-x-1">
+          <span className="font-bold">Search Focus:</span>
+          <span className="italic">"{event.details.currentSearchFocus}"</span>
+        </div>
+      )}
     </div>
   )}
 </div>
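The display above only renders fields that are present on `event.details`. A minimal sketch of the shape those reads imply (the interface name and optionality are assumptions; the real event typing in the repo may differ):

```typescript
// Hypothetical shape implied by the fields AgentActionDisplay reads.
interface AgentActionDetails {
  reason?: string;
  taskCount?: number;          // rendered as "Tasks: N"
  currentTask?: string;        // rendered in italics as the task text
  taskIndex?: number;          // 1-based position, shown as "Task X of Y"
  totalTasks?: number;
  completedTask?: string;
  nextTask?: string;
  currentSearchFocus?: string;
}
```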
@@ -46,4 +46,16 @@ export const AgentState = Annotation.Root({
     reducer: (x, y) => (y ?? 0) + x,
     default: () => 0,
   }),
+  tasks: Annotation<string[]>({
+    reducer: (x, y) => y ?? x,
+    default: () => [],
+  }),
+  currentTaskIndex: Annotation<number>({
+    reducer: (x, y) => y ?? x,
+    default: () => 0,
+  }),
+  originalQuery: Annotation<string>({
+    reducer: (x, y) => y ?? x,
+    default: () => '',
+  }),
 });
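The three new channels all use a last-write-wins reducer (`y ?? x`): a node that omits the field in its update leaves the previous value in place, and a node that supplies it replaces it outright. A small standalone sketch of that semantics, not tied to the real graph:

```typescript
// Sketch of the reducer used above: undefined updates keep the old value.
const lastWriteWins = <T>(x: T, y?: T): T => y ?? x;

let tasks: string[] = [];
tasks = lastWriteWins(tasks, ["capital of New York?", "capital of France?"]); // replaced
tasks = lastWriteWins(tasks, undefined);                                      // unchanged

let currentTaskIndex = 0;
currentTaskIndex = lastWriteWins(currentTaskIndex, 1); // advanced by the task manager
```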
@@ -16,7 +16,7 @@ import {
   additionalWebSearchPrompt,
   decideNextActionPrompt,
 } from '../prompts/analyzer';
-import { removeThinkingBlocks } from '../utils/contentUtils';
+import { removeThinkingBlocks, removeThinkingBlocksFromMessages } from '../utils/contentUtils';

 export class AnalyzerAgent {
   private llm: BaseChatModel;

@@ -40,6 +40,11 @@ export class AnalyzerAgent {
     try {
       setTemperature(this.llm, 0.0);

+      // Initialize originalQuery if not set
+      if (!state.originalQuery) {
+        state.originalQuery = state.query;
+      }
+
       let nextActionContent = 'need_more_info';
       // Skip full analysis if this is the first run.
       //if (state.fullAnalysisAttempts > 0) {

@@ -76,11 +81,13 @@
         searchInstructionHistory: state.searchInstructionHistory
           .map((question) => `- ${question}`)
           .join('\n'),
-        query: state.query,
+        query: state.originalQuery || state.query, // Use original query for analysis context
       });

+      const thinkingBlocksRemovedMessages = removeThinkingBlocksFromMessages(state.messages);
+
       const nextActionResponse = await this.llm.invoke(
-        [...state.messages, new HumanMessage(nextActionPrompt)],
+        [...thinkingBlocksRemovedMessages, new HumanMessage(nextActionPrompt)],
         { signal: this.signal },
       );

@@ -107,11 +114,11 @@
         searchInstructionHistory: state.searchInstructionHistory
           .map((question) => `- ${question}`)
           .join('\n'),
-        query: state.query,
+        query: state.originalQuery || state.query, // Use original query for user info context
       });

       const stream = await this.llm.stream(
-        [...state.messages, new SystemMessage(moreUserInfoPrompt)],
+        [...removeThinkingBlocksFromMessages(state.messages), new SystemMessage(moreUserInfoPrompt)],
         { signal: this.signal },
       );

@@ -164,11 +171,11 @@
         searchInstructionHistory: state.searchInstructionHistory
           .map((question) => `- ${question}`)
           .join('\n'),
-        query: state.query,
+        query: state.originalQuery || state.query, // Use original query for more info context
       });

       const moreInfoResponse = await this.llm.invoke(
-        [...state.messages, new HumanMessage(moreInfoPrompt)],
+        [...removeThinkingBlocksFromMessages(state.messages), new HumanMessage(moreInfoPrompt)],
         { signal: this.signal },
       );

@@ -182,27 +189,33 @@
         data: {
           action: 'MORE_DATA_NEEDED',
           message:
-            'Current context is insufficient - gathering more information',
+            'Current context is insufficient - analyzing search requirements',
           details: {
             nextSearchQuery: moreInfoQuestion,
             documentCount: state.relevantDocuments.length,
             searchIterations: state.searchInstructionHistory.length,
-            query: state.query,
+            query: state.originalQuery || state.query, // Show original query in details
+            currentSearchFocus: moreInfoQuestion,
           },
         },
       });

       return new Command({
-        goto: 'web_search',
+        goto: 'task_manager',
         update: {
           messages: [
             new AIMessage(
               `The following question can help refine the search: ${moreInfoQuestion}`,
             ),
           ],
+          query: moreInfoQuestion, // Use the refined question for TaskManager to analyze
           searchInstructions: moreInfoQuestion,
-          searchInstructionHistory: [moreInfoQuestion],
+          searchInstructionHistory: [...(state.searchInstructionHistory || []), moreInfoQuestion],
           fullAnalysisAttempts: 1,
+          originalQuery: state.originalQuery || state.query, // Preserve the original user query
+          // Reset task list so TaskManager can break down the search requirements again
+          tasks: [],
+          currentTaskIndex: 0,
         },
       });
     }

@@ -216,7 +229,8 @@
         details: {
           documentCount: state.relevantDocuments.length,
           searchIterations: state.searchInstructionHistory.length,
-          query: state.query,
+          totalTasks: state.tasks?.length || 1,
+          query: state.originalQuery || state.query,
         },
       },
     });
@@ -2,3 +2,4 @@ export { AgentState } from './agentState';
 export { WebSearchAgent } from './webSearchAgent';
 export { AnalyzerAgent } from './analyzerAgent';
 export { SynthesizerAgent } from './synthesizerAgent';
+export { TaskManagerAgent } from './taskManagerAgent';
@@ -4,6 +4,7 @@ import { Command, END } from '@langchain/langgraph';
 import { EventEmitter } from 'events';
 import { getModelName } from '../utils/modelUtils';
 import { AgentState } from './agentState';
+import { removeThinkingBlocksFromMessages } from '../utils/contentUtils';

 export class SynthesizerAgent {
   private llm: BaseChatModel;

@@ -67,7 +68,7 @@ Your task is to provide answers that are:
 ${this.personaInstructions}
 </personaInstructions>

-User Query: ${state.query}
+User Query: ${state.originalQuery || state.query}

 Available Information:
 ${state.relevantDocuments

@@ -97,7 +98,7 @@ ${doc.metadata?.url.toLowerCase().includes('file') ? '' : '\n<url>' + doc.metada
       );

       const stream = await this.llm.stream(
-        [new SystemMessage(synthesisPrompt), new HumanMessage(state.query)],
+        [...removeThinkingBlocksFromMessages(state.messages), new SystemMessage(synthesisPrompt), new HumanMessage(state.originalQuery || state.query)],
         { signal: this.signal },
       );
src/lib/agents/taskManagerAgent.ts (new file, 187 lines)
@@ -0,0 +1,187 @@
import { BaseChatModel } from '@langchain/core/language_models/chat_models';
import { AIMessage } from '@langchain/core/messages';
import { PromptTemplate } from '@langchain/core/prompts';
import { Command } from '@langchain/langgraph';
import { EventEmitter } from 'events';
import { taskBreakdownPrompt } from '../prompts/taskBreakdown';
import { AgentState } from './agentState';
import { setTemperature } from '../utils/modelUtils';

export class TaskManagerAgent {
  private llm: BaseChatModel;
  private emitter: EventEmitter;
  private systemInstructions: string;
  private signal: AbortSignal;

  constructor(
    llm: BaseChatModel,
    emitter: EventEmitter,
    systemInstructions: string,
    signal: AbortSignal,
  ) {
    this.llm = llm;
    this.emitter = emitter;
    this.systemInstructions = systemInstructions;
    this.signal = signal;
  }

  /**
   * Task manager agent node - breaks down complex questions into smaller tasks
   */
  async execute(state: typeof AgentState.State): Promise<Command> {
    try {
      setTemperature(this.llm, 0); // Set temperature to 0 for deterministic output

      // Check if we're in task progression mode (tasks already exist and we're processing them)
      if (state.tasks && state.tasks.length > 0) {
        const currentTaskIndex = state.currentTaskIndex || 0;
        const hasMoreTasks = currentTaskIndex < state.tasks.length - 1;

        if (hasMoreTasks) {
          // Move to next task
          const nextTaskIndex = currentTaskIndex + 1;
          this.emitter.emit('agent_action', {
            type: 'agent_action',
            data: {
              action: 'PROCEEDING_TO_NEXT_TASK',
              message: `Task ${currentTaskIndex + 1} completed. Moving to task ${nextTaskIndex + 1} of ${state.tasks.length}.`,
              details: {
                completedTask: state.tasks[currentTaskIndex],
                nextTask: state.tasks[nextTaskIndex],
                taskIndex: nextTaskIndex + 1,
                totalTasks: state.tasks.length,
                documentCount: state.relevantDocuments.length,
                query: state.originalQuery || state.query,
              },
            },
          });

          return new Command({
            goto: 'web_search',
            update: {
              messages: [
                new AIMessage(
                  `Task ${currentTaskIndex + 1} completed. Processing task ${nextTaskIndex + 1} of ${state.tasks.length}: "${state.tasks[nextTaskIndex]}"`,
                ),
              ],
              currentTaskIndex: nextTaskIndex,
            },
          });
        } else {
          // All tasks completed, move to analysis
          this.emitter.emit('agent_action', {
            type: 'agent_action',
            data: {
              action: 'ALL_TASKS_COMPLETED',
              message: `All ${state.tasks.length} tasks completed. Ready for analysis.`,
              details: {
                totalTasks: state.tasks.length,
                documentCount: state.relevantDocuments.length,
                query: state.originalQuery || state.query,
              },
            },
          });

          return new Command({
            goto: 'analyzer',
            update: {
              messages: [
                new AIMessage(
                  `All ${state.tasks.length} tasks completed. Moving to analysis phase.`,
                ),
              ],
            },
          });
        }
      }

      // Original task breakdown logic for new queries
      // Emit task analysis event
      this.emitter.emit('agent_action', {
        type: 'agent_action',
        data: {
          action: 'ANALYZING_TASK_COMPLEXITY',
          message: `Analyzing question to determine if it needs to be broken down into smaller tasks`,
          details: {
            query: state.query,
            currentTasks: state.tasks?.length || 0,
          },
        },
      });

      const template = PromptTemplate.fromTemplate(taskBreakdownPrompt);
      const prompt = await template.format({
        systemInstructions: this.systemInstructions,
        query: state.query,
      });

      const taskBreakdownResult = await this.llm.invoke(
        [prompt],
        { signal: this.signal },
      );

      // Parse the response to extract tasks
      const responseContent = taskBreakdownResult.content as string;
      const taskLines = responseContent
        .split('\n')
        .filter(line => line.trim().startsWith('TASK:'))
        .map(line => line.replace('TASK:', '').trim())
        .filter(task => task.length > 0);

      if (taskLines.length === 0) {
        // Fallback: if no tasks found, use the original query
        taskLines.push(state.query);
      }

      console.log(`Task breakdown completed: ${taskLines.length} tasks identified`);
      taskLines.forEach((task, index) => {
        console.log(`Task ${index + 1}: ${task}`);
      });

      // Emit task breakdown completion event
      this.emitter.emit('agent_action', {
        type: 'agent_action',
        data: {
          action: 'TASK_BREAKDOWN_COMPLETED',
          message: `Question broken down into ${taskLines.length} focused ${taskLines.length === 1 ? 'task' : 'tasks'}`,
          details: {
            query: state.query,
            taskCount: taskLines.length,
            tasks: taskLines,
          },
        },
      });

      const responseMessage = taskLines.length === 1
        ? 'Question is already focused and ready for processing'
        : `Question broken down into ${taskLines.length} focused tasks for parallel processing`;

      return new Command({
        goto: 'web_search', // Next step would typically be web search for each task
        update: {
          messages: [new AIMessage(responseMessage)],
          tasks: taskLines,
          currentTaskIndex: 0,
          originalQuery: state.originalQuery || state.query, // Preserve original if not already set
        },
      });
    } catch (error) {
      console.error('Task breakdown error:', error);
      const errorMessage = new AIMessage(
        `Task breakdown failed: ${error instanceof Error ? error.message : 'Unknown error'}`,
      );

      return new Command({
        goto: 'web_search', // Fallback to web search with original query
        update: {
          messages: [errorMessage],
          tasks: [state.query], // Use original query as single task
          currentTaskIndex: 0,
          originalQuery: state.originalQuery || state.query, // Preserve original if not already set
        },
      });
    } finally {
      setTemperature(this.llm, undefined); // Reset temperature to default
    }
  }
}
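The breakdown parser above keys on response lines that start with `TASK:` and falls back to the original query when none are found. The same logic, extracted into a standalone sketch with an illustrative model response (the sample text is made up, not from a real run):

```typescript
// Mirrors the parsing step in TaskManagerAgent.execute.
const parseTasks = (responseContent: string, fallbackQuery: string): string[] => {
  const tasks = responseContent
    .split('\n')
    .filter((line) => line.trim().startsWith('TASK:'))
    .map((line) => line.replace('TASK:', '').trim())
    .filter((task) => task.length > 0);
  return tasks.length > 0 ? tasks : [fallbackQuery];
};

const sample = `Analysis: multiple distinct subjects
TASK: What's the capital of New York?
TASK: What's the capital of France?`;

console.log(parseTasks(sample, 'fallback query'));
// ["What's the capital of New York?", "What's the capital of France?"]
```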
@@ -15,6 +15,8 @@ import {
 } from '../utils/analyzePreviewContent';
 import { AgentState } from './agentState';
 import { setTemperature } from '../utils/modelUtils';
+import { Embeddings } from '@langchain/core/embeddings';
+import { removeThinkingBlocksFromMessages } from '../utils/contentUtils';

 export class WebSearchAgent {
   private llm: BaseChatModel;

@@ -41,6 +43,13 @@ export class WebSearchAgent {
     try {
       setTemperature(this.llm, 0); // Set temperature to 0 for deterministic output

+      // Determine current task to process
+      const currentTask = state.tasks && state.tasks.length > 0
+        ? state.tasks[state.currentTaskIndex || 0]
+        : state.query;
+
+      console.log(`Processing task ${(state.currentTaskIndex || 0) + 1} of ${state.tasks?.length || 1}: "${currentTask}"`);
+
       // Emit preparing web search event
       this.emitter.emit('agent_action', {
         type: 'agent_action',

@@ -49,7 +58,10 @@
         // message: `Preparing search query`,
         details: {
           query: state.query,
-          searchInstructions: state.searchInstructions || state.query,
+          currentTask: currentTask,
+          taskIndex: (state.currentTaskIndex || 0) + 1,
+          totalTasks: state.tasks?.length || 1,
+          searchInstructions: state.searchInstructions || currentTask,
           documentCount: state.relevantDocuments.length,
           searchIterations: state.searchInstructionHistory.length,
         },

@@ -61,13 +73,13 @@
       );
       const prompt = await template.format({
         systemInstructions: this.systemInstructions,
-        query: state.query,
+        query: currentTask, // Use current task instead of main query
         date: formatDateForLLM(new Date()),
         supervisor: state.searchInstructions,
       });

       const searchQueryResult = await this.llm.invoke(
-        [...state.messages, prompt],
+        [...removeThinkingBlocksFromMessages(state.messages), prompt],
         { signal: this.signal },
       );

@@ -87,6 +99,9 @@
         // message: `Searching the web for: '${searchQuery}'`,
         details: {
           query: state.query,
+          currentTask: currentTask,
+          taskIndex: (state.currentTaskIndex || 0) + 1,
+          totalTasks: state.tasks?.length || 1,
           searchQuery: searchQuery,
           documentCount: state.relevantDocuments.length,
           searchIterations: state.searchInstructionHistory.length,

@@ -107,6 +122,9 @@
           message: `Found ${searchResults.results.length} potential web sources`,
           details: {
             query: state.query,
+            currentTask: currentTask,
+            taskIndex: (state.currentTaskIndex || 0) + 1,
+            totalTasks: state.tasks?.length || 1,
             searchQuery: searchQuery,
             sourcesFound: searchResults.results.length,
             documentCount: state.relevantDocuments.length,

@@ -150,7 +168,7 @@
           action: 'ANALYZING_PREVIEW_CONTENT',
           message: `Analyzing ${previewContents.length} search result previews to determine processing approach`,
           details: {
-            query: state.query,
+            query: currentTask,
             previewCount: previewContents.length,
             documentCount: state.relevantDocuments.length,
             searchIterations: state.searchInstructionHistory.length,

@@ -160,8 +178,8 @@

       previewAnalysisResult = await analyzePreviewContent(
         previewContents,
-        state.query,
-        state.messages,
+        currentTask,
+        removeThinkingBlocksFromMessages(state.messages),
         this.llm,
         this.systemInstructions,
         this.signal,

@@ -189,7 +207,7 @@
           action: 'PROCESSING_PREVIEW_CONTENT',
           message: `Using preview content from ${previewContents.length} sources - no full content retrieval needed`,
           details: {
-            query: state.query,
+            query: currentTask,
             previewCount: previewContents.length,
             documentCount: state.relevantDocuments.length,
             searchIterations: state.searchInstructionHistory.length,

@@ -236,7 +254,7 @@
           action: 'PROCEEDING_WITH_FULL_ANALYSIS',
           message: `Preview content insufficient - proceeding with detailed content analysis`,
           details: {
-            query: state.query,
+            query: currentTask,
             insufficiencyReason: insufficiencyReason,
             documentCount: state.relevantDocuments.length,
             searchIterations: state.searchInstructionHistory.length,

@@ -246,7 +264,7 @@
       });

       // Summarize the top 2 search results
-      for (const result of searchResults.results) {
+      for (const result of searchResults.results.slice(0, 8)) {
         if (this.signal.aborted) {
           console.warn('Search operation aborted by signal');
           break; // Exit if the operation is aborted

@@ -258,17 +276,17 @@
         // optimization that should be transparent to the user
         continue; // Skip banned URLs
       }
-      if (attemptedUrlCount >= 5) {
-        console.warn(
-          'Too many attempts to summarize URLs, stopping further attempts.',
-        );
-        break; // Limit the number of attempts to summarize URLs
-      }
+      // if (attemptedUrlCount >= 5) {
+      //   console.warn(
+      //     'Too many attempts to summarize URLs, stopping further attempts.',
+      //   );
+      //   break; // Limit the number of attempts to summarize URLs
+      // }
       attemptedUrlCount++;

       bannedSummaryUrls.push(result.url); // Add to banned URLs to avoid duplicates

-      if (documents.length >= 1) {
+      if (documents.length >= 2) {
         break; // Limit to top 1 document
       }

@@ -279,7 +297,7 @@
             action: 'ANALYZING_SOURCE',
             message: `Analyzing content from: ${result.title || result.url}`,
             details: {
-              query: state.query,
+              query: currentTask,
               sourceUrl: result.url,
               sourceTitle: result.title || 'Untitled',
               documentCount: state.relevantDocuments.length,

@@ -290,7 +308,7 @@

         const summaryResult = await summarizeWebContent(
           result.url,
-          state.query,
+          currentTask,
           this.llm,
           this.systemInstructions,
           this.signal,

@@ -306,7 +324,7 @@
             action: 'CONTEXT_UPDATED',
             message: `Added information from ${summaryResult.document.metadata.title || result.url} to context`,
             details: {
-              query: state.query,
+              query: currentTask,
               sourceUrl: result.url,
               sourceTitle:
                 summaryResult.document.metadata.title || 'Untitled',

@@ -360,7 +378,7 @@
       console.log(responseMessage);

       return new Command({
-        goto: 'analyzer',
+        goto: 'task_manager', // Route back to task manager to check if more tasks remain
         update: {
           messages: [new AIMessage(responseMessage)],
           relevantDocuments: documents,
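With the task list in state, the web search agent now researches one task per pass and returns control to `task_manager`, which either advances `currentTaskIndex` or routes to `analyzer`. The selection logic in isolation (names mirror the diff; the looping itself is driven by the graph, not by this function):

```typescript
// Which question the web search agent researches on a given pass.
const selectCurrentTask = (state: {
  tasks?: string[];
  currentTaskIndex?: number;
  query: string;
}): string =>
  state.tasks && state.tasks.length > 0
    ? state.tasks[state.currentTaskIndex || 0]
    : state.query;

// { tasks: ['calories in one apple', 'calories in one chicken breast'], currentTaskIndex: 1, query: '…' }
//   -> 'calories in one chicken breast'
// { tasks: [], currentTaskIndex: 0, query: 'original question' } -> 'original question'
```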
@@ -2,21 +2,67 @@ export const decideNextActionPrompt = `You are an expert content analyzer.
 Your task is to analyze the provided context and determine if we have enough information to fully answer the user's query.

 # Instructions
-- Carefully analyze the content of the context provided and determine if it contains sufficient information to answer the user's query
-- The content should completely address the query, providing detailed explanations, relevant facts, and necessary context
+- Carefully analyze the content of the context provided and the historical context of the conversation to determine if it contains sufficient information to answer the user's query
+- Use the content provided in the \`context\` tag, as well as the historical context of the conversation, to make your determination
 - If the context provides conflicting information, explain the discrepancies and what additional information is needed to resolve them
 - If the user is asking for a specific number of sources and the context does not provide enough, consider the content insufficient

-# Response Options
-- If the content is sufficient, respond with \`good_content\`
-- If the content is not sufficient you have two options
-  - Option 1 - Ask the user for more information (Respond with \`need_user_info\`)
-    - Use this option when the content is not sufficient due to information that is would not typically be available online, or when the query is too vague or broad
-    - For example, if the query is asking for personal opinions, preferences, user experiences, settings, objects the user owns, or specific details that are not typically found in online content
-  - Option 2 - Ask the LLM to generate a more specific search query (Respond with \`need_more_info\`)
-    - Only use this option when the content is not sufficient due to missing information that could typically be found online and is not related to personal opinions, preferences, user experiences, or specific objects the user owns
-- The only output in your response should be one of the following:
+# Response Options Decision Tree
+
+## Step 1: Check if content is sufficient
+- If the context fully answers the user's query with complete information → respond with \`good_content\`
+- If the user is requesting to use the existing context to answer their query → respond with \`good_content\`
+- If the user is requesting to avoid web searches → respond with \`good_content\`
+- If the user is asking you to be creative, such as writing a story, poem, or creative content → respond with \`good_content\` unless the context is clearly insufficient
+
+## Step 2: If content is insufficient, determine the type of missing information
+
+### Use \`need_user_info\` when the missing information is:
+**Personal/Subjective Information:**
+- User's personal preferences, opinions, or experiences
+- User's specific situation, location, or circumstances
+- User's budget, timeline, or constraints
+- User's skill level, background, or expertise
+- User's goals, intentions, or desired outcomes
+- Configuration details about user's specific setup/environment
+- User's past experiences with products/services
+- User's access to specific resources or tools
+- Related to creative or subjective tasks
+
+**Context-Dependent Information:**
+- "What should I do in my specific situation?"
+- "What's best for me personally?"
+- "How do I configure my specific system?"
+- "What happened in my case?"
+
+**Examples requiring user info:**
+- "What laptop should I buy?" (missing: budget, use case, preferences)
+- "How do I fix my computer?" (missing: specific problem, system details)
+- "What career should I pursue?" (missing: interests, skills, goals)
+- "Which restaurant should I go to?" (missing: location, cuisine preference, budget)
+
+### Use \`need_more_info\` when the missing information is:
+**Factual/Objective Information that exists online:**
+- Technical specifications or details
+- Current prices, availability, or market data
+- Recent news, updates, or developments
+- Detailed how-to instructions or procedures
+- Comparative analysis between options
+- Expert opinions or reviews from credible sources
+- Statistical data or research findings
+
+**Examples requiring more web search:**
+- "What are the latest features in iPhone 15?" (missing: recent tech specs)
+- "How to install Docker on Ubuntu 22.04?" (missing: specific installation steps)
+- "Compare Tesla Model 3 vs BMW i4" (missing: detailed comparison data)
+
+# Critical Decision Point
+Ask yourself: "Could this missing information reasonably be found through a web search, or does it require the user to provide personal/subjective details?"
+
+- If it's personal/subjective → \`need_user_info\`
+- If it's factual and searchable → \`need_more_info\`
+- If the context is complete or the user wants to use the existing context → \`good_content\`
+
+The only output in your response should be one of the following:
 - \`good_content\`
 - \`need_user_info\`
 - \`need_more_info\`

@@ -67,9 +113,6 @@ Your task is to analyze the provided context and user query to determine what ad

 # Instructions
 - Respond with a detailed question that will be directed to an LLM to gather more specific information that can help refine the search.
-- If if the query is asking about a complex topic, break it down into a single smaller question that can be answered one at a time. This search process can be iterative
-- Break down the query into a smaller, more focused question that can be answered with a web search
-  - For example, if the query is asking about specific information from multiple locations, break the query into one smaller query for a single location
 - Avoid giving the same guidance more than once, and avoid repeating the same question multiple times
 - Avoid asking for general information or vague details; focus on specific, actionable questions that can lead to concrete answers
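The prompt constrains the model to emit exactly one of three tokens. How the raw reply is mapped back to a routing decision is not shown in this diff, so the helper below is an assumption: a hedged sketch of one way a caller might normalize the output, defaulting to `need_more_info` as the earlier `analyzerAgent.ts` hunk does.

```typescript
type NextAction = 'good_content' | 'need_user_info' | 'need_more_info';

// Assumed helper, not part of the commit: normalizes the LLM's single-token reply.
const parseNextAction = (raw: string): NextAction => {
  const text = raw.trim().toLowerCase();
  if (text.includes('good_content')) return 'good_content';
  if (text.includes('need_user_info')) return 'need_user_info';
  return 'need_more_info'; // matches the default used in analyzerAgent.ts
};
```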
@@ -1,12 +1,14 @@
 import { webSearchResponsePrompt, webSearchRetrieverPrompt } from './webSearch';
 import { localResearchPrompt } from './localResearch';
 import { chatPrompt } from './chat';
+import { taskBreakdownPrompt } from './taskBreakdown';

 const prompts = {
   webSearchResponsePrompt,
   webSearchRetrieverPrompt,
   localResearchPrompt,
   chatPrompt,
+  taskBreakdownPrompt,
 };

 export default prompts;
src/lib/prompts/taskBreakdown.ts (new file, 69 lines)
@@ -0,0 +1,69 @@
export const taskBreakdownPrompt = `You are a task breakdown specialist. Your job is to analyze a user's question and determine if it needs to be broken down into smaller, more focused questions that can be answered independently.

{systemInstructions}

## Analysis Guidelines:

### When to Break Down:
1. **Multiple distinct subjects**: Questions asking about different people, places, things, or concepts
2. **Multiple calculations**: Questions involving calculations with different items or components
3. **Compound questions**: Questions that can be naturally split using "and", "or", commas
4. **Lists or enumerations**: Questions asking about items in a list or series

### When NOT to Break Down:
1. **Single focused question**: Already asks about one specific thing
2. **Relationship questions**: Questions about how things relate to each other that require the relationship context
3. **Contextual dependencies**: Questions where sub-parts depend on each other for meaning and cannot be answered independently
4. **Procedural questions**: Questions asking about a specific process or sequence that must be answered as a whole

### Sub-Question Rules:
1. Each sub-question should be **self-contained** and answerable independently
2. Preserve the **original context and intent** in each sub-question
3. Maintain **specific details** like quantities, measurements, and qualifiers
4. Use **clear, unambiguous language** in each sub-question
5. Keep the **same question type** (factual, analytical, etc.)

## Examples:

**Input**: "What's the capital of New York, California, and France?"
**Analysis**: Multiple distinct geographical subjects
**Output**:
TASK: What's the capital of New York?
TASK: What's the capital of California?
TASK: What's the capital of France?

**Input**: "How many calories are in my meal of: One chicken breast, one apple, three oreo cookies, two cups of peanut butter"
**Analysis**: Multiple food items requiring separate calorie calculations
**Output**:
TASK: How many calories are in one chicken breast?
TASK: How many calories are in one apple?
TASK: How many calories are in one oreo cookie?
TASK: How many calories are in one cup of peanut butter?

**Input**: "What is the capital of France?"
**Analysis**: Single focused question, no breakdown needed
**Output**:
TASK: What is the capital of France?

**Input**: "Compare the economies of Japan and Germany"
**Analysis**: Comparative question requiring detailed data about each economy separately
**Output**:
TASK: What is the current state of Japan's economy?
TASK: What is the current state of Germany's economy?

**Input**: "What are the side effects of aspirin, ibuprofen, and acetaminophen?"
**Analysis**: Multiple distinct medications
**Output**:
TASK: What are the side effects of aspirin?
TASK: What are the side effects of ibuprofen?
TASK: What are the side effects of acetaminophen?

## Your Task:

Analyze this user question: "{query}"

Provide your response in the following format:
- Each sub-question on a new line starting with "TASK:"
- If the question is already focused, provide it as a single task

Your response:`;
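Because `taskBreakdownPrompt` carries `{systemInstructions}` and `{query}` placeholders, it is run through `PromptTemplate` before being sent to the model, as the task manager does above. A minimal usage sketch (import path and example values are illustrative):

```typescript
import { PromptTemplate } from '@langchain/core/prompts';
import { taskBreakdownPrompt } from './taskBreakdown'; // path assumed

// Mirrors the formatting step in TaskManagerAgent.execute.
const buildBreakdownPrompt = async (systemInstructions: string, query: string) => {
  const template = PromptTemplate.fromTemplate(taskBreakdownPrompt);
  return template.format({ systemInstructions, query });
};

// buildBreakdownPrompt('Answer concisely.', "What's the capital of New York, California, and France?")
//   resolves to the full prompt string handed to this.llm.invoke([prompt], { signal })
```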
@@ -187,6 +187,7 @@ export const webSearchRetrieverAgentPrompt = `
 - This includes but is not limited to things like sports scores, standings, weather, current events, etc.
 - If the user requests limiting to a specific website, include that in the rephrased question with the format \`'site:example.com'\`, be sure to include the quotes. Only do this if the limiting is explicitly mentioned in the question
 - You will be given additional instructions from a supervisor in the <supervisor> tag that will direct you to refine the question further or to include specific details. Follow these instructions carefully and incorporate them into your rephrased question
+- Give priority to the user question

 # Data
 - The user question is contained in the <question> tag after the <examples> below
@@ -18,6 +18,7 @@ import {
   WebSearchAgent,
   AnalyzerAgent,
   SynthesizerAgent,
+  TaskManagerAgent,
 } from '../agents';

 /**

@@ -28,6 +29,7 @@ export class AgentSearch {
   private embeddings: Embeddings;
   private checkpointer: MemorySaver;
   private signal: AbortSignal;
+  private taskManagerAgent: TaskManagerAgent;
   private webSearchAgent: WebSearchAgent;
   private analyzerAgent: AnalyzerAgent;
   private synthesizerAgent: SynthesizerAgent;

@@ -48,6 +50,12 @@
     this.emitter = emitter;

     // Initialize agents
+    this.taskManagerAgent = new TaskManagerAgent(
+      llm,
+      emitter,
+      systemInstructions,
+      signal,
+    );
     this.webSearchAgent = new WebSearchAgent(
       llm,
       emitter,

@@ -73,18 +81,25 @@
    */
   private createWorkflow() {
     const workflow = new StateGraph(AgentState)
+      .addNode(
+        'task_manager',
+        this.taskManagerAgent.execute.bind(this.taskManagerAgent),
+        {
+          ends: ['web_search', 'analyzer'],
+        },
+      )
       .addNode(
         'web_search',
         this.webSearchAgent.execute.bind(this.webSearchAgent),
         {
-          ends: ['analyzer'],
+          ends: ['task_manager'],
         },
       )
       .addNode(
         'analyzer',
         this.analyzerAgent.execute.bind(this.analyzerAgent),
         {
-          ends: ['web_search', 'synthesizer'],
+          ends: ['task_manager', 'synthesizer'],
         },
       )
       .addNode(

@@ -113,7 +128,7 @@
     try {
       await workflow.invoke(initialState, {
         configurable: { thread_id: `agent_search_${Date.now()}` },
-        recursionLimit: 10,
+        recursionLimit: 20,
         signal: this.signal,
       });
     } catch (error: BaseLangGraphError | any) {
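After this change the graph never runs web_search → analyzer directly: every search pass returns to task_manager, which either advances to the next task or hands off to the analyzer (hence the raised recursion limit). A trimmed sketch of the routing with stub node functions standing in for the bound `execute` methods; the synthesizer node and the graph's entry edge are unchanged by this hunk and omitted here:

```typescript
import { Command, StateGraph } from '@langchain/langgraph';
import { AgentState } from '../agents/agentState'; // path assumed

// Stubs in place of the real agent methods, for illustration only.
const taskManagerNode = async () => new Command({ goto: 'web_search' });
const webSearchNode = async () => new Command({ goto: 'task_manager' });
const analyzerNode = async () => new Command({ goto: 'synthesizer' });

// Routing after this commit: web_search always loops back through task_manager.
const workflow = new StateGraph(AgentState)
  .addNode('task_manager', taskManagerNode, { ends: ['web_search', 'analyzer'] })
  .addNode('web_search', webSearchNode, { ends: ['task_manager'] })
  .addNode('analyzer', analyzerNode, { ends: ['task_manager', 'synthesizer'] });
```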
@@ -1,3 +1,10 @@
+import {
+  BaseMessage,
+  AIMessage,
+  HumanMessage,
+  SystemMessage
+} from '@langchain/core/messages';
+
 /**
  * Removes all content within <think>...</think> blocks
  * @param text The input text containing thinking blocks

@@ -8,3 +15,32 @@ export const removeThinkingBlocks = (text: string): string => {
   // Using the 's' flag to make dot match newlines
   return text.replace(/<think>[\s\S]*?<\/think>/g, '').trim();
 };
+
+/**
+ * Removes thinking blocks from the content of an array of BaseMessage objects
+ * @param messages Array of BaseMessage objects
+ * @returns New array with thinking blocks removed from each message's content
+ */
+export const removeThinkingBlocksFromMessages = (messages: BaseMessage[]): BaseMessage[] => {
+  return messages.map(message => {
+    // Only process string content, leave complex content as-is
+    if (typeof message.content !== 'string') {
+      return message;
+    }
+
+    const cleanedContent = removeThinkingBlocks(message.content);
+
+    // Create new instance of the same message type with cleaned content
+    if (message instanceof AIMessage) {
+      return new AIMessage(cleanedContent);
+    } else if (message instanceof HumanMessage) {
+      return new HumanMessage(cleanedContent);
+    } else if (message instanceof SystemMessage) {
+      return new SystemMessage(cleanedContent);
+    } else {
+      // For any other message types, return the original message unchanged
+      // This is a safe fallback for custom message types
+      return message;
+    }
+  });
+};
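A short usage sketch of the new helper, which the agents now call before replaying chat history to the model (import path assumed):

```typescript
import { AIMessage, HumanMessage } from '@langchain/core/messages';
import { removeThinkingBlocksFromMessages } from './contentUtils'; // path assumed

const history = [
  new HumanMessage('How tall is the Eiffel Tower?'),
  new AIMessage('<think>Recall the height…</think>It is about 330 metres tall.'),
];

const cleaned = removeThinkingBlocksFromMessages(history);
// cleaned[1].content === 'It is about 330 metres tall.'
// non-string message content is passed through unchanged
```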
@@ -1,6 +1,7 @@
 import { CheerioWebBaseLoader } from '@langchain/community/document_loaders/web/cheerio';
+import { PlaywrightWebBaseLoader } from '@langchain/community/document_loaders/web/playwright';
 import { Document } from '@langchain/core/documents';
 import { Embeddings } from '@langchain/core/embeddings';
 import { Readability } from '@mozilla/readability';
 import axios from 'axios';
 import { JSDOM } from 'jsdom';

@@ -8,6 +9,7 @@ import { RecursiveCharacterTextSplitter } from 'langchain/text_splitter';
 import fetch from 'node-fetch';
 import pdfParse from 'pdf-parse';
+import type { Browser, Page } from 'playwright';
 import computeSimilarity from './computeSimilarity';

 export const getDocumentsFromLinks = async ({ links }: { links: string[] }) => {
   const splitter = new RecursiveCharacterTextSplitter();

@@ -275,3 +277,145 @@ export const getWebContentLite = async (
     return null;
   }
 };
+
+/**
+ * Fetches web content from a given URL using LangChain's PlaywrightWebBaseLoader.
+ * Parses it using Readability for better content extraction.
+ * Returns a Document object containing relevant snippets of text using ranked text splitting.
+ * Text is split into chunks of approximately 800 characters, with 100 characters overlap.
+ *
+ * @param url - The URL to fetch content from.
+ * @param rankAgainstVector - The vector to rank the content against for relevance.
+ * @param embeddings - The embeddings model to use for ranking the content.
+ * @returns A Promise that resolves to a Document object or null if parsing fails.
+ */
+export const getRankedWebContentSnippets = async (
+  url: string,
+  rankAgainstVector: number[],
+  embeddings: Embeddings,
+): Promise<Document | null> => {
+  try {
+    console.log(`Fetching ranked content snippets from URL: ${url}`);
+
+    const loader = new PlaywrightWebBaseLoader(url, {
+      launchOptions: {
+        headless: true,
+        timeout: 30000,
+      },
+      gotoOptions: {
+        waitUntil: 'domcontentloaded',
+        timeout: 10000,
+      },
+      async evaluate(page: Page, browser: Browser) {
+        // Wait for the content to load properly
+        await page.waitForLoadState('networkidle', { timeout: 10000 });
+
+        // Allow some time for dynamic content to load
+        await page.waitForTimeout(3000);
+
+        return await page.content();
+      },
+    });
+
+    const docs = await loader.load();
+
+    if (!docs || docs.length === 0) {
+      console.warn(`Failed to load content for URL: ${url}`);
+      return null;
+    }
+
+    const doc = docs[0];
+
+    const dom = new JSDOM(doc.pageContent, { url });
+    const reader = new Readability(dom.window.document, {
+      charThreshold: 25,
+    });
+    const article = reader.parse();
+
+    // Split text into chunks with specified parameters
+    const splitter = RecursiveCharacterTextSplitter.fromLanguage('html', {
+      chunkSize: 800,
+      chunkOverlap: 100,
+    });
+
+    const textChunks = await splitter.splitText(article?.content || '');
+    if (!textChunks || textChunks.length === 0) {
+      console.warn(`No text chunks found for URL: ${url}`);
+      return null;
+    }
+
+    const similarity = await Promise.all(
+      textChunks.map(async (chunk, i) => {
+        const sim = computeSimilarity(
+          rankAgainstVector,
+          (await embeddings.embedDocuments([chunk]))[0],
+        );
+        return {
+          index: i,
+          similarity: sim,
+        };
+      }),
+    );
+
+    let rankedChunks = similarity
+      .sort((a, b) => b.similarity - a.similarity)
+      .map((sim) => textChunks[sim.index])
+      .slice(0, 5);
+
+    // Combine chunks into a single document with the most relevant content
+    const combinedContent = rankedChunks.join('\n\n');
+
+    const returnDoc = new Document({
+      pageContent: combinedContent,
+      metadata: {
+        title: article?.title || doc.metadata.title || '',
+        url: url,
+        chunks: rankedChunks.length,
+      },
+    });
+
+    console.log(
+      `Got ranked content snippets, URL: ${url}, Chunks: ${rankedChunks.length}, Total Length: ${returnDoc.pageContent.length}`,
+    );
+
+    return returnDoc;
+  } catch (error) {
+    console.error(`Error fetching/parsing URL ${url}:`, error);
+
+    // Fallback to CheerioWebBaseLoader for simpler content extraction
+    // try {
+    //   console.log(`Fallback to Cheerio for URL: ${url}`);
+    //   const cheerioLoader = new CheerioWebBaseLoader(url);
+    //   const docs = await cheerioLoader.load();
+
+    //   if (docs && docs.length > 0) {
+    //     const doc = docs[0];
+
+    //     // Apply the same splitting logic to fallback content
+    //     const splitter = new RecursiveCharacterTextSplitter({
+    //       chunkSize: 800,
+    //       chunkOverlap: 100,
+    //     });
+
+    //     const textChunks = await splitter.splitText(doc.pageContent);
+    //     const combinedContent = textChunks.join('\n\n');
+
+    //     return new Document({
+    //       pageContent: combinedContent,
+    //       metadata: {
+    //         title: doc.metadata.title || '',
+    //         url: url,
+    //         chunks: textChunks.length,
+    //       },
+    //     });
+    //   }
+    // } catch (fallbackError) {
+    //   console.error(
+    //     `Cheerio fallback also failed for URL ${url}:`,
+    //     fallbackError,
+    //   );
+    // }
+
+    return null;
+  }
+};
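`getRankedWebContentSnippets` expects the query already embedded as a vector, then ranks page chunks against it. A hedged usage sketch; `OpenAIEmbeddings`, the URL, and the import path are illustrative stand-ins, not what this project necessarily uses:

```typescript
import { OpenAIEmbeddings } from '@langchain/openai'; // any Embeddings implementation works
import { getRankedWebContentSnippets } from './documents'; // path assumed

const rankSnippets = async () => {
  const embeddings = new OpenAIEmbeddings();
  const queryVector = await embeddings.embedQuery('height of the Eiffel Tower');

  const doc = await getRankedWebContentSnippets(
    'https://en.wikipedia.org/wiki/Eiffel_Tower', // illustrative URL
    queryVector,
    embeddings,
  );

  // doc is null on failure; otherwise the top 5 chunks joined into one Document
  console.log(doc?.metadata.title, doc?.pageContent.length);
};
```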