feat(agent): More agent tweaks

This commit is contained in:
Willie Zutz 2025-06-22 13:35:01 -06:00
parent c3e845e0e2
commit a8eaadc6ed
11 changed files with 169 additions and 26 deletions

View file

@ -66,11 +66,41 @@ const handleEmitterEvents = async (
chatId: string, chatId: string,
startTime: number, startTime: number,
userMessageId: string, userMessageId: string,
abortController: AbortController,
) => { ) => {
let recievedMessage = ''; let recievedMessage = '';
let sources: any[] = []; let sources: any[] = [];
let searchQuery: string | undefined; let searchQuery: string | undefined;
let searchUrl: string | undefined; let searchUrl: string | undefined;
let isStreamActive = true;
// Keep-alive ping mechanism to prevent reverse proxy timeouts
const pingInterval = setInterval(() => {
if (isStreamActive) {
try {
writer.write(
encoder.encode(
JSON.stringify({
type: 'ping',
timestamp: Date.now(),
}) + '\n',
),
);
} catch (error) {
// If writing fails, the connection is likely closed
clearInterval(pingInterval);
isStreamActive = false;
}
} else {
clearInterval(pingInterval);
}
}, 30000); // Send ping every 30 seconds
// Clean up ping interval if request is cancelled
abortController.signal.addEventListener('abort', () => {
isStreamActive = false;
clearInterval(pingInterval);
});
stream.on('data', (data) => { stream.on('data', (data) => {
const parsedData = JSON.parse(data); const parsedData = JSON.parse(data);
@ -149,6 +179,9 @@ const handleEmitterEvents = async (
}); });
stream.on('end', () => { stream.on('end', () => {
isStreamActive = false;
clearInterval(pingInterval);
const endTime = Date.now(); const endTime = Date.now();
const duration = endTime - startTime; const duration = endTime - startTime;
@ -190,6 +223,9 @@ const handleEmitterEvents = async (
.execute(); .execute();
}); });
stream.on('error', (data) => { stream.on('error', (data) => {
isStreamActive = false;
clearInterval(pingInterval);
const parsedData = JSON.parse(data); const parsedData = JSON.parse(data);
writer.write( writer.write(
encoder.encode( encoder.encode(
@ -413,6 +449,7 @@ export const POST = async (req: Request) => {
message.chatId, message.chatId,
startTime, startTime,
message.messageId, message.messageId,
abortController,
); );
handleHistorySave(message, humanMessageId, body.focusMode, body.files); handleHistorySave(message, humanMessageId, body.focusMode, body.files);

View file

@ -192,6 +192,7 @@ export const POST = async (req: Request) => {
const stream = new ReadableStream({ const stream = new ReadableStream({
start(controller) { start(controller) {
let sources: any[] = []; let sources: any[] = [];
let isStreamActive = true;
controller.enqueue( controller.enqueue(
encoder.encode( encoder.encode(
@ -202,7 +203,31 @@ export const POST = async (req: Request) => {
), ),
); );
// Keep-alive ping mechanism to prevent reverse proxy timeouts
const pingInterval = setInterval(() => {
if (isStreamActive && !signal.aborted) {
try {
controller.enqueue(
encoder.encode(
JSON.stringify({
type: 'ping',
timestamp: Date.now(),
}) + '\n',
),
);
} catch (error) {
// If enqueueing fails, the connection is likely closed
clearInterval(pingInterval);
isStreamActive = false;
}
} else {
clearInterval(pingInterval);
}
}, 30000); // Send ping every 30 seconds
signal.addEventListener('abort', () => { signal.addEventListener('abort', () => {
isStreamActive = false;
clearInterval(pingInterval);
emitter.removeAllListeners(); emitter.removeAllListeners();
try { try {
@ -244,6 +269,9 @@ export const POST = async (req: Request) => {
emitter.on('end', () => { emitter.on('end', () => {
if (signal.aborted) return; if (signal.aborted) return;
isStreamActive = false;
clearInterval(pingInterval);
controller.enqueue( controller.enqueue(
encoder.encode( encoder.encode(
JSON.stringify({ JSON.stringify({
@ -257,6 +285,9 @@ export const POST = async (req: Request) => {
emitter.on('error', (error: any) => { emitter.on('error', (error: any) => {
if (signal.aborted) return; if (signal.aborted) return;
isStreamActive = false;
clearInterval(pingInterval);
controller.error(error); controller.error(error);
}); });
}, },

View file

@ -431,6 +431,14 @@ const ChatWindow = ({ id }: { id?: string }) => {
return; return;
} }
// Handle ping messages to keep connection alive (no action needed)
if (data.type === 'ping') {
console.debug('Ping received');
// Ping messages are used to keep the connection alive during long requests
// No action is required on the frontend
return;
}
if (data.type === 'agent_action') { if (data.type === 'agent_action') {
const agentActionEvent: AgentActionEvent = { const agentActionEvent: AgentActionEvent = {
action: data.data.action, action: data.data.action,

View file

@ -101,10 +101,27 @@ export class AnalyzerAgent {
); );
console.log('Next action response:', nextActionContent); console.log('Next action response:', nextActionContent);
//}
if (!nextActionContent.startsWith('good_content')) { if (
if (nextActionContent.startsWith('need_user_info')) { !nextActionContent.startsWith('good_content') &&
!nextActionContent.startsWith('`good_content`')
) {
// If we don't have enough information, but we still have available tasks, proceed with the next task
if (state.tasks && state.tasks.length > 0) {
const hasMoreTasks = state.currentTaskIndex < state.tasks.length - 1;
if (hasMoreTasks) {
return new Command({
goto: 'task_manager',
});
}
}
if (
nextActionContent.startsWith('need_user_info') ||
nextActionContent.startsWith('`need_user_info`')
) {
const moreUserInfoPrompt = await ChatPromptTemplate.fromTemplate( const moreUserInfoPrompt = await ChatPromptTemplate.fromTemplate(
additionalUserInputPrompt, additionalUserInputPrompt,
).format({ ).format({

View file

@ -52,6 +52,8 @@ Your task is to provide answers that are:
### Citation Requirements ### Citation Requirements
- Cite every single fact, statement, or sentence using [number] notation corresponding to the source from the provided \`context\` - Cite every single fact, statement, or sentence using [number] notation corresponding to the source from the provided \`context\`
- If a statement is based on AI model inference or training data, it must be marked as \`[AI]\` and not cited from the context
- If a statement is based on previous messages in the conversation history, it must be marked as \`[Hist]\` and not cited from the context
- Integrate citations naturally at the end of sentences or clauses as appropriate. For example, "The Eiffel Tower is one of the most visited landmarks in the world[1]." - Integrate citations naturally at the end of sentences or clauses as appropriate. For example, "The Eiffel Tower is one of the most visited landmarks in the world[1]."
- Ensure that **every sentence in your response includes at least one citation**, even when information is inferred or connected to general knowledge available in the provided context - Ensure that **every sentence in your response includes at least one citation**, even when information is inferred or connected to general knowledge available in the provided context
- Use multiple sources for a single detail if applicable, such as, "Paris is a cultural hub, attracting millions of visitors annually[1][2]." - Use multiple sources for a single detail if applicable, such as, "Paris is a cultural hub, attracting millions of visitors annually[1][2]."
@ -68,20 +70,29 @@ Your task is to provide answers that are:
${this.personaInstructions} ${this.personaInstructions}
</personaInstructions> </personaInstructions>
User Query: ${state.originalQuery || state.query} # Conversation History Context:
${
removeThinkingBlocksFromMessages(state.messages)
.map((msg) => `<${msg.getType()}>${msg.content}</${msg.getType()}>`)
.join('\n') || 'No previous conversation context'
}
Available Information: # Available Information:
${state.relevantDocuments ${state.relevantDocuments
.map( .map(
(doc, index) => (doc, index) =>
`<${index + 1}>\n `<${index + 1}>\n
<title>${doc.metadata.title}</title>\n <title>${doc.metadata.title}</title>\n
${doc.metadata?.url.toLowerCase().includes('file') ? '' : '\n<url>' + doc.metadata.url + '</url>\n'} ${doc.metadata?.url.toLowerCase().includes('file') ? '' : '\n<url>' + doc.metadata.url + '</url>\n'}
<content>\n${doc.pageContent}\n</content>\n <content>\n${doc.pageContent}\n</content>\n
</${index + 1}>`, </${index + 1}>`,
) )
.join('\n')} .join('\n')}
`;
# User Query: ${state.originalQuery || state.query}
Answer the user query:
`;
// Stream the response in real-time using LLM streaming capabilities // Stream the response in real-time using LLM streaming capabilities
let fullResponse = ''; let fullResponse = '';
@ -99,7 +110,6 @@ ${doc.metadata?.url.toLowerCase().includes('file') ? '' : '\n<url>' + doc.metada
const stream = await this.llm.stream( const stream = await this.llm.stream(
[ [
...removeThinkingBlocksFromMessages(state.messages),
new SystemMessage(synthesisPrompt), new SystemMessage(synthesisPrompt),
new HumanMessage(state.originalQuery || state.query), new HumanMessage(state.originalQuery || state.query),
], ],

View file

@ -121,6 +121,8 @@ export class TaskManagerAgent {
// Parse the response to extract tasks // Parse the response to extract tasks
const responseContent = taskBreakdownResult.content as string; const responseContent = taskBreakdownResult.content as string;
console.log('Task breakdown response:', responseContent);
const taskLines = responseContent const taskLines = responseContent
.split('\n') .split('\n')
.filter((line) => line.trim().startsWith('TASK:')) .filter((line) => line.trim().startsWith('TASK:'))

View file

@ -17,23 +17,27 @@ import { AgentState } from './agentState';
import { setTemperature } from '../utils/modelUtils'; import { setTemperature } from '../utils/modelUtils';
import { Embeddings } from '@langchain/core/embeddings'; import { Embeddings } from '@langchain/core/embeddings';
import { removeThinkingBlocksFromMessages } from '../utils/contentUtils'; import { removeThinkingBlocksFromMessages } from '../utils/contentUtils';
import computeSimilarity from '../utils/computeSimilarity';
export class WebSearchAgent { export class WebSearchAgent {
private llm: BaseChatModel; private llm: BaseChatModel;
private emitter: EventEmitter; private emitter: EventEmitter;
private systemInstructions: string; private systemInstructions: string;
private signal: AbortSignal; private signal: AbortSignal;
private embeddings: Embeddings;
constructor( constructor(
llm: BaseChatModel, llm: BaseChatModel,
emitter: EventEmitter, emitter: EventEmitter,
systemInstructions: string, systemInstructions: string,
signal: AbortSignal, signal: AbortSignal,
embeddings: Embeddings,
) { ) {
this.llm = llm; this.llm = llm;
this.emitter = emitter; this.emitter = emitter;
this.systemInstructions = systemInstructions; this.systemInstructions = systemInstructions;
this.signal = signal; this.signal = signal;
this.embeddings = embeddings;
} }
/** /**
@ -138,16 +142,33 @@ export class WebSearchAgent {
let bannedSummaryUrls = state.bannedSummaryUrls || []; let bannedSummaryUrls = state.bannedSummaryUrls || [];
let bannedPreviewUrls = state.bannedPreviewUrls || []; let bannedPreviewUrls = state.bannedPreviewUrls || [];
const queryVector = await this.embeddings.embedQuery(
state.originalQuery + ' ' + currentTask,
);
// Extract preview content from top 8 search results for analysis // Filter out banned URLs first
const previewContents: PreviewContent[] = searchResults.results const filteredResults = searchResults.results.filter(
.filter( (result) =>
(result) => !bannedSummaryUrls.includes(result.url) &&
!bannedSummaryUrls.includes(result.url) && !bannedPreviewUrls.includes(result.url),
!bannedPreviewUrls.includes(result.url), );
) // Filter out banned URLs first
.slice(0, 8) // Then take top 8 results // Calculate similarities for all filtered results
.map((result) => ({ const resultsWithSimilarity = await Promise.all(
filteredResults.map(async (result) => {
const vector = await this.embeddings.embedQuery(
result.title + ' ' + result.content || '',
);
const similarity = computeSimilarity(vector, queryVector);
return { result, similarity };
}),
);
// Sort by relevance score and take top 8 results
const previewContents: PreviewContent[] = resultsWithSimilarity
.sort((a, b) => b.similarity - a.similarity)
.slice(0, 8)
.map(({ result }) => ({
title: result.title || 'Untitled', title: result.title || 'Untitled',
snippet: result.content || '', snippet: result.content || '',
url: result.url, url: result.url,
@ -181,6 +202,7 @@ export class WebSearchAgent {
previewAnalysisResult = await analyzePreviewContent( previewAnalysisResult = await analyzePreviewContent(
previewContents, previewContents,
state.query,
currentTask, currentTask,
removeThinkingBlocksFromMessages(state.messages), removeThinkingBlocksFromMessages(state.messages),
this.llm, this.llm,
@ -267,7 +289,9 @@ export class WebSearchAgent {
}); });
// Summarize the top 2 search results // Summarize the top 2 search results
for (const result of searchResults.results.slice(0, 8)) { for (const result of resultsWithSimilarity
.slice(0, 8)
.map((r) => r.result)) {
if (this.signal.aborted) { if (this.signal.aborted) {
console.warn('Search operation aborted by signal'); console.warn('Search operation aborted by signal');
break; // Exit if the operation is aborted break; // Exit if the operation is aborted
@ -381,7 +405,7 @@ export class WebSearchAgent {
console.log(responseMessage); console.log(responseMessage);
return new Command({ return new Command({
goto: 'task_manager', // Route back to task manager to check if more tasks remain goto: 'analyzer', // Route back to analyzer to process the results
update: { update: {
messages: [new AIMessage(responseMessage)], messages: [new AIMessage(responseMessage)],
relevantDocuments: documents, relevantDocuments: documents,

View file

@ -9,6 +9,7 @@ Your task is to analyze the provided context and determine if we have enough inf
# Response Options Decision Tree # Response Options Decision Tree
## Step 1: Check if content is sufficient ## Step 1: Check if content is sufficient
- If your training data and the provided context contain enough information to answer the user's query respond with \`good_content\`
- If the context fully answers the user's query with complete information respond with \`good_content\` - If the context fully answers the user's query with complete information respond with \`good_content\`
- If the user is requesting to use the existing context to answer their query respond with \`good_content\` - If the user is requesting to use the existing context to answer their query respond with \`good_content\`
- If the user is requesting to avoid web searches respond with \`good_content\` - If the user is requesting to avoid web searches respond with \`good_content\`

View file

@ -22,6 +22,8 @@ export const taskBreakdownPrompt = `You are a task breakdown specialist. Your jo
3. Maintain **specific details** like quantities, measurements, and qualifiers 3. Maintain **specific details** like quantities, measurements, and qualifiers
4. Use **clear, unambiguous language** in each sub-question 4. Use **clear, unambiguous language** in each sub-question
5. Keep the **same question type** (factual, analytical, etc.) 5. Keep the **same question type** (factual, analytical, etc.)
6. Avoid introducing **new concepts** or information not present in the original question
7. **Do not** repeat the same question multiple times; each sub-question should be unique and focused on a specific aspect of the original query
## Examples: ## Examples:
@ -58,6 +60,11 @@ TASK: What are the side effects of aspirin?
TASK: What are the side effects of ibuprofen? TASK: What are the side effects of ibuprofen?
TASK: What are the side effects of acetaminophen? TASK: What are the side effects of acetaminophen?
**Input**: "What day is New Year's Day this year?"
**Analysis**: Single focused question, no breakdown needed
**Output**:
TASK: What day is New Year's Day this year?
## Your Task: ## Your Task:
Analyze this user question: "{query}" Analyze this user question: "{query}"

View file

@ -61,6 +61,7 @@ export class AgentSearch {
emitter, emitter,
systemInstructions, systemInstructions,
signal, signal,
embeddings,
); );
this.analyzerAgent = new AnalyzerAgent( this.analyzerAgent = new AnalyzerAgent(
llm, llm,

View file

@ -19,6 +19,7 @@ export type PreviewContent = {
export const analyzePreviewContent = async ( export const analyzePreviewContent = async (
previewContents: PreviewContent[], previewContents: PreviewContent[],
query: string, query: string,
taskQuery: string,
chatHistory: BaseMessage[], chatHistory: BaseMessage[],
llm: BaseChatModel, llm: BaseChatModel,
systemInstructions: string, systemInstructions: string,
@ -60,14 +61,15 @@ Snippet: ${content.snippet}
console.log(`Invoking LLM for preview content analysis`); console.log(`Invoking LLM for preview content analysis`);
const analysisResponse = await llm.invoke( const analysisResponse = await llm.invoke(
`${systemPrompt}You are a preview content analyzer, tasked with determining if search result snippets contain sufficient information to answer a user's query. `${systemPrompt}You are a preview content analyzer, tasked with determining if search result snippets contain sufficient information to answer the Task Query.
# Instructions # Instructions
- Analyze the provided search result previews (titles + snippets), and chat history context to determine if they collectively contain enough information to provide a complete and accurate answer to the user's query - Analyze the provided search result previews (titles + snippets), and chat history context to determine if they collectively contain enough information to provide a complete and accurate answer to the Task Query
- You must make a binary decision: either the preview content is sufficient OR it is not sufficient - You must make a binary decision: either the preview content is sufficient OR it is not sufficient
- If the preview content can provide a complete answer to the query, respond with "sufficient" - If the preview content can provide a complete answer to the Task Query, respond with "sufficient"
- If the preview content lacks important details, requires deeper analysis, or cannot fully answer the query, respond with "not_needed: [specific reason why full content analysis is required]" - If the preview content lacks important details, requires deeper analysis, or cannot fully answer the Task Query, respond with "not_needed: [specific reason why full content analysis is required]"
- Be specific in your reasoning when the content is not sufficient - Be specific in your reasoning when the content is not sufficient
- The original query is provided for additional context, only use it for clarification of overall expectations and intent. You do **not** need to answer the original query directly or completely
- Output your decision inside a \`decision\` XML tag - Output your decision inside a \`decision\` XML tag
# Information Context: # Information Context:
@ -79,6 +81,9 @@ ${formattedChatHistory ? formattedChatHistory : 'No previous conversation contex
# User Query: # User Query:
${query} ${query}
# Task Query (what to answer):
${taskQuery}
# Search Result Previews to Analyze: # Search Result Previews to Analyze:
${formattedPreviewContent} ${formattedPreviewContent}
`, `,