feat(agent): Implement structured output using Zod schemas for Analyzer, Task Manager, and Web Search agents

Willie Zutz 2025-06-22 23:59:29 -06:00
parent a8eaadc6ed
commit b9d4a4e779
7 changed files with 205 additions and 173 deletions

View file

@@ -7,6 +7,7 @@ import {
import { ChatPromptTemplate } from '@langchain/core/prompts';
import { Command, END } from '@langchain/langgraph';
import { EventEmitter } from 'events';
import { z } from 'zod';
import LineOutputParser from '../outputParsers/lineOutputParser';
import { formatDateForLLM } from '../utils';
import { AgentState } from './agentState';
@@ -21,6 +22,22 @@ import {
removeThinkingBlocksFromMessages,
} from '../utils/contentUtils';
// Define Zod schemas for structured output
const NextActionSchema = z.object({
action: z.enum(['good_content', 'need_user_info', 'need_more_info']).describe('The next action to take based on content analysis'),
reasoning: z.string().describe('Brief explanation of why this action was chosen')
});
const UserInfoRequestSchema = z.object({
question: z.string().describe('A detailed question to ask the user for additional information'),
reasoning: z.string().describe('Explanation of why this information is needed')
});
const SearchRefinementSchema = z.object({
question: z.string().describe('A refined search question to gather more specific information'),
reasoning: z.string().describe('Explanation of what information is missing and why this search will help')
});
export class AnalyzerAgent {
private llm: BaseChatModel;
private emitter: EventEmitter;
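For readers skimming the diff, here is a minimal, self-contained sketch of the pattern this commit adopts. The model class and prompt string are placeholders (any `BaseChatModel` with tool-calling support should work), not part of this commit:

```ts
import { z } from 'zod';
import { ChatOpenAI } from '@langchain/openai'; // placeholder; this repo wires in its own model

const NextActionSchema = z.object({
  action: z.enum(['good_content', 'need_user_info', 'need_more_info']),
  reasoning: z.string(),
});

const llm = new ChatOpenAI({ model: 'gpt-4o-mini' }); // hypothetical model choice

// Bind the schema so the model's reply is parsed and validated
// before it reaches application code.
const structuredLlm = llm.withStructuredOutput(NextActionSchema, {
  name: 'analyze_content',
});

// result is typed as { action: 'good_content' | 'need_user_info' | 'need_more_info'; reasoning: string }
const result = await structuredLlm.invoke(
  'Is the gathered context sufficient to answer the query?',
);
console.log(result.action, result.reasoning);
```

The payoff, visible throughout the hunks below, is that string heuristics like `startsWith('good_content')` collapse into a plain enum comparison.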
@@ -48,7 +65,6 @@ export class AnalyzerAgent {
state.originalQuery = state.query;
}
let nextActionContent = 'need_more_info';
// Skip full analysis if this is the first run.
//if (state.fullAnalysisAttempts > 0) {
// Emit initial analysis event
@@ -91,20 +107,20 @@
state.messages,
);
const nextActionResponse = await this.llm.invoke(
// Use structured output for next action decision
const structuredLlm = this.llm.withStructuredOutput(NextActionSchema, {
name: 'analyze_content',
});
const nextActionResponse = await structuredLlm.invoke(
[...thinkingBlocksRemovedMessages, new HumanMessage(nextActionPrompt)],
{ signal: this.signal },
);
nextActionContent = removeThinkingBlocks(
nextActionResponse.content as string,
);
console.log('Next action response:', nextActionContent);
console.log('Next action response:', nextActionResponse);
if (
!nextActionContent.startsWith('good_content') &&
!nextActionContent.startsWith('`good_content`')
nextActionResponse.action !== 'good_content'
) {
// If we don't have enough information, but we still have available tasks, proceed with the next task
@@ -119,9 +135,13 @@
}
if (
nextActionContent.startsWith('need_user_info') ||
nextActionContent.startsWith('`need_user_info`')
nextActionResponse.action === 'need_user_info'
) {
// Use structured output for user info request
const userInfoLlm = this.llm.withStructuredOutput(UserInfoRequestSchema, {
name: 'request_user_info',
});
const moreUserInfoPrompt = await ChatPromptTemplate.fromTemplate(
additionalUserInputPrompt,
).format({
@@ -139,39 +159,27 @@
query: state.originalQuery || state.query, // Use original query for user info context
});
const stream = await this.llm.stream(
const userInfoRequest = await userInfoLlm.invoke(
[
...removeThinkingBlocksFromMessages(state.messages),
new SystemMessage(moreUserInfoPrompt),
new HumanMessage(moreUserInfoPrompt),
],
{ signal: this.signal },
);
let fullResponse = '';
for await (const chunk of stream) {
if (this.signal.aborted) {
break;
}
const content = chunk.content;
if (typeof content === 'string' && content.length > 0) {
fullResponse += content;
// Emit each chunk as a data response in real-time
this.emitter.emit(
'data',
JSON.stringify({
type: 'response',
data: content,
}),
);
}
}
// Emit the complete question to the user
this.emitter.emit(
'data',
JSON.stringify({
type: 'response',
data: userInfoRequest.question,
}),
);
this.emitter.emit('end');
// Create the final response message with the complete content
const response = new SystemMessage(fullResponse);
const response = new SystemMessage(userInfoRequest.question);
return new Command({
goto: END,
@@ -182,6 +190,11 @@
}
// If we need more information from the LLM, generate a more specific search query
// Use structured output for search refinement
const searchRefinementLlm = this.llm.withStructuredOutput(SearchRefinementSchema, {
name: 'refine_search',
});
const moreInfoPrompt = await ChatPromptTemplate.fromTemplate(
additionalWebSearchPrompt,
).format({
@@ -199,7 +212,7 @@
query: state.originalQuery || state.query, // Use original query for more info context
});
const moreInfoResponse = await this.llm.invoke(
const searchRefinement = await searchRefinementLlm.invoke(
[
...removeThinkingBlocksFromMessages(state.messages),
new HumanMessage(moreInfoPrompt),
@@ -207,10 +220,6 @@
{ signal: this.signal },
);
const moreInfoQuestion = removeThinkingBlocks(
moreInfoResponse.content as string,
);
// Emit reanalyzing event when we need more information
this.emitter.emit('agent_action', {
type: 'agent_action',
@@ -219,11 +228,11 @@
message:
'Current context is insufficient - analyzing search requirements',
details: {
nextSearchQuery: moreInfoQuestion,
nextSearchQuery: searchRefinement.question,
documentCount: state.relevantDocuments.length,
searchIterations: state.searchInstructionHistory.length,
query: state.originalQuery || state.query, // Show original query in details
currentSearchFocus: moreInfoQuestion,
currentSearchFocus: searchRefinement.question,
},
},
});
@@ -233,14 +242,14 @@
update: {
messages: [
new AIMessage(
`The following question can help refine the search: ${moreInfoQuestion}`,
`The following question can help refine the search: ${searchRefinement.question}`,
),
],
query: moreInfoQuestion, // Use the refined question for TaskManager to analyze
searchInstructions: moreInfoQuestion,
query: searchRefinement.question, // Use the refined question for TaskManager to analyze
searchInstructions: searchRefinement.question,
searchInstructionHistory: [
...(state.searchInstructionHistory || []),
moreInfoQuestion,
searchRefinement.question,
],
fullAnalysisAttempts: 1,
originalQuery: state.originalQuery || state.query, // Preserve the original user query

View file

@@ -3,10 +3,19 @@ import { AIMessage } from '@langchain/core/messages';
import { PromptTemplate } from '@langchain/core/prompts';
import { Command } from '@langchain/langgraph';
import { EventEmitter } from 'events';
import { z } from 'zod';
import { taskBreakdownPrompt } from '../prompts/taskBreakdown';
import { AgentState } from './agentState';
import { setTemperature } from '../utils/modelUtils';
// Define Zod schema for structured task breakdown output
const TaskBreakdownSchema = z.object({
tasks: z.array(z.string()).describe('Array of specific, focused tasks broken down from the original query'),
reasoning: z.string().describe('Explanation of how and why the query was broken down into these tasks')
});
type TaskBreakdown = z.infer<typeof TaskBreakdownSchema>;
export class TaskManagerAgent {
private llm: BaseChatModel;
private emitter: EventEmitter;
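A quick aside on the `z.infer` line above: it derives a static TypeScript type from the schema, so downstream code is checked at compile time instead of relying on ad-hoc string parsing. A minimal sketch (the consumer function is hypothetical):

```ts
import { z } from 'zod';

const TaskBreakdownSchema = z.object({
  tasks: z.array(z.string()),
  reasoning: z.string(),
});

// Inferred as: { tasks: string[]; reasoning: string }
type TaskBreakdown = z.infer<typeof TaskBreakdownSchema>;

// Hypothetical consumer: the compiler now guarantees the shape,
// replacing the old "split on newlines, look for TASK:" parsing.
function logBreakdown(result: TaskBreakdown): void {
  result.tasks.forEach((task, i) => console.log(`Task ${i + 1}: ${task}`));
  console.log('Reasoning:', result.reasoning);
}
```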
@@ -115,19 +124,19 @@ export class TaskManagerAgent {
query: state.query,
});
const taskBreakdownResult = await this.llm.invoke([prompt], {
// Use structured output for task breakdown
const structuredLlm = this.llm.withStructuredOutput(TaskBreakdownSchema, {
name: 'break_down_tasks',
});
const taskBreakdownResult = await structuredLlm.invoke([prompt], {
signal: this.signal,
});
// Parse the response to extract tasks
const responseContent = taskBreakdownResult.content as string;
console.log('Task breakdown response:', taskBreakdownResult);
console.log('Task breakdown response:', responseContent);
const taskLines = responseContent
.split('\n')
.filter((line) => line.trim().startsWith('TASK:'))
.map((line) => line.replace('TASK:', '').trim())
.filter((task) => task.length > 0);
// Extract tasks from structured response
const taskLines = taskBreakdownResult.tasks.filter((task) => task.trim().length > 0);
if (taskLines.length === 0) {
// Fallback: if no tasks found, use the original query
@@ -137,6 +146,7 @@
console.log(
`Task breakdown completed: ${taskLines.length} tasks identified`,
);
console.log('Reasoning:', taskBreakdownResult.reasoning);
taskLines.forEach((task, index) => {
console.log(`Task ${index + 1}: ${task}`);
});
@@ -151,6 +161,7 @@
query: state.query,
taskCount: taskLines.length,
tasks: taskLines,
reasoning: taskBreakdownResult.reasoning,
},
},
});

View file

@@ -4,6 +4,7 @@ import { PromptTemplate } from '@langchain/core/prompts';
import { Command, END } from '@langchain/langgraph';
import { EventEmitter } from 'events';
import { Document } from 'langchain/document';
import { z } from 'zod';
import LineOutputParser from '../outputParsers/lineOutputParser';
import { webSearchRetrieverAgentPrompt } from '../prompts/webSearch';
import { searchSearxng } from '../searxng';
@@ -19,6 +20,14 @@ import { Embeddings } from '@langchain/core/embeddings';
import { removeThinkingBlocksFromMessages } from '../utils/contentUtils';
import computeSimilarity from '../utils/computeSimilarity';
// Define Zod schema for structured search query output
const SearchQuerySchema = z.object({
searchQuery: z.string().describe('The optimized search query to use for web search'),
reasoning: z.string().describe('Explanation of how the search query was optimized for better results')
});
type SearchQuery = z.infer<typeof SearchQuerySchema>;
export class WebSearchAgent {
private llm: BaseChatModel;
private emitter: EventEmitter;
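One caveat with this approach: `withStructuredOutput` throws when the model's reply cannot be coerced into the schema, whereas the old `LineOutputParser` path typically degraded to an empty string. A generic guard like the sketch below (illustrative only, not part of this commit) is one way to keep the agents resilient on weaker models:

```ts
// Illustrative retry helper for structured-output calls that may fail
// schema validation; tune attempts/backoff to taste.
async function invokeWithRetry<T>(
  call: () => Promise<T>,
  attempts = 2,
): Promise<T> {
  let lastError: unknown;
  for (let i = 0; i < attempts; i++) {
    try {
      return await call();
    } catch (err) {
      lastError = err;
    }
  }
  throw lastError;
}

// Hypothetical usage with the structuredLlm created in this hunk:
// const result = await invokeWithRetry(() =>
//   structuredLlm.invoke(messages, { signal: this.signal }),
// );
```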
@@ -85,18 +94,20 @@ export class WebSearchAgent {
supervisor: state.searchInstructions,
});
const searchQueryResult = await this.llm.invoke(
// Use structured output for search query generation
const structuredLlm = this.llm.withStructuredOutput(SearchQuerySchema, {
name: 'generate_search_query',
});
const searchQueryResult = await structuredLlm.invoke(
[...removeThinkingBlocksFromMessages(state.messages), prompt],
{ signal: this.signal },
);
// Parse the response to extract the search query with the lineoutputparser
const lineOutputParser = new LineOutputParser({ key: 'answer' });
const searchQuery = await lineOutputParser.parse(
searchQueryResult.content as string,
);
const searchQuery = searchQueryResult.searchQuery;
console.log(`Performing web search for query: "${searchQuery}"`);
console.log('Search query reasoning:', searchQueryResult.reasoning);
// Emit executing web search event
this.emitter.emit('agent_action', {

View file

@@ -63,11 +63,6 @@ Ask yourself: "Could this missing information reasonably be found through a web search?"
- If it's factual and searchable \`need_more_info\`
- If the context is complete or the user wants to use the existing context \`good_content\`
The only output in your response should be one of the following:
- \`good_content\`
- \`need_user_info\`
- \`need_more_info\`
# System Instructions
{systemInstructions}
@@ -81,7 +76,11 @@ Today's date is {date}
<context>
{context}
</context>
`;
# Search Instruction History
{searchInstructionHistory}
Provide your response as a JSON object with "action" and "reasoning" fields where action is one of: good_content, need_user_info, or need_more_info.`;
export const additionalUserInputPrompt = `You are an expert content analyzer.
Your task is to analyze the provided context and user query to determine what additional information is needed to fully answer the user's query.
@@ -105,7 +104,7 @@ Today's date is {date}
</context>
# Instructions
Respond with a detailed question that will be directed to the user to gather more specific information that can help refine the search.
Respond with a JSON object containing "question" and "reasoning" fields. The question should be detailed and directed to the user to gather more specific information that can help refine the search. The reasoning should explain why this information is needed.
`;
export const additionalWebSearchPrompt = `
@@ -135,5 +134,5 @@ Today's date is {date}
{context}
</context>
Respond with a detailed question that will be directed to an LLM to gather more specific information that can help refine the search.
Respond with a JSON object containing "question" and "reasoning" fields. The question should be detailed and directed to an LLM to gather more specific information that can help refine the search. The reasoning should explain what information is missing and why this search will help.
`;
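For illustration, replies conforming to these revised prompts would validate against the schemas defined in the analyzer diff above (the reply content here is hypothetical):

```ts
import { z } from 'zod';

// Mirrors UserInfoRequestSchema from the analyzer diff.
const UserInfoRequestSchema = z.object({
  question: z.string(),
  reasoning: z.string(),
});

// A hypothetical model reply that the schema would accept:
const exampleReply = UserInfoRequestSchema.parse({
  question: 'Which city and date range should the itinerary cover?',
  reasoning: 'The query mentions a trip but gives no destination or dates.',
});
console.log(exampleReply.question);
```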

View file

@@ -30,47 +30,49 @@ export const taskBreakdownPrompt = `You are a task breakdown specialist. Your jo
**Input**: "What's the capital of New York, California, and France?"
**Analysis**: Multiple distinct geographical subjects
**Output**:
TASK: What's the capital of New York?
TASK: What's the capital of California?
TASK: What's the capital of France?
{{
"tasks": [
"What's the capital of New York?",
"What's the capital of California?",
"What's the capital of France?"
],
"reasoning": "The question asks about capitals of three distinct geographical entities that can each be answered independently."
}}
**Input**: "How many calories are in my meal of: One chicken breast, one apple, three oreo cookies, two cups of peanut butter"
**Analysis**: Multiple food items requiring separate calorie calculations
**Output**:
TASK: How many calories are in one chicken breast?
TASK: How many calories are in one apple?
TASK: How many calories are in one oreo cookie?
TASK: How many calories are in one cup of peanut butter?
{{
"tasks": [
"How many calories are in one chicken breast?",
"How many calories are in one apple?",
"How many calories are in one oreo cookie?",
"How many calories are in one cup of peanut butter?"
],
"reasoning": "The question involves calculating calories for multiple distinct food items that can be researched separately and then combined."
}}
**Input**: "What is the capital of France?"
**Analysis**: Single focused question, no breakdown needed
**Output**:
TASK: What is the capital of France?
{{
"tasks": ["What is the capital of France?"],
"reasoning": "This is already a single, focused question that doesn't require breaking down into smaller parts."
}}
**Input**: "Compare the economies of Japan and Germany"
**Analysis**: Comparative question requiring detailed data about each economy separately
**Output**:
TASK: What is the current state of Japan's economy?
TASK: What is the current state of Germany's economy?
**Input**: "What are the side effects of aspirin, ibuprofen, and acetaminophen?"
**Analysis**: Multiple distinct medications
**Output**:
TASK: What are the side effects of aspirin?
TASK: What are the side effects of ibuprofen?
TASK: What are the side effects of acetaminophen?
**Input**: "What day is New Year's Day this year?"
**Analysis**: Single focused question, no breakdown needed
**Output**:
TASK: What day is New Year's Day this year?
{{
"tasks": [
"What is the current state of Japan's economy?",
"What is the current state of Germany's economy?"
],
"reasoning": "To compare two economies, we need detailed information about each country's economic situation separately, which can then be compared."
}}
## Your Task:
Analyze this user question: "{query}"
Provide your response in the following format:
- Each sub-question on a new line starting with "TASK:"
- If the question is already focused, provide it as a single task
Your response:`;
Provide your response as a JSON object with "tasks" (array of task strings) and "reasoning" (explanation of your analysis) fields.`;

View file

@@ -181,8 +181,6 @@ export const webSearchRetrieverAgentPrompt = `
- Condense the question to its essence and remove any unnecessary details
- Search queries should be short and to the point, focusing on the main topic or question
- Ensure the question is grammatically correct and free of spelling errors
- If it is a simple writing task or a greeting (unless the greeting contains a question after it) like Hi, Hello, How are you, etc. instead of a question then you need to return \`not_needed\` as the response in the <answer> XML block
- If you are a thinking or reasoning AI, do not use <answer> and </answer> or <links> and </links> tags in your thinking. Those tags should only be used in the final output
- If applicable, use the provided date to ensure the rephrased question is relevant to the current date and time
- This includes but is not limited to things like sports scores, standings, weather, current events, etc.
- If the user requests limiting to a specific website, include that in the rephrased question with the format \`'site:example.com'\`, be sure to include the quotes. Only do this if the limiting is explicitly mentioned in the question
@@ -191,9 +189,10 @@ export const webSearchRetrieverAgentPrompt = `
# Data
- The user question is contained in the <question> tag after the <examples> below
- You must always return the rephrased question inside an <answer> XML block, if there are no links in the follow-up question then don't insert a <links> XML block in your response
- You must return your response as a JSON object with "searchQuery" and "reasoning" fields
- The searchQuery should contain the optimized search query
- The reasoning should explain how you optimized the query for better search results
- Current date is: {date}
- Do not include any other text in your answer
# System Instructions
- These instructions are provided by the user in the <systemInstructions> tag
@@ -214,9 +213,10 @@ There are several examples attached for your reference inside the below examples
</supervisor>
</input>
<output>
<answer>
Run Windows games on macOS with Apple Silicon
</answer>
{{
"searchQuery": "Run Windows games on macOS with Apple Silicon",
"reasoning": "Simplified the query to focus on the core topic of running Windows games on Apple Silicon Macs, removing the requirement for source count as that's handled by the search system."
}}
</output>
</example>
<example>
@@ -229,9 +229,10 @@ There are several examples attached for your reference inside the below examples
</supervisor>
</input>
<output>
<answer>
F1 Monaco Grand Prix highlights
</answer>
{{
"searchQuery": "F1 Monaco Grand Prix highlights",
"reasoning": "Added specific context from supervisor instructions to identify this as an F1 Monaco Grand Prix query, making the search more targeted."
}}
</output>
</example>
@@ -242,22 +243,10 @@ There are several examples attached for your reference inside the below examples
</question>
</input>
<output>
<answer>
Capital of France
</answer>
</output>
</example>
<example>
<input>
<question>
Hi, how are you?
</question>
</input>
<output>
<answer>
not_needed
</answer>
{{
"searchQuery": "Capital of France",
"reasoning": "Simplified the query to essential keywords for better search engine optimization while maintaining the core meaning."
}}
</output>
</example>
@@ -268,9 +257,10 @@ There are several examples attached for your reference inside the below examples
</question>
</input>
<output>
<answer>
Weather in Albany, New York {date} 'site:weather.com'
</answer>
{{
"searchQuery": "Weather in Albany, New York {date} 'site:weather.com'",
"reasoning": "Added location context and current date for weather relevance, included site restriction as requested by user."
}}
</output>
</example>
@@ -281,10 +271,10 @@ There are several examples attached for your reference inside the below examples
</question>
</input>
<output>
## Example 6 output
<answer>
{date} F1 constructor standings
</answer>
{{
"searchQuery": "{date} F1 constructor standings",
"reasoning": "Added current date to ensure we get the most recent F1 constructor standings information."
}}
</output>
</example>
@@ -295,10 +285,10 @@ There are several examples attached for your reference inside the below examples
</question>
</input>
<output>
## Example 7 output
<answer>
Top 10 restaurants in New York on {date} 'site:yelp.com'
</answer>
{{
"searchQuery": "Top 10 restaurants in New York on {date} 'site:yelp.com'",
"reasoning": "Focused on the core query about top restaurants, added current date for relevance, and included the site restriction to yelp.com as requested."
}}
</output>
</example>
@@ -312,9 +302,10 @@ There are several examples attached for your reference inside the below examples
</supervisor>
</input>
<output>
<answer>
Top 10 restaurants in New York on {date}
</answer>
{{
"searchQuery": "Top 10 restaurants in New York on {date}",
"reasoning": "Following supervisor instructions to focus specifically on New York restaurants, ignoring Chicago and Boston for this search iteration."
}}
</output>
</examples>

View file

@@ -1,6 +1,6 @@
import { Document } from '@langchain/core/documents';
import { BaseChatModel } from '@langchain/core/language_models/chat_models';
import LineOutputParser from '../outputParsers/lineOutputParser';
import { z } from 'zod';
import { formatDateForLLM } from '../utils';
import { getWebContent } from './documents';
@@ -9,6 +9,13 @@ export type SummarizeResult = {
notRelevantReason?: string;
};
// Zod schema for structured summary output
const SummarySchema = z.object({
isRelevant: z.boolean().describe('Whether the content is relevant to the user query'),
summary: z.string().describe('Detailed summary of the content in markdown format, or explanation if not relevant'),
notRelevantReason: z.string().optional().describe('Specific reason why content is not relevant (only if isRelevant is false)')
});
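A note on the `.optional()` field above: in the inferred type it becomes `notRelevantReason?: string`, which is why the relevance handling further down falls back to `result.summary` when no explicit reason is present. A minimal sketch:

```ts
import { z } from 'zod';

const SummarySchema = z.object({
  isRelevant: z.boolean(),
  summary: z.string(),
  notRelevantReason: z.string().optional(),
});

// Inferred as: { isRelevant: boolean; summary: string; notRelevantReason?: string }
type Summary = z.infer<typeof SummarySchema>;

// Hypothetical helper mirroring the fallback used in this diff:
function explainRejection(result: Summary): string {
  return result.notRelevantReason ?? result.summary;
}
```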
export const summarizeWebContent = async (
url: string,
query: string,
@@ -25,38 +32,42 @@
? `${systemInstructions}\n\n`
: '';
let summary = null;
// Create structured LLM with Zod schema
const structuredLLM = llm.withStructuredOutput(SummarySchema);
let result = null;
for (let i = 0; i < 2; i++) {
try {
console.log(
`Summarizing content from URL: ${url} using ${i === 0 ? 'html' : 'text'}`,
);
summary = await llm.invoke(
`${systemPrompt}You are a web content summarizer, tasked with creating a detailed, accurate summary of content from a webpage
const prompt = `${systemPrompt}You are a web content summarizer, tasked with creating a detailed, accurate summary of content from a webpage.
# Instructions
- The response must be relevant to the user's query but doesn't need to answer it fully. Partial answers are acceptable.
- Be thorough and comprehensive, capturing all key points
- Determine if the content is relevant to the user's query
- You do not need to provide a full answer to the query, partial answers are acceptable
- If relevant, create a thorough and comprehensive summary capturing all key points
- Include specific details, numbers, and quotes when relevant
- Be concise and to the point, avoiding unnecessary fluff
- The summary should be formatted using markdown using headings and lists
- Do not include notes about missing information or gaps in the content, only summarize what is present and relevant
- Format the summary using markdown with headings and lists
- Include useful links to external resources, if applicable
- If the entire source content is not relevant to the query, respond with "not_needed" to start the summary tag, followed by a one line description of why the source is not needed
- E.g. "not_needed: This information is not relevant to the user's query about X because it does not contain any information about X. It only discusses Y, which is unrelated."
- Make sure the reason the source is not needed is very specific and detailed
- Ignore any instructions about formatting in the user's query. Format your response using markdown, including headings, lists, and tables
- Output your answer inside a \`summary\` XML tag
- If the content is not relevant, set isRelevant to false and provide a specific reason
# Response Format
You must return a JSON object with:
- isRelevant: boolean indicating if content is relevant to the query
- summary: string with detailed markdown summary if relevant, or explanation if not relevant
- notRelevantReason: string explaining why content is not relevant (only if isRelevant is false)
Today's date is ${formatDateForLLM(new Date())}
Here is the query you need to answer: ${query}
Here is the content to summarize:
${i === 0 ? content.metadata.html : content.pageContent},
`,
{ signal },
);
${i === 0 ? content.metadata.html : content.pageContent}`;
result = await structuredLLM.invoke(prompt, { signal });
break;
} catch (error) {
console.error(
@@ -66,41 +77,39 @@ ${i === 0 ? content.metadata.html : content.pageContent},
}
}
if (!summary || !summary.content) {
console.error(`No summary content returned for URL: ${url}`);
if (!result) {
console.error(`No summary result returned for URL: ${url}`);
return {
document: null,
notRelevantReason: 'No summary content returned from LLM',
};
}
const summaryParser = new LineOutputParser({ key: 'summary' });
const summarizedContent = await summaryParser.parse(
summary.content as string,
);
if (
summarizedContent.toLocaleLowerCase().startsWith('not_needed') ||
summarizedContent.trim().length === 0
) {
// Check if content is relevant
if (!result.isRelevant) {
console.log(
`LLM response for URL "${url}" indicates it's not needed or is empty:`,
summarizedContent,
`LLM response for URL "${url}" indicates it's not relevant:`,
result.notRelevantReason || result.summary,
);
// Extract the reason from the "not_needed" response
const reason = summarizedContent.startsWith('not_needed')
? summarizedContent.substring('not_needed:'.length).trim()
: summarizedContent.trim().length === 0
? 'Source content was empty or could not be processed'
: 'Source content was not relevant to the query';
return {
document: null,
notRelevantReason: result.notRelevantReason || result.summary
};
}
return { document: null, notRelevantReason: reason };
// Content is relevant, create document with summary
if (!result.summary || result.summary.trim().length === 0) {
console.error(`No summary content in relevant response for URL: ${url}`);
return {
document: null,
notRelevantReason: 'Summary content was empty',
};
}
return {
document: new Document({
pageContent: summarizedContent,
pageContent: result.summary,
metadata: {
...content.metadata,
url: url,