feat(agent): Enhanced structured output support for chat models

- Introduced `withStructuredOutput` function to configure structured output for LLMs.
- Added support for Groq models using 'jsonMode' method.
- Enhanced flexibility with optional parameters for naming and raw output inclusion.
This commit is contained in:
Willie Zutz 2025-07-12 15:44:17 -06:00
parent 37c93c3c9b
commit de2459a624
16 changed files with 1995 additions and 1820 deletions

1077
package-lock.json generated

File diff suppressed because it is too large Load diff

View file

@ -16,6 +16,7 @@ import { BaseChatModel } from '@langchain/core/language_models/chat_models';
import { ChatOpenAI } from '@langchain/openai';
import { ChatOllama } from '@langchain/ollama';
import { z } from 'zod';
import { withStructuredOutput } from '@/lib/utils/structuredOutput';
interface FileRes {
fileName: string;
@ -66,7 +67,7 @@ ${excerpt}
Generate topics that describe what this document is about, its domain, and key subject areas. Focus on topics that would help determine relevance for search queries.`;
// Use structured output for reliable topic extraction
const structuredLlm = llm.withStructuredOutput(TopicsSchema, {
const structuredLlm = withStructuredOutput(llm, TopicsSchema, {
name: 'generate_topics',
});

View file

@ -21,6 +21,7 @@ import {
removeThinkingBlocks,
removeThinkingBlocksFromMessages,
} from '../utils/contentUtils';
import { withStructuredOutput } from '../utils/structuredOutput';
import next from 'next';
// Define Zod schemas for structured output
@ -157,7 +158,7 @@ export class AnalyzerAgent {
);
// Use structured output for next action decision
const structuredLlm = this.llm.withStructuredOutput(NextActionSchema, {
const structuredLlm = withStructuredOutput(this.llm, NextActionSchema, {
name: 'analyze_content',
});
@ -183,7 +184,8 @@ export class AnalyzerAgent {
if (nextActionResponse.action === 'need_user_info') {
// Use structured output for user info request
const userInfoLlm = this.llm.withStructuredOutput(
const userInfoLlm = withStructuredOutput(
this.llm,
UserInfoRequestSchema,
{
name: 'request_user_info',
@ -240,7 +242,8 @@ export class AnalyzerAgent {
// If we need more information from the LLM, generate a more specific search query
// Use structured output for search refinement
const searchRefinementLlm = this.llm.withStructuredOutput(
const searchRefinementLlm = withStructuredOutput(
this.llm,
SearchRefinementSchema,
{
name: 'refine_search',
@ -254,7 +257,7 @@ export class AnalyzerAgent {
context: state.relevantDocuments
.map(
(doc, index) =>
`<source${index + 1}>${doc?.metadata?.title ? `<title>${doc?.metadata?.title}</title>` : ''}<content>${doc.pageContent}</content></source${index + 1}>`,
`<source${index + 1}>${doc?.metadata?.title ? `\n<title>${doc?.metadata?.title}</title>` : ''}\n<content>${doc.pageContent}</content>\n</source${index + 1}>`,
)
.join('\n\n'),
date: formatDateForLLM(new Date()),
@ -293,11 +296,11 @@ export class AnalyzerAgent {
return new Command({
goto: 'task_manager',
update: {
messages: [
new AIMessage(
`The following question can help refine the search: ${searchRefinement.question}`,
),
],
// messages: [
// new AIMessage(
// `The following question can help refine the search: ${searchRefinement.question}`,
// ),
// ],
query: searchRefinement.question, // Use the refined question for TaskManager to analyze
searchInstructions: searchRefinement.question,
searchInstructionHistory: [
@ -330,13 +333,13 @@ export class AnalyzerAgent {
return new Command({
goto: 'synthesizer',
update: {
messages: [
new AIMessage(
`Analysis completed. We have sufficient information to answer the query.`,
),
],
},
// update: {
// messages: [
// new AIMessage(
// `Analysis completed. We have sufficient information to answer the query.`,
// ),
// ],
// },
});
} catch (error) {
console.error('Analysis error:', error);

View file

@ -9,6 +9,7 @@ import path from 'node:path';
import { AgentState } from './agentState';
import { contentRouterPrompt } from '../prompts/contentRouter';
import { removeThinkingBlocksFromMessages } from '../utils/contentUtils';
import { withStructuredOutput } from '../utils/structuredOutput';
// Define Zod schema for structured router decision output
const RouterDecisionSchema = z.object({
@ -87,6 +88,7 @@ export class ContentRouterAgent {
const template = PromptTemplate.fromTemplate(contentRouterPrompt);
const prompt = await template.format({
systemInstructions: this.systemInstructions,
currentTask: currentTask,
query: state.originalQuery || state.query,
focusMode: focusMode,
@ -97,7 +99,8 @@ export class ContentRouterAgent {
});
// Use structured output for routing decision
const structuredLlm = this.llm.withStructuredOutput(
const structuredLlm = withStructuredOutput(
this.llm,
RouterDecisionSchema,
{
name: 'route_content',
@ -146,9 +149,9 @@ export class ContentRouterAgent {
return new Command({
goto: validatedDecision.decision,
update: {
messages: [new AIMessage(responseMessage)],
},
// update: {
// messages: [new AIMessage(responseMessage)],
// },
});
} catch (error) {
console.error('Content router error:', error);

View file

@ -81,11 +81,11 @@ export class FileSearchAgent {
console.log('No processable file content found');
return new Command({
goto: 'analyzer',
update: {
messages: [
new AIMessage('No searchable content found in attached files.'),
],
},
// update: {
// messages: [
// new AIMessage('No searchable content found in attached files.'),
// ],
// },
});
}
@ -145,13 +145,13 @@ export class FileSearchAgent {
return new Command({
goto: 'analyzer',
update: {
messages: [
new AIMessage(
'No relevant content found in attached files for the current task.',
),
],
},
// update: {
// messages: [
// new AIMessage(
// 'No relevant content found in attached files for the current task.',
// ),
// ],
// },
});
}
@ -180,7 +180,7 @@ export class FileSearchAgent {
return new Command({
goto: 'analyzer', // Route back to analyzer to process the results
update: {
messages: [new AIMessage(responseMessage)],
// messages: [new AIMessage(responseMessage)],
relevantDocuments: rankedDocuments,
},
});

View file

@ -43,11 +43,11 @@ export class SynthesizerAgent {
.map((doc, index) => {
const isFile = doc.metadata?.url?.toLowerCase().includes('file');
return `<${index + 1}>\n
<title>${doc.metadata.title}</title>\n
<source_type>${isFile ? 'file' : 'web'}</source_type>\n
${isFile ? '' : '\n<url>' + doc.metadata.url + '</url>\n'}
<content>\n${doc.pageContent}\n</content>\n
</${index + 1}>`;
<title>${doc.metadata.title}</title>
<source_type>${isFile ? 'file' : 'web'}</source_type>
${isFile ? '' : '\n<url>' + doc.metadata.url + '</url>'}
<content>\n${doc.pageContent}\n </content>
</${index + 1}>`;
})
.join('\n');

View file

@ -7,6 +7,7 @@ import { z } from 'zod';
import { taskBreakdownPrompt } from '../prompts/taskBreakdown';
import { AgentState } from './agentState';
import { setTemperature } from '../utils/modelUtils';
import { withStructuredOutput } from '../utils/structuredOutput';
// Define Zod schema for structured task breakdown output
const TaskBreakdownSchema = z.object({
@ -76,11 +77,11 @@ export class TaskManagerAgent {
return new Command({
goto: 'content_router',
update: {
messages: [
new AIMessage(
`Task ${currentTaskIndex + 1} completed. Processing task ${nextTaskIndex + 1} of ${state.tasks.length}: "${state.tasks[nextTaskIndex]}"`,
),
],
// messages: [
// new AIMessage(
// `Task ${currentTaskIndex + 1} completed. Processing task ${nextTaskIndex + 1} of ${state.tasks.length}: "${state.tasks[nextTaskIndex]}"`,
// ),
// ],
currentTaskIndex: nextTaskIndex,
},
});
@ -101,13 +102,13 @@ export class TaskManagerAgent {
return new Command({
goto: 'analyzer',
update: {
messages: [
new AIMessage(
`All ${state.tasks.length} tasks completed. Moving to analysis phase.`,
),
],
},
// update: {
// messages: [
// new AIMessage(
// `All ${state.tasks.length} tasks completed. Moving to analysis phase.`,
// ),
// ],
// },
});
}
}
@ -141,7 +142,7 @@ export class TaskManagerAgent {
});
// Use structured output for task breakdown
const structuredLlm = this.llm.withStructuredOutput(TaskBreakdownSchema, {
const structuredLlm = withStructuredOutput(this.llm, TaskBreakdownSchema, {
name: 'break_down_tasks',
});
@ -192,7 +193,7 @@ export class TaskManagerAgent {
return new Command({
goto: 'content_router', // Route to content router to decide between file search, web search, or analysis
update: {
messages: [new AIMessage(responseMessage)],
// messages: [new AIMessage(responseMessage)],
tasks: taskLines,
currentTaskIndex: 0,
originalQuery: state.originalQuery || state.query, // Preserve original if not already set

View file

@ -45,13 +45,13 @@ export class URLSummarizationAgent {
);
return new Command({
goto: 'content_router',
update: {
messages: [
new AIMessage(
'No URLs found for processing, routing to content router',
),
],
},
// update: {
// messages: [
// new AIMessage(
// 'No URLs found for processing, routing to content router',
// ),
// ],
// },
});
}
@ -250,9 +250,9 @@ Provide a comprehensive summary of the above web page content, focusing on infor
return new Command({
goto: 'analyzer',
update: {
messages: [new AIMessage(errorMessage)],
},
// update: {
// messages: [new AIMessage(errorMessage)],
// },
});
}
@ -277,7 +277,7 @@ Provide a comprehensive summary of the above web page content, focusing on infor
return new Command({
goto: 'analyzer', // Route to analyzer to continue with normal workflow after URL processing
update: {
messages: [new AIMessage(responseMessage)],
// messages: [new AIMessage(responseMessage)],
relevantDocuments: documents,
},
});

View file

@ -19,6 +19,7 @@ import { setTemperature } from '../utils/modelUtils';
import { Embeddings } from '@langchain/core/embeddings';
import { removeThinkingBlocksFromMessages } from '../utils/contentUtils';
import computeSimilarity from '../utils/computeSimilarity';
import { withStructuredOutput } from '../utils/structuredOutput';
// Define Zod schema for structured search query output
const SearchQuerySchema = z.object({
@ -101,7 +102,7 @@ export class WebSearchAgent {
});
// Use structured output for search query generation
const structuredLlm = this.llm.withStructuredOutput(SearchQuerySchema, {
const structuredLlm = withStructuredOutput(this.llm, SearchQuerySchema, {
name: 'generate_search_query',
});
@ -423,9 +424,9 @@ export class WebSearchAgent {
if (documents.length === 0) {
return new Command({
goto: 'analyzer',
update: {
messages: [new AIMessage('No relevant documents found.')],
},
// update: {
// messages: [new AIMessage('No relevant documents found.')],
// },
});
}
@ -435,7 +436,7 @@ export class WebSearchAgent {
return new Command({
goto: 'analyzer', // Route back to analyzer to process the results
update: {
messages: [new AIMessage(responseMessage)],
// messages: [new AIMessage(responseMessage)],
relevantDocuments: documents,
bannedSummaryUrls: bannedSummaryUrls,
bannedPreviewUrls: bannedPreviewUrls,

View file

@ -1,32 +1,34 @@
export const decideNextActionPrompt = `You are an expert content analyzer.
Your task is to analyze the provided context and determine if we have enough information to fully answer the user's query.
Your task is to analyze the provided content and determine if we have enough information to fully answer the user's query.
# Instructions
- Carefully analyze the content of the context provided and the historical context of the conversation to determine if it contains sufficient information to answer the user's query
- Use the content provided in the \`context\` tag, as well as the historical context of the conversation, to make your determination
- Consider both file-based documents (from attached files) and web-based documents when analyzing context
- Carefully analyze the content of the context provided **and** the historical content of the conversation to determine if it contains sufficient information to answer the user's query
- The context may be empty; if the historical content is sufficient, you can still consider the content sufficient
- Historic content should generally be considered factual and does not require additional confirmation unless the user explicitly asks for confirmation or indicates that it was incorrect
- If the user is asking for a specific number of sources and the context does not provide enough, consider the content insufficient
# Source Type Awareness
When analyzing the context, be aware that documents may come from different sources:
When analyzing the content, be aware that documents may come from different sources:
- **File documents**: Content extracted from user-attached files (identified by metadata indicating file source)
- **Web documents**: Content retrieved from web searches (identified by URLs and web source metadata)
- **Mixed sources**: Both file and web content may be present
- **Chat history**: Previous messages in the conversation that may provide additional content
- **Mixed sources**: The content may include a combination of file documents, web documents, and chat history
Consider the following when evaluating sufficiency:
- File documents may contain user-specific, proprietary, or contextual information that cannot be found elsewhere
- Web documents provide current, general, and publicly available information
- The combination of both sources may be needed for comprehensive answers
- Chat history provides conversational context and may include user preferences, past interactions, or clarifications
- The combination of these sources should be evaluated holistically to determine if they collectively provide enough information to answer the user's query
- File content should be prioritized when answering questions specifically about attached documents
# Response Options Decision Tree
## Step 1: Check if content is sufficient
- If provided context contains enough information to answer the user's query respond with \`good_content\`
- If the context fully answers the user's query with complete information respond with \`good_content\`
- If the user is requesting to use the existing context to answer their query respond with \`good_content\`
- If provided content contains enough information to answer the user's query respond with \`good_content\`
- If the content fully answers the user's query with complete information respond with \`good_content\`
- If the user is requesting to use the existing content to answer their query respond with \`good_content\`
- If the user is requesting to avoid web searches respond with \`good_content\`
- If the user is asking you to be creative, such as writing a story, poem, or creative content respond with \`good_content\` unless the context is clearly insufficient
- If the user is asking you to be creative, such as writing a story, poem, or creative content respond with \`good_content\` unless the content is clearly insufficient
- If file documents contain complete information for file-specific queries respond with \`good_content\`
- If the user is requesting specific web content and there is a source in the context that corresponds to that request, it can be considered sufficient even if the content is not exhaustive or appears to contain errors — respond with \`good_content\`
@ -65,7 +67,7 @@ Consider the following when evaluating sufficiency:
- Comparative analysis between options
- Expert opinions or reviews from credible sources
- Statistical data or research findings
- Additional context to supplement file content with current information
- Existing content is not sufficient to answer the query, but the information can be found through a web search
**Examples requiring more web search:**
- "What are the latest features in iPhone 15?" (missing: recent tech specs)
@ -76,10 +78,9 @@ Consider the following when evaluating sufficiency:
# Critical Decision Point
Ask yourself: "Could this missing information reasonably be found through a web search, or does it require the user to provide specific details?"
- If it's personal/subjective or requires user feedback \`need_user_info\`
- If it's factual and searchable \`need_more_info\`
- If the context is complete or the user wants to use the existing context \`good_content\`
- If file content is complete for file-specific questions \`good_content\`
- If the content is complete and sufficient to answer the query, or the user wants to use the existing content \`good_content\`
- If the query is personal/subjective or requires user feedback \`need_user_info\`
- If the query is factual and searchable \`need_more_info\`
# System Instructions
{systemInstructions}
@ -98,7 +99,14 @@ Today's date is {date}
# Search Instruction History
{searchInstructionHistory}
Provide your response as a JSON object with "action" and "reasoning" fields where action is one of: good_content, need_user_info, or need_more_info.`;
# Response Format
Respond with a JSON object that matches this structure:
{{
"action": "string",
"reasoning": "string"
}}
Your response should contain only the JSON object, no additional text or formatting.`;
export const additionalUserInputPrompt = `You are an expert content analyzer.
Your task is to analyze the provided context and user query to determine what additional information is needed to fully answer the user's query.

View file

@ -1,5 +1,8 @@
export const contentRouterPrompt = `You are a content routing agent responsible for deciding the next step in information gathering.
# System Instructions
{systemInstructions}
# Your Role
Analyze the current task and available context to determine whether to:
1. Search attached files (\`file_search\`)
@ -46,36 +49,39 @@ When files are attached, first determine if they are likely to contain informati
- The question can be answered with general knowledge without additional research
# Response Format
Respond with your decision and reasoning:
Respond with a JSON object that matches this structure:
{{
"decision": "string", // One of: "file_search", "web_search", "analyzer"
"reasoning": "string" // Brief explanation of why this decision was made
}}
Decision: [file_search/web_search/analyzer]
Reasoning: [Brief explanation of why this choice was made, including file relevance assessment if applicable]
Your response should contain only the JSON object, no additional text or formatting.
# Examples
## Example 1: Relevant files
Current task: "Summarize the main points of this document"
File topics: "Product roadmap, feature specifications"
Decision: file_search
Reasoning: Task directly requests summary of attached document content
decision: file_search
reasoning: Task directly requests summary of attached document content
## Example 2: Irrelevant files
Current task: "What is the current weather in New York?"
File topics: "Resume, personal portfolio"
Decision: web_search
Reasoning: Attached files (resume, portfolio) are not relevant to weather query - need current web data
decision: web_search
reasoning: Attached files (resume, portfolio) are not relevant to weather query - need current web data
## Example 3: Partially relevant files
Current task: "How does machine learning work and what are the latest trends?"
File topics: "ML basics tutorial"
Decision: file_search
Reasoning: Files contain ML basics which could help with first part, then may need web search for latest trends
decision: file_search
reasoning: Files contain ML basics which could help with first part, then may need web search for latest trends
## Example 4: Technical question with unrelated files
Current task: "Explain React hooks"
File topics: "Marketing strategy document"
Decision: web_search
Reasoning: Marketing documents won't contain React programming information - need web search
decision: web_search
reasoning: Marketing documents won't contain React programming information - need web search
Your turn:
Current task: {currentTask}

View file

@ -1,34 +1,36 @@
export const taskBreakdownPrompt = `You are a task breakdown specialist. Your job is to analyze a user's question and determine if it needs to be broken down into smaller, more focused questions that can be answered independently.
# System Instructions:
{systemInstructions}
## File Context Awareness:
# File Context Awareness:
{fileContext}
## Analysis Guidelines:
# Analysis Guidelines:
### When to Break Down:
## When to Break Down:
1. **Multiple distinct subjects**: Questions asking about different people, places, things, or concepts
2. **Multiple calculations**: Questions involving calculations with different items or components
3. **Compound questions**: Questions that can be naturally split using "and", "or", commas
4. **Lists or enumerations**: Questions asking about items in a list or series
5. **File + external research**: Questions that require both analyzing attached files AND gathering external information
### When NOT to Break Down:
## When NOT to Break Down:
1. **Single focused question**: Already asks about one specific thing
2. **Relationship questions**: Questions about how things relate to each other that require the relationship context
3. **Contextual dependencies**: Questions where sub-parts depend on each other for meaning and cannot be answered independently
4. **Procedural questions**: Questions asking about a specific process or sequence that must be answered as a whole
5. **File-only questions**: Questions that can be fully answered using only the attached files
6. **Short factual questions**: Simple factual questions that do not require detailed analysis or multiple steps
### File-Aware Task Creation:
## File-Aware Task Creation:
When files are attached, consider creating tasks that:
- **Analyze file content**: "Summarize the main findings in the attached document"
- **Extract specific information**: "What are the project timelines mentioned in the attached proposal?"
- **Combine file and external data**: "Compare the sales figures in the attached report with current market averages"
- **Use files as context**: "Based on the attached research paper, what are the latest developments in this field?"
### Sub-Question Rules:
## Sub-Question Rules:
1. Each sub-question should be **self-contained** and answerable independently
2. Preserve the **original context and intent** in each sub-question
3. Maintain **specific details** like quantities, measurements, and qualifiers
@ -84,7 +86,7 @@ When files are attached, consider creating tasks that:
"reasoning": "To compare two economies, we need detailed information about each country's economic situation separately, which can then be compared."
}}
## Your Task:
# Your Task:
Analyze this user question: "{query}"

View file

@ -4,6 +4,7 @@ import { z } from 'zod';
import { formatDateForLLM } from '../utils';
import { ChatOpenAI, OpenAIClient } from '@langchain/openai';
import { removeThinkingBlocks } from './contentUtils';
import { withStructuredOutput } from './structuredOutput';
export type PreviewAnalysisResult = {
isSufficient: boolean;
@ -76,10 +77,12 @@ Snippet: ${content.snippet}
console.log(`Invoking LLM for preview content analysis`);
// Create structured LLM with Zod schema
const structuredLLM = llm.withStructuredOutput(PreviewAnalysisSchema);
const structuredLLM = withStructuredOutput(llm, PreviewAnalysisSchema, {
name: 'analyze_preview_content',
});
const analysisResult = await structuredLLM.invoke(
`${systemPrompt}You are a preview content analyzer, tasked with determining if search result snippets contain sufficient information to answer the Task Query.
`You are a preview content analyzer, tasked with determining if search result snippets contain sufficient information to answer the Task Query.
# Instructions
- Analyze the provided search result previews (titles + snippets), and chat history context to determine if they collectively contain enough information to provide a complete and accurate answer to the Task Query
@ -88,6 +91,18 @@ Snippet: ${content.snippet}
- Be specific in your reasoning when the content is not sufficient
- The original query is provided for additional context, only use it for clarification of overall expectations and intent. You do **not** need to answer the original query directly or completely
# System Instructions
${systemPrompt}
# Response Format
Respond with a JSON object that matches this structure:
{
"isSufficient": boolean,
"reason": "string"
}
Your response should contain only the JSON object, no additional text or formatting.
# Information Context:
Today's date is ${formatDateForLLM(new Date())}

View file

@ -0,0 +1,31 @@
import { BaseChatModel } from '@langchain/core/language_models/chat_models';
import { ChatGroq } from '@langchain/groq';
import { z } from 'zod';
interface StructuredOutputOptions {
  // Optional name for the generated tool/function call.
  name?: string;
  // When true, the raw model response is returned alongside the parsed output.
  includeRaw?: boolean;
}

/**
 * Configures structured output for the given LLM with the appropriate method
 * based on the model type.
 *
 * Groq models are forced to the 'jsonMode' method; all other models use the
 * provider's default method (the `method` property is omitted).
 *
 * @param llm - The chat model to wrap with structured output.
 * @param schema - Zod schema describing the expected output shape.
 * @param options - Optional tool name and raw-output inclusion flag.
 * @returns A runnable whose output is parsed against `schema`.
 */
export function withStructuredOutput<T extends z.ZodType>(
  llm: BaseChatModel,
  schema: T,
  options: StructuredOutputOptions = {}
) {
  // Bug fix: `includeRaw` was previously accepted but never forwarded to the
  // underlying withStructuredOutput call, so the option was silently ignored.
  const baseOptions = {
    name: options.name,
    includeRaw: options.includeRaw,
  };

  if (llm instanceof ChatGroq) {
    // Groq models need explicit JSON mode for reliable structured output.
    return llm.withStructuredOutput(schema, {
      ...baseOptions,
      method: 'jsonMode' as const,
    });
  }

  return llm.withStructuredOutput(schema, baseOptions);
}

View file

@ -5,6 +5,7 @@ import { formatDateForLLM } from '../utils';
import { getWebContent } from './documents';
import { removeThinkingBlocks } from './contentUtils';
import { setTemperature } from './modelUtils';
import { withStructuredOutput } from './structuredOutput';
export type SummarizeResult = {
document: Document | null;
@ -51,7 +52,9 @@ export const summarizeWebContent = async (
try {
// Create structured LLM with Zod schema
const structuredLLM = llm.withStructuredOutput(RelevanceCheckSchema);
const structuredLLM = withStructuredOutput(llm, RelevanceCheckSchema, {
name: 'check_content_relevance',
});
const relevanceResult = await structuredLLM.invoke(
`${systemPrompt}You are a content relevance checker. Your task is to determine if the given content is relevant to the user's query.
@ -61,6 +64,25 @@ export const summarizeWebContent = async (
- You do not need to provide a full answer to the query in order to be relevant, partial answers are acceptable
- Provide a brief explanation of your reasoning
# Response Format
Respond with a JSON object that matches this structure:
{
"relevant": boolean, // true if content is relevant, false otherwise
"reason": "string" // Brief explanation of why content is or isn't relevant
}
Your response should contain only the JSON object, no additional text or formatting.
Do not include data that would require escape characters, do not escape quotes or other characters.
This is important for the application to parse the response correctly.
# Example Response
{
"relevant": true,
"reason": "The content discusses the main features of the product which directly relate to the user's query about its capabilities."
}
# Context
Today's date is ${formatDateForLLM(new Date())}
Here is the query you need to answer: ${query}
@ -126,6 +148,9 @@ ${contentToAnalyze}`,
- Format the summary using markdown with headings and lists
- Include useful links to external resources, if applicable
# Response Format
- Respond with a detailed summary of the content, formatted in markdown. Do not include any additional text or explanations outside the summary.
# Decision Tree
- If the content is NOT relevant to the query, do not provide a summary; respond with 'not_relevant'
- If the content is relevant, return a detailed summary following the instructions above

2428
yarn.lock

File diff suppressed because it is too large Load diff