feat(agent): Enhanced structured output support for chat models

- Introduced `withStructuredOutput` function to configure structured output for LLMs.
- Added support for Groq models using 'jsonMode' method.
- Enhanced flexibility with optional parameters for naming and raw output inclusion.
This commit is contained in:
Willie Zutz 2025-07-12 15:44:17 -06:00
parent 37c93c3c9b
commit de2459a624
16 changed files with 1995 additions and 1820 deletions

1077
package-lock.json generated

File diff suppressed because it is too large Load diff

View file

@ -16,6 +16,7 @@ import { BaseChatModel } from '@langchain/core/language_models/chat_models';
import { ChatOpenAI } from '@langchain/openai';
import { ChatOllama } from '@langchain/ollama';
import { z } from 'zod';
import { withStructuredOutput } from '@/lib/utils/structuredOutput';
interface FileRes {
fileName: string;
@ -66,7 +67,7 @@ ${excerpt}
Generate topics that describe what this document is about, its domain, and key subject areas. Focus on topics that would help determine relevance for search queries.`;
// Use structured output for reliable topic extraction
const structuredLlm = llm.withStructuredOutput(TopicsSchema, {
const structuredLlm = withStructuredOutput(llm, TopicsSchema, {
name: 'generate_topics',
});

View file

@ -21,6 +21,7 @@ import {
removeThinkingBlocks,
removeThinkingBlocksFromMessages,
} from '../utils/contentUtils';
import { withStructuredOutput } from '../utils/structuredOutput';
import next from 'next';
// Define Zod schemas for structured output
@ -157,7 +158,7 @@ export class AnalyzerAgent {
);
// Use structured output for next action decision
const structuredLlm = this.llm.withStructuredOutput(NextActionSchema, {
const structuredLlm = withStructuredOutput(this.llm, NextActionSchema, {
name: 'analyze_content',
});
@ -183,7 +184,8 @@ export class AnalyzerAgent {
if (nextActionResponse.action === 'need_user_info') {
// Use structured output for user info request
const userInfoLlm = this.llm.withStructuredOutput(
const userInfoLlm = withStructuredOutput(
this.llm,
UserInfoRequestSchema,
{
name: 'request_user_info',
@ -240,7 +242,8 @@ export class AnalyzerAgent {
// If we need more information from the LLM, generate a more specific search query
// Use structured output for search refinement
const searchRefinementLlm = this.llm.withStructuredOutput(
const searchRefinementLlm = withStructuredOutput(
this.llm,
SearchRefinementSchema,
{
name: 'refine_search',
@ -254,7 +257,7 @@ export class AnalyzerAgent {
context: state.relevantDocuments
.map(
(doc, index) =>
`<source${index + 1}>${doc?.metadata?.title ? `<title>${doc?.metadata?.title}</title>` : ''}<content>${doc.pageContent}</content></source${index + 1}>`,
`<source${index + 1}>${doc?.metadata?.title ? `\n<title>${doc?.metadata?.title}</title>` : ''}\n<content>${doc.pageContent}</content>\n</source${index + 1}>`,
)
.join('\n\n'),
date: formatDateForLLM(new Date()),
@ -293,11 +296,11 @@ export class AnalyzerAgent {
return new Command({
goto: 'task_manager',
update: {
messages: [
new AIMessage(
`The following question can help refine the search: ${searchRefinement.question}`,
),
],
// messages: [
// new AIMessage(
// `The following question can help refine the search: ${searchRefinement.question}`,
// ),
// ],
query: searchRefinement.question, // Use the refined question for TaskManager to analyze
searchInstructions: searchRefinement.question,
searchInstructionHistory: [
@ -330,13 +333,13 @@ export class AnalyzerAgent {
return new Command({
goto: 'synthesizer',
update: {
messages: [
new AIMessage(
`Analysis completed. We have sufficient information to answer the query.`,
),
],
},
// update: {
// messages: [
// new AIMessage(
// `Analysis completed. We have sufficient information to answer the query.`,
// ),
// ],
// },
});
} catch (error) {
console.error('Analysis error:', error);

View file

@ -9,6 +9,7 @@ import path from 'node:path';
import { AgentState } from './agentState';
import { contentRouterPrompt } from '../prompts/contentRouter';
import { removeThinkingBlocksFromMessages } from '../utils/contentUtils';
import { withStructuredOutput } from '../utils/structuredOutput';
// Define Zod schema for structured router decision output
const RouterDecisionSchema = z.object({
@ -87,6 +88,7 @@ export class ContentRouterAgent {
const template = PromptTemplate.fromTemplate(contentRouterPrompt);
const prompt = await template.format({
systemInstructions: this.systemInstructions,
currentTask: currentTask,
query: state.originalQuery || state.query,
focusMode: focusMode,
@ -97,7 +99,8 @@ export class ContentRouterAgent {
});
// Use structured output for routing decision
const structuredLlm = this.llm.withStructuredOutput(
const structuredLlm = withStructuredOutput(
this.llm,
RouterDecisionSchema,
{
name: 'route_content',
@ -146,9 +149,9 @@ export class ContentRouterAgent {
return new Command({
goto: validatedDecision.decision,
update: {
messages: [new AIMessage(responseMessage)],
},
// update: {
// messages: [new AIMessage(responseMessage)],
// },
});
} catch (error) {
console.error('Content router error:', error);

View file

@ -81,11 +81,11 @@ export class FileSearchAgent {
console.log('No processable file content found');
return new Command({
goto: 'analyzer',
update: {
messages: [
new AIMessage('No searchable content found in attached files.'),
],
},
// update: {
// messages: [
// new AIMessage('No searchable content found in attached files.'),
// ],
// },
});
}
@ -145,13 +145,13 @@ export class FileSearchAgent {
return new Command({
goto: 'analyzer',
update: {
messages: [
new AIMessage(
'No relevant content found in attached files for the current task.',
),
],
},
// update: {
// messages: [
// new AIMessage(
// 'No relevant content found in attached files for the current task.',
// ),
// ],
// },
});
}
@ -180,7 +180,7 @@ export class FileSearchAgent {
return new Command({
goto: 'analyzer', // Route back to analyzer to process the results
update: {
messages: [new AIMessage(responseMessage)],
// messages: [new AIMessage(responseMessage)],
relevantDocuments: rankedDocuments,
},
});

View file

@ -43,11 +43,11 @@ export class SynthesizerAgent {
.map((doc, index) => {
const isFile = doc.metadata?.url?.toLowerCase().includes('file');
return `<${index + 1}>\n
<title>${doc.metadata.title}</title>\n
<source_type>${isFile ? 'file' : 'web'}</source_type>\n
${isFile ? '' : '\n<url>' + doc.metadata.url + '</url>\n'}
<content>\n${doc.pageContent}\n</content>\n
</${index + 1}>`;
<title>${doc.metadata.title}</title>
<source_type>${isFile ? 'file' : 'web'}</source_type>
${isFile ? '' : '\n<url>' + doc.metadata.url + '</url>'}
<content>\n${doc.pageContent}\n </content>
</${index + 1}>`;
})
.join('\n');

View file

@ -7,6 +7,7 @@ import { z } from 'zod';
import { taskBreakdownPrompt } from '../prompts/taskBreakdown';
import { AgentState } from './agentState';
import { setTemperature } from '../utils/modelUtils';
import { withStructuredOutput } from '../utils/structuredOutput';
// Define Zod schema for structured task breakdown output
const TaskBreakdownSchema = z.object({
@ -76,11 +77,11 @@ export class TaskManagerAgent {
return new Command({
goto: 'content_router',
update: {
messages: [
new AIMessage(
`Task ${currentTaskIndex + 1} completed. Processing task ${nextTaskIndex + 1} of ${state.tasks.length}: "${state.tasks[nextTaskIndex]}"`,
),
],
// messages: [
// new AIMessage(
// `Task ${currentTaskIndex + 1} completed. Processing task ${nextTaskIndex + 1} of ${state.tasks.length}: "${state.tasks[nextTaskIndex]}"`,
// ),
// ],
currentTaskIndex: nextTaskIndex,
},
});
@ -101,13 +102,13 @@ export class TaskManagerAgent {
return new Command({
goto: 'analyzer',
update: {
messages: [
new AIMessage(
`All ${state.tasks.length} tasks completed. Moving to analysis phase.`,
),
],
},
// update: {
// messages: [
// new AIMessage(
// `All ${state.tasks.length} tasks completed. Moving to analysis phase.`,
// ),
// ],
// },
});
}
}
@ -141,7 +142,7 @@ export class TaskManagerAgent {
});
// Use structured output for task breakdown
const structuredLlm = this.llm.withStructuredOutput(TaskBreakdownSchema, {
const structuredLlm = withStructuredOutput(this.llm, TaskBreakdownSchema, {
name: 'break_down_tasks',
});
@ -192,7 +193,7 @@ export class TaskManagerAgent {
return new Command({
goto: 'content_router', // Route to content router to decide between file search, web search, or analysis
update: {
messages: [new AIMessage(responseMessage)],
// messages: [new AIMessage(responseMessage)],
tasks: taskLines,
currentTaskIndex: 0,
originalQuery: state.originalQuery || state.query, // Preserve original if not already set

View file

@ -45,13 +45,13 @@ export class URLSummarizationAgent {
);
return new Command({
goto: 'content_router',
update: {
messages: [
new AIMessage(
'No URLs found for processing, routing to content router',
),
],
},
// update: {
// messages: [
// new AIMessage(
// 'No URLs found for processing, routing to content router',
// ),
// ],
// },
});
}
@ -250,9 +250,9 @@ Provide a comprehensive summary of the above web page content, focusing on infor
return new Command({
goto: 'analyzer',
update: {
messages: [new AIMessage(errorMessage)],
},
// update: {
// messages: [new AIMessage(errorMessage)],
// },
});
}
@ -277,7 +277,7 @@ Provide a comprehensive summary of the above web page content, focusing on infor
return new Command({
goto: 'analyzer', // Route to analyzer to continue with normal workflow after URL processing
update: {
messages: [new AIMessage(responseMessage)],
// messages: [new AIMessage(responseMessage)],
relevantDocuments: documents,
},
});

View file

@ -19,6 +19,7 @@ import { setTemperature } from '../utils/modelUtils';
import { Embeddings } from '@langchain/core/embeddings';
import { removeThinkingBlocksFromMessages } from '../utils/contentUtils';
import computeSimilarity from '../utils/computeSimilarity';
import { withStructuredOutput } from '../utils/structuredOutput';
// Define Zod schema for structured search query output
const SearchQuerySchema = z.object({
@ -101,7 +102,7 @@ export class WebSearchAgent {
});
// Use structured output for search query generation
const structuredLlm = this.llm.withStructuredOutput(SearchQuerySchema, {
const structuredLlm = withStructuredOutput(this.llm, SearchQuerySchema, {
name: 'generate_search_query',
});
@ -423,9 +424,9 @@ export class WebSearchAgent {
if (documents.length === 0) {
return new Command({
goto: 'analyzer',
update: {
messages: [new AIMessage('No relevant documents found.')],
},
// update: {
// messages: [new AIMessage('No relevant documents found.')],
// },
});
}
@ -435,7 +436,7 @@ export class WebSearchAgent {
return new Command({
goto: 'analyzer', // Route back to analyzer to process the results
update: {
messages: [new AIMessage(responseMessage)],
// messages: [new AIMessage(responseMessage)],
relevantDocuments: documents,
bannedSummaryUrls: bannedSummaryUrls,
bannedPreviewUrls: bannedPreviewUrls,

View file

@ -1,32 +1,34 @@
export const decideNextActionPrompt = `You are an expert content analyzer.
Your task is to analyze the provided context and determine if we have enough information to fully answer the user's query.
Your task is to analyze the provided content and determine if we have enough information to fully answer the user's query.
# Instructions
- Carefully analyze the content of the context provided and the historical context of the conversation to determine if it contains sufficient information to answer the user's query
- Use the content provided in the \`context\` tag, as well as the historical context of the conversation, to make your determination
- Consider both file-based documents (from attached files) and web-based documents when analyzing context
- Carefully analyze the content of the context provided **and** the historical content of the conversation to determine if it contains sufficient information to answer the user's query
- The context may be empty; if the historical content is sufficient, you can still consider the content sufficient
- Historic content should generally be considered factual and does not require additional confirmation unless the user explicitly asks for confirmation or indicates that it was incorrect
- If the user is asking for a specific number of sources and the context does not provide enough, consider the content insufficient
# Source Type Awareness
When analyzing the context, be aware that documents may come from different sources:
When analyzing the content, be aware that documents may come from different sources:
- **File documents**: Content extracted from user-attached files (identified by metadata indicating file source)
- **Web documents**: Content retrieved from web searches (identified by URLs and web source metadata)
- **Mixed sources**: Both file and web content may be present
- **Chat history**: Previous messages in the conversation that may provide additional content
- **Mixed sources**: The content may include a combination of file documents, web documents, and chat history
Consider the following when evaluating sufficiency:
- File documents may contain user-specific, proprietary, or contextual information that cannot be found elsewhere
- Web documents provide current, general, and publicly available information
- The combination of both sources may be needed for comprehensive answers
- Chat history provides conversational context and may include user preferences, past interactions, or clarifications
- The combination of these sources should be evaluated holistically to determine if they collectively provide enough information to answer the user's query
- File content should be prioritized when answering questions specifically about attached documents
# Response Options Decision Tree
## Step 1: Check if content is sufficient
- If provided context contains enough information to answer the user's query respond with \`good_content\`
- If the context fully answers the user's query with complete information respond with \`good_content\`
- If the user is requesting to use the existing context to answer their query respond with \`good_content\`
- If provided content contains enough information to answer the user's query respond with \`good_content\`
- If the content fully answers the user's query with complete information respond with \`good_content\`
- If the user is requesting to use the existing content to answer their query respond with \`good_content\`
- If the user is requesting to avoid web searches respond with \`good_content\`
- If the user is asking you to be creative, such as writing a story, poem, or creative content respond with \`good_content\` unless the context is clearly insufficient
- If the user is asking you to be creative, such as writing a story, poem, or creative content respond with \`good_content\` unless the content is clearly insufficient
- If file documents contain complete information for file-specific queries respond with \`good_content\`
- If the user is requesting specific web content and there is a source in the context that corresponds to that request, it can be considered sufficient even if the content is not exhaustive or appears to contain errors — respond with \`good_content\`
@ -65,7 +67,7 @@ Consider the following when evaluating sufficiency:
- Comparative analysis between options
- Expert opinions or reviews from credible sources
- Statistical data or research findings
- Additional context to supplement file content with current information
- Existing content is not sufficient to answer the query, but the information can be found through a web search
**Examples requiring more web search:**
- "What are the latest features in iPhone 15?" (missing: recent tech specs)
@ -76,10 +78,9 @@ Consider the following when evaluating sufficiency:
# Critical Decision Point
Ask yourself: "Could this missing information reasonably be found through a web search, or does it require the user to provide specific details?"
- If it's personal/subjective or requires user feedback \`need_user_info\`
- If it's factual and searchable \`need_more_info\`
- If the context is complete or the user wants to use the existing context \`good_content\`
- If file content is complete for file-specific questions \`good_content\`
- If the content is complete and sufficient to answer the query, or the user wants to use the existing content \`good_content\`
- If the query is personal/subjective or requires user feedback \`need_user_info\`
- If the query is factual and searchable \`need_more_info\`
# System Instructions
{systemInstructions}
@ -98,7 +99,14 @@ Today's date is {date}
# Search Instruction History
{searchInstructionHistory}
Provide your response as a JSON object with "action" and "reasoning" fields where action is one of: good_content, need_user_info, or need_more_info.`;
# Response Format
Respond with a JSON object that matches this structure:
{{
"action": "string",
"reasoning": "string"
}}
Your response should contain only the JSON object, no additional text or formatting.`;
export const additionalUserInputPrompt = `You are an expert content analyzer.
Your task is to analyze the provided context and user query to determine what additional information is needed to fully answer the user's query.

View file

@ -1,5 +1,8 @@
export const contentRouterPrompt = `You are a content routing agent responsible for deciding the next step in information gathering.
# System Instructions
{systemInstructions}
# Your Role
Analyze the current task and available context to determine whether to:
1. Search attached files (\`file_search\`)
@ -46,36 +49,39 @@ When files are attached, first determine if they are likely to contain informati
- The question can be answered with general knowledge without additional research
# Response Format
Respond with your decision and reasoning:
Respond with a JSON object that matches this structure:
{{
"decision": "string", // One of: "file_search", "web_search", "analyzer"
"reasoning": "string" // Brief explanation of why this decision was made
}}
Decision: [file_search/web_search/analyzer]
Reasoning: [Brief explanation of why this choice was made, including file relevance assessment if applicable]
Your response should contain only the JSON object, no additional text or formatting.
# Examples
## Example 1: Relevant files
Current task: "Summarize the main points of this document"
File topics: "Product roadmap, feature specifications"
Decision: file_search
Reasoning: Task directly requests summary of attached document content
decision: file_search
reasoning: Task directly requests summary of attached document content
## Example 2: Irrelevant files
Current task: "What is the current weather in New York?"
File topics: "Resume, personal portfolio"
Decision: web_search
Reasoning: Attached files (resume, portfolio) are not relevant to weather query - need current web data
decision: web_search
reasoning: Attached files (resume, portfolio) are not relevant to weather query - need current web data
## Example 3: Partially relevant files
Current task: "How does machine learning work and what are the latest trends?"
File topics: "ML basics tutorial"
Decision: file_search
Reasoning: Files contain ML basics which could help with first part, then may need web search for latest trends
decision: file_search
reasoning: Files contain ML basics which could help with first part, then may need web search for latest trends
## Example 4: Technical question with unrelated files
Current task: "Explain React hooks"
File topics: "Marketing strategy document"
Decision: web_search
Reasoning: Marketing documents won't contain React programming information - need web search
decision: web_search
reasoning: Marketing documents won't contain React programming information - need web search
Your turn:
Current task: {currentTask}

View file

@ -1,34 +1,36 @@
export const taskBreakdownPrompt = `You are a task breakdown specialist. Your job is to analyze a user's question and determine if it needs to be broken down into smaller, more focused questions that can be answered independently.
# System Instructions:
{systemInstructions}
## File Context Awareness:
# File Context Awareness:
{fileContext}
## Analysis Guidelines:
# Analysis Guidelines:
### When to Break Down:
## When to Break Down:
1. **Multiple distinct subjects**: Questions asking about different people, places, things, or concepts
2. **Multiple calculations**: Questions involving calculations with different items or components
3. **Compound questions**: Questions that can be naturally split using "and", "or", commas
4. **Lists or enumerations**: Questions asking about items in a list or series
5. **File + external research**: Questions that require both analyzing attached files AND gathering external information
### When NOT to Break Down:
## When NOT to Break Down:
1. **Single focused question**: Already asks about one specific thing
2. **Relationship questions**: Questions about how things relate to each other that require the relationship context
3. **Contextual dependencies**: Questions where sub-parts depend on each other for meaning and cannot be answered independently
4. **Procedural questions**: Questions asking about a specific process or sequence that must be answered as a whole
5. **File-only questions**: Questions that can be fully answered using only the attached files
6. **Short factual questions**: Simple factual questions that do not require detailed analysis or multiple steps
### File-Aware Task Creation:
## File-Aware Task Creation:
When files are attached, consider creating tasks that:
- **Analyze file content**: "Summarize the main findings in the attached document"
- **Extract specific information**: "What are the project timelines mentioned in the attached proposal?"
- **Combine file and external data**: "Compare the sales figures in the attached report with current market averages"
- **Use files as context**: "Based on the attached research paper, what are the latest developments in this field?"
### Sub-Question Rules:
## Sub-Question Rules:
1. Each sub-question should be **self-contained** and answerable independently
2. Preserve the **original context and intent** in each sub-question
3. Maintain **specific details** like quantities, measurements, and qualifiers
@ -84,7 +86,7 @@ When files are attached, consider creating tasks that:
"reasoning": "To compare two economies, we need detailed information about each country's economic situation separately, which can then be compared."
}}
## Your Task:
# Your Task:
Analyze this user question: "{query}"

View file

@ -4,6 +4,7 @@ import { z } from 'zod';
import { formatDateForLLM } from '../utils';
import { ChatOpenAI, OpenAIClient } from '@langchain/openai';
import { removeThinkingBlocks } from './contentUtils';
import { withStructuredOutput } from './structuredOutput';
export type PreviewAnalysisResult = {
isSufficient: boolean;
@ -76,10 +77,12 @@ Snippet: ${content.snippet}
console.log(`Invoking LLM for preview content analysis`);
// Create structured LLM with Zod schema
const structuredLLM = llm.withStructuredOutput(PreviewAnalysisSchema);
const structuredLLM = withStructuredOutput(llm, PreviewAnalysisSchema, {
name: 'analyze_preview_content',
});
const analysisResult = await structuredLLM.invoke(
`${systemPrompt}You are a preview content analyzer, tasked with determining if search result snippets contain sufficient information to answer the Task Query.
`You are a preview content analyzer, tasked with determining if search result snippets contain sufficient information to answer the Task Query.
# Instructions
- Analyze the provided search result previews (titles + snippets), and chat history context to determine if they collectively contain enough information to provide a complete and accurate answer to the Task Query
@ -88,6 +91,18 @@ Snippet: ${content.snippet}
- Be specific in your reasoning when the content is not sufficient
- The original query is provided for additional context, only use it for clarification of overall expectations and intent. You do **not** need to answer the original query directly or completely
# System Instructions
${systemPrompt}
# Response Format
Respond with a JSON object that matches this structure:
{
"isSufficient": boolean,
"reason": "string"
}
Your response should contain only the JSON object, no additional text or formatting.
# Information Context:
Today's date is ${formatDateForLLM(new Date())}

View file

@ -0,0 +1,31 @@
import { BaseChatModel } from '@langchain/core/language_models/chat_models';
import { ChatGroq } from '@langchain/groq';
import { z } from 'zod';
interface StructuredOutputOptions {
  // Optional name for the generated tool/function call.
  name?: string;
  // When true, the raw model response is returned alongside the parsed output.
  includeRaw?: boolean;
}

/**
 * Configures structured output for the given LLM with the appropriate method
 * based on the model type.
 *
 * Groq models are forced to the 'jsonMode' method; all other models use the
 * provider's default method (the `method` property is omitted).
 *
 * @param llm - The chat model to wrap with structured output.
 * @param schema - Zod schema describing the expected output shape.
 * @param options - Optional tool name and raw-output inclusion flag.
 * @returns A runnable whose output is parsed against `schema`.
 */
export function withStructuredOutput<T extends z.ZodType>(
  llm: BaseChatModel,
  schema: T,
  options: StructuredOutputOptions = {}
) {
  // Bug fix: `includeRaw` was previously accepted but never forwarded to the
  // underlying withStructuredOutput call, so the option was silently ignored.
  const baseOptions = {
    name: options.name,
    includeRaw: options.includeRaw,
  };

  if (llm instanceof ChatGroq) {
    // Groq models need explicit JSON mode for reliable structured output.
    return llm.withStructuredOutput(schema, {
      ...baseOptions,
      method: 'jsonMode' as const,
    });
  }

  return llm.withStructuredOutput(schema, baseOptions);
}

View file

@ -5,6 +5,7 @@ import { formatDateForLLM } from '../utils';
import { getWebContent } from './documents';
import { removeThinkingBlocks } from './contentUtils';
import { setTemperature } from './modelUtils';
import { withStructuredOutput } from './structuredOutput';
export type SummarizeResult = {
document: Document | null;
@ -51,7 +52,9 @@ export const summarizeWebContent = async (
try {
// Create structured LLM with Zod schema
const structuredLLM = llm.withStructuredOutput(RelevanceCheckSchema);
const structuredLLM = withStructuredOutput(llm, RelevanceCheckSchema, {
name: 'check_content_relevance',
});
const relevanceResult = await structuredLLM.invoke(
`${systemPrompt}You are a content relevance checker. Your task is to determine if the given content is relevant to the user's query.
@ -61,6 +64,25 @@ export const summarizeWebContent = async (
- You do not need to provide a full answer to the query in order to be relevant, partial answers are acceptable
- Provide a brief explanation of your reasoning
# Response Format
Respond with a JSON object that matches this structure:
{
"relevant": boolean, // true if content is relevant, false otherwise
"reason": "string" // Brief explanation of why content is or isn't relevant
}
Your response should contain only the JSON object, no additional text or formatting.
Do not include data that would require escape characters, do not escape quotes or other characters.
This is important for the application to parse the response correctly.
# Example Response
{
"relevant": true,
"reason": "The content discusses the main features of the product which directly relate to the user's query about its capabilities."
}
# Context
Today's date is ${formatDateForLLM(new Date())}
Here is the query you need to answer: ${query}
@ -126,6 +148,9 @@ ${contentToAnalyze}`,
- Format the summary using markdown with headings and lists
- Include useful links to external resources, if applicable
# Response Format
- Respond with a detailed summary of the content, formatted in markdown. Do not include any additional text or explanations outside the summary.
# Decision Tree
- If the content is NOT relevant to the query, do not provide a summary; respond with 'not_relevant'
- If the content is relevant, return a detailed summary following the instructions above

2428
yarn.lock

File diff suppressed because it is too large Load diff