feat(agent): Enhance AnalyzerAgent with previous analysis reasoning and improve prompt clarity in task breakdown and summarization

2025-06-23 01:28:54 -06:00 · 2025-06-23 01:28:54 -06:00 · b97383bf0f
commit b97383bf0f
parent b9d4a4e779
5 changed files with 61 additions and 72 deletions
--- a/src/lib/agents/analyzerAgent.ts
+++ b/src/lib/agents/analyzerAgent.ts
@ -21,6 +21,7 @@ import {
  removeThinkingBlocks,
  removeThinkingBlocksFromMessages,
 } from '../utils/contentUtils';
 import next from 'next';
 // Define Zod schemas for structured output
 const NextActionSchema = z.object({
@ -157,6 +158,7 @@ export class AnalyzerAgent {
              .map((question) => `- ${question}`)
              .join('\n'),
            query: state.originalQuery || state.query, // Use original query for user info context
            previousAnalysis: nextActionResponse.reasoning, // Include reasoning from previous analysis
          });
          const userInfoRequest = await userInfoLlm.invoke(
@ -210,6 +212,7 @@ export class AnalyzerAgent {
            .map((question) => `- ${question}`)
            .join('\n'),
          query: state.originalQuery || state.query, // Use original query for more info context
          previousAnalysis: nextActionResponse.reasoning, // Include reasoning from previous analysis
        });
        const searchRefinement = await searchRefinementLlm.invoke(
--- a/src/lib/prompts/analyzer.ts
+++ b/src/lib/prompts/analyzer.ts
@ -9,7 +9,7 @@ Your task is to analyze the provided context and determine if we have enough inf
 # Response Options Decision Tree
 ## Step 1: Check if content is sufficient
- If your training data and the provided context contain enough information to answer the user's query → respond with \`good_content\`
+- If provided context contains enough information to answer the user's query → respond with \`good_content\`
 - If the context fully answers the user's query with complete information → respond with \`good_content\`
 - If the user is requesting to use the existing context to answer their query → respond with \`good_content\`
 - If the user is requesting to avoid web searches → respond with \`good_content\`
@ -57,9 +57,9 @@ Your task is to analyze the provided context and determine if we have enough inf
 - "Compare Tesla Model 3 vs BMW i4" (missing: detailed comparison data)
 # Critical Decision Point
-Ask yourself: "Could this missing information reasonably be found through a web search, or does it require the user to provide personal/subjective details?"
+Ask yourself: "Could this missing information reasonably be found through a web search, or does it require the user to provide specific details?"
- If it's personal/subjective → \`need_user_info\`
+- If it's personal/subjective or requires user feedback → \`need_user_info\`
 - If it's factual and searchable → \`need_more_info\`
 - If the context is complete or the user wants to use the existing context → \`good_content\`
@ -85,6 +85,10 @@ Provide your response as a JSON object with "action" and "reasoning" fields wher
 export const additionalUserInputPrompt = `You are an expert content analyzer.
 Your task is to analyze the provided context and user query to determine what additional information is needed to fully answer the user's query.
 # Previous Analysis
 - The LLM analyzed the provided context and user query and determined that additional information is needed to fully answer the user's query, here is the analysis result:
 {previousAnalysis}
 # Refinement History
 - The following automated questions have already been asked to refine the search
 {searchInstructionHistory}
@ -112,10 +116,15 @@ You are an expert content analyzer.
 Your task is to analyze the provided context and user query to determine what additional information is needed to fully answer the user's query.
 # Instructions
- Respond with a detailed question that will be directed to an LLM to gather more specific information that can help refine the search.
+- Respond with a detailed question that will be directed to an LLM to create a web search instruction
 - The question should not require user input, but rather be designed to gather more specific information that can help refine the search
 - Avoid giving the same guidance more than once, and avoid repeating the same question multiple times
 - Avoid asking for general information or vague details; focus on specific, actionable questions that can lead to concrete answers
 # Previous Analysis
 - The LLM analyzed the provided context and user query and determined that additional information is needed to fully answer the user's query, here is the analysis result:
 {previousAnalysis}
 # Refinement History
 - The following automated questions have already been asked to refine the search
 {searchInstructionHistory}
--- a/src/lib/prompts/taskBreakdown.ts
+++ b/src/lib/prompts/taskBreakdown.ts
@ -24,6 +24,8 @@ export const taskBreakdownPrompt = `You are a task breakdown specialist. Your jo
 5. Keep the **same question type** (factual, analytical, etc.)
 6. Avoid introducing **new concepts** or information not present in the original question
 7. **Do not** repeat the same question multiple times; each sub-question should be unique and focused on a specific aspect of the original query
 8. Questions should **not** require user input for additional context; they should be designed to be answered by an LLM or through research via web search
 9. Do not ask questions that are based on opinion, personal preference, usage habits, subjective interpretation, etc...
 ## Examples:
--- a/src/lib/utils/analyzePreviewContent.ts
+++ b/src/lib/utils/analyzePreviewContent.ts
@ -1,6 +1,6 @@
 import { BaseChatModel } from '@langchain/core/language_models/chat_models';
 import { BaseMessage } from '@langchain/core/messages';
-import LineOutputParser from '../outputParsers/lineOutputParser';
+import { z } from 'zod';
 import { formatDateForLLM } from '../utils';
 import { ChatOpenAI, OpenAIClient } from '@langchain/openai';
 import { removeThinkingBlocks } from './contentUtils';
@ -16,6 +16,12 @@ export type PreviewContent = {
  url: string;
 };
 /**
  * Zod schema for structured preview analysis output.
  *
  * Handed to `llm.withStructuredOutput(...)` further down in this file so the
  * model's answer comes back as an object instead of free text.
  *
  * Fields:
  * - `isSufficient`: required boolean; true when the preview content is
  *   considered enough to answer the task query.
  * - `reason`: string that may be omitted or null; intended to explain why
  *   full content analysis is required when `isSufficient` is false.
  */
 const PreviewAnalysisSchema = z.object({
  isSufficient: z.boolean().describe('Whether the preview content is sufficient to answer the task query'),
  reason: z.string().optional().nullable().describe('Specific reason why full content analysis is required (only if isSufficient is false)')
 });
 export const analyzePreviewContent = async (
  previewContents: PreviewContent[],
  query: string,
@ -60,17 +66,24 @@ Snippet: ${content.snippet}
    console.log(`Invoking LLM for preview content analysis`);
-    const analysisResponse = await llm.invoke(
+    // Create structured LLM with Zod schema
    const structuredLLM = llm.withStructuredOutput(PreviewAnalysisSchema);
    const analysisResult = await structuredLLM.invoke(
      `${systemPrompt}You are a preview content analyzer, tasked with determining if search result snippets contain sufficient information to answer the Task Query.
 # Instructions
 - Analyze the provided search result previews (titles + snippets), and chat history context to determine if they collectively contain enough information to provide a complete and accurate answer to the Task Query
 - You must make a binary decision: either the preview content is sufficient OR it is not sufficient
- If the preview content can provide a complete answer to the Task Query, respond with "sufficient"
+- If the preview content can provide a complete answer to the Task Query, set isSufficient to true
- If the preview content lacks important details, requires deeper analysis, or cannot fully answer the Task Query, respond with "not_needed: [specific reason why full content analysis is required]"
+- If the preview content lacks important details, requires deeper analysis, or cannot fully answer the Task Query, set isSufficient to false and provide a specific reason
 - Be specific in your reasoning when the content is not sufficient
 - The original query is provided for additional context, only use it for clarification of overall expectations and intent. You do **not** need to answer the original query directly or completely
- Output your decision inside a \`decision\` XML tag
+
 # Response Format
 You must return a JSON object with:
 - isSufficient: boolean indicating whether preview content is sufficient
 - reason: string explaining why full content analysis is required (only if isSufficient is false)
 # Information Context:
 Today's date is ${formatDateForLLM(new Date())}
@ -90,8 +103,8 @@ ${formattedPreviewContent}
      { signal },
    );
-    if (!analysisResponse || !analysisResponse.content) {
+    if (!analysisResult) {
-      console.error('No analysis response returned from LLM');
+      console.error('No analysis result returned from LLM');
      return {
        isSufficient: false,
        reason:
@ -99,37 +112,20 @@ ${formattedPreviewContent}
      };
    }
-    const decisionParser = new LineOutputParser({ key: 'decision' });
+    console.log(`LLM analysis result:`, analysisResult);
    const decision = await decisionParser.parse(
      analysisResponse.content as string,
    );
-    console.log(`LLM decision response:`, decision);
+    if (analysisResult.isSufficient) {
    if (decision.toLowerCase().trim() === 'sufficient') {
      console.log(
        'Preview content determined to be sufficient for answering the query',
      );
      return { isSufficient: true };
    } else if (decision.toLowerCase().startsWith('not_needed')) {
      // Extract the reason from the "not_needed" response
      const reason = decision.startsWith('not_needed')
        ? decision.substring('not_needed:'.length).trim()
        : 'Preview content insufficient for complete answer';
      console.log(
        `Preview content determined to be insufficient. Reason: ${reason}`,
      );
      return { isSufficient: false, reason };
    } else {
      // Default to not sufficient if unclear response
      console.log(
-        `Unclear LLM response, defaulting to insufficient: ${decision}`,
+        `Preview content determined to be insufficient. Reason: ${analysisResult.reason}`,
      );
      return { 
        isSufficient: false, 
-        reason:
+        reason: analysisResult.reason || 'Preview content insufficient for complete answer'
          'Unclear analysis response - falling back to full content processing',
      };
    }
  } catch (error) {
--- a/src/lib/utils/summarizeWebContent.ts
+++ b/src/lib/utils/summarizeWebContent.ts
@ -1,21 +1,14 @@
 import { Document } from '@langchain/core/documents';
 import { BaseChatModel } from '@langchain/core/language_models/chat_models';
 import { z } from 'zod';
 import { formatDateForLLM } from '../utils';
 import { getWebContent } from './documents';
 import { removeThinkingBlocks } from './contentUtils';
 /**
  * Outcome of a web content summarization attempt.
  *
  * - `document`: the `Document` holding the summary, or `null` when no usable
  *   summary was produced.
  * - `notRelevantReason`: optional explanation accompanying a `null` document
  *   (for example, the content was judged not relevant to the query).
  */
 export type SummarizeResult = {
  document: Document | null;
  notRelevantReason?: string;
 };
 /**
  * Zod schema for structured summary output.
  *
  * Supplied to `llm.withStructuredOutput(...)` when building the structured
  * summarizer LLM.
  *
  * Fields:
  * - `isRelevant`: required boolean; whether the content is relevant to the
  *   user query.
  * - `summary`: required string; a markdown summary when relevant, otherwise
  *   an explanation.
  * - `notRelevantReason`: optional string; the specific reason the content is
  *   not relevant, expected only when `isRelevant` is false.
  */
 const SummarySchema = z.object({
  isRelevant: z.boolean().describe('Whether the content is relevant to the user query'),
  summary: z.string().describe('Detailed summary of the content in markdown format, or explanation if not relevant'),
  notRelevantReason: z.string().optional().describe('Specific reason why content is not relevant (only if isRelevant is false)')
 });
 export const summarizeWebContent = async (
  url: string,
  query: string,
@ -32,10 +25,7 @@ export const summarizeWebContent = async (
        ? `${systemInstructions}\n\n`
        : '';
-      // Create structured LLM with Zod schema
+      let summary = null;
      const structuredLLM = llm.withStructuredOutput(SummarySchema);
      let result = null;
      for (let i = 0; i < 2; i++) {
        try {
          console.log(
@ -45,20 +35,17 @@ export const summarizeWebContent = async (
          const prompt = `${systemPrompt}You are a web content summarizer, tasked with creating a detailed, accurate summary of content from a webpage.
 # Instructions
- Determine if the content is relevant to the user's query
+- First determine if the content is relevant to the user's query
- You do not need to provide a full answer to the query, partial answers are acceptable
+- You do not need to provide a full answer to the query in order to be relevant, partial answers are acceptable
- If relevant, create a thorough and comprehensive summary capturing all key points
+- If the content is relevant, return a thorough and comprehensive summary capturing all key points
 - Include specific details, numbers, and quotes when relevant
 - Be concise and to the point, avoiding unnecessary fluff
 - Format the summary using markdown with headings and lists
 - Include useful links to external resources, if applicable
 - If the content is not relevant, set isRelevant to false and provide a specific reason
-# Response Format
+# Decision Tree
-You must return a JSON object with:
+- If the content is NOT relevant to the query, do not provide a summary; respond with 'not_relevant'
- isRelevant: boolean indicating if content is relevant to the query
+- If the content is relevant, return a detailed summary following the instructions above
 - summary: string with detailed markdown summary if relevant, or explanation if not relevant
 - notRelevantReason: string explaining why content is not relevant (only if isRelevant is false)
 Today's date is ${formatDateForLLM(new Date())}
@ -67,7 +54,8 @@ Here is the query you need to answer: ${query}
 Here is the content to summarize:
 ${i === 0 ? content.metadata.html : content.pageContent}`;
-          result = await structuredLLM.invoke(prompt, { signal });
+          const result = await llm.invoke(prompt, { signal });
          summary = removeThinkingBlocks(result.content as string);
          break;
        } catch (error) {
          console.error(
@ -77,7 +65,7 @@ ${i === 0 ? content.metadata.html : content.pageContent}`;
        }
      }
-      if (!result) {
+      if (!summary) {
        console.error(`No summary result returned for URL: ${url}`);
        return {
          document: null,
@ -85,31 +73,22 @@ ${i === 0 ? content.metadata.html : content.pageContent}`;
        };
      }
-      // Check if content is relevant
+      // Check if content is relevant (empty or very short response indicates not relevant)
-      if (!result.isRelevant) {
+      const trimmedSummary = summary.trim();
      if (trimmedSummary.length === 0 || trimmedSummary.length < 25) {
        console.log(
-          `LLM response for URL "${url}" indicates it's not relevant:`,
+          `LLM response for URL "${url}" indicates it's not relevant (empty or very short response)`,
          result.notRelevantReason || result.summary,
        );
        return { 
          document: null, 
-          notRelevantReason: result.notRelevantReason || result.summary 
+          notRelevantReason: 'Content not relevant to query' 
        };
      }
      // Content is relevant, create document with summary
      if (!result.summary || result.summary.trim().length === 0) {
        console.error(`No summary content in relevant response for URL: ${url}`);
        return {
          document: null,
          notRelevantReason: 'Summary content was empty',
        };
      }
      return {
        document: new Document({
-          pageContent: result.summary,
+          pageContent: trimmedSummary,
          metadata: {
            ...content.metadata,
            url: url,