From b97383bf0f7e81f771a7a2c644c86fc78df28a67 Mon Sep 17 00:00:00 2001
From: Willie Zutz
Date: Mon, 23 Jun 2025 01:28:54 -0600
Subject: [PATCH] feat(agent): Enhance AnalyzerAgent with previous analysis
 reasoning and improve prompt clarity in task breakdown and summarization

---
 src/lib/agents/analyzerAgent.ts        |  2 ++
 src/lib/prompts/analyzer.ts            | 17 ++++++--
 src/lib/prompts/taskBreakdown.ts       |  2 +
 src/lib/utils/analyzePreviewContent.ts | 56 ++++++++++++--------------
 src/lib/utils/summarizeWebContent.ts   | 55 ++++++++-----------------
 5 files changed, 60 insertions(+), 72 deletions(-)

diff --git a/src/lib/agents/analyzerAgent.ts b/src/lib/agents/analyzerAgent.ts
index 1eea51e..1551f83 100644
--- a/src/lib/agents/analyzerAgent.ts
+++ b/src/lib/agents/analyzerAgent.ts
@@ -157,6 +157,7 @@ export class AnalyzerAgent {
         .map((question) => `- ${question}`)
         .join('\n'),
       query: state.originalQuery || state.query, // Use original query for user info context
+      previousAnalysis: nextActionResponse.reasoning, // Include reasoning from previous analysis
     });
 
     const userInfoRequest = await userInfoLlm.invoke(
@@ -210,6 +211,7 @@ export class AnalyzerAgent {
         .map((question) => `- ${question}`)
         .join('\n'),
       query: state.originalQuery || state.query, // Use original query for more info context
+      previousAnalysis: nextActionResponse.reasoning, // Include reasoning from previous analysis
     });
 
     const searchRefinement = await searchRefinementLlm.invoke(
diff --git a/src/lib/prompts/analyzer.ts b/src/lib/prompts/analyzer.ts
index 2084162..4be5565 100644
--- a/src/lib/prompts/analyzer.ts
+++ b/src/lib/prompts/analyzer.ts
@@ -9,7 +9,7 @@ Your task is to analyze the provided context and determine if we have enough inf
 
 # Response Options Decision Tree
 ## Step 1: Check if content is sufficient
-- If your training data and the provided context contain enough information to answer the user's query → respond with \`good_content\`
+- If the provided context contains enough information to answer the user's query → respond with \`good_content\`
 - If the context fully answers the user's query with complete information → respond with \`good_content\`
 - If the user is requesting to use the existing context to answer their query → respond with \`good_content\`
 - If the user is requesting to avoid web searches → respond with \`good_content\`
@@ -57,9 +57,9 @@ Your task is to analyze the provided context and determine if we have enough inf
 - "Compare Tesla Model 3 vs BMW i4" (missing: detailed comparison data)
 
 # Critical Decision Point
-Ask yourself: "Could this missing information reasonably be found through a web search, or does it require the user to provide personal/subjective details?"
+Ask yourself: "Could this missing information reasonably be found through a web search, or does it require the user to provide specific details?"
 
-- If it's personal/subjective → \`need_user_info\`
+- If it's personal/subjective or requires user feedback → \`need_user_info\`
 - If it's factual and searchable → \`need_more_info\`
 - If the context is complete or the user wants to use the existing context → \`good_content\`
 
@@ -85,6 +85,10 @@ Provide your response as a JSON object with "action" and "reasoning" fields wher
 
 export const additionalUserInputPrompt = `You are an expert content analyzer.
 Your task is to analyze the provided context and user query to determine what additional information is needed to fully answer the user's query.
+# Previous Analysis
+- The LLM analyzed the provided context and user query and determined that additional information is needed to fully answer the user's query. Here is the analysis result:
+{previousAnalysis}
+
 # Refinement History
 - The following automated questions have already been asked to refine the search
 {searchInstructionHistory}
@@ -112,10 +116,15 @@ You are an expert content analyzer.
 Your task is to analyze the provided context and user query to determine what additional information is needed to fully answer the user's query.
 
 # Instructions
-- Respond with a detailed question that will be directed to an LLM to gather more specific information that can help refine the search.
+- Respond with a detailed question that will be directed to an LLM to create a web search instruction
+- The question should not require user input, but rather be designed to gather more specific information that can help refine the search
 - Avoid giving the same guidance more than once, and avoid repeating the same question multiple times
 - Avoid asking for general information or vague details; focus on specific, actionable questions that can lead to concrete answers
+# Previous Analysis
+- The LLM analyzed the provided context and user query and determined that additional information is needed to fully answer the user's query. Here is the analysis result:
+{previousAnalysis}
+
 # Refinement History
 - The following automated questions have already been asked to refine the search
 {searchInstructionHistory}
 
diff --git a/src/lib/prompts/taskBreakdown.ts b/src/lib/prompts/taskBreakdown.ts
index 9561906..152d83c 100644
--- a/src/lib/prompts/taskBreakdown.ts
+++ b/src/lib/prompts/taskBreakdown.ts
@@ -24,6 +24,8 @@ export const taskBreakdownPrompt = `You are a task breakdown specialist. Your jo
 5. Keep the **same question type** (factual, analytical, etc.)
 6. Avoid introducing **new concepts** or information not present in the original question
 7. **Do not** repeat the same question multiple times; each sub-question should be unique and focused on a specific aspect of the original query
+8. Questions should **not** require user input for additional context; they should be designed to be answered by an LLM or through research via web search
+9. Do not ask questions that are based on opinion, personal preference, usage habits, subjective interpretation, etc.
 
 ## Examples:
 
diff --git a/src/lib/utils/analyzePreviewContent.ts b/src/lib/utils/analyzePreviewContent.ts
index f8638bf..29d0aac 100644
--- a/src/lib/utils/analyzePreviewContent.ts
+++ b/src/lib/utils/analyzePreviewContent.ts
@@ -1,6 +1,6 @@
 import { BaseChatModel } from '@langchain/core/language_models/chat_models';
 import { BaseMessage } from '@langchain/core/messages';
-import LineOutputParser from '../outputParsers/lineOutputParser';
+import { z } from 'zod';
 import { formatDateForLLM } from '../utils';
 import { ChatOpenAI, OpenAIClient } from '@langchain/openai';
 import { removeThinkingBlocks } from './contentUtils';
@@ -16,6 +16,12 @@ export type PreviewContent = {
   url: string;
 };
 
+// Zod schema for structured preview analysis output
+const PreviewAnalysisSchema = z.object({
+  isSufficient: z.boolean().describe('Whether the preview content is sufficient to answer the task query'),
+  reason: z.string().optional().nullable().describe('Specific reason why full content analysis is required (only if isSufficient is false)')
+});
+
 export const analyzePreviewContent = async (
   previewContents: PreviewContent[],
   query: string,
@@ -60,17 +66,24 @@ Snippet: ${content.snippet}
 
     console.log(`Invoking LLM for preview content analysis`);
 
-    const analysisResponse = await llm.invoke(
+    // Create structured LLM with Zod schema
+    const structuredLLM = llm.withStructuredOutput(PreviewAnalysisSchema);
+
+    const analysisResult = await structuredLLM.invoke(
      `${systemPrompt}You are a preview content analyzer, tasked with determining if search result snippets contain sufficient information to answer the Task Query.
 
 # Instructions
 - Analyze the provided search result previews (titles + snippets), and chat history context to determine if they collectively contain enough information to provide a complete and accurate answer to the Task Query
 - You must make a binary decision: either the preview content is sufficient OR it is not sufficient
-- If the preview content can provide a complete answer to the Task Query, respond with "sufficient"
-- If the preview content lacks important details, requires deeper analysis, or cannot fully answer the Task Query, respond with "not_needed: [specific reason why full content analysis is required]"
+- If the preview content can provide a complete answer to the Task Query, set isSufficient to true
+- If the preview content lacks important details, requires deeper analysis, or cannot fully answer the Task Query, set isSufficient to false and provide a specific reason
 - Be specific in your reasoning when the content is not sufficient
 - The original query is provided for additional context, only use it for clarification of overall expectations and intent. You do **not** need to answer the original query directly or completely
-- Output your decision inside a \`decision\` XML tag
+
+# Response Format
+You must return a JSON object with:
+- isSufficient: boolean indicating whether preview content is sufficient
+- reason: string explaining why full content analysis is required (only if isSufficient is false)
 
 # Information Context:
 Today's date is ${formatDateForLLM(new Date())}
@@ -90,8 +103,8 @@ ${formattedPreviewContent}
       { signal },
     );
 
-    if (!analysisResponse || !analysisResponse.content) {
-      console.error('No analysis response returned from LLM');
+    if (!analysisResult) {
+      console.error('No analysis result returned from LLM');
       return {
         isSufficient: false,
         reason:
@@ -99,37 +112,20 @@ ${formattedPreviewContent}
       };
     }
 
-    const decisionParser = new LineOutputParser({ key: 'decision' });
-    const decision = await decisionParser.parse(
-      analysisResponse.content as string,
-    );
+    console.log(`LLM analysis result:`, analysisResult);
 
-    console.log(`LLM decision response:`, decision);
-
-    if (decision.toLowerCase().trim() === 'sufficient') {
+    if (analysisResult.isSufficient) {
       console.log(
         'Preview content determined to be sufficient for answering the query',
       );
       return { isSufficient: true };
-    } else if (decision.toLowerCase().startsWith('not_needed')) {
-      // Extract the reason from the "not_needed" response
-      const reason = decision.startsWith('not_needed')
-        ? decision.substring('not_needed:'.length).trim()
-        : 'Preview content insufficient for complete answer';
-
-      console.log(
-        `Preview content determined to be insufficient. Reason: ${reason}`,
-      );
-      return { isSufficient: false, reason };
     } else {
-      // Default to not sufficient if unclear response
       console.log(
-        `Unclear LLM response, defaulting to insufficient: ${decision}`,
+        `Preview content determined to be insufficient. Reason: ${analysisResult.reason}`,
      );
-      return {
-        isSufficient: false,
-        reason:
-          'Unclear analysis response - falling back to full content processing',
+      return {
+        isSufficient: false,
+        reason: analysisResult.reason || 'Preview content insufficient for complete answer'
      };
    }
  } catch (error) {
diff --git a/src/lib/utils/summarizeWebContent.ts b/src/lib/utils/summarizeWebContent.ts
index f78287a..0ad494c 100644
--- a/src/lib/utils/summarizeWebContent.ts
+++ b/src/lib/utils/summarizeWebContent.ts
@@ -1,21 +1,14 @@
 import { Document } from '@langchain/core/documents';
 import { BaseChatModel } from '@langchain/core/language_models/chat_models';
-import { z } from 'zod';
 import { formatDateForLLM } from '../utils';
 import { getWebContent } from './documents';
+import { removeThinkingBlocks } from './contentUtils';
 
 export type SummarizeResult = {
   document: Document | null;
   notRelevantReason?: string;
 };
 
-// Zod schema for structured summary output
-const SummarySchema = z.object({
-  isRelevant: z.boolean().describe('Whether the content is relevant to the user query'),
-  summary: z.string().describe('Detailed summary of the content in markdown format, or explanation if not relevant'),
-  notRelevantReason: z.string().optional().describe('Specific reason why content is not relevant (only if isRelevant is false)')
-});
-
 export const summarizeWebContent = async (
   url: string,
   query: string,
@@ -32,10 +25,7 @@ export const summarizeWebContent = async (
     ? `${systemInstructions}\n\n`
     : '';
 
-  // Create structured LLM with Zod schema
-  const structuredLLM = llm.withStructuredOutput(SummarySchema);
-
-  let result = null;
+  let summary = null;
   for (let i = 0; i < 2; i++) {
     try {
       console.log(
@@ -45,30 +35,28 @@ export const summarizeWebContent = async (
       const prompt = `${systemPrompt}You are a web content summarizer, tasked with creating a detailed, accurate summary of content from a webpage.
 
 # Instructions
-- Determine if the content is relevant to the user's query
-- You do not need to provide a full answer to the query, partial answers are acceptable
-- If relevant, create a thorough and comprehensive summary capturing all key points
+- First determine if the content is relevant to the user's query
+- You do not need to provide a full answer to the query in order to be relevant; partial answers are acceptable
+- If the content is relevant, return a thorough and comprehensive summary capturing all key points
 - Include specific details, numbers, and quotes when relevant
 - Be concise and to the point, avoiding unnecessary fluff
 - Format the summary using markdown with headings and lists
 - Include useful links to external resources, if applicable
-- If the content is not relevant, set isRelevant to false and provide a specific reason
 
-# Response Format
-You must return a JSON object with:
-- isRelevant: boolean indicating if content is relevant to the query
-- summary: string with detailed markdown summary if relevant, or explanation if not relevant
-- notRelevantReason: string explaining why content is not relevant (only if isRelevant is false)
+# Decision Tree
+- If the content is NOT relevant to the query, do not provide a summary; respond with only the text 'not_relevant'
+- If the content is relevant, return a detailed summary following the instructions above
 
 Today's date is ${formatDateForLLM(new Date())}
 
 Here is the query you need to answer:
 ${query}
 
 Here is the content to summarize:
 ${i === 0 ? content.metadata.html : content.pageContent}`;
 
-      result = await structuredLLM.invoke(prompt, { signal });
+      const result = await llm.invoke(prompt, { signal });
+      summary = removeThinkingBlocks(result.content as string);
       break;
     } catch (error) {
       console.error(
@@ -77,7 +65,7 @@ ${i === 0 ? content.metadata.html : content.pageContent}`;
     }
   }
 
-  if (!result) {
+  if (!summary) {
     console.error(`No summary result returned for URL: ${url}`);
     return {
       document: null,
@@ -85,31 +73,22 @@ ${i === 0 ? content.metadata.html : content.pageContent}`;
     };
   }
 
-  // Check if content is relevant
-  if (!result.isRelevant) {
+  // Check if content is relevant (a 'not_relevant' reply or a very short response indicates not relevant)
+  const trimmedSummary = summary.trim();
+  if (trimmedSummary.length < 25 || trimmedSummary.toLowerCase().includes('not_relevant')) {
     console.log(
-      `LLM response for URL "${url}" indicates it's not relevant:`,
-      result.notRelevantReason || result.summary,
+      `LLM response for URL "${url}" indicates it's not relevant (not_relevant marker or very short response)`,
    );
    return {
      document: null,
-      notRelevantReason: result.notRelevantReason || result.summary
-    };
-  }
-
-  // Content is relevant, create document with summary
-  if (!result.summary || result.summary.trim().length === 0) {
-    console.error(`No summary content in relevant response for URL: ${url}`);
-    return {
-      document: null,
-      notRelevantReason: 'Summary content was empty',
+      notRelevantReason: 'Content not relevant to query'
    };
  }

  return {
    document: new Document({
-      pageContent: result.summary,
+      pageContent: trimmedSummary,
      metadata: {
        ...content.metadata,
        url: url,