feat(agent): Improve relevance checking and preview content handling in web search agent.

Better Groq support.
This commit is contained in:
Willie Zutz 2025-07-01 00:36:12 -06:00
parent 2eb0d60918
commit b392aa2c21
4 changed files with 144 additions and 134 deletions

View file

@ -181,15 +181,25 @@ export class WebSearchAgent {
}),
);
// Sort by relevance score and take top 12 results
const previewContents: PreviewContent[] = resultsWithSimilarity
let previewContents: PreviewContent[] = [];
// Always take the top 3 results for preview content
previewContents.push(...filteredResults.slice(0, 3)
.map((result) => ({
title: result.title || 'Untitled',
snippet: result.content || '',
url: result.url,
}))
);
// Sort by relevance score and take top 12 results for a total of 15
previewContents.push(...resultsWithSimilarity.slice(3)
.sort((a, b) => b.similarity - a.similarity)
.slice(0, 12)
.map(({ result }) => ({
title: result.title || 'Untitled',
snippet: result.content || '',
url: result.url,
}));
})));
console.log(
`Extracted preview content from ${previewContents.length} search results for analysis`,
@ -306,9 +316,7 @@ export class WebSearchAgent {
});
// Summarize each of the collected preview results
for (const result of resultsWithSimilarity
.slice(0, 12)
.map((r) => r.result)) {
for (const result of previewContents) {
if (this.signal.aborted) {
console.warn('Search operation aborted by signal');
break; // Exit if the operation is aborted

View file

@ -1,91 +1,91 @@
import { ChatOpenAI } from '@langchain/openai';
import { getGroqApiKey } from '../config';
import { ChatModel } from '.';
import { BaseChatModel } from '@langchain/core/language_models/chat_models';
export const PROVIDER_INFO = {
key: 'groq',
displayName: 'Groq',
};
import { BaseChatModel } from '@langchain/core/language_models/chat_models';
// Static fallback catalogue of Groq chat models, kept as compact
// (displayName, key) pairs and expanded into the record shape the
// model loader expects.
const groqChatModels: Record<string, string>[] = (
  [
    ['Gemma2 9B IT', 'gemma2-9b-it'],
    ['Llama 3.3 70B Versatile', 'llama-3.3-70b-versatile'],
    ['Llama 3.1 8B Instant', 'llama-3.1-8b-instant'],
    ['Llama3 70B 8192', 'llama3-70b-8192'],
    ['Llama3 8B 8192', 'llama3-8b-8192'],
    ['Mixtral 8x7B 32768', 'mixtral-8x7b-32768'],
    ['Qwen QWQ 32B (Preview)', 'qwen-qwq-32b'],
    ['Mistral Saba 24B (Preview)', 'mistral-saba-24b'],
    ['Qwen 2.5 Coder 32B (Preview)', 'qwen-2.5-coder-32b'],
    ['Qwen 2.5 32B (Preview)', 'qwen-2.5-32b'],
    ['DeepSeek R1 Distill Qwen 32B (Preview)', 'deepseek-r1-distill-qwen-32b'],
    ['DeepSeek R1 Distill Llama 70B (Preview)', 'deepseek-r1-distill-llama-70b'],
    ['Llama 3.3 70B SpecDec (Preview)', 'llama-3.3-70b-specdec'],
    ['Llama 3.2 1B Preview (Preview)', 'llama-3.2-1b-preview'],
    ['Llama 3.2 3B Preview (Preview)', 'llama-3.2-3b-preview'],
    ['Llama 3.2 11B Vision Preview (Preview)', 'llama-3.2-11b-vision-preview'],
    ['Llama 3.2 90B Vision Preview (Preview)', 'llama-3.2-90b-vision-preview'],
    /* ['Llama 4 Maverick 17B 128E Instruct (Preview)', 'meta-llama/llama-4-maverick-17b-128e-instruct'], */
    ['Llama 4 Scout 17B 16E Instruct (Preview)', 'meta-llama/llama-4-scout-17b-16e-instruct'],
  ] as const
).map(([displayName, key]) => ({ displayName, key }));
/**
 * A single model entry as returned by the Groq `/openai/v1/models` endpoint.
 * Only the fields this file reads (`id`, `owned_by`, `active`) are used;
 * the rest are kept so the response parses losslessly.
 */
interface GroqModel {
  id: string;
  object: string;
  created: number; // Unix timestamp
  owned_by: string;
  active: boolean; // inactive models are filtered out before registration
  context_window: number;
  max_completion_tokens: number;
}
/** Envelope shape of the Groq list-models response (`{ object, data }`). */
interface GroqModelsResponse {
  object: string;
  data: GroqModel[];
}
/**
 * Produce a human-readable display name for a Groq model id.
 *
 * @param modelId - raw model id from the Groq API (may be namespaced, e.g. `meta-llama/...`)
 * @param ownedBy - owning organization from the API; currently unused, kept
 *                  for interface stability so callers need not change
 * @returns a curated name when known, otherwise a title-cased form of the id
 */
const generateDisplayName = (modelId: string, ownedBy: string): string => {
  // Hand-curated names for ids whose auto-generated form would read poorly.
  const modelMap: Record<string, string> = {
    'gemma2-9b-it': 'Gemma2 9B IT',
    'llama-3.3-70b-versatile': 'Llama 3.3 70B Versatile',
    'llama-3.1-8b-instant': 'Llama 3.1 8B Instant',
    'llama3-70b-8192': 'Llama3 70B 8192',
    'llama3-8b-8192': 'Llama3 8B 8192',
    'mixtral-8x7b-32768': 'Mixtral 8x7B 32768',
    'qwen-qwq-32b': 'Qwen QWQ 32B',
    'mistral-saba-24b': 'Mistral Saba 24B',
    'deepseek-r1-distill-llama-70b': 'DeepSeek R1 Distill Llama 70B',
    'deepseek-r1-distill-qwen-32b': 'DeepSeek R1 Distill Qwen 32B',
  };
  // Return mapped name if available
  if (modelMap[modelId]) {
    return modelMap[modelId];
  }
  // Strip any vendor namespace prefix (e.g. 'meta-llama/') BEFORE formatting.
  // The previous post-hoc replace('Meta Llama/', '') left a lowercase fragment
  // ('llama 4 Scout ...') because '/' survives the dash/underscore split.
  const bareId = modelId.includes('/')
    ? modelId.slice(modelId.lastIndexOf('/') + 1)
    : modelId;
  // Title-case each dash/underscore-separated token; size/variant tokens such
  // as '17b' or '16e' are fully uppercased ('17B', '16E') to match the
  // curated names used elsewhere in this file.
  return bareId
    .replace(/[-_]/g, ' ')
    .split(' ')
    .map((word) =>
      /^\d+[a-z]+$/.test(word)
        ? word.toUpperCase()
        : word.charAt(0).toUpperCase() + word.slice(1),
    )
    .join(' ');
};
/**
 * Fetch the live model list from the Groq API and keep only active
 * chat-completion models (audio, TTS and safety/guard models are excluded).
 * Returns an empty array on any network or HTTP failure so callers can
 * fall back gracefully.
 */
const fetchGroqModels = async (apiKey: string): Promise<GroqModel[]> => {
  // Id substrings that identify non-chat models to exclude.
  const excludedFragments = ['whisper', 'tts', 'guard', 'prompt-guard'];
  try {
    const response = await fetch('https://api.groq.com/openai/v1/models', {
      headers: {
        'Authorization': `Bearer ${apiKey}`,
        'Content-Type': 'application/json',
      },
    });
    if (!response.ok) {
      throw new Error(`Failed to fetch models: ${response.status} ${response.statusText}`);
    }
    const data: GroqModelsResponse = await response.json();
    // Filter for active chat completion models (exclude audio/whisper models)
    return data.data.filter((model) => {
      if (!model.active) {
        return false;
      }
      return !excludedFragments.some((fragment) => model.id.includes(fragment));
    });
  } catch (error) {
    console.error('Error fetching Groq models:', error);
    return [];
  }
};
export const loadGroqChatModels = async () => {
const groqApiKey = getGroqApiKey();
@ -95,12 +95,15 @@ export const loadGroqChatModels = async () => {
try {
const chatModels: Record<string, ChatModel> = {};
groqChatModels.forEach((model) => {
chatModels[model.key] = {
displayName: model.displayName,
// Fetch available models from Groq API
const availableModels = await fetchGroqModels(groqApiKey);
availableModels.forEach((model) => {
chatModels[model.id] = {
displayName: generateDisplayName(model.id, model.owned_by),
model: new ChatOpenAI({
openAIApiKey: groqApiKey,
modelName: model.key,
modelName: model.id,
// temperature: 0.7,
configuration: {
baseURL: 'https://api.groq.com/openai/v1',

View file

@ -83,9 +83,8 @@ Snippet: ${content.snippet}
# Instructions
- Analyze the provided search result previews (titles + snippets), and chat history context to determine if they collectively contain enough information to provide a complete and accurate answer to the Task Query
- You must make a binary decision: either the preview content is sufficient OR it is not sufficient
- If the preview content can provide a complete answer to the Task Query, set isSufficient to true
- If the preview content lacks important details, requires deeper analysis, or cannot fully answer the Task Query, set isSufficient to false and provide a specific reason
- If the preview content can provide a complete answer to the Task Query, consider it sufficient
- If the preview content lacks important details, requires deeper analysis, or cannot fully answer the Task Query, consider it insufficient
- Be specific in your reasoning when the content is not sufficient
- The original query is provided for additional context, only use it for clarification of overall expectations and intent. You do **not** need to answer the original query directly or completely
@ -103,11 +102,6 @@ ${taskQuery}
# Search Result Previews to Analyze:
${formattedPreviewContent}
# Response Format
You must return a JSON object with:
- isSufficient: boolean indicating whether preview content is sufficient
- reason: string explaining why full content analysis is required (only if isSufficient is false)
`,
{ signal },
);

View file

@ -1,5 +1,6 @@
import { Document } from '@langchain/core/documents';
import { BaseChatModel } from '@langchain/core/language_models/chat_models';
import { z } from 'zod';
import { formatDateForLLM } from '../utils';
import { getWebContent } from './documents';
import { removeThinkingBlocks } from './contentUtils';
@ -10,6 +11,16 @@ export type SummarizeResult = {
notRelevantReason?: string;
};
// Zod schema for structured relevance check output.
// Passed to llm.withStructuredOutput(...) below so the model's reply is
// validated into { relevant, reason } instead of hand-parsing JSON text;
// the .describe() strings are surfaced to the LLM as field instructions.
const RelevanceCheckSchema = z.object({
  relevant: z
    .boolean()
    .describe('Whether the content is relevant to the user query'),
  reason: z
    .string()
    .describe('Brief explanation of why content is or isn\'t relevant'),
});
export const summarizeWebContent = async (
url: string,
query: string,
@ -37,55 +48,49 @@ export const summarizeWebContent = async (
`Short content detected (${contentToAnalyze.length} chars) for URL: ${url}, checking relevance only`,
);
const relevancePrompt = `${systemPrompt}You are a content relevance checker. Your task is to determine if the given content is relevant to the user's query.
try {
// Create structured LLM with Zod schema
const structuredLLM = llm.withStructuredOutput(RelevanceCheckSchema);
const relevanceResult = await structuredLLM.invoke(
`${systemPrompt}You are a content relevance checker. Your task is to determine if the given content is relevant to the user's query.
# Instructions
- Analyze the content to determine if it contains information relevant to the user's query
- You do not need to provide a full answer to the query in order to be relevant, partial answers are acceptable
- Respond with valid JSON in the following format:
{
"relevant": true/false,
"reason": "brief explanation of why content is or isn't relevant"
}
- Provide a brief explanation of your reasoning
Today's date is ${formatDateForLLM(new Date())}
Here is the query you need to answer: ${query}
Here is the content to analyze:
${contentToAnalyze}`;
${contentToAnalyze}`,
{ signal }
);
try {
const result = await llm.invoke(relevancePrompt, { signal });
const responseText = removeThinkingBlocks(result.content as string).trim();
try {
const parsedResponse = JSON.parse(responseText);
if (parsedResponse.relevant === true) {
console.log(`Short content for URL "${url}" is relevant: ${parsedResponse.reason}`);
return {
document: new Document({
pageContent: content.pageContent,
metadata: {
...content.metadata,
url: url,
processingType: 'short-content',
},
}),
notRelevantReason: undefined,
};
} else {
console.log(`Short content for URL "${url}" is not relevant: ${parsedResponse.reason}`);
return {
document: null,
notRelevantReason: parsedResponse.reason || 'Content not relevant to query',
};
}
} catch (parseError) {
console.error(`Error parsing JSON response for URL ${url}:`, parseError);
console.error(`Raw response:`, responseText);
if (!relevanceResult) {
console.error(`No relevance result returned for URL ${url}`);
// Fall through to full summarization as fallback
} else if (relevanceResult.relevant) {
console.log(`Short content for URL "${url}" is relevant: ${relevanceResult.reason}`);
return {
document: new Document({
pageContent: content.pageContent,
metadata: {
...content.metadata,
url: url,
processingType: 'short-content',
},
}),
notRelevantReason: undefined,
};
} else {
console.log(`Short content for URL "${url}" is not relevant: ${relevanceResult.reason}`);
return {
document: null,
notRelevantReason: relevanceResult.reason || 'Content not relevant to query',
};
}
} catch (error) {
console.error(`Error checking relevance for short content from URL ${url}:`, error);