diff --git a/src/lib/agents/webSearchAgent.ts b/src/lib/agents/webSearchAgent.ts
index 4508210..2c37c67 100644
--- a/src/lib/agents/webSearchAgent.ts
+++ b/src/lib/agents/webSearchAgent.ts
@@ -181,15 +181,25 @@ export class WebSearchAgent {
       }),
     );
 
-    // Sort by relevance score and take top 12 results
-    const previewContents: PreviewContent[] = resultsWithSimilarity
+    let previewContents: PreviewContent[] = [];
+    // Always take the top 3 results for preview content
+    previewContents.push(...filteredResults.slice(0, 3)
+      .map((result) => ({
+        title: result.title || 'Untitled',
+        snippet: result.content || '',
+        url: result.url,
+      }))
+    );
+
+    // Sort by relevance score and take top 12 results for a total of 15
+    previewContents.push(...resultsWithSimilarity.slice(3)
       .sort((a, b) => b.similarity - a.similarity)
       .slice(0, 12)
       .map(({ result }) => ({
         title: result.title || 'Untitled',
         snippet: result.content || '',
         url: result.url,
-      }));
+      })));
 
     console.log(
       `Extracted preview content from ${previewContents.length} search results for analysis`,
@@ -306,9 +316,7 @@ export class WebSearchAgent {
     });
 
     // Summarize the top 2 search results
-    for (const result of resultsWithSimilarity
-      .slice(0, 12)
-      .map((r) => r.result)) {
+    for (const result of previewContents) {
      if (this.signal.aborted) {
        console.warn('Search operation aborted by signal');
        break; // Exit if the operation is aborted
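Note on the selection change above: the first three filtered results are now always previewed, and the remaining slots are filled from the similarity-ranked tail, so a top-ranked search hit can no longer be starved out by a low embedding score. Below is a standalone sketch of the same policy; `SearchResult` and `ScoredResult` are illustrative stand-ins for the real types, and the sketch assumes `resultsWithSimilarity` preserves the order of `filteredResults`, which the `.slice(3)` in the diff relies on:

```ts
// Sketch of the selection policy: always keep the first `guaranteed`
// results, then fill the remaining slots by descending similarity.
// `SearchResult`/`ScoredResult` are hypothetical stand-ins, not the real types.
interface SearchResult {
  title?: string;
  content?: string;
  url: string;
}

interface ScoredResult {
  result: SearchResult;
  similarity: number;
}

const toPreview = (r: SearchResult) => ({
  title: r.title || 'Untitled',
  snippet: r.content || '',
  url: r.url,
});

function selectPreviews(
  filtered: SearchResult[],
  scored: ScoredResult[],
  guaranteed = 3,
  bySimilarity = 12,
) {
  const head = filtered.slice(0, guaranteed).map(toPreview);
  // Skip the first `guaranteed` entries so the two slices cannot overlap;
  // this only holds if `scored` preserves the order of `filtered`.
  const tail = scored
    .slice(guaranteed)
    .sort((a, b) => b.similarity - a.similarity)
    .slice(0, bySimilarity)
    .map(({ result }) => toPreview(result));
  return [...head, ...tail];
}
```

If that ordering assumption ever breaks, the two slices could duplicate URLs; deduplicating by `url` before pushing would make the invariant explicit.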
diff --git a/src/lib/providers/groq.ts b/src/lib/providers/groq.ts
index 0cb376b..081e308 100644
--- a/src/lib/providers/groq.ts
+++ b/src/lib/providers/groq.ts
@@ -1,91 +1,91 @@
 import { ChatOpenAI } from '@langchain/openai';
 import { getGroqApiKey } from '../config';
 import { ChatModel } from '.';
+import { BaseChatModel } from '@langchain/core/language_models/chat_models';
 
 export const PROVIDER_INFO = {
   key: 'groq',
   displayName: 'Groq',
 };
-import { BaseChatModel } from '@langchain/core/language_models/chat_models';
 
-const groqChatModels: Record<string, any>[] = [
-  {
-    displayName: 'Gemma2 9B IT',
-    key: 'gemma2-9b-it',
-  },
-  {
-    displayName: 'Llama 3.3 70B Versatile',
-    key: 'llama-3.3-70b-versatile',
-  },
-  {
-    displayName: 'Llama 3.1 8B Instant',
-    key: 'llama-3.1-8b-instant',
-  },
-  {
-    displayName: 'Llama3 70B 8192',
-    key: 'llama3-70b-8192',
-  },
-  {
-    displayName: 'Llama3 8B 8192',
-    key: 'llama3-8b-8192',
-  },
-  {
-    displayName: 'Mixtral 8x7B 32768',
-    key: 'mixtral-8x7b-32768',
-  },
-  {
-    displayName: 'Qwen QWQ 32B (Preview)',
-    key: 'qwen-qwq-32b',
-  },
-  {
-    displayName: 'Mistral Saba 24B (Preview)',
-    key: 'mistral-saba-24b',
-  },
-  {
-    displayName: 'Qwen 2.5 Coder 32B (Preview)',
-    key: 'qwen-2.5-coder-32b',
-  },
-  {
-    displayName: 'Qwen 2.5 32B (Preview)',
-    key: 'qwen-2.5-32b',
-  },
-  {
-    displayName: 'DeepSeek R1 Distill Qwen 32B (Preview)',
-    key: 'deepseek-r1-distill-qwen-32b',
-  },
-  {
-    displayName: 'DeepSeek R1 Distill Llama 70B (Preview)',
-    key: 'deepseek-r1-distill-llama-70b',
-  },
-  {
-    displayName: 'Llama 3.3 70B SpecDec (Preview)',
-    key: 'llama-3.3-70b-specdec',
-  },
-  {
-    displayName: 'Llama 3.2 1B Preview (Preview)',
-    key: 'llama-3.2-1b-preview',
-  },
-  {
-    displayName: 'Llama 3.2 3B Preview (Preview)',
-    key: 'llama-3.2-3b-preview',
-  },
-  {
-    displayName: 'Llama 3.2 11B Vision Preview (Preview)',
-    key: 'llama-3.2-11b-vision-preview',
-  },
-  {
-    displayName: 'Llama 3.2 90B Vision Preview (Preview)',
-    key: 'llama-3.2-90b-vision-preview',
-  },
-  /* {
-    displayName: 'Llama 4 Maverick 17B 128E Instruct (Preview)',
-    key: 'meta-llama/llama-4-maverick-17b-128e-instruct',
-  }, */
-  {
-    displayName: 'Llama 4 Scout 17B 16E Instruct (Preview)',
-    key: 'meta-llama/llama-4-scout-17b-16e-instruct',
-  },
-];
+interface GroqModel {
+  id: string;
+  object: string;
+  created: number;
+  owned_by: string;
+  active: boolean;
+  context_window: number;
+  max_completion_tokens: number;
+}
+
+interface GroqModelsResponse {
+  object: string;
+  data: GroqModel[];
+}
+
+const generateDisplayName = (modelId: string, ownedBy: string): string => {
+  // Handle special cases for better display names
+  const modelMap: Record<string, string> = {
+    'gemma2-9b-it': 'Gemma2 9B IT',
+    'llama-3.3-70b-versatile': 'Llama 3.3 70B Versatile',
+    'llama-3.1-8b-instant': 'Llama 3.1 8B Instant',
+    'llama3-70b-8192': 'Llama3 70B 8192',
+    'llama3-8b-8192': 'Llama3 8B 8192',
+    'mixtral-8x7b-32768': 'Mixtral 8x7B 32768',
+    'qwen-qwq-32b': 'Qwen QWQ 32B',
+    'mistral-saba-24b': 'Mistral Saba 24B',
+    'deepseek-r1-distill-llama-70b': 'DeepSeek R1 Distill Llama 70B',
+    'deepseek-r1-distill-qwen-32b': 'DeepSeek R1 Distill Qwen 32B',
+  };
+
+  // Return mapped name if available
+  if (modelMap[modelId]) {
+    return modelMap[modelId];
+  }
+
+  // Generate display name from model ID
+  let displayName = modelId
+    .replace(/[-_]/g, ' ')
+    .split(' ')
+    .map(word => word.charAt(0).toUpperCase() + word.slice(1))
+    .join(' ');
+
+  // Strip the owner prefix for namespaced models
+  if (modelId.includes('meta-llama/')) {
+    displayName = displayName.replace('Meta Llama/', '');
+  }
+
+  return displayName;
+};
+
+const fetchGroqModels = async (apiKey: string): Promise<GroqModel[]> => {
+  try {
+    const response = await fetch('https://api.groq.com/openai/v1/models', {
+      headers: {
+        'Authorization': `Bearer ${apiKey}`,
+        'Content-Type': 'application/json',
+      },
+    });
+
+    if (!response.ok) {
+      throw new Error(`Failed to fetch models: ${response.status} ${response.statusText}`);
+    }
+
+    const data: GroqModelsResponse = await response.json();
+
+    // Filter for active chat-completion models (exclude whisper/TTS/guard models)
+    return data.data.filter(model =>
+      model.active &&
+      !model.id.includes('whisper') &&
+      !model.id.includes('tts') &&
+      !model.id.includes('guard') &&
+      !model.id.includes('prompt-guard')
+    );
+  } catch (error) {
+    console.error('Error fetching Groq models:', error);
+    return [];
+  }
+};
 
 export const loadGroqChatModels = async () => {
   const groqApiKey = getGroqApiKey();
@@ -95,12 +95,15 @@ export const loadGroqChatModels = async () => {
   try {
     const chatModels: Record<string, ChatModel> = {};
 
-    groqChatModels.forEach((model) => {
-      chatModels[model.key] = {
-        displayName: model.displayName,
+    // Fetch available models from Groq API
+    const availableModels = await fetchGroqModels(groqApiKey);
+
+    availableModels.forEach((model) => {
+      chatModels[model.id] = {
+        displayName: generateDisplayName(model.id, model.owned_by),
         model: new ChatOpenAI({
           openAIApiKey: groqApiKey,
-          modelName: model.key,
+          modelName: model.id,
          // temperature: 0.7,
          configuration: {
            baseURL: 'https://api.groq.com/openai/v1',
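Because `loadGroqChatModels` now makes a network call on every invocation, callers that rebuild the model list frequently will refetch identical data from the models endpoint. A small in-memory cache around `fetchGroqModels` is one option; the sketch below is meant to live in the same module and reuse the `GroqModel` type and `fetchGroqModels` from this diff, while the module-level `cached` variable and the 10-minute TTL are assumptions, not part of the change:

```ts
// Minimal in-memory cache around the models endpoint so repeated loads
// don't refetch. The 10-minute TTL is an arbitrary illustrative choice.
let cached: { models: GroqModel[]; fetchedAt: number } | null = null;
const TTL_MS = 10 * 60 * 1000;

const fetchGroqModelsCached = async (apiKey: string): Promise<GroqModel[]> => {
  if (cached && Date.now() - cached.fetchedAt < TTL_MS) {
    return cached.models;
  }
  const models = await fetchGroqModels(apiKey);
  // Only cache non-empty results so a transient fetch failure
  // (fetchGroqModels returns [] on error) doesn't stick for the whole TTL.
  if (models.length > 0) {
    cached = { models, fetchedAt: Date.now() };
  }
  return models;
};
```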
diff --git a/src/lib/utils/analyzePreviewContent.ts b/src/lib/utils/analyzePreviewContent.ts
index be731e7..f1aa11a 100644
--- a/src/lib/utils/analyzePreviewContent.ts
+++ b/src/lib/utils/analyzePreviewContent.ts
@@ -83,9 +83,8 @@ Snippet: ${content.snippet}
 
 # Instructions
 - Analyze the provided search result previews (titles + snippets), and chat history context to determine if they collectively contain enough information to provide a complete and accurate answer to the Task Query
-- You must make a binary decision: either the preview content is sufficient OR it is not sufficient
-- If the preview content can provide a complete answer to the Task Query, set isSufficient to true
-- If the preview content lacks important details, requires deeper analysis, or cannot fully answer the Task Query, set isSufficient to false and provide a specific reason
+- If the preview content can provide a complete answer to the Task Query, consider it sufficient
+- If the preview content lacks important details, requires deeper analysis, or cannot fully answer the Task Query, consider it insufficient
 - Be specific in your reasoning when the content is not sufficient
 - The original query is provided for additional context, only use it for clarification of overall expectations and intent. You do **not** need to answer the original query directly or completely
 
@@ -103,11 +102,6 @@ ${taskQuery}
 
 # Search Result Previews to Analyze:
 ${formattedPreviewContent}
-
-# Response Format
-You must return a JSON object with:
-- isSufficient: boolean indicating whether preview content is sufficient
-- reason: string explaining why full content analysis is required (only if isSufficient is false)
 `,
     { signal },
   );
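Dropping the `# Response Format` block from the prompt only works if the output schema is enforced some other way, presumably via `withStructuredOutput` in the same style as the summarizeWebContent.ts change below. The rest of analyzePreviewContent.ts is not shown in this diff, so the following is a hypothetical sketch of what the matching Zod schema might look like; the names `SufficiencyCheckSchema` and its fields mirror the removed prose instructions rather than confirmed code:

```ts
import { z } from 'zod';

// Hypothetical schema mirroring the removed "# Response Format" prose;
// the real analyzePreviewContent.ts may define this differently.
const SufficiencyCheckSchema = z.object({
  isSufficient: z
    .boolean()
    .describe('Whether the preview content is sufficient to answer the Task Query'),
  reason: z
    .string()
    .optional()
    .describe('Why full content analysis is required (only when insufficient)'),
});

// Presumed usage, matching the pattern in summarizeWebContent.ts:
// const structuredLLM = llm.withStructuredOutput(SufficiencyCheckSchema);
```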
diff --git a/src/lib/utils/summarizeWebContent.ts b/src/lib/utils/summarizeWebContent.ts
index fc6ca4c..ee49926 100644
--- a/src/lib/utils/summarizeWebContent.ts
+++ b/src/lib/utils/summarizeWebContent.ts
@@ -1,5 +1,6 @@
 import { Document } from '@langchain/core/documents';
 import { BaseChatModel } from '@langchain/core/language_models/chat_models';
+import { z } from 'zod';
 import { formatDateForLLM } from '../utils';
 import { getWebContent } from './documents';
 import { removeThinkingBlocks } from './contentUtils';
@@ -10,6 +11,16 @@ export type SummarizeResult = {
   notRelevantReason?: string;
 };
 
+// Zod schema for structured relevance check output
+const RelevanceCheckSchema = z.object({
+  relevant: z
+    .boolean()
+    .describe('Whether the content is relevant to the user query'),
+  reason: z
+    .string()
+    .describe('Brief explanation of why content is or isn\'t relevant'),
+});
+
 export const summarizeWebContent = async (
   url: string,
   query: string,
@@ -37,55 +48,49 @@
     `Short content detected (${contentToAnalyze.length} chars) for URL: ${url}, checking relevance only`,
   );
 
-  const relevancePrompt = `${systemPrompt}You are a content relevance checker. Your task is to determine if the given content is relevant to the user's query.
+  try {
+    // Create structured LLM with Zod schema
+    const structuredLLM = llm.withStructuredOutput(RelevanceCheckSchema);
+
+    const relevanceResult = await structuredLLM.invoke(
+      `${systemPrompt}You are a content relevance checker. Your task is to determine if the given content is relevant to the user's query.
 
 # Instructions
 - Analyze the content to determine if it contains information relevant to the user's query
 - You do not need to provide a full answer to the query in order to be relevant, partial answers are acceptable
-- Respond with valid JSON in the following format:
-{
-  "relevant": true/false,
-  "reason": "brief explanation of why content is or isn't relevant"
-}
+- Provide a brief explanation of your reasoning
 
 Today's date is ${formatDateForLLM(new Date())}
 
 Here is the query you need to answer:
 ${query}
 
 Here is the content to analyze:
-${contentToAnalyze}`;
+${contentToAnalyze}`,
+      { signal }
+    );
 
-  try {
-    const result = await llm.invoke(relevancePrompt, { signal });
-    const responseText = removeThinkingBlocks(result.content as string).trim();
-
-    try {
-      const parsedResponse = JSON.parse(responseText);
-
-      if (parsedResponse.relevant === true) {
-        console.log(`Short content for URL "${url}" is relevant: ${parsedResponse.reason}`);
-        return {
-          document: new Document({
-            pageContent: content.pageContent,
-            metadata: {
-              ...content.metadata,
-              url: url,
-              processingType: 'short-content',
-            },
-          }),
-          notRelevantReason: undefined,
-        };
-      } else {
-        console.log(`Short content for URL "${url}" is not relevant: ${parsedResponse.reason}`);
-        return {
-          document: null,
-          notRelevantReason: parsedResponse.reason || 'Content not relevant to query',
-        };
-      }
-    } catch (parseError) {
-      console.error(`Error parsing JSON response for URL ${url}:`, parseError);
-      console.error(`Raw response:`, responseText);
+    if (!relevanceResult) {
+      console.error(`No relevance result returned for URL ${url}`);
       // Fall through to full summarization as fallback
+    } else if (relevanceResult.relevant) {
+      console.log(`Short content for URL "${url}" is relevant: ${relevanceResult.reason}`);
+      return {
+        document: new Document({
+          pageContent: content.pageContent,
+          metadata: {
+            ...content.metadata,
+            url: url,
+            processingType: 'short-content',
+          },
+        }),
+        notRelevantReason: undefined,
+      };
+    } else {
+      console.log(`Short content for URL "${url}" is not relevant: ${relevanceResult.reason}`);
+      return {
+        document: null,
+        notRelevantReason: relevanceResult.reason || 'Content not relevant to query',
+      };
+    }
  } catch (error) {
    console.error(`Error checking relevance for short content from URL ${url}:`, error);