Perplexica/src/lib/utils/analyzePreviewContent.ts

149 lines
4.9 KiB
TypeScript

import { BaseChatModel } from '@langchain/core/language_models/chat_models';
import { BaseMessage } from '@langchain/core/messages';
import { z } from 'zod';
import { formatDateForLLM } from '../utils';
import { ChatOpenAI, OpenAIClient } from '@langchain/openai';
import { removeThinkingBlocks } from './contentUtils';
/**
 * Verdict produced by the preview analysis step.
 */
export type PreviewAnalysisResult = {
  /** True when the search-result previews were judged sufficient. */
  isSufficient: boolean;
  /** Explanation attached when the previews were not judged sufficient. */
  reason?: string;
};
/**
 * A single search-result preview: the pieces of a hit that are available
 * without fetching the full page.
 */
export type PreviewContent = {
  /** Title of the search result. */
  title: string;
  /** Short excerpt shown for the search result. */
  snippet: string;
  /** Address of the search result. */
  url: string;
};
// Field schema: the binary sufficiency decision.
const isSufficientField = z
  .boolean()
  .describe(
    'Whether the preview content is sufficient to answer the task query',
  );

// Field schema: the explanation; nullable so the field can be present but empty.
const reasonField = z
  .string()
  .nullable()
  .describe(
    'Specific reason why full content analysis is required (only if isSufficient is false)',
  );

/**
 * Zod schema for the structured preview analysis output. The `describe`
 * texts are attached to each field's schema.
 */
const PreviewAnalysisSchema = z.object({
  isSufficient: isSufficientField,
  reason: reasonField,
});
/**
 * Flattens a message's content into plain text.
 *
 * Message content may be a plain string or an array of content parts.
 * Calling `toString()` on such an array renders object parts as
 * "[object Object]", so only the textual parts are kept and joined with
 * newlines; parts without a string `text` field are skipped.
 */
const extractMessageText = (content: BaseMessage['content']): string => {
  if (typeof content === 'string') {
    return content;
  }
  return content
    .map((part): string =>
      typeof part === 'object' &&
      part !== null &&
      'text' in part &&
      typeof part.text === 'string'
        ? part.text
        : '',
    )
    .filter((text) => text.length > 0)
    .join('\n');
};

/**
 * Asks the LLM whether search-result previews (titles + snippets), together
 * with recent chat history, are enough to answer the task query.
 *
 * Every failure path resolves to `isSufficient: false` with a reason: any
 * error raised inside the function is caught, logged, and converted into that
 * fallback result, so the returned promise does not reject.
 *
 * @param previewContents - Search-result previews to evaluate; only title and
 *   snippet are placed in the prompt (the URL is only logged).
 * @param query - Original user query, given to the LLM as context only.
 * @param taskQuery - The query the previews must be able to answer.
 * @param chatHistory - Conversation so far; only the last 10 messages are used.
 * @param llm - Chat model, invoked through `withStructuredOutput`.
 * @param systemInstructions - Optional text prepended to the prompt when non-empty.
 * @param signal - Abort signal forwarded to the LLM invocation.
 * @returns `{ isSufficient: true }` when the LLM judges the previews
 *   sufficient, otherwise `{ isSufficient: false, reason }`.
 */
export const analyzePreviewContent = async (
  previewContents: PreviewContent[],
  query: string,
  taskQuery: string,
  chatHistory: BaseMessage[],
  llm: BaseChatModel,
  systemInstructions: string,
  signal: AbortSignal,
): Promise<PreviewAnalysisResult> => {
  try {
    console.log(`Analyzing preview content for query: "${query}"`);
    console.log(
      `Preview content being analyzed:`,
      previewContents.map((content) => ({
        title: content.title,
        // Only mark the snippet as truncated when it actually was.
        snippet:
          content.snippet.length > 100
            ? content.snippet.substring(0, 100) + '...'
            : content.snippet,
        url: content.url,
      })),
    );

    // Format preview content for analysis
    const formattedPreviewContent = previewContents
      .map(
        (content, index) =>
          `Source ${index + 1}:
Title: ${content.title}
Snippet: ${content.snippet}
---`,
      )
      .join('\n\n');

    // Format chat history for context
    const formattedChatHistory = chatHistory
      .slice(-10) // Only include last 10 messages for context
      .map(
        (message) =>
          `${message.getType()}: ${removeThinkingBlocks(extractMessageText(message.content))}`,
      )
      .join('\n');

    const systemPrompt = systemInstructions ? `${systemInstructions}\n\n` : '';

    console.log(`Invoking LLM for preview content analysis`);

    // Create structured LLM with Zod schema
    const structuredLLM = llm.withStructuredOutput(PreviewAnalysisSchema);

    // The prompt lines are deliberately flush-left: any indentation inside the
    // template literal would become part of the text sent to the model.
    const analysisResult = await structuredLLM.invoke(
      `${systemPrompt}You are a preview content analyzer, tasked with determining if search result snippets contain sufficient information to answer the Task Query.
# Instructions
- Analyze the provided search result previews (titles + snippets), and chat history context to determine if they collectively contain enough information to provide a complete and accurate answer to the Task Query
- You must make a binary decision: either the preview content is sufficient OR it is not sufficient
- If the preview content can provide a complete answer to the Task Query, set isSufficient to true
- If the preview content lacks important details, requires deeper analysis, or cannot fully answer the Task Query, set isSufficient to false and provide a specific reason
- Be specific in your reasoning when the content is not sufficient
- The original query is provided for additional context, only use it for clarification of overall expectations and intent. You do **not** need to answer the original query directly or completely
# Information Context:
Today's date is ${formatDateForLLM(new Date())}
# Chat History Context:
${formattedChatHistory ? formattedChatHistory : 'No previous conversation context'}
# User Query:
${query}
# Task Query (what to answer):
${taskQuery}
# Search Result Previews to Analyze:
${formattedPreviewContent}
# Response Format
You must return a JSON object with:
- isSufficient: boolean indicating whether preview content is sufficient
- reason: string explaining why full content analysis is required (only if isSufficient is false)
`,
      { signal },
    );

    if (!analysisResult) {
      console.error('No analysis result returned from LLM');
      return {
        isSufficient: false,
        reason:
          'No analysis response returned from LLM - falling back to full content processing',
      };
    }

    console.log(`LLM analysis result:`, analysisResult);

    if (analysisResult.isSufficient) {
      console.log(
        'Preview content determined to be sufficient for answering the query',
      );
      return { isSufficient: true };
    } else {
      console.log(
        `Preview content determined to be insufficient. Reason: ${analysisResult.reason}`,
      );
      return {
        isSufficient: false,
        // `||` (not `??`) so that an empty-string reason also gets the default.
        reason:
          analysisResult.reason ||
          'Preview content insufficient for complete answer',
      };
    }
  } catch (error) {
    console.error('Error analyzing preview content:', error);
    return {
      isSufficient: false,
      reason: `Error during preview analysis: ${error instanceof Error ? error.message : 'Unknown error'} - falling back to full content processing`,
    };
  }
};