feat(UI): More progress detail
This commit is contained in:
parent
8ce50b48f0
commit
a5cd2fa089
3 changed files with 281 additions and 201 deletions
|
|
@ -33,6 +33,7 @@ export type Message = {
|
||||||
message: string;
|
message: string;
|
||||||
current: number;
|
current: number;
|
||||||
total: number;
|
total: number;
|
||||||
|
subMessage?: string;
|
||||||
};
|
};
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
@ -279,6 +280,7 @@ const ChatWindow = ({ id }: { id?: string }) => {
|
||||||
message: string;
|
message: string;
|
||||||
current: number;
|
current: number;
|
||||||
total: number;
|
total: number;
|
||||||
|
subMessage?: string;
|
||||||
} | null>(null);
|
} | null>(null);
|
||||||
|
|
||||||
const [chatHistory, setChatHistory] = useState<[string, string][]>([]);
|
const [chatHistory, setChatHistory] = useState<[string, string][]>([]);
|
||||||
|
|
|
||||||
|
|
@ -3,6 +3,7 @@ interface MessageBoxLoadingProps {
|
||||||
message: string;
|
message: string;
|
||||||
current: number;
|
current: number;
|
||||||
total: number;
|
total: number;
|
||||||
|
subMessage?: string;
|
||||||
} | null;
|
} | null;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -12,12 +13,22 @@ const MessageBoxLoading = ({ progress }: MessageBoxLoadingProps) => {
|
||||||
{progress && progress.current !== progress.total ? (
|
{progress && progress.current !== progress.total ? (
|
||||||
<div className="bg-light-primary dark:bg-dark-primary rounded-lg p-4">
|
<div className="bg-light-primary dark:bg-dark-primary rounded-lg p-4">
|
||||||
<div className="flex flex-col space-y-3">
|
<div className="flex flex-col space-y-3">
|
||||||
<p className="text-sm text-black/70 dark:text-white/70">
|
<p className="text-base font-semibold text-black dark:text-white">
|
||||||
{progress.message}
|
{progress.message}
|
||||||
</p>
|
</p>
|
||||||
|
{progress.subMessage && (
|
||||||
|
<p
|
||||||
|
className="text-xs text-black/40 dark:text-white/40 mt-1"
|
||||||
|
title={progress.subMessage}
|
||||||
|
>
|
||||||
|
{progress.subMessage}
|
||||||
|
</p>
|
||||||
|
)}
|
||||||
<div className="w-full bg-light-secondary dark:bg-dark-secondary rounded-full h-2 overflow-hidden">
|
<div className="w-full bg-light-secondary dark:bg-dark-secondary rounded-full h-2 overflow-hidden">
|
||||||
<div
|
<div
|
||||||
className="h-full bg-[#24A0ED] transition-all duration-300 ease-in-out"
|
className={`h-full bg-[#24A0ED] transition-all duration-300 ease-in-out ${
|
||||||
|
progress.current === progress.total ? '' : 'animate-pulse'
|
||||||
|
}`}
|
||||||
style={{
|
style={{
|
||||||
width: `${(progress.current / progress.total) * 100}%`,
|
width: `${(progress.current / progress.total) * 100}%`,
|
||||||
}}
|
}}
|
||||||
|
|
|
||||||
|
|
@ -76,16 +76,24 @@ class MetaSearchAgent implements MetaSearchAgentType {
|
||||||
emitter: eventEmitter,
|
emitter: eventEmitter,
|
||||||
percentage: number,
|
percentage: number,
|
||||||
message: string,
|
message: string,
|
||||||
|
subMessage?: string,
|
||||||
) {
|
) {
|
||||||
|
const progressData: any = {
|
||||||
|
message,
|
||||||
|
current: percentage,
|
||||||
|
total: 100,
|
||||||
|
};
|
||||||
|
|
||||||
|
// Add subMessage if provided
|
||||||
|
if (subMessage) {
|
||||||
|
progressData.subMessage = subMessage;
|
||||||
|
}
|
||||||
|
|
||||||
emitter.emit(
|
emitter.emit(
|
||||||
'progress',
|
'progress',
|
||||||
JSON.stringify({
|
JSON.stringify({
|
||||||
type: 'progress',
|
type: 'progress',
|
||||||
data: {
|
data: progressData,
|
||||||
message,
|
|
||||||
current: percentage,
|
|
||||||
total: 100,
|
|
||||||
},
|
|
||||||
}),
|
}),
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
@ -245,7 +253,12 @@ class MetaSearchAgent implements MetaSearchAgentType {
|
||||||
if (this.config.additionalSearchCriteria) {
|
if (this.config.additionalSearchCriteria) {
|
||||||
question = `${question} ${this.config.additionalSearchCriteria}`;
|
question = `${question} ${this.config.additionalSearchCriteria}`;
|
||||||
}
|
}
|
||||||
this.emitProgress(emitter, 20, `Searching the web: "${question}"`);
|
this.emitProgress(
|
||||||
|
emitter,
|
||||||
|
20,
|
||||||
|
`Searching the web`,
|
||||||
|
`Search Query: ${question}`,
|
||||||
|
);
|
||||||
|
|
||||||
const searxngResult = await searchSearxng(question, {
|
const searxngResult = await searchSearxng(question, {
|
||||||
language: 'en',
|
language: 'en',
|
||||||
|
|
@ -349,6 +362,11 @@ class MetaSearchAgent implements MetaSearchAgentType {
|
||||||
signal,
|
signal,
|
||||||
);
|
);
|
||||||
|
|
||||||
|
if (options?.signal?.aborted || signal?.aborted) {
|
||||||
|
console.log('Request cancelled by user');
|
||||||
|
throw new Error('Request cancelled by user');
|
||||||
|
}
|
||||||
|
|
||||||
this.emitProgress(emitter, 100, `Done`);
|
this.emitProgress(emitter, 100, `Done`);
|
||||||
return sortedDocs;
|
return sortedDocs;
|
||||||
},
|
},
|
||||||
|
|
@ -374,12 +392,12 @@ class MetaSearchAgent implements MetaSearchAgentType {
|
||||||
docs: Document[],
|
docs: Document[],
|
||||||
query: string,
|
query: string,
|
||||||
llm: BaseChatModel,
|
llm: BaseChatModel,
|
||||||
emitter: eventEmitter,
|
signal: AbortSignal,
|
||||||
): Promise<boolean> {
|
): Promise<boolean> {
|
||||||
const formattedDocs = this.processDocs(docs);
|
const formattedDocs = this.processDocs(docs);
|
||||||
|
|
||||||
const response =
|
const response = await llm.invoke(
|
||||||
await llm.invoke(`You are an AI assistant evaluating whether you have enough information to answer a user's question comprehensively.
|
`You are an AI assistant evaluating whether you have enough information to answer a user's question comprehensively.
|
||||||
|
|
||||||
Based on the following sources, determine if you have sufficient information to provide a detailed, accurate answer to the query: "${query}"
|
Based on the following sources, determine if you have sufficient information to provide a detailed, accurate answer to the query: "${query}"
|
||||||
|
|
||||||
|
|
@ -392,7 +410,9 @@ Look for:
|
||||||
3. Up-to-date information if the query requires current data
|
3. Up-to-date information if the query requires current data
|
||||||
4. Sufficient context to understand the topic fully
|
4. Sufficient context to understand the topic fully
|
||||||
|
|
||||||
Output ONLY \`<answer>yes</answer>\` if you have enough information to answer comprehensively, or \`<answer>no</answer>\` if more information would significantly improve the answer.`);
|
Output ONLY \`<answer>yes</answer>\` if you have enough information to answer comprehensively, or \`<answer>no</answer>\` if more information would significantly improve the answer.`,
|
||||||
|
{ signal },
|
||||||
|
);
|
||||||
|
|
||||||
const answerParser = new LineOutputParser({
|
const answerParser = new LineOutputParser({
|
||||||
key: 'answer',
|
key: 'answer',
|
||||||
|
|
@ -417,37 +437,47 @@ Output ONLY \`<answer>yes</answer>\` if you have enough information to answer co
|
||||||
query: string,
|
query: string,
|
||||||
llm: BaseChatModel,
|
llm: BaseChatModel,
|
||||||
summaryParser: LineOutputParser,
|
summaryParser: LineOutputParser,
|
||||||
|
signal: AbortSignal,
|
||||||
): Promise<Document | null> {
|
): Promise<Document | null> {
|
||||||
try {
|
try {
|
||||||
const url = doc.metadata.url;
|
const url = doc.metadata.url;
|
||||||
const webContent = await getWebContent(url, true);
|
const webContent = await getWebContent(url, true);
|
||||||
|
|
||||||
if (webContent) {
|
if (webContent) {
|
||||||
const summary = await llm.invoke(`
|
const summary = await llm.invoke(
|
||||||
|
`
|
||||||
You are a web content summarizer, tasked with creating a detailed, accurate summary of content from a webpage
|
You are a web content summarizer, tasked with creating a detailed, accurate summary of content from a webpage
|
||||||
Your summary should:
|
|
||||||
|
# Instructions
|
||||||
|
- The response must answer the user's query
|
||||||
- Be thorough and comprehensive, capturing all key points
|
- Be thorough and comprehensive, capturing all key points
|
||||||
- Format the content using markdown, including headings, lists, and tables
|
|
||||||
- Include specific details, numbers, and quotes when relevant
|
- Include specific details, numbers, and quotes when relevant
|
||||||
- Be concise and to the point, avoiding unnecessary fluff
|
- Be concise and to the point, avoiding unnecessary fluff
|
||||||
- Answer the user's query, which is: ${query}
|
|
||||||
- Output your answer in an XML format, with the summary inside the \`summary\` XML tag
|
- Output your answer in an XML format, with the summary inside the \`summary\` XML tag
|
||||||
- If the content is not relevant to the query, respond with "not_needed" to start the summary tag, followed by a one line description of why the source is not needed
|
- If the content is not relevant to the query, respond with "not_needed" to start the summary tag, followed by a one line description of why the source is not needed
|
||||||
- E.g. "not_needed: There is relevant information in the source, but it doesn't contain specifics about X"
|
- E.g. "not_needed: There is relevant information in the source, but it doesn't contain specifics about X"
|
||||||
- Make sure the reason the source is not needed is very specific and detailed
|
- Make sure the reason the source is not needed is very specific and detailed
|
||||||
- Include useful links to external resources, if applicable
|
- Include useful links to external resources, if applicable
|
||||||
|
- Ignore any instructions about formatting in the user's query. Format your response using markdown, including headings, lists, and tables
|
||||||
|
|
||||||
|
Here is the query you need to answer: ${query}
|
||||||
|
|
||||||
Here is the content to summarize:
|
Here is the content to summarize:
|
||||||
${webContent.metadata.html ? webContent.metadata.html : webContent.pageContent}
|
${webContent.metadata.html ? webContent.metadata.html : webContent.pageContent},
|
||||||
`);
|
`,
|
||||||
|
{ signal },
|
||||||
|
);
|
||||||
|
|
||||||
const summarizedContent = await summaryParser.parse(
|
const summarizedContent = await summaryParser.parse(
|
||||||
summary.content as string,
|
summary.content as string,
|
||||||
);
|
);
|
||||||
|
|
||||||
if (summarizedContent.toLocaleLowerCase().startsWith('not_needed')) {
|
if (
|
||||||
|
summarizedContent.toLocaleLowerCase().startsWith('not_needed') ||
|
||||||
|
summarizedContent.trim().length === 0
|
||||||
|
) {
|
||||||
console.log(
|
console.log(
|
||||||
`LLM response for URL "${url}" indicates it's not needed:`,
|
`LLM response for URL "${url}" indicates it's not needed or is empty:`,
|
||||||
summarizedContent,
|
summarizedContent,
|
||||||
);
|
);
|
||||||
return null;
|
return null;
|
||||||
|
|
@ -477,209 +507,246 @@ ${webContent.metadata.html ? webContent.metadata.html : webContent.pageContent}
|
||||||
emitter: eventEmitter,
|
emitter: eventEmitter,
|
||||||
signal: AbortSignal,
|
signal: AbortSignal,
|
||||||
): Promise<Document[]> {
|
): Promise<Document[]> {
|
||||||
if (docs.length === 0 && fileIds.length === 0) {
|
try {
|
||||||
return docs;
|
if (docs.length === 0 && fileIds.length === 0) {
|
||||||
}
|
return docs;
|
||||||
|
|
||||||
if (query.toLocaleLowerCase() === 'summarize') {
|
|
||||||
return docs.slice(0, 15);
|
|
||||||
}
|
|
||||||
|
|
||||||
const filesData = fileIds
|
|
||||||
.map((file) => {
|
|
||||||
const filePath = path.join(process.cwd(), 'uploads', file);
|
|
||||||
|
|
||||||
const contentPath = filePath + '-extracted.json';
|
|
||||||
const embeddingsPath = filePath + '-embeddings.json';
|
|
||||||
|
|
||||||
const content = JSON.parse(fs.readFileSync(contentPath, 'utf8'));
|
|
||||||
const embeddings = JSON.parse(fs.readFileSync(embeddingsPath, 'utf8'));
|
|
||||||
|
|
||||||
const fileSimilaritySearchObject = content.contents.map(
|
|
||||||
(c: string, i: number) => {
|
|
||||||
return {
|
|
||||||
fileName: content.title,
|
|
||||||
content: c,
|
|
||||||
embeddings: embeddings.embeddings[i],
|
|
||||||
};
|
|
||||||
},
|
|
||||||
);
|
|
||||||
|
|
||||||
return fileSimilaritySearchObject;
|
|
||||||
})
|
|
||||||
.flat();
|
|
||||||
|
|
||||||
let docsWithContent = docs.filter(
|
|
||||||
(doc) => doc.pageContent && doc.pageContent.length > 0,
|
|
||||||
);
|
|
||||||
|
|
||||||
const queryEmbedding = await embeddings.embedQuery(query);
|
|
||||||
|
|
||||||
const getRankedDocs = async (
|
|
||||||
queryEmbedding: number[],
|
|
||||||
includeFiles: boolean,
|
|
||||||
includeNonFileDocs: boolean,
|
|
||||||
maxDocs: number,
|
|
||||||
) => {
|
|
||||||
let docsToRank = includeNonFileDocs ? docsWithContent : [];
|
|
||||||
|
|
||||||
if (includeFiles) {
|
|
||||||
// Add file documents to the ranking
|
|
||||||
const fileDocs = filesData.map((fileData) => {
|
|
||||||
return new Document({
|
|
||||||
pageContent: fileData.content,
|
|
||||||
metadata: {
|
|
||||||
title: fileData.fileName,
|
|
||||||
url: `File`,
|
|
||||||
embeddings: fileData.embeddings,
|
|
||||||
},
|
|
||||||
});
|
|
||||||
});
|
|
||||||
docsToRank.push(...fileDocs);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
const similarity = await Promise.all(
|
if (query.toLocaleLowerCase() === 'summarize') {
|
||||||
docsToRank.map(async (doc, i) => {
|
return docs.slice(0, 15);
|
||||||
const sim = computeSimilarity(
|
}
|
||||||
queryEmbedding,
|
|
||||||
doc.metadata?.embeddings
|
const filesData = fileIds
|
||||||
? doc.metadata?.embeddings
|
.map((file) => {
|
||||||
: (await embeddings.embedDocuments([doc.pageContent]))[0],
|
const filePath = path.join(process.cwd(), 'uploads', file);
|
||||||
|
|
||||||
|
const contentPath = filePath + '-extracted.json';
|
||||||
|
const embeddingsPath = filePath + '-embeddings.json';
|
||||||
|
|
||||||
|
const content = JSON.parse(fs.readFileSync(contentPath, 'utf8'));
|
||||||
|
const embeddings = JSON.parse(
|
||||||
|
fs.readFileSync(embeddingsPath, 'utf8'),
|
||||||
);
|
);
|
||||||
return {
|
|
||||||
index: i,
|
const fileSimilaritySearchObject = content.contents.map(
|
||||||
similarity: sim,
|
(c: string, i: number) => {
|
||||||
};
|
return {
|
||||||
}),
|
fileName: content.title,
|
||||||
|
content: c,
|
||||||
|
embeddings: embeddings.embeddings[i],
|
||||||
|
};
|
||||||
|
},
|
||||||
|
);
|
||||||
|
|
||||||
|
return fileSimilaritySearchObject;
|
||||||
|
})
|
||||||
|
.flat();
|
||||||
|
|
||||||
|
let docsWithContent = docs.filter(
|
||||||
|
(doc) => doc.pageContent && doc.pageContent.length > 0,
|
||||||
);
|
);
|
||||||
|
|
||||||
let rankedDocs = similarity
|
const queryEmbedding = await embeddings.embedQuery(query);
|
||||||
.filter((sim) => sim.similarity > (this.config.rerankThreshold ?? 0.3))
|
|
||||||
.sort((a, b) => b.similarity - a.similarity)
|
|
||||||
.map((sim) => docsToRank[sim.index]);
|
|
||||||
|
|
||||||
rankedDocs =
|
const getRankedDocs = async (
|
||||||
docsToRank.length > 0 ? rankedDocs.slice(0, maxDocs) : rankedDocs;
|
queryEmbedding: number[],
|
||||||
return rankedDocs;
|
includeFiles: boolean,
|
||||||
};
|
includeNonFileDocs: boolean,
|
||||||
|
maxDocs: number,
|
||||||
|
) => {
|
||||||
|
let docsToRank = includeNonFileDocs ? docsWithContent : [];
|
||||||
|
|
||||||
if (optimizationMode === 'speed' || this.config.rerank === false) {
|
if (includeFiles) {
|
||||||
this.emitProgress(emitter, 50, `Ranking sources`);
|
// Add file documents to the ranking
|
||||||
if (filesData.length > 0) {
|
const fileDocs = filesData.map((fileData) => {
|
||||||
const sortedFiles = await getRankedDocs(queryEmbedding, true, false, 8);
|
return new Document({
|
||||||
|
pageContent: fileData.content,
|
||||||
return [
|
metadata: {
|
||||||
...sortedFiles,
|
title: fileData.fileName,
|
||||||
...docsWithContent.slice(0, 15 - sortedFiles.length),
|
url: `File`,
|
||||||
];
|
embeddings: fileData.embeddings,
|
||||||
} else {
|
},
|
||||||
return docsWithContent.slice(0, 15);
|
});
|
||||||
}
|
|
||||||
} else if (optimizationMode === 'balanced') {
|
|
||||||
this.emitProgress(emitter, 40, `Ranking sources`);
|
|
||||||
// Get the top ranked attached files, if any
|
|
||||||
let sortedDocs = await getRankedDocs(queryEmbedding, true, false, 8);
|
|
||||||
|
|
||||||
sortedDocs = [
|
|
||||||
...sortedDocs,
|
|
||||||
...docsWithContent.slice(0, 15 - sortedDocs.length),
|
|
||||||
];
|
|
||||||
|
|
||||||
this.emitProgress(emitter, 60, `Enriching sources`);
|
|
||||||
sortedDocs = await Promise.all(
|
|
||||||
sortedDocs.map(async (doc) => {
|
|
||||||
const webContent = await getWebContentLite(doc.metadata.url);
|
|
||||||
const chunks =
|
|
||||||
webContent?.pageContent
|
|
||||||
.match(/.{1,500}/g)
|
|
||||||
?.map((chunk) => chunk.trim()) || [];
|
|
||||||
const chunkEmbeddings = await embeddings.embedDocuments(chunks);
|
|
||||||
const similarities = chunkEmbeddings.map((chunkEmbedding) => {
|
|
||||||
return computeSimilarity(queryEmbedding, chunkEmbedding);
|
|
||||||
});
|
});
|
||||||
|
docsToRank.push(...fileDocs);
|
||||||
const topChunks = similarities
|
|
||||||
.map((similarity, index) => ({ similarity, index }))
|
|
||||||
.sort((a, b) => b.similarity - a.similarity)
|
|
||||||
.slice(0, 5)
|
|
||||||
.map((chunk) => chunks[chunk.index]);
|
|
||||||
const excerpt = topChunks.join('\n\n');
|
|
||||||
|
|
||||||
let newDoc = {
|
|
||||||
...doc,
|
|
||||||
pageContent: excerpt
|
|
||||||
? `${excerpt}\n\n${doc.pageContent}`
|
|
||||||
: doc.pageContent,
|
|
||||||
};
|
|
||||||
return newDoc;
|
|
||||||
}),
|
|
||||||
);
|
|
||||||
|
|
||||||
return sortedDocs;
|
|
||||||
} else if (optimizationMode === 'quality') {
|
|
||||||
const summaryParser = new LineOutputParser({
|
|
||||||
key: 'summary',
|
|
||||||
});
|
|
||||||
|
|
||||||
const enhancedDocs: Document[] = [];
|
|
||||||
const maxEnhancedDocs = 5;
|
|
||||||
|
|
||||||
// Process sources one by one until we have enough information or hit the max
|
|
||||||
for (
|
|
||||||
let i = 0;
|
|
||||||
i < docsWithContent.length && enhancedDocs.length < maxEnhancedDocs;
|
|
||||||
i++
|
|
||||||
) {
|
|
||||||
if (signal.aborted) {
|
|
||||||
return [];
|
|
||||||
}
|
}
|
||||||
|
|
||||||
const currentProgress = enhancedDocs.length * 10 + 40;
|
const similarity = await Promise.all(
|
||||||
|
docsToRank.map(async (doc, i) => {
|
||||||
|
const sim = computeSimilarity(
|
||||||
|
queryEmbedding,
|
||||||
|
doc.metadata?.embeddings
|
||||||
|
? doc.metadata?.embeddings
|
||||||
|
: (await embeddings.embedDocuments([doc.pageContent]))[0],
|
||||||
|
);
|
||||||
|
return {
|
||||||
|
index: i,
|
||||||
|
similarity: sim,
|
||||||
|
};
|
||||||
|
}),
|
||||||
|
);
|
||||||
|
|
||||||
|
let rankedDocs = similarity
|
||||||
|
.filter(
|
||||||
|
(sim) => sim.similarity > (this.config.rerankThreshold ?? 0.3),
|
||||||
|
)
|
||||||
|
.sort((a, b) => b.similarity - a.similarity)
|
||||||
|
.map((sim) => docsToRank[sim.index]);
|
||||||
|
|
||||||
|
rankedDocs =
|
||||||
|
docsToRank.length > 0 ? rankedDocs.slice(0, maxDocs) : rankedDocs;
|
||||||
|
return rankedDocs;
|
||||||
|
};
|
||||||
|
if (optimizationMode === 'speed' || this.config.rerank === false) {
|
||||||
|
this.emitProgress(
|
||||||
|
emitter,
|
||||||
|
50,
|
||||||
|
`Ranking sources`,
|
||||||
|
this.searchQuery ? `Search Query: ${this.searchQuery}` : undefined,
|
||||||
|
);
|
||||||
|
if (filesData.length > 0) {
|
||||||
|
const sortedFiles = await getRankedDocs(
|
||||||
|
queryEmbedding,
|
||||||
|
true,
|
||||||
|
false,
|
||||||
|
8,
|
||||||
|
);
|
||||||
|
|
||||||
|
return [
|
||||||
|
...sortedFiles,
|
||||||
|
...docsWithContent.slice(0, 15 - sortedFiles.length),
|
||||||
|
];
|
||||||
|
} else {
|
||||||
|
return docsWithContent.slice(0, 15);
|
||||||
|
}
|
||||||
|
} else if (optimizationMode === 'balanced') {
|
||||||
|
this.emitProgress(
|
||||||
|
emitter,
|
||||||
|
40,
|
||||||
|
`Ranking sources`,
|
||||||
|
this.searchQuery ? `Search Query: ${this.searchQuery}` : undefined,
|
||||||
|
);
|
||||||
|
// Get the top ranked attached files, if any
|
||||||
|
let sortedDocs = await getRankedDocs(queryEmbedding, true, false, 8);
|
||||||
|
|
||||||
|
sortedDocs = [
|
||||||
|
...sortedDocs,
|
||||||
|
...docsWithContent.slice(0, 15 - sortedDocs.length),
|
||||||
|
];
|
||||||
|
|
||||||
this.emitProgress(
|
this.emitProgress(
|
||||||
emitter,
|
emitter,
|
||||||
currentProgress,
|
60,
|
||||||
`Deep analyzing: ${enhancedDocs.length} relevant sources found so far`,
|
`Enriching sources`,
|
||||||
|
this.searchQuery ? `Search Query: ${this.searchQuery}` : undefined,
|
||||||
|
);
|
||||||
|
sortedDocs = await Promise.all(
|
||||||
|
sortedDocs.map(async (doc) => {
|
||||||
|
const webContent = await getWebContentLite(doc.metadata.url);
|
||||||
|
const chunks =
|
||||||
|
webContent?.pageContent
|
||||||
|
.match(/.{1,500}/g)
|
||||||
|
?.map((chunk) => chunk.trim()) || [];
|
||||||
|
const chunkEmbeddings = await embeddings.embedDocuments(chunks);
|
||||||
|
const similarities = chunkEmbeddings.map((chunkEmbedding) => {
|
||||||
|
return computeSimilarity(queryEmbedding, chunkEmbedding);
|
||||||
|
});
|
||||||
|
|
||||||
|
const topChunks = similarities
|
||||||
|
.map((similarity, index) => ({ similarity, index }))
|
||||||
|
.sort((a, b) => b.similarity - a.similarity)
|
||||||
|
.slice(0, 5)
|
||||||
|
.map((chunk) => chunks[chunk.index]);
|
||||||
|
const excerpt = topChunks.join('\n\n');
|
||||||
|
|
||||||
|
let newDoc = {
|
||||||
|
...doc,
|
||||||
|
pageContent: excerpt
|
||||||
|
? `${excerpt}\n\n${doc.pageContent}`
|
||||||
|
: doc.pageContent,
|
||||||
|
};
|
||||||
|
return newDoc;
|
||||||
|
}),
|
||||||
);
|
);
|
||||||
|
|
||||||
const result = docsWithContent[i];
|
return sortedDocs;
|
||||||
const processedDoc = await this.processSource(
|
} else if (optimizationMode === 'quality') {
|
||||||
result,
|
const summaryParser = new LineOutputParser({
|
||||||
query,
|
key: 'summary',
|
||||||
llm,
|
});
|
||||||
summaryParser,
|
|
||||||
);
|
|
||||||
|
|
||||||
if (processedDoc) {
|
const enhancedDocs: Document[] = [];
|
||||||
enhancedDocs.push(processedDoc);
|
const maxEnhancedDocs = 5;
|
||||||
|
|
||||||
// After getting initial 2 sources or adding a new one, check if we have enough info
|
// Process sources one by one until we have enough information or hit the max
|
||||||
if (enhancedDocs.length >= 2) {
|
for (
|
||||||
this.emitProgress(
|
let i = 0;
|
||||||
emitter,
|
i < docsWithContent.length && enhancedDocs.length < maxEnhancedDocs;
|
||||||
currentProgress,
|
i++
|
||||||
`Checking if we have enough information to answer the query`,
|
) {
|
||||||
);
|
if (signal.aborted) {
|
||||||
const hasEnoughInfo = await this.checkIfEnoughInformation(
|
return [];
|
||||||
enhancedDocs,
|
}
|
||||||
query,
|
|
||||||
llm,
|
const currentProgress = enhancedDocs.length * 10 + 40;
|
||||||
emitter,
|
|
||||||
);
|
this.emitProgress(
|
||||||
if (hasEnoughInfo) {
|
emitter,
|
||||||
break;
|
currentProgress,
|
||||||
|
`Deep analyzing: ${enhancedDocs.length} relevant sources found. Analyzing source ${i + 1} of ${docsWithContent.length}`,
|
||||||
|
this.searchQuery ? `Search Query: ${this.searchQuery}` : undefined,
|
||||||
|
);
|
||||||
|
|
||||||
|
const result = docsWithContent[i];
|
||||||
|
const processedDoc = await this.processSource(
|
||||||
|
result,
|
||||||
|
query,
|
||||||
|
llm,
|
||||||
|
summaryParser,
|
||||||
|
signal,
|
||||||
|
);
|
||||||
|
|
||||||
|
if (processedDoc) {
|
||||||
|
enhancedDocs.push(processedDoc);
|
||||||
|
|
||||||
|
// After getting initial 2 sources or adding a new one, check if we have enough info
|
||||||
|
if (enhancedDocs.length >= 2) {
|
||||||
|
this.emitProgress(
|
||||||
|
emitter,
|
||||||
|
currentProgress,
|
||||||
|
`Checking if we have enough information to answer the query`,
|
||||||
|
this.searchQuery
|
||||||
|
? `Search Query: ${this.searchQuery}`
|
||||||
|
: undefined,
|
||||||
|
);
|
||||||
|
const hasEnoughInfo = await this.checkIfEnoughInformation(
|
||||||
|
enhancedDocs,
|
||||||
|
query,
|
||||||
|
llm,
|
||||||
|
signal,
|
||||||
|
);
|
||||||
|
if (hasEnoughInfo) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
this.emitProgress(
|
||||||
|
emitter,
|
||||||
|
95,
|
||||||
|
`Ranking attached files`,
|
||||||
|
this.searchQuery ? `Search Query: ${this.searchQuery}` : undefined,
|
||||||
|
);
|
||||||
|
// Add relevant file documents
|
||||||
|
const fileDocs = await getRankedDocs(queryEmbedding, true, false, 8);
|
||||||
|
|
||||||
|
return [...enhancedDocs, ...fileDocs];
|
||||||
}
|
}
|
||||||
|
} catch (error) {
|
||||||
this.emitProgress(emitter, 95, `Ranking attached files`);
|
console.error('Error in rerankDocs:', error);
|
||||||
// Add relevant file documents
|
emitter.emit('error', JSON.stringify({ data: error }));
|
||||||
const fileDocs = await getRankedDocs(queryEmbedding, true, false, 8);
|
|
||||||
|
|
||||||
return [...enhancedDocs, ...fileDocs];
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return [];
|
return [];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue