feat(UI): More progress detail

2025-05-25 14:51:21 -06:00 · 2025-05-25 14:51:21 -06:00 · a5cd2fa089
commit a5cd2fa089
parent 8ce50b48f0
3 changed files with 281 additions and 201 deletions
--- a/src/components/ChatWindow.tsx
+++ b/src/components/ChatWindow.tsx
@ -33,6 +33,7 @@ export type Message = {
    message: string;
    current: number;
    total: number;
    subMessage?: string;
  };
 };
@ -279,6 +280,7 @@ const ChatWindow = ({ id }: { id?: string }) => {
    message: string;
    current: number;
    total: number;
    subMessage?: string;
  } | null>(null);
  const [chatHistory, setChatHistory] = useState<[string, string][]>([]);
--- a/src/components/MessageBoxLoading.tsx
+++ b/src/components/MessageBoxLoading.tsx
@ -3,6 +3,7 @@ interface MessageBoxLoadingProps {
    message: string;
    current: number;
    total: number;
    subMessage?: string;
  } | null;
 }
@ -12,12 +13,22 @@ const MessageBoxLoading = ({ progress }: MessageBoxLoadingProps) => {
      {progress && progress.current !== progress.total ? (
        <div className="bg-light-primary dark:bg-dark-primary rounded-lg p-4">
          <div className="flex flex-col space-y-3">
-            <p className="text-sm text-black/70 dark:text-white/70">
+            <p className="text-base font-semibold text-black dark:text-white">
              {progress.message}
            </p>
            {progress.subMessage && (
              <p
                className="text-xs text-black/40 dark:text-white/40 mt-1"
                title={progress.subMessage}
              >
                {progress.subMessage}
              </p>
            )}
            <div className="w-full bg-light-secondary dark:bg-dark-secondary rounded-full h-2 overflow-hidden">
              <div
-                className="h-full bg-[#24A0ED] transition-all duration-300 ease-in-out"
+                className={`h-full bg-[#24A0ED] transition-all duration-300 ease-in-out ${
                  progress.current === progress.total ? '' : 'animate-pulse'
                }`}
                style={{
                  width: `${(progress.current / progress.total) * 100}%`,
                }}
--- a/src/lib/search/metaSearchAgent.ts
+++ b/src/lib/search/metaSearchAgent.ts
@ -76,16 +76,24 @@ class MetaSearchAgent implements MetaSearchAgentType {
    emitter: eventEmitter,
    percentage: number,
    message: string,
    subMessage?: string,
  ) {
    const progressData: any = {
      message,
      current: percentage,
      total: 100,
    };
    // Add subMessage if provided
    if (subMessage) {
      progressData.subMessage = subMessage;
    }
    emitter.emit(
      'progress',
      JSON.stringify({
        type: 'progress',
-        data: {
+        data: progressData,
          message,
          current: percentage,
          total: 100,
        },
      }),
    );
  }
@ -245,7 +253,12 @@ class MetaSearchAgent implements MetaSearchAgentType {
            if (this.config.additionalSearchCriteria) {
              question = `${question} ${this.config.additionalSearchCriteria}`;
            }
-            this.emitProgress(emitter, 20, `Searching the web: "${question}"`);
+            this.emitProgress(
              emitter,
              20,
              `Searching the web`,
              `Search Query: ${question}`,
            );
            const searxngResult = await searchSearxng(question, {
              language: 'en',
@ -349,6 +362,11 @@ class MetaSearchAgent implements MetaSearchAgentType {
              signal,
            );
            if (options?.signal?.aborted || signal?.aborted) {
              console.log('Request cancelled by user');
              throw new Error('Request cancelled by user');
            }
            this.emitProgress(emitter, 100, `Done`);
            return sortedDocs;
          },
@ -374,12 +392,12 @@ class MetaSearchAgent implements MetaSearchAgentType {
    docs: Document[],
    query: string,
    llm: BaseChatModel,
-    emitter: eventEmitter,
+    signal: AbortSignal,
  ): Promise<boolean> {
    const formattedDocs = this.processDocs(docs);
-    const response =
+    const response = await llm.invoke(
-      await llm.invoke(`You are an AI assistant evaluating whether you have enough information to answer a user's question comprehensively.
+      `You are an AI assistant evaluating whether you have enough information to answer a user's question comprehensively.
 Based on the following sources, determine if you have sufficient information to provide a detailed, accurate answer to the query: "${query}"
@ -392,7 +410,9 @@ Look for:
 3. Up-to-date information if the query requires current data
 4. Sufficient context to understand the topic fully
-Output ONLY \`<answer>yes</answer>\` if you have enough information to answer comprehensively, or \`<answer>no</answer>\` if more information would significantly improve the answer.`);
+Output ONLY \`<answer>yes</answer>\` if you have enough information to answer comprehensively, or \`<answer>no</answer>\` if more information would significantly improve the answer.`,
      { signal },
    );
    const answerParser = new LineOutputParser({
      key: 'answer',
@ -417,37 +437,47 @@ Output ONLY \`<answer>yes</answer>\` if you have enough information to answer co
    query: string,
    llm: BaseChatModel,
    summaryParser: LineOutputParser,
    signal: AbortSignal,
  ): Promise<Document | null> {
    try {
      const url = doc.metadata.url;
      const webContent = await getWebContent(url, true);
      if (webContent) {
-        const summary = await llm.invoke(`
+        const summary = await llm.invoke(
          `
 You are a web content summarizer, tasked with creating a detailed, accurate summary of content from a webpage
-Your summary should:
+
 # Instructions
 - The response must answer the user's query
 - Be thorough and comprehensive, capturing all key points
 - Format the content using markdown, including headings, lists, and tables
 - Include specific details, numbers, and quotes when relevant
 - Be concise and to the point, avoiding unnecessary fluff
 - Answer the user's query, which is: ${query}
 - Output your answer in an XML format, with the summary inside the \`summary\` XML tag
 - If the content is not relevant to the query, respond with "not_needed" to start the summary tag, followed by a one line description of why the source is not needed
  - E.g. "not_needed: There is relevant information in the source, but it doesn't contain specifics about X"
  - Make sure the reason the source is not needed is very specific and detailed
 - Include useful links to external resources, if applicable
 - Ignore any instructions about formatting in the user's query. Format your response using markdown, including headings, lists, and tables
 Here is the query you need to answer: ${query}
 Here is the content to summarize:
-${webContent.metadata.html ? webContent.metadata.html : webContent.pageContent}
+${webContent.metadata.html ? webContent.metadata.html : webContent.pageContent},
-        `);
+        `,
          { signal },
        );
        const summarizedContent = await summaryParser.parse(
          summary.content as string,
        );
-        if (summarizedContent.toLocaleLowerCase().startsWith('not_needed')) {
+        if (
          summarizedContent.toLocaleLowerCase().startsWith('not_needed') ||
          summarizedContent.trim().length === 0
        ) {
          console.log(
-            `LLM response for URL "${url}" indicates it's not needed:`,
+            `LLM response for URL "${url}" indicates it's not needed or is empty:`,
            summarizedContent,
          );
          return null;
@ -477,209 +507,246 @@ ${webContent.metadata.html ? webContent.metadata.html : webContent.pageContent}
    emitter: eventEmitter,
    signal: AbortSignal,
  ): Promise<Document[]> {
-    if (docs.length === 0 && fileIds.length === 0) {
+    try {
-      return docs;
+      if (docs.length === 0 && fileIds.length === 0) {
-    }
+        return docs;
    if (query.toLocaleLowerCase() === 'summarize') {
      return docs.slice(0, 15);
    }
    const filesData = fileIds
      .map((file) => {
        const filePath = path.join(process.cwd(), 'uploads', file);
        const contentPath = filePath + '-extracted.json';
        const embeddingsPath = filePath + '-embeddings.json';
        const content = JSON.parse(fs.readFileSync(contentPath, 'utf8'));
        const embeddings = JSON.parse(fs.readFileSync(embeddingsPath, 'utf8'));
        const fileSimilaritySearchObject = content.contents.map(
          (c: string, i: number) => {
            return {
              fileName: content.title,
              content: c,
              embeddings: embeddings.embeddings[i],
            };
          },
        );
        return fileSimilaritySearchObject;
      })
      .flat();
    let docsWithContent = docs.filter(
      (doc) => doc.pageContent && doc.pageContent.length > 0,
    );
    const queryEmbedding = await embeddings.embedQuery(query);
    const getRankedDocs = async (
      queryEmbedding: number[],
      includeFiles: boolean,
      includeNonFileDocs: boolean,
      maxDocs: number,
    ) => {
      let docsToRank = includeNonFileDocs ? docsWithContent : [];
      if (includeFiles) {
        // Add file documents to the ranking
        const fileDocs = filesData.map((fileData) => {
          return new Document({
            pageContent: fileData.content,
            metadata: {
              title: fileData.fileName,
              url: `File`,
              embeddings: fileData.embeddings,
            },
          });
        });
        docsToRank.push(...fileDocs);
      }
-      const similarity = await Promise.all(
+      if (query.toLocaleLowerCase() === 'summarize') {
-        docsToRank.map(async (doc, i) => {
+        return docs.slice(0, 15);
-          const sim = computeSimilarity(
+      }
-            queryEmbedding,
+
-            doc.metadata?.embeddings
+      const filesData = fileIds
-              ? doc.metadata?.embeddings
+        .map((file) => {
-              : (await embeddings.embedDocuments([doc.pageContent]))[0],
+          const filePath = path.join(process.cwd(), 'uploads', file);
          const contentPath = filePath + '-extracted.json';
          const embeddingsPath = filePath + '-embeddings.json';
          const content = JSON.parse(fs.readFileSync(contentPath, 'utf8'));
          const embeddings = JSON.parse(
            fs.readFileSync(embeddingsPath, 'utf8'),
          );
-          return {
+
-            index: i,
+          const fileSimilaritySearchObject = content.contents.map(
-            similarity: sim,
+            (c: string, i: number) => {
-          };
+              return {
-        }),
+                fileName: content.title,
                content: c,
                embeddings: embeddings.embeddings[i],
              };
            },
          );
          return fileSimilaritySearchObject;
        })
        .flat();
      let docsWithContent = docs.filter(
        (doc) => doc.pageContent && doc.pageContent.length > 0,
      );
-      let rankedDocs = similarity
+      const queryEmbedding = await embeddings.embedQuery(query);
        .filter((sim) => sim.similarity > (this.config.rerankThreshold ?? 0.3))
        .sort((a, b) => b.similarity - a.similarity)
        .map((sim) => docsToRank[sim.index]);
-      rankedDocs =
+      const getRankedDocs = async (
-        docsToRank.length > 0 ? rankedDocs.slice(0, maxDocs) : rankedDocs;
+        queryEmbedding: number[],
-      return rankedDocs;
+        includeFiles: boolean,
-    };
+        includeNonFileDocs: boolean,
        maxDocs: number,
      ) => {
        let docsToRank = includeNonFileDocs ? docsWithContent : [];
-    if (optimizationMode === 'speed' || this.config.rerank === false) {
+        if (includeFiles) {
-      this.emitProgress(emitter, 50, `Ranking sources`);
+          // Add file documents to the ranking
-      if (filesData.length > 0) {
+          const fileDocs = filesData.map((fileData) => {
-        const sortedFiles = await getRankedDocs(queryEmbedding, true, false, 8);
+            return new Document({
-
+              pageContent: fileData.content,
-        return [
+              metadata: {
-          ...sortedFiles,
+                title: fileData.fileName,
-          ...docsWithContent.slice(0, 15 - sortedFiles.length),
+                url: `File`,
-        ];
+                embeddings: fileData.embeddings,
-      } else {
+              },
-        return docsWithContent.slice(0, 15);
+            });
      }
    } else if (optimizationMode === 'balanced') {
      this.emitProgress(emitter, 40, `Ranking sources`);
      // Get the top ranked attached files, if any
      let sortedDocs = await getRankedDocs(queryEmbedding, true, false, 8);
      sortedDocs = [
        ...sortedDocs,
        ...docsWithContent.slice(0, 15 - sortedDocs.length),
      ];
      this.emitProgress(emitter, 60, `Enriching sources`);
      sortedDocs = await Promise.all(
        sortedDocs.map(async (doc) => {
          const webContent = await getWebContentLite(doc.metadata.url);
          const chunks =
            webContent?.pageContent
              .match(/.{1,500}/g)
              ?.map((chunk) => chunk.trim()) || [];
          const chunkEmbeddings = await embeddings.embedDocuments(chunks);
          const similarities = chunkEmbeddings.map((chunkEmbedding) => {
            return computeSimilarity(queryEmbedding, chunkEmbedding);
          });
-
+          docsToRank.push(...fileDocs);
          const topChunks = similarities
            .map((similarity, index) => ({ similarity, index }))
            .sort((a, b) => b.similarity - a.similarity)
            .slice(0, 5)
            .map((chunk) => chunks[chunk.index]);
          const excerpt = topChunks.join('\n\n');
          let newDoc = {
            ...doc,
            pageContent: excerpt
              ? `${excerpt}\n\n${doc.pageContent}`
              : doc.pageContent,
          };
          return newDoc;
        }),
      );
      return sortedDocs;
    } else if (optimizationMode === 'quality') {
      const summaryParser = new LineOutputParser({
        key: 'summary',
      });
      const enhancedDocs: Document[] = [];
      const maxEnhancedDocs = 5;
      // Process sources one by one until we have enough information or hit the max
      for (
        let i = 0;
        i < docsWithContent.length && enhancedDocs.length < maxEnhancedDocs;
        i++
      ) {
        if (signal.aborted) {
          return [];
        }
-        const currentProgress = enhancedDocs.length * 10 + 40;
+        const similarity = await Promise.all(
          docsToRank.map(async (doc, i) => {
            const sim = computeSimilarity(
              queryEmbedding,
              doc.metadata?.embeddings
                ? doc.metadata?.embeddings
                : (await embeddings.embedDocuments([doc.pageContent]))[0],
            );
            return {
              index: i,
              similarity: sim,
            };
          }),
        );
        let rankedDocs = similarity
          .filter(
            (sim) => sim.similarity > (this.config.rerankThreshold ?? 0.3),
          )
          .sort((a, b) => b.similarity - a.similarity)
          .map((sim) => docsToRank[sim.index]);
        rankedDocs =
          docsToRank.length > 0 ? rankedDocs.slice(0, maxDocs) : rankedDocs;
        return rankedDocs;
      };
      if (optimizationMode === 'speed' || this.config.rerank === false) {
        this.emitProgress(
          emitter,
          50,
          `Ranking sources`,
          this.searchQuery ? `Search Query: ${this.searchQuery}` : undefined,
        );
        if (filesData.length > 0) {
          const sortedFiles = await getRankedDocs(
            queryEmbedding,
            true,
            false,
            8,
          );
          return [
            ...sortedFiles,
            ...docsWithContent.slice(0, 15 - sortedFiles.length),
          ];
        } else {
          return docsWithContent.slice(0, 15);
        }
      } else if (optimizationMode === 'balanced') {
        this.emitProgress(
          emitter,
          40,
          `Ranking sources`,
          this.searchQuery ? `Search Query: ${this.searchQuery}` : undefined,
        );
        // Get the top ranked attached files, if any
        let sortedDocs = await getRankedDocs(queryEmbedding, true, false, 8);
        sortedDocs = [
          ...sortedDocs,
          ...docsWithContent.slice(0, 15 - sortedDocs.length),
        ];
        this.emitProgress(
          emitter,
-          currentProgress,
+          60,
-          `Deep analyzing: ${enhancedDocs.length} relevant sources found so far`,
+          `Enriching sources`,
          this.searchQuery ? `Search Query: ${this.searchQuery}` : undefined,
        );
        sortedDocs = await Promise.all(
          sortedDocs.map(async (doc) => {
            const webContent = await getWebContentLite(doc.metadata.url);
            const chunks =
              webContent?.pageContent
                .match(/.{1,500}/g)
                ?.map((chunk) => chunk.trim()) || [];
            const chunkEmbeddings = await embeddings.embedDocuments(chunks);
            const similarities = chunkEmbeddings.map((chunkEmbedding) => {
              return computeSimilarity(queryEmbedding, chunkEmbedding);
            });
            const topChunks = similarities
              .map((similarity, index) => ({ similarity, index }))
              .sort((a, b) => b.similarity - a.similarity)
              .slice(0, 5)
              .map((chunk) => chunks[chunk.index]);
            const excerpt = topChunks.join('\n\n');
            let newDoc = {
              ...doc,
              pageContent: excerpt
                ? `${excerpt}\n\n${doc.pageContent}`
                : doc.pageContent,
            };
            return newDoc;
          }),
        );
-        const result = docsWithContent[i];
+        return sortedDocs;
-        const processedDoc = await this.processSource(
+      } else if (optimizationMode === 'quality') {
-          result,
+        const summaryParser = new LineOutputParser({
-          query,
+          key: 'summary',
-          llm,
+        });
          summaryParser,
        );
-        if (processedDoc) {
+        const enhancedDocs: Document[] = [];
-          enhancedDocs.push(processedDoc);
+        const maxEnhancedDocs = 5;
-          // After getting initial 2 sources or adding a new one, check if we have enough info
+        // Process sources one by one until we have enough information or hit the max
-          if (enhancedDocs.length >= 2) {
+        for (
-            this.emitProgress(
+          let i = 0;
-              emitter,
+          i < docsWithContent.length && enhancedDocs.length < maxEnhancedDocs;
-              currentProgress,
+          i++
-              `Checking if we have enough information to answer the query`,
+        ) {
-            );
+          if (signal.aborted) {
-            const hasEnoughInfo = await this.checkIfEnoughInformation(
+            return [];
-              enhancedDocs,
+          }
-              query,
+
-              llm,
+          const currentProgress = enhancedDocs.length * 10 + 40;
-              emitter,
+
-            );
+          this.emitProgress(
-            if (hasEnoughInfo) {
+            emitter,
-              break;
+            currentProgress,
            `Deep analyzing: ${enhancedDocs.length} relevant sources found. Analyzing source ${i + 1} of ${docsWithContent.length}`,
            this.searchQuery ? `Search Query: ${this.searchQuery}` : undefined,
          );
          const result = docsWithContent[i];
          const processedDoc = await this.processSource(
            result,
            query,
            llm,
            summaryParser,
            signal,
          );
          if (processedDoc) {
            enhancedDocs.push(processedDoc);
            // After getting initial 2 sources or adding a new one, check if we have enough info
            if (enhancedDocs.length >= 2) {
              this.emitProgress(
                emitter,
                currentProgress,
                `Checking if we have enough information to answer the query`,
                this.searchQuery
                  ? `Search Query: ${this.searchQuery}`
                  : undefined,
              );
              const hasEnoughInfo = await this.checkIfEnoughInformation(
                enhancedDocs,
                query,
                llm,
                signal,
              );
              if (hasEnoughInfo) {
                break;
              }
            }
          }
        }
        this.emitProgress(
          emitter,
          95,
          `Ranking attached files`,
          this.searchQuery ? `Search Query: ${this.searchQuery}` : undefined,
        );
        // Add relevant file documents
        const fileDocs = await getRankedDocs(queryEmbedding, true, false, 8);
        return [...enhancedDocs, ...fileDocs];
      }
-
+    } catch (error) {
-      this.emitProgress(emitter, 95, `Ranking attached files`);
+      console.error('Error in rerankDocs:', error);
-      // Add relevant file documents
+      emitter.emit('error', JSON.stringify({ data: error }));
      const fileDocs = await getRankedDocs(queryEmbedding, true, false, 8);
      return [...enhancedDocs, ...fileDocs];
    }
    return [];
  }