feat(agent): Process feels decent now; however, it can be very slow. This is a checkpoint to come back to. Going to start trying to make it faster.

This commit is contained in:
Willie Zutz 2025-06-19 12:49:37 -06:00
parent 72c2ddc3a0
commit 60d36ab8f4
27 changed files with 396 additions and 211 deletions

View file

@ -311,7 +311,7 @@ export const POST = async (req: Request) => {
llm = new ChatOpenAI({
openAIApiKey: getCustomOpenaiApiKey(),
modelName: getCustomOpenaiModelName(),
temperature: 0.7,
// temperature: 0.7,
configuration: {
baseURL: getCustomOpenaiApiUrl(),
},

View file

@ -55,7 +55,7 @@ export const POST = async (req: Request) => {
llm = new ChatOpenAI({
openAIApiKey: getCustomOpenaiApiKey(),
modelName: getCustomOpenaiModelName(),
temperature: 0.7,
// temperature: 0.7,
configuration: {
baseURL: getCustomOpenaiApiUrl(),
},

View file

@ -86,7 +86,7 @@ export const POST = async (req: Request) => {
modelName: body.chatModel?.name || getCustomOpenaiModelName(),
openAIApiKey:
body.chatModel?.customOpenAIKey || getCustomOpenaiApiKey(),
temperature: 0.7,
// temperature: 0.7,
configuration: {
baseURL:
body.chatModel?.customOpenAIBaseURL || getCustomOpenaiApiUrl(),

View file

@ -54,7 +54,7 @@ export const POST = async (req: Request) => {
llm = new ChatOpenAI({
openAIApiKey: getCustomOpenaiApiKey(),
modelName: getCustomOpenaiModelName(),
temperature: 0.7,
// temperature: 0.7,
configuration: {
baseURL: getCustomOpenaiApiUrl(),
},

View file

@ -15,9 +15,14 @@ import { AgentActionEvent } from './ChatWindow';
interface AgentActionDisplayProps {
events: AgentActionEvent[];
messageId: string;
isLoading: boolean;
}
const AgentActionDisplay = ({ events, messageId }: AgentActionDisplayProps) => {
const AgentActionDisplay = ({
events,
messageId,
isLoading,
}: AgentActionDisplayProps) => {
const [isExpanded, setIsExpanded] = useState(false);
// Get the most recent event for collapsed view
@ -54,10 +59,17 @@ const AgentActionDisplay = ({ events, messageId }: AgentActionDisplayProps) => {
>
<div className="flex items-center space-x-2">
{getActionIcon(latestEvent.action)}
<span className="font-medium text-base text-black/70 dark:text-white/70 tracking-wide capitalize">
{latestEvent.action === 'SYNTHESIZING_RESPONSE'
<span className="font-medium text-base text-black/70 dark:text-white/70 tracking-wide capitalize flex items-center">
{!isLoading ||
latestEvent.action === 'INFORMATION_GATHERING_COMPLETE'
? 'Agent Log'
: formatActionName(latestEvent.action)}
{isLoading &&
latestEvent.action !== 'INFORMATION_GATHERING_COMPLETE' && (
<span className="ml-2 inline-block align-middle">
<span className="animate-spin inline-block w-4 h-4 border-2 border-t-transparent border-[#9C27B0] rounded-full align-middle"></span>
</span>
)}
</span>
</div>
{isExpanded ? (

View file

@ -233,13 +233,18 @@ const Chat = ({
<AgentActionDisplay
messageId={msg.messageId}
events={msg.agentActions}
isLoading={loading}
/>
)}
{/* Show empty agent action display if this is the last user message and we're loading */}
{loading &&
isLast &&
(!msg.agentActions || msg.agentActions.length === 0) && (
<AgentActionDisplay messageId={msg.messageId} events={[]} />
<AgentActionDisplay
messageId={msg.messageId}
events={[]}
isLoading={loading}
/>
)}
</>
)}

View file

@ -496,18 +496,17 @@ const ChatWindow = ({ id }: { id?: string }) => {
},
]);
added = true;
} else {
setMessages((prev) =>
prev.map((message) => {
if (message.messageId === data.messageId) {
return { ...message, content: message.content + data.data };
}
return message;
}),
);
}
setMessages((prev) =>
prev.map((message) => {
if (message.messageId === data.messageId) {
return { ...message, content: message.content + data.data };
}
return message;
}),
);
recievedMessage += data.data;
setScrollTrigger((prev) => prev + 1);
}

View file

@ -18,7 +18,11 @@ export const AgentState = Annotation.Root({
reducer: (x, y) => x.concat(y),
default: () => [],
}),
bannedUrls: Annotation<string[]>({
bannedSummaryUrls: Annotation<string[]>({
reducer: (x, y) => x.concat(y),
default: () => [],
}),
bannedPreviewUrls: Annotation<string[]>({
reducer: (x, y) => x.concat(y),
default: () => [],
}),
@ -38,4 +42,8 @@ export const AgentState = Annotation.Root({
reducer: (x, y) => y ?? x,
default: () => '',
}),
fullAnalysisAttempts: Annotation<number>({
reducer: (x, y) => (y ?? 0) + x,
default: () => 0,
}),
});

View file

@ -1,11 +1,22 @@
import { BaseChatModel } from '@langchain/core/language_models/chat_models';
import { AIMessage } from '@langchain/core/messages';
import {
AIMessage,
HumanMessage,
SystemMessage,
} from '@langchain/core/messages';
import { ChatPromptTemplate } from '@langchain/core/prompts';
import { Command, END } from '@langchain/langgraph';
import { EventEmitter } from 'events';
import LineOutputParser from '../outputParsers/lineOutputParser';
import { formatDateForLLM } from '../utils';
import { AgentState } from './agentState';
import { setTemperature } from '../utils/modelUtils';
import {
additionalUserInputPrompt,
additionalWebSearchPrompt,
decideNextActionPrompt,
} from '../prompts/analyzer';
import { removeThinkingBlocks } from '../utils/contentUtils';
export class AnalyzerAgent {
private llm: BaseChatModel;
@ -27,6 +38,11 @@ export class AnalyzerAgent {
async execute(state: typeof AgentState.State): Promise<Command> {
try {
setTemperature(this.llm, 0.0);
let nextActionContent = 'need_more_info';
// Skip full analysis if this is the first run.
//if (state.fullAnalysisAttempts > 0) {
// Emit initial analysis event
this.emitter.emit('agent_action', {
type: 'agent_action',
@ -45,53 +61,9 @@ export class AnalyzerAgent {
console.log(
`Analyzing ${state.relevantDocuments.length} documents for relevance...`,
);
const analysisPromptTemplate = `You are an expert content analyzer. Your task is to analyze the provided document and determine if we have enough relevant information to fully answer the user's query. If the content is not sufficient, you will suggest a more specific search query to gather additional information.
# Instructions
- Carefully analyze the content of the context provided and determine if it contains sufficient information to answer the user's query
- The content should completely address the query, providing detailed explanations, relevant facts, and necessary context
- Use the content provided in the \`context\` tag, as well as the historical context of the conversation, to make your determination
- If the context provides conflicting information, explain the discrepancies and what additional information is needed to resolve them
- If the user is asking for a specific number of sources and the context does not provide enough, consider the content insufficient
# Output Format
- If the content is sufficient, respond with "good_content" in an <answer> XML tag
- If the content is not sufficient, respond with "need_more_info" in an <answer> XML tag and provide a detailed question that would help gather more specific information to answer the query in a <question> XML tag
- This question will be used to generate a web search query to gather more information and should be specific, actionable, and focused on the gaps in the current content
- This step will be repeated until sufficient information is gathered to answer the query. Do not try to answer the entire query at once
- It should be concise and avoid pleasantries or unnecessary details
- Break down the query into a smaller, more focused question that can be answered with a web search
- For example, if the query is asking about specific information from multiple locations, break the query into one smaller query for a single location
- If if the query is asking about a complex topic, break it down into a single smaller question that can be answered one at a time
- Avoid asking for general information or vague details; focus on specific, actionable questions that can lead to concrete answers
- Avoid giving the same guidance more than once, and avoid repeating the same question multiple times
- Respond with your answer in a <answer> XML tag
- If you need more information, provide a detailed question in a <question> XML tag
- If you need more information, provide a detailed one line reason why the content is not sufficient in a <reason> XML tag
# Refinement History
- The following questions have been asked to refine the search
${state.searchInstructionHistory.map((question) => ` - ${question}`).join('\n')}
# System Instructions
- The system instructions provided to you are:
{systemInstructions}
# Example Output
- If the content is sufficient:
<answer>good_content</answer>
- If the content is not sufficient:
<answer>need_more_info</answer>
<question>A question that would help gather more specific information to answer the query?</question>
<reason>A one line reason why the content is not sufficient</reason>
# Context
<context>
Today's date is ${formatDateForLLM(new Date())}
{context}
</context>`;
const analysisPrompt = await ChatPromptTemplate.fromTemplate(
analysisPromptTemplate,
const nextActionPrompt = await ChatPromptTemplate.fromTemplate(
decideNextActionPrompt,
).format({
systemInstructions: this.systemInstructions,
context: state.relevantDocuments
@ -100,32 +72,110 @@ Today's date is ${formatDateForLLM(new Date())}
`<source${index + 1}>${doc?.metadata?.title ? `<title>${doc?.metadata?.title}</title>` : ''}<content>${doc.pageContent}</content></source${index + 1}>`,
)
.join('\n\n'),
date: formatDateForLLM(new Date()),
searchInstructionHistory: state.searchInstructionHistory
.map((question) => `- ${question}`)
.join('\n'),
query: state.query,
});
const response = await this.llm.invoke(
[...state.messages, new AIMessage(analysisPrompt)],
const nextActionResponse = await this.llm.invoke(
[...state.messages, new HumanMessage(nextActionPrompt)],
{ signal: this.signal },
);
console.log('Analysis response:', response.content);
// Parse the response to extract the analysis result
const analysisOutputParser = new LineOutputParser({ key: 'answer' });
const moreInfoOutputParser = new LineOutputParser({ key: 'question' });
const reasonOutputParser = new LineOutputParser({ key: 'reason' });
const analysisResult = await analysisOutputParser.parse(
response.content as string,
nextActionContent = removeThinkingBlocks(
nextActionResponse.content as string,
);
const moreInfoQuestion = await moreInfoOutputParser.parse(
response.content as string,
);
const reason = await reasonOutputParser.parse(response.content as string);
console.log('Analysis result:', analysisResult);
console.log('More info question:', moreInfoQuestion);
console.log('Reason for insufficiency:', reason);
console.log('Next action response:', nextActionContent);
//}
if (!nextActionContent.startsWith('good_content')) {
if (nextActionContent.startsWith('need_user_info')) {
const moreUserInfoPrompt = await ChatPromptTemplate.fromTemplate(
additionalUserInputPrompt,
).format({
systemInstructions: this.systemInstructions,
context: state.relevantDocuments
.map(
(doc, index) =>
`<source${index + 1}>${doc?.metadata?.title ? `<title>${doc?.metadata?.title}</title>` : ''}<content>${doc.pageContent}</content></source${index + 1}>`,
)
.join('\n\n'),
date: formatDateForLLM(new Date()),
searchInstructionHistory: state.searchInstructionHistory
.map((question) => `- ${question}`)
.join('\n'),
query: state.query,
});
const stream = await this.llm.stream(
[...state.messages, new SystemMessage(moreUserInfoPrompt)],
{ signal: this.signal },
);
let fullResponse = '';
for await (const chunk of stream) {
if (this.signal.aborted) {
break;
}
const content = chunk.content;
if (typeof content === 'string' && content.length > 0) {
fullResponse += content;
// Emit each chunk as a data response in real-time
this.emitter.emit(
'data',
JSON.stringify({
type: 'response',
data: content,
}),
);
}
}
this.emitter.emit('end');
// Create the final response message with the complete content
const response = new SystemMessage(fullResponse);
return new Command({
goto: END,
update: {
messages: [response],
},
});
}
// If we need more information from the LLM, generate a more specific search query
const moreInfoPrompt = await ChatPromptTemplate.fromTemplate(
additionalWebSearchPrompt,
).format({
systemInstructions: this.systemInstructions,
context: state.relevantDocuments
.map(
(doc, index) =>
`<source${index + 1}>${doc?.metadata?.title ? `<title>${doc?.metadata?.title}</title>` : ''}<content>${doc.pageContent}</content></source${index + 1}>`,
)
.join('\n\n'),
date: formatDateForLLM(new Date()),
searchInstructionHistory: state.searchInstructionHistory
.map((question) => `- ${question}`)
.join('\n'),
query: state.query,
});
const moreInfoResponse = await this.llm.invoke(
[...state.messages, new HumanMessage(moreInfoPrompt)],
{ signal: this.signal },
);
const moreInfoQuestion = removeThinkingBlocks(
moreInfoResponse.content as string,
);
if (!analysisResult.startsWith('good_content')) {
// Emit reanalyzing event when we need more information
this.emitter.emit('agent_action', {
type: 'agent_action',
@ -134,7 +184,6 @@ Today's date is ${formatDateForLLM(new Date())}
message:
'Current context is insufficient - gathering more information',
details: {
reason: reason,
nextSearchQuery: moreInfoQuestion,
documentCount: state.relevantDocuments.length,
searchIterations: state.searchInstructionHistory.length,
@ -153,6 +202,7 @@ Today's date is ${formatDateForLLM(new Date())}
],
searchInstructions: moreInfoQuestion,
searchInstructionHistory: [moreInfoQuestion],
fullAnalysisAttempts: 1,
},
});
}
@ -162,8 +212,7 @@ Today's date is ${formatDateForLLM(new Date())}
type: 'agent_action',
data: {
action: 'INFORMATION_GATHERING_COMPLETE',
message:
'Sufficient information gathered - ready to synthesize response',
message: 'Sufficient information gathered, ready to respond.',
details: {
documentCount: state.relevantDocuments.length,
searchIterations: state.searchInstructionHistory.length,
@ -194,6 +243,8 @@ Today's date is ${formatDateForLLM(new Date())}
messages: [errorMessage],
},
});
} finally {
setTemperature(this.llm); // Reset temperature for subsequent actions
}
}
}

View file

@ -28,20 +28,6 @@ export class SynthesizerAgent {
*/
async execute(state: typeof AgentState.State): Promise<Command> {
try {
// Emit synthesizing response event
this.emitter.emit('agent_action', {
type: 'agent_action',
data: {
action: 'SYNTHESIZING_RESPONSE',
message: 'Synthesizing final answer...',
details: {
query: state.query,
documentCount: state.relevantDocuments.length,
searchIterations: state.searchInstructionHistory.length,
},
},
});
const synthesisPrompt = `You are an expert information synthesizer. Based on the search results and analysis provided, create a comprehensive, well-structured answer to the user's query.
## Response Instructions

View file

@ -14,6 +14,7 @@ import {
PreviewContent,
} from '../utils/analyzePreviewContent';
import { AgentState } from './agentState';
import { setTemperature } from '../utils/modelUtils';
export class WebSearchAgent {
private llm: BaseChatModel;
@ -37,41 +38,45 @@ export class WebSearchAgent {
* Web search agent node
*/
async execute(state: typeof AgentState.State): Promise<Command> {
// Emit preparing web search event
this.emitter.emit('agent_action', {
type: 'agent_action',
data: {
action: 'PREPARING_SEARCH_QUERY',
// message: `Preparing search query`,
details: {
query: state.query,
searchInstructions: state.searchInstructions || state.query,
documentCount: state.relevantDocuments.length,
searchIterations: state.searchInstructionHistory.length,
},
},
});
const template = PromptTemplate.fromTemplate(webSearchRetrieverAgentPrompt);
const prompt = await template.format({
systemInstructions: this.systemInstructions,
query: state.query,
date: formatDateForLLM(new Date()),
supervisor: state.searchInstructions,
});
const searchQueryResult = await this.llm.invoke(
[...state.messages, prompt],
{ signal: this.signal },
);
// Parse the response to extract the search query with the lineoutputparser
const lineOutputParser = new LineOutputParser({ key: 'answer' });
const searchQuery = await lineOutputParser.parse(
searchQueryResult.content as string,
);
try {
setTemperature(this.llm, 0); // Set temperature to 0 for deterministic output
// Emit preparing web search event
this.emitter.emit('agent_action', {
type: 'agent_action',
data: {
action: 'PREPARING_SEARCH_QUERY',
// message: `Preparing search query`,
details: {
query: state.query,
searchInstructions: state.searchInstructions || state.query,
documentCount: state.relevantDocuments.length,
searchIterations: state.searchInstructionHistory.length,
},
},
});
const template = PromptTemplate.fromTemplate(
webSearchRetrieverAgentPrompt,
);
const prompt = await template.format({
systemInstructions: this.systemInstructions,
query: state.query,
date: formatDateForLLM(new Date()),
supervisor: state.searchInstructions,
});
const searchQueryResult = await this.llm.invoke(
[...state.messages, prompt],
{ signal: this.signal },
);
// Parse the response to extract the search query with the lineoutputparser
const lineOutputParser = new LineOutputParser({ key: 'answer' });
const searchQuery = await lineOutputParser.parse(
searchQueryResult.content as string,
);
console.log(`Performing web search for query: "${searchQuery}"`);
// Emit executing web search event
@ -110,11 +115,16 @@ export class WebSearchAgent {
},
});
let bannedUrls = state.bannedUrls || [];
let bannedSummaryUrls = state.bannedSummaryUrls || [];
let bannedPreviewUrls = state.bannedPreviewUrls || [];
// Extract preview content from top 8 search results for analysis
const previewContents: PreviewContent[] = searchResults.results
.filter((result) => !bannedUrls.includes(result.url)) // Filter out banned URLs first
.filter(
(result) =>
!bannedSummaryUrls.includes(result.url) &&
!bannedPreviewUrls.includes(result.url),
) // Filter out banned URLs first
.slice(0, 8) // Then take top 8 results
.map((result) => ({
title: result.title || 'Untitled',
@ -203,6 +213,10 @@ export class WebSearchAgent {
}),
);
previewContents.forEach((content) => {
bannedPreviewUrls.push(content.url); // Add to banned preview URLs to avoid duplicates
});
console.log(
`Created ${documents.length} documents from preview content`,
);
@ -233,7 +247,12 @@ export class WebSearchAgent {
// Summarize the top 2 search results
for (const result of searchResults.results) {
if (bannedUrls.includes(result.url)) {
if (this.signal.aborted) {
console.warn('Search operation aborted by signal');
break; // Exit if the operation is aborted
}
if (bannedSummaryUrls.includes(result.url)) {
console.log(`Skipping banned URL: ${result.url}`);
// Note: We don't emit an agent_action event for banned URLs as this is an internal
// optimization that should be transparent to the user
@ -247,7 +266,7 @@ export class WebSearchAgent {
}
attemptedUrlCount++;
bannedUrls.push(result.url); // Add to banned URLs to avoid duplicates
bannedSummaryUrls.push(result.url); // Add to banned URLs to avoid duplicates
if (documents.length >= 1) {
break; // Limit to top 1 document
@ -345,7 +364,8 @@ export class WebSearchAgent {
update: {
messages: [new AIMessage(responseMessage)],
relevantDocuments: documents,
bannedUrls: bannedUrls,
bannedSummaryUrls: bannedSummaryUrls,
bannedPreviewUrls: bannedPreviewUrls,
},
});
} catch (error) {
@ -360,6 +380,8 @@ export class WebSearchAgent {
messages: [errorMessage],
},
});
} finally {
setTemperature(this.llm, undefined); // Reset temperature to default
}
}
}

View file

@ -1,4 +1,5 @@
import { BaseOutputParser } from '@langchain/core/output_parsers';
import { removeThinkingBlocks } from '../utils/contentUtils';
interface LineOutputParserArgs {
key?: string;
@ -23,7 +24,7 @@ class LineOutputParser extends BaseOutputParser<string> {
// First, remove all <think>...</think> blocks to avoid parsing tags inside thinking content
// This might be a little aggressive. Prompt massaging might be all we need, but this is a guarantee and should rarely mess anything up.
text = this.removeThinkingBlocks(text);
text = removeThinkingBlocks(text);
const regex = /^(\s*(-|\*|\d+\.\s|\d+\)\s|\u2022)\s*)+/;
const startKeyIndex = text.indexOf(`<${this.key}>`);
@ -44,17 +45,6 @@ class LineOutputParser extends BaseOutputParser<string> {
return line;
}
/**
* Removes all content within <think>...</think> blocks
* @param text The input text containing thinking blocks
* @returns The text with all thinking blocks removed
*/
private removeThinkingBlocks(text: string): string {
// Use regex to identify and remove all <think>...</think> blocks
// Using the 's' flag to make dot match newlines
return text.replace(/<think>[\s\S]*?<\/think>/g, '').trim();
}
getFormatInstructions(): string {
throw new Error('Not implemented.');
}

View file

@ -1,4 +1,5 @@
import { BaseOutputParser } from '@langchain/core/output_parsers';
import { removeThinkingBlocks } from '../utils/contentUtils';
interface LineListOutputParserArgs {
key?: string;
@ -23,7 +24,7 @@ class LineListOutputParser extends BaseOutputParser<string[]> {
// First, remove all <think>...</think> blocks to avoid parsing tags inside thinking content
// This might be a little aggressive. Prompt massaging might be all we need, but this is a guarantee and should rarely mess anything up.
text = this.removeThinkingBlocks(text);
text = removeThinkingBlocks(text);
const regex = /^(\s*(-|\*|\d+\.\s|\d+\)\s|\u2022)\s*)+/;
const startKeyIndex = text.indexOf(`<${this.key}>`);
@ -46,17 +47,6 @@ class LineListOutputParser extends BaseOutputParser<string[]> {
return lines;
}
/**
* Removes all content within <think>...</think> blocks
* @param text The input text containing thinking blocks
* @returns The text with all thinking blocks removed
*/
private removeThinkingBlocks(text: string): string {
// Use regex to identify and remove all <think>...</think> blocks
// Using [\s\S] pattern to match all characters including newlines
return text.replace(/<think>[\s\S]*?<\/think>/g, '').trim();
}
getFormatInstructions(): string {
throw new Error('Not implemented.');
}

View file

@ -0,0 +1,95 @@
/**
 * Prompt for the analyzer's routing step: given the gathered context and the
 * user's query, the model must reply with exactly one of `good_content`,
 * `need_user_info`, or `need_more_info`. Placeholders ({systemInstructions},
 * {date}, {query}, {context}) are filled via ChatPromptTemplate.format.
 */
export const decideNextActionPrompt = `You are an expert content analyzer.
Your task is to analyze the provided context and determine if we have enough information to fully answer the user's query.
# Instructions
- Carefully analyze the content of the context provided and determine if it contains sufficient information to answer the user's query
- The content should completely address the query, providing detailed explanations, relevant facts, and necessary context
- Use the content provided in the \`context\` tag, as well as the historical context of the conversation, to make your determination
- If the context provides conflicting information, explain the discrepancies and what additional information is needed to resolve them
- If the user is asking for a specific number of sources and the context does not provide enough, consider the content insufficient
# Response Options
- If the content is sufficient, respond with \`good_content\`
- If the content is not sufficient you have two options
- Option 1 - Ask the user for more information (Respond with \`need_user_info\`)
- Use this option when the content is not sufficient due to information that would not typically be available online, or when the query is too vague or broad
- For example, if the query is asking for personal opinions, preferences, user experiences, settings, objects the user owns, or specific details that are not typically found in online content
- Option 2 - Ask the LLM to generate a more specific search query (Respond with \`need_more_info\`)
- Only use this option when the content is not sufficient due to missing information that could typically be found online and is not related to personal opinions, preferences, user experiences, or specific objects the user owns
- The only output in your response should be one of the following:
- \`good_content\`
- \`need_user_info\`
- \`need_more_info\`
# System Instructions
{systemInstructions}
# Date
Today's date is {date}
# User Query
{query}
# Context
<context>
{context}
</context>
`;
/**
 * Prompt used when the analyzer decided `need_user_info`: the model streams a
 * clarifying question back to the user (not to the search pipeline).
 * Placeholders ({searchInstructionHistory}, {systemInstructions}, {date},
 * {query}, {context}) are filled via ChatPromptTemplate.format.
 */
export const additionalUserInputPrompt = `You are an expert content analyzer.
Your task is to analyze the provided context and user query to determine what additional information is needed to fully answer the user's query.
# Refinement History
- The following automated questions have already been asked to refine the search
{searchInstructionHistory}
# System Instructions
{systemInstructions}
# Date
Today's date is {date}
# User Query
{query}
# Context
<context>
{context}
</context>
# Instructions
Respond with a detailed question that will be directed to the user to gather more specific information that can help refine the search.
`;
/**
 * Prompt used when the analyzer decided `need_more_info`: the model produces a
 * narrower follow-up question that drives the next web-search iteration.
 * Placeholders ({searchInstructionHistory}, {systemInstructions}, {date},
 * {query}, {context}) are filled via ChatPromptTemplate.format.
 */
export const additionalWebSearchPrompt = `
You are an expert content analyzer.
Your task is to analyze the provided context and user query to determine what additional information is needed to fully answer the user's query.
# Instructions
- Respond with a detailed question that will be directed to an LLM to gather more specific information that can help refine the search.
- If the query is asking about a complex topic, break it down into a single smaller question that can be answered one at a time. This search process can be iterative
- Break down the query into a smaller, more focused question that can be answered with a web search
- For example, if the query is asking about specific information from multiple locations, break the query into one smaller query for a single location
- Avoid giving the same guidance more than once, and avoid repeating the same question multiple times
- Avoid asking for general information or vague details; focus on specific, actionable questions that can lead to concrete answers
# Refinement History
- The following automated questions have already been asked to refine the search
{searchInstructionHistory}
# System Instructions
{systemInstructions}
# Date
Today's date is {date}
# User Query
{query}
# Context
<context>
{context}
</context>
Respond with a detailed question that will be directed to an LLM to gather more specific information that can help refine the search.
`;

View file

@ -57,7 +57,7 @@ export const loadAnthropicChatModels = async () => {
model: new ChatAnthropic({
apiKey: anthropicApiKey,
modelName: model.key,
temperature: 0.7,
// temperature: 0.7,
}) as unknown as BaseChatModel,
};
});

View file

@ -33,7 +33,7 @@ export const loadDeepseekChatModels = async () => {
model: new ChatOpenAI({
openAIApiKey: deepseekApiKey,
modelName: model.key,
temperature: 0.7,
// temperature: 0.7,
configuration: {
baseURL: 'https://api.deepseek.com',
},

View file

@ -68,7 +68,7 @@ export const loadGeminiChatModels = async () => {
model: new ChatGoogleGenerativeAI({
apiKey: geminiApiKey,
model: model.key,
temperature: 0.7,
// temperature: 0.7,
}) as unknown as BaseChatModel,
};
});

View file

@ -101,7 +101,7 @@ export const loadGroqChatModels = async () => {
model: new ChatOpenAI({
openAIApiKey: groqApiKey,
modelName: model.key,
temperature: 0.7,
// temperature: 0.7,
configuration: {
baseURL: 'https://api.groq.com/openai/v1',
},

View file

@ -119,7 +119,7 @@ export const getAvailableChatModelProviders = async () => {
model: new ChatOpenAI({
openAIApiKey: customOpenAiApiKey,
modelName: customOpenAiModelName,
temperature: 0.7,
// temperature: 0.7,
configuration: {
baseURL: customOpenAiApiUrl,
},

View file

@ -52,7 +52,7 @@ export const loadLMStudioChatModels = async () => {
baseURL: ensureV1Endpoint(endpoint),
},
modelName: model.id,
temperature: 0.7,
// temperature: 0.7,
streaming: true,
maxRetries: 3,
}) as unknown as BaseChatModel,

View file

@ -31,7 +31,7 @@ export const loadOllamaChatModels = async () => {
model: new ChatOllama({
baseUrl: ollamaApiEndpoint,
model: model.model,
temperature: 0.7,
// temperature: 0.7,
keepAlive: getKeepAlive(),
}),
};

View file

@ -69,7 +69,7 @@ export const loadOpenAIChatModels = async () => {
model: new ChatOpenAI({
openAIApiKey: openaiApiKey,
modelName: model.key,
temperature: 0.7,
// temperature: 0.7,
}) as unknown as BaseChatModel,
};
});

View file

@ -5,7 +5,13 @@ import {
HumanMessage,
SystemMessage,
} from '@langchain/core/messages';
import { END, MemorySaver, START, StateGraph } from '@langchain/langgraph';
import {
BaseLangGraphError,
END,
MemorySaver,
START,
StateGraph,
} from '@langchain/langgraph';
import { EventEmitter } from 'events';
import {
AgentState,
@ -25,6 +31,7 @@ export class AgentSearch {
private webSearchAgent: WebSearchAgent;
private analyzerAgent: AnalyzerAgent;
private synthesizerAgent: SynthesizerAgent;
private emitter: EventEmitter;
constructor(
llm: BaseChatModel,
@ -38,6 +45,7 @@ export class AgentSearch {
this.embeddings = embeddings;
this.checkpointer = new MemorySaver();
this.signal = signal;
this.emitter = emitter;
// Initialize agents
this.webSearchAgent = new WebSearchAgent(
@ -97,40 +105,35 @@ export class AgentSearch {
async searchAndAnswer(query: string, history: BaseMessage[] = []) {
const workflow = this.createWorkflow();
try {
const initialState = {
messages: [...history, new HumanMessage(query)],
query,
};
const initialState = {
messages: [...history, new HumanMessage(query)],
query,
};
const result = await workflow.invoke(initialState, {
try {
await workflow.invoke(initialState, {
configurable: { thread_id: `agent_search_${Date.now()}` },
recursionLimit: 20,
recursionLimit: 10,
signal: this.signal,
});
return result;
} catch (error) {
console.error('Agent workflow error:', error);
// Fallback to a simple response
const fallbackResponse = await this.llm.invoke(
[
new SystemMessage(
"You are a helpful assistant. The advanced agent workflow failed, so please provide a basic response to the user's query based on your knowledge.",
),
new HumanMessage(query),
],
{ signal: this.signal },
);
return {
messages: [...history, new HumanMessage(query), fallbackResponse],
query,
searchResults: [],
next: END,
analysis: '',
};
} catch (error: BaseLangGraphError | any) {
if (error instanceof BaseLangGraphError) {
console.error('LangGraph error occurred:', error.message);
if (error.lc_error_code === 'GRAPH_RECURSION_LIMIT') {
this.emitter.emit(
'data',
JSON.stringify({
type: 'response',
data: "I've been working on this for a while and can't find a solution. Please try again with a different query.",
}),
);
this.emitter.emit('end');
}
} else if (error.name === 'AbortError') {
console.warn('Agent search was aborted:', error.message);
} else {
console.error('Unexpected error during agent search:', error);
}
}
}
}

View file

@ -103,6 +103,7 @@ class MetaSearchAgent implements MetaSearchAgentType {
emitter: eventEmitter,
signal: AbortSignal,
) {
// TODO: Don't we want to set this back to default once search is done?
(llm as unknown as ChatOpenAI).temperature = 0;
this.emitProgress(emitter, 10, `Building search query`);
@ -693,7 +694,7 @@ ${docs[index].metadata?.url.toLowerCase().includes('file') ? '' : '\n<url>' + do
);
// Execute the agent workflow
const result = await agentSearch.searchAndAnswer(message, history);
await agentSearch.searchAndAnswer(message, history);
// No need to emit end signals here since synthesizerAgent
// is now streaming in real-time and emits them

View file

@ -2,6 +2,8 @@ import { BaseChatModel } from '@langchain/core/language_models/chat_models';
import { BaseMessage } from '@langchain/core/messages';
import LineOutputParser from '../outputParsers/lineOutputParser';
import { formatDateForLLM } from '../utils';
import { ChatOpenAI, OpenAIClient } from '@langchain/openai';
import { removeThinkingBlocks } from './contentUtils';
export type PreviewAnalysisResult = {
isSufficient: boolean;
@ -40,7 +42,6 @@ export const analyzePreviewContent = async (
`Source ${index + 1}:
Title: ${content.title}
Snippet: ${content.snippet}
URL: ${content.url}
---`,
)
.join('\n\n');
@ -48,7 +49,10 @@ URL: ${content.url}
// Format chat history for context
const formattedChatHistory = chatHistory
.slice(-10) // Only include last 10 messages for context
.map((message, index) => `${message._getType()}: ${message.content}`)
.map(
(message) =>
`${message.getType()}: ${removeThinkingBlocks(message.content.toString())}`,
)
.join('\n');
const systemPrompt = systemInstructions ? `${systemInstructions}\n\n` : '';
@ -59,16 +63,14 @@ URL: ${content.url}
`${systemPrompt}You are a preview content analyzer, tasked with determining if search result snippets contain sufficient information to answer a user's query.
# Instructions
- Analyze the provided search result previews (titles + snippets) to determine if they collectively contain enough information to provide a complete and accurate answer to the user's query
- Consider the chat history context when making your decision
- Analyze the provided search result previews (titles + snippets), and chat history context to determine if they collectively contain enough information to provide a complete and accurate answer to the user's query
- You must make a binary decision: either the preview content is sufficient OR it is not sufficient
- If the preview content can provide a complete answer to the query, respond with "sufficient"
- If the preview content lacks important details, requires deeper analysis, or cannot fully answer the query, respond with "not_needed: [specific reason why full content analysis is required]"
- Be specific in your reasoning when the content is not sufficient
- Consider query complexity: simple factual questions may be answerable from snippets, while complex research questions typically need full content
- Consider information completeness: if key details are missing from the snippets that would be needed for a complete answer, full analysis is required
- Output your decision inside a \`decision\` XML tag
# Information Context:
Today's date is ${formatDateForLLM(new Date())}
# Chat History Context:
@ -79,7 +81,7 @@ ${query}
# Search Result Previews to Analyze:
${formattedPreviewContent}
`,
`,
{ signal },
);

View file

@ -0,0 +1,10 @@
/**
 * Removes all content within <think>...</think> blocks.
 *
 * Used to strip chain-of-thought output from reasoning models before parsing
 * or displaying their responses.
 *
 * @param text The input text that may contain thinking blocks
 * @returns The text with every thinking block removed and surrounding
 *          whitespace trimmed
 */
export const removeThinkingBlocks = (text: string): string => {
  // [\s\S] matches any character including newlines (no 's' flag needed),
  // and the lazy quantifier keeps each match to a single <think> block.
  return text.replace(/<think>[\s\S]*?<\/think>/g, '').trim();
};

View file

@ -50,3 +50,14 @@ export function getModelName(llm: BaseChatModel): string {
return 'Unknown';
}
}
/**
 * Sets (or, when `temperature` is omitted, clears) the sampling temperature
 * on a chat-model instance.
 *
 * Not every BaseChatModel implementation exposes a `temperature` field, so the
 * property is probed with the `in` operator before assignment. Failures are
 * logged rather than thrown so callers (agents toggling determinism around an
 * invoke) never crash on an incompatible model.
 *
 * @param llm         The chat model to mutate in place
 * @param temperature Desired temperature, or undefined to reset to the default
 */
export function setTemperature(llm: BaseChatModel, temperature?: number) {
  try {
    if ('temperature' in llm) {
      // Narrow structural cast instead of `any`: we only touch `temperature`.
      (llm as unknown as { temperature?: number }).temperature = temperature;
    }
  } catch (e) {
    console.error('Failed to set temperature:', e);
  }
}