feat(ImageSearch): Implement image search tool and integrate with existing agent tools

This commit is contained in:
Willie Zutz 2025-08-13 23:51:58 -06:00
parent 24ec6f0a5f
commit a0bc5401e9
5 changed files with 181 additions and 5 deletions

View file

@ -9,6 +9,7 @@ import {
FileText, FileText,
Globe, Globe,
Settings, Settings,
Image as ImageIcon,
} from 'lucide-react'; } from 'lucide-react';
import Markdown, { MarkdownToJSX } from 'markdown-to-jsx'; import Markdown, { MarkdownToJSX } from 'markdown-to-jsx';
import { useEffect, useState } from 'react'; import { useEffect, useState } from 'react';
@ -20,6 +21,7 @@ import {
import ThinkBox from './ThinkBox'; import ThinkBox from './ThinkBox';
import { Document } from '@langchain/core/documents'; import { Document } from '@langchain/core/documents';
import CitationLink from './CitationLink'; import CitationLink from './CitationLink';
import { decodeHtmlEntities } from '@/lib/utils/html';
// Helper functions for think overlay // Helper functions for think overlay
const extractThinkContent = (content: string): string | null => { const extractThinkContent = (content: string): string | null => {
@ -87,6 +89,9 @@ const ToolCall = ({
case 'url': case 'url':
case 'url_summarization': case 'url_summarization':
return <Globe size={16} className="text-purple-600" />; return <Globe size={16} className="text-purple-600" />;
case 'image':
case 'image_search':
return <ImageIcon size={16} className="text-blue-600" />;
default: default:
return <Settings size={16} className="text-fg/70" />; return <Settings size={16} className="text-fg/70" />;
} }
@ -99,7 +104,7 @@ const ToolCall = ({
<span className="mr-2">{getIcon(type)}</span> <span className="mr-2">{getIcon(type)}</span>
<span>Web search:</span> <span>Web search:</span>
<span className="ml-2 px-2 py-0.5 bg-fg/5 rounded font-mono text-sm"> <span className="ml-2 px-2 py-0.5 bg-fg/5 rounded font-mono text-sm">
{query || children} {decodeHtmlEntities(query || (children as string))}
</span> </span>
</> </>
); );
@ -111,7 +116,7 @@ const ToolCall = ({
<span className="mr-2">{getIcon(type)}</span> <span className="mr-2">{getIcon(type)}</span>
<span>File search:</span> <span>File search:</span>
<span className="ml-2 px-2 py-0.5 bg-fg/5 rounded font-mono text-sm"> <span className="ml-2 px-2 py-0.5 bg-fg/5 rounded font-mono text-sm">
{query || children} {decodeHtmlEntities(query || (children as string))}
</span> </span>
</> </>
); );
@ -130,6 +135,18 @@ const ToolCall = ({
); );
} }
if (type === 'image' || type === 'image_search') {
return (
<>
<span className="mr-2">{getIcon(type)}</span>
<span>Image search:</span>
<span className="ml-2 px-2 py-0.5 bg-fg/5 rounded font-mono text-sm">
{decodeHtmlEntities(query || (children as string))}
</span>
</>
);
}
// Fallback for unknown tool types // Fallback for unknown tool types
return ( return (
<> <>

View file

@ -23,6 +23,7 @@ import {
removeThinkingBlocksFromMessages, removeThinkingBlocksFromMessages,
} from '../utils/contentUtils'; } from '../utils/contentUtils';
import { getLangfuseCallbacks } from '@/lib/tracing/langfuse'; import { getLangfuseCallbacks } from '@/lib/tracing/langfuse';
import { encodeHtmlAttribute } from '@/lib/utils/html';
/** /**
* Normalize usage metadata from different LLM providers * Normalize usage metadata from different LLM providers
@ -360,10 +361,16 @@ Your task is to provide answers that are:
- Do not simulate searches, utilize the web search tool directly - Do not simulate searches, utilize the web search tool directly
${alwaysSearchInstruction} ${alwaysSearchInstruction}
${explicitUrlInstruction} ${explicitUrlInstruction}
2.1. **Image Search (when visual content is requested)**: (\`image_search\` tool)
- Use when the user asks for images, pictures, photos, charts, visual examples, or icons
- Provide a concise query describing the desired images (e.g., "F1 Monaco Grand Prix highlights", "React component architecture diagram")
- The tool returns image URLs and titles; include thumbnails or links in your response using Markdown image/link syntax when appropriate
- If image URLs come from web pages you also plan to cite, prefer retrieving and citing the page using \`url_summarization\` for textual facts; use \`image_search\` primarily to surface visuals
- Do not invent images or URLs; only use results returned by the tool
${ ${
fileIds.length > 0 fileIds.length > 0
? ` ? `
2.1. **File Search**: (\`file_search\` tool) Search through uploaded documents when relevant 2.2. **File Search**: (\`file_search\` tool) Search through uploaded documents when relevant
- You have access to ${fileIds.length} uploaded file${fileIds.length === 1 ? '' : 's'} that may contain relevant information - You have access to ${fileIds.length} uploaded file${fileIds.length === 1 ? '' : 's'} that may contain relevant information
- Use the file search tool to find specific information in the uploaded documents - Use the file search tool to find specific information in the uploaded documents
- Give the file search tool a specific question or topic to extract from the documents - Give the file search tool a specific question or topic to extract from the documents
@ -657,10 +664,10 @@ Use all available tools strategically to provide comprehensive, well-researched,
let toolMarkdown = ''; let toolMarkdown = '';
switch (toolName) { switch (toolName) {
case 'web_search': case 'web_search':
toolMarkdown = `<ToolCall type="search" query="${(toolArgs.query || 'relevant information').replace(/"/g, '&quot;')}"></ToolCall>`; toolMarkdown = `<ToolCall type=\"search\" query=\"${encodeHtmlAttribute(toolArgs.query || 'relevant information')}\"></ToolCall>`;
break; break;
case 'file_search': case 'file_search':
toolMarkdown = `<ToolCall type="file" query="${(toolArgs.query || 'relevant information').replace(/"/g, '&quot;')}"></ToolCall>`; toolMarkdown = `<ToolCall type=\"file\" query=\"${encodeHtmlAttribute(toolArgs.query || 'relevant information')}\"></ToolCall>`;
break; break;
case 'url_summarization': case 'url_summarization':
if (Array.isArray(toolArgs.urls)) { if (Array.isArray(toolArgs.urls)) {
@ -669,6 +676,9 @@ Use all available tools strategically to provide comprehensive, well-researched,
toolMarkdown = `<ToolCall type="url" count="1"></ToolCall>`; toolMarkdown = `<ToolCall type="url" count="1"></ToolCall>`;
} }
break; break;
case 'image_search':
toolMarkdown = `<ToolCall type=\"image\" query=\"${encodeHtmlAttribute(toolArgs.query || 'relevant images')}\"></ToolCall>`;
break;
default: default:
toolMarkdown = `<ToolCall type="${toolName}"></ToolCall>`; toolMarkdown = `<ToolCall type="${toolName}"></ToolCall>`;
} }

View file

@ -0,0 +1,118 @@
import { tool } from '@langchain/core/tools';
import { z } from 'zod';
import { RunnableConfig } from '@langchain/core/runnables';
import { Document } from 'langchain/document';
import { searchSearxng } from '@/lib/searxng';
import { Command, getCurrentTaskInput } from '@langchain/langgraph';
import { SimplifiedAgentStateType } from '@/lib/state/chatAgentState';
import { ToolMessage } from '@langchain/core/messages';
// Field definitions for the image search tool input, declared separately so
// the object schema below reads as a plain list of accepted arguments.
const imageQueryField = z
  .string()
  .describe(
    'The image search query. Provide a concise description of what images to find.',
  );

// Optional on input; resolves to 12 when the caller omits it.
const imageMaxResultsField = z
  .number()
  .optional()
  .default(12)
  .describe('Maximum number of image results to return.');

// Input schema for the image search tool: a required `query` string and an
// optional `maxResults` number.
const ImageSearchToolSchema = z.object({
  query: imageQueryField,
  maxResults: imageMaxResultsField,
});
/**
 * ImageSearchTool - Performs image search via SearXNG and returns image results
 *
 * Responsibilities:
 * 1. Execute image-specific search using image engines
 * 2. Normalize results to a consistent structure
 * 3. Return results as Documents in state (metadata contains image fields)
 *
 * Input:
 * - `query`: the image search query.
 * - `maxResults`: upper bound on returned images. Values below 1 fall back to
 *   the default of 12 and fractional values are rounded down, so the value is
 *   always a usable `slice` end index.
 *
 * Returns a `Command` whose update always carries one `ToolMessage`:
 * - on success, the JSON-serialised `{ images }` payload, plus the generated
 *   documents under `relevantDocuments`;
 * - when nothing usable is found, the text "No image results found.";
 * - on failure, an error description. Errors are reported through the message
 *   rather than thrown.
 */
export const imageSearchTool = tool(
  async (
    input: z.infer<typeof ImageSearchToolSchema>,
    config?: RunnableConfig,
  ) => {
    // Id of the tool call being answered; attached to every ToolMessage below.
    // Read once, outside the try block, so the error path can use it as well.
    const toolCallId = (config as any)?.toolCall?.id;

    try {
      const { query, maxResults = 12 } = input;

      // A negative limit would make `slice` drop results from the end, and 0
      // would discard every result; treat both as "use the default".
      const limit = maxResults >= 1 ? Math.floor(maxResults) : 12;

      const currentState = getCurrentTaskInput() as SimplifiedAgentStateType;
      // Continue source numbering after the documents already in state. Fall
      // back to 0 when the state has no document list yet instead of throwing.
      let currentDocCount = currentState?.relevantDocuments?.length ?? 0;

      console.log(`ImageSearchTool: Searching images for query: "${query}"`);

      const searchResults = await searchSearxng(query, {
        language: 'en',
        engines: ['bing images', 'google images'],
      });

      // Keep only results that carry both an image source and a page URL.
      const images = (searchResults.results || [])
        .filter((r: any) => r && r.img_src && r.url)
        .slice(0, limit);

      if (images.length === 0) {
        return new Command({
          update: {
            messages: [
              new ToolMessage({
                content: 'No image results found.',
                tool_call_id: toolCallId,
              }),
            ],
          },
        });
      }

      const documents: Document[] = images.map(
        (img: any) =>
          new Document({
            pageContent: `${img.title || 'Image'}\n${img.url}`,
            metadata: {
              sourceId: ++currentDocCount,
              title: img.title || 'Image',
              url: img.url,
              source: img.url,
              img_src: img.img_src,
              thumbnail: img.thumbnail || undefined,
              processingType: 'image-search',
              searchQuery: query,
            },
          }),
      );

      // NOTE(review): whether `relevantDocuments` is appended to or replaces
      // the existing list depends on the state reducer - confirm in
      // chatAgentState.
      return new Command({
        update: {
          relevantDocuments: documents,
          messages: [
            new ToolMessage({
              content: JSON.stringify({ images }),
              tool_call_id: toolCallId,
            }),
          ],
        },
      });
    } catch (error) {
      console.error('ImageSearchTool: Error during image search:', error);
      const errorMessage =
        error instanceof Error ? error.message : 'Unknown error';

      return new Command({
        update: {
          messages: [
            new ToolMessage({
              content: 'Error occurred during image search: ' + errorMessage,
              tool_call_id: toolCallId,
            }),
          ],
        },
      });
    }
  },
  {
    name: 'image_search',
    description:
      'Searches the web for images related to a query using SearXNG and returns image URLs, titles, and sources. Use when the user asks for pictures, photos, charts, or visual examples.',
    schema: ImageSearchToolSchema,
  },
);

View file

@ -11,12 +11,14 @@
import { taskManagerTool } from './taskManagerTool'; import { taskManagerTool } from './taskManagerTool';
import { simpleWebSearchTool } from './simpleWebSearchTool'; import { simpleWebSearchTool } from './simpleWebSearchTool';
import { fileSearchTool } from './fileSearchTool'; import { fileSearchTool } from './fileSearchTool';
import { imageSearchTool } from './imageSearchTool';
import { urlSummarizationTool } from './urlSummarizationTool'; import { urlSummarizationTool } from './urlSummarizationTool';
// Export individual tools (will be uncommented as tools are implemented) // Export individual tools (will be uncommented as tools are implemented)
export { taskManagerTool }; export { taskManagerTool };
export { simpleWebSearchTool }; export { simpleWebSearchTool };
export { fileSearchTool }; export { fileSearchTool };
export { imageSearchTool };
// Array containing all available agent tools for the simplified chat agent // Array containing all available agent tools for the simplified chat agent
// This will be used by the createReactAgent implementation // This will be used by the createReactAgent implementation
@ -26,6 +28,7 @@ export const allAgentTools = [
simpleWebSearchTool, simpleWebSearchTool,
fileSearchTool, fileSearchTool,
urlSummarizationTool, urlSummarizationTool,
imageSearchTool,
]; ];
// Export tool categories for selective tool loading based on focus mode // Export tool categories for selective tool loading based on focus mode
@ -33,6 +36,7 @@ export const webSearchTools = [
//webSearchTool, //webSearchTool,
simpleWebSearchTool, simpleWebSearchTool,
urlSummarizationTool, urlSummarizationTool,
imageSearchTool,
// analyzerTool, // analyzerTool,
// synthesizerTool, // synthesizerTool,
]; ];

27
src/lib/utils/html.ts Normal file
View file

@ -0,0 +1,27 @@
/**
 * Escapes a string so it can be embedded in an HTML attribute value.
 *
 * The characters `&`, `<`, `>`, `"` and `'` are replaced by `&amp;`, `&lt;`,
 * `&gt;`, `&quot;` and `&#39;` respectively; everything else is copied
 * through unchanged.
 *
 * @param value - Raw text to escape.
 * @returns The escaped text, or an empty string for an empty/falsy input.
 */
export function encodeHtmlAttribute(value: string): string {
  if (!value) {
    return '';
  }

  let encoded = '';
  for (const ch of value) {
    switch (ch) {
      case '&':
        encoded += '&amp;';
        break;
      case '<':
        encoded += '&lt;';
        break;
      case '>':
        encoded += '&gt;';
        break;
      case '"':
        encoded += '&quot;';
        break;
      case "'":
        encoded += '&#39;';
        break;
      default:
        encoded += ch;
    }
  }
  return encoded;
}
/**
 * Decodes HTML character references in a string.
 *
 * Supported references:
 * - decimal numeric (`&#39;`) and hexadecimal numeric (`&#x27;` / `&#X27;`);
 * - the named entities `&quot;`, `&apos;`, `&lt;`, `&gt;` and `&amp;`.
 *
 * Each reference is decoded exactly once, in a single pass, so text produced
 * by decoding is never re-interpreted as another entity (`&#38;lt;` yields
 * the literal text `&lt;`, not `<`). Numeric references are resolved as full
 * Unicode code points, so characters outside the Basic Multilingual Plane
 * decode correctly. Unknown named entities and numeric references beyond
 * U+10FFFF are left untouched.
 *
 * @param value - Text that may contain HTML entities.
 * @returns The decoded text, or an empty string for an empty/falsy input.
 */
export function decodeHtmlEntities(value: string): string {
  if (!value) return '';

  const namedEntities: Record<string, string> = {
    quot: '"',
    apos: "'",
    lt: '<',
    gt: '>',
    amp: '&',
  };

  return value.replace(
    /&(?:#(\d+)|#[xX]([\da-fA-F]+)|(quot|apos|lt|gt|amp));/g,
    (entity: string, dec?: string, hex?: string, name?: string): string => {
      if (name !== undefined) {
        return namedEntities[name] ?? entity;
      }

      const codePoint =
        dec !== undefined ? parseInt(dec, 10) : parseInt(hex ?? '', 16);

      // String.fromCodePoint throws a RangeError above U+10FFFF; keep such
      // references verbatim instead (the comparison is also false for NaN).
      return codePoint <= 0x10ffff ? String.fromCodePoint(codePoint) : entity;
    },
  );
}