feat(ImageSearch): Implement image search tool and integrate with existing agent tools

This commit is contained in:
Willie Zutz 2025-08-13 23:51:58 -06:00
parent 24ec6f0a5f
commit a0bc5401e9
5 changed files with 181 additions and 5 deletions

View file

@ -9,6 +9,7 @@ import {
FileText,
Globe,
Settings,
Image as ImageIcon,
} from 'lucide-react';
import Markdown, { MarkdownToJSX } from 'markdown-to-jsx';
import { useEffect, useState } from 'react';
@ -20,6 +21,7 @@ import {
import ThinkBox from './ThinkBox';
import { Document } from '@langchain/core/documents';
import CitationLink from './CitationLink';
import { decodeHtmlEntities } from '@/lib/utils/html';
// Helper functions for think overlay
const extractThinkContent = (content: string): string | null => {
@ -87,6 +89,9 @@ const ToolCall = ({
case 'url':
case 'url_summarization':
return <Globe size={16} className="text-purple-600" />;
case 'image':
case 'image_search':
return <ImageIcon size={16} className="text-blue-600" />;
default:
return <Settings size={16} className="text-fg/70" />;
}
@ -99,7 +104,7 @@ const ToolCall = ({
<span className="mr-2">{getIcon(type)}</span>
<span>Web search:</span>
<span className="ml-2 px-2 py-0.5 bg-fg/5 rounded font-mono text-sm">
{query || children}
{decodeHtmlEntities(query || (children as string))}
</span>
</>
);
@ -111,7 +116,7 @@ const ToolCall = ({
<span className="mr-2">{getIcon(type)}</span>
<span>File search:</span>
<span className="ml-2 px-2 py-0.5 bg-fg/5 rounded font-mono text-sm">
{query || children}
{decodeHtmlEntities(query || (children as string))}
</span>
</>
);
@ -130,6 +135,18 @@ const ToolCall = ({
);
}
if (type === 'image' || type === 'image_search') {
return (
<>
<span className="mr-2">{getIcon(type)}</span>
<span>Image search:</span>
<span className="ml-2 px-2 py-0.5 bg-fg/5 rounded font-mono text-sm">
{decodeHtmlEntities(query || (children as string))}
</span>
</>
);
}
// Fallback for unknown tool types
return (
<>

View file

@ -23,6 +23,7 @@ import {
removeThinkingBlocksFromMessages,
} from '../utils/contentUtils';
import { getLangfuseCallbacks } from '@/lib/tracing/langfuse';
import { encodeHtmlAttribute } from '@/lib/utils/html';
/**
* Normalize usage metadata from different LLM providers
@ -360,10 +361,16 @@ Your task is to provide answers that are:
- Do not simulate searches, utilize the web search tool directly
${alwaysSearchInstruction}
${explicitUrlInstruction}
2.1. **Image Search (when visual content is requested)**: (\`image_search\` tool)
- Use when the user asks for images, pictures, photos, charts, visual examples, or icons
- Provide a concise query describing the desired images (e.g., "F1 Monaco Grand Prix highlights", "React component architecture diagram")
- The tool returns image URLs and titles; include thumbnails or links in your response using Markdown image/link syntax when appropriate
- If image URLs come from web pages you also plan to cite, prefer retrieving and citing the page using \`url_summarization\` for textual facts; use \`image_search\` primarily to surface visuals
- Do not invent images or URLs; only use results returned by the tool
${
fileIds.length > 0
? `
2.1. **File Search**: (\`file_search\` tool) Search through uploaded documents when relevant
2.2. **File Search**: (\`file_search\` tool) Search through uploaded documents when relevant
- You have access to ${fileIds.length} uploaded file${fileIds.length === 1 ? '' : 's'} that may contain relevant information
- Use the file search tool to find specific information in the uploaded documents
- Give the file search tool a specific question or topic to extract from the documents
@ -657,10 +664,10 @@ Use all available tools strategically to provide comprehensive, well-researched,
let toolMarkdown = '';
switch (toolName) {
case 'web_search':
toolMarkdown = `<ToolCall type="search" query="${(toolArgs.query || 'relevant information').replace(/"/g, '&quot;')}"></ToolCall>`;
toolMarkdown = `<ToolCall type=\"search\" query=\"${encodeHtmlAttribute(toolArgs.query || 'relevant information')}\"></ToolCall>`;
break;
case 'file_search':
toolMarkdown = `<ToolCall type="file" query="${(toolArgs.query || 'relevant information').replace(/"/g, '&quot;')}"></ToolCall>`;
toolMarkdown = `<ToolCall type=\"file\" query=\"${encodeHtmlAttribute(toolArgs.query || 'relevant information')}\"></ToolCall>`;
break;
case 'url_summarization':
if (Array.isArray(toolArgs.urls)) {
@ -669,6 +676,9 @@ Use all available tools strategically to provide comprehensive, well-researched,
toolMarkdown = `<ToolCall type="url" count="1"></ToolCall>`;
}
break;
case 'image_search':
toolMarkdown = `<ToolCall type=\"image\" query=\"${encodeHtmlAttribute(toolArgs.query || 'relevant images')}\"></ToolCall>`;
break;
default:
toolMarkdown = `<ToolCall type="${toolName}"></ToolCall>`;
}

View file

@ -0,0 +1,118 @@
import { tool } from '@langchain/core/tools';
import { z } from 'zod';
import { RunnableConfig } from '@langchain/core/runnables';
import { Document } from 'langchain/document';
import { searchSearxng } from '@/lib/searxng';
import { Command, getCurrentTaskInput } from '@langchain/langgraph';
import { SimplifiedAgentStateType } from '@/lib/state/chatAgentState';
import { ToolMessage } from '@langchain/core/messages';
// Input contract for the image search tool: a required free-text query and an
// optional cap on the number of results, which falls back to 12 when omitted.
const imageQueryField = z
  .string()
  .describe(
    'The image search query. Provide a concise description of what images to find.',
  );
const imageMaxResultsField = z
  .number()
  .optional()
  .default(12)
  .describe('Maximum number of image results to return.');
const ImageSearchToolSchema = z.object({
  query: imageQueryField,
  maxResults: imageMaxResultsField,
});
/**
 * ImageSearchTool - Performs an image search via SearXNG and returns the results.
 *
 * Flow:
 * 1. Query SearXNG, restricted to the 'bing images' and 'google images' engines.
 * 2. Keep only results that carry both `img_src` and `url`, capped at `maxResults`.
 * 3. On success, return a Command whose update holds one Document per image in
 *    `relevantDocuments` (image fields live in the metadata) plus a ToolMessage
 *    with the JSON-serialised results.
 *
 * When no usable result is found, or when anything throws, the returned Command
 * carries only a ToolMessage describing the outcome. Errors are logged and
 * reported to the caller as message text; they are never rethrown.
 */
export const imageSearchTool = tool(
  async (
    input: z.infer<typeof ImageSearchToolSchema>,
    config?: RunnableConfig,
  ) => {
    // The tool call id is read from the runnable config and attached to every
    // ToolMessage this tool emits.
    // eslint-disable-next-line @typescript-eslint/no-explicit-any
    const toolCallId = (config as any)?.toolCall?.id;

    // Builds the Command used for text-only replies (no results / failure).
    const replyWithText = (content: string) =>
      new Command({
        update: {
          messages: [new ToolMessage({ content, tool_call_id: toolCallId })],
        },
      });

    try {
      const { query, maxResults = 12 } = input;

      // Array.prototype.slice treats a negative end index as "count from the
      // end", so an unchecked negative value would return almost every result
      // instead of limiting them. Clamp to a non-negative whole number.
      const limit = Math.max(0, Math.floor(maxResults));

      const currentState = getCurrentTaskInput() as SimplifiedAgentStateType;
      // sourceId numbering continues after the documents already in state;
      // an unset list is treated as empty rather than failing the search.
      let currentDocCount = currentState.relevantDocuments?.length ?? 0;

      console.log(`ImageSearchTool: Searching images for query: "${query}"`);

      const searchResults = await searchSearxng(query, {
        language: 'en',
        engines: ['bing images', 'google images'],
      });

      // Only results with both an image source and a page URL are usable.
      const images = (searchResults.results || [])
        .filter((r: any) => r && r.img_src && r.url)
        .slice(0, limit);

      if (images.length === 0) {
        return replyWithText('No image results found.');
      }

      const documents: Document[] = images.map(
        (img: any) =>
          new Document({
            pageContent: `${img.title || 'Image'}\n${img.url}`,
            metadata: {
              sourceId: ++currentDocCount,
              title: img.title || 'Image',
              url: img.url,
              source: img.url,
              img_src: img.img_src,
              thumbnail: img.thumbnail || undefined,
              processingType: 'image-search',
              searchQuery: query,
            },
          }),
      );

      return new Command({
        update: {
          relevantDocuments: documents,
          messages: [
            new ToolMessage({
              content: JSON.stringify({ images }),
              tool_call_id: toolCallId,
            }),
          ],
        },
      });
    } catch (error) {
      console.error('ImageSearchTool: Error during image search:', error);
      const errorMessage =
        error instanceof Error ? error.message : 'Unknown error';
      return replyWithText(
        'Error occurred during image search: ' + errorMessage,
      );
    }
  },
  {
    name: 'image_search',
    description:
      'Searches the web for images related to a query using SearXNG and returns image URLs, titles, and sources. Use when the user asks for pictures, photos, charts, or visual examples.',
    schema: ImageSearchToolSchema,
  },
);

View file

@ -11,12 +11,14 @@
import { taskManagerTool } from './taskManagerTool';
import { simpleWebSearchTool } from './simpleWebSearchTool';
import { fileSearchTool } from './fileSearchTool';
import { imageSearchTool } from './imageSearchTool';
import { urlSummarizationTool } from './urlSummarizationTool';
// Export individual tools (will be uncommented as tools are implemented)
export { taskManagerTool };
export { simpleWebSearchTool };
export { fileSearchTool };
export { imageSearchTool };
// Array containing all available agent tools for the simplified chat agent
// This will be used by the createReactAgent implementation
@ -26,6 +28,7 @@ export const allAgentTools = [
simpleWebSearchTool,
fileSearchTool,
urlSummarizationTool,
imageSearchTool,
];
// Export tool categories for selective tool loading based on focus mode
@ -33,6 +36,7 @@ export const webSearchTools = [
//webSearchTool,
simpleWebSearchTool,
urlSummarizationTool,
imageSearchTool,
// analyzerTool,
// synthesizerTool,
];

27
src/lib/utils/html.ts Normal file
View file

@ -0,0 +1,27 @@
/**
 * Escape a string so it can be embedded inside a quoted HTML attribute value.
 *
 * The five characters `&`, `<`, `>`, `"` and `'` are replaced with `&amp;`,
 * `&lt;`, `&gt;`, `&quot;` and `&#39;` respectively; every other character is
 * left untouched.
 *
 * @param value - Raw text to escape.
 * @returns The escaped text, or an empty string when `value` is falsy.
 */
export function encodeHtmlAttribute(value: string): string {
  if (!value) {
    return '';
  }

  const escapes: Record<string, string> = {
    '&': '&amp;',
    '<': '&lt;',
    '>': '&gt;',
    '"': '&quot;',
    "'": '&#39;',
  };

  // One pass over the input; each special character is looked up in the table.
  return value.replace(/[&<>"']/g, (ch) => escapes[ch] ?? ch);
}
/**
 * Decode HTML character references in a string.
 *
 * Recognised forms:
 * - decimal references such as `&#39;`
 * - hexadecimal references such as `&#x27;` (the `x` may be upper or lower case)
 * - the named entities `&quot;`, `&apos;`, `&lt;`, `&gt;` and `&amp;`
 *
 * Decoding happens in a single pass, so text produced by one replacement is
 * never decoded again: `&#38;lt;` yields the literal text `&lt;`, not `<`.
 * Numeric references are converted with `String.fromCodePoint`, so characters
 * outside the Basic Multilingual Plane (e.g. emoji) decode correctly.
 * References whose value exceeds U+10FFFF, and any unrecognised entity, are
 * left in the output unchanged.
 *
 * @param value - Text that may contain HTML character references.
 * @returns The decoded text, or an empty string when `value` is falsy.
 */
export function decodeHtmlEntities(value: string): string {
  if (!value) return '';

  const namedEntities: Record<string, string> = {
    quot: '"',
    apos: "'",
    lt: '<',
    gt: '>',
    amp: '&',
  };
  const maxCodePoint = 0x10ffff;

  return value.replace(
    /&(?:#(\d+)|#[xX]([\da-fA-F]+)|(quot|apos|lt|gt|amp));/g,
    (match: string, dec?: string, hex?: string, name?: string): string => {
      if (name !== undefined) {
        return namedEntities[name] ?? match;
      }
      const codePoint =
        dec !== undefined ? parseInt(dec, 10) : parseInt(hex ?? '', 16);
      // String.fromCodePoint throws a RangeError above U+10FFFF; keep the
      // original text for such references instead of failing.
      return codePoint <= maxCodePoint
        ? String.fromCodePoint(codePoint)
        : match;
    },
  );
}