feat(ImageSearch): Implement image search tool and integrate with existing agent tools
This commit is contained in:
parent
24ec6f0a5f
commit
a0bc5401e9
5 changed files with 181 additions and 5 deletions
|
|
@ -9,6 +9,7 @@ import {
|
|||
FileText,
|
||||
Globe,
|
||||
Settings,
|
||||
Image as ImageIcon,
|
||||
} from 'lucide-react';
|
||||
import Markdown, { MarkdownToJSX } from 'markdown-to-jsx';
|
||||
import { useEffect, useState } from 'react';
|
||||
|
|
@ -20,6 +21,7 @@ import {
|
|||
import ThinkBox from './ThinkBox';
|
||||
import { Document } from '@langchain/core/documents';
|
||||
import CitationLink from './CitationLink';
|
||||
import { decodeHtmlEntities } from '@/lib/utils/html';
|
||||
|
||||
// Helper functions for think overlay
|
||||
const extractThinkContent = (content: string): string | null => {
|
||||
|
|
@ -87,6 +89,9 @@ const ToolCall = ({
|
|||
case 'url':
|
||||
case 'url_summarization':
|
||||
return <Globe size={16} className="text-purple-600" />;
|
||||
case 'image':
|
||||
case 'image_search':
|
||||
return <ImageIcon size={16} className="text-blue-600" />;
|
||||
default:
|
||||
return <Settings size={16} className="text-fg/70" />;
|
||||
}
|
||||
|
|
@ -99,7 +104,7 @@ const ToolCall = ({
|
|||
<span className="mr-2">{getIcon(type)}</span>
|
||||
<span>Web search:</span>
|
||||
<span className="ml-2 px-2 py-0.5 bg-fg/5 rounded font-mono text-sm">
|
||||
{query || children}
|
||||
{decodeHtmlEntities(query || (children as string))}
|
||||
</span>
|
||||
</>
|
||||
);
|
||||
|
|
@ -111,7 +116,7 @@ const ToolCall = ({
|
|||
<span className="mr-2">{getIcon(type)}</span>
|
||||
<span>File search:</span>
|
||||
<span className="ml-2 px-2 py-0.5 bg-fg/5 rounded font-mono text-sm">
|
||||
{query || children}
|
||||
{decodeHtmlEntities(query || (children as string))}
|
||||
</span>
|
||||
</>
|
||||
);
|
||||
|
|
@ -130,6 +135,18 @@ const ToolCall = ({
|
|||
);
|
||||
}
|
||||
|
||||
if (type === 'image' || type === 'image_search') {
|
||||
return (
|
||||
<>
|
||||
<span className="mr-2">{getIcon(type)}</span>
|
||||
<span>Image search:</span>
|
||||
<span className="ml-2 px-2 py-0.5 bg-fg/5 rounded font-mono text-sm">
|
||||
{decodeHtmlEntities(query || (children as string))}
|
||||
</span>
|
||||
</>
|
||||
);
|
||||
}
|
||||
|
||||
// Fallback for unknown tool types
|
||||
return (
|
||||
<>
|
||||
|
|
|
|||
|
|
@ -23,6 +23,7 @@ import {
|
|||
removeThinkingBlocksFromMessages,
|
||||
} from '../utils/contentUtils';
|
||||
import { getLangfuseCallbacks } from '@/lib/tracing/langfuse';
|
||||
import { encodeHtmlAttribute } from '@/lib/utils/html';
|
||||
|
||||
/**
|
||||
* Normalize usage metadata from different LLM providers
|
||||
|
|
@ -360,10 +361,16 @@ Your task is to provide answers that are:
|
|||
- Do not simulate searches, utilize the web search tool directly
|
||||
${alwaysSearchInstruction}
|
||||
${explicitUrlInstruction}
|
||||
2.1. **Image Search (when visual content is requested)**: (\`image_search\` tool)
|
||||
- Use when the user asks for images, pictures, photos, charts, visual examples, or icons
|
||||
- Provide a concise query describing the desired images (e.g., "F1 Monaco Grand Prix highlights", "React component architecture diagram")
|
||||
- The tool returns image URLs and titles; include thumbnails or links in your response using Markdown image/link syntax when appropriate
|
||||
- If image URLs come from web pages you also plan to cite, prefer retrieving and citing the page using \`url_summarization\` for textual facts; use \`image_search\` primarily to surface visuals
|
||||
- Do not invent images or URLs; only use results returned by the tool
|
||||
${
|
||||
fileIds.length > 0
|
||||
? `
|
||||
2.1. **File Search**: (\`file_search\` tool) Search through uploaded documents when relevant
|
||||
2.2. **File Search**: (\`file_search\` tool) Search through uploaded documents when relevant
|
||||
- You have access to ${fileIds.length} uploaded file${fileIds.length === 1 ? '' : 's'} that may contain relevant information
|
||||
- Use the file search tool to find specific information in the uploaded documents
|
||||
- Give the file search tool a specific question or topic to extract from the documents
|
||||
|
|
@ -657,10 +664,10 @@ Use all available tools strategically to provide comprehensive, well-researched,
|
|||
let toolMarkdown = '';
|
||||
switch (toolName) {
|
||||
case 'web_search':
|
||||
toolMarkdown = `<ToolCall type="search" query="${(toolArgs.query || 'relevant information').replace(/"/g, '"')}"></ToolCall>`;
|
||||
toolMarkdown = `<ToolCall type=\"search\" query=\"${encodeHtmlAttribute(toolArgs.query || 'relevant information')}\"></ToolCall>`;
|
||||
break;
|
||||
case 'file_search':
|
||||
toolMarkdown = `<ToolCall type="file" query="${(toolArgs.query || 'relevant information').replace(/"/g, '"')}"></ToolCall>`;
|
||||
toolMarkdown = `<ToolCall type=\"file\" query=\"${encodeHtmlAttribute(toolArgs.query || 'relevant information')}\"></ToolCall>`;
|
||||
break;
|
||||
case 'url_summarization':
|
||||
if (Array.isArray(toolArgs.urls)) {
|
||||
|
|
@ -669,6 +676,9 @@ Use all available tools strategically to provide comprehensive, well-researched,
|
|||
toolMarkdown = `<ToolCall type="url" count="1"></ToolCall>`;
|
||||
}
|
||||
break;
|
||||
case 'image_search':
|
||||
toolMarkdown = `<ToolCall type=\"image\" query=\"${encodeHtmlAttribute(toolArgs.query || 'relevant images')}\"></ToolCall>`;
|
||||
break;
|
||||
default:
|
||||
toolMarkdown = `<ToolCall type="${toolName}"></ToolCall>`;
|
||||
}
|
||||
|
|
|
|||
118
src/lib/tools/agents/imageSearchTool.ts
Normal file
118
src/lib/tools/agents/imageSearchTool.ts
Normal file
|
|
@ -0,0 +1,118 @@
|
|||
import { tool } from '@langchain/core/tools';
|
||||
import { z } from 'zod';
|
||||
import { RunnableConfig } from '@langchain/core/runnables';
|
||||
import { Document } from 'langchain/document';
|
||||
import { searchSearxng } from '@/lib/searxng';
|
||||
import { Command, getCurrentTaskInput } from '@langchain/langgraph';
|
||||
import { SimplifiedAgentStateType } from '@/lib/state/chatAgentState';
|
||||
import { ToolMessage } from '@langchain/core/messages';
|
||||
|
||||
// Input contract for the image_search tool: a required free-text query plus an
// optional cap on the number of results (12 when the caller omits it).
const imageQueryField = z
  .string()
  .describe(
    'The image search query. Provide a concise description of what images to find.',
  );

const imageResultLimitField = z
  .number()
  .optional()
  .default(12)
  .describe('Maximum number of image results to return.');

const ImageSearchToolSchema = z.object({
  query: imageQueryField,
  maxResults: imageResultLimitField,
});
|
||||
|
||||
/**
 * ImageSearchTool - Performs image search via SearXNG and returns image results.
 *
 * Behaviour:
 * 1. Queries SearXNG with the 'bing images' and 'google images' engines (language 'en').
 * 2. Keeps only results that carry both `img_src` and `url`, capped at `maxResults`.
 * 3. Wraps each kept result in a Document whose metadata holds the image fields and a
 *    `sourceId` that continues counting from the number of documents already in state.
 * 4. Returns a Command whose update carries the documents (`relevantDocuments`) and a
 *    ToolMessage containing the kept results as JSON.
 *
 * When nothing matches, or when the search throws, the Command carries only a
 * ToolMessage describing the outcome; errors are logged and never re-thrown.
 *
 * NOTE(review): whether `relevantDocuments` is appended to or replaces the existing
 * state value depends on the state reducer, which is not visible here - confirm.
 */
export const imageSearchTool = tool(
  async (
    input: z.infer<typeof ImageSearchToolSchema>,
    config?: RunnableConfig,
  ) => {
    // Every outcome is reported as a ToolMessage tied to the originating tool call.
    const toolCallId = (config as any)?.toolCall?.id;
    const reply = (content: string, extraUpdate: Record<string, unknown> = {}) =>
      new Command({
        update: {
          ...extraUpdate,
          messages: [new ToolMessage({ content, tool_call_id: toolCallId })],
        },
      });

    try {
      const { query, maxResults = 12 } = input;

      // Normalise the cap to a whole number >= 1. A negative value passed straight to
      // slice(0, n) would trim results from the END of the list instead of capping it,
      // and NaN would silently yield an empty list.
      const limit = Number.isNaN(maxResults)
        ? 12
        : Math.max(1, Math.floor(maxResults));

      const currentState = getCurrentTaskInput() as SimplifiedAgentStateType;
      // New documents continue the numbering of the documents already collected.
      let currentDocCount = currentState.relevantDocuments.length;

      console.log(`ImageSearchTool: Searching images for query: "${query}"`);

      const searchResults = await searchSearxng(query, {
        language: 'en',
        engines: ['bing images', 'google images'],
      });

      // A usable hit needs both the image itself (img_src) and its source page (url).
      const images = (searchResults.results || [])
        .filter((r: any) => r && r.img_src && r.url)
        .slice(0, limit);

      if (images.length === 0) {
        return reply('No image results found.');
      }

      const documents: Document[] = images.map(
        (img: any) =>
          new Document({
            pageContent: `${img.title || 'Image'}\n${img.url}`,
            metadata: {
              sourceId: ++currentDocCount,
              title: img.title || 'Image',
              url: img.url,
              source: img.url,
              img_src: img.img_src,
              thumbnail: img.thumbnail || undefined,
              processingType: 'image-search',
              searchQuery: query,
            },
          }),
      );

      return reply(JSON.stringify({ images }), { relevantDocuments: documents });
    } catch (error) {
      console.error('ImageSearchTool: Error during image search:', error);
      const errorMessage =
        error instanceof Error ? error.message : 'Unknown error';

      return reply('Error occurred during image search: ' + errorMessage);
    }
  },
  {
    name: 'image_search',
    description:
      'Searches the web for images related to a query using SearXNG and returns image URLs, titles, and sources. Use when the user asks for pictures, photos, charts, or visual examples.',
    schema: ImageSearchToolSchema,
  },
);
|
||||
|
|
@ -11,12 +11,14 @@
|
|||
import { taskManagerTool } from './taskManagerTool';
|
||||
import { simpleWebSearchTool } from './simpleWebSearchTool';
|
||||
import { fileSearchTool } from './fileSearchTool';
|
||||
import { imageSearchTool } from './imageSearchTool';
|
||||
import { urlSummarizationTool } from './urlSummarizationTool';
|
||||
|
||||
// Export individual tools (will be uncommented as tools are implemented)
|
||||
export { taskManagerTool };
|
||||
export { simpleWebSearchTool };
|
||||
export { fileSearchTool };
|
||||
export { imageSearchTool };
|
||||
|
||||
// Array containing all available agent tools for the simplified chat agent
|
||||
// This will be used by the createReactAgent implementation
|
||||
|
|
@ -26,6 +28,7 @@ export const allAgentTools = [
|
|||
simpleWebSearchTool,
|
||||
fileSearchTool,
|
||||
urlSummarizationTool,
|
||||
imageSearchTool,
|
||||
];
|
||||
|
||||
// Export tool categories for selective tool loading based on focus mode
|
||||
|
|
@ -33,6 +36,7 @@ export const webSearchTools = [
|
|||
//webSearchTool,
|
||||
simpleWebSearchTool,
|
||||
urlSummarizationTool,
|
||||
imageSearchTool,
|
||||
// analyzerTool,
|
||||
// synthesizerTool,
|
||||
];
|
||||
|
|
|
|||
27
src/lib/utils/html.ts
Normal file
27
src/lib/utils/html.ts
Normal file
|
|
@ -0,0 +1,27 @@
|
|||
/**
 * Escape a string so it can be placed inside a quoted HTML/JSX attribute value.
 *
 * The five characters that are significant in markup are replaced by entities:
 * `&` -> `&amp;`, `<` -> `&lt;`, `>` -> `&gt;`, `"` -> `&quot;`, `'` -> `&#39;`.
 * All replacements happen in a single pass, so the `&` introduced by one
 * replacement is never escaped a second time.
 *
 * @param value - Raw text to embed in an attribute.
 * @returns The escaped text, or an empty string when `value` is empty or falsy.
 */
export function encodeHtmlAttribute(value: string): string {
  if (!value) return '';

  const entityFor: Record<string, string> = {
    '&': '&amp;',
    '<': '&lt;',
    '>': '&gt;',
    '"': '&quot;',
    "'": '&#39;',
  };

  return value.replace(/[&<>"']/g, (ch) => entityFor[ch] ?? ch);
}
|
||||
|
||||
/**
 * Decode the HTML entities most commonly found in attribute values back to text.
 *
 * Recognised forms:
 * - decimal numeric references, e.g. `&#39;`
 * - hexadecimal numeric references, e.g. `&#x27;`
 * - the named entities `&quot;`, `&apos;`, `&lt;`, `&gt;` and `&amp;`
 *
 * Decoding is done in a single pass: text produced by decoding one entity is
 * never scanned again, so `&#38;lt;` yields the literal text `&lt;` rather
 * than `<`. Numeric references are converted with `String.fromCodePoint`, so
 * code points above U+FFFF (e.g. emoji) decode correctly; references beyond
 * U+10FFFF are not valid code points and are left untouched.
 *
 * @param value - Text that may contain HTML entities.
 * @returns The decoded text, or an empty string when `value` is empty or falsy.
 */
export function decodeHtmlEntities(value: string): string {
  if (!value) return '';

  const namedEntities: Record<string, string> = {
    quot: '"',
    apos: "'",
    lt: '<',
    gt: '>',
    amp: '&',
  };

  return value.replace(
    /&(?:#(\d+)|#[xX]([\da-fA-F]+)|(quot|apos|lt|gt|amp));/g,
    (entity: string, dec?: string, hex?: string, name?: string): string => {
      if (name !== undefined) {
        return namedEntities[name] ?? entity;
      }

      const codePoint =
        dec !== undefined ? parseInt(dec, 10) : parseInt(hex ?? '', 16);

      // String.fromCodePoint throws a RangeError outside the Unicode range,
      // so keep such references verbatim instead of crashing.
      return codePoint <= 0x10ffff ? String.fromCodePoint(codePoint) : entity;
    },
  );
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue