feat(ImageSearch): Implement image search tool and integrate with existing agent tools
This commit is contained in:
parent
24ec6f0a5f
commit
a0bc5401e9
5 changed files with 181 additions and 5 deletions
|
|
@ -9,6 +9,7 @@ import {
|
||||||
FileText,
|
FileText,
|
||||||
Globe,
|
Globe,
|
||||||
Settings,
|
Settings,
|
||||||
|
Image as ImageIcon,
|
||||||
} from 'lucide-react';
|
} from 'lucide-react';
|
||||||
import Markdown, { MarkdownToJSX } from 'markdown-to-jsx';
|
import Markdown, { MarkdownToJSX } from 'markdown-to-jsx';
|
||||||
import { useEffect, useState } from 'react';
|
import { useEffect, useState } from 'react';
|
||||||
|
|
@ -20,6 +21,7 @@ import {
|
||||||
import ThinkBox from './ThinkBox';
|
import ThinkBox from './ThinkBox';
|
||||||
import { Document } from '@langchain/core/documents';
|
import { Document } from '@langchain/core/documents';
|
||||||
import CitationLink from './CitationLink';
|
import CitationLink from './CitationLink';
|
||||||
|
import { decodeHtmlEntities } from '@/lib/utils/html';
|
||||||
|
|
||||||
// Helper functions for think overlay
|
// Helper functions for think overlay
|
||||||
const extractThinkContent = (content: string): string | null => {
|
const extractThinkContent = (content: string): string | null => {
|
||||||
|
|
@ -87,6 +89,9 @@ const ToolCall = ({
|
||||||
case 'url':
|
case 'url':
|
||||||
case 'url_summarization':
|
case 'url_summarization':
|
||||||
return <Globe size={16} className="text-purple-600" />;
|
return <Globe size={16} className="text-purple-600" />;
|
||||||
|
case 'image':
|
||||||
|
case 'image_search':
|
||||||
|
return <ImageIcon size={16} className="text-blue-600" />;
|
||||||
default:
|
default:
|
||||||
return <Settings size={16} className="text-fg/70" />;
|
return <Settings size={16} className="text-fg/70" />;
|
||||||
}
|
}
|
||||||
|
|
@ -99,7 +104,7 @@ const ToolCall = ({
|
||||||
<span className="mr-2">{getIcon(type)}</span>
|
<span className="mr-2">{getIcon(type)}</span>
|
||||||
<span>Web search:</span>
|
<span>Web search:</span>
|
||||||
<span className="ml-2 px-2 py-0.5 bg-fg/5 rounded font-mono text-sm">
|
<span className="ml-2 px-2 py-0.5 bg-fg/5 rounded font-mono text-sm">
|
||||||
{query || children}
|
{decodeHtmlEntities(query || (children as string))}
|
||||||
</span>
|
</span>
|
||||||
</>
|
</>
|
||||||
);
|
);
|
||||||
|
|
@ -111,7 +116,7 @@ const ToolCall = ({
|
||||||
<span className="mr-2">{getIcon(type)}</span>
|
<span className="mr-2">{getIcon(type)}</span>
|
||||||
<span>File search:</span>
|
<span>File search:</span>
|
||||||
<span className="ml-2 px-2 py-0.5 bg-fg/5 rounded font-mono text-sm">
|
<span className="ml-2 px-2 py-0.5 bg-fg/5 rounded font-mono text-sm">
|
||||||
{query || children}
|
{decodeHtmlEntities(query || (children as string))}
|
||||||
</span>
|
</span>
|
||||||
</>
|
</>
|
||||||
);
|
);
|
||||||
|
|
@ -130,6 +135,18 @@ const ToolCall = ({
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (type === 'image' || type === 'image_search') {
|
||||||
|
return (
|
||||||
|
<>
|
||||||
|
<span className="mr-2">{getIcon(type)}</span>
|
||||||
|
<span>Image search:</span>
|
||||||
|
<span className="ml-2 px-2 py-0.5 bg-fg/5 rounded font-mono text-sm">
|
||||||
|
{decodeHtmlEntities(query || (children as string))}
|
||||||
|
</span>
|
||||||
|
</>
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
// Fallback for unknown tool types
|
// Fallback for unknown tool types
|
||||||
return (
|
return (
|
||||||
<>
|
<>
|
||||||
|
|
|
||||||
|
|
@ -23,6 +23,7 @@ import {
|
||||||
removeThinkingBlocksFromMessages,
|
removeThinkingBlocksFromMessages,
|
||||||
} from '../utils/contentUtils';
|
} from '../utils/contentUtils';
|
||||||
import { getLangfuseCallbacks } from '@/lib/tracing/langfuse';
|
import { getLangfuseCallbacks } from '@/lib/tracing/langfuse';
|
||||||
|
import { encodeHtmlAttribute } from '@/lib/utils/html';
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Normalize usage metadata from different LLM providers
|
* Normalize usage metadata from different LLM providers
|
||||||
|
|
@ -360,10 +361,16 @@ Your task is to provide answers that are:
|
||||||
- Do not simulate searches, utilize the web search tool directly
|
- Do not simulate searches, utilize the web search tool directly
|
||||||
${alwaysSearchInstruction}
|
${alwaysSearchInstruction}
|
||||||
${explicitUrlInstruction}
|
${explicitUrlInstruction}
|
||||||
|
2.1. **Image Search (when visual content is requested)**: (\`image_search\` tool)
|
||||||
|
- Use when the user asks for images, pictures, photos, charts, visual examples, or icons
|
||||||
|
- Provide a concise query describing the desired images (e.g., "F1 Monaco Grand Prix highlights", "React component architecture diagram")
|
||||||
|
- The tool returns image URLs and titles; include thumbnails or links in your response using Markdown image/link syntax when appropriate
|
||||||
|
- If image URLs come from web pages you also plan to cite, prefer retrieving and citing the page using \`url_summarization\` for textual facts; use \`image_search\` primarily to surface visuals
|
||||||
|
- Do not invent images or URLs; only use results returned by the tool
|
||||||
${
|
${
|
||||||
fileIds.length > 0
|
fileIds.length > 0
|
||||||
? `
|
? `
|
||||||
2.1. **File Search**: (\`file_search\` tool) Search through uploaded documents when relevant
|
2.2. **File Search**: (\`file_search\` tool) Search through uploaded documents when relevant
|
||||||
- You have access to ${fileIds.length} uploaded file${fileIds.length === 1 ? '' : 's'} that may contain relevant information
|
- You have access to ${fileIds.length} uploaded file${fileIds.length === 1 ? '' : 's'} that may contain relevant information
|
||||||
- Use the file search tool to find specific information in the uploaded documents
|
- Use the file search tool to find specific information in the uploaded documents
|
||||||
- Give the file search tool a specific question or topic to extract from the documents
|
- Give the file search tool a specific question or topic to extract from the documents
|
||||||
|
|
@ -657,10 +664,10 @@ Use all available tools strategically to provide comprehensive, well-researched,
|
||||||
let toolMarkdown = '';
|
let toolMarkdown = '';
|
||||||
switch (toolName) {
|
switch (toolName) {
|
||||||
case 'web_search':
|
case 'web_search':
|
||||||
toolMarkdown = `<ToolCall type="search" query="${(toolArgs.query || 'relevant information').replace(/"/g, '"')}"></ToolCall>`;
|
toolMarkdown = `<ToolCall type=\"search\" query=\"${encodeHtmlAttribute(toolArgs.query || 'relevant information')}\"></ToolCall>`;
|
||||||
break;
|
break;
|
||||||
case 'file_search':
|
case 'file_search':
|
||||||
toolMarkdown = `<ToolCall type="file" query="${(toolArgs.query || 'relevant information').replace(/"/g, '"')}"></ToolCall>`;
|
toolMarkdown = `<ToolCall type=\"file\" query=\"${encodeHtmlAttribute(toolArgs.query || 'relevant information')}\"></ToolCall>`;
|
||||||
break;
|
break;
|
||||||
case 'url_summarization':
|
case 'url_summarization':
|
||||||
if (Array.isArray(toolArgs.urls)) {
|
if (Array.isArray(toolArgs.urls)) {
|
||||||
|
|
@ -669,6 +676,9 @@ Use all available tools strategically to provide comprehensive, well-researched,
|
||||||
toolMarkdown = `<ToolCall type="url" count="1"></ToolCall>`;
|
toolMarkdown = `<ToolCall type="url" count="1"></ToolCall>`;
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
|
case 'image_search':
|
||||||
|
toolMarkdown = `<ToolCall type=\"image\" query=\"${encodeHtmlAttribute(toolArgs.query || 'relevant images')}\"></ToolCall>`;
|
||||||
|
break;
|
||||||
default:
|
default:
|
||||||
toolMarkdown = `<ToolCall type="${toolName}"></ToolCall>`;
|
toolMarkdown = `<ToolCall type="${toolName}"></ToolCall>`;
|
||||||
}
|
}
|
||||||
|
|
|
||||||
118
src/lib/tools/agents/imageSearchTool.ts
Normal file
118
src/lib/tools/agents/imageSearchTool.ts
Normal file
|
|
@ -0,0 +1,118 @@
|
||||||
|
import { tool } from '@langchain/core/tools';
|
||||||
|
import { z } from 'zod';
|
||||||
|
import { RunnableConfig } from '@langchain/core/runnables';
|
||||||
|
import { Document } from 'langchain/document';
|
||||||
|
import { searchSearxng } from '@/lib/searxng';
|
||||||
|
import { Command, getCurrentTaskInput } from '@langchain/langgraph';
|
||||||
|
import { SimplifiedAgentStateType } from '@/lib/state/chatAgentState';
|
||||||
|
import { ToolMessage } from '@langchain/core/messages';
|
||||||
|
|
||||||
|
// Input contract for the image search tool. The two field schemas are declared
// separately and then assembled into the object schema handed to `tool(...)`.

// Required free-text description of the images to look for.
const imageQueryField = z
  .string()
  .describe(
    'The image search query. Provide a concise description of what images to find.',
  );

// Optional cap on the number of returned images; 12 is used when omitted.
const imageMaxResultsField = z
  .number()
  .optional()
  .default(12)
  .describe('Maximum number of image results to return.');

const ImageSearchToolSchema = z.object({
  query: imageQueryField,
  maxResults: imageMaxResultsField,
});
|
||||||
|
|
||||||
|
/**
 * ImageSearchTool - Performs an image search via SearXNG and returns the hits.
 *
 * What the handler does:
 * 1. Queries SearXNG with the `bing images` and `google images` engines.
 * 2. Keeps only results that carry both `img_src` and `url`, capped at
 *    `maxResults` (invalid caps fall back to 12).
 * 3. Returns a `Command` whose update contains one `Document` per image in
 *    `relevantDocuments` (image fields live in `metadata`) plus a
 *    `ToolMessage` with the JSON-serialised results.
 *
 * Failures never throw out of the tool: any error is logged and reported back
 * as a `ToolMessage` so the agent can continue.
 */
export const imageSearchTool = tool(
  async (
    input: z.infer<typeof ImageSearchToolSchema>,
    config?: RunnableConfig,
  ) => {
    // The tool-call id is not part of the RunnableConfig typing, hence the cast.
    // eslint-disable-next-line @typescript-eslint/no-explicit-any
    const toolCallId = (config as any)?.toolCall?.id;

    // Builds the Command returned to the graph: always one ToolMessage, plus
    // any additional state fields (e.g. relevantDocuments) passed in `extra`.
    const respond = (content: string, extra: Record<string, unknown> = {}) =>
      new Command({
        update: {
          ...extra,
          messages: [
            new ToolMessage({
              content,
              tool_call_id: toolCallId,
            }),
          ],
        },
      });

    try {
      const { query, maxResults = 12 } = input;

      // `slice(0, n)` misbehaves for n <= 0 (negative n drops items from the
      // end, 0 yields a bogus "no results"), so fall back to the default cap
      // for anything that is not a finite number >= 1.
      const limit =
        Number.isFinite(maxResults) && maxResults >= 1
          ? Math.floor(maxResults)
          : 12;

      const currentState = getCurrentTaskInput() as SimplifiedAgentStateType;
      // New documents continue the sourceId numbering after the documents
      // already in state; tolerate a missing list by starting from zero.
      let currentDocCount = currentState.relevantDocuments?.length ?? 0;

      console.log(`ImageSearchTool: Searching images for query: "${query}"`);

      const searchResults = await searchSearxng(query, {
        language: 'en',
        engines: ['bing images', 'google images'],
      });

      // Only results with both the image URL and the hosting page URL are usable.
      // eslint-disable-next-line @typescript-eslint/no-explicit-any
      const images = (searchResults.results || [])
        .filter((r: any) => r && r.img_src && r.url)
        .slice(0, limit);

      if (images.length === 0) {
        return respond('No image results found.');
      }

      const documents: Document[] = images.map(
        // eslint-disable-next-line @typescript-eslint/no-explicit-any
        (img: any) =>
          new Document({
            pageContent: `${img.title || 'Image'}\n${img.url}`,
            metadata: {
              sourceId: ++currentDocCount,
              title: img.title || 'Image',
              url: img.url,
              source: img.url,
              img_src: img.img_src,
              thumbnail: img.thumbnail || undefined,
              processingType: 'image-search',
              searchQuery: query,
            },
          }),
      );

      return respond(JSON.stringify({ images }), {
        relevantDocuments: documents,
      });
    } catch (error) {
      console.error('ImageSearchTool: Error during image search:', error);
      const errorMessage =
        error instanceof Error ? error.message : 'Unknown error';

      return respond('Error occurred during image search: ' + errorMessage);
    }
  },
  {
    name: 'image_search',
    description:
      'Searches the web for images related to a query using SearXNG and returns image URLs, titles, and sources. Use when the user asks for pictures, photos, charts, or visual examples.',
    schema: ImageSearchToolSchema,
  },
);
|
||||||
|
|
@ -11,12 +11,14 @@
|
||||||
import { taskManagerTool } from './taskManagerTool';
|
import { taskManagerTool } from './taskManagerTool';
|
||||||
import { simpleWebSearchTool } from './simpleWebSearchTool';
|
import { simpleWebSearchTool } from './simpleWebSearchTool';
|
||||||
import { fileSearchTool } from './fileSearchTool';
|
import { fileSearchTool } from './fileSearchTool';
|
||||||
|
import { imageSearchTool } from './imageSearchTool';
|
||||||
import { urlSummarizationTool } from './urlSummarizationTool';
|
import { urlSummarizationTool } from './urlSummarizationTool';
|
||||||
|
|
||||||
// Export individual tools (will be uncommented as tools are implemented)
|
// Export individual tools (will be uncommented as tools are implemented)
|
||||||
export { taskManagerTool };
|
export { taskManagerTool };
|
||||||
export { simpleWebSearchTool };
|
export { simpleWebSearchTool };
|
||||||
export { fileSearchTool };
|
export { fileSearchTool };
|
||||||
|
export { imageSearchTool };
|
||||||
|
|
||||||
// Array containing all available agent tools for the simplified chat agent
|
// Array containing all available agent tools for the simplified chat agent
|
||||||
// This will be used by the createReactAgent implementation
|
// This will be used by the createReactAgent implementation
|
||||||
|
|
@ -26,6 +28,7 @@ export const allAgentTools = [
|
||||||
simpleWebSearchTool,
|
simpleWebSearchTool,
|
||||||
fileSearchTool,
|
fileSearchTool,
|
||||||
urlSummarizationTool,
|
urlSummarizationTool,
|
||||||
|
imageSearchTool,
|
||||||
];
|
];
|
||||||
|
|
||||||
// Export tool categories for selective tool loading based on focus mode
|
// Export tool categories for selective tool loading based on focus mode
|
||||||
|
|
@ -33,6 +36,7 @@ export const webSearchTools = [
|
||||||
//webSearchTool,
|
//webSearchTool,
|
||||||
simpleWebSearchTool,
|
simpleWebSearchTool,
|
||||||
urlSummarizationTool,
|
urlSummarizationTool,
|
||||||
|
imageSearchTool,
|
||||||
// analyzerTool,
|
// analyzerTool,
|
||||||
// synthesizerTool,
|
// synthesizerTool,
|
||||||
];
|
];
|
||||||
|
|
|
||||||
27
src/lib/utils/html.ts
Normal file
27
src/lib/utils/html.ts
Normal file
|
|
@ -0,0 +1,27 @@
|
||||||
|
/**
 * Escapes a string so it can be embedded safely inside a quoted HTML/JSX
 * attribute value.
 *
 * The five characters `&`, `<`, `>`, `"` and `'` are replaced by `&amp;`,
 * `&lt;`, `&gt;`, `&quot;` and `&#39;` respectively. Replacement happens in a
 * single pass, so the `&` of a freshly produced entity is never escaped again.
 *
 * @param value - Raw text to escape.
 * @returns The escaped text, or an empty string when `value` is empty/falsy.
 */
export function encodeHtmlAttribute(value: string): string {
  if (!value) return '';

  return value.replace(/[&<>"']/g, (ch: string): string => {
    switch (ch) {
      case '&':
        return '&amp;';
      case '<':
        return '&lt;';
      case '>':
        return '&gt;';
      case '"':
        return '&quot;';
      case "'":
        return '&#39;';
      default:
        // Unreachable given the character class above; kept for exhaustiveness.
        return ch;
    }
  });
}
|
||||||
|
|
||||||
|
/**
 * Decodes the HTML entities produced by attribute escaping back into text.
 *
 * Supported entities: decimal (`&#39;`) and hexadecimal (`&#x27;`) numeric
 * references, plus the named entities `&quot;`, `&apos;`, `&lt;`, `&gt;` and
 * `&amp;`. Anything else is left untouched.
 *
 * Decoding is done in a single pass so that text is decoded exactly once:
 * `&amp;lt;` becomes `&lt;`, not `<`. Numeric references use
 * `String.fromCodePoint`, so characters outside the Basic Multilingual Plane
 * (e.g. emoji) decode correctly; references beyond U+10FFFF are not valid
 * code points and are returned unchanged.
 *
 * @param value - Text that may contain HTML entities.
 * @returns The decoded text, or an empty string when `value` is empty/falsy.
 */
export function decodeHtmlEntities(value: string): string {
  if (!value) return '';

  return value.replace(
    /&(?:#(\d+)|#[xX]([\da-fA-F]+)|(quot|apos|lt|gt|amp));/g,
    (match: string, dec?: string, hex?: string, named?: string): string => {
      if (named !== undefined) {
        switch (named) {
          case 'quot':
            return '"';
          case 'apos':
            return "'";
          case 'lt':
            return '<';
          case 'gt':
            return '>';
          case 'amp':
            return '&';
          default:
            return match;
        }
      }

      const codePoint =
        dec !== undefined ? parseInt(dec, 10) : parseInt(hex ?? '', 16);

      // String.fromCodePoint throws a RangeError outside 0..0x10FFFF.
      if (!Number.isFinite(codePoint) || codePoint > 0x10ffff) {
        return match;
      }
      return String.fromCodePoint(codePoint);
    },
  );
}
|
||||||
Loading…
Add table
Add a link
Reference in a new issue