2025-07-29 10:18:11 -06:00
import { tool } from '@langchain/core/tools' ;
import { z } from 'zod' ;
import { RunnableConfig } from '@langchain/core/runnables' ;
import { Document } from 'langchain/document' ;
import { Embeddings } from '@langchain/core/embeddings' ;
2025-08-02 12:52:14 -06:00
import { Command , getCurrentTaskInput } from '@langchain/langgraph' ;
import { ToolMessage } from '@langchain/core/messages' ;
import { SimplifiedAgentStateType } from '@/lib/state/chatAgentState' ;
2025-07-29 10:18:11 -06:00
import {
processFilesToDocuments ,
getRankedDocs ,
} from '@/lib/utils/fileProcessing' ;
/**
 * Field validators for the arguments accepted by the file search tool.
 *
 * - `query`: required search text.
 * - `maxResults`: optional cap on returned sections, defaults to 12.
 * - `similarityThreshold`: optional minimum similarity, defaults to 0.3.
 */
const fileSearchInputShape = {
  query: z.string().describe('The search query to find relevant content in files'),
  maxResults: z
    .number()
    .optional()
    .default(12)
    .describe('Maximum number of results to return'),
  similarityThreshold: z
    .number()
    .optional()
    .default(0.3)
    .describe('Minimum similarity threshold for results'),
};

// Input schema handed to the tool definition.
const FileSearchToolSchema = z.object(fileSearchInputShape);
/**
 * FileSearchTool - Reimplementation of FileSearchAgent as a tool
 *
 * This tool handles:
 * 1. Processing uploaded files into searchable documents
 * 2. Performing similarity search across file content
 * 3. Ranking and filtering results by relevance
 * 4. Returning relevant file sections as documents
 *
 * Inputs read from `config.configurable`:
 * - `fileIds`: ids of the files to search (anything that is not an array is
 *   treated as "no files").
 * - `embeddings`: the embeddings instance used to embed the query.
 *
 * The tool never throws: every outcome, including failures, is returned as a
 * `Command` whose update carries `relevantDocuments` and a single
 * `ToolMessage` describing the result.
 */
export const fileSearchTool = tool(
  async (
    input: z.infer<typeof FileSearchToolSchema>,
    config?: RunnableConfig,
  ) => {
    // `toolCall` is not part of the declared RunnableConfig type, so widen the
    // type locally. Resolve the id once, outside the try block, with full
    // optional chaining: the catch handler needs it too and must not be able
    // to throw while building its own error reply.
    const toolCallId: string =
      (config as (RunnableConfig & { toolCall?: { id?: string } }) | undefined)
        ?.toolCall?.id ?? '';

    // Builds the state update returned for every outcome of the tool.
    const reply = (content: string, relevantDocuments: Document[] = []) =>
      new Command({
        update: {
          relevantDocuments,
          messages: [new ToolMessage({ content, tool_call_id: toolCallId })],
        },
      });

    try {
      const { query, maxResults = 12, similarityThreshold = 0.3 } = input;

      // New documents are numbered after the ones already held in state.
      const currentState = getCurrentTaskInput() as SimplifiedAgentStateType;
      const existingDocCount = currentState?.relevantDocuments?.length ?? 0;

      // Get fileIds from config (provided by the agent)
      const rawFileIds: unknown = config?.configurable?.fileIds;
      const fileIds: string[] = Array.isArray(rawFileIds) ? rawFileIds : [];

      console.log(
        `FileSearchTool: Processing ${fileIds.length} files for query: "${query}"`,
      );

      // Check if we have files to process
      if (fileIds.length === 0) {
        console.log('FileSearchTool: No files provided for search');
        return reply('No files attached to search.');
      }

      // Get embeddings from config
      if (!config?.configurable?.embeddings) {
        throw new Error('Embeddings not available in config');
      }
      const embeddings: Embeddings = config.configurable.embeddings;

      // Step 1: Process files to documents
      console.log('FileSearchTool: Processing files to documents...');
      const fileDocuments = await processFilesToDocuments(fileIds);

      if (fileDocuments.length === 0) {
        console.log('FileSearchTool: No processable content found in files');
        return reply('No searchable content found in attached files.');
      }

      console.log(
        `FileSearchTool: Processed ${fileDocuments.length} file sections`,
      );

      // Step 2: Generate query embedding for similarity search
      console.log('FileSearchTool: Generating query embedding...');
      const queryEmbedding = await embeddings.embedQuery(query);

      // Step 3: Perform similarity search and ranking
      console.log('FileSearchTool: Performing similarity search...');
      const rankedDocuments = getRankedDocs(
        queryEmbedding,
        fileDocuments,
        maxResults,
        similarityThreshold,
      );

      console.log(
        `FileSearchTool: Found ${rankedDocuments.length} relevant file sections`,
      );

      // Add search metadata to documents and remove embeddings to reduce context size
      const documentsWithMetadata = rankedDocuments.map((doc, index) => {
        // Extract metadata and exclude embeddings
        const { embeddings: _embeddings, ...metadataWithoutEmbeddings } =
          doc.metadata ?? {};

        return new Document({
          pageContent: doc.pageContent,
          metadata: {
            ...metadataWithoutEmbeddings,
            // 1-based id continuing after the documents already in state.
            sourceId: existingDocCount + index + 1,
            source: 'file_search',
            searchQuery: query,
            // `||` rather than `??`: any falsy score (including NaN) becomes 0.
            similarityScore: doc.metadata?.similarity || 0,
          },
        });
      });

      console.log(
        `FileSearchTool: Created ${documentsWithMetadata.length} documents from file search`,
      );

      return reply(
        JSON.stringify({
          documents: documentsWithMetadata,
          processedFiles: fileIds.length,
          relevantSections: rankedDocuments.length,
        }),
        documentsWithMetadata,
      );
    } catch (error: unknown) {
      console.error('FileSearchTool: Error during file search:', error);
      const errorMessage =
        error instanceof Error ? error.message : 'Unknown error';

      return reply('Error occurred during file search: ' + errorMessage);
    }
  },
  {
    name: 'file_search',
    description:
      'Searches through all uploaded files to find relevant content sections based on a query using semantic similarity. Automatically searches all available files - no need to specify file IDs.',
    schema: FileSearchToolSchema,
  },
);