diff --git a/.gitignore b/.gitignore index 8391d19..240381c 100644 --- a/.gitignore +++ b/.gitignore @@ -2,6 +2,7 @@ node_modules/ npm-debug.log yarn-error.log +package-lock.json # Build output /.next/ @@ -37,3 +38,6 @@ Thumbs.db # Db db.sqlite /searxng + +# Dev +docker-compose-dev.yaml diff --git a/README.md b/README.md index 866b585..3b80e58 100644 --- a/README.md +++ b/README.md @@ -2,7 +2,6 @@ [![Discord](https://dcbadge.vercel.app/api/server/26aArMy8tT?style=flat&compact=true)](https://discord.gg/26aArMy8tT) - ![preview](.assets/perplexica-screenshot.png?) ## Table of Contents diff --git a/docker-compose.yaml b/docker-compose.yaml index a0e1d73..38c3f5d 100644 --- a/docker-compose.yaml +++ b/docker-compose.yaml @@ -4,7 +4,7 @@ services: volumes: - ./searxng:/etc/searxng:rw ports: - - 4000:8080 + - '4000:8080' networks: - perplexica-network restart: unless-stopped @@ -19,7 +19,7 @@ services: depends_on: - searxng ports: - - 3001:3001 + - '3001:3001' volumes: - backend-dbstore:/home/perplexica/data - uploads:/home/perplexica/uploads @@ -41,7 +41,7 @@ services: depends_on: - perplexica-backend ports: - - 3000:3000 + - '3000:3000' networks: - perplexica-network restart: unless-stopped diff --git a/package.json b/package.json index 6041377..1a53d8f 100644 --- a/package.json +++ b/package.json @@ -30,8 +30,8 @@ "@iarna/toml": "^2.2.5", "@langchain/anthropic": "^0.2.3", "@langchain/community": "^0.2.16", - "@langchain/openai": "^0.0.25", "@langchain/google-genai": "^0.0.23", + "@langchain/openai": "^0.0.25", "@xenova/transformers": "^2.17.1", "axios": "^1.6.8", "better-sqlite3": "^11.0.0", diff --git a/sample.config.toml b/sample.config.toml index 7b09d67..d51889e 100644 --- a/sample.config.toml +++ b/sample.config.toml @@ -3,6 +3,12 @@ PORT = 3001 # Port to run the server on SIMILARITY_MEASURE = "cosine" # "cosine" or "dot" KEEP_ALIVE = "5m" # How long to keep Ollama models loaded into memory. (Instead of using -1 use "-1m") +[SEARCH_ENGINE_BACKENDS] # "google" | "searxng" | "bing" | "brave" | "yacy" +SEARCH = "searxng" +IMAGE = "searxng" +VIDEO = "searxng" +NEWS = "searxng" + [MODELS.OPENAI] API_KEY = "" @@ -22,5 +28,18 @@ API_URL = "" [MODELS.OLLAMA] API_URL = "" # Ollama API URL - http://host.docker.internal:11434 -[API_ENDPOINTS] -SEARXNG = "http://localhost:32768" # SearxNG API URL \ No newline at end of file +[SEARCH_ENGINES.GOOGLE] +API_KEY = "" +CSE_ID = "" + +[SEARCH_ENGINES.SEARXNG] +ENDPOINT = "" + +[SEARCH_ENGINES.BING] +SUBSCRIPTION_KEY = "" + +[SEARCH_ENGINES.BRAVE] +API_KEY = "" + +[SEARCH_ENGINES.YACY] +ENDPOINT = "" diff --git a/searxng/settings.yml b/searxng/settings.yml index 54d27c4..7f04b6a 100644 --- a/searxng/settings.yml +++ b/searxng/settings.yml @@ -15,3 +15,5 @@ server: engines: - name: wolframalpha disabled: false + - name: qwant + disabled: true diff --git a/src/chains/imageSearchAgent.ts b/src/chains/imageSearchAgent.ts index 167019f..01f4ada 100644 --- a/src/chains/imageSearchAgent.ts +++ b/src/chains/imageSearchAgent.ts @@ -7,7 +7,12 @@ import { PromptTemplate } from '@langchain/core/prompts'; import formatChatHistoryAsString from '../utils/formatHistory'; import { BaseMessage } from '@langchain/core/messages'; import { StringOutputParser } from '@langchain/core/output_parsers'; -import { searchSearxng } from '../lib/searxng'; +import { searchSearxng } from '../lib/searchEngines/searxng'; +import { searchGooglePSE } from '../lib/searchEngines/google_pse'; +import { searchBraveAPI } from '../lib/searchEngines/brave'; +import { searchYaCy } from '../lib/searchEngines/yacy'; +import { searchBingAPI } from '../lib/searchEngines/bing'; +import { getImageSearchEngineBackend } from '../config'; import type { BaseChatModel } from '@langchain/core/language_models/chat_models'; const imageSearchChainPrompt = ` @@ -36,6 +41,103 @@ type ImageSearchChainInput = { query: string; }; +async function performImageSearch(query: string) { + const searchEngine = getImageSearchEngineBackend(); + let images = []; + + switch (searchEngine) { + case 'google': { + const googleResult = await searchGooglePSE(query); + images = googleResult.results + .map((result) => { + if (result.img_src && result.url && result.title) { + return { + img_src: result.img_src, + url: result.url, + title: result.title, + source: result.displayLink, + }; + } + }) + .filter(Boolean); + break; + } + + case 'searxng': { + const searxResult = await searchSearxng(query, { + engines: ['google images', 'bing images'], + pageno: 1, + }); + searxResult.results.forEach((result) => { + if (result.img_src && result.url && result.title) { + images.push({ + img_src: result.img_src, + url: result.url, + title: result.title, + }); + } + }); + break; + } + + case 'brave': { + const braveResult = await searchBraveAPI(query); + images = braveResult.results + .map((result) => { + if (result.img_src && result.url && result.title) { + return { + img_src: result.img_src, + url: result.url, + title: result.title, + source: result.url, + }; + } + }) + .filter(Boolean); + break; + } + + case 'yacy': { + const yacyResult = await searchYaCy(query); + images = yacyResult.results + .map((result) => { + if (result.img_src && result.url && result.title) { + return { + img_src: result.img_src, + url: result.url, + title: result.title, + source: result.url, + }; + } + }) + .filter(Boolean); + break; + } + + case 'bing': { + const bingResult = await searchBingAPI(query); + images = bingResult.results + .map((result) => { + if (result.img_src && result.url && result.title) { + return { + img_src: result.img_src, + url: result.url, + title: result.title, + source: result.url, + }; + } + }) + .filter(Boolean); + break; + } + + default: + throw new Error(`Unknown search engine ${searchEngine}`); + } + + return images; +} + const strParser = new StringOutputParser(); const createImageSearchChain = (llm: BaseChatModel) => { @@ -52,22 +154,7 @@ const createImageSearchChain = (llm: BaseChatModel) => { llm, strParser, RunnableLambda.from(async (input: string) => { - const res = await searchSearxng(input, { - engines: ['bing images', 'google images'], - }); - - const images = []; - - res.results.forEach((result) => { - if (result.img_src && result.url && result.title) { - images.push({ - img_src: result.img_src, - url: result.url, - title: result.title, - }); - } - }); - + const images = await performImageSearch(input); return images.slice(0, 10); }), ]); diff --git a/src/chains/videoSearchAgent.ts b/src/chains/videoSearchAgent.ts index cdd1ac0..331b8c8 100644 --- a/src/chains/videoSearchAgent.ts +++ b/src/chains/videoSearchAgent.ts @@ -7,26 +7,30 @@ import { PromptTemplate } from '@langchain/core/prompts'; import formatChatHistoryAsString from '../utils/formatHistory'; import { BaseMessage } from '@langchain/core/messages'; import { StringOutputParser } from '@langchain/core/output_parsers'; -import { searchSearxng } from '../lib/searxng'; +import { searchSearxng } from '../lib/searchEngines/searxng'; +import { searchGooglePSE } from '../lib/searchEngines/google_pse'; +import { searchBraveAPI } from '../lib/searchEngines/brave'; +import { searchBingAPI } from '../lib/searchEngines/bing'; +import { getVideoSearchEngineBackend } from '../config'; import type { BaseChatModel } from '@langchain/core/language_models/chat_models'; const VideoSearchChainPrompt = ` You will be given a conversation below and a follow up question. You need to rephrase the follow-up question so it is a standalone question that can be used by the LLM to search Youtube for videos. You need to make sure the rephrased question agrees with the conversation and is relevant to the conversation. - + Example: 1. Follow up question: How does a car work? Rephrased: How does a car work? - + 2. Follow up question: What is the theory of relativity? Rephrased: What is theory of relativity - + 3. Follow up question: How does an AC work? Rephrased: How does an AC work - + Conversation: {chat_history} - + Follow up question: {query} Rephrased question: `; @@ -38,6 +42,102 @@ type VideoSearchChainInput = { const strParser = new StringOutputParser(); +async function performVideoSearch(query: string) { + const searchEngine = getVideoSearchEngineBackend(); + const youtubeQuery = `${query} site:youtube.com`; + let videos = []; + + switch (searchEngine) { + case 'google': { + const googleResult = await searchGooglePSE(youtubeQuery); + googleResult.results.forEach((result) => { + // Use .results instead of .originalres + if (result.img_src && result.url && result.title) { + const videoId = new URL(result.url).searchParams.get('v'); + videos.push({ + img_src: result.img_src, + url: result.url, + title: result.title, + iframe_src: videoId + ? `https://www.youtube.com/embed/${videoId}` + : null, + }); + } + }); + break; + } + + case 'searxng': { + const searxResult = await searchSearxng(query, { + engines: ['youtube'], + }); + searxResult.results.forEach((result) => { + if ( + result.thumbnail && + result.url && + result.title && + result.iframe_src + ) { + videos.push({ + img_src: result.thumbnail, + url: result.url, + title: result.title, + iframe_src: result.iframe_src, + }); + } + }); + break; + } + + case 'brave': { + const braveResult = await searchBraveAPI(youtubeQuery); + braveResult.results.forEach((result) => { + if (result.img_src && result.url && result.title) { + const videoId = new URL(result.url).searchParams.get('v'); + videos.push({ + img_src: result.img_src, + url: result.url, + title: result.title, + iframe_src: videoId + ? `https://www.youtube.com/embed/${videoId}` + : null, + }); + } + }); + break; + } + + case 'yacy': { + console.log('Not available for yacy'); + videos = []; + break; + } + + case 'bing': { + const bingResult = await searchBingAPI(youtubeQuery); + bingResult.results.forEach((result) => { + if (result.img_src && result.url && result.title) { + const videoId = new URL(result.url).searchParams.get('v'); + videos.push({ + img_src: result.img_src, + url: result.url, + title: result.title, + iframe_src: videoId + ? `https://www.youtube.com/embed/${videoId}` + : null, + }); + } + }); + break; + } + + default: + throw new Error(`Unknown search engine ${searchEngine}`); + } + + return videos; +} + const createVideoSearchChain = (llm: BaseChatModel) => { return RunnableSequence.from([ RunnableMap.from({ @@ -52,28 +152,7 @@ const createVideoSearchChain = (llm: BaseChatModel) => { llm, strParser, RunnableLambda.from(async (input: string) => { - const res = await searchSearxng(input, { - engines: ['youtube'], - }); - - const videos = []; - - res.results.forEach((result) => { - if ( - result.thumbnail && - result.url && - result.title && - result.iframe_src - ) { - videos.push({ - img_src: result.thumbnail, - url: result.url, - title: result.title, - iframe_src: result.iframe_src, - }); - } - }); - + const videos = await performVideoSearch(input); return videos.slice(0, 10); }), ]); diff --git a/src/config.ts b/src/config.ts index ab2a5db..70e292b 100644 --- a/src/config.ts +++ b/src/config.ts @@ -10,6 +10,12 @@ interface Config { SIMILARITY_MEASURE: string; KEEP_ALIVE: string; }; + SEARCH_ENGINE_BACKENDS: { + SEARCH: string; + IMAGE: string; + VIDEO: string; + NEWS: string; + }; MODELS: { OPENAI: { API_KEY: string; @@ -32,8 +38,23 @@ interface Config { MODEL_NAME: string; }; }; - API_ENDPOINTS: { - SEARXNG: string; + SEARCH_ENGINES: { + GOOGLE: { + API_KEY: string; + CSE_ID: string; + }; + SEARXNG: { + ENDPOINT: string; + }; + BING: { + SUBSCRIPTION_KEY: string; + }; + BRAVE: { + API_KEY: string; + }; + YACY: { + ENDPOINT: string; + }; }; } @@ -61,8 +82,32 @@ export const getAnthropicApiKey = () => loadConfig().MODELS.ANTHROPIC.API_KEY; export const getGeminiApiKey = () => loadConfig().MODELS.GEMINI.API_KEY; +export const getSearchEngineBackend = () => + loadConfig().SEARCH_ENGINE_BACKENDS.SEARCH; + +export const getImageSearchEngineBackend = () => + loadConfig().SEARCH_ENGINE_BACKENDS.IMAGE || getSearchEngineBackend(); + +export const getVideoSearchEngineBackend = () => + loadConfig().SEARCH_ENGINE_BACKENDS.VIDEO || getSearchEngineBackend(); + +export const getNewsSearchEngineBackend = () => + loadConfig().SEARCH_ENGINE_BACKENDS.NEWS || getSearchEngineBackend(); + +export const getGoogleApiKey = () => loadConfig().SEARCH_ENGINES.GOOGLE.API_KEY; + +export const getGoogleCseId = () => loadConfig().SEARCH_ENGINES.GOOGLE.CSE_ID; + +export const getBraveApiKey = () => loadConfig().SEARCH_ENGINES.BRAVE.API_KEY; + +export const getBingSubscriptionKey = () => + loadConfig().SEARCH_ENGINES.BING.SUBSCRIPTION_KEY; + +export const getYacyJsonEndpoint = () => + loadConfig().SEARCH_ENGINES.YACY.ENDPOINT; + export const getSearxngApiEndpoint = () => - process.env.SEARXNG_API_URL || loadConfig().API_ENDPOINTS.SEARXNG; + process.env.SEARXNG_API_URL || loadConfig().SEARCH_ENGINES.SEARXNG.ENDPOINT; export const getOllamaApiEndpoint = () => loadConfig().MODELS.OLLAMA.API_URL; diff --git a/src/lib/searchEngines/bing.ts b/src/lib/searchEngines/bing.ts new file mode 100644 index 0000000..0d46a10 --- /dev/null +++ b/src/lib/searchEngines/bing.ts @@ -0,0 +1,105 @@ +import axios from 'axios'; +import { getBingSubscriptionKey } from '../../config'; + +interface BingAPISearchResult { + _type: string; + name: string; + url: string; + displayUrl: string; + snippet?: string; + dateLastCrawled?: string; + thumbnailUrl?: string; + contentUrl?: string; + hostPageUrl?: string; + width?: number; + height?: number; + accentColor?: string; + contentSize?: string; + datePublished?: string; + encodingFormat?: string; + hostPageDisplayUrl?: string; + id?: string; + isLicensed?: boolean; + isFamilyFriendly?: boolean; + language?: string; + mediaUrl?: string; + motionThumbnailUrl?: string; + publisher?: string; + viewCount?: number; + webSearchUrl?: string; + primaryImageOfPage?: { + thumbnailUrl?: string; + width?: number; + height?: number; + }; + video?: { + allowHttpsEmbed?: boolean; + embedHtml?: string; + allowMobileEmbed?: boolean; + viewCount?: number; + duration?: string; + }; + image?: { + thumbnail?: { + contentUrl?: string; + width?: number; + height?: number; + }; + imageInsightsToken?: string; + imageId?: string; + }; +} + +export const searchBingAPI = async (query: string) => { + try { + const bingApiKey = await getBingSubscriptionKey(); + const url = new URL(`https://api.cognitive.microsoft.com/bing/v7.0/search`); + url.searchParams.append('q', query); + url.searchParams.append('responseFilter', 'Webpages,Images,Videos'); + + const res = await axios.get(url.toString(), { + headers: { + 'Ocp-Apim-Subscription-Key': bingApiKey, + Accept: 'application/json', + }, + }); + + if (res.data.error) { + throw new Error(`Bing API Error: ${res.data.error.message}`); + } + + const originalres = res.data; + + // Extract web, image, and video results + const webResults = originalres.webPages?.value || []; + const imageResults = originalres.images?.value || []; + const videoResults = originalres.videos?.value || []; + + const results = webResults.map((item: BingAPISearchResult) => ({ + title: item.name, + url: item.url, + content: item.snippet, + img_src: + item.primaryImageOfPage?.thumbnailUrl || + imageResults.find((img: any) => img.hostPageUrl === item.url) + ?.thumbnailUrl || + videoResults.find((vid: any) => vid.hostPageUrl === item.url) + ?.thumbnailUrl, + ...(item.video && { + videoData: { + duration: item.video.duration, + embedUrl: item.video.embedHtml?.match(/src="(.*?)"/)?.[1], + }, + publisher: item.publisher, + datePublished: item.datePublished, + }), + })); + + return { results, originalres }; + } catch (error) { + const errorMessage = error.response?.data + ? JSON.stringify(error.response.data, null, 2) + : error.message || 'Unknown error'; + throw new Error(`Bing API Error: ${errorMessage}`); + } +}; diff --git a/src/lib/searchEngines/brave.ts b/src/lib/searchEngines/brave.ts new file mode 100644 index 0000000..ba6ae5a --- /dev/null +++ b/src/lib/searchEngines/brave.ts @@ -0,0 +1,102 @@ +import axios from 'axios'; +import { getBraveApiKey } from '../../config'; + +interface BraveSearchResult { + title: string; + url: string; + content?: string; + img_src?: string; + age?: string; + family_friendly?: boolean; + language?: string; + video?: { + embedUrl?: string; + duration?: string; + }; + rating?: { + value: number; + scale: number; + }; + products?: Array<{ + name: string; + price?: string; + }>; + recipe?: { + ingredients?: string[]; + cookTime?: string; + }; + meta?: { + fetched?: string; + lastCrawled?: string; + }; +} + +export const searchBraveAPI = async ( + query: string, + numResults: number = 20, +): Promise<{ results: BraveSearchResult[]; originalres: any }> => { + try { + const braveApiKey = await getBraveApiKey(); + const url = new URL(`https://api.search.brave.com/res/v1/web/search`); + + url.searchParams.append('q', query); + url.searchParams.append('count', numResults.toString()); + + const res = await axios.get(url.toString(), { + headers: { + 'X-Subscription-Token': braveApiKey, + Accept: 'application/json', + }, + }); + + if (res.data.error) { + throw new Error(`Brave API Error: ${res.data.error.message}`); + } + + const originalres = res.data; + const webResults = originalres.web?.results || []; + + const results: BraveSearchResult[] = webResults.map((item: any) => ({ + title: item.title, + url: item.url, + content: item.description, + img_src: item.thumbnail?.src || item.deep_results?.images?.[0]?.src, + age: item.age, + family_friendly: item.family_friendly, + language: item.language, + video: item.video + ? { + embedUrl: item.video.embed_url, + duration: item.video.duration, + } + : undefined, + rating: item.rating + ? { + value: item.rating.value, + scale: item.rating.scale_max, + } + : undefined, + products: item.deep_results?.product_cluster?.map((p: any) => ({ + name: p.name, + price: p.price, + })), + recipe: item.recipe + ? { + ingredients: item.recipe.ingredients, + cookTime: item.recipe.cook_time, + } + : undefined, + meta: { + fetched: item.meta?.fetched, + lastCrawled: item.meta?.last_crawled, + }, + })); + + return { results, originalres }; + } catch (error) { + const errorMessage = error.response?.data + ? JSON.stringify(error.response.data, null, 2) + : error.message || 'Unknown error'; + throw new Error(`Brave API Error: ${errorMessage}`); + } +}; diff --git a/src/lib/searchEngines/google_pse.ts b/src/lib/searchEngines/google_pse.ts new file mode 100644 index 0000000..4cdb659 --- /dev/null +++ b/src/lib/searchEngines/google_pse.ts @@ -0,0 +1,85 @@ +import axios from 'axios'; +import { getGoogleApiKey, getGoogleCseId } from '../../config'; + +interface GooglePSESearchResult { + kind: string; + title: string; + htmlTitle: string; + link: string; + displayLink: string; + snippet?: string; + htmlSnippet?: string; + cacheId?: string; + formattedUrl: string; + htmlFormattedUrl: string; + pagemap?: { + videoobject: any; + cse_thumbnail?: Array<{ + src: string; + width: string; + height: string; + }>; + metatags?: Array<{ + [key: string]: string; + author?: string; + }>; + cse_image?: Array<{ + src: string; + }>; + }; + fileFormat?: string; + image?: { + contextLink: string; + thumbnailLink: string; + }; + mime?: string; + labels?: Array<{ + name: string; + displayName: string; + }>; +} + +export const searchGooglePSE = async (query: string) => { + try { + const [googleApiKey, googleCseID] = await Promise.all([ + getGoogleApiKey(), + getGoogleCseId(), + ]); + + const url = new URL(`https://www.googleapis.com/customsearch/v1`); + url.searchParams.append('q', query); + url.searchParams.append('cx', googleCseID); + url.searchParams.append('key', googleApiKey); + + const res = await axios.get(url.toString()); + + if (res.data.error) { + throw new Error(`Google PSE Error: ${res.data.error.message}`); + } + + const originalres = res.data.items; + + const results = originalres.map((item: GooglePSESearchResult) => ({ + title: item.title, + url: item.link, + content: item.snippet, + img_src: + item.pagemap?.cse_image?.[0]?.src || + item.pagemap?.cse_thumbnail?.[0]?.src || + item.image?.thumbnailLink, + ...(item.pagemap?.videoobject?.[0] && { + videoData: { + duration: item.pagemap.videoobject[0].duration, + embedUrl: item.pagemap.videoobject[0].embedurl, + }, + }), + })); + + return { results, originalres }; + } catch (error) { + const errorMessage = error.response?.data + ? JSON.stringify(error.response.data, null, 2) + : error.message || 'Unknown error'; + throw new Error(`Google PSE Error: ${errorMessage}`); + } +}; diff --git a/src/lib/searxng.ts b/src/lib/searchEngines/searxng.ts similarity index 94% rename from src/lib/searxng.ts rename to src/lib/searchEngines/searxng.ts index da62457..bfa3d4e 100644 --- a/src/lib/searxng.ts +++ b/src/lib/searchEngines/searxng.ts @@ -1,5 +1,5 @@ import axios from 'axios'; -import { getSearxngApiEndpoint } from '../config'; +import { getSearxngApiEndpoint } from '../../config'; interface SearxngSearchOptions { categories?: string[]; diff --git a/src/lib/searchEngines/yacy.ts b/src/lib/searchEngines/yacy.ts new file mode 100644 index 0000000..74fc76c --- /dev/null +++ b/src/lib/searchEngines/yacy.ts @@ -0,0 +1,79 @@ +import axios from 'axios'; +import { getYacyJsonEndpoint } from '../../config'; + +interface YaCySearchResult { + channels: { + title: string; + description: string; + link: string; + image: { + url: string; + title: string; + link: string; + }; + startIndex: string; + itemsPerPage: string; + searchTerms: string; + items: { + title: string; + link: string; + code: string; + description: string; + pubDate: string; + image?: string; + size: string; + sizename: string; + guid: string; + faviconUrl: string; + host: string; + path: string; + file: string; + urlhash: string; + ranking: string; + }[]; + navigation: { + facetname: string; + displayname: string; + type: string; + min: string; + max: string; + mean: string; + elements: { + name: string; + count: string; + modifier: string; + url: string; + }[]; + }[]; + }[]; +} + +export const searchYaCy = async (query: string, numResults: number = 20) => { + try { + const yacyBaseUrl = getYacyJsonEndpoint(); + + const url = new URL(`${yacyBaseUrl}/yacysearch.json`); + url.searchParams.append('query', query); + url.searchParams.append('count', numResults.toString()); + + const res = await axios.get(url.toString()); + + const originalres = res.data as YaCySearchResult; + + const results = originalres.channels[0].items.map((item) => ({ + title: item.title, + url: item.link, + content: item.description, + img_src: item.image || null, + pubDate: item.pubDate, + host: item.host, + })); + + return { results, originalres }; + } catch (error) { + const errorMessage = error.response?.data + ? JSON.stringify(error.response.data, null, 2) + : error.message || 'Unknown error'; + throw new Error(`YaCy Error: ${errorMessage}`); + } +}; diff --git a/src/routes/config.ts b/src/routes/config.ts index 18b370d..c4fe167 100644 --- a/src/routes/config.ts +++ b/src/routes/config.ts @@ -13,6 +13,16 @@ import { getCustomOpenaiApiUrl, getCustomOpenaiApiKey, getCustomOpenaiModelName, + getSearchEngineBackend, + getImageSearchEngineBackend, + getVideoSearchEngineBackend, + getNewsSearchEngineBackend, + getSearxngApiEndpoint, + getGoogleApiKey, + getGoogleCseId, + getBingSubscriptionKey, + getBraveApiKey, + getYacyJsonEndpoint, } from '../config'; import logger from '../utils/logger'; @@ -60,6 +70,21 @@ router.get('/', async (_, res) => { config['customOpenaiApiUrl'] = getCustomOpenaiApiUrl(); config['customOpenaiApiKey'] = getCustomOpenaiApiKey(); config['customOpenaiModelName'] = getCustomOpenaiModelName(); + + // Add search engine configuration + config['searchEngineBackends'] = { + search: getSearchEngineBackend(), + image: getImageSearchEngineBackend(), + video: getVideoSearchEngineBackend(), + news: getNewsSearchEngineBackend(), + }; + + config['searxngEndpoint'] = getSearxngApiEndpoint(); + config['googleApiKey'] = getGoogleApiKey(); + config['googleCseId'] = getGoogleCseId(); + config['bingSubscriptionKey'] = getBingSubscriptionKey(); + config['braveApiKey'] = getBraveApiKey(); + config['yacyEndpoint'] = getYacyJsonEndpoint(); res.status(200).json(config); } catch (err: any) { @@ -94,6 +119,30 @@ router.post('/', async (req, res) => { MODEL_NAME: config.customOpenaiModelName, }, }, + SEARCH_ENGINE_BACKENDS: config.searchEngineBackends ? { + SEARCH: config.searchEngineBackends.search, + IMAGE: config.searchEngineBackends.image, + VIDEO: config.searchEngineBackends.video, + NEWS: config.searchEngineBackends.news, + } : undefined, + SEARCH_ENGINES: { + GOOGLE: { + API_KEY: config.googleApiKey, + CSE_ID: config.googleCseId, + }, + SEARXNG: { + ENDPOINT: config.searxngEndpoint, + }, + BING: { + SUBSCRIPTION_KEY: config.bingSubscriptionKey, + }, + BRAVE: { + API_KEY: config.braveApiKey, + }, + YACY: { + ENDPOINT: config.yacyEndpoint, + }, + }, }; updateConfig(updatedConfig); diff --git a/src/routes/discover.ts b/src/routes/discover.ts index b6f8ff9..3d3f1e3 100644 --- a/src/routes/discover.ts +++ b/src/routes/discover.ts @@ -1,42 +1,125 @@ import express from 'express'; -import { searchSearxng } from '../lib/searxng'; +import { searchSearxng } from '../lib/searchEngines/searxng'; +import { searchGooglePSE } from '../lib/searchEngines/google_pse'; +import { searchBraveAPI } from '../lib/searchEngines/brave'; +import { searchYaCy } from '../lib/searchEngines/yacy'; +import { searchBingAPI } from '../lib/searchEngines/bing'; +import { getNewsSearchEngineBackend } from '../config'; import logger from '../utils/logger'; const router = express.Router(); +async function performSearch(query: string, site: string) { + const searchEngine = getNewsSearchEngineBackend(); + switch (searchEngine) { + case 'google': { + const googleResult = await searchGooglePSE(query); + + return googleResult.originalres.map((item) => { + const imageSources = [ + item.pagemap?.cse_image?.[0]?.src, + item.pagemap?.cse_thumbnail?.[0]?.src, + item.pagemap?.metatags?.[0]?.['og:image'], + item.pagemap?.metatags?.[0]?.['twitter:image'], + item.pagemap?.metatags?.[0]?.['image'], + ].filter(Boolean); // Remove undefined values + + return { + title: item.title, + url: item.link, + content: item.snippet, + thumbnail: imageSources[0], // First available image + img_src: imageSources[0], // Same as thumbnail for consistency + iframe_src: null, + author: item.pagemap?.metatags?.[0]?.['og:site_name'] || site, + publishedDate: + item.pagemap?.metatags?.[0]?.['article:published_time'], + }; + }); + } + + case 'searxng': { + const searxResult = await searchSearxng(query, { + engines: ['bing news'], + pageno: 1, + }); + return searxResult.results; + } + + case 'brave': { + const braveResult = await searchBraveAPI(query); + return braveResult.results.map((item) => ({ + title: item.title, + url: item.url, + content: item.content, + thumbnail: item.img_src, + img_src: item.img_src, + iframe_src: null, + author: item.meta?.fetched || site, + publishedDate: item.meta?.lastCrawled, + })); + } + + case 'yacy': { + const yacyResult = await searchYaCy(query); + return yacyResult.results.map((item) => ({ + title: item.title, + url: item.url, + content: item.content, + thumbnail: item.img_src, + img_src: item.img_src, + iframe_src: null, + author: item?.host || site, + publishedDate: item?.pubDate, + })); + } + + case 'bing': { + const bingResult = await searchBingAPI(query); + return bingResult.results.map((item) => ({ + title: item.title, + url: item.url, + content: item.content, + thumbnail: item.img_src, + img_src: item.img_src, + iframe_src: null, + author: item?.publisher || site, + publishedDate: item?.datePublished, + })); + } + + default: + throw new Error(`Unknown search engine ${searchEngine}`); + } +} + router.get('/', async (req, res) => { try { + const queries = [ + { site: 'businessinsider.com', topic: 'AI' }, + { site: 'www.exchangewire.com', topic: 'AI' }, + { site: 'yahoo.com', topic: 'AI' }, + { site: 'businessinsider.com', topic: 'tech' }, + { site: 'www.exchangewire.com', topic: 'tech' }, + { site: 'yahoo.com', topic: 'tech' }, + ]; + const data = ( - await Promise.all([ - searchSearxng('site:businessinsider.com AI', { - engines: ['bing news'], - pageno: 1, + await Promise.all( + queries.map(async ({ site, topic }) => { + try { + const query = `site:${site} ${topic}`; + return await performSearch(query, site); + } catch (error) { + logger.error(`Error searching ${site}: ${error.message}`); + return []; + } }), - searchSearxng('site:www.exchangewire.com AI', { - engines: ['bing news'], - pageno: 1, - }), - searchSearxng('site:yahoo.com AI', { - engines: ['bing news'], - pageno: 1, - }), - searchSearxng('site:businessinsider.com tech', { - engines: ['bing news'], - pageno: 1, - }), - searchSearxng('site:www.exchangewire.com tech', { - engines: ['bing news'], - pageno: 1, - }), - searchSearxng('site:yahoo.com tech', { - engines: ['bing news'], - pageno: 1, - }), - ]) + ) ) - .map((result) => result.results) .flat() - .sort(() => Math.random() - 0.5); + .sort(() => Math.random() - 0.5) + .filter((item) => item.title && item.url && item.content); return res.json({ blogs: data }); } catch (err: any) { diff --git a/src/search/metaSearchAgent.ts b/src/search/metaSearchAgent.ts index ee82c10..81dd063 100644 --- a/src/search/metaSearchAgent.ts +++ b/src/search/metaSearchAgent.ts @@ -17,7 +17,12 @@ import LineListOutputParser from '../lib/outputParsers/listLineOutputParser'; import LineOutputParser from '../lib/outputParsers/lineOutputParser'; import { getDocumentsFromLinks } from '../utils/documents'; import { Document } from 'langchain/document'; -import { searchSearxng } from '../lib/searxng'; +import { searchSearxng } from '../lib/searchEngines/searxng'; +import { searchGooglePSE } from '../lib/searchEngines/google_pse'; +import { searchBingAPI } from '../lib/searchEngines/bing'; +import { searchBraveAPI } from '../lib/searchEngines/brave'; +import { searchYaCy } from '../lib/searchEngines/yacy'; +import { getSearchEngineBackend } from '../config'; import path from 'path'; import fs from 'fs'; import computeSimilarity from '../utils/computeSimilarity'; @@ -132,7 +137,7 @@ class MetaSearchAgent implements MetaSearchAgentType { You are a web search summarizer, tasked with summarizing a piece of text retrieved from a web search. Your job is to summarize the text into a detailed, 2-4 paragraph explanation that captures the main ideas and provides a comprehensive answer to the query. If the query is \"summarize\", you should provide a detailed summary of the text. If the query is a specific question, you should answer it in the summary. - + - **Journalistic tone**: The summary should sound professional and journalistic, not too casual or vague. - **Thorough and detailed**: Ensure that every key point from the text is captured and that the summary directly answers the query. - **Not too lengthy, but detailed**: The summary should be informative but not excessively long. Focus on providing detailed information in a concise format. @@ -203,10 +208,37 @@ class MetaSearchAgent implements MetaSearchAgentType { return { query: question, docs: docs }; } else { - const res = await searchSearxng(question, { - language: 'en', - engines: this.config.activeEngines, - }); + const searchEngine = getSearchEngineBackend(); + + let res; + switch (searchEngine) { + case 'searxng': + res = await searchSearxng(question, { + language: 'en', + engines: this.config.activeEngines, + }); + break; + case 'google': + res = await searchGooglePSE(question); + break; + case 'bing': + res = await searchBingAPI(question); + break; + case 'brave': + res = await searchBraveAPI(question); + break; + case 'yacy': + res = await searchYaCy(question); + break; + default: + throw new Error(`Unknown search engine ${searchEngine}`); + } + + if (!res?.results) { + throw new Error( + `No results found for search engine: ${searchEngine}`, + ); + } const documents = res.results.map( (result) => diff --git a/ui/app/settings/page.tsx b/ui/app/settings/page.tsx index 6aff1b0..ea8a912 100644 --- a/ui/app/settings/page.tsx +++ b/ui/app/settings/page.tsx @@ -23,6 +23,18 @@ interface SettingsType { customOpenaiApiKey: string; customOpenaiApiUrl: string; customOpenaiModelName: string; + searchEngineBackends: { + search: string; + image: string; + video: string; + news: string; + }; + searxngEndpoint: string; + googleApiKey: string; + googleCseId: string; + bingSubscriptionKey: string; + braveApiKey: string; + yacyEndpoint: string; } interface InputProps extends React.InputHTMLAttributes { @@ -112,6 +124,12 @@ const Page = () => { const [automaticImageSearch, setAutomaticImageSearch] = useState(false); const [automaticVideoSearch, setAutomaticVideoSearch] = useState(false); const [savingStates, setSavingStates] = useState>({}); + const [searchEngineBackends, setSearchEngineBackends] = useState({ + search: '', + image: '', + video: '', + news: '', + }); useEffect(() => { const fetchConfig = async () => { @@ -125,6 +143,16 @@ const Page = () => { const data = (await res.json()) as SettingsType; setConfig(data); + // Set search engine backends if they exist in the response + if (data.searchEngineBackends) { + setSearchEngineBackends({ + search: data.searchEngineBackends.search || '', + image: data.searchEngineBackends.image || '', + video: data.searchEngineBackends.video || '', + news: data.searchEngineBackends.news || '', + }); + } + const chatModelProvidersKeys = Object.keys(data.chatModelProviders || {}); const embeddingModelProvidersKeys = Object.keys( data.embeddingModelProviders || {}, @@ -331,6 +359,8 @@ const Page = () => { localStorage.setItem('embeddingModelProvider', value); } else if (key === 'embeddingModel') { localStorage.setItem('embeddingModel', value); + } else if (key === 'searchEngineBackends') { + localStorage.setItem('searchEngineBackends', value); } } catch (err) { console.error('Failed to save:', err); @@ -793,6 +823,234 @@ const Page = () => { + + +
+
+

+ Default Search Engine +

+ { + const value = e.target.value; + setSearchEngineBackends((prev) => ({ + ...prev, + image: value, + })); + saveConfig('searchEngineBackends', { + ...searchEngineBackends, + image: value, + }); + }} + options={[ + { value: '', label: 'Use Default Search Engine' }, + { value: 'searxng', label: 'SearXNG' }, + { value: 'google', label: 'Google' }, + { value: 'bing', label: 'Bing' }, + { value: 'brave', label: 'Brave' }, + ]} + /> +
+ +
+

+ Video Search Engine +

+ { + const value = e.target.value; + setSearchEngineBackends((prev) => ({ + ...prev, + news: value, + })); + saveConfig('searchEngineBackends', { + ...searchEngineBackends, + news: value, + }); + }} + options={[ + { value: '', label: 'Use Default Search Engine' }, + { value: 'searxng', label: 'SearXNG' }, + { value: 'google', label: 'Google' }, + { value: 'bing', label: 'Bing' }, + { value: 'brave', label: 'Brave' }, + ]} + /> +
+ +
+
+

+ SearXNG Endpoint +

+ { + setConfig((prev) => ({ + ...prev!, + searxngEndpoint: e.target.value, + })); + }} + onSave={(value) => saveConfig('searxngEndpoint', value)} + /> +
+
+ +
+

+ Google API Key +

+ { + setConfig((prev) => ({ + ...prev!, + googleApiKey: e.target.value, + })); + }} + onSave={(value) => saveConfig('googleApiKey', value)} + /> +
+ +
+

+ Google CSE ID +

+ { + setConfig((prev) => ({ + ...prev!, + googleCseId: e.target.value, + })); + }} + onSave={(value) => saveConfig('googleCseId', value)} + /> +
+ +
+

+ Bing Subscription Key +

+ { + setConfig((prev) => ({ + ...prev!, + bingSubscriptionKey: e.target.value, + })); + }} + onSave={(value) => saveConfig('bingSubscriptionKey', value)} + /> +
+ +
+

+ Brave API Key +

+ { + setConfig((prev) => ({ + ...prev!, + braveApiKey: e.target.value, + })); + }} + onSave={(value) => saveConfig('braveApiKey', value)} + /> +
+ +
+

+ YaCy Endpoint +

+ { + setConfig((prev) => ({ + ...prev!, + yacyEndpoint: e.target.value, + })); + }} + onSave={(value) => saveConfig('yacyEndpoint', value)} + /> +
+
+
) )} diff --git a/ui/components/MessageBox.tsx b/ui/components/MessageBox.tsx index a7942d1..61f4f05 100644 --- a/ui/components/MessageBox.tsx +++ b/ui/components/MessageBox.tsx @@ -68,7 +68,13 @@ const MessageBox = ({ return (
{message.role === 'user' && ( -
+

{message.content}