From 4d438f06cd68ed1d1cd71341d0a30033f4774cb2 Mon Sep 17 00:00:00 2001 From: Khanhlq Date: Mon, 17 Mar 2025 12:54:10 +0700 Subject: [PATCH] Add PDF search feature and update Docker configuration --- app.dockerfile | 20 ++- backend.dockerfile | 19 +-- docker-compose.yaml | 16 +++ src/utils/documents.ts | 1 + ui/components/MessageBox.tsx | 8 ++ ui/components/MessageSources.tsx | 8 +- ui/components/SearchPDFs.tsx | 207 +++++++++++++++++++++++++++++++ 7 files changed, 265 insertions(+), 14 deletions(-) create mode 100644 ui/components/SearchPDFs.tsx diff --git a/app.dockerfile b/app.dockerfile index 488e64b..ba22a33 100644 --- a/app.dockerfile +++ b/app.dockerfile @@ -1,15 +1,25 @@ -FROM node:20.18.0-alpine +FROM node:20.18.0-alpine as base +WORKDIR /home/perplexica +# Development stage +FROM base as development +ENV NODE_ENV=development ARG NEXT_PUBLIC_WS_URL=ws://127.0.0.1:3001 ARG NEXT_PUBLIC_API_URL=http://127.0.0.1:3001/api ENV NEXT_PUBLIC_WS_URL=${NEXT_PUBLIC_WS_URL} ENV NEXT_PUBLIC_API_URL=${NEXT_PUBLIC_API_URL} +COPY ui/package.json ui/yarn.lock ./ +RUN yarn install +EXPOSE 3000 +CMD ["yarn", "dev"] -WORKDIR /home/perplexica - +# Production stage +FROM base as production +ARG NEXT_PUBLIC_WS_URL=ws://127.0.0.1:3001 +ARG NEXT_PUBLIC_API_URL=http://127.0.0.1:3001/api +ENV NEXT_PUBLIC_WS_URL=${NEXT_PUBLIC_WS_URL} +ENV NEXT_PUBLIC_API_URL=${NEXT_PUBLIC_API_URL} COPY ui /home/perplexica/ - RUN yarn install --frozen-lockfile RUN yarn build - CMD ["yarn", "start"] \ No newline at end of file diff --git a/backend.dockerfile b/backend.dockerfile index b6ab95a..6727df2 100644 --- a/backend.dockerfile +++ b/backend.dockerfile @@ -1,17 +1,22 @@ -FROM node:18-slim - +FROM node:18-slim as base WORKDIR /home/perplexica +RUN mkdir -p /home/perplexica/data /home/perplexica/uploads +# Development stage +FROM base as development +ENV NODE_ENV=development +COPY package.json yarn.lock ./ +RUN yarn install --frozen-lockfile --network-timeout 600000 +EXPOSE 3001 +CMD ["yarn", "dev"] + +# Production stage +FROM base as production COPY src /home/perplexica/src COPY tsconfig.json /home/perplexica/ COPY drizzle.config.ts /home/perplexica/ COPY package.json /home/perplexica/ COPY yarn.lock /home/perplexica/ - -RUN mkdir /home/perplexica/data -RUN mkdir /home/perplexica/uploads - RUN yarn install --frozen-lockfile --network-timeout 600000 RUN yarn build - CMD ["yarn", "start"] \ No newline at end of file diff --git a/docker-compose.yaml b/docker-compose.yaml index a0e1d73..27233dd 100644 --- a/docker-compose.yaml +++ b/docker-compose.yaml @@ -13,9 +13,11 @@ services: build: context: . dockerfile: backend.dockerfile + target: development image: itzcrazykns1337/perplexica-backend:main environment: - SEARXNG_API_URL=http://searxng:8080 + - NODE_ENV=development depends_on: - searxng ports: @@ -24,27 +26,41 @@ services: - backend-dbstore:/home/perplexica/data - uploads:/home/perplexica/uploads - ./config.toml:/home/perplexica/config.toml + - ./src:/home/perplexica/src + - ./package.json:/home/perplexica/package.json + - ./yarn.lock:/home/perplexica/yarn.lock + - ./tsconfig.json:/home/perplexica/tsconfig.json + - ./drizzle.config.ts:/home/perplexica/drizzle.config.ts extra_hosts: - 'host.docker.internal:host-gateway' networks: - perplexica-network restart: unless-stopped + command: yarn dev perplexica-frontend: build: context: . dockerfile: app.dockerfile + target: development args: - NEXT_PUBLIC_API_URL=http://127.0.0.1:3001/api - NEXT_PUBLIC_WS_URL=ws://127.0.0.1:3001 image: itzcrazykns1337/perplexica-frontend:main depends_on: - perplexica-backend + environment: + - NODE_ENV=development ports: - 3000:3000 + volumes: + - ./ui:/home/perplexica + - /home/perplexica/node_modules + - /home/perplexica/.next networks: - perplexica-network restart: unless-stopped + command: yarn dev networks: perplexica-network: diff --git a/src/utils/documents.ts b/src/utils/documents.ts index 5cd0366..25c90e2 100644 --- a/src/utils/documents.ts +++ b/src/utils/documents.ts @@ -40,6 +40,7 @@ export const getDocumentsFromLinks = async ({ links }: { links: string[] }) => { metadata: { title: title, url: link, + type: 'pdf', // Add this type indicator }, }); }); diff --git a/ui/components/MessageBox.tsx b/ui/components/MessageBox.tsx index 61f4f05..7adc929 100644 --- a/ui/components/MessageBox.tsx +++ b/ui/components/MessageBox.tsx @@ -19,6 +19,7 @@ import MessageSources from './MessageSources'; import SearchImages from './SearchImages'; import SearchVideos from './SearchVideos'; import { useSpeech } from 'react-text-to-speech'; +import { SearchPDFs } from './SearchPDFs'; const MessageBox = ({ message, @@ -63,6 +64,7 @@ const MessageBox = ({ setParsedMessage(message.content); }, [message.content, message.sources, message.role]); + const { speechStatus, start, stop } = useSpeech({ text: speechMessage }); return ( @@ -119,6 +121,7 @@ const MessageBox = ({ > {parsedMessage} + {loading && isLast ? null : (
@@ -187,6 +190,7 @@ const MessageBox = ({
)} + {/* PDF Documents Section - Moved from sidebar to under the answer */}
@@ -198,6 +202,10 @@ const MessageBox = ({ chatHistory={history.slice(0, messageIndex - 1)} query={history[messageIndex - 1].content} /> +
)} diff --git a/ui/components/MessageSources.tsx b/ui/components/MessageSources.tsx index c7ee945..129a880 100644 --- a/ui/components/MessageSources.tsx +++ b/ui/components/MessageSources.tsx @@ -69,11 +69,15 @@ const MessageSources = ({ sources }: { sources: Document[] }) => {
{sources.slice(3, 6).map((source, i) => { return source.metadata.url === 'File' ? ( -
+
) : ( {

-
+
{i + 1}
diff --git a/ui/components/SearchPDFs.tsx b/ui/components/SearchPDFs.tsx new file mode 100644 index 0000000..c46e0ab --- /dev/null +++ b/ui/components/SearchPDFs.tsx @@ -0,0 +1,207 @@ +/* eslint-disable @next/next/no-img-element */ +'use client'; +import { File, ExternalLink, PlusIcon, Download } from 'lucide-react'; +import { useState } from 'react'; +import { Message } from './ChatWindow'; + +type PDF = { + title: string; + url: string; + type?: 'academic' | 'document' | 'article'; +}; + +export const SearchPDFs = ({ + query, + chatHistory, +}: { + query: string; + chatHistory: Message[]; +}) => { + const [pdfs, setPdfs] = useState(null); + const [loading, setLoading] = useState(false); + + const getPDFType = (title: string): 'academic' | 'document' | 'article' => { + const lowerTitle = title.toLowerCase(); + if (lowerTitle.includes('paper') || lowerTitle.includes('journal') || lowerTitle.includes('research')) { + return 'academic'; + } + if (lowerTitle.includes('article') || lowerTitle.includes('blog')) { + return 'article'; + } + return 'document'; + }; + + const getTypeColor = (type: 'academic' | 'document' | 'article') => { + switch (type) { + case 'academic': + return 'bg-blue-500/10 text-blue-500'; + case 'article': + return 'bg-green-500/10 text-green-500'; + default: + return 'bg-red-500/10 text-red-500'; + } + }; + + const formatChatHistory = (history: Message[]) => { + return history.map(msg => { + return [msg.role === 'user' ? 'human' : 'ai', msg.content]; + }); + }; + + const searchForPDFs = async () => { + setLoading(true); + + const chatModelProvider = localStorage.getItem('chatModelProvider'); + const chatModel = localStorage.getItem('chatModel'); + + const customOpenAIBaseURL = localStorage.getItem('openAIBaseURL'); + const customOpenAIKey = localStorage.getItem('openAIApiKey'); + + try { + const res = await fetch( + `${process.env.NEXT_PUBLIC_API_URL}/search`, + { + method: 'POST', + headers: { + 'Content-Type': 'application/json', + }, + body: JSON.stringify({ + query: `PDF documents about: ${query}`, + focusMode: 'webSearch', + optimizationMode: 'balanced', + history: formatChatHistory(chatHistory), + chatModel: { + provider: chatModelProvider, + model: chatModel, + ...(chatModelProvider === 'custom_openai' && { + customOpenAIKey: customOpenAIKey, + customOpenAIBaseURL: customOpenAIBaseURL, + }), + }, + }), + }, + ); + + const data = await res.json(); + console.log("Search response:", data); + + // Extract PDF results from the message and sources + let pdfResults: PDF[] = []; + + // Check for PDF URLs in sources + if (data.sources && Array.isArray(data.sources)) { + pdfResults = data.sources + .filter((source: any) => + source.metadata?.url?.toLowerCase().endsWith('.pdf') || + source.metadata?.title?.includes('PDF') || + source.metadata?.url?.includes('.pdf') + ) + .map((source: any) => ({ + title: source.metadata.title || 'PDF Document', + url: source.metadata.url, + type: getPDFType(source.metadata.title || '') + })); + } + + setPdfs(pdfResults); + } catch (error) { + console.error('Error searching for PDFs:', error); + setPdfs([]); + } finally { + setLoading(false); + } + }; + + return ( + <> + {!loading && pdfs === null && ( + + )} + + {loading && ( +
+ {[...Array(4)].map((_, i) => ( +
+ ))} +
+ )} + + {pdfs !== null && pdfs.length > 0 && ( +
+
+
+ +

PDF Documents

+ ({pdfs.length}) +
+
+ +
+ {pdfs.map((pdf, i) => ( +
+
+
+
+ +
+
+

+ {pdf.title} +

+

+ {new URL(pdf.url).hostname} +

+
+
+ +
+
+ ))} +
+
+ )} + + {pdfs !== null && pdfs.length === 0 && ( +
+
+ +

PDF Documents

+
+

No PDF documents found related to your query.

+
+ )} + + ); +}; \ No newline at end of file