Add PDF search feature and update Docker configuration
This commit is contained in:
parent
060c68a900
commit
4d438f06cd
7 changed files with 265 additions and 14 deletions
|
|
@ -1,15 +1,25 @@
|
|||
FROM node:20.18.0-alpine
|
||||
FROM node:20.18.0-alpine as base
|
||||
WORKDIR /home/perplexica
|
||||
|
||||
# Development stage
|
||||
FROM base as development
|
||||
ENV NODE_ENV=development
|
||||
ARG NEXT_PUBLIC_WS_URL=ws://127.0.0.1:3001
|
||||
ARG NEXT_PUBLIC_API_URL=http://127.0.0.1:3001/api
|
||||
ENV NEXT_PUBLIC_WS_URL=${NEXT_PUBLIC_WS_URL}
|
||||
ENV NEXT_PUBLIC_API_URL=${NEXT_PUBLIC_API_URL}
|
||||
COPY ui/package.json ui/yarn.lock ./
|
||||
RUN yarn install
|
||||
EXPOSE 3000
|
||||
CMD ["yarn", "dev"]
|
||||
|
||||
WORKDIR /home/perplexica
|
||||
|
||||
# Production stage
|
||||
FROM base as production
|
||||
ARG NEXT_PUBLIC_WS_URL=ws://127.0.0.1:3001
|
||||
ARG NEXT_PUBLIC_API_URL=http://127.0.0.1:3001/api
|
||||
ENV NEXT_PUBLIC_WS_URL=${NEXT_PUBLIC_WS_URL}
|
||||
ENV NEXT_PUBLIC_API_URL=${NEXT_PUBLIC_API_URL}
|
||||
COPY ui /home/perplexica/
|
||||
|
||||
RUN yarn install --frozen-lockfile
|
||||
RUN yarn build
|
||||
|
||||
CMD ["yarn", "start"]
|
||||
|
|
@ -1,17 +1,22 @@
|
|||
FROM node:18-slim
|
||||
|
||||
FROM node:18-slim as base
|
||||
WORKDIR /home/perplexica
|
||||
RUN mkdir -p /home/perplexica/data /home/perplexica/uploads
|
||||
|
||||
# Development stage
|
||||
FROM base as development
|
||||
ENV NODE_ENV=development
|
||||
COPY package.json yarn.lock ./
|
||||
RUN yarn install --frozen-lockfile --network-timeout 600000
|
||||
EXPOSE 3001
|
||||
CMD ["yarn", "dev"]
|
||||
|
||||
# Production stage
|
||||
FROM base as production
|
||||
COPY src /home/perplexica/src
|
||||
COPY tsconfig.json /home/perplexica/
|
||||
COPY drizzle.config.ts /home/perplexica/
|
||||
COPY package.json /home/perplexica/
|
||||
COPY yarn.lock /home/perplexica/
|
||||
|
||||
RUN mkdir /home/perplexica/data
|
||||
RUN mkdir /home/perplexica/uploads
|
||||
|
||||
RUN yarn install --frozen-lockfile --network-timeout 600000
|
||||
RUN yarn build
|
||||
|
||||
CMD ["yarn", "start"]
|
||||
|
|
@ -13,9 +13,11 @@ services:
|
|||
build:
|
||||
context: .
|
||||
dockerfile: backend.dockerfile
|
||||
target: development
|
||||
image: itzcrazykns1337/perplexica-backend:main
|
||||
environment:
|
||||
- SEARXNG_API_URL=http://searxng:8080
|
||||
- NODE_ENV=development
|
||||
depends_on:
|
||||
- searxng
|
||||
ports:
|
||||
|
|
@ -24,27 +26,41 @@ services:
|
|||
- backend-dbstore:/home/perplexica/data
|
||||
- uploads:/home/perplexica/uploads
|
||||
- ./config.toml:/home/perplexica/config.toml
|
||||
- ./src:/home/perplexica/src
|
||||
- ./package.json:/home/perplexica/package.json
|
||||
- ./yarn.lock:/home/perplexica/yarn.lock
|
||||
- ./tsconfig.json:/home/perplexica/tsconfig.json
|
||||
- ./drizzle.config.ts:/home/perplexica/drizzle.config.ts
|
||||
extra_hosts:
|
||||
- 'host.docker.internal:host-gateway'
|
||||
networks:
|
||||
- perplexica-network
|
||||
restart: unless-stopped
|
||||
command: yarn dev
|
||||
|
||||
perplexica-frontend:
|
||||
build:
|
||||
context: .
|
||||
dockerfile: app.dockerfile
|
||||
target: development
|
||||
args:
|
||||
- NEXT_PUBLIC_API_URL=http://127.0.0.1:3001/api
|
||||
- NEXT_PUBLIC_WS_URL=ws://127.0.0.1:3001
|
||||
image: itzcrazykns1337/perplexica-frontend:main
|
||||
depends_on:
|
||||
- perplexica-backend
|
||||
environment:
|
||||
- NODE_ENV=development
|
||||
ports:
|
||||
- 3000:3000
|
||||
volumes:
|
||||
- ./ui:/home/perplexica
|
||||
- /home/perplexica/node_modules
|
||||
- /home/perplexica/.next
|
||||
networks:
|
||||
- perplexica-network
|
||||
restart: unless-stopped
|
||||
command: yarn dev
|
||||
|
||||
networks:
|
||||
perplexica-network:
|
||||
|
|
|
|||
|
|
@ -40,6 +40,7 @@ export const getDocumentsFromLinks = async ({ links }: { links: string[] }) => {
|
|||
metadata: {
|
||||
title: title,
|
||||
url: link,
|
||||
type: 'pdf', // Add this type indicator
|
||||
},
|
||||
});
|
||||
});
|
||||
|
|
|
|||
|
|
@ -19,6 +19,7 @@ import MessageSources from './MessageSources';
|
|||
import SearchImages from './SearchImages';
|
||||
import SearchVideos from './SearchVideos';
|
||||
import { useSpeech } from 'react-text-to-speech';
|
||||
import { SearchPDFs } from './SearchPDFs';
|
||||
|
||||
const MessageBox = ({
|
||||
message,
|
||||
|
|
@ -63,6 +64,7 @@ const MessageBox = ({
|
|||
setParsedMessage(message.content);
|
||||
}, [message.content, message.sources, message.role]);
|
||||
|
||||
|
||||
const { speechStatus, start, stop } = useSpeech({ text: speechMessage });
|
||||
|
||||
return (
|
||||
|
|
@ -119,6 +121,7 @@ const MessageBox = ({
|
|||
>
|
||||
{parsedMessage}
|
||||
</Markdown>
|
||||
|
||||
{loading && isLast ? null : (
|
||||
<div className="flex flex-row items-center justify-between w-full text-black dark:text-white py-4 -mx-2">
|
||||
<div className="flex flex-row items-center space-x-1">
|
||||
|
|
@ -187,6 +190,7 @@ const MessageBox = ({
|
|||
</div>
|
||||
</>
|
||||
)}
|
||||
{/* PDF Documents Section - Moved from sidebar to under the answer */}
|
||||
</div>
|
||||
</div>
|
||||
<div className="lg:sticky lg:top-20 flex flex-col items-center space-y-3 w-full lg:w-3/12 z-30 h-full pb-4">
|
||||
|
|
@ -198,6 +202,10 @@ const MessageBox = ({
|
|||
chatHistory={history.slice(0, messageIndex - 1)}
|
||||
query={history[messageIndex - 1].content}
|
||||
/>
|
||||
<SearchPDFs
|
||||
query={history[messageIndex - 1]?.content}
|
||||
chatHistory={history.slice(0, messageIndex - 1)}
|
||||
/>
|
||||
</div>
|
||||
</div>
|
||||
)}
|
||||
|
|
|
|||
|
|
@ -69,11 +69,15 @@ const MessageSources = ({ sources }: { sources: Document[] }) => {
|
|||
<div className="flex flex-row items-center space-x-1">
|
||||
{sources.slice(3, 6).map((source, i) => {
|
||||
return source.metadata.url === 'File' ? (
|
||||
<div className="bg-dark-200 hover:bg-dark-100 transition duration-200 flex items-center justify-center w-6 h-6 rounded-full">
|
||||
<div
|
||||
key={`source-icon-${i}`}
|
||||
className="bg-dark-200 hover:bg-dark-100 transition duration-200 flex items-center justify-center w-6 h-6 rounded-full"
|
||||
>
|
||||
<File size={12} className="text-white/70" />
|
||||
</div>
|
||||
) : (
|
||||
<img
|
||||
key={`source-icon-${i}`}
|
||||
src={`https://s2.googleusercontent.com/s2/favicons?domain_url=${source.metadata.url}`}
|
||||
width={16}
|
||||
height={16}
|
||||
|
|
@ -139,7 +143,7 @@ const MessageSources = ({ sources }: { sources: Document[] }) => {
|
|||
</p>
|
||||
</div>
|
||||
<div className="flex flex-row items-center space-x-1 text-black/50 dark:text-white/50 text-xs">
|
||||
<div className="bg-black/50 dark:bg-white/50 h-[4px] w-[4px] rounded-full" />
|
||||
{/* Small round dot; the dark-mode colour must use the hyphenated `dark:bg-white/50` utility. */}
<div className="bg-black/50 dark:bg-white/50 h-[4px] w-[4px] rounded-full" />
|
||||
<span>{i + 1}</span>
|
||||
</div>
|
||||
</div>
|
||||
|
|
|
|||
207
ui/components/SearchPDFs.tsx
Normal file
207
ui/components/SearchPDFs.tsx
Normal file
|
|
@ -0,0 +1,207 @@
|
|||
/* eslint-disable @next/next/no-img-element */
|
||||
'use client';
|
||||
import { File, ExternalLink, PlusIcon, Download } from 'lucide-react';
|
||||
import { useState } from 'react';
|
||||
import { Message } from './ChatWindow';
|
||||
|
||||
/** One PDF search hit as kept in the `pdfs` state and rendered as a result row. */
interface PDF {
  /** Text shown as the row title. */
  title: string;
  /** Target of the row's "open" and "download" links. */
  url: string;
  /** Category used to pick the icon colour; rows without it are coloured as 'document'. */
  type?: 'academic' | 'document' | 'article';
}
|
||||
|
||||
export const SearchPDFs = ({
|
||||
query,
|
||||
chatHistory,
|
||||
}: {
|
||||
query: string;
|
||||
chatHistory: Message[];
|
||||
}) => {
|
||||
const [pdfs, setPdfs] = useState<PDF[] | null>(null);
|
||||
const [loading, setLoading] = useState(false);
|
||||
|
||||
/**
 * Guess a category for a PDF from keywords in its title (case-insensitive).
 *
 * Academic keywords are checked first, so a title matching both groups is
 * reported as 'academic'. Titles matching neither group are 'document'.
 */
const getPDFType = (title: string): 'academic' | 'document' | 'article' => {
  const haystack = title.toLowerCase();
  const mentionsAny = (...keywords: string[]): boolean =>
    keywords.some((keyword) => haystack.includes(keyword));

  return mentionsAny('paper', 'journal', 'research')
    ? 'academic'
    : mentionsAny('article', 'blog')
      ? 'article'
      : 'document';
};
|
||||
|
||||
/**
 * Map a PDF category to the Tailwind background/text classes of its icon badge.
 * Anything other than 'academic' or 'article' gets the red palette.
 */
const getTypeColor = (type: 'academic' | 'document' | 'article') => {
  if (type === 'academic') {
    return 'bg-blue-500/10 text-blue-500';
  }
  if (type === 'article') {
    return 'bg-green-500/10 text-green-500';
  }
  return 'bg-red-500/10 text-red-500';
};
|
||||
|
||||
/**
 * Convert chat messages into `[speaker, content]` pairs for the search
 * request body: 'user' messages are labelled 'human', every other role 'ai'.
 */
const formatChatHistory = (history: Message[]) =>
  history.map(({ role, content }) => [role === 'user' ? 'human' : 'ai', content]);
|
||||
|
||||
/**
 * Ask the backend `/search` endpoint for PDF documents related to `query`
 * and store the PDF-looking sources in the `pdfs` state.
 *
 * Chat-model settings are read from localStorage and forwarded with the
 * request. Any failure (network error, non-2xx status, unparsable JSON) is
 * logged and leaves `pdfs` as an empty list. `loading` is true for the
 * duration of the call.
 */
const searchForPDFs = async () => {
  setLoading(true);

  const chatModelProvider = localStorage.getItem('chatModelProvider');
  const chatModel = localStorage.getItem('chatModel');

  const customOpenAIBaseURL = localStorage.getItem('openAIBaseURL');
  const customOpenAIKey = localStorage.getItem('openAIApiKey');

  // Only the fields read from each entry of the response's `sources` array.
  type SearchSource = { metadata?: { title?: unknown; url?: unknown } };

  // The result list builds links and a hostname caption from the URL, so only
  // values that parse as absolute URLs are usable there.
  const isAbsoluteUrl = (value: string): boolean => {
    try {
      new URL(value);
      return true;
    } catch {
      return false;
    }
  };

  try {
    const res = await fetch(`${process.env.NEXT_PUBLIC_API_URL}/search`, {
      method: 'POST',
      headers: {
        'Content-Type': 'application/json',
      },
      body: JSON.stringify({
        query: `PDF documents about: ${query}`,
        focusMode: 'webSearch',
        optimizationMode: 'balanced',
        history: formatChatHistory(chatHistory),
        chatModel: {
          provider: chatModelProvider,
          model: chatModel,
          ...(chatModelProvider === 'custom_openai' && {
            customOpenAIKey: customOpenAIKey,
            customOpenAIBaseURL: customOpenAIBaseURL,
          }),
        },
      }),
    });

    // Surface HTTP errors instead of treating an error payload as "no results".
    if (!res.ok) {
      throw new Error(`PDF search request failed with status ${res.status}`);
    }

    const data = await res.json();
    const sources: SearchSource[] = Array.isArray(data?.sources)
      ? data.sources
      : [];

    const pdfResults: PDF[] = [];
    for (const source of sources) {
      const rawUrl = source.metadata?.url;
      const rawTitle = source.metadata?.title;
      const title = typeof rawTitle === 'string' ? rawTitle : '';

      // A title match alone is not enough: without a usable URL the entry
      // cannot be linked to (uploaded files, for example, carry 'File').
      if (typeof rawUrl !== 'string' || !isAbsoluteUrl(rawUrl)) {
        continue;
      }

      const looksLikePdf =
        rawUrl.toLowerCase().includes('.pdf') || title.includes('PDF');
      if (!looksLikePdf) {
        continue;
      }

      pdfResults.push({
        title: title || 'PDF Document',
        url: rawUrl,
        type: getPDFType(title),
      });
    }

    setPdfs(pdfResults);
  } catch (error) {
    console.error('Error searching for PDFs:', error);
    setPdfs([]);
  } finally {
    setLoading(false);
  }
};
|
||||
|
||||
// Hostname caption for a result row. `new URL` throws a TypeError for strings
// that are not absolute URLs, which would abort the whole render, so fall
// back to showing the raw value instead.
const getHostname = (url: string): string => {
  try {
    return new URL(url).hostname;
  } catch {
    return url;
  }
};

return (
  <>
    {/* Idle state: nothing searched yet, offer the search button. */}
    {!loading && pdfs === null && (
      <button
        id="search-pdfs"
        onClick={searchForPDFs}
        className="border border-dashed border-light-200 dark:border-dark-200 hover:bg-light-200 dark:hover:bg-dark-200 active:scale-95 duration-200 transition px-4 py-2 flex flex-row items-center justify-between rounded-lg dark:text-white text-sm w-full"
      >
        <div className="flex flex-row items-center space-x-2">
          <File size={17} />
          <p>Search PDFs</p>
        </div>
        <PlusIcon className="text-[#24A0ED]" size={17} />
      </button>
    )}

    {/* Loading state: four pulsing placeholders. */}
    {loading && (
      <div className="grid grid-cols-2 gap-2">
        {[...Array(4)].map((_, i) => (
          <div
            key={i}
            className="bg-light-secondary dark:bg-dark-secondary h-32 w-full rounded-lg animate-pulse aspect-video object-cover"
          />
        ))}
      </div>
    )}

    {/* Results state: one row per PDF with open and download actions. */}
    {pdfs !== null && pdfs.length > 0 && (
      <div className="bg-light-secondary dark:bg-dark-secondary w-full rounded-lg p-4 flex flex-col space-y-4">
        <div className="flex flex-row items-center justify-between">
          <div className="flex flex-row items-center space-x-2">
            <File />
            <h3 className="font-medium text-black dark:text-white">PDF Documents</h3>
            <span className="text-xs text-black/50 dark:text-white/50">({pdfs.length})</span>
          </div>
        </div>

        <div className="flex flex-col space-y-2">
          {pdfs.map((pdf, i) => (
            <div
              key={i}
              className="bg-light-100 dark:bg-dark-100 rounded-lg p-3 hover:bg-light-200 dark:hover:bg-dark-200 transition duration-200"
            >
              <div className="flex flex-row items-center justify-between">
                <div className="flex flex-row items-center space-x-3 flex-grow min-w-0">
                  <div className={`p-2 rounded-md ${getTypeColor(pdf.type || 'document')}`}>
                    <File size={16} />
                  </div>
                  <div className="flex flex-col space-y-1 flex-grow min-w-0">
                    <p className="text-sm font-medium text-black dark:text-white truncate" title={pdf.title}>
                      {pdf.title}
                    </p>
                    <p className="text-xs text-black/50 dark:text-white/50 truncate">
                      {getHostname(pdf.url)}
                    </p>
                  </div>
                </div>
                <div className="flex flex-row items-center space-x-2 flex-shrink-0 ml-2">
                  <a
                    href={pdf.url}
                    target="_blank"
                    rel="noreferrer"
                    aria-label="Open PDF in a new tab"
                    className="p-2 text-black/70 dark:text-white/70 rounded-xl hover:bg-light-secondary dark:hover:bg-dark-secondary transition duration-200 hover:text-black dark:hover:text-white"
                  >
                    <ExternalLink size={16} />
                  </a>
                  {/* Browsers ignore `download` for cross-origin URLs and navigate
                      instead; opening in a new tab keeps the app in place. */}
                  <a
                    href={pdf.url}
                    download
                    target="_blank"
                    rel="noreferrer"
                    aria-label="Download PDF"
                    className="p-2 text-black/70 dark:text-white/70 rounded-xl hover:bg-light-secondary dark:hover:bg-dark-secondary transition duration-200 hover:text-black dark:hover:text-white"
                  >
                    <Download size={16} />
                  </a>
                </div>
              </div>
            </div>
          ))}
        </div>
      </div>
    )}

    {/* Empty state: the search ran but produced no PDFs. */}
    {pdfs !== null && pdfs.length === 0 && (
      <div className="bg-light-secondary dark:bg-dark-secondary w-full rounded-lg p-4 flex flex-col space-y-2">
        <div className="flex flex-row items-center space-x-2">
          <File />
          <h3 className="font-medium text-black dark:text-white">PDF Documents</h3>
        </div>
        <p className="text-sm text-black/60 dark:text-white/60">No PDF documents found related to your query.</p>
      </div>
    )}
  </>
);
|
||||
};
|
||||
Loading…
Add table
Add a link
Reference in a new issue