diff --git a/.assets/manifest.json b/.assets/manifest.json deleted file mode 100644 index e69de29..0000000 diff --git a/.assets/perplexica-screenshot.png b/.assets/perplexica-screenshot.png index fc7a697..c47a544 100644 Binary files a/.assets/perplexica-screenshot.png and b/.assets/perplexica-screenshot.png differ diff --git a/.env.example b/.env.example new file mode 100644 index 0000000..1c735cc --- /dev/null +++ b/.env.example @@ -0,0 +1,5 @@ +PORT=3001 +OLLAMA_URL=http://localhost:11434 # url of the ollama server +SIMILARITY_MEASURE=cosine # cosine or dot +SEARXNG_API_URL= # no need to fill this if using docker +MODEL_NAME=llama2 \ No newline at end of file diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md index 1de1177..e065bb4 100644 --- a/.github/ISSUE_TEMPLATE/bug_report.md +++ b/.github/ISSUE_TEMPLATE/bug_report.md @@ -4,6 +4,7 @@ about: Create an issue to help us fix bugs title: '' labels: bug assignees: '' + --- **Describe the bug** @@ -11,7 +12,6 @@ A clear and concise description of what the bug is. **To Reproduce** Steps to reproduce the behavior: - 1. Go to '...' 2. Click on '....' 3. Scroll down to '....' diff --git a/.github/ISSUE_TEMPLATE/custom.md b/.github/ISSUE_TEMPLATE/custom.md index 96a4735..48d5f81 100644 --- a/.github/ISSUE_TEMPLATE/custom.md +++ b/.github/ISSUE_TEMPLATE/custom.md @@ -4,4 +4,7 @@ about: Describe this issue template's purpose here. title: '' labels: '' assignees: '' + --- + + diff --git a/.github/ISSUE_TEMPLATE/feature_request.md b/.github/ISSUE_TEMPLATE/feature_request.md index 5f0a04c..11fc491 100644 --- a/.github/ISSUE_TEMPLATE/feature_request.md +++ b/.github/ISSUE_TEMPLATE/feature_request.md @@ -4,6 +4,7 @@ about: Suggest an idea for this project title: '' labels: enhancement assignees: '' + --- **Is your feature request related to a problem? 
Please describe.** diff --git a/.github/workflows/docker-build.yaml b/.github/workflows/docker-build.yaml deleted file mode 100644 index 29f7987..0000000 --- a/.github/workflows/docker-build.yaml +++ /dev/null @@ -1,138 +0,0 @@ -name: Build & Push Docker Images - -on: - push: - branches: - - master - release: - types: [published] - -jobs: - build-amd64: - runs-on: ubuntu-latest - steps: - - name: Checkout code - uses: actions/checkout@v3 - - - name: Set up Docker Buildx - uses: docker/setup-buildx-action@v2 - with: - install: true - - - name: Log in to DockerHub - uses: docker/login-action@v2 - with: - username: ${{ secrets.DOCKER_USERNAME }} - password: ${{ secrets.DOCKER_PASSWORD }} - - - name: Extract version from release tag - if: github.event_name == 'release' - id: version - run: echo "RELEASE_VERSION=${GITHUB_REF#refs/tags/}" >> $GITHUB_ENV - - - name: Build and push AMD64 Docker image - if: github.ref == 'refs/heads/master' && github.event_name == 'push' - run: | - DOCKERFILE=app.dockerfile - IMAGE_NAME=perplexica - docker buildx build --platform linux/amd64 \ - --cache-from=type=registry,ref=itzcrazykns1337/${IMAGE_NAME}:amd64 \ - --cache-to=type=inline \ - --provenance false \ - -f $DOCKERFILE \ - -t itzcrazykns1337/${IMAGE_NAME}:amd64 \ - --push . - - - name: Build and push AMD64 release Docker image - if: github.event_name == 'release' - run: | - DOCKERFILE=app.dockerfile - IMAGE_NAME=perplexica - docker buildx build --platform linux/amd64 \ - --cache-from=type=registry,ref=itzcrazykns1337/${IMAGE_NAME}:${{ env.RELEASE_VERSION }}-amd64 \ - --cache-to=type=inline \ - --provenance false \ - -f $DOCKERFILE \ - -t itzcrazykns1337/${IMAGE_NAME}:${{ env.RELEASE_VERSION }}-amd64 \ - --push . 
- - build-arm64: - runs-on: ubuntu-24.04-arm - steps: - - name: Checkout code - uses: actions/checkout@v3 - - - name: Set up Docker Buildx - uses: docker/setup-buildx-action@v2 - with: - install: true - - - name: Log in to DockerHub - uses: docker/login-action@v2 - with: - username: ${{ secrets.DOCKER_USERNAME }} - password: ${{ secrets.DOCKER_PASSWORD }} - - - name: Extract version from release tag - if: github.event_name == 'release' - id: version - run: echo "RELEASE_VERSION=${GITHUB_REF#refs/tags/}" >> $GITHUB_ENV - - - name: Build and push ARM64 Docker image - if: github.ref == 'refs/heads/master' && github.event_name == 'push' - run: | - DOCKERFILE=app.dockerfile - IMAGE_NAME=perplexica - docker buildx build --platform linux/arm64 \ - --cache-from=type=registry,ref=itzcrazykns1337/${IMAGE_NAME}:arm64 \ - --cache-to=type=inline \ - --provenance false \ - -f $DOCKERFILE \ - -t itzcrazykns1337/${IMAGE_NAME}:arm64 \ - --push . - - - name: Build and push ARM64 release Docker image - if: github.event_name == 'release' - run: | - DOCKERFILE=app.dockerfile - IMAGE_NAME=perplexica - docker buildx build --platform linux/arm64 \ - --cache-from=type=registry,ref=itzcrazykns1337/${IMAGE_NAME}:${{ env.RELEASE_VERSION }}-arm64 \ - --cache-to=type=inline \ - --provenance false \ - -f $DOCKERFILE \ - -t itzcrazykns1337/${IMAGE_NAME}:${{ env.RELEASE_VERSION }}-arm64 \ - --push . 
- - manifest: - needs: [build-amd64, build-arm64] - runs-on: ubuntu-latest - steps: - - name: Log in to DockerHub - uses: docker/login-action@v2 - with: - username: ${{ secrets.DOCKER_USERNAME }} - password: ${{ secrets.DOCKER_PASSWORD }} - - - name: Extract version from release tag - if: github.event_name == 'release' - id: version - run: echo "RELEASE_VERSION=${GITHUB_REF#refs/tags/}" >> $GITHUB_ENV - - - name: Create and push multi-arch manifest for main - if: github.ref == 'refs/heads/master' && github.event_name == 'push' - run: | - IMAGE_NAME=perplexica - docker manifest create itzcrazykns1337/${IMAGE_NAME}:main \ - --amend itzcrazykns1337/${IMAGE_NAME}:amd64 \ - --amend itzcrazykns1337/${IMAGE_NAME}:arm64 - docker manifest push itzcrazykns1337/${IMAGE_NAME}:main - - - name: Create and push multi-arch manifest for releases - if: github.event_name == 'release' - run: | - IMAGE_NAME=perplexica - docker manifest create itzcrazykns1337/${IMAGE_NAME}:${{ env.RELEASE_VERSION }} \ - --amend itzcrazykns1337/${IMAGE_NAME}:${{ env.RELEASE_VERSION }}-amd64 \ - --amend itzcrazykns1337/${IMAGE_NAME}:${{ env.RELEASE_VERSION }}-arm64 - docker manifest push itzcrazykns1337/${IMAGE_NAME}:${{ env.RELEASE_VERSION }} diff --git a/.gitignore b/.gitignore index 9fb5e4c..0f857e0 100644 --- a/.gitignore +++ b/.gitignore @@ -4,9 +4,8 @@ npm-debug.log yarn-error.log # Build output -.next/ -out/ -dist/ +/.next/ +/out/ # IDE/Editor specific .vscode/ @@ -20,9 +19,6 @@ dist/ .env.test.local .env.production.local -# Config files -config.toml - # Log files logs/ *.log @@ -32,10 +28,4 @@ logs/ # Miscellaneous .DS_Store -Thumbs.db - -# Db -db.sqlite -/searxng - -certificates \ No newline at end of file +Thumbs.db \ No newline at end of file diff --git a/.prettierignore b/.prettierignore deleted file mode 100644 index 55d3c7c..0000000 --- a/.prettierignore +++ /dev/null @@ -1,41 +0,0 @@ -# Ignore all files in the node_modules directory -node_modules - -# Ignore all files in the .next directory 
(Next.js build output) -.next - -# Ignore all files in the .out directory (TypeScript build output) -.out - -# Ignore all files in the .cache directory (Prettier cache) -.cache - -# Ignore all files in the .vscode directory (Visual Studio Code settings) -.vscode - -# Ignore all files in the .idea directory (IntelliJ IDEA settings) -.idea - -# Ignore all files in the dist directory (build output) -dist - -# Ignore all files in the build directory (build output) -build - -# Ignore all files in the coverage directory (test coverage reports) -coverage - -# Ignore all files with the .log extension -*.log - -# Ignore all files with the .tmp extension -*.tmp - -# Ignore all files with the .swp extension -*.swp - -# Ignore all files with the .DS_Store extension (macOS specific) -.DS_Store - -# Ignore all files in uploads directory -uploads \ No newline at end of file diff --git a/.prettierrc.js b/.prettierrc.js index 8ca480f..1937ff1 100644 --- a/.prettierrc.js +++ b/.prettierrc.js @@ -6,6 +6,7 @@ const config = { endOfLine: 'auto', singleQuote: true, tabWidth: 2, + semi: true, }; module.exports = config; diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 7bbb280..af43ae1 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -1,43 +1,32 @@ # How to Contribute to Perplexica -Thanks for your interest in contributing to Perplexica! Your help makes this project better. This guide explains how to contribute effectively. - -Perplexica is a modern AI chat application with advanced search capabilities. +Hey there, thanks for deciding to contribute to Perplexica. Anything you help with will support the development of Perplexica and will make it better. Let's walk you through the key aspects to ensure your contributions are effective and in harmony with the project's setup. 
## Project Structure -Perplexica's codebase is organized as follows: +Perplexica's design consists of two main domains: -- **UI Components and Pages**: - - **Components (`src/components`)**: Reusable UI components. - - **Pages and Routes (`src/app`)**: Next.js app directory structure with page components. - - Main app routes include: home (`/`), chat (`/c`), discover (`/discover`), library (`/library`), and settings (`/settings`). - - **API Routes (`src/app/api`)**: API endpoints implemented with Next.js API routes. - - `/api/chat`: Handles chat interactions. - - `/api/search`: Provides direct access to Perplexica's search capabilities. - - Other endpoints for models, files, and suggestions. -- **Backend Logic (`src/lib`)**: Contains all the backend functionality including search, database, and API logic. - - The search functionality is present inside `src/lib/search` directory. - - All of the focus modes are implemented using the Meta Search Agent class in `src/lib/search/metaSearchAgent.ts`. - - Database functionality is in `src/lib/db`. - - Chat model and embedding model providers are managed in `src/lib/providers`. - - Prompt templates and LLM chain definitions are in `src/lib/prompts` and `src/lib/chains` respectively. +- **Frontend (`ui` directory)**: This is a Next.js application holding all user interface components. It's a self-contained environment that manages everything the user interacts with. +- **Backend (root and `src` directory)**: The backend logic is situated in the `src` folder, but the root directory holds the main `package.json` for backend dependency management. -## API Documentation - -Perplexica exposes several API endpoints for programmatic access, including: - -- **Search API**: Access Perplexica's advanced search capabilities directly via the `/api/search` endpoint. For detailed documentation, see `docs/api/search.md`. 
+Both the root directory (for backend configurations outside `src`) and the `ui` folder come with an `.env.example` file. These are templates for environment variables that you need to set up manually for the application to run correctly. ## Setting Up Your Environment Before diving into coding, setting up your local environment is key. Here's what you need to do: -1. In the root directory, locate the `sample.config.toml` file. -2. Rename it to `config.toml` and fill in the necessary configuration fields. -3. Run `npm install` to install all dependencies. -4. Run `npm run db:push` to set up the local sqlite database. -5. Use `npm run dev` to start the application in development mode. +### Backend + +1. In the root directory, locate the `.env.example` file. +2. Rename it to `.env` and fill in the necessary environment variables specific to the backend. +3. Run `npm install` to install dependencies. +4. Use `npm run dev` to start the backend in development mode. + +### Frontend + +1. Navigate to the `ui` folder and repeat the process of renaming `.env.example` to `.env`, making sure to provide the frontend-specific variables. +2. Execute `npm install` within the `ui` directory to get the frontend dependencies ready. +3. Launch the frontend development server with `npm run dev`. **Please note**: Docker configurations are present for setting up production environments, whereas `npm run dev` is used for development purposes. diff --git a/README.md b/README.md index 5eb0713..562bd65 100644 --- a/README.md +++ b/README.md @@ -1,24 +1,6 @@ # 🚀 Perplexica - An AI-powered search engine 🔎 -
- Special thanks to: -
-
- - Warp sponsorship - - -### [Warp, the AI Devtool that lives in your terminal](https://www.warp.dev/perplexica) - -[Available for MacOS, Linux, & Windows](https://www.warp.dev/perplexica) - -
- -
- -[![Discord](https://dcbadge.limes.pink/api/server/26aArMy8tT?style=flat)](https://discord.gg/26aArMy8tT) - -![preview](.assets/perplexica-screenshot.png?) +![preview](.assets/perplexica-screenshot.png) ## Table of Contents @@ -28,44 +10,34 @@ - [Installation](#installation) - [Getting Started with Docker (Recommended)](#getting-started-with-docker-recommended) - [Non-Docker Installation](#non-docker-installation) - - [Ollama Connection Errors](#ollama-connection-errors) -- [Using as a Search Engine](#using-as-a-search-engine) -- [Using Perplexica's API](#using-perplexicas-api) -- [Expose Perplexica to a network](#expose-perplexica-to-network) -- [One-Click Deployment](#one-click-deployment) - [Upcoming Features](#upcoming-features) - [Support Us](#support-us) - - [Donations](#donations) - [Contribution](#contribution) -- [Help and Support](#help-and-support) +- [Acknowledgements](#acknowledgements) ## Overview Perplexica is an open-source AI-powered searching tool or an AI-powered search engine that goes deep into the internet to find answers. Inspired by Perplexity AI, it's an open-source option that not just searches the web but understands your questions. It uses advanced machine learning algorithms like similarity searching and embeddings to refine results and provides clear answers with sources cited. -Using SearxNG to stay current and fully open source, Perplexica ensures you always get the most up-to-date information without compromising your privacy. - -Want to know more about its architecture and how it works? You can read it [here](https://github.com/ItzCrazyKns/Perplexica/tree/master/docs/architecture/README.md). - ## Preview ![video-preview](.assets/perplexica-preview.gif) ## Features -- **Local LLMs**: You can make use local LLMs such as Llama3 and Mixtral using Ollama. - **Two Main Modes:** - **Copilot Mode:** (In development) Boosts search by generating different queries to find more relevant internet sources. 
Like normal search instead of just using the context by SearxNG, it visits the top matches and tries to find relevant sources to the user's query directly from the page. - **Normal Mode:** Processes your query and performs a web search. - **Focus Modes:** Special modes to better answer specific types of questions. Perplexica currently has 6 focus modes: - - **All Mode:** Searches the entire web to find the best results. - - **Writing Assistant Mode:** Helpful for writing tasks that do not require searching the web. - - **Academic Search Mode:** Finds articles and papers, ideal for academic research. - - **YouTube Search Mode:** Finds YouTube videos based on the search query. - - **Wolfram Alpha Search Mode:** Answers queries that need calculations or data analysis using Wolfram Alpha. - - **Reddit Search Mode:** Searches Reddit for discussions and opinions related to the query. -- **Current Information:** Some search tools might give you outdated info because they use data from crawling bots and convert them into embeddings and store them in a index. Unlike them, Perplexica uses SearxNG, a metasearch engine to get the results and rerank and get the most relevant source out of it, ensuring you always get the latest information without the overhead of daily data updates. -- **API**: Integrate Perplexica into your existing applications and make use of its capibilities. + + 1. **All Mode:** Searches the entire web to find the best results. + 2. **Writing Assistant Mode:** Helpful for writing tasks that does not require searching the web. + 3. **Academic Search Mode:** Finds articles and papers, ideal for academic research. + 4. **YouTube Search Mode:** Finds YouTube videos based on the search query. + 5. **Wolfram Alpha Search Mode:** Answers queries that need calculations or data analysis using Wolfram Alpha. + 6. **Reddit Search Mode:** Searches Reddit for discussions and opinions related to the query. 
+ +- **Current Information:** Some search tools might give you outdated info because they use data from crawling bots and convert them into embeddings and store them in a index (its like converting the web into embeddings which is quite expensive.). Unlike them, Perplexica uses SearxNG, a metasearch engine to get the results and rerank and get the most relevent source out of it, ensuring you always get the latest information without the overhead of daily data updates. It has many more features like image and video search. Some of the planned features are mentioned in [upcoming features](#upcoming-features). @@ -79,119 +51,54 @@ There are mainly 2 ways of installing Perplexica - With Docker, Without Docker. 2. Clone the Perplexica repository: ```bash - git clone https://github.com/ItzCrazyKns/Perplexica.git + git clone -b feat/ollama-support https://github.com/ItzCrazyKns/Perplexica.git ``` 3. After cloning, navigate to the directory containing the project files. -4. Rename the `sample.config.toml` file to `config.toml`. For Docker setups, you need only fill in the following fields: +4. Rename the `.env.example` file to `.env`. For Docker setups, you need only fill in the following fields: - - `OPENAI`: Your OpenAI API key. **You only need to fill this if you wish to use OpenAI's models**. - - `OLLAMA`: Your Ollama API URL. You should enter it as `http://host.docker.internal:PORT_NUMBER`. If you installed Ollama on port 11434, use `http://host.docker.internal:11434`. For other ports, adjust accordingly. **You need to fill this if you wish to use Ollama's models instead of OpenAI's**. - - `GROQ`: Your Groq API key. **You only need to fill this if you wish to use Groq's hosted models**. - - `ANTHROPIC`: Your Anthropic API key. **You only need to fill this if you wish to use Anthropic models**. - - `Gemini`: Your Gemini API key. **You only need to fill this if you wish to use Google's models**. - - `DEEPSEEK`: Your Deepseek API key. 
**Only needed if you want Deepseek models.** - - `AIMLAPI`: Your AI/ML API key. **Only needed if you want to use AI/ML API models and embeddings.** - - **Note**: You can change these after starting Perplexica from the settings dialog. - - - `SIMILARITY_MEASURE`: The similarity measure to use (This is filled by default; you can leave it as is if you are unsure about it.) + - `OLLAMA_URL` (It should be the URL where Ollama is running; it is also filled by default but you need to replace it if your Ollama URL is different.) + - `MODEL_NAME` (This is filled by default; you can change it if you want to use a different model.) + - `SIMILARITY_MEASURE` (This is filled by default; you can leave it as is if you are unsure about it.) 5. Ensure you are in the directory containing the `docker-compose.yaml` file and execute: ```bash - docker compose up -d + docker compose up ``` 6. Wait a few minutes for the setup to complete. You can access Perplexica at http://localhost:3000 in your web browser. -**Note**: After the containers are built, you can start Perplexica directly from Docker without having to open a terminal. +**Note**: Once the terminal is stopped, Perplexica will also stop. To restart it, you will need to open Docker Desktop and run Perplexica again. ### Non-Docker Installation -1. Install SearXNG and allow `JSON` format in the SearXNG settings. -2. Clone the repository and rename the `sample.config.toml` file to `config.toml` in the root directory. Ensure you complete all required fields in this file. -3. After populating the configuration run `npm i`. -4. Install the dependencies and then execute `npm run build`. -5. Finally, start the app by running `npm run start` +For setups without Docker: + +1. Follow the initial steps to clone the repository and rename the `.env.example` file to `.env` in the root directory. You will need to fill in all the fields in this file. +2. 
Additionally, rename the `.env.example` file to `.env` in the `ui` folder and complete all fields. +3. The non-Docker setup requires manual configuration of both the backend and frontend. **Note**: Using Docker is recommended as it simplifies the setup process, especially for managing environment variables and dependencies. -See the [installation documentation](https://github.com/ItzCrazyKns/Perplexica/tree/master/docs/installation) for more information like updating, etc. - -### Ollama Connection Errors - -If you're encountering an Ollama connection error, it is likely due to the backend being unable to connect to Ollama's API. To fix this issue you can: - -1. **Check your Ollama API URL:** Ensure that the API URL is correctly set in the settings menu. -2. **Update API URL Based on OS:** - - - **Windows:** Use `http://host.docker.internal:11434` - - **Mac:** Use `http://host.docker.internal:11434` - - **Linux:** Use `http://:11434` - - Adjust the port number if you're using a different one. - -3. **Linux Users - Expose Ollama to Network:** - - - Inside `/etc/systemd/system/ollama.service`, you need to add `Environment="OLLAMA_HOST=0.0.0.0:11434"`. (Change the port number if you are using a different one.) Then reload the systemd manager configuration with `systemctl daemon-reload`, and restart Ollama by `systemctl restart ollama`. For more information see [Ollama docs](https://github.com/ollama/ollama/blob/main/docs/faq.md#setting-environment-variables-on-linux) - - - Ensure that the port (default is 11434) is not blocked by your firewall. - -## Using as a Search Engine - -If you wish to use Perplexica as an alternative to traditional search engines like Google or Bing, or if you want to add a shortcut for quick access from your browser's search bar, follow these steps: - -1. Open your browser's settings. -2. Navigate to the 'Search Engines' section. -3. Add a new site search with the following URL: `http://localhost:3000/?q=%s`. 
Replace `localhost` with your IP address or domain name, and `3000` with the port number if Perplexica is not hosted locally. -4. Click the add button. Now, you can use Perplexica directly from your browser's search bar. - -## Using Perplexica's API - -Perplexica also provides an API for developers looking to integrate its powerful search engine into their own applications. You can run searches, use multiple models and get answers to your queries. - -For more details, check out the full documentation [here](https://github.com/ItzCrazyKns/Perplexica/tree/master/docs/API/SEARCH.md). - -## Expose Perplexica to network - -Perplexica runs on Next.js and handles all API requests. It works right away on the same network and stays accessible even with port forwarding. - -## One-Click Deployment - -[![Deploy to Sealos](https://raw.githubusercontent.com/labring-actions/templates/main/Deploy-on-Sealos.svg)](https://usw.sealos.io/?openapp=system-template%3FtemplateName%3Dperplexica) -[![Deploy to RepoCloud](https://d16t0pc4846x52.cloudfront.net/deploylobe.svg)](https://repocloud.io/details/?app_id=267) -[![Run on ClawCloud](https://raw.githubusercontent.com/ClawCloud/Run-Template/refs/heads/main/Run-on-ClawCloud.svg)](https://template.run.claw.cloud/?referralCode=U11MRQ8U9RM4&openapp=system-fastdeploy%3FtemplateName%3Dperplexica) - ## Upcoming Features -- [x] Add settings page -- [x] Adding support for local LLMs -- [x] History Saving features -- [x] Introducing various Focus Modes -- [x] Adding API support -- [x] Adding Discover - [ ] Finalizing Copilot Mode +- [ ] Adding support for multiple local LLMs and LLM providers such as Anthropic, Google, etc. +- [ ] Adding Discover and History Saving features +- [x] Introducing various Focus Modes ## Support Us -If you find Perplexica useful, consider giving us a star on GitHub. This helps more people discover Perplexica and supports the development of new features. Your support is greatly appreciated. 
- -### Donations - -We also accept donations to help sustain our project. If you would like to contribute, you can use the following options to donate. Thank you for your support! - -| Ethereum | -| ----------------------------------------------------- | -| Address: `0xB025a84b2F269570Eb8D4b05DEdaA41D8525B6DD` | +If you find Perplexica useful, consider giving us a star on GitHub. This helps more people discover Perplexica and supports the development of new features. Your support is appreciated. ## Contribution Perplexica is built on the idea that AI and large language models should be easy for everyone to use. If you find bugs or have ideas, please share them in via GitHub Issues. For more information on contributing to Perplexica you can read the [CONTRIBUTING.md](CONTRIBUTING.md) file to learn more about Perplexica and how you can contribute to it. -## Help and Support +## Acknowledgements -If you have any questions or feedback, please feel free to reach out to us. You can create an issue on GitHub or join our Discord server. There, you can connect with other users, share your experiences and reviews, and receive more personalized help. [Click here](https://discord.gg/EFwsmQDgAu) to join the Discord server. To discuss matters outside of regular support, feel free to contact me on Discord at `itzcrazykns`. +Inspired by Perplexity AI, Perplexica aims to provide a similar service but always up-to-date and fully open source, thanks to SearxNG. -Thank you for exploring Perplexica, the AI-powered search engine designed to enhance your search experience. We are constantly working to improve Perplexica and expand its capabilities. We value your feedback and contributions which help us make Perplexica even better. Don't forget to check back for updates and new features! +If you have any queries you can reach me via my Discord - `itzcrazykns`. Thanks for checking out Perplexica. 
diff --git a/app.dockerfile b/app.dockerfile index c3c0fd0..105cf86 100644 --- a/app.dockerfile +++ b/app.dockerfile @@ -1,35 +1,15 @@ -FROM node:20.18.0-slim AS builder +FROM node:alpine + +ARG NEXT_PUBLIC_WS_URL +ARG NEXT_PUBLIC_API_URL +ENV NEXT_PUBLIC_WS_URL=${NEXT_PUBLIC_WS_URL} +ENV NEXT_PUBLIC_API_URL=${NEXT_PUBLIC_API_URL} WORKDIR /home/perplexica -COPY package.json yarn.lock ./ -RUN yarn install --frozen-lockfile --network-timeout 600000 +COPY ui /home/perplexica/ -COPY tsconfig.json next.config.mjs next-env.d.ts postcss.config.js drizzle.config.ts tailwind.config.ts ./ -COPY src ./src -COPY public ./public - -RUN mkdir -p /home/perplexica/data +RUN yarn install RUN yarn build -RUN yarn add --dev @vercel/ncc -RUN yarn ncc build ./src/lib/db/migrate.ts -o migrator - -FROM node:20.18.0-slim - -WORKDIR /home/perplexica - -COPY --from=builder /home/perplexica/public ./public -COPY --from=builder /home/perplexica/.next/static ./public/_next/static - -COPY --from=builder /home/perplexica/.next/standalone ./ -COPY --from=builder /home/perplexica/data ./data -COPY drizzle ./drizzle -COPY --from=builder /home/perplexica/migrator/build ./build -COPY --from=builder /home/perplexica/migrator/index.js ./migrate.js - -RUN mkdir /home/perplexica/uploads - -COPY entrypoint.sh ./entrypoint.sh -RUN chmod +x ./entrypoint.sh -CMD ["./entrypoint.sh"] \ No newline at end of file +CMD ["yarn", "start"] \ No newline at end of file diff --git a/backend.dockerfile b/backend.dockerfile new file mode 100644 index 0000000..6cbd192 --- /dev/null +++ b/backend.dockerfile @@ -0,0 +1,17 @@ +FROM node:alpine + +ARG SEARXNG_API_URL +ENV SEARXNG_API_URL=${SEARXNG_API_URL} + +WORKDIR /home/perplexica + +COPY src /home/perplexica/src +COPY tsconfig.json /home/perplexica/ +COPY .env /home/perplexica/ +COPY package.json /home/perplexica/ +COPY yarn.lock /home/perplexica/ + +RUN yarn install +RUN yarn build + +CMD ["yarn", "start"] \ No newline at end of file diff --git a/data/.gitignore 
b/data/.gitignore deleted file mode 100644 index d6b7ef3..0000000 --- a/data/.gitignore +++ /dev/null @@ -1,2 +0,0 @@ -* -!.gitignore diff --git a/docker-compose.yaml b/docker-compose.yaml index b32e0a9..3a73660 100644 --- a/docker-compose.yaml +++ b/docker-compose.yaml @@ -1,35 +1,44 @@ services: searxng: - image: docker.io/searxng/searxng:latest - volumes: - - ./searxng:/etc/searxng:rw + build: + context: . + dockerfile: searxng.dockerfile + expose: + - 4000 ports: - 4000:8080 networks: - perplexica-network - restart: unless-stopped + perplexica-backend: + build: + context: . + dockerfile: backend.dockerfile + args: + - SEARXNG_API_URL=http://searxng:8080 + depends_on: + - searxng + expose: + - 3001 + ports: + - 3001:3001 + networks: + - perplexica-network - app: - image: itzcrazykns1337/perplexica:main + perplexica-frontend: build: context: . dockerfile: app.dockerfile - environment: - - SEARXNG_API_URL=http://searxng:8080 - - DATA_DIR=/home/perplexica + args: + - NEXT_PUBLIC_API_URL=http://127.0.0.1:3001/api + - NEXT_PUBLIC_WS_URL=ws://127.0.0.1:3001 + depends_on: + - perplexica-backend + expose: + - 3000 ports: - 3000:3000 networks: - perplexica-network - volumes: - - backend-dbstore:/home/perplexica/data - - uploads:/home/perplexica/uploads - - ./config.toml:/home/perplexica/config.toml - restart: unless-stopped networks: perplexica-network: - -volumes: - backend-dbstore: - uploads: diff --git a/docs/API/SEARCH.md b/docs/API/SEARCH.md deleted file mode 100644 index b67b62b..0000000 --- a/docs/API/SEARCH.md +++ /dev/null @@ -1,145 +0,0 @@ -# Perplexica Search API Documentation - -## Overview - -Perplexica’s Search API makes it easy to use our AI-powered search engine. You can run different types of searches, pick the models you want to use, and get the most recent info. Follow the following headings to learn more about Perplexica's search API. 
- -## Endpoint - -### **POST** `http://localhost:3000/api/search` - -**Note**: Replace `3000` with any other port if you've changed the default PORT - -### Request - -The API accepts a JSON object in the request body, where you define the focus mode, chat models, embedding models, and your query. - -#### Request Body Structure - -```json -{ - "chatModel": { - "provider": "openai", - "name": "gpt-4o-mini" - }, - "embeddingModel": { - "provider": "openai", - "name": "text-embedding-3-large" - }, - "optimizationMode": "speed", - "focusMode": "webSearch", - "query": "What is Perplexica", - "history": [ - ["human", "Hi, how are you?"], - ["assistant", "I am doing well, how can I help you today?"] - ], - "systemInstructions": "Focus on providing technical details about Perplexica's architecture.", - "stream": false -} -``` - -### Request Parameters - -- **`chatModel`** (object, optional): Defines the chat model to be used for the query. For model details you can send a GET request at `http://localhost:3000/api/models`. Make sure to use the key value (For example "gpt-4o-mini" instead of the display name "GPT 4 omni mini"). - - - `provider`: Specifies the provider for the chat model (e.g., `openai`, `ollama`). - - `name`: The specific model from the chosen provider (e.g., `gpt-4o-mini`). - - Optional fields for custom OpenAI configuration: - - `customOpenAIBaseURL`: If you’re using a custom OpenAI instance, provide the base URL. - - `customOpenAIKey`: The API key for a custom OpenAI instance. - -- **`embeddingModel`** (object, optional): Defines the embedding model for similarity-based searching. For model details you can send a GET request at `http://localhost:3000/api/models`. Make sure to use the key value (For example "text-embedding-3-large" instead of the display name "Text Embedding 3 Large"). - - - `provider`: The provider for the embedding model (e.g., `openai`). - - `name`: The specific embedding model (e.g., `text-embedding-3-large`). 
- -- **`focusMode`** (string, required): Specifies which focus mode to use. Available modes: - - - `webSearch`, `academicSearch`, `writingAssistant`, `wolframAlphaSearch`, `youtubeSearch`, `redditSearch`. - -- **`optimizationMode`** (string, optional): Specifies the optimization mode to control the balance between performance and quality. Available modes: - - - `speed`: Prioritize speed and return the fastest answer. - - `balanced`: Provide a balanced answer with good speed and reasonable quality. - -- **`query`** (string, required): The search query or question. - -- **`systemInstructions`** (string, optional): Custom instructions provided by the user to guide the AI's response. These instructions are treated as user preferences and have lower priority than the system's core instructions. For example, you can specify a particular writing style, format, or focus area. - -- **`history`** (array, optional): An array of message pairs representing the conversation history. Each pair consists of a role (either 'human' or 'assistant') and the message content. This allows the system to use the context of the conversation to refine results. Example: - - ```json - [ - ["human", "What is Perplexica?"], - ["assistant", "Perplexica is an AI-powered search engine..."] - ] - ``` - -- **`stream`** (boolean, optional): When set to `true`, enables streaming responses. Default is `false`. - -### Response - -The response from the API includes both the final message and the sources used to generate that message. - -#### Standard Response (stream: false) - -```json -{ - "message": "Perplexica is an innovative, open-source AI-powered search engine designed to enhance the way users search for information online. 
Here are some key features and characteristics of Perplexica:\n\n- **AI-Powered Technology**: It utilizes advanced machine learning algorithms to not only retrieve information but also to understand the context and intent behind user queries, providing more relevant results [1][5].\n\n- **Open-Source**: Being open-source, Perplexica offers flexibility and transparency, allowing users to explore its functionalities without the constraints of proprietary software [3][10].", - "sources": [ - { - "pageContent": "Perplexica is an innovative, open-source AI-powered search engine designed to enhance the way users search for information online.", - "metadata": { - "title": "What is Perplexica, and how does it function as an AI-powered search ...", - "url": "https://askai.glarity.app/search/What-is-Perplexica--and-how-does-it-function-as-an-AI-powered-search-engine" - } - }, - { - "pageContent": "Perplexica is an open-source AI-powered search tool that dives deep into the internet to find precise answers.", - "metadata": { - "title": "Sahar Mor's Post", - "url": "https://www.linkedin.com/posts/sahar-mor_a-new-open-source-project-called-perplexica-activity-7204489745668694016-ncja" - } - } - .... - ] -} -``` - -#### Streaming Response (stream: true) - -When streaming is enabled, the API returns a stream of newline-delimited JSON objects. Each line contains a complete, valid JSON object. The response has Content-Type: application/json. - -Example of streamed response objects: - -``` -{"type":"init","data":"Stream connected"} -{"type":"sources","data":[{"pageContent":"...","metadata":{"title":"...","url":"..."}},...]} -{"type":"response","data":"Perplexica is an "} -{"type":"response","data":"innovative, open-source "} -{"type":"response","data":"AI-powered search engine..."} -{"type":"done"} -``` - -Clients should process each line as a separate JSON object. 
The different message types include: - -- **`init`**: Initial connection message -- **`sources`**: All sources used for the response -- **`response`**: Chunks of the generated answer text -- **`done`**: Indicates the stream is complete - -### Fields in the Response - -- **`message`** (string): The search result, generated based on the query and focus mode. -- **`sources`** (array): A list of sources that were used to generate the search result. Each source includes: - - `pageContent`: A snippet of the relevant content from the source. - - `metadata`: Metadata about the source, including: - - `title`: The title of the webpage. - - `url`: The URL of the webpage. - -### Error Handling - -If an error occurs during the search process, the API will return an appropriate error message with an HTTP status code. - -- **400**: If the request is malformed or missing required fields (e.g., no focus mode or query). -- **500**: If an internal server error occurs during the search. diff --git a/docs/architecture/README.md b/docs/architecture/README.md deleted file mode 100644 index 5732471..0000000 --- a/docs/architecture/README.md +++ /dev/null @@ -1,11 +0,0 @@ -# Perplexica's Architecture - -Perplexica's architecture consists of the following key components: - -1. **User Interface**: A web-based interface that allows users to interact with Perplexica for searching images, videos, and much more. -2. **Agent/Chains**: These components predict Perplexica's next actions, understand user queries, and decide whether a web search is necessary. -3. **SearXNG**: A metadata search engine used by Perplexica to search the web for sources. -4. **LLMs (Large Language Models)**: Utilized by agents and chains for tasks like understanding content, writing responses, and citing sources. Examples include Claude, GPTs, etc. -5. 
**Embedding Models**: To improve the accuracy of search results, embedding models re-rank the results using similarity search algorithms such as cosine similarity and dot product distance. - -For a more detailed explanation of how these components work together, see [WORKING.md](https://github.com/ItzCrazyKns/Perplexica/tree/master/docs/architecture/WORKING.md). diff --git a/docs/architecture/WORKING.md b/docs/architecture/WORKING.md deleted file mode 100644 index 6bad4f9..0000000 --- a/docs/architecture/WORKING.md +++ /dev/null @@ -1,19 +0,0 @@ -# How does Perplexica work? - -Curious about how Perplexica works? Don't worry, we'll cover it here. Before we begin, make sure you've read about the architecture of Perplexica to ensure you understand what it's made up of. Haven't read it? You can read it [here](https://github.com/ItzCrazyKns/Perplexica/tree/master/docs/architecture/README.md). - -We'll understand how Perplexica works by taking an example of a scenario where a user asks: "How does an A.C. work?". We'll break down the process into steps to make it easier to understand. The steps are as follows: - -1. The message is sent to the `/api/chat` route where it invokes the chain. The chain will depend on your focus mode. For this example, let's assume we use the "webSearch" focus mode. -2. The chain is now invoked; first, the message is passed to another chain where it first predicts (using the chat history and the question) whether there is a need for sources and searching the web. If there is, it will generate a query (in accordance with the chat history) for searching the web that we'll take up later. If not, the chain will end there, and then the answer generator chain, also known as the response generator, will be started. -3. The query returned by the first chain is passed to SearXNG to search the web for information. -4. After the information is retrieved, it is based on keyword-based search. 
We then convert the information into embeddings and the query as well, then we perform a similarity search to find the most relevant sources to answer the query. -5. After all this is done, the sources are passed to the response generator. This chain takes all the chat history, the query, and the sources. It generates a response that is streamed to the UI. - -## How are the answers cited? - -The LLMs are prompted to do so. We've prompted them so well that they cite the answers themselves, and using some UI magic, we display it to the user. - -## Image and Video Search - -Image and video searches are conducted in a similar manner. A query is always generated first, then we search the web for images and videos that match the query. These results are then returned to the user. diff --git a/docs/installation/UPDATING.md b/docs/installation/UPDATING.md deleted file mode 100644 index 66edf5c..0000000 --- a/docs/installation/UPDATING.md +++ /dev/null @@ -1,46 +0,0 @@ -# Update Perplexica to the latest version - -To update Perplexica to the latest version, follow these steps: - -## For Docker users - -1. Clone the latest version of Perplexica from GitHub: - - ```bash - git clone https://github.com/ItzCrazyKns/Perplexica.git - ``` - -2. Navigate to the project directory. - -3. Check for changes in the configuration files. If the `sample.config.toml` file contains new fields, delete your existing `config.toml` file, rename `sample.config.toml` to `config.toml`, and update the configuration accordingly. - -4. Pull the latest images from the registry. - - ```bash - docker compose pull - ``` - -5. Update and recreate the containers. - - ```bash - docker compose up -d - ``` - -6. Once the command completes, go to http://localhost:3000 and verify the latest changes. - -## For non-Docker users - -1. Clone the latest version of Perplexica from GitHub: - - ```bash - git clone https://github.com/ItzCrazyKns/Perplexica.git - ``` - -2. Navigate to the project directory. - -3. 
Check for changes in the configuration files. If the `sample.config.toml` file contains new fields, delete your existing `config.toml` file, rename `sample.config.toml` to `config.toml`, and update the configuration accordingly. -4. After populating the configuration run `npm i`. -5. Install the dependencies and then execute `npm run build`. -6. Finally, start the app by running `npm run start` - ---- diff --git a/drizzle.config.ts b/drizzle.config.ts deleted file mode 100644 index a029112..0000000 --- a/drizzle.config.ts +++ /dev/null @@ -1,11 +0,0 @@ -import { defineConfig } from 'drizzle-kit'; -import path from 'path'; - -export default defineConfig({ - dialect: 'sqlite', - schema: './src/lib/db/schema.ts', - out: './drizzle', - dbCredentials: { - url: path.join(process.cwd(), 'data', 'db.sqlite'), - }, -}); diff --git a/drizzle/0000_fuzzy_randall.sql b/drizzle/0000_fuzzy_randall.sql deleted file mode 100644 index 0a2ff07..0000000 --- a/drizzle/0000_fuzzy_randall.sql +++ /dev/null @@ -1,16 +0,0 @@ -CREATE TABLE IF NOT EXISTS `chats` ( - `id` text PRIMARY KEY NOT NULL, - `title` text NOT NULL, - `createdAt` text NOT NULL, - `focusMode` text NOT NULL, - `files` text DEFAULT '[]' -); ---> statement-breakpoint -CREATE TABLE IF NOT EXISTS `messages` ( - `id` integer PRIMARY KEY NOT NULL, - `content` text NOT NULL, - `chatId` text NOT NULL, - `messageId` text NOT NULL, - `type` text, - `metadata` text -); diff --git a/drizzle/meta/0000_snapshot.json b/drizzle/meta/0000_snapshot.json deleted file mode 100644 index 850bcd3..0000000 --- a/drizzle/meta/0000_snapshot.json +++ /dev/null @@ -1,116 +0,0 @@ -{ - "version": "6", - "dialect": "sqlite", - "id": "ef3a044b-0f34-40b5-babb-2bb3a909ba27", - "prevId": "00000000-0000-0000-0000-000000000000", - "tables": { - "chats": { - "name": "chats", - "columns": { - "id": { - "name": "id", - "type": "text", - "primaryKey": true, - "notNull": true, - "autoincrement": false - }, - "title": { - "name": "title", - "type": "text", - 
"primaryKey": false, - "notNull": true, - "autoincrement": false - }, - "createdAt": { - "name": "createdAt", - "type": "text", - "primaryKey": false, - "notNull": true, - "autoincrement": false - }, - "focusMode": { - "name": "focusMode", - "type": "text", - "primaryKey": false, - "notNull": true, - "autoincrement": false - }, - "files": { - "name": "files", - "type": "text", - "primaryKey": false, - "notNull": false, - "autoincrement": false, - "default": "'[]'" - } - }, - "indexes": {}, - "foreignKeys": {}, - "compositePrimaryKeys": {}, - "uniqueConstraints": {}, - "checkConstraints": {} - }, - "messages": { - "name": "messages", - "columns": { - "id": { - "name": "id", - "type": "integer", - "primaryKey": true, - "notNull": true, - "autoincrement": false - }, - "content": { - "name": "content", - "type": "text", - "primaryKey": false, - "notNull": true, - "autoincrement": false - }, - "chatId": { - "name": "chatId", - "type": "text", - "primaryKey": false, - "notNull": true, - "autoincrement": false - }, - "messageId": { - "name": "messageId", - "type": "text", - "primaryKey": false, - "notNull": true, - "autoincrement": false - }, - "type": { - "name": "type", - "type": "text", - "primaryKey": false, - "notNull": false, - "autoincrement": false - }, - "metadata": { - "name": "metadata", - "type": "text", - "primaryKey": false, - "notNull": false, - "autoincrement": false - } - }, - "indexes": {}, - "foreignKeys": {}, - "compositePrimaryKeys": {}, - "uniqueConstraints": {}, - "checkConstraints": {} - } - }, - "views": {}, - "enums": {}, - "_meta": { - "schemas": {}, - "tables": {}, - "columns": {} - }, - "internal": { - "indexes": {} - } -} diff --git a/drizzle/meta/_journal.json b/drizzle/meta/_journal.json deleted file mode 100644 index 5db59d1..0000000 --- a/drizzle/meta/_journal.json +++ /dev/null @@ -1,13 +0,0 @@ -{ - "version": "7", - "dialect": "sqlite", - "entries": [ - { - "idx": 0, - "version": "6", - "when": 1748405503809, - "tag": 
"0000_fuzzy_randall", - "breakpoints": true - } - ] -} diff --git a/entrypoint.sh b/entrypoint.sh deleted file mode 100644 index 9f9448a..0000000 --- a/entrypoint.sh +++ /dev/null @@ -1,6 +0,0 @@ -#!/bin/sh -set -e - -node migrate.js - -exec node server.js \ No newline at end of file diff --git a/next-env.d.ts b/next-env.d.ts deleted file mode 100644 index 1b3be08..0000000 --- a/next-env.d.ts +++ /dev/null @@ -1,5 +0,0 @@ -/// -/// - -// NOTE: This file should not be edited -// see https://nextjs.org/docs/app/api-reference/config/typescript for more information. diff --git a/package.json b/package.json index 5715c2a..c2f1aba 100644 --- a/package.json +++ b/package.json @@ -1,70 +1,33 @@ { - "name": "perplexica-frontend", - "version": "1.11.0-rc2", + "name": "perplexica-backend", + "version": "1.0.0", "license": "MIT", "author": "ItzCrazyKns", "scripts": { - "dev": "next dev", - "build": "npm run db:push && next build", - "start": "next start", - "lint": "next lint", - "format:write": "prettier . 
--write", - "db:push": "drizzle-kit push" - }, - "dependencies": { - "@headlessui/react": "^2.2.0", - "@iarna/toml": "^2.2.5", - "@icons-pack/react-simple-icons": "^12.3.0", - "@langchain/anthropic": "^0.3.24", - "@langchain/community": "^0.3.49", - "@langchain/core": "^0.3.66", - "@langchain/google-genai": "^0.2.15", - "@langchain/groq": "^0.2.3", - "@langchain/ollama": "^0.2.3", - "@langchain/openai": "^0.6.2", - "@langchain/textsplitters": "^0.1.0", - "@tailwindcss/typography": "^0.5.12", - "@xenova/transformers": "^2.17.2", - "axios": "^1.8.3", - "better-sqlite3": "^11.9.1", - "clsx": "^2.1.0", - "compute-cosine-similarity": "^1.1.0", - "compute-dot": "^1.1.0", - "drizzle-orm": "^0.40.1", - "html-to-text": "^9.0.5", - "jspdf": "^3.0.1", - "langchain": "^0.3.30", - "lucide-react": "^0.363.0", - "mammoth": "^1.9.1", - "markdown-to-jsx": "^7.7.2", - "next": "^15.2.2", - "next-themes": "^0.3.0", - "pdf-parse": "^1.1.1", - "react": "^18", - "react-dom": "^18", - "react-text-to-speech": "^0.14.5", - "react-textarea-autosize": "^8.5.3", - "sonner": "^1.4.41", - "tailwind-merge": "^2.2.2", - "winston": "^3.17.0", - "yet-another-react-lightbox": "^3.17.2", - "zod": "^3.22.4" + "start": "node --env-file=.env dist/app.js", + "build": "tsc", + "dev": "nodemon -r dotenv/config src/app.ts", + "format": "prettier . --check", + "format:write": "prettier . 
--write" }, "devDependencies": { - "@types/better-sqlite3": "^7.6.12", - "@types/html-to-text": "^9.0.4", - "@types/jspdf": "^2.0.0", - "@types/node": "^20", - "@types/pdf-parse": "^1.1.4", - "@types/react": "^18", - "@types/react-dom": "^18", - "autoprefixer": "^10.0.1", - "drizzle-kit": "^0.30.5", - "eslint": "^8", - "eslint-config-next": "14.1.4", - "postcss": "^8", + "@types/cors": "^2.8.17", + "@types/express": "^4.17.21", + "@types/readable-stream": "^4.0.11", "prettier": "^3.2.5", - "tailwindcss": "^3.3.0", - "typescript": "^5" + "ts-node": "^10.9.2", + "typescript": "^5.4.3" + }, + "dependencies": { + "@langchain/openai": "^0.0.25", + "axios": "^1.6.8", + "compute-cosine-similarity": "^1.1.0", + "compute-dot": "^1.1.0", + "cors": "^2.8.5", + "dotenv": "^16.4.5", + "express": "^4.19.2", + "langchain": "^0.1.30", + "ws": "^8.16.0", + "zod": "^3.22.4" } } diff --git a/public/icon-100.png b/public/icon-100.png deleted file mode 100644 index 98fa242..0000000 Binary files a/public/icon-100.png and /dev/null differ diff --git a/public/icon-50.png b/public/icon-50.png deleted file mode 100644 index 9bb7a0e..0000000 Binary files a/public/icon-50.png and /dev/null differ diff --git a/public/icon.png b/public/icon.png deleted file mode 100644 index f6fe3c7..0000000 Binary files a/public/icon.png and /dev/null differ diff --git a/public/screenshots/p1.png b/public/screenshots/p1.png deleted file mode 100644 index 02f01e5..0000000 Binary files a/public/screenshots/p1.png and /dev/null differ diff --git a/public/screenshots/p1_small.png b/public/screenshots/p1_small.png deleted file mode 100644 index 13d9a42..0000000 Binary files a/public/screenshots/p1_small.png and /dev/null differ diff --git a/public/screenshots/p2.png b/public/screenshots/p2.png deleted file mode 100644 index 1171675..0000000 Binary files a/public/screenshots/p2.png and /dev/null differ diff --git a/public/screenshots/p2_small.png b/public/screenshots/p2_small.png deleted file mode 100644 index 
bd8d673..0000000 Binary files a/public/screenshots/p2_small.png and /dev/null differ diff --git a/public/weather-ico/clear-day.svg b/public/weather-ico/clear-day.svg deleted file mode 100644 index d97d28b..0000000 --- a/public/weather-ico/clear-day.svg +++ /dev/null @@ -1,131 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - \ No newline at end of file diff --git a/public/weather-ico/clear-night.svg b/public/weather-ico/clear-night.svg deleted file mode 100644 index 005ac63..0000000 --- a/public/weather-ico/clear-night.svg +++ /dev/null @@ -1,159 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - \ No newline at end of file diff --git a/public/weather-ico/cloudy-1-day.svg b/public/weather-ico/cloudy-1-day.svg deleted file mode 100644 index 823fea1..0000000 --- a/public/weather-ico/cloudy-1-day.svg +++ /dev/null @@ -1,178 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - \ No newline at end of file diff --git a/public/weather-ico/cloudy-1-night.svg b/public/weather-ico/cloudy-1-night.svg deleted file mode 100644 index 3fe1541..0000000 --- a/public/weather-ico/cloudy-1-night.svg +++ /dev/null @@ -1,206 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - \ No newline at end of file diff --git a/public/weather-ico/fog-day.svg b/public/weather-ico/fog-day.svg deleted file mode 100644 index ed834cf..0000000 --- a/public/weather-ico/fog-day.svg +++ /dev/null @@ -1,244 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - F - - - - - - - - - - - - - - - - - - - - - \ No newline at end of file diff --git a/public/weather-ico/fog-night.svg b/public/weather-ico/fog-night.svg deleted file mode 100644 index d59f98f..0000000 --- a/public/weather-ico/fog-night.svg +++ /dev/null @@ -1,309 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - \ No newline at end of file diff --git 
a/public/weather-ico/frost-day.svg b/public/weather-ico/frost-day.svg deleted file mode 100644 index 16d591c..0000000 --- a/public/weather-ico/frost-day.svg +++ /dev/null @@ -1,204 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - F - - - - - - - - - - - - - - - - - - - - - - - - - \ No newline at end of file diff --git a/public/weather-ico/frost-night.svg b/public/weather-ico/frost-night.svg deleted file mode 100644 index ff2c8dc..0000000 --- a/public/weather-ico/frost-night.svg +++ /dev/null @@ -1,269 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - \ No newline at end of file diff --git a/public/weather-ico/rain-and-sleet-mix.svg b/public/weather-ico/rain-and-sleet-mix.svg deleted file mode 100644 index 172010d..0000000 --- a/public/weather-ico/rain-and-sleet-mix.svg +++ /dev/null @@ -1,141 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - \ No newline at end of file diff --git a/public/weather-ico/rainy-1-day.svg b/public/weather-ico/rainy-1-day.svg deleted file mode 100644 index 2faf06e..0000000 --- a/public/weather-ico/rainy-1-day.svg +++ /dev/null @@ -1,179 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - \ No newline at end of file diff --git a/public/weather-ico/rainy-1-night.svg b/public/weather-ico/rainy-1-night.svg deleted file mode 100644 index ee8ffd8..0000000 --- a/public/weather-ico/rainy-1-night.svg +++ /dev/null @@ -1,243 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - \ No newline at end of file diff --git a/public/weather-ico/rainy-2-day.svg b/public/weather-ico/rainy-2-day.svg deleted file mode 100644 index affdfff..0000000 --- a/public/weather-ico/rainy-2-day.svg +++ /dev/null @@ -1,204 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - \ No newline at end of file diff --git 
a/public/weather-ico/rainy-2-night.svg b/public/weather-ico/rainy-2-night.svg deleted file mode 100644 index 9c3ae20..0000000 --- a/public/weather-ico/rainy-2-night.svg +++ /dev/null @@ -1,256 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - \ No newline at end of file diff --git a/public/weather-ico/rainy-3-day.svg b/public/weather-ico/rainy-3-day.svg deleted file mode 100644 index b0b5754..0000000 --- a/public/weather-ico/rainy-3-day.svg +++ /dev/null @@ -1,206 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - \ No newline at end of file diff --git a/public/weather-ico/rainy-3-night.svg b/public/weather-ico/rainy-3-night.svg deleted file mode 100644 index 4078e7d..0000000 --- a/public/weather-ico/rainy-3-night.svg +++ /dev/null @@ -1,270 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - \ No newline at end of file diff --git a/public/weather-ico/scattered-thunderstorms-day.svg b/public/weather-ico/scattered-thunderstorms-day.svg deleted file mode 100644 index 0cfbccc..0000000 --- a/public/weather-ico/scattered-thunderstorms-day.svg +++ /dev/null @@ -1,374 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - \ No newline at end of file diff --git a/public/weather-ico/scattered-thunderstorms-night.svg b/public/weather-ico/scattered-thunderstorms-night.svg deleted file mode 100644 index 72cf7a6..0000000 --- a/public/weather-ico/scattered-thunderstorms-night.svg +++ /dev/null @@ -1,283 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - \ No newline at end of file diff --git a/public/weather-ico/severe-thunderstorm.svg b/public/weather-ico/severe-thunderstorm.svg deleted file mode 100644 index 223198b..0000000 --- a/public/weather-ico/severe-thunderstorm.svg +++ /dev/null @@ -1,307 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - \ 
No newline at end of file diff --git a/public/weather-ico/snowy-1-day.svg b/public/weather-ico/snowy-1-day.svg deleted file mode 100644 index fb73943..0000000 --- a/public/weather-ico/snowy-1-day.svg +++ /dev/null @@ -1,241 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - \ No newline at end of file diff --git a/public/weather-ico/snowy-1-night.svg b/public/weather-ico/snowy-1-night.svg deleted file mode 100644 index 039ea2e..0000000 --- a/public/weather-ico/snowy-1-night.svg +++ /dev/null @@ -1,269 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - \ No newline at end of file diff --git a/public/weather-ico/snowy-2-day.svg b/public/weather-ico/snowy-2-day.svg deleted file mode 100644 index 323a616..0000000 --- a/public/weather-ico/snowy-2-day.svg +++ /dev/null @@ -1,273 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - \ No newline at end of file diff --git a/public/weather-ico/snowy-2-night.svg b/public/weather-ico/snowy-2-night.svg deleted file mode 100644 index 10dcbfa..0000000 --- a/public/weather-ico/snowy-2-night.svg +++ /dev/null @@ -1,301 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - \ No newline at end of file diff --git a/public/weather-ico/snowy-3-day.svg b/public/weather-ico/snowy-3-day.svg deleted file mode 100644 index 846c17a..0000000 --- a/public/weather-ico/snowy-3-day.svg +++ /dev/null @@ -1,334 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - \ No newline at end of file diff --git a/public/weather-ico/snowy-3-night.svg b/public/weather-ico/snowy-3-night.svg deleted file mode 100644 index b3c8c24..0000000 --- a/public/weather-ico/snowy-3-night.svg +++ /dev/null @@ -1,361 +0,0 @@ 
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - \ No newline at end of file diff --git a/sample.config.toml b/sample.config.toml deleted file mode 100644 index ba3e98e..0000000 --- a/sample.config.toml +++ /dev/null @@ -1,35 +0,0 @@ -[GENERAL] -SIMILARITY_MEASURE = "cosine" # "cosine" or "dot" -KEEP_ALIVE = "5m" # How long to keep Ollama models loaded into memory. (Instead of using -1 use "-1m") - -[MODELS.OPENAI] -API_KEY = "" - -[MODELS.GROQ] -API_KEY = "" - -[MODELS.ANTHROPIC] -API_KEY = "" - -[MODELS.GEMINI] -API_KEY = "" - -[MODELS.CUSTOM_OPENAI] -API_KEY = "" -API_URL = "" -MODEL_NAME = "" - -[MODELS.OLLAMA] -API_URL = "" # Ollama API URL - http://host.docker.internal:11434 - -[MODELS.DEEPSEEK] -API_KEY = "" - -[MODELS.AIMLAPI] -API_KEY = "" # Required to use AI/ML API chat and embedding models - -[MODELS.LM_STUDIO] -API_URL = "" # LM Studio API URL - http://host.docker.internal:1234 - -[API_ENDPOINTS] -SEARXNG = "" # SearxNG API URL - http://localhost:32768 diff --git a/searxng-settings.yml b/searxng-settings.yml new file mode 100644 index 0000000..d4a8bad --- /dev/null +++ b/searxng-settings.yml @@ -0,0 +1,2380 @@ +general: + # Debug mode, only for development. 
Is overwritten by ${SEARXNG_DEBUG} + debug: false + # displayed name + instance_name: 'searxng' + # For example: https://example.com/privacy + privacypolicy_url: false + # use true to use your own donation page written in searx/info/en/donate.md + # use false to disable the donation link + donation_url: false + # mailto:contact@example.com + contact_url: false + # record stats + enable_metrics: true + +brand: + new_issue_url: https://github.com/searxng/searxng/issues/new + docs_url: https://docs.searxng.org/ + public_instances: https://searx.space + wiki_url: https://github.com/searxng/searxng/wiki + issue_url: https://github.com/searxng/searxng/issues + # custom: + # maintainer: "Jon Doe" + # # Custom entries in the footer: [title]: [link] + # links: + # Uptime: https://uptime.searxng.org/history/darmarit-org + # About: "https://searxng.org" + +search: + # Filter results. 0: None, 1: Moderate, 2: Strict + safe_search: 0 + # Existing autocomplete backends: "dbpedia", "duckduckgo", "google", "yandex", "mwmbl", + # "seznam", "startpage", "stract", "swisscows", "qwant", "wikipedia" - leave blank to turn it off + # by default. 
+ autocomplete: 'google' + # minimum characters to type before autocompleter starts + autocomplete_min: 4 + # Default search language - leave blank to detect from browser information or + # use codes from 'languages.py' + default_lang: 'auto' + # max_page: 0 # if engine supports paging, 0 means unlimited numbers of pages + # Available languages + # languages: + # - all + # - en + # - en-US + # - de + # - it-IT + # - fr + # - fr-BE + # ban time in seconds after engine errors + ban_time_on_fail: 5 + # max ban time in seconds after engine errors + max_ban_time_on_fail: 120 + suspended_times: + # Engine suspension time after error (in seconds; set to 0 to disable) + # For error "Access denied" and "HTTP error [402, 403]" + SearxEngineAccessDenied: 86400 + # For error "CAPTCHA" + SearxEngineCaptcha: 86400 + # For error "Too many request" and "HTTP error 429" + SearxEngineTooManyRequests: 3600 + # Cloudflare CAPTCHA + cf_SearxEngineCaptcha: 1296000 + cf_SearxEngineAccessDenied: 86400 + # ReCAPTCHA + recaptcha_SearxEngineCaptcha: 604800 + + # remove format to deny access, use lower case. + # formats: [html, csv, json, rss] + formats: + - html + - json + +server: + # Is overwritten by ${SEARXNG_PORT} and ${SEARXNG_BIND_ADDRESS} + port: 8888 + bind_address: '127.0.0.1' + # public URL of the instance, to ensure correct inbound links. Is overwritten + # by ${SEARXNG_URL}. + base_url: / # "http://example.com/location" + limiter: false # rate limit the number of requests on the instance, block some bots + public_instance: false # enable features designed only for public instances + + # If your instance owns a /etc/searxng/settings.yml file, then set the following + # values there. 
+ + secret_key: 'a2fb23f1b02e6ee83875b09826990de0f6bd908b6638e8c10277d415f6ab852b' # Is overwritten by ${SEARXNG_SECRET} + # Proxying image results through searx + image_proxy: false + # 1.0 and 1.1 are supported + http_protocol_version: '1.0' + # POST queries are more secure as they don't show up in history but may cause + # problems when using Firefox containers + method: 'POST' + default_http_headers: + X-Content-Type-Options: nosniff + X-Download-Options: noopen + X-Robots-Tag: noindex, nofollow + Referrer-Policy: no-referrer + +redis: + # URL to connect redis database. Is overwritten by ${SEARXNG_REDIS_URL}. + # https://docs.searxng.org/admin/settings/settings_redis.html#settings-redis + url: false + +ui: + # Custom static path - leave it blank if you didn't change + static_path: '' + static_use_hash: false + # Custom templates path - leave it blank if you didn't change + templates_path: '' + # query_in_title: When true, the result page's titles contains the query + # it decreases the privacy, since the browser can records the page titles. + query_in_title: false + # infinite_scroll: When true, automatically loads the next page when scrolling to bottom of the current page. + infinite_scroll: false + # ui theme + default_theme: simple + # center the results ? + center_alignment: false + # URL prefix of the internet archive, don't forget trailing slash (if needed). + # cache_url: "https://webcache.googleusercontent.com/search?q=cache:" + # Default interface locale - leave blank to detect from browser information or + # use codes from the 'locales' config section + default_locale: '' + # Open result links in a new tab by default + # results_on_new_tab: false + theme_args: + # style of simple theme: auto, light, dark + simple_style: auto + # Perform search immediately if a category selected. + # Disable to select multiple categories at once and start the search manually. 
+ search_on_category_select: true + # Hotkeys: default or vim + hotkeys: default + +# Lock arbitrary settings on the preferences page. To find the ID of the user +# setting you want to lock, check the ID of the form on the page "preferences". +# +# preferences: +# lock: +# - language +# - autocomplete +# - method +# - query_in_title + +# searx supports result proxification using an external service: +# https://github.com/asciimoo/morty uncomment below section if you have running +# morty proxy the key is base64 encoded (keep the !!binary notation) +# Note: since commit af77ec3, morty accepts a base64 encoded key. +# +# result_proxy: +# url: http://127.0.0.1:3000/ +# # the key is a base64 encoded string, the YAML !!binary prefix is optional +# key: !!binary "your_morty_proxy_key" +# # [true|false] enable the "proxy" button next to each result +# proxify_results: true + +# communication with search engines +# +outgoing: + # default timeout in seconds, can be overridden by the engine + request_timeout: 3.0 + # the maximum timeout in seconds + # max_request_timeout: 10.0 + # suffix of searx_useragent, could contain information like an email address + # to the administrator + useragent_suffix: '' + # The maximum number of concurrent connections that may be established. + pool_connections: 100 + # Allow the connection pool to maintain keep-alive connections below this + # point. 
+ pool_maxsize: 20 + # See https://www.python-httpx.org/http2/ + enable_http2: true + # uncomment below section if you want to use a custom server certificate + # see https://www.python-httpx.org/advanced/#changing-the-verification-defaults + # and https://www.python-httpx.org/compatibility/#ssl-configuration + # verify: ~/.mitmproxy/mitmproxy-ca-cert.cer + # + # uncomment below section if you want to use a proxy; see: SOCKS proxies + # https://2.python-requests.org/en/latest/user/advanced/#proxies + # are also supported: see + # https://2.python-requests.org/en/latest/user/advanced/#socks + # + # proxies: + # all://: + # - http://proxy1:8080 + # - http://proxy2:8080 + # + # using_tor_proxy: true + # + # Extra seconds to add in order to account for the time taken by the proxy + # + # extra_proxy_timeout: 10.0 + # + # uncomment below section only if you have more than one network interface + # which can be the source of outgoing search requests + # + # source_ips: + # - 1.1.1.1 + # - 1.1.1.2 + # - fe80::/126 + +# External plugin configuration, for more details see +# https://docs.searxng.org/dev/plugins.html +# +# plugins: +# - plugin1 +# - plugin2 +# - ... + +# Comment or un-comment plugin to activate / deactivate by default. +# +# enabled_plugins: +# # these plugins are enabled if nothing is configured .. +# - 'Hash plugin' +# - 'Self Information' +# - 'Tracker URL remover' +# - 'Ahmia blacklist' # activation depends on outgoing.using_tor_proxy +# # these plugins are disabled if nothing is configured .. +# - 'Hostname replace' # see hostname_replace configuration below +# - 'Open Access DOI rewrite' +# - 'Tor check plugin' +# # Read the docs before activate: auto-detection of the language could be +# # detrimental to users expectations / users can activate the plugin in the +# # preferences if they want. 
+# - 'Autodetect search language' + +# Configuration of the "Hostname replace" plugin: +# +# hostname_replace: +# '(.*\.)?youtube\.com$': 'invidious.example.com' +# '(.*\.)?youtu\.be$': 'invidious.example.com' +# '(.*\.)?youtube-noocookie\.com$': 'yotter.example.com' +# '(.*\.)?reddit\.com$': 'teddit.example.com' +# '(.*\.)?redd\.it$': 'teddit.example.com' +# '(www\.)?twitter\.com$': 'nitter.example.com' +# # to remove matching host names from result list, set value to false +# 'spam\.example\.com': false + +checker: + # disable checker when in debug mode + off_when_debug: true + + # use "scheduling: false" to disable scheduling + # scheduling: interval or int + + # to activate the scheduler: + # * uncomment "scheduling" section + # * add "cache2 = name=searxngcache,items=2000,blocks=2000,blocksize=4096,bitmap=1" + # to your uwsgi.ini + + # scheduling: + # start_after: [300, 1800] # delay to start the first run of the checker + # every: [86400, 90000] # how often the checker runs + + # additional tests: only for the YAML anchors (see the engines section) + # + additional_tests: + rosebud: &test_rosebud + matrix: + query: rosebud + lang: en + result_container: + - not_empty + - ['one_title_contains', 'citizen kane'] + test: + - unique_results + + android: &test_android + matrix: + query: ['android'] + lang: ['en', 'de', 'fr', 'zh-CN'] + result_container: + - not_empty + - ['one_title_contains', 'google'] + test: + - unique_results + + # tests: only for the YAML anchors (see the engines section) + tests: + infobox: &tests_infobox + infobox: + matrix: + query: ['linux', 'new york', 'bbc'] + result_container: + - has_infobox + +categories_as_tabs: + general: + images: + videos: + news: + map: + music: + it: + science: + files: + social media: + +engines: + - name: 9gag + engine: 9gag + shortcut: 9g + disabled: true + + - name: annas archive + engine: annas_archive + disabled: true + shortcut: aa + + # - name: annas articles + # engine: annas_archive + # shortcut: aaa + 
# # https://docs.searxng.org/dev/engines/online/annas_archive.html + # aa_content: 'journal_article' # book_any .. magazine, standards_document + # aa_ext: 'pdf' # pdf, epub, .. + # aa_sort: 'newest' # newest, oldest, largest, smallest + + - name: apk mirror + engine: apkmirror + timeout: 4.0 + shortcut: apkm + disabled: true + + - name: apple app store + engine: apple_app_store + shortcut: aps + disabled: true + + # Requires Tor + - name: ahmia + engine: ahmia + categories: onions + enable_http: true + shortcut: ah + + - name: anaconda + engine: xpath + paging: true + first_page_num: 0 + search_url: https://anaconda.org/search?q={query}&page={pageno} + results_xpath: //tbody/tr + url_xpath: ./td/h5/a[last()]/@href + title_xpath: ./td/h5 + content_xpath: ./td[h5]/text() + categories: it + timeout: 6.0 + shortcut: conda + disabled: true + + - name: arch linux wiki + engine: archlinux + shortcut: al + + - name: artic + engine: artic + shortcut: arc + timeout: 4.0 + + - name: arxiv + engine: arxiv + shortcut: arx + timeout: 4.0 + + - name: ask + engine: ask + shortcut: ask + disabled: true + + # tmp suspended: dh key too small + # - name: base + # engine: base + # shortcut: bs + + - name: bandcamp + engine: bandcamp + shortcut: bc + categories: music + + - name: wikipedia + engine: wikipedia + shortcut: wp + # add "list" to the array to get results in the results list + display_type: ['infobox'] + base_url: 'https://{language}.wikipedia.org/' + categories: [general] + + - name: bilibili + engine: bilibili + shortcut: bil + disabled: true + + - name: bing + engine: bing + shortcut: bi + disabled: true + + - name: bing images + engine: bing_images + shortcut: bii + + - name: bing news + engine: bing_news + shortcut: bin + + - name: bing videos + engine: bing_videos + shortcut: biv + + - name: bitbucket + engine: xpath + paging: true + search_url: https://bitbucket.org/repo/all/{pageno}?name={query} + url_xpath: 
//article[@class="repo-summary"]//a[@class="repo-link"]/@href + title_xpath: //article[@class="repo-summary"]//a[@class="repo-link"] + content_xpath: //article[@class="repo-summary"]/p + categories: [it, repos] + timeout: 4.0 + disabled: true + shortcut: bb + about: + website: https://bitbucket.org/ + wikidata_id: Q2493781 + official_api_documentation: https://developer.atlassian.com/bitbucket + use_official_api: false + require_api_key: false + results: HTML + + - name: bpb + engine: bpb + shortcut: bpb + disabled: true + + - name: btdigg + engine: btdigg + shortcut: bt + disabled: true + + - name: ccc-tv + engine: xpath + paging: false + search_url: https://media.ccc.de/search/?q={query} + url_xpath: //div[@class="caption"]/h3/a/@href + title_xpath: //div[@class="caption"]/h3/a/text() + content_xpath: //div[@class="caption"]/h4/@title + categories: videos + disabled: true + shortcut: c3tv + about: + website: https://media.ccc.de/ + wikidata_id: Q80729951 + official_api_documentation: https://github.com/voc/voctoweb + use_official_api: false + require_api_key: false + results: HTML + # We don't set language: de here because media.ccc.de is not just + # for a German audience. It contains many English videos and many + # German videos have English subtitles. 
+ + - name: openverse + engine: openverse + categories: images + shortcut: opv + + - name: chefkoch + engine: chefkoch + shortcut: chef + # to show premium or plus results too: + # skip_premium: false + + # - name: core.ac.uk + # engine: core + # categories: science + # shortcut: cor + # # get your API key from: https://core.ac.uk/api-keys/register/ + # api_key: 'unset' + + - name: crossref + engine: crossref + shortcut: cr + timeout: 30 + disabled: true + + - name: crowdview + engine: json_engine + shortcut: cv + categories: general + paging: false + search_url: https://crowdview-next-js.onrender.com/api/search-v3?query={query} + results_query: results + url_query: link + title_query: title + content_query: snippet + disabled: true + about: + website: https://crowdview.ai/ + + - name: yep + engine: yep + shortcut: yep + categories: general + search_type: web + disabled: true + + - name: yep images + engine: yep + shortcut: yepi + categories: images + search_type: images + disabled: true + + - name: yep news + engine: yep + shortcut: yepn + categories: news + search_type: news + disabled: true + + - name: curlie + engine: xpath + shortcut: cl + categories: general + disabled: true + paging: true + lang_all: '' + search_url: https://curlie.org/search?q={query}&lang={lang}&start={pageno}&stime=92452189 + page_size: 20 + results_xpath: //div[@id="site-list-content"]/div[@class="site-item"] + url_xpath: ./div[@class="title-and-desc"]/a/@href + title_xpath: ./div[@class="title-and-desc"]/a/div + content_xpath: ./div[@class="title-and-desc"]/div[@class="site-descr"] + about: + website: https://curlie.org/ + wikidata_id: Q60715723 + use_official_api: false + require_api_key: false + results: HTML + + - name: currency + engine: currency_convert + categories: general + shortcut: cc + + - name: bahnhof + engine: json_engine + search_url: https://www.bahnhof.de/api/stations/search/{query} + url_prefix: https://www.bahnhof.de/ + url_query: slug + title_query: name + 
content_query: state + shortcut: bf + disabled: true + about: + website: https://www.bahn.de + wikidata_id: Q22811603 + use_official_api: false + require_api_key: false + results: JSON + language: de + + - name: deezer + engine: deezer + shortcut: dz + disabled: true + + - name: destatis + engine: destatis + shortcut: destat + disabled: true + + - name: deviantart + engine: deviantart + shortcut: da + timeout: 3.0 + + - name: ddg definitions + engine: duckduckgo_definitions + shortcut: ddd + weight: 2 + disabled: true + tests: *tests_infobox + + # cloudflare protected + # - name: digbt + # engine: digbt + # shortcut: dbt + # timeout: 6.0 + # disabled: true + + - name: docker hub + engine: docker_hub + shortcut: dh + categories: [it, packages] + + - name: erowid + engine: xpath + paging: true + first_page_num: 0 + page_size: 30 + search_url: https://www.erowid.org/search.php?q={query}&s={pageno} + url_xpath: //dl[@class="results-list"]/dt[@class="result-title"]/a/@href + title_xpath: //dl[@class="results-list"]/dt[@class="result-title"]/a/text() + content_xpath: //dl[@class="results-list"]/dd[@class="result-details"] + categories: [] + shortcut: ew + disabled: true + about: + website: https://www.erowid.org/ + wikidata_id: Q1430691 + official_api_documentation: + use_official_api: false + require_api_key: false + results: HTML + + # - name: elasticsearch + # shortcut: es + # engine: elasticsearch + # base_url: http://localhost:9200 + # username: elastic + # password: changeme + # index: my-index + # # available options: match, simple_query_string, term, terms, custom + # query_type: match + # # if query_type is set to custom, provide your query here + # #custom_query_json: {"query":{"match_all": {}}} + # #show_metadata: false + # disabled: true + + - name: wikidata + engine: wikidata + shortcut: wd + timeout: 3.0 + weight: 2 + # add "list" to the array to get results in the results list + display_type: ['infobox'] + tests: *tests_infobox + categories: [general] + + 
- name: duckduckgo + engine: duckduckgo + shortcut: ddg + + - name: duckduckgo images + engine: duckduckgo_extra + categories: [images, web] + ddg_category: images + shortcut: ddi + disabled: true + + - name: duckduckgo videos + engine: duckduckgo_extra + categories: [videos, web] + ddg_category: videos + shortcut: ddv + disabled: true + + - name: duckduckgo news + engine: duckduckgo_extra + categories: [news, web] + ddg_category: news + shortcut: ddn + disabled: true + + - name: duckduckgo weather + engine: duckduckgo_weather + shortcut: ddw + disabled: true + + - name: apple maps + engine: apple_maps + shortcut: apm + disabled: true + timeout: 5.0 + + - name: emojipedia + engine: emojipedia + timeout: 4.0 + shortcut: em + disabled: true + + - name: tineye + engine: tineye + shortcut: tin + timeout: 9.0 + disabled: true + + - name: etymonline + engine: xpath + paging: true + search_url: https://etymonline.com/search?page={pageno}&q={query} + url_xpath: //a[contains(@class, "word__name--")]/@href + title_xpath: //a[contains(@class, "word__name--")] + content_xpath: //section[contains(@class, "word__defination")] + first_page_num: 1 + shortcut: et + categories: [dictionaries] + about: + website: https://www.etymonline.com/ + wikidata_id: Q1188617 + official_api_documentation: + use_official_api: false + require_api_key: false + results: HTML + + # - name: ebay + # engine: ebay + # shortcut: eb + # base_url: 'https://www.ebay.com' + # disabled: true + # timeout: 5 + + - name: 1x + engine: www1x + shortcut: 1x + timeout: 3.0 + disabled: true + + - name: fdroid + engine: fdroid + shortcut: fd + disabled: true + + - name: flickr + categories: images + shortcut: fl + # You can use the engine using the official stable API, but you need an API + # key, see: https://www.flickr.com/services/apps/create/ + # engine: flickr + # api_key: 'apikey' # required! 
+ # Or you can use the html non-stable engine, activated by default + engine: flickr_noapi + + - name: free software directory + engine: mediawiki + shortcut: fsd + categories: [it, software wikis] + base_url: https://directory.fsf.org/ + search_type: title + timeout: 5.0 + disabled: true + about: + website: https://directory.fsf.org/ + wikidata_id: Q2470288 + + # - name: freesound + # engine: freesound + # shortcut: fnd + # disabled: true + # timeout: 15.0 + # API key required, see: https://freesound.org/docs/api/overview.html + # api_key: MyAPIkey + + - name: frinkiac + engine: frinkiac + shortcut: frk + disabled: true + + - name: fyyd + engine: fyyd + shortcut: fy + timeout: 8.0 + disabled: true + + - name: genius + engine: genius + shortcut: gen + + - name: gentoo + engine: gentoo + shortcut: ge + timeout: 10.0 + + - name: gitlab + engine: json_engine + paging: true + search_url: https://gitlab.com/api/v4/projects?search={query}&page={pageno} + url_query: web_url + title_query: name_with_namespace + content_query: description + page_size: 20 + categories: [it, repos] + shortcut: gl + timeout: 10.0 + disabled: true + about: + website: https://about.gitlab.com/ + wikidata_id: Q16639197 + official_api_documentation: https://docs.gitlab.com/ee/api/ + use_official_api: false + require_api_key: false + results: JSON + + - name: github + engine: github + shortcut: gh + + # This is a Gitea service. If you would like to use a different instance, + # change codeberg.org to URL of the desired Gitea host. Or you can create a + # new engine by copying this and changing the name, shortcut and search_url.
+ + - name: codeberg + engine: json_engine + search_url: https://codeberg.org/api/v1/repos/search?q={query}&limit=10 + url_query: html_url + title_query: name + content_query: description + categories: [it, repos] + shortcut: cb + disabled: true + about: + website: https://codeberg.org/ + wikidata_id: + official_api_documentation: https://try.gitea.io/api/swagger + use_official_api: false + require_api_key: false + results: JSON + + - name: goodreads + engine: goodreads + shortcut: good + timeout: 4.0 + disabled: true + + - name: google + engine: google + shortcut: go + # additional_tests: + # android: *test_android + + - name: google images + engine: google_images + shortcut: goi + # additional_tests: + # android: *test_android + # dali: + # matrix: + # query: ['Dali Christ'] + # lang: ['en', 'de', 'fr', 'zh-CN'] + # result_container: + # - ['one_title_contains', 'Salvador'] + + - name: google news + engine: google_news + shortcut: gon + # additional_tests: + # android: *test_android + + - name: google videos + engine: google_videos + shortcut: gov + # additional_tests: + # android: *test_android + + - name: google scholar + engine: google_scholar + shortcut: gos + + - name: google play apps + engine: google_play + categories: [files, apps] + shortcut: gpa + play_categ: apps + disabled: true + + - name: google play movies + engine: google_play + categories: videos + shortcut: gpm + play_categ: movies + disabled: true + + - name: material icons + engine: material_icons + categories: images + shortcut: mi + disabled: true + + - name: gpodder + engine: json_engine + shortcut: gpod + timeout: 4.0 + paging: false + search_url: https://gpodder.net/search.json?q={query} + url_query: url + title_query: title + content_query: description + page_size: 19 + categories: music + disabled: true + about: + website: https://gpodder.net + wikidata_id: Q3093354 + official_api_documentation: https://gpoddernet.readthedocs.io/en/latest/api/ + use_official_api: false + 
require_api_key: false + results: JSON + + - name: habrahabr + engine: xpath + paging: true + search_url: https://habr.com/en/search/page{pageno}/?q={query} + results_xpath: //article[contains(@class, "tm-articles-list__item")] + url_xpath: .//a[@class="tm-title__link"]/@href + title_xpath: .//a[@class="tm-title__link"] + content_xpath: .//div[contains(@class, "article-formatted-body")] + categories: it + timeout: 4.0 + disabled: true + shortcut: habr + about: + website: https://habr.com/ + wikidata_id: Q4494434 + official_api_documentation: https://habr.com/en/docs/help/api/ + use_official_api: false + require_api_key: false + results: HTML + + - name: hackernews + engine: hackernews + shortcut: hn + disabled: true + + - name: hoogle + engine: xpath + paging: true + search_url: https://hoogle.haskell.org/?hoogle={query}&start={pageno} + results_xpath: '//div[@class="result"]' + title_xpath: './/div[@class="ans"]//a' + url_xpath: './/div[@class="ans"]//a/@href' + content_xpath: './/div[@class="from"]' + page_size: 20 + categories: [it, packages] + shortcut: ho + about: + website: https://hoogle.haskell.org/ + wikidata_id: Q34010 + official_api_documentation: https://hackage.haskell.org/api + use_official_api: false + require_api_key: false + results: JSON + + - name: imdb + engine: imdb + shortcut: imdb + timeout: 6.0 + disabled: true + + - name: imgur + engine: imgur + shortcut: img + disabled: true + + - name: ina + engine: ina + shortcut: in + timeout: 6.0 + disabled: true + + - name: invidious + engine: invidious + # Instances will be selected randomly, see https://api.invidious.io/ for + # instances that are stable (good uptime) and close to you.
+ base_url: + - https://invidious.io.lol + - https://invidious.fdn.fr + - https://yt.artemislena.eu + - https://invidious.tiekoetter.com + - https://invidious.flokinet.to + - https://vid.puffyan.us + - https://invidious.privacydev.net + - https://inv.tux.pizza + shortcut: iv + timeout: 3.0 + disabled: true + + - name: jisho + engine: jisho + shortcut: js + timeout: 3.0 + disabled: true + + - name: kickass + engine: kickass + base_url: + - https://kickasstorrents.to + - https://kickasstorrents.cr + - https://kickasstorrent.cr + - https://kickass.sx + - https://kat.am + shortcut: kc + timeout: 4.0 + + - name: lemmy communities + engine: lemmy + lemmy_type: Communities + shortcut: leco + + - name: lemmy users + engine: lemmy + network: lemmy communities + lemmy_type: Users + shortcut: leus + + - name: lemmy posts + engine: lemmy + network: lemmy communities + lemmy_type: Posts + shortcut: lepo + + - name: lemmy comments + engine: lemmy + network: lemmy communities + lemmy_type: Comments + shortcut: lecom + + - name: library genesis + engine: xpath + # search_url: https://libgen.is/search.php?req={query} + search_url: https://libgen.rs/search.php?req={query} + url_xpath: //a[contains(@href,"book/index.php?md5")]/@href + title_xpath: //a[contains(@href,"book/")]/text()[1] + content_xpath: //td/a[1][contains(@href,"=author")]/text() + categories: files + timeout: 7.0 + disabled: true + shortcut: lg + about: + website: https://libgen.fun/ + wikidata_id: Q22017206 + official_api_documentation: + use_official_api: false + require_api_key: false + results: HTML + + - name: z-library + engine: zlibrary + shortcut: zlib + categories: files + timeout: 7.0 + + - name: library of congress + engine: loc + shortcut: loc + categories: images + + - name: lingva + engine: lingva + shortcut: lv + # set lingva instance in url, by default it will use the official instance + # url: https://lingva.thedaviddelta.com + + - name: lobste.rs + engine: xpath + search_url: 
https://lobste.rs/search?utf8=%E2%9C%93&q={query}&what=stories&order=relevance + results_xpath: //li[contains(@class, "story")] + url_xpath: .//a[@class="u-url"]/@href + title_xpath: .//a[@class="u-url"] + content_xpath: .//a[@class="domain"] + categories: it + shortcut: lo + timeout: 5.0 + disabled: true + about: + website: https://lobste.rs/ + wikidata_id: Q60762874 + official_api_documentation: + use_official_api: false + require_api_key: false + results: HTML + + - name: azlyrics + shortcut: lyrics + engine: xpath + timeout: 4.0 + disabled: true + categories: [music, lyrics] + paging: true + search_url: https://search.azlyrics.com/search.php?q={query}&w=lyrics&p={pageno} + url_xpath: //td[@class="text-left visitedlyr"]/a/@href + title_xpath: //span/b/text() + content_xpath: //td[@class="text-left visitedlyr"]/a/small + about: + website: https://azlyrics.com + wikidata_id: Q66372542 + official_api_documentation: + use_official_api: false + require_api_key: false + results: HTML + + - name: mastodon users + engine: mastodon + mastodon_type: accounts + base_url: https://mastodon.social + shortcut: mau + + - name: mastodon hashtags + engine: mastodon + mastodon_type: hashtags + base_url: https://mastodon.social + shortcut: mah + + # - name: matrixrooms + # engine: mrs + # # https://docs.searxng.org/dev/engines/online/mrs.html + # # base_url: https://mrs-api-host + # shortcut: mtrx + # disabled: true + + - name: mdn + shortcut: mdn + engine: json_engine + categories: [it] + paging: true + search_url: https://developer.mozilla.org/api/v1/search?q={query}&page={pageno} + results_query: documents + url_query: mdn_url + url_prefix: https://developer.mozilla.org + title_query: title + content_query: summary + about: + website: https://developer.mozilla.org + wikidata_id: Q3273508 + official_api_documentation: null + use_official_api: false + require_api_key: false + results: JSON + + - name: metacpan + engine: metacpan + shortcut: cpan + disabled: true + 
number_of_results: 20 + + # - name: meilisearch + # engine: meilisearch + # shortcut: mes + # enable_http: true + # base_url: http://localhost:7700 + # index: my-index + + - name: mixcloud + engine: mixcloud + shortcut: mc + + # MongoDB engine + # Required dependency: pymongo + # - name: mymongo + # engine: mongodb + # shortcut: md + # exact_match_only: false + # host: '127.0.0.1' + # port: 27017 + # enable_http: true + # results_per_page: 20 + # database: 'business' + # collection: 'reviews' # name of the db collection + # key: 'name' # key in the collection to search for + + - name: mozhi + engine: mozhi + base_url: + - https://mozhi.aryak.me + - https://translate.bus-hit.me + - https://nyc1.mz.ggtyler.dev + # mozhi_engine: google - see https://mozhi.aryak.me for supported engines + timeout: 4.0 + shortcut: mz + disabled: true + + - name: mwmbl + engine: mwmbl + # api_url: https://api.mwmbl.org + shortcut: mwm + disabled: true + + - name: npm + engine: json_engine + paging: true + first_page_num: 0 + search_url: https://api.npms.io/v2/search?q={query}&size=25&from={pageno} + results_query: results + url_query: package/links/npm + title_query: package/name + content_query: package/description + page_size: 25 + categories: [it, packages] + disabled: true + timeout: 5.0 + shortcut: npm + about: + website: https://npms.io/ + wikidata_id: Q7067518 + official_api_documentation: https://api-docs.npms.io/ + use_official_api: false + require_api_key: false + results: JSON + + - name: nyaa + engine: nyaa + shortcut: nt + disabled: true + + - name: mankier + engine: json_engine + search_url: https://www.mankier.com/api/v2/mans/?q={query} + results_query: results + url_query: url + title_query: name + content_query: description + categories: it + shortcut: man + about: + website: https://www.mankier.com/ + official_api_documentation: https://www.mankier.com/api + use_official_api: true + require_api_key: false + results: JSON + + - name: odysee + engine: odysee + shortcut: 
od + disabled: true + + - name: openairedatasets + engine: json_engine + paging: true + search_url: https://api.openaire.eu/search/datasets?format=json&page={pageno}&size=10&title={query} + results_query: response/results/result + url_query: metadata/oaf:entity/oaf:result/children/instance/webresource/url/$ + title_query: metadata/oaf:entity/oaf:result/title/$ + content_query: metadata/oaf:entity/oaf:result/description/$ + content_html_to_text: true + categories: 'science' + shortcut: oad + timeout: 5.0 + about: + website: https://www.openaire.eu/ + wikidata_id: Q25106053 + official_api_documentation: https://api.openaire.eu/ + use_official_api: false + require_api_key: false + results: JSON + + - name: openairepublications + engine: json_engine + paging: true + search_url: https://api.openaire.eu/search/publications?format=json&page={pageno}&size=10&title={query} + results_query: response/results/result + url_query: metadata/oaf:entity/oaf:result/children/instance/webresource/url/$ + title_query: metadata/oaf:entity/oaf:result/title/$ + content_query: metadata/oaf:entity/oaf:result/description/$ + content_html_to_text: true + categories: science + shortcut: oap + timeout: 5.0 + about: + website: https://www.openaire.eu/ + wikidata_id: Q25106053 + official_api_documentation: https://api.openaire.eu/ + use_official_api: false + require_api_key: false + results: JSON + + # - name: opensemanticsearch + # engine: opensemantic + # shortcut: oss + # base_url: 'http://localhost:8983/solr/opensemanticsearch/' + + - name: openstreetmap + engine: openstreetmap + shortcut: osm + + - name: openrepos + engine: xpath + paging: true + search_url: https://openrepos.net/search/node/{query}?page={pageno} + url_xpath: //li[@class="search-result"]//h3[@class="title"]/a/@href + title_xpath: //li[@class="search-result"]//h3[@class="title"]/a + content_xpath: //li[@class="search-result"]//div[@class="search-snippet-info"]//p[@class="search-snippet"] + categories: files + timeout: 4.0 + 
disabled: true + shortcut: or + about: + website: https://openrepos.net/ + wikidata_id: + official_api_documentation: + use_official_api: false + require_api_key: false + results: HTML + + - name: packagist + engine: json_engine + paging: true + search_url: https://packagist.org/search.json?q={query}&page={pageno} + results_query: results + url_query: url + title_query: name + content_query: description + categories: [it, packages] + disabled: true + timeout: 5.0 + shortcut: pack + about: + website: https://packagist.org + wikidata_id: Q108311377 + official_api_documentation: https://packagist.org/apidoc + use_official_api: true + require_api_key: false + results: JSON + + - name: pdbe + engine: pdbe + shortcut: pdb + # Hide obsolete PDB entries. Default is not to hide obsolete structures + # hide_obsolete: false + + - name: photon + engine: photon + shortcut: ph + + - name: pinterest + engine: pinterest + shortcut: pin + + - name: piped + engine: piped + shortcut: ppd + categories: videos + piped_filter: videos + timeout: 3.0 + + # URL to use as link and for embeds + frontend_url: https://srv.piped.video + # Instance will be selected randomly, for more see https://piped-instances.kavin.rocks/ + backend_url: + - https://pipedapi.kavin.rocks + - https://pipedapi-libre.kavin.rocks + - https://pipedapi.adminforge.de + + - name: piped.music + engine: piped + network: piped + shortcut: ppdm + categories: music + piped_filter: music_songs + timeout: 3.0 + + - name: piratebay + engine: piratebay + shortcut: tpb + # You may need to change this URL to a proxy if piratebay is blocked in your + # country + url: https://thepiratebay.org/ + timeout: 3.0 + + - name: podcastindex + engine: podcastindex + shortcut: podcast + + # Required dependency: psycopg2 + # - name: postgresql + # engine: postgresql + # database: postgres + # username: postgres + # password: postgres + # limit: 10 + # query_str: 'SELECT * from my_table WHERE my_column = %(query)s' + # shortcut : psql + + - 
name: presearch + engine: presearch + search_type: search + categories: [general, web] + shortcut: ps + timeout: 4.0 + disabled: true + + - name: presearch images + engine: presearch + network: presearch + search_type: images + categories: [images, web] + timeout: 4.0 + shortcut: psimg + disabled: true + + - name: presearch videos + engine: presearch + network: presearch + search_type: videos + categories: [general, web] + timeout: 4.0 + shortcut: psvid + disabled: true + + - name: presearch news + engine: presearch + network: presearch + search_type: news + categories: [news, web] + timeout: 4.0 + shortcut: psnews + disabled: true + + - name: pub.dev + engine: xpath + shortcut: pd + search_url: https://pub.dev/packages?q={query}&page={pageno} + paging: true + results_xpath: //div[contains(@class,"packages-item")] + url_xpath: ./div/h3/a/@href + title_xpath: ./div/h3/a + content_xpath: ./div/div/div[contains(@class,"packages-description")]/span + categories: [packages, it] + timeout: 3.0 + disabled: true + first_page_num: 1 + about: + website: https://pub.dev/ + official_api_documentation: https://pub.dev/help/api + use_official_api: false + require_api_key: false + results: HTML + + - name: pubmed + engine: pubmed + shortcut: pub + timeout: 3.0 + + - name: pypi + shortcut: pypi + engine: xpath + paging: true + search_url: https://pypi.org/search/?q={query}&page={pageno} + results_xpath: /html/body/main/div/div/div/form/div/ul/li/a[@class="package-snippet"] + url_xpath: ./@href + title_xpath: ./h3/span[@class="package-snippet__name"] + content_xpath: ./p + suggestion_xpath: /html/body/main/div/div/div/form/div/div[@class="callout-block"]/p/span/a[@class="link"] + first_page_num: 1 + categories: [it, packages] + about: + website: https://pypi.org + wikidata_id: Q2984686 + official_api_documentation: https://warehouse.readthedocs.io/api-reference/index.html + use_official_api: false + require_api_key: false + results: HTML + + - name: qwant + qwant_categ: web + 
engine: qwant + shortcut: qw + categories: [general, web] + additional_tests: + rosebud: *test_rosebud + + - name: qwant news + qwant_categ: news + engine: qwant + shortcut: qwn + categories: news + network: qwant + + - name: qwant images + qwant_categ: images + engine: qwant + shortcut: qwi + categories: [images, web] + network: qwant + + - name: qwant videos + qwant_categ: videos + engine: qwant + shortcut: qwv + categories: [videos, web] + network: qwant + + # - name: library + # engine: recoll + # shortcut: lib + # base_url: 'https://recoll.example.org/' + # search_dir: '' + # mount_prefix: /export + # dl_prefix: 'https://download.example.org' + # timeout: 30.0 + # categories: files + # disabled: true + + # - name: recoll library reference + # engine: recoll + # base_url: 'https://recoll.example.org/' + # search_dir: reference + # mount_prefix: /export + # dl_prefix: 'https://download.example.org' + # shortcut: libr + # timeout: 30.0 + # categories: files + # disabled: true + + - name: radio browser + engine: radio_browser + shortcut: rb + + - name: reddit + engine: reddit + shortcut: re + page_size: 25 + + - name: rottentomatoes + engine: rottentomatoes + shortcut: rt + disabled: true + + # Required dependency: redis + # - name: myredis + # shortcut : rds + # engine: redis_server + # exact_match_only: false + # host: '127.0.0.1' + # port: 6379 + # enable_http: true + # password: '' + # db: 0 + + # tmp suspended: bad certificate + # - name: scanr structures + # shortcut: scs + # engine: scanr_structures + # disabled: true + + - name: sepiasearch + engine: sepiasearch + shortcut: sep + + - name: soundcloud + engine: soundcloud + shortcut: sc + + - name: stackoverflow + engine: stackexchange + shortcut: st + api_site: 'stackoverflow' + categories: [it, q&a] + + - name: askubuntu + engine: stackexchange + shortcut: ubuntu + api_site: 'askubuntu' + categories: [it, q&a] + + - name: internetarchivescholar + engine: internet_archive_scholar + shortcut: ias + timeout: 
5.0 + + - name: superuser + engine: stackexchange + shortcut: su + api_site: 'superuser' + categories: [it, q&a] + + - name: searchcode code + engine: searchcode_code + shortcut: scc + disabled: true + + - name: framalibre + engine: framalibre + shortcut: frl + disabled: true + + # - name: searx + # engine: searx_engine + # shortcut: se + # instance_urls : + # - http://127.0.0.1:8888/ + # - ... + # disabled: true + + - name: semantic scholar + engine: semantic_scholar + disabled: true + shortcut: se + + # Spotify needs API credentials + # - name: spotify + # engine: spotify + # shortcut: stf + # api_client_id: ******* + # api_client_secret: ******* + + # - name: solr + # engine: solr + # shortcut: slr + # base_url: http://localhost:8983 + # collection: collection_name + # sort: '' # sorting: asc or desc + # field_list: '' # comma separated list of field names to display on the UI + # default_fields: '' # default field to query + # query_fields: '' # query fields + # enable_http: true + + # - name: springer nature + # engine: springer + # # get your API key from: https://dev.springernature.com/signup + # # working API key, for test & debug: "a69685087d07eca9f13db62f65b8f601" + # api_key: 'unset' + # shortcut: springer + # timeout: 15.0 + + - name: startpage + engine: startpage + shortcut: sp + timeout: 6.0 + disabled: true + additional_tests: + rosebud: *test_rosebud + + - name: tokyotoshokan + engine: tokyotoshokan + shortcut: tt + timeout: 6.0 + disabled: true + + - name: solidtorrents + engine: solidtorrents + shortcut: solid + timeout: 4.0 + base_url: + - https://solidtorrents.to + - https://bitsearch.to + + # For this demo of the sqlite engine download: + # https://liste.mediathekview.de/filmliste-v2.db.bz2 + # and unpack into searx/data/filmliste-v2.db + # Query to test: "!demo concert" + # + # - name: demo + # engine: sqlite + # shortcut: demo + # categories: general + # result_template: default.html + # database: searx/data/filmliste-v2.db + # query_str: >- 
+ # SELECT title || ' (' || time(duration, 'unixepoch') || ')' AS title, + # COALESCE( NULLIF(url_video_hd,''), NULLIF(url_video_sd,''), url_video) AS url, + # description AS content + # FROM film + # WHERE title LIKE :wildcard OR description LIKE :wildcard + # ORDER BY duration DESC + + - name: tagesschau + engine: tagesschau + # when set to false, display URLs from Tagesschau, and not the actual source + # (e.g. NDR, WDR, SWR, HR, ...) + use_source_url: true + shortcut: ts + disabled: true + + - name: tmdb + engine: xpath + paging: true + categories: movies + search_url: https://www.themoviedb.org/search?page={pageno}&query={query} + results_xpath: //div[contains(@class,"movie") or contains(@class,"tv")]//div[contains(@class,"card")] + url_xpath: .//div[contains(@class,"poster")]/a/@href + thumbnail_xpath: .//img/@src + title_xpath: .//div[contains(@class,"title")]//h2 + content_xpath: .//div[contains(@class,"overview")] + shortcut: tm + disabled: true + + # Requires Tor + - name: torch + engine: xpath + paging: true + search_url: http://xmh57jrknzkhv6y3ls3ubitzfqnkrwxhopf5aygthi7d6rplyvk3noyd.onion/cgi-bin/omega/omega?P={query}&DEFAULTOP=and + results_xpath: //table//tr + url_xpath: ./td[2]/a + title_xpath: ./td[2]/b + content_xpath: ./td[2]/small + categories: onions + enable_http: true + shortcut: tch + + # torznab engine lets you query any torznab compatible indexer. Using this + # engine in combination with Jackett opens the possibility to query a lot of + # public and private indexers directly from SearXNG. 
More details at: + # https://docs.searxng.org/dev/engines/online/torznab.html + # + # - name: Torznab EZTV + # engine: torznab + # shortcut: eztv + # base_url: http://localhost:9117/api/v2.0/indexers/eztv/results/torznab + # enable_http: true # if using localhost + # api_key: xxxxxxxxxxxxxxx + # show_magnet_links: true + # show_torrent_files: false + # # https://github.com/Jackett/Jackett/wiki/Jackett-Categories + # torznab_categories: # optional + # - 2000 + # - 5000 + + # tmp suspended - too slow, too many errors + # - name: urbandictionary + # engine : xpath + # search_url : https://www.urbandictionary.com/define.php?term={query} + # url_xpath : //*[@class="word"]/@href + # title_xpath : //*[@class="def-header"] + # content_xpath: //*[@class="meaning"] + # shortcut: ud + + - name: unsplash + engine: unsplash + shortcut: us + + - name: yandex music + engine: yandex_music + shortcut: ydm + disabled: true + # https://yandex.com/support/music/access.html + inactive: true + + - name: yahoo + engine: yahoo + shortcut: yh + disabled: true + + - name: yahoo news + engine: yahoo_news + shortcut: yhn + + - name: youtube + shortcut: yt + # You can use the engine using the official stable API, but you need an API + # key See: https://console.developers.google.com/project + # + # engine: youtube_api + # api_key: 'apikey' # required! 
+ # + # Or you can use the html non-stable engine, activated by default + engine: youtube_noapi + + - name: dailymotion + engine: dailymotion + shortcut: dm + + - name: vimeo + engine: vimeo + shortcut: vm + + - name: wiby + engine: json_engine + paging: true + search_url: https://wiby.me/json/?q={query}&p={pageno} + url_query: URL + title_query: Title + content_query: Snippet + categories: [general, web] + shortcut: wib + disabled: true + about: + website: https://wiby.me/ + + - name: alexandria + engine: json_engine + shortcut: alx + categories: general + paging: true + search_url: https://api.alexandria.org/?a=1&q={query}&p={pageno} + results_query: results + title_query: title + url_query: url + content_query: snippet + timeout: 1.5 + disabled: true + about: + website: https://alexandria.org/ + official_api_documentation: https://github.com/alexandria-org/alexandria-api/raw/master/README.md + use_official_api: true + require_api_key: false + results: JSON + + - name: wikibooks + engine: mediawiki + weight: 0.5 + shortcut: wb + categories: [general, wikimedia] + base_url: 'https://{language}.wikibooks.org/' + search_type: text + disabled: true + about: + website: https://www.wikibooks.org/ + wikidata_id: Q367 + + - name: wikinews + engine: mediawiki + shortcut: wn + categories: [news, wikimedia] + base_url: 'https://{language}.wikinews.org/' + search_type: text + srsort: create_timestamp_desc + about: + website: https://www.wikinews.org/ + wikidata_id: Q964 + + - name: wikiquote + engine: mediawiki + weight: 0.5 + shortcut: wq + categories: [general, wikimedia] + base_url: 'https://{language}.wikiquote.org/' + search_type: text + disabled: true + additional_tests: + rosebud: *test_rosebud + about: + website: https://www.wikiquote.org/ + wikidata_id: Q369 + + - name: wikisource + engine: mediawiki + weight: 0.5 + shortcut: ws + categories: [general, wikimedia] + base_url: 'https://{language}.wikisource.org/' + search_type: text + disabled: true + about: + 
website: https://www.wikisource.org/ + wikidata_id: Q263 + + - name: wikispecies + engine: mediawiki + shortcut: wsp + categories: [general, science, wikimedia] + base_url: 'https://species.wikimedia.org/' + search_type: text + disabled: true + about: + website: https://species.wikimedia.org/ + wikidata_id: Q13679 + + - name: wiktionary + engine: mediawiki + shortcut: wt + categories: [dictionaries, wikimedia] + base_url: 'https://{language}.wiktionary.org/' + search_type: text + about: + website: https://www.wiktionary.org/ + wikidata_id: Q151 + + - name: wikiversity + engine: mediawiki + weight: 0.5 + shortcut: wv + categories: [general, wikimedia] + base_url: 'https://{language}.wikiversity.org/' + search_type: text + disabled: true + about: + website: https://www.wikiversity.org/ + wikidata_id: Q370 + + - name: wikivoyage + engine: mediawiki + weight: 0.5 + shortcut: wy + categories: [general, wikimedia] + base_url: 'https://{language}.wikivoyage.org/' + search_type: text + disabled: true + about: + website: https://www.wikivoyage.org/ + wikidata_id: Q373 + + - name: wikicommons.images + engine: wikicommons + shortcut: wc + categories: images + number_of_results: 10 + + - name: wolframalpha + shortcut: wa + # You can use the engine using the official stable API, but you need an API + # key. 
See: https://products.wolframalpha.com/api/ + # + # engine: wolframalpha_api + # api_key: '' + # + # Or you can use the html non-stable engine, activated by default + engine: wolframalpha_noapi + timeout: 6.0 + categories: general + disabled: false + + - name: dictzone + engine: dictzone + shortcut: dc + + - name: mymemory translated + engine: translated + shortcut: tl + timeout: 5.0 + # You can use without an API key, but you are limited to 1000 words/day + # See: https://mymemory.translated.net/doc/usagelimits.php + # api_key: '' + + # Required dependency: mysql-connector-python + # - name: mysql + # engine: mysql_server + # database: mydatabase + # username: user + # password: pass + # limit: 10 + # query_str: 'SELECT * from mytable WHERE fieldname=%(query)s' + # shortcut: mysql + + - name: 1337x + engine: 1337x + shortcut: 1337x + disabled: true + + - name: duden + engine: duden + shortcut: du + disabled: true + + - name: seznam + shortcut: szn + engine: seznam + disabled: true + + # - name: deepl + # engine: deepl + # shortcut: dpl + # # You can use the engine using the official stable API, but you need an API key + # # See: https://www.deepl.com/pro-api?cta=header-pro-api + # api_key: '' # required! 
+ # timeout: 5.0 + # disabled: true + + - name: mojeek + shortcut: mjk + engine: xpath + paging: true + categories: [general, web] + search_url: https://www.mojeek.com/search?q={query}&s={pageno}&lang={lang}&lb={lang} + results_xpath: //ul[@class="results-standard"]/li/a[@class="ob"] + url_xpath: ./@href + title_xpath: ../h2/a + content_xpath: ..//p[@class="s"] + suggestion_xpath: //div[@class="top-info"]/p[@class="top-info spell"]/em/a + first_page_num: 0 + page_size: 10 + max_page: 100 + disabled: true + about: + website: https://www.mojeek.com/ + wikidata_id: Q60747299 + official_api_documentation: https://www.mojeek.com/services/api.html/ + use_official_api: false + require_api_key: false + results: HTML + + - name: moviepilot + engine: moviepilot + shortcut: mp + disabled: true + + - name: naver + shortcut: nvr + categories: [general, web] + engine: xpath + paging: true + search_url: https://search.naver.com/search.naver?where=webkr&sm=osp_hty&ie=UTF-8&query={query}&start={pageno} + url_xpath: //a[@class="link_tit"]/@href + title_xpath: //a[@class="link_tit"] + content_xpath: //a[@class="total_dsc"]/div + first_page_num: 1 + page_size: 10 + disabled: true + about: + website: https://www.naver.com/ + wikidata_id: Q485639 + official_api_documentation: https://developers.naver.com/docs/nmt/examples/ + use_official_api: false + require_api_key: false + results: HTML + language: ko + + - name: rubygems + shortcut: rbg + engine: xpath + paging: true + search_url: https://rubygems.org/search?page={pageno}&query={query} + results_xpath: /html/body/main/div/a[@class="gems__gem"] + url_xpath: ./@href + title_xpath: ./span/h2 + content_xpath: ./span/p + suggestion_xpath: /html/body/main/div/div[@class="search__suggestions"]/p/a + first_page_num: 1 + categories: [it, packages] + disabled: true + about: + website: https://rubygems.org/ + wikidata_id: Q1853420 + official_api_documentation: https://guides.rubygems.org/rubygems-org-api/ + use_official_api: false + 
require_api_key: false + results: HTML + + - name: peertube + engine: peertube + shortcut: ptb + paging: true + # alternatives see: https://instances.joinpeertube.org/instances + # base_url: https://tube.4aem.com + categories: videos + disabled: true + timeout: 6.0 + + - name: mediathekviewweb + engine: mediathekviewweb + shortcut: mvw + disabled: true + + - name: yacy + engine: yacy + categories: general + search_type: text + base_url: https://yacy.searchlab.eu + shortcut: ya + disabled: true + # required if you aren't using HTTPS for your local yacy instance + # https://docs.searxng.org/dev/engines/online/yacy.html + # enable_http: true + # timeout: 3.0 + # search_mode: 'global' + + - name: yacy images + engine: yacy + categories: images + search_type: image + base_url: https://yacy.searchlab.eu + shortcut: yai + disabled: true + + - name: rumble + engine: rumble + shortcut: ru + base_url: https://rumble.com/ + paging: true + categories: videos + disabled: true + + - name: livespace + engine: livespace + shortcut: ls + categories: videos + disabled: true + timeout: 5.0 + + - name: wordnik + engine: wordnik + shortcut: def + base_url: https://www.wordnik.com/ + categories: [dictionaries] + timeout: 5.0 + + - name: woxikon.de synonyme + engine: xpath + shortcut: woxi + categories: [dictionaries] + timeout: 5.0 + disabled: true + search_url: https://synonyme.woxikon.de/synonyme/{query}.php + url_xpath: //div[@class="upper-synonyms"]/a/@href + content_xpath: //div[@class="synonyms-list-group"] + title_xpath: //div[@class="upper-synonyms"]/a + no_result_for_http_status: [404] + about: + website: https://www.woxikon.de/ + wikidata_id: # No Wikidata ID + use_official_api: false + require_api_key: false + results: HTML + language: de + + - name: seekr news + engine: seekr + shortcut: senews + categories: news + seekr_category: news + disabled: true + + - name: seekr images + engine: seekr + network: seekr news + shortcut: seimg + categories: images + seekr_category: 
images + disabled: true + + - name: seekr videos + engine: seekr + network: seekr news + shortcut: sevid + categories: videos + seekr_category: videos + disabled: true + + - name: sjp.pwn + engine: sjp + shortcut: sjp + base_url: https://sjp.pwn.pl/ + timeout: 5.0 + disabled: true + + - name: stract + engine: stract + shortcut: str + disabled: true + + - name: svgrepo + engine: svgrepo + shortcut: svg + timeout: 10.0 + disabled: true + + - name: tootfinder + engine: tootfinder + shortcut: toot + + - name: wallhaven + engine: wallhaven + # api_key: abcdefghijklmnopqrstuvwxyz + shortcut: wh + + # wikimini: online encyclopedia for children + # The fulltext and title parameter is necessary for Wikimini because + # sometimes it will not show the results and redirect instead + - name: wikimini + engine: xpath + shortcut: wkmn + search_url: https://fr.wikimini.org/w/index.php?search={query}&title=Sp%C3%A9cial%3ASearch&fulltext=Search + url_xpath: //li/div[@class="mw-search-result-heading"]/a/@href + title_xpath: //li//div[@class="mw-search-result-heading"]/a + content_xpath: //li/div[@class="searchresult"] + categories: general + disabled: true + about: + website: https://wikimini.org/ + wikidata_id: Q3568032 + use_official_api: false + require_api_key: false + results: HTML + language: fr + + - name: wttr.in + engine: wttr + shortcut: wttr + timeout: 9.0 + + - name: yummly + engine: yummly + shortcut: yum + disabled: true + + - name: brave + engine: brave + shortcut: br + time_range_support: true + paging: true + categories: [general, web] + brave_category: search + # brave_spellcheck: true + + - name: brave.images + engine: brave + network: brave + shortcut: brimg + categories: [images, web] + brave_category: images + + - name: brave.videos + engine: brave + network: brave + shortcut: brvid + categories: [videos, web] + brave_category: videos + + - name: brave.news + engine: brave + network: brave + shortcut: brnews + categories: news + brave_category: news + + # - 
name: brave.goggles + # engine: brave + # network: brave + # shortcut: brgog + # time_range_support: true + # paging: true + # categories: [general, web] + # brave_category: goggles + # Goggles: # required! This should be a URL ending in .goggle + + - name: lib.rs + shortcut: lrs + engine: xpath + search_url: https://lib.rs/search?q={query} + results_xpath: /html/body/main/div/ol/li/a + url_xpath: ./@href + title_xpath: ./div[@class="h"]/h4 + content_xpath: ./div[@class="h"]/p + categories: [it, packages] + disabled: true + about: + website: https://lib.rs + wikidata_id: Q113486010 + use_official_api: false + require_api_key: false + results: HTML + + - name: sourcehut + shortcut: srht + engine: xpath + paging: true + search_url: https://sr.ht/projects?page={pageno}&search={query} + results_xpath: (//div[@class="event-list"])[1]/div[@class="event"] + url_xpath: ./h4/a[2]/@href + title_xpath: ./h4/a[2] + content_xpath: ./p + first_page_num: 1 + categories: [it, repos] + disabled: true + about: + website: https://sr.ht + wikidata_id: Q78514485 + official_api_documentation: https://man.sr.ht/ + use_official_api: false + require_api_key: false + results: HTML + + - name: goo + shortcut: goo + engine: xpath + paging: true + search_url: https://search.goo.ne.jp/web.jsp?MT={query}&FR={pageno}0 + url_xpath: //div[@class="result"]/p[@class='title fsL1']/a/@href + title_xpath: //div[@class="result"]/p[@class='title fsL1']/a + content_xpath: //p[contains(@class,'url fsM')]/following-sibling::p + first_page_num: 0 + categories: [general, web] + disabled: true + timeout: 4.0 + about: + website: https://search.goo.ne.jp + wikidata_id: Q249044 + use_official_api: false + require_api_key: false + results: HTML + language: ja + + - name: bt4g + engine: bt4g + shortcut: bt4g + + - name: pkg.go.dev + engine: xpath + shortcut: pgo + search_url: https://pkg.go.dev/search?limit=100&m=package&q={query} + results_xpath: 
/html/body/main/div[contains(@class,"SearchResults")]/div[not(@class)]/div[@class="SearchSnippet"] + url_xpath: ./div[@class="SearchSnippet-headerContainer"]/h2/a/@href + title_xpath: ./div[@class="SearchSnippet-headerContainer"]/h2/a + content_xpath: ./p[@class="SearchSnippet-synopsis"] + categories: [packages, it] + timeout: 3.0 + disabled: true + about: + website: https://pkg.go.dev/ + use_official_api: false + require_api_key: false + results: HTML + +# Doku engine lets you access to any Doku wiki instance: +# A public one or a privete/corporate one. +# - name: ubuntuwiki +# engine: doku +# shortcut: uw +# base_url: 'https://doc.ubuntu-fr.org' + +# Be careful when enabling this engine if you are +# running a public instance. Do not expose any sensitive +# information. You can restrict access by configuring a list +# of access tokens under tokens. +# - name: git grep +# engine: command +# command: ['git', 'grep', '{{QUERY}}'] +# shortcut: gg +# tokens: [] +# disabled: true +# delimiter: +# chars: ':' +# keys: ['filepath', 'code'] + +# Be careful when enabling this engine if you are +# running a public instance. Do not expose any sensitive +# information. You can restrict access by configuring a list +# of access tokens under tokens. +# - name: locate +# engine: command +# command: ['locate', '{{QUERY}}'] +# shortcut: loc +# tokens: [] +# disabled: true +# delimiter: +# chars: ' ' +# keys: ['line'] + +# Be careful when enabling this engine if you are +# running a public instance. Do not expose any sensitive +# information. You can restrict access by configuring a list +# of access tokens under tokens. +# - name: find +# engine: command +# command: ['find', '.', '-name', '{{QUERY}}'] +# query_type: path +# shortcut: fnd +# tokens: [] +# disabled: true +# delimiter: +# chars: ' ' +# keys: ['line'] + +# Be careful when enabling this engine if you are +# running a public instance. Do not expose any sensitive +# information. 
You can restrict access by configuring a list +# of access tokens under tokens. +# - name: pattern search in files +# engine: command +# command: ['fgrep', '{{QUERY}}'] +# shortcut: fgr +# tokens: [] +# disabled: true +# delimiter: +# chars: ' ' +# keys: ['line'] + +# Be careful when enabling this engine if you are +# running a public instance. Do not expose any sensitive +# information. You can restrict access by configuring a list +# of access tokens under tokens. +# - name: regex search in files +# engine: command +# command: ['grep', '{{QUERY}}'] +# shortcut: gr +# tokens: [] +# disabled: true +# delimiter: +# chars: ' ' +# keys: ['line'] + +doi_resolvers: + oadoi.org: 'https://oadoi.org/' + doi.org: 'https://doi.org/' + doai.io: 'https://dissem.in/' + sci-hub.se: 'https://sci-hub.se/' + sci-hub.st: 'https://sci-hub.st/' + sci-hub.ru: 'https://sci-hub.ru/' + +default_doi_resolver: 'oadoi.org' diff --git a/searxng.dockerfile b/searxng.dockerfile new file mode 100644 index 0000000..8bcd2b2 --- /dev/null +++ b/searxng.dockerfile @@ -0,0 +1,3 @@ +FROM searxng/searxng + +COPY searxng-settings.yml /etc/searxng/settings.yml \ No newline at end of file diff --git a/searxng/limiter.toml b/searxng/limiter.toml deleted file mode 100644 index ae69bd3..0000000 --- a/searxng/limiter.toml +++ /dev/null @@ -1,3 +0,0 @@ -[botdetection.ip_limit] -# activate link_token method in the ip_limit method -link_token = true \ No newline at end of file diff --git a/searxng/settings.yml b/searxng/settings.yml deleted file mode 100644 index 54d27c4..0000000 --- a/searxng/settings.yml +++ /dev/null @@ -1,17 +0,0 @@ -use_default_settings: true - -general: - instance_name: 'searxng' - -search: - autocomplete: 'google' - formats: - - html - - json - -server: - secret_key: 'a2fb23f1b02e6ee83875b09826990de0f6bd908b6638e8c10277d415f6ab852b' # Is overwritten by ${SEARXNG_SECRET} - -engines: - - name: wolframalpha - disabled: false diff --git a/searxng/uwsgi.ini b/searxng/uwsgi.ini deleted file 
mode 100644 index dd1247a..0000000 --- a/searxng/uwsgi.ini +++ /dev/null @@ -1,50 +0,0 @@ -[uwsgi] -# Who will run the code -uid = searxng -gid = searxng - -# Number of workers (usually CPU count) -# default value: %k (= number of CPU core, see Dockerfile) -workers = %k - -# Number of threads per worker -# default value: 4 (see Dockerfile) -threads = 4 - -# The right granted on the created socket -chmod-socket = 666 - -# Plugin to use and interpreter config -single-interpreter = true -master = true -plugin = python3 -lazy-apps = true -enable-threads = 4 - -# Module to import -module = searx.webapp - -# Virtualenv and python path -pythonpath = /usr/local/searxng/ -chdir = /usr/local/searxng/searx/ - -# automatically set processes name to something meaningful -auto-procname = true - -# Disable request logging for privacy -disable-logging = true -log-5xx = true - -# Set the max size of a request (request-body excluded) -buffer-size = 8192 - -# No keep alive -# See https://github.com/searx/searx-docker/issues/24 -add-header = Connection: close - -# uwsgi serves the static files -static-map = /static=/usr/local/searxng/searx/static -# expires set to one day -static-expires = /* 86400 -static-gzip-all = True -offload-threads = 4 diff --git a/src/agents/academicSearchAgent.ts b/src/agents/academicSearchAgent.ts new file mode 100644 index 0000000..0e78581 --- /dev/null +++ b/src/agents/academicSearchAgent.ts @@ -0,0 +1,260 @@ +import { BaseMessage } from '@langchain/core/messages'; +import { + PromptTemplate, + ChatPromptTemplate, + MessagesPlaceholder, +} from '@langchain/core/prompts'; +import { + RunnableSequence, + RunnableMap, + RunnableLambda, +} from '@langchain/core/runnables'; +import { ChatOllama } from '@langchain/community/chat_models/ollama'; +import { Ollama } from '@langchain/community/llms/ollama'; +import { OllamaEmbeddings } from '@langchain/community/embeddings/ollama'; +import { StringOutputParser } from '@langchain/core/output_parsers'; +import { 
Document } from '@langchain/core/documents'; +import { searchSearxng } from '../core/searxng'; +import type { StreamEvent } from '@langchain/core/tracers/log_stream'; +import formatChatHistoryAsString from '../utils/formatHistory'; +import eventEmitter from 'events'; +import computeSimilarity from '../utils/computeSimilarity'; + +const chatLLM = new ChatOllama({ + baseUrl: process.env.OLLAMA_URL, + model: process.env.MODEL_NAME, + temperature: 0.7, +}); + +const llm = new Ollama({ + temperature: 0, + model: process.env.MODEL_NAME, + baseUrl: process.env.OLLAMA_URL, +}); + +const embeddings = new OllamaEmbeddings({ + model: process.env.MODEL_NAME, + baseUrl: process.env.OLLAMA_URL, +}); + +const basicAcademicSearchRetrieverPrompt = ` +You will be given a conversation below and a follow up question. You need to rephrase the follow-up question if needed so it is a standalone question that can be used by the LLM to search the web for information. +If it is a writing task or a simple hi, hello rather than a question, you need to return \`not_needed\` as the response. + +Example: +1. Follow up question: How does stable diffusion work? +Rephrased: Stable diffusion working + +2. Follow up question: What is linear algebra? +Rephrased: Linear algebra + +3. Follow up question: What is the third law of thermodynamics? +Rephrased: Third law of thermodynamics + +Conversation: +{chat_history} + +Follow up question: {query} +Rephrased question: +`; + +const basicAcademicSearchResponsePrompt = ` + You are Perplexica, an AI model who is expert at searching the web and answering user's queries. You are set on focus mode 'Acadedemic', this means you will be searching for academic papers and articles on the web. + + Generate a response that is informative and relevant to the user's query based on provided context (the context consits of search results containg a brief description of the content of that page). 
+ You must use this context to answer the user's query in the best way possible. Use an unbaised and journalistic tone in your response. Do not repeat the text. + You must not tell the user to open any link or visit any website to get the answer. You must provide the answer in the response itself. If the user asks for links you can provide them. + Your responses should be medium to long in length be informative and relevant to the user's query. You can use markdowns to format your response. You should use bullet points to list the information. Make sure the answer is not short and is informative. + You have to cite the answer using [number] notation. You must cite the sentences with their relevent context number. You must cite each and every part of the answer so the user can know where the information is coming from. + Place these citations at the end of that particular sentence. You can cite the same sentence multiple times if it is relevant to the user's query like [number1][number2]. + However you do not need to cite it using the same number. You can use different numbers to cite the same sentence multiple times. The number refers to the number of the search result (passed in the context) used to generate that part of the answer. + + Aything inside the following \`context\` HTML block provided below is for your knowledge returned by the search engine and is not shared by the user. You have to answer question on the basis of it and cite the relevant information from it but you do not have to + talk about the context in your response. + + + {context} + + + If you think there's nothing relevant in the search results, you can say that 'Hmm, sorry I could not find any relevant information on this topic. Would you like me to search again or ask something else?'. + Anything between the \`context\` is retrieved from a search engine and is not a part of the conversation with the user. 
Today's date is ${new Date().toISOString()} +`; + +const strParser = new StringOutputParser(); + +const handleStream = async ( + stream: AsyncGenerator, + emitter: eventEmitter, +) => { + for await (const event of stream) { + if ( + event.event === 'on_chain_end' && + event.name === 'FinalSourceRetriever' + ) { + emitter.emit( + 'data', + JSON.stringify({ type: 'sources', data: event.data.output }), + ); + } + if ( + event.event === 'on_chain_stream' && + event.name === 'FinalResponseGenerator' + ) { + emitter.emit( + 'data', + JSON.stringify({ type: 'response', data: event.data.chunk }), + ); + } + if ( + event.event === 'on_chain_end' && + event.name === 'FinalResponseGenerator' + ) { + emitter.emit('end'); + } + } +}; + +const processDocs = async (docs: Document[]) => { + return docs + .map((_, index) => `${index + 1}. ${docs[index].pageContent}`) + .join('\n'); +}; + +const rerankDocs = async ({ + query, + docs, +}: { + query: string; + docs: Document[]; +}) => { + if (docs.length === 0) { + return docs; + } + + const docsWithContent = docs.filter( + (doc) => doc.pageContent && doc.pageContent.length > 0, + ); + + const docEmbeddings = await embeddings.embedDocuments( + docsWithContent.map((doc) => doc.pageContent), + ); + + const queryEmbedding = await embeddings.embedQuery(query); + + const similarity = docEmbeddings.map((docEmbedding, i) => { + const sim = computeSimilarity(queryEmbedding, docEmbedding); + + return { + index: i, + similarity: sim, + }; + }); + + const sortedDocs = similarity + .sort((a, b) => b.similarity - a.similarity) + .slice(0, 15) + .map((sim) => docsWithContent[sim.index]); + + return sortedDocs; +}; + +type BasicChainInput = { + chat_history: BaseMessage[]; + query: string; +}; + +const basicAcademicSearchRetrieverChain = RunnableSequence.from([ + PromptTemplate.fromTemplate(basicAcademicSearchRetrieverPrompt), + llm, + strParser, + RunnableLambda.from(async (input: string) => { + if (input === 'not_needed') { + return { query: '', 
docs: [] }; + } + + const res = await searchSearxng(input, { + language: 'en', + engines: [ + 'arxiv', + 'google_scholar', + 'internet_archive_scholar', + 'pubmed', + ], + }); + + const documents = res.results.map( + (result) => + new Document({ + pageContent: result.content, + metadata: { + title: result.title, + url: result.url, + ...(result.img_src && { img_src: result.img_src }), + }, + }), + ); + + return { query: input, docs: documents }; + }), +]); + +const basicAcademicSearchAnsweringChain = RunnableSequence.from([ + RunnableMap.from({ + query: (input: BasicChainInput) => input.query, + chat_history: (input: BasicChainInput) => input.chat_history, + context: RunnableSequence.from([ + (input) => ({ + query: input.query, + chat_history: formatChatHistoryAsString(input.chat_history), + }), + basicAcademicSearchRetrieverChain + .pipe(rerankDocs) + .withConfig({ + runName: 'FinalSourceRetriever', + }) + .pipe(processDocs), + ]), + }), + ChatPromptTemplate.fromMessages([ + ['system', basicAcademicSearchResponsePrompt], + new MessagesPlaceholder('chat_history'), + ['user', '{query}'], + ]), + chatLLM, + strParser, +]).withConfig({ + runName: 'FinalResponseGenerator', +}); + +const basicAcademicSearch = (query: string, history: BaseMessage[]) => { + const emitter = new eventEmitter(); + + try { + const stream = basicAcademicSearchAnsweringChain.streamEvents( + { + chat_history: history, + query: query, + }, + { + version: 'v1', + }, + ); + + handleStream(stream, emitter); + } catch (err) { + emitter.emit( + 'error', + JSON.stringify({ data: 'An error has occurred please try again later' }), + ); + console.error(err); + } + + return emitter; +}; + +const handleAcademicSearch = (message: string, history: BaseMessage[]) => { + const emitter = basicAcademicSearch(message, history); + return emitter; +}; + +export default handleAcademicSearch; diff --git a/src/agents/imageSearchAgent.ts b/src/agents/imageSearchAgent.ts new file mode 100644 index 0000000..5e38123 --- 
/dev/null +++ b/src/agents/imageSearchAgent.ts @@ -0,0 +1,81 @@ +import { + RunnableSequence, + RunnableMap, + RunnableLambda, +} from '@langchain/core/runnables'; +import { PromptTemplate } from '@langchain/core/prompts'; +import { Ollama } from '@langchain/community/llms/ollama'; +import formatChatHistoryAsString from '../utils/formatHistory'; +import { BaseMessage } from '@langchain/core/messages'; +import { StringOutputParser } from '@langchain/core/output_parsers'; +import { searchSearxng } from '../core/searxng'; + +const llm = new Ollama({ + temperature: 0, + model: process.env.MODEL_NAME, + baseUrl: process.env.OLLAMA_URL, +}); + +const imageSearchChainPrompt = ` +You will be given a conversation below and a follow up question. You need to rephrase the follow-up question so it is a standalone question that can be used by the LLM to search the web for images. +You need to make sure the rephrased question agrees with the conversation and is relevant to the conversation. + +Example: +1. Follow up question: What is a cat? +Rephrased: A cat + +2. Follow up question: What is a car? How does it works? +Rephrased: Car working + +3. Follow up question: How does an AC work? 
+Rephrased: AC working + +Conversation: +{chat_history} + +Follow up question: {query} +Rephrased question: +`; + +type ImageSearchChainInput = { + chat_history: BaseMessage[]; + query: string; +}; + +const strParser = new StringOutputParser(); + +const imageSearchChain = RunnableSequence.from([ + RunnableMap.from({ + chat_history: (input: ImageSearchChainInput) => { + return formatChatHistoryAsString(input.chat_history); + }, + query: (input: ImageSearchChainInput) => { + return input.query; + }, + }), + PromptTemplate.fromTemplate(imageSearchChainPrompt), + llm, + strParser, + RunnableLambda.from(async (input: string) => { + const res = await searchSearxng(input, { + categories: ['images'], + engines: ['bing_images', 'google_images'], + }); + + const images = []; + + res.results.forEach((result) => { + if (result.img_src && result.url && result.title) { + images.push({ + img_src: result.img_src, + url: result.url, + title: result.title, + }); + } + }); + + return images.slice(0, 10); + }), +]); + +export default imageSearchChain; diff --git a/src/agents/redditSearchAgent.ts b/src/agents/redditSearchAgent.ts new file mode 100644 index 0000000..d5ab77c --- /dev/null +++ b/src/agents/redditSearchAgent.ts @@ -0,0 +1,256 @@ +import { BaseMessage } from '@langchain/core/messages'; +import { + PromptTemplate, + ChatPromptTemplate, + MessagesPlaceholder, +} from '@langchain/core/prompts'; +import { + RunnableSequence, + RunnableMap, + RunnableLambda, +} from '@langchain/core/runnables'; +import { ChatOllama } from '@langchain/community/chat_models/ollama'; +import { Ollama } from '@langchain/community/llms/ollama'; +import { OllamaEmbeddings } from '@langchain/community/embeddings/ollama'; +import { StringOutputParser } from '@langchain/core/output_parsers'; +import { Document } from '@langchain/core/documents'; +import { searchSearxng } from '../core/searxng'; +import type { StreamEvent } from '@langchain/core/tracers/log_stream'; +import formatChatHistoryAsString from 
'../utils/formatHistory'; +import eventEmitter from 'events'; +import computeSimilarity from '../utils/computeSimilarity'; + +const chatLLM = new ChatOllama({ + baseUrl: process.env.OLLAMA_URL, + model: process.env.MODEL_NAME, + temperature: 0.7, +}); + +const llm = new Ollama({ + temperature: 0, + model: process.env.MODEL_NAME, + baseUrl: process.env.OLLAMA_URL, +}); + +const embeddings = new OllamaEmbeddings({ + model: process.env.MODEL_NAME, + baseUrl: process.env.OLLAMA_URL, +}); + +const basicRedditSearchRetrieverPrompt = ` +You will be given a conversation below and a follow up question. You need to rephrase the follow-up question if needed so it is a standalone question that can be used by the LLM to search the web for information. +If it is a writing task or a simple hi, hello rather than a question, you need to return \`not_needed\` as the response. + +Example: +1. Follow up question: Which company is most likely to create an AGI +Rephrased: Which company is most likely to create an AGI + +2. Follow up question: Is Earth flat? +Rephrased: Is Earth flat? + +3. Follow up question: Is there life on Mars? +Rephrased: Is there life on Mars? + +Conversation: +{chat_history} + +Follow up question: {query} +Rephrased question: +`; + +const basicRedditSearchResponsePrompt = ` + You are Perplexica, an AI model who is expert at searching the web and answering user's queries. You are set on focus mode 'Reddit', this means you will be searching for information, opinions and discussions on the web using Reddit. + + Generate a response that is informative and relevant to the user's query based on provided context (the context consits of search results containg a brief description of the content of that page). + You must use this context to answer the user's query in the best way possible. Use an unbaised and journalistic tone in your response. Do not repeat the text. + You must not tell the user to open any link or visit any website to get the answer. 
You must provide the answer in the response itself. If the user asks for links you can provide them. + Your responses should be medium to long in length be informative and relevant to the user's query. You can use markdowns to format your response. You should use bullet points to list the information. Make sure the answer is not short and is informative. + You have to cite the answer using [number] notation. You must cite the sentences with their relevent context number. You must cite each and every part of the answer so the user can know where the information is coming from. + Place these citations at the end of that particular sentence. You can cite the same sentence multiple times if it is relevant to the user's query like [number1][number2]. + However you do not need to cite it using the same number. You can use different numbers to cite the same sentence multiple times. The number refers to the number of the search result (passed in the context) used to generate that part of the answer. + + Aything inside the following \`context\` HTML block provided below is for your knowledge returned by Reddit and is not shared by the user. You have to answer question on the basis of it and cite the relevant information from it but you do not have to + talk about the context in your response. + + + {context} + + + If you think there's nothing relevant in the search results, you can say that 'Hmm, sorry I could not find any relevant information on this topic. Would you like me to search again or ask something else?'. + Anything between the \`context\` is retrieved from Reddit and is not a part of the conversation with the user. 
Today's date is ${new Date().toISOString()} +`; + +const strParser = new StringOutputParser(); + +const handleStream = async ( + stream: AsyncGenerator, + emitter: eventEmitter, +) => { + for await (const event of stream) { + if ( + event.event === 'on_chain_end' && + event.name === 'FinalSourceRetriever' + ) { + emitter.emit( + 'data', + JSON.stringify({ type: 'sources', data: event.data.output }), + ); + } + if ( + event.event === 'on_chain_stream' && + event.name === 'FinalResponseGenerator' + ) { + emitter.emit( + 'data', + JSON.stringify({ type: 'response', data: event.data.chunk }), + ); + } + if ( + event.event === 'on_chain_end' && + event.name === 'FinalResponseGenerator' + ) { + emitter.emit('end'); + } + } +}; + +const processDocs = async (docs: Document[]) => { + return docs + .map((_, index) => `${index + 1}. ${docs[index].pageContent}`) + .join('\n'); +}; + +const rerankDocs = async ({ + query, + docs, +}: { + query: string; + docs: Document[]; +}) => { + if (docs.length === 0) { + return docs; + } + + const docsWithContent = docs.filter( + (doc) => doc.pageContent && doc.pageContent.length > 0, + ); + + const docEmbeddings = await embeddings.embedDocuments( + docsWithContent.map((doc) => doc.pageContent), + ); + + const queryEmbedding = await embeddings.embedQuery(query); + + const similarity = docEmbeddings.map((docEmbedding, i) => { + const sim = computeSimilarity(queryEmbedding, docEmbedding); + + return { + index: i, + similarity: sim, + }; + }); + + const sortedDocs = similarity + .sort((a, b) => b.similarity - a.similarity) + .slice(0, 15) + .filter((sim) => sim.similarity > 0.3) + .map((sim) => docsWithContent[sim.index]); + + return sortedDocs; +}; + +type BasicChainInput = { + chat_history: BaseMessage[]; + query: string; +}; + +const basicRedditSearchRetrieverChain = RunnableSequence.from([ + PromptTemplate.fromTemplate(basicRedditSearchRetrieverPrompt), + llm, + strParser, + RunnableLambda.from(async (input: string) => { + if (input === 
'not_needed') { + return { query: '', docs: [] }; + } + + const res = await searchSearxng(input, { + language: 'en', + engines: ['reddit'], + }); + + const documents = res.results.map( + (result) => + new Document({ + pageContent: result.content ? result.content : result.title, + metadata: { + title: result.title, + url: result.url, + ...(result.img_src && { img_src: result.img_src }), + }, + }), + ); + + return { query: input, docs: documents }; + }), +]); + +const basicRedditSearchAnsweringChain = RunnableSequence.from([ + RunnableMap.from({ + query: (input: BasicChainInput) => input.query, + chat_history: (input: BasicChainInput) => input.chat_history, + context: RunnableSequence.from([ + (input) => ({ + query: input.query, + chat_history: formatChatHistoryAsString(input.chat_history), + }), + basicRedditSearchRetrieverChain + .pipe(rerankDocs) + .withConfig({ + runName: 'FinalSourceRetriever', + }) + .pipe(processDocs), + ]), + }), + ChatPromptTemplate.fromMessages([ + ['system', basicRedditSearchResponsePrompt], + new MessagesPlaceholder('chat_history'), + ['user', '{query}'], + ]), + chatLLM, + strParser, +]).withConfig({ + runName: 'FinalResponseGenerator', +}); + +const basicRedditSearch = (query: string, history: BaseMessage[]) => { + const emitter = new eventEmitter(); + + try { + const stream = basicRedditSearchAnsweringChain.streamEvents( + { + chat_history: history, + query: query, + }, + { + version: 'v1', + }, + ); + + handleStream(stream, emitter); + } catch (err) { + emitter.emit( + 'error', + JSON.stringify({ data: 'An error has occurred please try again later' }), + ); + console.error(err); + } + + return emitter; +}; + +const handleRedditSearch = (message: string, history: BaseMessage[]) => { + const emitter = basicRedditSearch(message, history); + return emitter; +}; + +export default handleRedditSearch; diff --git a/src/agents/webSearchAgent.ts b/src/agents/webSearchAgent.ts new file mode 100644 index 0000000..5d60dda --- /dev/null +++ 
b/src/agents/webSearchAgent.ts @@ -0,0 +1,255 @@ +import { BaseMessage } from '@langchain/core/messages'; +import { + PromptTemplate, + ChatPromptTemplate, + MessagesPlaceholder, +} from '@langchain/core/prompts'; +import { + RunnableSequence, + RunnableMap, + RunnableLambda, +} from '@langchain/core/runnables'; +import { ChatOllama } from '@langchain/community/chat_models/ollama'; +import { Ollama } from '@langchain/community/llms/ollama'; +import { OllamaEmbeddings } from '@langchain/community/embeddings/ollama'; +import { StringOutputParser } from '@langchain/core/output_parsers'; +import { Document } from '@langchain/core/documents'; +import { searchSearxng } from '../core/searxng'; +import type { StreamEvent } from '@langchain/core/tracers/log_stream'; +import formatChatHistoryAsString from '../utils/formatHistory'; +import eventEmitter from 'events'; +import computeSimilarity from '../utils/computeSimilarity'; + +const chatLLM = new ChatOllama({ + baseUrl: process.env.OLLAMA_URL, + model: process.env.MODEL_NAME, + temperature: 0.7, +}); + +const llm = new Ollama({ + temperature: 0, + model: process.env.MODEL_NAME, + baseUrl: process.env.OLLAMA_URL, +}); + +const embeddings = new OllamaEmbeddings({ + model: process.env.MODEL_NAME, + baseUrl: process.env.OLLAMA_URL, +}); + +const basicSearchRetrieverPrompt = ` +You will be given a conversation below and a follow up question. You need to rephrase the follow-up question if needed so it is a standalone question that can be used by the LLM to search the web for information. +If it is a writing task or a simple hi, hello rather than a question, you need to return \`not_needed\` as the response. + +Example: +1. Follow up question: What is the capital of France? +Rephrased: Capital of france + +2. Follow up question: What is the population of New York City? +Rephrased: Population of New York City + +3. Follow up question: What is Docker? 
+Rephrased: What is Docker + +Conversation: +{chat_history} + +Follow up question: {query} +Rephrased question: +`; + +const basicWebSearchResponsePrompt = ` + You are Perplexica, an AI model who is expert at searching the web and answering user's queries. + + Generate a response that is informative and relevant to the user's query based on provided context (the context consits of search results containg a brief description of the content of that page). + You must use this context to answer the user's query in the best way possible. Use an unbaised and journalistic tone in your response. Do not repeat the text. + You must not tell the user to open any link or visit any website to get the answer. You must provide the answer in the response itself. If the user asks for links you can provide them. + Your responses should be medium to long in length be informative and relevant to the user's query. You can use markdowns to format your response. You should use bullet points to list the information. Make sure the answer is not short and is informative. + You have to cite the answer using [number] notation. You must cite the sentences with their relevent context number. You must cite each and every part of the answer so the user can know where the information is coming from. + Place these citations at the end of that particular sentence. You can cite the same sentence multiple times if it is relevant to the user's query like [number1][number2]. + However you do not need to cite it using the same number. You can use different numbers to cite the same sentence multiple times. The number refers to the number of the search result (passed in the context) used to generate that part of the answer. + + Aything inside the following \`context\` HTML block provided below is for your knowledge returned by the search engine and is not shared by the user. 
You have to answer question on the basis of it and cite the relevant information from it but you do not have to + talk about the context in your response. + + + {context} + + + If you think there's nothing relevant in the search results, you can say that 'Hmm, sorry I could not find any relevant information on this topic. Would you like me to search again or ask something else?'. + Anything between the \`context\` is retrieved from a search engine and is not a part of the conversation with the user. Today's date is ${new Date().toISOString()} +`; + +const strParser = new StringOutputParser(); + +const handleStream = async ( + stream: AsyncGenerator, + emitter: eventEmitter, +) => { + for await (const event of stream) { + if ( + event.event === 'on_chain_end' && + event.name === 'FinalSourceRetriever' + ) { + emitter.emit( + 'data', + JSON.stringify({ type: 'sources', data: event.data.output }), + ); + } + if ( + event.event === 'on_chain_stream' && + event.name === 'FinalResponseGenerator' + ) { + emitter.emit( + 'data', + JSON.stringify({ type: 'response', data: event.data.chunk }), + ); + } + if ( + event.event === 'on_chain_end' && + event.name === 'FinalResponseGenerator' + ) { + emitter.emit('end'); + } + } +}; + +const processDocs = async (docs: Document[]) => { + return docs + .map((_, index) => `${index + 1}. 
${docs[index].pageContent}`) + .join('\n'); +}; + +const rerankDocs = async ({ + query, + docs, +}: { + query: string; + docs: Document[]; +}) => { + if (docs.length === 0) { + return docs; + } + + const docsWithContent = docs.filter( + (doc) => doc.pageContent && doc.pageContent.length > 0, + ); + + const docEmbeddings = await embeddings.embedDocuments( + docsWithContent.map((doc) => doc.pageContent), + ); + + const queryEmbedding = await embeddings.embedQuery(query); + + const similarity = docEmbeddings.map((docEmbedding, i) => { + const sim = computeSimilarity(queryEmbedding, docEmbedding); + + return { + index: i, + similarity: sim, + }; + }); + + const sortedDocs = similarity + .sort((a, b) => b.similarity - a.similarity) + .filter((sim) => sim.similarity > 0.5) + .slice(0, 15) + .map((sim) => docsWithContent[sim.index]); + + return sortedDocs; +}; + +type BasicChainInput = { + chat_history: BaseMessage[]; + query: string; +}; + +const basicWebSearchRetrieverChain = RunnableSequence.from([ + PromptTemplate.fromTemplate(basicSearchRetrieverPrompt), + llm, + strParser, + RunnableLambda.from(async (input: string) => { + if (input === 'not_needed') { + return { query: '', docs: [] }; + } + + const res = await searchSearxng(input, { + language: 'en', + }); + + const documents = res.results.map( + (result) => + new Document({ + pageContent: result.content, + metadata: { + title: result.title, + url: result.url, + ...(result.img_src && { img_src: result.img_src }), + }, + }), + ); + + return { query: input, docs: documents }; + }), +]); + +const basicWebSearchAnsweringChain = RunnableSequence.from([ + RunnableMap.from({ + query: (input: BasicChainInput) => input.query, + chat_history: (input: BasicChainInput) => input.chat_history, + context: RunnableSequence.from([ + (input) => ({ + query: input.query, + chat_history: formatChatHistoryAsString(input.chat_history), + }), + basicWebSearchRetrieverChain + .pipe(rerankDocs) + .withConfig({ + runName: 
'FinalSourceRetriever', + }) + .pipe(processDocs), + ]), + }), + ChatPromptTemplate.fromMessages([ + ['system', basicWebSearchResponsePrompt], + new MessagesPlaceholder('chat_history'), + ['user', '{query}'], + ]), + chatLLM, + strParser, +]).withConfig({ + runName: 'FinalResponseGenerator', +}); + +const basicWebSearch = (query: string, history: BaseMessage[]) => { + const emitter = new eventEmitter(); + + try { + const stream = basicWebSearchAnsweringChain.streamEvents( + { + chat_history: history, + query: query, + }, + { + version: 'v1', + }, + ); + + handleStream(stream, emitter); + } catch (err) { + emitter.emit( + 'error', + JSON.stringify({ data: 'An error has occurred please try again later' }), + ); + console.error(err); + } + + return emitter; +}; + +const handleWebSearch = (message: string, history: BaseMessage[]) => { + const emitter = basicWebSearch(message, history); + return emitter; +}; + +export default handleWebSearch; diff --git a/src/agents/wolframAlphaSearchAgent.ts b/src/agents/wolframAlphaSearchAgent.ts new file mode 100644 index 0000000..5f42ed7 --- /dev/null +++ b/src/agents/wolframAlphaSearchAgent.ts @@ -0,0 +1,212 @@ +import { BaseMessage } from '@langchain/core/messages'; +import { + PromptTemplate, + ChatPromptTemplate, + MessagesPlaceholder, +} from '@langchain/core/prompts'; +import { + RunnableSequence, + RunnableMap, + RunnableLambda, +} from '@langchain/core/runnables'; +import { ChatOllama } from '@langchain/community/chat_models/ollama'; +import { Ollama } from '@langchain/community/llms/ollama'; +import { StringOutputParser } from '@langchain/core/output_parsers'; +import { Document } from '@langchain/core/documents'; +import { searchSearxng } from '../core/searxng'; +import type { StreamEvent } from '@langchain/core/tracers/log_stream'; +import formatChatHistoryAsString from '../utils/formatHistory'; +import eventEmitter from 'events'; + +const chatLLM = new ChatOllama({ + baseUrl: process.env.OLLAMA_URL, + model: 
process.env.MODEL_NAME, + temperature: 0.7, +}); + +const llm = new Ollama({ + temperature: 0, + model: process.env.MODEL_NAME, + baseUrl: process.env.OLLAMA_URL, +}); + +const basicWolframAlphaSearchRetrieverPrompt = ` +You will be given a conversation below and a follow up question. You need to rephrase the follow-up question if needed so it is a standalone question that can be used by the LLM to search the web for information. +If it is a writing task or a simple hi, hello rather than a question, you need to return \`not_needed\` as the response. + +Example: +1. Follow up question: What is the atomic radius of S? +Rephrased: Atomic radius of S + +2. Follow up question: What is linear algebra? +Rephrased: Linear algebra + +3. Follow up question: What is the third law of thermodynamics? +Rephrased: Third law of thermodynamics + +Conversation: +{chat_history} + +Follow up question: {query} +Rephrased question: +`; + +const basicWolframAlphaSearchResponsePrompt = ` + You are Perplexica, an AI model who is expert at searching the web and answering user's queries. You are set on focus mode 'Wolfram Alpha', this means you will be searching for information on the web using Wolfram Alpha. It is a computational knowledge engine that can answer factual queries and perform computations. + + Generate a response that is informative and relevant to the user's query based on provided context (the context consits of search results containg a brief description of the content of that page). + You must use this context to answer the user's query in the best way possible. Use an unbaised and journalistic tone in your response. Do not repeat the text. + You must not tell the user to open any link or visit any website to get the answer. You must provide the answer in the response itself. If the user asks for links you can provide them. + Your responses should be medium to long in length be informative and relevant to the user's query. You can use markdowns to format your response. 
You should use bullet points to list the information. Make sure the answer is not short and is informative. + You have to cite the answer using [number] notation. You must cite the sentences with their relevent context number. You must cite each and every part of the answer so the user can know where the information is coming from. + Place these citations at the end of that particular sentence. You can cite the same sentence multiple times if it is relevant to the user's query like [number1][number2]. + However you do not need to cite it using the same number. You can use different numbers to cite the same sentence multiple times. The number refers to the number of the search result (passed in the context) used to generate that part of the answer. + + Aything inside the following \`context\` HTML block provided below is for your knowledge returned by Wolfram Alpha and is not shared by the user. You have to answer question on the basis of it and cite the relevant information from it but you do not have to + talk about the context in your response. + + + {context} + + + If you think there's nothing relevant in the search results, you can say that 'Hmm, sorry I could not find any relevant information on this topic. Would you like me to search again or ask something else?'. + Anything between the \`context\` is retrieved from Wolfram Alpha and is not a part of the conversation with the user. 
Today's date is ${new Date().toISOString()} +`; + +const strParser = new StringOutputParser(); + +const handleStream = async ( + stream: AsyncGenerator, + emitter: eventEmitter, +) => { + for await (const event of stream) { + if ( + event.event === 'on_chain_end' && + event.name === 'FinalSourceRetriever' + ) { + emitter.emit( + 'data', + JSON.stringify({ type: 'sources', data: event.data.output }), + ); + } + if ( + event.event === 'on_chain_stream' && + event.name === 'FinalResponseGenerator' + ) { + emitter.emit( + 'data', + JSON.stringify({ type: 'response', data: event.data.chunk }), + ); + } + if ( + event.event === 'on_chain_end' && + event.name === 'FinalResponseGenerator' + ) { + emitter.emit('end'); + } + } +}; + +const processDocs = async (docs: Document[]) => { + return docs + .map((_, index) => `${index + 1}. ${docs[index].pageContent}`) + .join('\n'); +}; + +type BasicChainInput = { + chat_history: BaseMessage[]; + query: string; +}; + +const basicWolframAlphaSearchRetrieverChain = RunnableSequence.from([ + PromptTemplate.fromTemplate(basicWolframAlphaSearchRetrieverPrompt), + llm, + strParser, + RunnableLambda.from(async (input: string) => { + if (input === 'not_needed') { + return { query: '', docs: [] }; + } + + const res = await searchSearxng(input, { + language: 'en', + engines: ['wolframalpha'], + }); + + const documents = res.results.map( + (result) => + new Document({ + pageContent: result.content, + metadata: { + title: result.title, + url: result.url, + ...(result.img_src && { img_src: result.img_src }), + }, + }), + ); + + return { query: input, docs: documents }; + }), +]); + +const basicWolframAlphaSearchAnsweringChain = RunnableSequence.from([ + RunnableMap.from({ + query: (input: BasicChainInput) => input.query, + chat_history: (input: BasicChainInput) => input.chat_history, + context: RunnableSequence.from([ + (input) => ({ + query: input.query, + chat_history: formatChatHistoryAsString(input.chat_history), + }), + 
basicWolframAlphaSearchRetrieverChain + .pipe(({ query, docs }) => { + return docs; + }) + .withConfig({ + runName: 'FinalSourceRetriever', + }) + .pipe(processDocs), + ]), + }), + ChatPromptTemplate.fromMessages([ + ['system', basicWolframAlphaSearchResponsePrompt], + new MessagesPlaceholder('chat_history'), + ['user', '{query}'], + ]), + chatLLM, + strParser, +]).withConfig({ + runName: 'FinalResponseGenerator', +}); + +const basicWolframAlphaSearch = (query: string, history: BaseMessage[]) => { + const emitter = new eventEmitter(); + + try { + const stream = basicWolframAlphaSearchAnsweringChain.streamEvents( + { + chat_history: history, + query: query, + }, + { + version: 'v1', + }, + ); + + handleStream(stream, emitter); + } catch (err) { + emitter.emit( + 'error', + JSON.stringify({ data: 'An error has occurred please try again later' }), + ); + console.error(err); + } + + return emitter; +}; + +const handleWolframAlphaSearch = (message: string, history: BaseMessage[]) => { + const emitter = basicWolframAlphaSearch(message, history); + return emitter; +}; + +export default handleWolframAlphaSearch; diff --git a/src/agents/writingAssistant.ts b/src/agents/writingAssistant.ts new file mode 100644 index 0000000..eba9872 --- /dev/null +++ b/src/agents/writingAssistant.ts @@ -0,0 +1,86 @@ +import { BaseMessage } from '@langchain/core/messages'; +import { + ChatPromptTemplate, + MessagesPlaceholder, +} from '@langchain/core/prompts'; +import { RunnableSequence } from '@langchain/core/runnables'; +import { ChatOllama } from '@langchain/community/chat_models/ollama'; +import { StringOutputParser } from '@langchain/core/output_parsers'; +import type { StreamEvent } from '@langchain/core/tracers/log_stream'; +import eventEmitter from 'events'; + +const chatLLM = new ChatOllama({ + baseUrl: process.env.OLLAMA_URL, + model: process.env.MODEL_NAME, + temperature: 0.7, +}); + +const writingAssistantPrompt = ` +You are Perplexica, an AI model who is expert at searching the 
web and answering user's queries. You are currently set on focus mode 'Writing Assistant', this means you will be helping the user write a response to a given query. +Since you are a writing assistant, you would not perform web searches. If you think you lack information to answer the query, you can ask the user for more information or suggest them to switch to a different focus mode. +`; + +const strParser = new StringOutputParser(); + +const handleStream = async ( + stream: AsyncGenerator, + emitter: eventEmitter, +) => { + for await (const event of stream) { + if ( + event.event === 'on_chain_stream' && + event.name === 'FinalResponseGenerator' + ) { + emitter.emit( + 'data', + JSON.stringify({ type: 'response', data: event.data.chunk }), + ); + } + if ( + event.event === 'on_chain_end' && + event.name === 'FinalResponseGenerator' + ) { + emitter.emit('end'); + } + } +}; + +const writingAssistantChain = RunnableSequence.from([ + ChatPromptTemplate.fromMessages([ + ['system', writingAssistantPrompt], + new MessagesPlaceholder('chat_history'), + ['user', '{query}'], + ]), + chatLLM, + strParser, +]).withConfig({ + runName: 'FinalResponseGenerator', +}); + +const handleWritingAssistant = (query: string, history: BaseMessage[]) => { + const emitter = new eventEmitter(); + + try { + const stream = writingAssistantChain.streamEvents( + { + chat_history: history, + query: query, + }, + { + version: 'v1', + }, + ); + + handleStream(stream, emitter); + } catch (err) { + emitter.emit( + 'error', + JSON.stringify({ data: 'An error has occurred please try again later' }), + ); + console.error(err); + } + + return emitter; +}; + +export default handleWritingAssistant; diff --git a/src/agents/youtubeSearchAgent.ts b/src/agents/youtubeSearchAgent.ts new file mode 100644 index 0000000..7fa258b --- /dev/null +++ b/src/agents/youtubeSearchAgent.ts @@ -0,0 +1,256 @@ +import { BaseMessage } from '@langchain/core/messages'; +import { + PromptTemplate, + ChatPromptTemplate, + 
MessagesPlaceholder, +} from '@langchain/core/prompts'; +import { + RunnableSequence, + RunnableMap, + RunnableLambda, +} from '@langchain/core/runnables'; +import { ChatOllama } from '@langchain/community/chat_models/ollama'; +import { Ollama } from '@langchain/community/llms/ollama'; +import { OllamaEmbeddings } from '@langchain/community/embeddings/ollama'; +import { StringOutputParser } from '@langchain/core/output_parsers'; +import { Document } from '@langchain/core/documents'; +import { searchSearxng } from '../core/searxng'; +import type { StreamEvent } from '@langchain/core/tracers/log_stream'; +import formatChatHistoryAsString from '../utils/formatHistory'; +import eventEmitter from 'events'; +import computeSimilarity from '../utils/computeSimilarity'; + +const chatLLM = new ChatOllama({ + baseUrl: process.env.OLLAMA_URL, + model: process.env.MODEL_NAME, + temperature: 0.7, +}); + +const llm = new Ollama({ + temperature: 0, + model: process.env.MODEL_NAME, + baseUrl: process.env.OLLAMA_URL, +}); + +const embeddings = new OllamaEmbeddings({ + model: process.env.MODEL_NAME, + baseUrl: process.env.OLLAMA_URL, +}); + +const basicYoutubeSearchRetrieverPrompt = ` +You will be given a conversation below and a follow up question. You need to rephrase the follow-up question if needed so it is a standalone question that can be used by the LLM to search the web for information. +If it is a writing task or a simple hi, hello rather than a question, you need to return \`not_needed\` as the response. + +Example: +1. Follow up question: How does an A.C work? +Rephrased: A.C working + +2. Follow up question: Linear algebra explanation video +Rephrased: What is linear algebra? + +3. Follow up question: What is theory of relativity? +Rephrased: What is theory of relativity? 
+ +Conversation: +{chat_history} + +Follow up question: {query} +Rephrased question: +`; + +const basicYoutubeSearchResponsePrompt = ` + You are Perplexica, an AI model who is expert at searching the web and answering user's queries. You are set on focus mode 'Youtube', this means you will be searching for videos on the web using Youtube and providing information based on the video's transcript. + + Generate a response that is informative and relevant to the user's query based on provided context (the context consits of search results containg a brief description of the content of that page). + You must use this context to answer the user's query in the best way possible. Use an unbaised and journalistic tone in your response. Do not repeat the text. + You must not tell the user to open any link or visit any website to get the answer. You must provide the answer in the response itself. If the user asks for links you can provide them. + Your responses should be medium to long in length be informative and relevant to the user's query. You can use markdowns to format your response. You should use bullet points to list the information. Make sure the answer is not short and is informative. + You have to cite the answer using [number] notation. You must cite the sentences with their relevent context number. You must cite each and every part of the answer so the user can know where the information is coming from. + Place these citations at the end of that particular sentence. You can cite the same sentence multiple times if it is relevant to the user's query like [number1][number2]. + However you do not need to cite it using the same number. You can use different numbers to cite the same sentence multiple times. The number refers to the number of the search result (passed in the context) used to generate that part of the answer. + + Aything inside the following \`context\` HTML block provided below is for your knowledge returned by Youtube and is not shared by the user. 
You have to answer question on the basis of it and cite the relevant information from it but you do not have to + talk about the context in your response. + + + {context} + + + If you think there's nothing relevant in the search results, you can say that 'Hmm, sorry I could not find any relevant information on this topic. Would you like me to search again or ask something else?'. + Anything between the \`context\` is retrieved from Youtube and is not a part of the conversation with the user. Today's date is ${new Date().toISOString()} +`; + +const strParser = new StringOutputParser(); + +const handleStream = async ( + stream: AsyncGenerator, + emitter: eventEmitter, +) => { + for await (const event of stream) { + if ( + event.event === 'on_chain_end' && + event.name === 'FinalSourceRetriever' + ) { + emitter.emit( + 'data', + JSON.stringify({ type: 'sources', data: event.data.output }), + ); + } + if ( + event.event === 'on_chain_stream' && + event.name === 'FinalResponseGenerator' + ) { + emitter.emit( + 'data', + JSON.stringify({ type: 'response', data: event.data.chunk }), + ); + } + if ( + event.event === 'on_chain_end' && + event.name === 'FinalResponseGenerator' + ) { + emitter.emit('end'); + } + } +}; + +const processDocs = async (docs: Document[]) => { + return docs + .map((_, index) => `${index + 1}. 
${docs[index].pageContent}`) + .join('\n'); +}; + +const rerankDocs = async ({ + query, + docs, +}: { + query: string; + docs: Document[]; +}) => { + if (docs.length === 0) { + return docs; + } + + const docsWithContent = docs.filter( + (doc) => doc.pageContent && doc.pageContent.length > 0, + ); + + const docEmbeddings = await embeddings.embedDocuments( + docsWithContent.map((doc) => doc.pageContent), + ); + + const queryEmbedding = await embeddings.embedQuery(query); + + const similarity = docEmbeddings.map((docEmbedding, i) => { + const sim = computeSimilarity(queryEmbedding, docEmbedding); + + return { + index: i, + similarity: sim, + }; + }); + + const sortedDocs = similarity + .sort((a, b) => b.similarity - a.similarity) + .slice(0, 15) + .filter((sim) => sim.similarity > 0.3) + .map((sim) => docsWithContent[sim.index]); + + return sortedDocs; +}; + +type BasicChainInput = { + chat_history: BaseMessage[]; + query: string; +}; + +const basicYoutubeSearchRetrieverChain = RunnableSequence.from([ + PromptTemplate.fromTemplate(basicYoutubeSearchRetrieverPrompt), + llm, + strParser, + RunnableLambda.from(async (input: string) => { + if (input === 'not_needed') { + return { query: '', docs: [] }; + } + + const res = await searchSearxng(input, { + language: 'en', + engines: ['youtube'], + }); + + const documents = res.results.map( + (result) => + new Document({ + pageContent: result.content ? 
result.content : result.title, + metadata: { + title: result.title, + url: result.url, + ...(result.img_src && { img_src: result.img_src }), + }, + }), + ); + + return { query: input, docs: documents }; + }), +]); + +const basicYoutubeSearchAnsweringChain = RunnableSequence.from([ + RunnableMap.from({ + query: (input: BasicChainInput) => input.query, + chat_history: (input: BasicChainInput) => input.chat_history, + context: RunnableSequence.from([ + (input) => ({ + query: input.query, + chat_history: formatChatHistoryAsString(input.chat_history), + }), + basicYoutubeSearchRetrieverChain + .pipe(rerankDocs) + .withConfig({ + runName: 'FinalSourceRetriever', + }) + .pipe(processDocs), + ]), + }), + ChatPromptTemplate.fromMessages([ + ['system', basicYoutubeSearchResponsePrompt], + new MessagesPlaceholder('chat_history'), + ['user', '{query}'], + ]), + chatLLM, + strParser, +]).withConfig({ + runName: 'FinalResponseGenerator', +}); + +const basicYoutubeSearch = (query: string, history: BaseMessage[]) => { + const emitter = new eventEmitter(); + + try { + const stream = basicYoutubeSearchAnsweringChain.streamEvents( + { + chat_history: history, + query: query, + }, + { + version: 'v1', + }, + ); + + handleStream(stream, emitter); + } catch (err) { + emitter.emit( + 'error', + JSON.stringify({ data: 'An error has occurred please try again later' }), + ); + console.error(err); + } + + return emitter; +}; + +const handleYoutubeSearch = (message: string, history: BaseMessage[]) => { + const emitter = basicYoutubeSearch(message, history); + return emitter; +}; + +export default handleYoutubeSearch; diff --git a/src/app.ts b/src/app.ts new file mode 100644 index 0000000..993cb23 --- /dev/null +++ b/src/app.ts @@ -0,0 +1,26 @@ +import { startWebSocketServer } from './websocket'; +import express from 'express'; +import cors from 'cors'; +import http from 'http'; +import routes from './routes'; + +const app = express(); +const server = http.createServer(app); + +const 
corsOptions = { + origin: '*', +}; + +app.use(cors(corsOptions)); +app.use(express.json()); + +app.use('/api', routes); +app.get('/api', (_, res) => { + res.status(200).json({ status: 'ok' }); +}); + +server.listen(process.env.PORT!, () => { + console.log(`API server started on port ${process.env.PORT}`); +}); + +startWebSocketServer(server); diff --git a/src/app/api/chat/route.ts b/src/app/api/chat/route.ts deleted file mode 100644 index ba88da6..0000000 --- a/src/app/api/chat/route.ts +++ /dev/null @@ -1,310 +0,0 @@ -import crypto from 'crypto'; -import { AIMessage, BaseMessage, HumanMessage } from '@langchain/core/messages'; -import { EventEmitter } from 'stream'; -import { - getAvailableChatModelProviders, - getAvailableEmbeddingModelProviders, -} from '@/lib/providers'; -import db from '@/lib/db'; -import { chats, messages as messagesSchema } from '@/lib/db/schema'; -import { and, eq, gt } from 'drizzle-orm'; -import { getFileDetails } from '@/lib/utils/files'; -import { BaseChatModel } from '@langchain/core/language_models/chat_models'; -import { ChatOpenAI } from '@langchain/openai'; -import { - getCustomOpenaiApiKey, - getCustomOpenaiApiUrl, - getCustomOpenaiModelName, -} from '@/lib/config'; -import { searchHandlers } from '@/lib/search'; - -export const runtime = 'nodejs'; -export const dynamic = 'force-dynamic'; - -type Message = { - messageId: string; - chatId: string; - content: string; -}; - -type ChatModel = { - provider: string; - name: string; -}; - -type EmbeddingModel = { - provider: string; - name: string; -}; - -type Body = { - message: Message; - optimizationMode: 'speed' | 'balanced' | 'quality'; - focusMode: string; - history: Array<[string, string]>; - files: Array; - chatModel: ChatModel; - embeddingModel: EmbeddingModel; - systemInstructions: string; -}; - -const handleEmitterEvents = async ( - stream: EventEmitter, - writer: WritableStreamDefaultWriter, - encoder: TextEncoder, - aiMessageId: string, - chatId: string, -) => { - let 
recievedMessage = ''; - let sources: any[] = []; - - stream.on('data', (data) => { - const parsedData = JSON.parse(data); - if (parsedData.type === 'response') { - writer.write( - encoder.encode( - JSON.stringify({ - type: 'message', - data: parsedData.data, - messageId: aiMessageId, - }) + '\n', - ), - ); - - recievedMessage += parsedData.data; - } else if (parsedData.type === 'sources') { - writer.write( - encoder.encode( - JSON.stringify({ - type: 'sources', - data: parsedData.data, - messageId: aiMessageId, - }) + '\n', - ), - ); - - sources = parsedData.data; - } - }); - stream.on('end', () => { - writer.write( - encoder.encode( - JSON.stringify({ - type: 'messageEnd', - messageId: aiMessageId, - }) + '\n', - ), - ); - writer.close(); - - db.insert(messagesSchema) - .values({ - content: recievedMessage, - chatId: chatId, - messageId: aiMessageId, - role: 'assistant', - metadata: JSON.stringify({ - createdAt: new Date(), - ...(sources && sources.length > 0 && { sources }), - }), - }) - .execute(); - }); - stream.on('error', (data) => { - const parsedData = JSON.parse(data); - writer.write( - encoder.encode( - JSON.stringify({ - type: 'error', - data: parsedData.data, - }), - ), - ); - writer.close(); - }); -}; - -const handleHistorySave = async ( - message: Message, - humanMessageId: string, - focusMode: string, - files: string[], -) => { - const chat = await db.query.chats.findFirst({ - where: eq(chats.id, message.chatId), - }); - - const fileData = files.map(getFileDetails); - - if (!chat) { - await db - .insert(chats) - .values({ - id: message.chatId, - title: message.content, - createdAt: new Date().toString(), - focusMode: focusMode, - files: fileData, - }) - .execute(); - } else if (JSON.stringify(chat.files ?? 
[]) != JSON.stringify(fileData)) { - db.update(chats) - .set({ - files: files.map(getFileDetails), - }) - .where(eq(chats.id, message.chatId)); - } - - const messageExists = await db.query.messages.findFirst({ - where: eq(messagesSchema.messageId, humanMessageId), - }); - - if (!messageExists) { - await db - .insert(messagesSchema) - .values({ - content: message.content, - chatId: message.chatId, - messageId: humanMessageId, - role: 'user', - metadata: JSON.stringify({ - createdAt: new Date(), - }), - }) - .execute(); - } else { - await db - .delete(messagesSchema) - .where( - and( - gt(messagesSchema.id, messageExists.id), - eq(messagesSchema.chatId, message.chatId), - ), - ) - .execute(); - } -}; - -export const POST = async (req: Request) => { - try { - const body = (await req.json()) as Body; - const { message } = body; - - if (message.content === '') { - return Response.json( - { - message: 'Please provide a message to process', - }, - { status: 400 }, - ); - } - - const [chatModelProviders, embeddingModelProviders] = await Promise.all([ - getAvailableChatModelProviders(), - getAvailableEmbeddingModelProviders(), - ]); - - const chatModelProvider = - chatModelProviders[ - body.chatModel?.provider || Object.keys(chatModelProviders)[0] - ]; - const chatModel = - chatModelProvider[ - body.chatModel?.name || Object.keys(chatModelProvider)[0] - ]; - - const embeddingProvider = - embeddingModelProviders[ - body.embeddingModel?.provider || Object.keys(embeddingModelProviders)[0] - ]; - const embeddingModel = - embeddingProvider[ - body.embeddingModel?.name || Object.keys(embeddingProvider)[0] - ]; - - let llm: BaseChatModel | undefined; - let embedding = embeddingModel.model; - - if (body.chatModel?.provider === 'custom_openai') { - llm = new ChatOpenAI({ - apiKey: getCustomOpenaiApiKey(), - modelName: getCustomOpenaiModelName(), - temperature: 0.7, - configuration: { - baseURL: getCustomOpenaiApiUrl(), - }, - }) as unknown as BaseChatModel; - } else if 
(chatModelProvider && chatModel) { - llm = chatModel.model; - } - - if (!llm) { - return Response.json({ error: 'Invalid chat model' }, { status: 400 }); - } - - if (!embedding) { - return Response.json( - { error: 'Invalid embedding model' }, - { status: 400 }, - ); - } - - const humanMessageId = - message.messageId ?? crypto.randomBytes(7).toString('hex'); - const aiMessageId = crypto.randomBytes(7).toString('hex'); - - const history: BaseMessage[] = body.history.map((msg) => { - if (msg[0] === 'human') { - return new HumanMessage({ - content: msg[1], - }); - } else { - return new AIMessage({ - content: msg[1], - }); - } - }); - - const handler = searchHandlers[body.focusMode]; - - if (!handler) { - return Response.json( - { - message: 'Invalid focus mode', - }, - { status: 400 }, - ); - } - - const stream = await handler.searchAndAnswer( - message.content, - history, - llm, - embedding, - body.optimizationMode, - body.files, - body.systemInstructions, - ); - - const responseStream = new TransformStream(); - const writer = responseStream.writable.getWriter(); - const encoder = new TextEncoder(); - - handleEmitterEvents(stream, writer, encoder, aiMessageId, message.chatId); - handleHistorySave(message, humanMessageId, body.focusMode, body.files); - - return new Response(responseStream.readable, { - headers: { - 'Content-Type': 'text/event-stream', - Connection: 'keep-alive', - 'Cache-Control': 'no-cache, no-transform', - }, - }); - } catch (err) { - console.error('An error occurred while processing chat request:', err); - return Response.json( - { message: 'An error occurred while processing chat request' }, - { status: 500 }, - ); - } -}; diff --git a/src/app/api/chats/[id]/route.ts b/src/app/api/chats/[id]/route.ts deleted file mode 100644 index 6891454..0000000 --- a/src/app/api/chats/[id]/route.ts +++ /dev/null @@ -1,69 +0,0 @@ -import db from '@/lib/db'; -import { chats, messages } from '@/lib/db/schema'; -import { eq } from 'drizzle-orm'; - -export const GET 
= async ( - req: Request, - { params }: { params: Promise<{ id: string }> }, -) => { - try { - const { id } = await params; - - const chatExists = await db.query.chats.findFirst({ - where: eq(chats.id, id), - }); - - if (!chatExists) { - return Response.json({ message: 'Chat not found' }, { status: 404 }); - } - - const chatMessages = await db.query.messages.findMany({ - where: eq(messages.chatId, id), - }); - - return Response.json( - { - chat: chatExists, - messages: chatMessages, - }, - { status: 200 }, - ); - } catch (err) { - console.error('Error in getting chat by id: ', err); - return Response.json( - { message: 'An error has occurred.' }, - { status: 500 }, - ); - } -}; - -export const DELETE = async ( - req: Request, - { params }: { params: Promise<{ id: string }> }, -) => { - try { - const { id } = await params; - - const chatExists = await db.query.chats.findFirst({ - where: eq(chats.id, id), - }); - - if (!chatExists) { - return Response.json({ message: 'Chat not found' }, { status: 404 }); - } - - await db.delete(chats).where(eq(chats.id, id)).execute(); - await db.delete(messages).where(eq(messages.chatId, id)).execute(); - - return Response.json( - { message: 'Chat deleted successfully' }, - { status: 200 }, - ); - } catch (err) { - console.error('Error in deleting chat by id: ', err); - return Response.json( - { message: 'An error has occurred.' }, - { status: 500 }, - ); - } -}; diff --git a/src/app/api/chats/route.ts b/src/app/api/chats/route.ts deleted file mode 100644 index 986a192..0000000 --- a/src/app/api/chats/route.ts +++ /dev/null @@ -1,15 +0,0 @@ -import db from '@/lib/db'; - -export const GET = async (req: Request) => { - try { - let chats = await db.query.chats.findMany(); - chats = chats.reverse(); - return Response.json({ chats: chats }, { status: 200 }); - } catch (err) { - console.error('Error in getting chats: ', err); - return Response.json( - { message: 'An error has occurred.' 
}, - { status: 500 }, - ); - } -}; diff --git a/src/app/api/config/route.ts b/src/app/api/config/route.ts deleted file mode 100644 index f117cce..0000000 --- a/src/app/api/config/route.ts +++ /dev/null @@ -1,127 +0,0 @@ -import { - getAnthropicApiKey, - getCustomOpenaiApiKey, - getCustomOpenaiApiUrl, - getCustomOpenaiModelName, - getGeminiApiKey, - getGroqApiKey, - getOllamaApiEndpoint, - getOpenaiApiKey, - getDeepseekApiKey, - getAimlApiKey, - getLMStudioApiEndpoint, - updateConfig, - getOllamaApiKey, -} from '@/lib/config'; -import { - getAvailableChatModelProviders, - getAvailableEmbeddingModelProviders, -} from '@/lib/providers'; - -export const GET = async (req: Request) => { - try { - const config: Record = {}; - - const [chatModelProviders, embeddingModelProviders] = await Promise.all([ - getAvailableChatModelProviders(), - getAvailableEmbeddingModelProviders(), - ]); - - config['chatModelProviders'] = {}; - config['embeddingModelProviders'] = {}; - - for (const provider in chatModelProviders) { - config['chatModelProviders'][provider] = Object.keys( - chatModelProviders[provider], - ).map((model) => { - return { - name: model, - displayName: chatModelProviders[provider][model].displayName, - }; - }); - } - - for (const provider in embeddingModelProviders) { - config['embeddingModelProviders'][provider] = Object.keys( - embeddingModelProviders[provider], - ).map((model) => { - return { - name: model, - displayName: embeddingModelProviders[provider][model].displayName, - }; - }); - } - - config['openaiApiKey'] = getOpenaiApiKey(); - config['ollamaApiUrl'] = getOllamaApiEndpoint(); - config['ollamaApiKey'] = getOllamaApiKey(); - config['lmStudioApiUrl'] = getLMStudioApiEndpoint(); - config['anthropicApiKey'] = getAnthropicApiKey(); - config['groqApiKey'] = getGroqApiKey(); - config['geminiApiKey'] = getGeminiApiKey(); - config['deepseekApiKey'] = getDeepseekApiKey(); - config['aimlApiKey'] = getAimlApiKey(); - config['customOpenaiApiUrl'] = 
getCustomOpenaiApiUrl(); - config['customOpenaiApiKey'] = getCustomOpenaiApiKey(); - config['customOpenaiModelName'] = getCustomOpenaiModelName(); - - return Response.json({ ...config }, { status: 200 }); - } catch (err) { - console.error('An error occurred while getting config:', err); - return Response.json( - { message: 'An error occurred while getting config' }, - { status: 500 }, - ); - } -}; - -export const POST = async (req: Request) => { - try { - const config = await req.json(); - - const updatedConfig = { - MODELS: { - OPENAI: { - API_KEY: config.openaiApiKey, - }, - GROQ: { - API_KEY: config.groqApiKey, - }, - ANTHROPIC: { - API_KEY: config.anthropicApiKey, - }, - GEMINI: { - API_KEY: config.geminiApiKey, - }, - OLLAMA: { - API_URL: config.ollamaApiUrl, - API_KEY: config.ollamaApiKey, - }, - DEEPSEEK: { - API_KEY: config.deepseekApiKey, - }, - AIMLAPI: { - API_KEY: config.aimlApiKey, - }, - LM_STUDIO: { - API_URL: config.lmStudioApiUrl, - }, - CUSTOM_OPENAI: { - API_URL: config.customOpenaiApiUrl, - API_KEY: config.customOpenaiApiKey, - MODEL_NAME: config.customOpenaiModelName, - }, - }, - }; - - updateConfig(updatedConfig); - - return Response.json({ message: 'Config updated' }, { status: 200 }); - } catch (err) { - console.error('An error occurred while updating config:', err); - return Response.json( - { message: 'An error occurred while updating config' }, - { status: 500 }, - ); - } -}; diff --git a/src/app/api/discover/route.ts b/src/app/api/discover/route.ts deleted file mode 100644 index 415aee8..0000000 --- a/src/app/api/discover/route.ts +++ /dev/null @@ -1,98 +0,0 @@ -import { searchSearxng } from '@/lib/searxng'; - -const websitesForTopic = { - tech: { - query: ['technology news', 'latest tech', 'AI', 'science and innovation'], - links: ['techcrunch.com', 'wired.com', 'theverge.com'], - }, - finance: { - query: ['finance news', 'economy', 'stock market', 'investing'], - links: ['bloomberg.com', 'cnbc.com', 'marketwatch.com'], - }, - art: { - 
query: ['art news', 'culture', 'modern art', 'cultural events'], - links: ['artnews.com', 'hyperallergic.com', 'theartnewspaper.com'], - }, - sports: { - query: ['sports news', 'latest sports', 'cricket football tennis'], - links: ['espn.com', 'bbc.com/sport', 'skysports.com'], - }, - entertainment: { - query: ['entertainment news', 'movies', 'TV shows', 'celebrities'], - links: ['hollywoodreporter.com', 'variety.com', 'deadline.com'], - }, -}; - -type Topic = keyof typeof websitesForTopic; - -export const GET = async (req: Request) => { - try { - const params = new URL(req.url).searchParams; - - const mode: 'normal' | 'preview' = - (params.get('mode') as 'normal' | 'preview') || 'normal'; - const topic: Topic = (params.get('topic') as Topic) || 'tech'; - - const selectedTopic = websitesForTopic[topic]; - - let data = []; - - if (mode === 'normal') { - const seenUrls = new Set(); - - data = ( - await Promise.all( - selectedTopic.links.flatMap((link) => - selectedTopic.query.map(async (query) => { - return ( - await searchSearxng(`site:${link} ${query}`, { - engines: ['bing news'], - pageno: 1, - language: 'en', - }) - ).results; - }), - ), - ) - ) - .flat() - .filter((item) => { - const url = item.url?.toLowerCase().trim(); - if (seenUrls.has(url)) return false; - seenUrls.add(url); - return true; - }) - .sort(() => Math.random() - 0.5); - } else { - data = ( - await searchSearxng( - `site:${selectedTopic.links[Math.floor(Math.random() * selectedTopic.links.length)]} ${selectedTopic.query[Math.floor(Math.random() * selectedTopic.query.length)]}`, - { - engines: ['bing news'], - pageno: 1, - language: 'en', - }, - ) - ).results; - } - - return Response.json( - { - blogs: data, - }, - { - status: 200, - }, - ); - } catch (err) { - console.error(`An error occurred in discover route: ${err}`); - return Response.json( - { - message: 'An error has occurred', - }, - { - status: 500, - }, - ); - } -}; diff --git a/src/app/api/images/route.ts b/src/app/api/images/route.ts 
deleted file mode 100644 index e02854d..0000000 --- a/src/app/api/images/route.ts +++ /dev/null @@ -1,83 +0,0 @@ -import handleImageSearch from '@/lib/chains/imageSearchAgent'; -import { - getCustomOpenaiApiKey, - getCustomOpenaiApiUrl, - getCustomOpenaiModelName, -} from '@/lib/config'; -import { getAvailableChatModelProviders } from '@/lib/providers'; -import { BaseChatModel } from '@langchain/core/language_models/chat_models'; -import { AIMessage, BaseMessage, HumanMessage } from '@langchain/core/messages'; -import { ChatOpenAI } from '@langchain/openai'; - -interface ChatModel { - provider: string; - model: string; -} - -interface ImageSearchBody { - query: string; - chatHistory: any[]; - chatModel?: ChatModel; -} - -export const POST = async (req: Request) => { - try { - const body: ImageSearchBody = await req.json(); - - const chatHistory = body.chatHistory - .map((msg: any) => { - if (msg.role === 'user') { - return new HumanMessage(msg.content); - } else if (msg.role === 'assistant') { - return new AIMessage(msg.content); - } - }) - .filter((msg) => msg !== undefined) as BaseMessage[]; - - const chatModelProviders = await getAvailableChatModelProviders(); - - const chatModelProvider = - chatModelProviders[ - body.chatModel?.provider || Object.keys(chatModelProviders)[0] - ]; - const chatModel = - chatModelProvider[ - body.chatModel?.model || Object.keys(chatModelProvider)[0] - ]; - - let llm: BaseChatModel | undefined; - - if (body.chatModel?.provider === 'custom_openai') { - llm = new ChatOpenAI({ - apiKey: getCustomOpenaiApiKey(), - modelName: getCustomOpenaiModelName(), - temperature: 0.7, - configuration: { - baseURL: getCustomOpenaiApiUrl(), - }, - }) as unknown as BaseChatModel; - } else if (chatModelProvider && chatModel) { - llm = chatModel.model; - } - - if (!llm) { - return Response.json({ error: 'Invalid chat model' }, { status: 400 }); - } - - const images = await handleImageSearch( - { - chat_history: chatHistory, - query: body.query, - }, - 
llm, - ); - - return Response.json({ images }, { status: 200 }); - } catch (err) { - console.error(`An error occurred while searching images: ${err}`); - return Response.json( - { message: 'An error occurred while searching images' }, - { status: 500 }, - ); - } -}; diff --git a/src/app/api/models/route.ts b/src/app/api/models/route.ts deleted file mode 100644 index 04a6949..0000000 --- a/src/app/api/models/route.ts +++ /dev/null @@ -1,47 +0,0 @@ -import { - getAvailableChatModelProviders, - getAvailableEmbeddingModelProviders, -} from '@/lib/providers'; - -export const GET = async (req: Request) => { - try { - const [chatModelProviders, embeddingModelProviders] = await Promise.all([ - getAvailableChatModelProviders(), - getAvailableEmbeddingModelProviders(), - ]); - - Object.keys(chatModelProviders).forEach((provider) => { - Object.keys(chatModelProviders[provider]).forEach((model) => { - delete (chatModelProviders[provider][model] as { model?: unknown }) - .model; - }); - }); - - Object.keys(embeddingModelProviders).forEach((provider) => { - Object.keys(embeddingModelProviders[provider]).forEach((model) => { - delete (embeddingModelProviders[provider][model] as { model?: unknown }) - .model; - }); - }); - - return Response.json( - { - chatModelProviders, - embeddingModelProviders, - }, - { - status: 200, - }, - ); - } catch (err) { - console.error('An error occurred while fetching models', err); - return Response.json( - { - message: 'An error has occurred.', - }, - { - status: 500, - }, - ); - } -}; diff --git a/src/app/api/search/route.ts b/src/app/api/search/route.ts deleted file mode 100644 index 5f752ec..0000000 --- a/src/app/api/search/route.ts +++ /dev/null @@ -1,269 +0,0 @@ -import type { BaseChatModel } from '@langchain/core/language_models/chat_models'; -import type { Embeddings } from '@langchain/core/embeddings'; -import { ChatOpenAI } from '@langchain/openai'; -import { - getAvailableChatModelProviders, - getAvailableEmbeddingModelProviders, -} from 
'@/lib/providers'; -import { AIMessage, BaseMessage, HumanMessage } from '@langchain/core/messages'; -import { MetaSearchAgentType } from '@/lib/search/metaSearchAgent'; -import { - getCustomOpenaiApiKey, - getCustomOpenaiApiUrl, - getCustomOpenaiModelName, -} from '@/lib/config'; -import { searchHandlers } from '@/lib/search'; - -interface chatModel { - provider: string; - name: string; - customOpenAIKey?: string; - customOpenAIBaseURL?: string; -} - -interface embeddingModel { - provider: string; - name: string; -} - -interface ChatRequestBody { - optimizationMode: 'speed' | 'balanced'; - focusMode: string; - chatModel?: chatModel; - embeddingModel?: embeddingModel; - query: string; - history: Array<[string, string]>; - stream?: boolean; - systemInstructions?: string; -} - -export const POST = async (req: Request) => { - try { - const body: ChatRequestBody = await req.json(); - - if (!body.focusMode || !body.query) { - return Response.json( - { message: 'Missing focus mode or query' }, - { status: 400 }, - ); - } - - body.history = body.history || []; - body.optimizationMode = body.optimizationMode || 'balanced'; - body.stream = body.stream || false; - - const history: BaseMessage[] = body.history.map((msg) => { - return msg[0] === 'human' - ? 
new HumanMessage({ content: msg[1] }) - : new AIMessage({ content: msg[1] }); - }); - - const [chatModelProviders, embeddingModelProviders] = await Promise.all([ - getAvailableChatModelProviders(), - getAvailableEmbeddingModelProviders(), - ]); - - const chatModelProvider = - body.chatModel?.provider || Object.keys(chatModelProviders)[0]; - const chatModel = - body.chatModel?.name || - Object.keys(chatModelProviders[chatModelProvider])[0]; - - const embeddingModelProvider = - body.embeddingModel?.provider || Object.keys(embeddingModelProviders)[0]; - const embeddingModel = - body.embeddingModel?.name || - Object.keys(embeddingModelProviders[embeddingModelProvider])[0]; - - let llm: BaseChatModel | undefined; - let embeddings: Embeddings | undefined; - - if (body.chatModel?.provider === 'custom_openai') { - llm = new ChatOpenAI({ - modelName: body.chatModel?.name || getCustomOpenaiModelName(), - apiKey: body.chatModel?.customOpenAIKey || getCustomOpenaiApiKey(), - temperature: 0.7, - configuration: { - baseURL: - body.chatModel?.customOpenAIBaseURL || getCustomOpenaiApiUrl(), - }, - }) as unknown as BaseChatModel; - } else if ( - chatModelProviders[chatModelProvider] && - chatModelProviders[chatModelProvider][chatModel] - ) { - llm = chatModelProviders[chatModelProvider][chatModel] - .model as unknown as BaseChatModel | undefined; - } - - if ( - embeddingModelProviders[embeddingModelProvider] && - embeddingModelProviders[embeddingModelProvider][embeddingModel] - ) { - embeddings = embeddingModelProviders[embeddingModelProvider][ - embeddingModel - ].model as Embeddings | undefined; - } - - if (!llm || !embeddings) { - return Response.json( - { message: 'Invalid model selected' }, - { status: 400 }, - ); - } - - const searchHandler: MetaSearchAgentType = searchHandlers[body.focusMode]; - - if (!searchHandler) { - return Response.json({ message: 'Invalid focus mode' }, { status: 400 }); - } - - const emitter = await searchHandler.searchAndAnswer( - body.query, - 
history, - llm, - embeddings, - body.optimizationMode, - [], - body.systemInstructions || '', - ); - - if (!body.stream) { - return new Promise( - ( - resolve: (value: Response) => void, - reject: (value: Response) => void, - ) => { - let message = ''; - let sources: any[] = []; - - emitter.on('data', (data: string) => { - try { - const parsedData = JSON.parse(data); - if (parsedData.type === 'response') { - message += parsedData.data; - } else if (parsedData.type === 'sources') { - sources = parsedData.data; - } - } catch (error) { - reject( - Response.json( - { message: 'Error parsing data' }, - { status: 500 }, - ), - ); - } - }); - - emitter.on('end', () => { - resolve(Response.json({ message, sources }, { status: 200 })); - }); - - emitter.on('error', (error: any) => { - reject( - Response.json( - { message: 'Search error', error }, - { status: 500 }, - ), - ); - }); - }, - ); - } - - const encoder = new TextEncoder(); - - const abortController = new AbortController(); - const { signal } = abortController; - - const stream = new ReadableStream({ - start(controller) { - let sources: any[] = []; - - controller.enqueue( - encoder.encode( - JSON.stringify({ - type: 'init', - data: 'Stream connected', - }) + '\n', - ), - ); - - signal.addEventListener('abort', () => { - emitter.removeAllListeners(); - - try { - controller.close(); - } catch (error) {} - }); - - emitter.on('data', (data: string) => { - if (signal.aborted) return; - - try { - const parsedData = JSON.parse(data); - - if (parsedData.type === 'response') { - controller.enqueue( - encoder.encode( - JSON.stringify({ - type: 'response', - data: parsedData.data, - }) + '\n', - ), - ); - } else if (parsedData.type === 'sources') { - sources = parsedData.data; - controller.enqueue( - encoder.encode( - JSON.stringify({ - type: 'sources', - data: sources, - }) + '\n', - ), - ); - } - } catch (error) { - controller.error(error); - } - }); - - emitter.on('end', () => { - if (signal.aborted) return; - - 
controller.enqueue( - encoder.encode( - JSON.stringify({ - type: 'done', - }) + '\n', - ), - ); - controller.close(); - }); - - emitter.on('error', (error: any) => { - if (signal.aborted) return; - - controller.error(error); - }); - }, - cancel() { - abortController.abort(); - }, - }); - - return new Response(stream, { - headers: { - 'Content-Type': 'text/event-stream', - 'Cache-Control': 'no-cache, no-transform', - Connection: 'keep-alive', - }, - }); - } catch (err: any) { - console.error(`Error in getting search results: ${err.message}`); - return Response.json( - { message: 'An error has occurred.' }, - { status: 500 }, - ); - } -}; diff --git a/src/app/api/suggestions/route.ts b/src/app/api/suggestions/route.ts deleted file mode 100644 index 99179d2..0000000 --- a/src/app/api/suggestions/route.ts +++ /dev/null @@ -1,81 +0,0 @@ -import generateSuggestions from '@/lib/chains/suggestionGeneratorAgent'; -import { - getCustomOpenaiApiKey, - getCustomOpenaiApiUrl, - getCustomOpenaiModelName, -} from '@/lib/config'; -import { getAvailableChatModelProviders } from '@/lib/providers'; -import { BaseChatModel } from '@langchain/core/language_models/chat_models'; -import { AIMessage, BaseMessage, HumanMessage } from '@langchain/core/messages'; -import { ChatOpenAI } from '@langchain/openai'; - -interface ChatModel { - provider: string; - model: string; -} - -interface SuggestionsGenerationBody { - chatHistory: any[]; - chatModel?: ChatModel; -} - -export const POST = async (req: Request) => { - try { - const body: SuggestionsGenerationBody = await req.json(); - - const chatHistory = body.chatHistory - .map((msg: any) => { - if (msg.role === 'user') { - return new HumanMessage(msg.content); - } else if (msg.role === 'assistant') { - return new AIMessage(msg.content); - } - }) - .filter((msg) => msg !== undefined) as BaseMessage[]; - - const chatModelProviders = await getAvailableChatModelProviders(); - - const chatModelProvider = - chatModelProviders[ - 
body.chatModel?.provider || Object.keys(chatModelProviders)[0] - ]; - const chatModel = - chatModelProvider[ - body.chatModel?.model || Object.keys(chatModelProvider)[0] - ]; - - let llm: BaseChatModel | undefined; - - if (body.chatModel?.provider === 'custom_openai') { - llm = new ChatOpenAI({ - apiKey: getCustomOpenaiApiKey(), - modelName: getCustomOpenaiModelName(), - temperature: 0.7, - configuration: { - baseURL: getCustomOpenaiApiUrl(), - }, - }) as unknown as BaseChatModel; - } else if (chatModelProvider && chatModel) { - llm = chatModel.model; - } - - if (!llm) { - return Response.json({ error: 'Invalid chat model' }, { status: 400 }); - } - - const suggestions = await generateSuggestions( - { - chat_history: chatHistory, - }, - llm, - ); - - return Response.json({ suggestions }, { status: 200 }); - } catch (err) { - console.error(`An error occurred while generating suggestions: ${err}`); - return Response.json( - { message: 'An error occurred while generating suggestions' }, - { status: 500 }, - ); - } -}; diff --git a/src/app/api/uploads/route.ts b/src/app/api/uploads/route.ts deleted file mode 100644 index 9fbaf2d..0000000 --- a/src/app/api/uploads/route.ts +++ /dev/null @@ -1,134 +0,0 @@ -import { NextResponse } from 'next/server'; -import fs from 'fs'; -import path from 'path'; -import crypto from 'crypto'; -import { getAvailableEmbeddingModelProviders } from '@/lib/providers'; -import { PDFLoader } from '@langchain/community/document_loaders/fs/pdf'; -import { DocxLoader } from '@langchain/community/document_loaders/fs/docx'; -import { RecursiveCharacterTextSplitter } from '@langchain/textsplitters'; -import { Document } from 'langchain/document'; - -interface FileRes { - fileName: string; - fileExtension: string; - fileId: string; -} - -const uploadDir = path.join(process.cwd(), 'uploads'); - -if (!fs.existsSync(uploadDir)) { - fs.mkdirSync(uploadDir, { recursive: true }); -} - -const splitter = new RecursiveCharacterTextSplitter({ - chunkSize: 500, 
- chunkOverlap: 100, -}); - -export async function POST(req: Request) { - try { - const formData = await req.formData(); - - const files = formData.getAll('files') as File[]; - const embedding_model = formData.get('embedding_model'); - const embedding_model_provider = formData.get('embedding_model_provider'); - - if (!embedding_model || !embedding_model_provider) { - return NextResponse.json( - { message: 'Missing embedding model or provider' }, - { status: 400 }, - ); - } - - const embeddingModels = await getAvailableEmbeddingModelProviders(); - const provider = - embedding_model_provider ?? Object.keys(embeddingModels)[0]; - const embeddingModel = - embedding_model ?? Object.keys(embeddingModels[provider as string])[0]; - - let embeddingsModel = - embeddingModels[provider as string]?.[embeddingModel as string]?.model; - if (!embeddingsModel) { - return NextResponse.json( - { message: 'Invalid embedding model selected' }, - { status: 400 }, - ); - } - - const processedFiles: FileRes[] = []; - - await Promise.all( - files.map(async (file: any) => { - const fileExtension = file.name.split('.').pop(); - if (!['pdf', 'docx', 'txt'].includes(fileExtension!)) { - return NextResponse.json( - { message: 'File type not supported' }, - { status: 400 }, - ); - } - - const uniqueFileName = `${crypto.randomBytes(16).toString('hex')}.${fileExtension}`; - const filePath = path.join(uploadDir, uniqueFileName); - - const buffer = Buffer.from(await file.arrayBuffer()); - fs.writeFileSync(filePath, new Uint8Array(buffer)); - - let docs: any[] = []; - if (fileExtension === 'pdf') { - const loader = new PDFLoader(filePath); - docs = await loader.load(); - } else if (fileExtension === 'docx') { - const loader = new DocxLoader(filePath); - docs = await loader.load(); - } else if (fileExtension === 'txt') { - const text = fs.readFileSync(filePath, 'utf-8'); - docs = [ - new Document({ pageContent: text, metadata: { title: file.name } }), - ]; - } - - const splitted = await 
splitter.splitDocuments(docs); - - const extractedDataPath = filePath.replace(/\.\w+$/, '-extracted.json'); - fs.writeFileSync( - extractedDataPath, - JSON.stringify({ - title: file.name, - contents: splitted.map((doc) => doc.pageContent), - }), - ); - - const embeddings = await embeddingsModel.embedDocuments( - splitted.map((doc) => doc.pageContent), - ); - const embeddingsDataPath = filePath.replace( - /\.\w+$/, - '-embeddings.json', - ); - fs.writeFileSync( - embeddingsDataPath, - JSON.stringify({ - title: file.name, - embeddings, - }), - ); - - processedFiles.push({ - fileName: file.name, - fileExtension: fileExtension, - fileId: uniqueFileName.replace(/\.\w+$/, ''), - }); - }), - ); - - return NextResponse.json({ - files: processedFiles, - }); - } catch (error) { - console.error('Error uploading file:', error); - return NextResponse.json( - { message: 'An error has occurred.' }, - { status: 500 }, - ); - } -} diff --git a/src/app/api/videos/route.ts b/src/app/api/videos/route.ts deleted file mode 100644 index 7e8288b..0000000 --- a/src/app/api/videos/route.ts +++ /dev/null @@ -1,83 +0,0 @@ -import handleVideoSearch from '@/lib/chains/videoSearchAgent'; -import { - getCustomOpenaiApiKey, - getCustomOpenaiApiUrl, - getCustomOpenaiModelName, -} from '@/lib/config'; -import { getAvailableChatModelProviders } from '@/lib/providers'; -import { BaseChatModel } from '@langchain/core/language_models/chat_models'; -import { AIMessage, BaseMessage, HumanMessage } from '@langchain/core/messages'; -import { ChatOpenAI } from '@langchain/openai'; - -interface ChatModel { - provider: string; - model: string; -} - -interface VideoSearchBody { - query: string; - chatHistory: any[]; - chatModel?: ChatModel; -} - -export const POST = async (req: Request) => { - try { - const body: VideoSearchBody = await req.json(); - - const chatHistory = body.chatHistory - .map((msg: any) => { - if (msg.role === 'user') { - return new HumanMessage(msg.content); - } else if (msg.role === 
'assistant') { - return new AIMessage(msg.content); - } - }) - .filter((msg) => msg !== undefined) as BaseMessage[]; - - const chatModelProviders = await getAvailableChatModelProviders(); - - const chatModelProvider = - chatModelProviders[ - body.chatModel?.provider || Object.keys(chatModelProviders)[0] - ]; - const chatModel = - chatModelProvider[ - body.chatModel?.model || Object.keys(chatModelProvider)[0] - ]; - - let llm: BaseChatModel | undefined; - - if (body.chatModel?.provider === 'custom_openai') { - llm = new ChatOpenAI({ - apiKey: getCustomOpenaiApiKey(), - modelName: getCustomOpenaiModelName(), - temperature: 0.7, - configuration: { - baseURL: getCustomOpenaiApiUrl(), - }, - }) as unknown as BaseChatModel; - } else if (chatModelProvider && chatModel) { - llm = chatModel.model; - } - - if (!llm) { - return Response.json({ error: 'Invalid chat model' }, { status: 400 }); - } - - const videos = await handleVideoSearch( - { - chat_history: chatHistory, - query: body.query, - }, - llm, - ); - - return Response.json({ videos }, { status: 200 }); - } catch (err) { - console.error(`An error occurred while searching videos: ${err}`); - return Response.json( - { message: 'An error occurred while searching videos' }, - { status: 500 }, - ); - } -}; diff --git a/src/app/api/weather/route.ts b/src/app/api/weather/route.ts deleted file mode 100644 index afaf8a6..0000000 --- a/src/app/api/weather/route.ts +++ /dev/null @@ -1,174 +0,0 @@ -export const POST = async (req: Request) => { - try { - const body: { - lat: number; - lng: number; - measureUnit: 'Imperial' | 'Metric'; - } = await req.json(); - - if (!body.lat || !body.lng) { - return Response.json( - { - message: 'Invalid request.', - }, - { status: 400 }, - ); - } - - const res = await fetch( - `https://api.open-meteo.com/v1/forecast?latitude=${body.lat}&longitude=${body.lng}¤t=weather_code,temperature_2m,is_day,relative_humidity_2m,wind_speed_10m&timezone=auto${ - body.measureUnit === 'Metric' ? 
'' : '&temperature_unit=fahrenheit' - }${body.measureUnit === 'Metric' ? '' : '&wind_speed_unit=mph'}`, - ); - - const data = await res.json(); - - if (data.error) { - console.error(`Error fetching weather data: ${data.reason}`); - return Response.json( - { - message: 'An error has occurred.', - }, - { status: 500 }, - ); - } - - const weather: { - temperature: number; - condition: string; - humidity: number; - windSpeed: number; - icon: string; - temperatureUnit: 'C' | 'F'; - windSpeedUnit: 'm/s' | 'mph'; - } = { - temperature: data.current.temperature_2m, - condition: '', - humidity: data.current.relative_humidity_2m, - windSpeed: data.current.wind_speed_10m, - icon: '', - temperatureUnit: body.measureUnit === 'Metric' ? 'C' : 'F', - windSpeedUnit: body.measureUnit === 'Metric' ? 'm/s' : 'mph', - }; - - const code = data.current.weather_code; - const isDay = data.current.is_day === 1; - const dayOrNight = isDay ? 'day' : 'night'; - - switch (code) { - case 0: - weather.icon = `clear-${dayOrNight}`; - weather.condition = 'Clear'; - break; - - case 1: - weather.condition = 'Mainly Clear'; - case 2: - weather.condition = 'Partly Cloudy'; - case 3: - weather.icon = `cloudy-1-${dayOrNight}`; - weather.condition = 'Cloudy'; - break; - - case 45: - weather.condition = 'Fog'; - case 48: - weather.icon = `fog-${dayOrNight}`; - weather.condition = 'Fog'; - break; - - case 51: - weather.condition = 'Light Drizzle'; - case 53: - weather.condition = 'Moderate Drizzle'; - case 55: - weather.icon = `rainy-1-${dayOrNight}`; - weather.condition = 'Dense Drizzle'; - break; - - case 56: - weather.condition = 'Light Freezing Drizzle'; - case 57: - weather.icon = `frost-${dayOrNight}`; - weather.condition = 'Dense Freezing Drizzle'; - break; - - case 61: - weather.condition = 'Slight Rain'; - case 63: - weather.condition = 'Moderate Rain'; - case 65: - weather.condition = 'Heavy Rain'; - weather.icon = `rainy-2-${dayOrNight}`; - break; - - case 66: - weather.condition = 'Light 
Freezing Rain'; - case 67: - weather.condition = 'Heavy Freezing Rain'; - weather.icon = 'rain-and-sleet-mix'; - break; - - case 71: - weather.condition = 'Slight Snow Fall'; - case 73: - weather.condition = 'Moderate Snow Fall'; - case 75: - weather.condition = 'Heavy Snow Fall'; - weather.icon = `snowy-2-${dayOrNight}`; - break; - - case 77: - weather.condition = 'Snow'; - weather.icon = `snowy-1-${dayOrNight}`; - break; - - case 80: - weather.condition = 'Slight Rain Showers'; - case 81: - weather.condition = 'Moderate Rain Showers'; - case 82: - weather.condition = 'Heavy Rain Showers'; - weather.icon = `rainy-3-${dayOrNight}`; - break; - - case 85: - weather.condition = 'Slight Snow Showers'; - case 86: - weather.condition = 'Moderate Snow Showers'; - case 87: - weather.condition = 'Heavy Snow Showers'; - weather.icon = `snowy-3-${dayOrNight}`; - break; - - case 95: - weather.condition = 'Thunderstorm'; - weather.icon = `scattered-thunderstorms-${dayOrNight}`; - break; - - case 96: - weather.condition = 'Thunderstorm with Slight Hail'; - case 99: - weather.condition = 'Thunderstorm with Heavy Hail'; - weather.icon = 'severe-thunderstorm'; - break; - - default: - weather.icon = `clear-${dayOrNight}`; - weather.condition = 'Clear'; - break; - } - - return Response.json(weather); - } catch (err) { - console.error('An error occurred while getting home widgets', err); - return Response.json( - { - message: 'An error has occurred.', - }, - { - status: 500, - }, - ); - } -}; diff --git a/src/app/c/[chatId]/page.tsx b/src/app/c/[chatId]/page.tsx deleted file mode 100644 index 672107a..0000000 --- a/src/app/c/[chatId]/page.tsx +++ /dev/null @@ -1,17 +0,0 @@ -'use client'; - -import ChatWindow from '@/components/ChatWindow'; -import { useParams } from 'next/navigation'; -import React from 'react'; -import { ChatProvider } from '@/lib/hooks/useChat'; - -const Page = () => { - const { chatId }: { chatId: string } = useParams(); - return ( - - - - ); -}; - -export default 
Page; diff --git a/src/app/discover/page.tsx b/src/app/discover/page.tsx deleted file mode 100644 index 8e20e50..0000000 --- a/src/app/discover/page.tsx +++ /dev/null @@ -1,158 +0,0 @@ -'use client'; - -import { Search } from 'lucide-react'; -import { useEffect, useState } from 'react'; -import Link from 'next/link'; -import { toast } from 'sonner'; -import { cn } from '@/lib/utils'; - -interface Discover { - title: string; - content: string; - url: string; - thumbnail: string; -} - -const topics: { key: string; display: string }[] = [ - { - display: 'Tech & Science', - key: 'tech', - }, - { - display: 'Finance', - key: 'finance', - }, - { - display: 'Art & Culture', - key: 'art', - }, - { - display: 'Sports', - key: 'sports', - }, - { - display: 'Entertainment', - key: 'entertainment', - }, -]; - -const Page = () => { - const [discover, setDiscover] = useState(null); - const [loading, setLoading] = useState(true); - const [activeTopic, setActiveTopic] = useState(topics[0].key); - - const fetchArticles = async (topic: string) => { - setLoading(true); - try { - const res = await fetch(`/api/discover?topic=${topic}`, { - method: 'GET', - headers: { - 'Content-Type': 'application/json', - }, - }); - - const data = await res.json(); - - if (!res.ok) { - throw new Error(data.message); - } - - data.blogs = data.blogs.filter((blog: Discover) => blog.thumbnail); - - setDiscover(data.blogs); - } catch (err: any) { - console.error('Error fetching data:', err.message); - toast.error('Error fetching data'); - } finally { - setLoading(false); - } - }; - - useEffect(() => { - fetchArticles(activeTopic); - }, [activeTopic]); - - return ( - <> -
-
-
- -

Discover

-
-
-
- -
- {topics.map((t, i) => ( -
setActiveTopic(t.key)} - > - {t.display} -
- ))} -
- - {loading ? ( -
- -
- ) : ( -
- {discover && - discover?.map((item, i) => ( - - {item.title} -
-
- {item.title.slice(0, 100)}... -
-

- {item.content.slice(0, 100)}... -

-
- - ))} -
- )} -
- - ); -}; - -export default Page; diff --git a/src/app/library/layout.tsx b/src/app/library/layout.tsx deleted file mode 100644 index 00d4a3b..0000000 --- a/src/app/library/layout.tsx +++ /dev/null @@ -1,12 +0,0 @@ -import { Metadata } from 'next'; -import React from 'react'; - -export const metadata: Metadata = { - title: 'Library - Perplexica', -}; - -const Layout = ({ children }: { children: React.ReactNode }) => { - return
{children}
; -}; - -export default Layout; diff --git a/src/app/library/page.tsx b/src/app/library/page.tsx deleted file mode 100644 index 9c40b2b..0000000 --- a/src/app/library/page.tsx +++ /dev/null @@ -1,114 +0,0 @@ -'use client'; - -import DeleteChat from '@/components/DeleteChat'; -import { cn, formatTimeDifference } from '@/lib/utils'; -import { BookOpenText, ClockIcon, Delete, ScanEye } from 'lucide-react'; -import Link from 'next/link'; -import { useEffect, useState } from 'react'; - -export interface Chat { - id: string; - title: string; - createdAt: string; - focusMode: string; -} - -const Page = () => { - const [chats, setChats] = useState([]); - const [loading, setLoading] = useState(true); - - useEffect(() => { - const fetchChats = async () => { - setLoading(true); - - const res = await fetch(`/api/chats`, { - method: 'GET', - headers: { - 'Content-Type': 'application/json', - }, - }); - - const data = await res.json(); - - setChats(data.chats); - setLoading(false); - }; - - fetchChats(); - }, []); - - return loading ? ( -
- -
- ) : ( -
-
-
- -

Library

-
-
-
- {chats.length === 0 && ( -
-

- No chats found. -

-
- )} - {chats.length > 0 && ( -
- {chats.map((chat, i) => ( -
- - {chat.title} - -
-
- -

- {formatTimeDifference(new Date(), chat.createdAt)} Ago -

-
- -
-
- ))} -
- )} -
- ); -}; - -export default Page; diff --git a/src/app/manifest.ts b/src/app/manifest.ts deleted file mode 100644 index 792e752..0000000 --- a/src/app/manifest.ts +++ /dev/null @@ -1,54 +0,0 @@ -import type { MetadataRoute } from 'next'; - -export default function manifest(): MetadataRoute.Manifest { - return { - name: 'Perplexica - Chat with the internet', - short_name: 'Perplexica', - description: - 'Perplexica is an AI powered chatbot that is connected to the internet.', - start_url: '/', - display: 'standalone', - background_color: '#0a0a0a', - theme_color: '#0a0a0a', - screenshots: [ - { - src: '/screenshots/p1.png', - form_factor: 'wide', - sizes: '2560x1600', - }, - { - src: '/screenshots/p2.png', - form_factor: 'wide', - sizes: '2560x1600', - }, - { - src: '/screenshots/p1_small.png', - form_factor: 'narrow', - sizes: '828x1792', - }, - { - src: '/screenshots/p2_small.png', - form_factor: 'narrow', - sizes: '828x1792', - }, - ], - icons: [ - { - src: '/icon-50.png', - sizes: '50x50', - type: 'image/png' as const, - }, - { - src: '/icon-100.png', - sizes: '100x100', - type: 'image/png', - }, - { - src: '/icon.png', - sizes: '440x440', - type: 'image/png', - purpose: 'any', - }, - ], - }; -} diff --git a/src/app/settings/page.tsx b/src/app/settings/page.tsx deleted file mode 100644 index 6fb8255..0000000 --- a/src/app/settings/page.tsx +++ /dev/null @@ -1,963 +0,0 @@ -'use client'; - -import { Settings as SettingsIcon, ArrowLeft, Loader2 } from 'lucide-react'; -import { useEffect, useState } from 'react'; -import { cn } from '@/lib/utils'; -import { Switch } from '@headlessui/react'; -import ThemeSwitcher from '@/components/theme/Switcher'; -import { ImagesIcon, VideoIcon } from 'lucide-react'; -import Link from 'next/link'; -import { PROVIDER_METADATA } from '@/lib/providers'; - -interface SettingsType { - chatModelProviders: { - [key: string]: [Record]; - }; - embeddingModelProviders: { - [key: string]: [Record]; - }; - openaiApiKey: string; - groqApiKey: 
string; - anthropicApiKey: string; - geminiApiKey: string; - ollamaApiUrl: string; - ollamaApiKey: string; - lmStudioApiUrl: string; - deepseekApiKey: string; - aimlApiKey: string; - customOpenaiApiKey: string; - customOpenaiApiUrl: string; - customOpenaiModelName: string; -} - -interface InputProps extends React.InputHTMLAttributes { - isSaving?: boolean; - onSave?: (value: string) => void; -} - -const Input = ({ className, isSaving, onSave, ...restProps }: InputProps) => { - return ( -
- onSave?.(e.target.value)} - /> - {isSaving && ( -
- -
- )} -
- ); -}; - -interface TextareaProps extends React.InputHTMLAttributes { - isSaving?: boolean; - onSave?: (value: string) => void; -} - -const Textarea = ({ - className, - isSaving, - onSave, - ...restProps -}: TextareaProps) => { - return ( -
-