diff --git a/.assets/manifest.json b/.assets/manifest.json new file mode 100644 index 0000000..e69de29 diff --git a/.assets/perplexica-screenshot.png b/.assets/perplexica-screenshot.png index c47a544..fc7a697 100644 Binary files a/.assets/perplexica-screenshot.png and b/.assets/perplexica-screenshot.png differ diff --git a/ui/.eslintrc.json b/.eslintrc.json similarity index 100% rename from ui/.eslintrc.json rename to .eslintrc.json diff --git a/.github/workflows/docker-build.yaml b/.github/workflows/docker-build.yaml new file mode 100644 index 0000000..29f7987 --- /dev/null +++ b/.github/workflows/docker-build.yaml @@ -0,0 +1,138 @@ +name: Build & Push Docker Images + +on: + push: + branches: + - master + release: + types: [published] + +jobs: + build-amd64: + runs-on: ubuntu-latest + steps: + - name: Checkout code + uses: actions/checkout@v3 + + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v2 + with: + install: true + + - name: Log in to DockerHub + uses: docker/login-action@v2 + with: + username: ${{ secrets.DOCKER_USERNAME }} + password: ${{ secrets.DOCKER_PASSWORD }} + + - name: Extract version from release tag + if: github.event_name == 'release' + id: version + run: echo "RELEASE_VERSION=${GITHUB_REF#refs/tags/}" >> $GITHUB_ENV + + - name: Build and push AMD64 Docker image + if: github.ref == 'refs/heads/master' && github.event_name == 'push' + run: | + DOCKERFILE=app.dockerfile + IMAGE_NAME=perplexica + docker buildx build --platform linux/amd64 \ + --cache-from=type=registry,ref=itzcrazykns1337/${IMAGE_NAME}:amd64 \ + --cache-to=type=inline \ + --provenance false \ + -f $DOCKERFILE \ + -t itzcrazykns1337/${IMAGE_NAME}:amd64 \ + --push . 
+ + - name: Build and push AMD64 release Docker image + if: github.event_name == 'release' + run: | + DOCKERFILE=app.dockerfile + IMAGE_NAME=perplexica + docker buildx build --platform linux/amd64 \ + --cache-from=type=registry,ref=itzcrazykns1337/${IMAGE_NAME}:${{ env.RELEASE_VERSION }}-amd64 \ + --cache-to=type=inline \ + --provenance false \ + -f $DOCKERFILE \ + -t itzcrazykns1337/${IMAGE_NAME}:${{ env.RELEASE_VERSION }}-amd64 \ + --push . + + build-arm64: + runs-on: ubuntu-24.04-arm + steps: + - name: Checkout code + uses: actions/checkout@v3 + + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v2 + with: + install: true + + - name: Log in to DockerHub + uses: docker/login-action@v2 + with: + username: ${{ secrets.DOCKER_USERNAME }} + password: ${{ secrets.DOCKER_PASSWORD }} + + - name: Extract version from release tag + if: github.event_name == 'release' + id: version + run: echo "RELEASE_VERSION=${GITHUB_REF#refs/tags/}" >> $GITHUB_ENV + + - name: Build and push ARM64 Docker image + if: github.ref == 'refs/heads/master' && github.event_name == 'push' + run: | + DOCKERFILE=app.dockerfile + IMAGE_NAME=perplexica + docker buildx build --platform linux/arm64 \ + --cache-from=type=registry,ref=itzcrazykns1337/${IMAGE_NAME}:arm64 \ + --cache-to=type=inline \ + --provenance false \ + -f $DOCKERFILE \ + -t itzcrazykns1337/${IMAGE_NAME}:arm64 \ + --push . + + - name: Build and push ARM64 release Docker image + if: github.event_name == 'release' + run: | + DOCKERFILE=app.dockerfile + IMAGE_NAME=perplexica + docker buildx build --platform linux/arm64 \ + --cache-from=type=registry,ref=itzcrazykns1337/${IMAGE_NAME}:${{ env.RELEASE_VERSION }}-arm64 \ + --cache-to=type=inline \ + --provenance false \ + -f $DOCKERFILE \ + -t itzcrazykns1337/${IMAGE_NAME}:${{ env.RELEASE_VERSION }}-arm64 \ + --push . 
+ + manifest: + needs: [build-amd64, build-arm64] + runs-on: ubuntu-latest + steps: + - name: Log in to DockerHub + uses: docker/login-action@v2 + with: + username: ${{ secrets.DOCKER_USERNAME }} + password: ${{ secrets.DOCKER_PASSWORD }} + + - name: Extract version from release tag + if: github.event_name == 'release' + id: version + run: echo "RELEASE_VERSION=${GITHUB_REF#refs/tags/}" >> $GITHUB_ENV + + - name: Create and push multi-arch manifest for main + if: github.ref == 'refs/heads/master' && github.event_name == 'push' + run: | + IMAGE_NAME=perplexica + docker manifest create itzcrazykns1337/${IMAGE_NAME}:main \ + --amend itzcrazykns1337/${IMAGE_NAME}:amd64 \ + --amend itzcrazykns1337/${IMAGE_NAME}:arm64 + docker manifest push itzcrazykns1337/${IMAGE_NAME}:main + + - name: Create and push multi-arch manifest for releases + if: github.event_name == 'release' + run: | + IMAGE_NAME=perplexica + docker manifest create itzcrazykns1337/${IMAGE_NAME}:${{ env.RELEASE_VERSION }} \ + --amend itzcrazykns1337/${IMAGE_NAME}:${{ env.RELEASE_VERSION }}-amd64 \ + --amend itzcrazykns1337/${IMAGE_NAME}:${{ env.RELEASE_VERSION }}-arm64 + docker manifest push itzcrazykns1337/${IMAGE_NAME}:${{ env.RELEASE_VERSION }} diff --git a/.gitignore b/.gitignore index a3dd5cc..9fb5e4c 100644 --- a/.gitignore +++ b/.gitignore @@ -4,9 +4,9 @@ npm-debug.log yarn-error.log # Build output -/.next/ -/out/ -/dist/ +.next/ +out/ +dist/ # IDE/Editor specific .vscode/ @@ -35,4 +35,7 @@ logs/ Thumbs.db # Db -db.sqlite \ No newline at end of file +db.sqlite +/searxng + +certificates \ No newline at end of file diff --git a/.prettierignore b/.prettierignore index c184fdb..55d3c7c 100644 --- a/.prettierignore +++ b/.prettierignore @@ -35,4 +35,7 @@ coverage *.swp # Ignore all files with the .DS_Store extension (macOS specific) -.DS_Store \ No newline at end of file +.DS_Store + +# Ignore all files in uploads directory +uploads \ No newline at end of file diff --git a/.prettierrc.js b/.prettierrc.js 
index 1937ff1..8ca480f 100644 --- a/.prettierrc.js +++ b/.prettierrc.js @@ -6,7 +6,6 @@ const config = { endOfLine: 'auto', singleQuote: true, tabWidth: 2, - semi: true, }; module.exports = config; diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index c779f91..7bbb280 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -1,30 +1,43 @@ # How to Contribute to Perplexica -Hey there, thanks for deciding to contribute to Perplexica. Anything you help with will support the development of Perplexica and will make it better. Let's walk you through the key aspects to ensure your contributions are effective and in harmony with the project's setup. +Thanks for your interest in contributing to Perplexica! Your help makes this project better. This guide explains how to contribute effectively. + +Perplexica is a modern AI chat application with advanced search capabilities. ## Project Structure -Perplexica's design consists of two main domains: +Perplexica's codebase is organized as follows: -- **Frontend (`ui` directory)**: This is a Next.js application holding all user interface components. It's a self-contained environment that manages everything the user interacts with. -- **Backend (root and `src` directory)**: The backend logic is situated in the `src` folder, but the root directory holds the main `package.json` for backend dependency management. +- **UI Components and Pages**: + - **Components (`src/components`)**: Reusable UI components. + - **Pages and Routes (`src/app`)**: Next.js app directory structure with page components. + - Main app routes include: home (`/`), chat (`/c`), discover (`/discover`), library (`/library`), and settings (`/settings`). + - **API Routes (`src/app/api`)**: API endpoints implemented with Next.js API routes. + - `/api/chat`: Handles chat interactions. + - `/api/search`: Provides direct access to Perplexica's search capabilities. + - Other endpoints for models, files, and suggestions. 
+- **Backend Logic (`src/lib`)**: Contains all the backend functionality including search, database, and API logic. + - The search functionality lives in the `src/lib/search` directory. + - All of the focus modes are implemented using the Meta Search Agent class in `src/lib/search/metaSearchAgent.ts`. + - Database functionality is in `src/lib/db`. + - Chat model and embedding model providers are managed in `src/lib/providers`. + - Prompt templates and LLM chain definitions are in `src/lib/prompts` and `src/lib/chains` respectively. + +## API Documentation + +Perplexica exposes several API endpoints for programmatic access, including: + +- **Search API**: Access Perplexica's advanced search capabilities directly via the `/api/search` endpoint. For detailed documentation, see `docs/API/SEARCH.md`. ## Setting Up Your Environment Before diving into coding, setting up your local environment is key. Here's what you need to do: -### Backend - 1. In the root directory, locate the `sample.config.toml` file. -2. Rename it to `config.toml` and fill in the necessary configuration fields specific to the backend. -3. Run `npm install` to install dependencies. -4. Use `npm run dev` to start the backend in development mode. - -### Frontend - -1. Navigate to the `ui` folder and repeat the process of renaming `.env.example` to `.env`, making sure to provide the frontend-specific variables. -2. Execute `npm install` within the `ui` directory to get the frontend dependencies ready. -3. Launch the frontend development server with `npm run dev`. +2. Rename it to `config.toml` and fill in the necessary configuration fields. +3. Run `npm install` to install all dependencies. +4. Run `npm run db:push` to set up the local SQLite database. +5. Use `npm run dev` to start the application in development mode. **Please note**: Docker configurations are present for setting up production environments, whereas `npm run dev` is used for development purposes. 
diff --git a/README.md b/README.md index 3c87acc..5eb0713 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,24 @@ # 🚀 Perplexica - An AI-powered search engine 🔎 -![preview](.assets/perplexica-screenshot.png) +
+ Special thanks to: +
+
+ + Warp sponsorship + + +### [Warp, the AI Devtool that lives in your terminal](https://www.warp.dev/perplexica) + +[Available for MacOS, Linux, & Windows](https://www.warp.dev/perplexica) + +
+ +
+ +[![Discord](https://dcbadge.limes.pink/api/server/26aArMy8tT?style=flat)](https://discord.gg/26aArMy8tT) + +![preview](.assets/perplexica-screenshot.png?) ## Table of Contents @@ -12,6 +30,8 @@ - [Non-Docker Installation](#non-docker-installation) - [Ollama Connection Errors](#ollama-connection-errors) - [Using as a Search Engine](#using-as-a-search-engine) +- [Using Perplexica's API](#using-perplexicas-api) +- [Expose Perplexica to a network](#expose-perplexica-to-network) - [One-Click Deployment](#one-click-deployment) - [Upcoming Features](#upcoming-features) - [Support Us](#support-us) @@ -39,12 +59,13 @@ Want to know more about its architecture and how it works? You can read it [here - **Normal Mode:** Processes your query and performs a web search. - **Focus Modes:** Special modes to better answer specific types of questions. Perplexica currently has 6 focus modes: - **All Mode:** Searches the entire web to find the best results. - - **Writing Assistant Mode:** Helpful for writing tasks that does not require searching the web. + - **Writing Assistant Mode:** Helpful for writing tasks that do not require searching the web. - **Academic Search Mode:** Finds articles and papers, ideal for academic research. - **YouTube Search Mode:** Finds YouTube videos based on the search query. - **Wolfram Alpha Search Mode:** Answers queries that need calculations or data analysis using Wolfram Alpha. - **Reddit Search Mode:** Searches Reddit for discussions and opinions related to the query. - **Current Information:** Some search tools might give you outdated info because they use data from crawling bots and convert them into embeddings and store them in a index. Unlike them, Perplexica uses SearxNG, a metasearch engine to get the results and rerank and get the most relevant source out of it, ensuring you always get the latest information without the overhead of daily data updates. 
+- **API**: Integrate Perplexica into your existing applications and make use of its capabilities. It has many more features like image and video search. Some of the planned features are mentioned in [upcoming features](#upcoming-features). @@ -69,6 +90,9 @@ There are mainly 2 ways of installing Perplexica - With Docker, Without Docker. - `OLLAMA`: Your Ollama API URL. You should enter it as `http://host.docker.internal:PORT_NUMBER`. If you installed Ollama on port 11434, use `http://host.docker.internal:11434`. For other ports, adjust accordingly. **You need to fill this if you wish to use Ollama's models instead of OpenAI's**. - `GROQ`: Your Groq API key. **You only need to fill this if you wish to use Groq's hosted models**. - `ANTHROPIC`: Your Anthropic API key. **You only need to fill this if you wish to use Anthropic models**. + - `Gemini`: Your Gemini API key. **You only need to fill this if you wish to use Google's models**. + - `DEEPSEEK`: Your Deepseek API key. **Only needed if you want Deepseek models.** + - `AIMLAPI`: Your AI/ML API key. **Only needed if you want to use AI/ML API models and embeddings.** **Note**: You can change these after starting Perplexica from the settings dialog. @@ -88,14 +112,13 @@ There are mainly 2 ways of installing Perplexica - With Docker, Without Docker. 1. Install SearXNG and allow `JSON` format in the SearXNG settings. 2. Clone the repository and rename the `sample.config.toml` file to `config.toml` in the root directory. Ensure you complete all required fields in this file. -3. Rename the `.env.example` file to `.env` in the `ui` folder and fill in all necessary fields. -4. After populating the configuration and environment files, run `npm i` in both the `ui` folder and the root directory. -5. Install the dependencies and then execute `npm run build` in both the `ui` folder and the root directory. -6. 
Finally, start both the frontend and the backend by running `npm run start` in both the `ui` folder and the root directory. +3. After populating the configuration, run `npm i`. +4. Install the dependencies and then execute `npm run build`. +5. Finally, start the app by running `npm run start`. **Note**: Using Docker is recommended as it simplifies the setup process, especially for managing environment variables and dependencies. -See the [installation documentation](https://github.com/ItzCrazyKns/Perplexica/tree/master/docs/installation) for more information like exposing it your network, etc. +See the [installation documentation](https://github.com/ItzCrazyKns/Perplexica/tree/master/docs/installation) for more information like updating, etc. ### Ollama Connection Errors @@ -112,7 +135,7 @@ If you're encountering an Ollama connection error, it is likely due to the backe 3. **Linux Users - Expose Ollama to Network:** - - Inside `/etc/systemd/system/ollama.service`, you need to add `Environment="OLLAMA_HOST=0.0.0.0"`. Then restart Ollama by `systemctl restart ollama`. For more information see [Ollama docs](https://github.com/ollama/ollama/blob/main/docs/faq.md#setting-environment-variables-on-linux) + - Inside `/etc/systemd/system/ollama.service`, you need to add `Environment="OLLAMA_HOST=0.0.0.0:11434"`. (Change the port number if you are using a different one.) Then reload the systemd manager configuration with `systemctl daemon-reload`, and restart Ollama with `systemctl restart ollama`. For more information, see the [Ollama docs](https://github.com/ollama/ollama/blob/main/docs/faq.md#setting-environment-variables-on-linux). - Ensure that the port (default is 11434) is not blocked by your firewall. @@ -125,9 +148,21 @@ If you wish to use Perplexica as an alternative to traditional search engines li 3. Add a new site search with the following URL: `http://localhost:3000/?q=%s`. 
Replace `localhost` with your IP address or domain name, and `3000` with the port number if Perplexica is not hosted locally. 4. Click the add button. Now, you can use Perplexica directly from your browser's search bar. +## Using Perplexica's API + +Perplexica also provides an API for developers looking to integrate its powerful search engine into their own applications. You can run searches, use multiple models and get answers to your queries. + +For more details, check out the full documentation [here](https://github.com/ItzCrazyKns/Perplexica/tree/master/docs/API/SEARCH.md). + +## Expose Perplexica to network + +Perplexica runs on Next.js and handles all API requests. It works right away on the same network and stays accessible even with port forwarding. + ## One-Click Deployment +[![Deploy to Sealos](https://raw.githubusercontent.com/labring-actions/templates/main/Deploy-on-Sealos.svg)](https://usw.sealos.io/?openapp=system-template%3FtemplateName%3Dperplexica) [![Deploy to RepoCloud](https://d16t0pc4846x52.cloudfront.net/deploylobe.svg)](https://repocloud.io/details/?app_id=267) +[![Run on ClawCloud](https://raw.githubusercontent.com/ClawCloud/Run-Template/refs/heads/main/Run-on-ClawCloud.svg)](https://template.run.claw.cloud/?referralCode=U11MRQ8U9RM4&openapp=system-fastdeploy%3FtemplateName%3Dperplexica) ## Upcoming Features @@ -135,8 +170,9 @@ If you wish to use Perplexica as an alternative to traditional search engines li - [x] Adding support for local LLMs - [x] History Saving features - [x] Introducing various Focus Modes +- [x] Adding API support +- [x] Adding Discover - [ ] Finalizing Copilot Mode -- [ ] Adding Discover ## Support Us diff --git a/app.dockerfile b/app.dockerfile index 105cf86..c3c0fd0 100644 --- a/app.dockerfile +++ b/app.dockerfile @@ -1,15 +1,35 @@ -FROM node:alpine - -ARG NEXT_PUBLIC_WS_URL -ARG NEXT_PUBLIC_API_URL -ENV NEXT_PUBLIC_WS_URL=${NEXT_PUBLIC_WS_URL} -ENV NEXT_PUBLIC_API_URL=${NEXT_PUBLIC_API_URL} +FROM node:20.18.0-slim AS 
builder WORKDIR /home/perplexica -COPY ui /home/perplexica/ +COPY package.json yarn.lock ./ +RUN yarn install --frozen-lockfile --network-timeout 600000 -RUN yarn install +COPY tsconfig.json next.config.mjs next-env.d.ts postcss.config.js drizzle.config.ts tailwind.config.ts ./ +COPY src ./src +COPY public ./public + +RUN mkdir -p /home/perplexica/data RUN yarn build -CMD ["yarn", "start"] \ No newline at end of file +RUN yarn add --dev @vercel/ncc +RUN yarn ncc build ./src/lib/db/migrate.ts -o migrator + +FROM node:20.18.0-slim + +WORKDIR /home/perplexica + +COPY --from=builder /home/perplexica/public ./public +COPY --from=builder /home/perplexica/.next/static ./public/_next/static + +COPY --from=builder /home/perplexica/.next/standalone ./ +COPY --from=builder /home/perplexica/data ./data +COPY drizzle ./drizzle +COPY --from=builder /home/perplexica/migrator/build ./build +COPY --from=builder /home/perplexica/migrator/index.js ./migrate.js + +RUN mkdir /home/perplexica/uploads + +COPY entrypoint.sh ./entrypoint.sh +RUN chmod +x ./entrypoint.sh +CMD ["./entrypoint.sh"] \ No newline at end of file diff --git a/backend.dockerfile b/backend.dockerfile deleted file mode 100644 index 4886573..0000000 --- a/backend.dockerfile +++ /dev/null @@ -1,21 +0,0 @@ -FROM node:slim - -ARG SEARXNG_API_URL - -WORKDIR /home/perplexica - -COPY src /home/perplexica/src -COPY tsconfig.json /home/perplexica/ -COPY config.toml /home/perplexica/ -COPY drizzle.config.ts /home/perplexica/ -COPY package.json /home/perplexica/ -COPY yarn.lock /home/perplexica/ - -RUN sed -i "s|SEARXNG = \".*\"|SEARXNG = \"${SEARXNG_API_URL}\"|g" /home/perplexica/config.toml - -RUN mkdir /home/perplexica/data - -RUN yarn install -RUN yarn build - -CMD ["yarn", "start"] \ No newline at end of file diff --git a/docker-compose.yaml b/docker-compose.yaml index d6f9203..b32e0a9 100644 --- a/docker-compose.yaml +++ b/docker-compose.yaml @@ -9,37 +9,22 @@ services: - perplexica-network restart: unless-stopped - 
perplexica-backend: - build: - context: . - dockerfile: backend.dockerfile - args: - - SEARXNG_API_URL=http://searxng:8080 - depends_on: - - searxng - ports: - - 3001:3001 - volumes: - - backend-dbstore:/home/perplexica/data - extra_hosts: - - 'host.docker.internal:host-gateway' - networks: - - perplexica-network - restart: unless-stopped - - perplexica-frontend: + app: + image: itzcrazykns1337/perplexica:main build: context: . dockerfile: app.dockerfile - args: - - NEXT_PUBLIC_API_URL=http://127.0.0.1:3001/api - - NEXT_PUBLIC_WS_URL=ws://127.0.0.1:3001 - depends_on: - - perplexica-backend + environment: + - SEARXNG_API_URL=http://searxng:8080 + - DATA_DIR=/home/perplexica ports: - 3000:3000 networks: - perplexica-network + volumes: + - backend-dbstore:/home/perplexica/data + - uploads:/home/perplexica/uploads + - ./config.toml:/home/perplexica/config.toml restart: unless-stopped networks: @@ -47,3 +32,4 @@ networks: volumes: backend-dbstore: + uploads: diff --git a/docs/API/SEARCH.md b/docs/API/SEARCH.md new file mode 100644 index 0000000..b67b62b --- /dev/null +++ b/docs/API/SEARCH.md @@ -0,0 +1,145 @@ +# Perplexica Search API Documentation + +## Overview + +Perplexica’s Search API makes it easy to use our AI-powered search engine. You can run different types of searches, pick the models you want to use, and get the most recent info. Follow the following headings to learn more about Perplexica's search API. + +## Endpoint + +### **POST** `http://localhost:3000/api/search` + +**Note**: Replace `3000` with any other port if you've changed the default PORT + +### Request + +The API accepts a JSON object in the request body, where you define the focus mode, chat models, embedding models, and your query. 
+ +#### Request Body Structure + +```json +{ + "chatModel": { + "provider": "openai", + "name": "gpt-4o-mini" + }, + "embeddingModel": { + "provider": "openai", + "name": "text-embedding-3-large" + }, + "optimizationMode": "speed", + "focusMode": "webSearch", + "query": "What is Perplexica", + "history": [ + ["human", "Hi, how are you?"], + ["assistant", "I am doing well, how can I help you today?"] + ], + "systemInstructions": "Focus on providing technical details about Perplexica's architecture.", + "stream": false +} +``` + +### Request Parameters + +- **`chatModel`** (object, optional): Defines the chat model to be used for the query. For model details you can send a GET request at `http://localhost:3000/api/models`. Make sure to use the key value (For example "gpt-4o-mini" instead of the display name "GPT 4 omni mini"). + + - `provider`: Specifies the provider for the chat model (e.g., `openai`, `ollama`). + - `name`: The specific model from the chosen provider (e.g., `gpt-4o-mini`). + - Optional fields for custom OpenAI configuration: + - `customOpenAIBaseURL`: If you’re using a custom OpenAI instance, provide the base URL. + - `customOpenAIKey`: The API key for a custom OpenAI instance. + +- **`embeddingModel`** (object, optional): Defines the embedding model for similarity-based searching. For model details you can send a GET request at `http://localhost:3000/api/models`. Make sure to use the key value (For example "text-embedding-3-large" instead of the display name "Text Embedding 3 Large"). + + - `provider`: The provider for the embedding model (e.g., `openai`). + - `name`: The specific embedding model (e.g., `text-embedding-3-large`). + +- **`focusMode`** (string, required): Specifies which focus mode to use. Available modes: + + - `webSearch`, `academicSearch`, `writingAssistant`, `wolframAlphaSearch`, `youtubeSearch`, `redditSearch`. 
+ +- **`optimizationMode`** (string, optional): Specifies the optimization mode to control the balance between performance and quality. Available modes: + + - `speed`: Prioritize speed and return the fastest answer. + - `balanced`: Provide a balanced answer with good speed and reasonable quality. + +- **`query`** (string, required): The search query or question. + +- **`systemInstructions`** (string, optional): Custom instructions provided by the user to guide the AI's response. These instructions are treated as user preferences and have lower priority than the system's core instructions. For example, you can specify a particular writing style, format, or focus area. + +- **`history`** (array, optional): An array of message pairs representing the conversation history. Each pair consists of a role (either 'human' or 'assistant') and the message content. This allows the system to use the context of the conversation to refine results. Example: + + ```json + [ + ["human", "What is Perplexica?"], + ["assistant", "Perplexica is an AI-powered search engine..."] + ] + ``` + +- **`stream`** (boolean, optional): When set to `true`, enables streaming responses. Default is `false`. + +### Response + +The response from the API includes both the final message and the sources used to generate that message. + +#### Standard Response (stream: false) + +```json +{ + "message": "Perplexica is an innovative, open-source AI-powered search engine designed to enhance the way users search for information online. 
Here are some key features and characteristics of Perplexica:\n\n- **AI-Powered Technology**: It utilizes advanced machine learning algorithms to not only retrieve information but also to understand the context and intent behind user queries, providing more relevant results [1][5].\n\n- **Open-Source**: Being open-source, Perplexica offers flexibility and transparency, allowing users to explore its functionalities without the constraints of proprietary software [3][10].", + "sources": [ + { + "pageContent": "Perplexica is an innovative, open-source AI-powered search engine designed to enhance the way users search for information online.", + "metadata": { + "title": "What is Perplexica, and how does it function as an AI-powered search ...", + "url": "https://askai.glarity.app/search/What-is-Perplexica--and-how-does-it-function-as-an-AI-powered-search-engine" + } + }, + { + "pageContent": "Perplexica is an open-source AI-powered search tool that dives deep into the internet to find precise answers.", + "metadata": { + "title": "Sahar Mor's Post", + "url": "https://www.linkedin.com/posts/sahar-mor_a-new-open-source-project-called-perplexica-activity-7204489745668694016-ncja" + } + } + .... + ] +} +``` + +#### Streaming Response (stream: true) + +When streaming is enabled, the API returns a stream of newline-delimited JSON objects. Each line contains a complete, valid JSON object. The response has Content-Type: application/json. + +Example of streamed response objects: + +``` +{"type":"init","data":"Stream connected"} +{"type":"sources","data":[{"pageContent":"...","metadata":{"title":"...","url":"..."}},...]} +{"type":"response","data":"Perplexica is an "} +{"type":"response","data":"innovative, open-source "} +{"type":"response","data":"AI-powered search engine..."} +{"type":"done"} +``` + +Clients should process each line as a separate JSON object. 
The different message types include: + +- **`init`**: Initial connection message +- **`sources`**: All sources used for the response +- **`response`**: Chunks of the generated answer text +- **`done`**: Indicates the stream is complete + +### Fields in the Response + +- **`message`** (string): The search result, generated based on the query and focus mode. +- **`sources`** (array): A list of sources that were used to generate the search result. Each source includes: + - `pageContent`: A snippet of the relevant content from the source. + - `metadata`: Metadata about the source, including: + - `title`: The title of the webpage. + - `url`: The URL of the webpage. + +### Error Handling + +If an error occurs during the search process, the API will return an appropriate error message with an HTTP status code. + +- **400**: If the request is malformed or missing required fields (e.g., no focus mode or query). +- **500**: If an internal server error occurs during the search. diff --git a/docs/architecture/README.md b/docs/architecture/README.md index b1fcfcb..5732471 100644 --- a/docs/architecture/README.md +++ b/docs/architecture/README.md @@ -1,4 +1,4 @@ -## Perplexica's Architecture +# Perplexica's Architecture Perplexica's architecture consists of the following key components: diff --git a/docs/architecture/WORKING.md b/docs/architecture/WORKING.md index e39de7a..6bad4f9 100644 --- a/docs/architecture/WORKING.md +++ b/docs/architecture/WORKING.md @@ -1,19 +1,19 @@ -## How does Perplexica work? +# How does Perplexica work? Curious about how Perplexica works? Don't worry, we'll cover it here. Before we begin, make sure you've read about the architecture of Perplexica to ensure you understand what it's made up of. Haven't read it? You can read it [here](https://github.com/ItzCrazyKns/Perplexica/tree/master/docs/architecture/README.md). We'll understand how Perplexica works by taking an example of a scenario where a user asks: "How does an A.C. work?". 
We'll break down the process into steps to make it easier to understand. The steps are as follows: -1. The message is sent via WS to the backend server where it invokes the chain. The chain will depend on your focus mode. For this example, let's assume we use the "webSearch" focus mode. +1. The message is sent to the `/api/chat` route where it invokes the chain. The chain will depend on your focus mode. For this example, let's assume we use the "webSearch" focus mode. 2. The chain is now invoked; first, the message is passed to another chain where it first predicts (using the chat history and the question) whether there is a need for sources and searching the web. If there is, it will generate a query (in accordance with the chat history) for searching the web that we'll take up later. If not, the chain will end there, and then the answer generator chain, also known as the response generator, will be started. 3. The query returned by the first chain is passed to SearXNG to search the web for information. 4. After the information is retrieved, it is based on keyword-based search. We then convert the information into embeddings and the query as well, then we perform a similarity search to find the most relevant sources to answer the query. 5. After all this is done, the sources are passed to the response generator. This chain takes all the chat history, the query, and the sources. It generates a response that is streamed to the UI. -### How are the answers cited? +## How are the answers cited? The LLMs are prompted to do so. We've prompted them so well that they cite the answers themselves, and using some UI magic, we display it to the user. -### Image and Video Search +## Image and Video Search Image and video searches are conducted in a similar manner. A query is always generated first, then we search the web for images and videos that match the query. These results are then returned to the user. 
diff --git a/docs/installation/NETWORKING.md b/docs/installation/NETWORKING.md deleted file mode 100644 index baad296..0000000 --- a/docs/installation/NETWORKING.md +++ /dev/null @@ -1,109 +0,0 @@ -# Expose Perplexica to a network - -This guide will show you how to make Perplexica available over a network. Follow these steps to allow computers on the same network to interact with Perplexica. Choose the instructions that match the operating system you are using. - -## Windows - -1. Open PowerShell as Administrator - -2. Navigate to the directory containing the `docker-compose.yaml` file - -3. Stop and remove the existing Perplexica containers and images: - -``` -docker compose down --rmi all -``` - -4. Open the `docker-compose.yaml` file in a text editor like Notepad++ - -5. Replace `127.0.0.1` with the IP address of the server Perplexica is running on in these two lines: - -``` -args: - - NEXT_PUBLIC_API_URL=http://127.0.0.1:3001/api - - NEXT_PUBLIC_WS_URL=ws://127.0.0.1:3001 -``` - -6. Save and close the `docker-compose.yaml` file - -7. Rebuild and restart the Perplexica container: - -``` -docker compose up -d --build -``` - -## macOS - -1. Open the Terminal application - -2. Navigate to the directory with the `docker-compose.yaml` file: - -``` -cd /path/to/docker-compose.yaml -``` - -3. Stop and remove existing containers and images: - -``` -docker compose down --rmi all -``` - -4. Open `docker-compose.yaml` in a text editor like Sublime Text: - -``` -nano docker-compose.yaml -``` - -5. Replace `127.0.0.1` with the server IP in these lines: - -``` -args: - - NEXT_PUBLIC_API_URL=http://127.0.0.1:3001/api - - NEXT_PUBLIC_WS_URL=ws://127.0.0.1:3001 -``` - -6. Save and exit the editor - -7. Rebuild and restart Perplexica: - -``` -docker compose up -d --build -``` - -## Linux - -1. Open the terminal - -2. Navigate to the `docker-compose.yaml` directory: - -``` -cd /path/to/docker-compose.yaml -``` - -3. 
Stop and remove containers and images: - -``` -docker compose down --rmi all -``` - -4. Edit `docker-compose.yaml`: - -``` -nano docker-compose.yaml -``` - -5. Replace `127.0.0.1` with the server IP: - -``` -args: - - NEXT_PUBLIC_API_URL=http://127.0.0.1:3001/api - - NEXT_PUBLIC_WS_URL=ws://127.0.0.1:3001 -``` - -6. Save and exit the editor - -7. Rebuild and restart Perplexica: - -``` -docker compose up -d --build -``` diff --git a/docs/installation/UPDATING.md b/docs/installation/UPDATING.md index df67775..66edf5c 100644 --- a/docs/installation/UPDATING.md +++ b/docs/installation/UPDATING.md @@ -6,29 +6,41 @@ To update Perplexica to the latest version, follow these steps: 1. Clone the latest version of Perplexica from GitHub: -```bash + ```bash git clone https://github.com/ItzCrazyKns/Perplexica.git -``` + ``` -2. Navigate to the Project Directory +2. Navigate to the project directory. -3. Update and Rebuild Docker Containers: +3. Check for changes in the configuration files. If the `sample.config.toml` file contains new fields, delete your existing `config.toml` file, rename `sample.config.toml` to `config.toml`, and update the configuration accordingly. -```bash -docker compose up -d --build -``` +4. Pull the latest images from the registry. -4. Once the command completes running go to http://localhost:3000 and verify the latest changes. + ```bash + docker compose pull + ``` -## For non Docker users +5. Update and recreate the containers. + + ```bash + docker compose up -d + ``` + +6. Once the command completes, go to http://localhost:3000 and verify the latest changes. + +## For non-Docker users 1. Clone the latest version of Perplexica from GitHub: -```bash + ```bash git clone https://github.com/ItzCrazyKns/Perplexica.git -``` + ``` -2. Navigate to the Project Directory -3. Execute `npm i` in both the `ui` folder and the root directory. -4. Once packages are updated, execute `npm run build` in both the `ui` folder and the root directory. -5. 
Finally, start both the frontend and the backend by running `npm run start` in both the `ui` folder and the root directory. +2. Navigate to the project directory. + +3. Check for changes in the configuration files. If the `sample.config.toml` file contains new fields, delete your existing `config.toml` file, rename `sample.config.toml` to `config.toml`, and update the configuration accordingly. +4. After populating the configuration run `npm i`. +5. Install the dependencies and then execute `npm run build`. +6. Finally, start the app by running `npm run start` + +--- diff --git a/drizzle.config.ts b/drizzle.config.ts index 9ac3ec5..a029112 100644 --- a/drizzle.config.ts +++ b/drizzle.config.ts @@ -1,10 +1,11 @@ import { defineConfig } from 'drizzle-kit'; +import path from 'path'; export default defineConfig({ dialect: 'sqlite', - schema: './src/db/schema.ts', + schema: './src/lib/db/schema.ts', out: './drizzle', dbCredentials: { - url: './data/db.sqlite', + url: path.join(process.cwd(), 'data', 'db.sqlite'), }, }); diff --git a/drizzle/0000_fuzzy_randall.sql b/drizzle/0000_fuzzy_randall.sql new file mode 100644 index 0000000..0a2ff07 --- /dev/null +++ b/drizzle/0000_fuzzy_randall.sql @@ -0,0 +1,16 @@ +CREATE TABLE IF NOT EXISTS `chats` ( + `id` text PRIMARY KEY NOT NULL, + `title` text NOT NULL, + `createdAt` text NOT NULL, + `focusMode` text NOT NULL, + `files` text DEFAULT '[]' +); +--> statement-breakpoint +CREATE TABLE IF NOT EXISTS `messages` ( + `id` integer PRIMARY KEY NOT NULL, + `content` text NOT NULL, + `chatId` text NOT NULL, + `messageId` text NOT NULL, + `type` text, + `metadata` text +); diff --git a/drizzle/meta/0000_snapshot.json b/drizzle/meta/0000_snapshot.json new file mode 100644 index 0000000..850bcd3 --- /dev/null +++ b/drizzle/meta/0000_snapshot.json @@ -0,0 +1,116 @@ +{ + "version": "6", + "dialect": "sqlite", + "id": "ef3a044b-0f34-40b5-babb-2bb3a909ba27", + "prevId": "00000000-0000-0000-0000-000000000000", + "tables": { + "chats": { + 
"name": "chats", + "columns": { + "id": { + "name": "id", + "type": "text", + "primaryKey": true, + "notNull": true, + "autoincrement": false + }, + "title": { + "name": "title", + "type": "text", + "primaryKey": false, + "notNull": true, + "autoincrement": false + }, + "createdAt": { + "name": "createdAt", + "type": "text", + "primaryKey": false, + "notNull": true, + "autoincrement": false + }, + "focusMode": { + "name": "focusMode", + "type": "text", + "primaryKey": false, + "notNull": true, + "autoincrement": false + }, + "files": { + "name": "files", + "type": "text", + "primaryKey": false, + "notNull": false, + "autoincrement": false, + "default": "'[]'" + } + }, + "indexes": {}, + "foreignKeys": {}, + "compositePrimaryKeys": {}, + "uniqueConstraints": {}, + "checkConstraints": {} + }, + "messages": { + "name": "messages", + "columns": { + "id": { + "name": "id", + "type": "integer", + "primaryKey": true, + "notNull": true, + "autoincrement": false + }, + "content": { + "name": "content", + "type": "text", + "primaryKey": false, + "notNull": true, + "autoincrement": false + }, + "chatId": { + "name": "chatId", + "type": "text", + "primaryKey": false, + "notNull": true, + "autoincrement": false + }, + "messageId": { + "name": "messageId", + "type": "text", + "primaryKey": false, + "notNull": true, + "autoincrement": false + }, + "type": { + "name": "type", + "type": "text", + "primaryKey": false, + "notNull": false, + "autoincrement": false + }, + "metadata": { + "name": "metadata", + "type": "text", + "primaryKey": false, + "notNull": false, + "autoincrement": false + } + }, + "indexes": {}, + "foreignKeys": {}, + "compositePrimaryKeys": {}, + "uniqueConstraints": {}, + "checkConstraints": {} + } + }, + "views": {}, + "enums": {}, + "_meta": { + "schemas": {}, + "tables": {}, + "columns": {} + }, + "internal": { + "indexes": {} + } +} diff --git a/drizzle/meta/_journal.json b/drizzle/meta/_journal.json new file mode 100644 index 0000000..5db59d1 --- /dev/null 
+++ b/drizzle/meta/_journal.json @@ -0,0 +1,13 @@ +{ + "version": "7", + "dialect": "sqlite", + "entries": [ + { + "idx": 0, + "version": "6", + "when": 1748405503809, + "tag": "0000_fuzzy_randall", + "breakpoints": true + } + ] +} diff --git a/entrypoint.sh b/entrypoint.sh new file mode 100644 index 0000000..9f9448a --- /dev/null +++ b/entrypoint.sh @@ -0,0 +1,6 @@ +#!/bin/sh +set -e + +node migrate.js + +exec node server.js \ No newline at end of file diff --git a/next-env.d.ts b/next-env.d.ts new file mode 100644 index 0000000..1b3be08 --- /dev/null +++ b/next-env.d.ts @@ -0,0 +1,5 @@ +/// +/// + +// NOTE: This file should not be edited +// see https://nextjs.org/docs/app/api-reference/config/typescript for more information. diff --git a/ui/next.config.mjs b/next.config.mjs similarity index 75% rename from ui/next.config.mjs rename to next.config.mjs index c3f2e1a..2300ff4 100644 --- a/ui/next.config.mjs +++ b/next.config.mjs @@ -1,5 +1,6 @@ /** @type {import('next').NextConfig} */ const nextConfig = { + output: 'standalone', images: { remotePatterns: [ { @@ -7,6 +8,7 @@ const nextConfig = { }, ], }, + serverExternalPackages: ['pdf-parse'], }; export default nextConfig; diff --git a/package.json b/package.json index db3d773..5715c2a 100644 --- a/package.json +++ b/package.json @@ -1,48 +1,70 @@ { - "name": "perplexica-backend", - "version": "1.9.0-rc1", + "name": "perplexica-frontend", + "version": "1.11.0-rc2", "license": "MIT", "author": "ItzCrazyKns", "scripts": { - "start": "npm run db:push && node dist/app.js", - "build": "tsc", - "dev": "nodemon src/app.ts", - "db:push": "drizzle-kit push sqlite", - "format": "prettier . --check", - "format:write": "prettier . 
--write" - }, - "devDependencies": { - "@types/better-sqlite3": "^7.6.10", - "@types/cors": "^2.8.17", - "@types/express": "^4.17.21", - "@types/html-to-text": "^9.0.4", - "@types/pdf-parse": "^1.1.4", - "@types/readable-stream": "^4.0.11", - "drizzle-kit": "^0.22.7", - "nodemon": "^3.1.0", - "prettier": "^3.2.5", - "ts-node": "^10.9.2", - "typescript": "^5.4.3" + "dev": "next dev", + "build": "npm run db:push && next build", + "start": "next start", + "lint": "next lint", + "format:write": "prettier . --write", + "db:push": "drizzle-kit push" }, "dependencies": { + "@headlessui/react": "^2.2.0", "@iarna/toml": "^2.2.5", - "@langchain/anthropic": "^0.2.3", - "@langchain/community": "^0.2.16", - "@langchain/openai": "^0.0.25", - "@xenova/transformers": "^2.17.1", - "axios": "^1.6.8", - "better-sqlite3": "^11.0.0", + "@icons-pack/react-simple-icons": "^12.3.0", + "@langchain/anthropic": "^0.3.24", + "@langchain/community": "^0.3.49", + "@langchain/core": "^0.3.66", + "@langchain/google-genai": "^0.2.15", + "@langchain/groq": "^0.2.3", + "@langchain/ollama": "^0.2.3", + "@langchain/openai": "^0.6.2", + "@langchain/textsplitters": "^0.1.0", + "@tailwindcss/typography": "^0.5.12", + "@xenova/transformers": "^2.17.2", + "axios": "^1.8.3", + "better-sqlite3": "^11.9.1", + "clsx": "^2.1.0", "compute-cosine-similarity": "^1.1.0", "compute-dot": "^1.1.0", - "cors": "^2.8.5", - "dotenv": "^16.4.5", - "drizzle-orm": "^0.31.2", - "express": "^4.19.2", + "drizzle-orm": "^0.40.1", "html-to-text": "^9.0.5", - "langchain": "^0.1.30", + "jspdf": "^3.0.1", + "langchain": "^0.3.30", + "lucide-react": "^0.363.0", + "mammoth": "^1.9.1", + "markdown-to-jsx": "^7.7.2", + "next": "^15.2.2", + "next-themes": "^0.3.0", "pdf-parse": "^1.1.1", - "winston": "^3.13.0", - "ws": "^8.17.1", + "react": "^18", + "react-dom": "^18", + "react-text-to-speech": "^0.14.5", + "react-textarea-autosize": "^8.5.3", + "sonner": "^1.4.41", + "tailwind-merge": "^2.2.2", + "winston": "^3.17.0", + 
"yet-another-react-lightbox": "^3.17.2", "zod": "^3.22.4" + }, + "devDependencies": { + "@types/better-sqlite3": "^7.6.12", + "@types/html-to-text": "^9.0.4", + "@types/jspdf": "^2.0.0", + "@types/node": "^20", + "@types/pdf-parse": "^1.1.4", + "@types/react": "^18", + "@types/react-dom": "^18", + "autoprefixer": "^10.0.1", + "drizzle-kit": "^0.30.5", + "eslint": "^8", + "eslint-config-next": "14.1.4", + "postcss": "^8", + "prettier": "^3.2.5", + "tailwindcss": "^3.3.0", + "typescript": "^5" } } diff --git a/ui/postcss.config.js b/postcss.config.js similarity index 100% rename from ui/postcss.config.js rename to postcss.config.js diff --git a/public/icon-100.png b/public/icon-100.png new file mode 100644 index 0000000..98fa242 Binary files /dev/null and b/public/icon-100.png differ diff --git a/public/icon-50.png b/public/icon-50.png new file mode 100644 index 0000000..9bb7a0e Binary files /dev/null and b/public/icon-50.png differ diff --git a/public/icon.png b/public/icon.png new file mode 100644 index 0000000..f6fe3c7 Binary files /dev/null and b/public/icon.png differ diff --git a/ui/public/next.svg b/public/next.svg similarity index 100% rename from ui/public/next.svg rename to public/next.svg diff --git a/public/screenshots/p1.png b/public/screenshots/p1.png new file mode 100644 index 0000000..02f01e5 Binary files /dev/null and b/public/screenshots/p1.png differ diff --git a/public/screenshots/p1_small.png b/public/screenshots/p1_small.png new file mode 100644 index 0000000..13d9a42 Binary files /dev/null and b/public/screenshots/p1_small.png differ diff --git a/public/screenshots/p2.png b/public/screenshots/p2.png new file mode 100644 index 0000000..1171675 Binary files /dev/null and b/public/screenshots/p2.png differ diff --git a/public/screenshots/p2_small.png b/public/screenshots/p2_small.png new file mode 100644 index 0000000..bd8d673 Binary files /dev/null and b/public/screenshots/p2_small.png differ diff --git a/ui/public/vercel.svg b/public/vercel.svg 
similarity index 100% rename from ui/public/vercel.svg rename to public/vercel.svg diff --git a/public/weather-ico/clear-day.svg b/public/weather-ico/clear-day.svg new file mode 100644 index 0000000..d97d28b --- /dev/null +++ b/public/weather-ico/clear-day.svg @@ -0,0 +1,131 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/public/weather-ico/clear-night.svg b/public/weather-ico/clear-night.svg new file mode 100644 index 0000000..005ac63 --- /dev/null +++ b/public/weather-ico/clear-night.svg @@ -0,0 +1,159 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/public/weather-ico/cloudy-1-day.svg b/public/weather-ico/cloudy-1-day.svg new file mode 100644 index 0000000..823fea1 --- /dev/null +++ b/public/weather-ico/cloudy-1-day.svg @@ -0,0 +1,178 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/public/weather-ico/cloudy-1-night.svg b/public/weather-ico/cloudy-1-night.svg new file mode 100644 index 0000000..3fe1541 --- /dev/null +++ b/public/weather-ico/cloudy-1-night.svg @@ -0,0 +1,206 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/public/weather-ico/fog-day.svg b/public/weather-ico/fog-day.svg new file mode 100644 index 0000000..ed834cf --- /dev/null +++ b/public/weather-ico/fog-day.svg @@ -0,0 +1,244 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + F + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/public/weather-ico/fog-night.svg b/public/weather-ico/fog-night.svg new file mode 100644 index 0000000..d59f98f --- /dev/null +++ b/public/weather-ico/fog-night.svg @@ -0,0 +1,309 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git 
a/public/weather-ico/frost-day.svg b/public/weather-ico/frost-day.svg new file mode 100644 index 0000000..16d591c --- /dev/null +++ b/public/weather-ico/frost-day.svg @@ -0,0 +1,204 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + F + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/public/weather-ico/frost-night.svg b/public/weather-ico/frost-night.svg new file mode 100644 index 0000000..ff2c8dc --- /dev/null +++ b/public/weather-ico/frost-night.svg @@ -0,0 +1,269 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/public/weather-ico/rain-and-sleet-mix.svg b/public/weather-ico/rain-and-sleet-mix.svg new file mode 100644 index 0000000..172010d --- /dev/null +++ b/public/weather-ico/rain-and-sleet-mix.svg @@ -0,0 +1,141 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/public/weather-ico/rainy-1-day.svg b/public/weather-ico/rainy-1-day.svg new file mode 100644 index 0000000..2faf06e --- /dev/null +++ b/public/weather-ico/rainy-1-day.svg @@ -0,0 +1,179 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/public/weather-ico/rainy-1-night.svg b/public/weather-ico/rainy-1-night.svg new file mode 100644 index 0000000..ee8ffd8 --- /dev/null +++ b/public/weather-ico/rainy-1-night.svg @@ -0,0 +1,243 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/public/weather-ico/rainy-2-day.svg b/public/weather-ico/rainy-2-day.svg new file mode 100644 index 0000000..affdfff --- /dev/null +++ b/public/weather-ico/rainy-2-day.svg @@ -0,0 +1,204 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/public/weather-ico/rainy-2-night.svg 
b/public/weather-ico/rainy-2-night.svg new file mode 100644 index 0000000..9c3ae20 --- /dev/null +++ b/public/weather-ico/rainy-2-night.svg @@ -0,0 +1,256 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/public/weather-ico/rainy-3-day.svg b/public/weather-ico/rainy-3-day.svg new file mode 100644 index 0000000..b0b5754 --- /dev/null +++ b/public/weather-ico/rainy-3-day.svg @@ -0,0 +1,206 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/public/weather-ico/rainy-3-night.svg b/public/weather-ico/rainy-3-night.svg new file mode 100644 index 0000000..4078e7d --- /dev/null +++ b/public/weather-ico/rainy-3-night.svg @@ -0,0 +1,270 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/public/weather-ico/scattered-thunderstorms-day.svg b/public/weather-ico/scattered-thunderstorms-day.svg new file mode 100644 index 0000000..0cfbccc --- /dev/null +++ b/public/weather-ico/scattered-thunderstorms-day.svg @@ -0,0 +1,374 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/public/weather-ico/scattered-thunderstorms-night.svg b/public/weather-ico/scattered-thunderstorms-night.svg new file mode 100644 index 0000000..72cf7a6 --- /dev/null +++ b/public/weather-ico/scattered-thunderstorms-night.svg @@ -0,0 +1,283 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/public/weather-ico/severe-thunderstorm.svg b/public/weather-ico/severe-thunderstorm.svg new file mode 100644 index 0000000..223198b --- /dev/null +++ b/public/weather-ico/severe-thunderstorm.svg @@ -0,0 +1,307 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git 
a/public/weather-ico/snowy-1-day.svg b/public/weather-ico/snowy-1-day.svg new file mode 100644 index 0000000..fb73943 --- /dev/null +++ b/public/weather-ico/snowy-1-day.svg @@ -0,0 +1,241 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/public/weather-ico/snowy-1-night.svg b/public/weather-ico/snowy-1-night.svg new file mode 100644 index 0000000..039ea2e --- /dev/null +++ b/public/weather-ico/snowy-1-night.svg @@ -0,0 +1,269 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/public/weather-ico/snowy-2-day.svg b/public/weather-ico/snowy-2-day.svg new file mode 100644 index 0000000..323a616 --- /dev/null +++ b/public/weather-ico/snowy-2-day.svg @@ -0,0 +1,273 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/public/weather-ico/snowy-2-night.svg b/public/weather-ico/snowy-2-night.svg new file mode 100644 index 0000000..10dcbfa --- /dev/null +++ b/public/weather-ico/snowy-2-night.svg @@ -0,0 +1,301 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/public/weather-ico/snowy-3-day.svg b/public/weather-ico/snowy-3-day.svg new file mode 100644 index 0000000..846c17a --- /dev/null +++ b/public/weather-ico/snowy-3-day.svg @@ -0,0 +1,334 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/public/weather-ico/snowy-3-night.svg b/public/weather-ico/snowy-3-night.svg new file mode 100644 index 0000000..b3c8c24 --- /dev/null +++ b/public/weather-ico/snowy-3-night.svg @@ -0,0 +1,361 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/sample.config.toml b/sample.config.toml index f6c6943..ba3e98e 100644 --- a/sample.config.toml +++ b/sample.config.toml @@ -1,12 +1,35 @@ [GENERAL] -PORT = 3001 # Port to run the server on SIMILARITY_MEASURE = "cosine" # "cosine" or "dot" +KEEP_ALIVE = "5m" # How long to keep Ollama models loaded into memory. (Instead of using -1 use "-1m") -[API_KEYS] -OPENAI = "" # OpenAI API key - sk-1234567890abcdef1234567890abcdef -GROQ = "" # Groq API key - gsk_1234567890abcdef1234567890abcdef -ANTHROPIC = "" # Anthropic API key - sk-ant-1234567890abcdef1234567890abcdef +[MODELS.OPENAI] +API_KEY = "" + +[MODELS.GROQ] +API_KEY = "" + +[MODELS.ANTHROPIC] +API_KEY = "" + +[MODELS.GEMINI] +API_KEY = "" + +[MODELS.CUSTOM_OPENAI] +API_KEY = "" +API_URL = "" +MODEL_NAME = "" + +[MODELS.OLLAMA] +API_URL = "" # Ollama API URL - http://host.docker.internal:11434 + +[MODELS.DEEPSEEK] +API_KEY = "" + +[MODELS.AIMLAPI] +API_KEY = "" # Required to use AI/ML API chat and embedding models + +[MODELS.LM_STUDIO] +API_URL = "" # LM Studio API URL - http://host.docker.internal:1234 [API_ENDPOINTS] -SEARXNG = "http://localhost:32768" # SearxNG API URL -OLLAMA = "" # Ollama API URL - http://host.docker.internal:11434 \ No newline at end of file +SEARXNG = "" # SearxNG API URL - http://localhost:32768 diff --git a/searxng/settings.yml b/searxng/settings.yml index da973c1..54d27c4 100644 --- a/searxng/settings.yml +++ b/searxng/settings.yml @@ -1,2356 +1,17 @@ -general: - # Debug mode, only for development. 
Is overwritten by ${SEARXNG_DEBUG} - debug: false - # displayed name - instance_name: 'searxng' - # For example: https://example.com/privacy - privacypolicy_url: false - # use true to use your own donation page written in searx/info/en/donate.md - # use false to disable the donation link - donation_url: false - # mailto:contact@example.com - contact_url: false - # record stats - enable_metrics: true +use_default_settings: true -brand: - new_issue_url: https://github.com/searxng/searxng/issues/new - docs_url: https://docs.searxng.org/ - public_instances: https://searx.space - wiki_url: https://github.com/searxng/searxng/wiki - issue_url: https://github.com/searxng/searxng/issues - # custom: - # maintainer: "Jon Doe" - # # Custom entries in the footer: [title]: [link] - # links: - # Uptime: https://uptime.searxng.org/history/darmarit-org - # About: "https://searxng.org" +general: + instance_name: 'searxng' search: - # Filter results. 0: None, 1: Moderate, 2: Strict - safe_search: 0 - # Existing autocomplete backends: "dbpedia", "duckduckgo", "google", "yandex", "mwmbl", - # "seznam", "startpage", "stract", "swisscows", "qwant", "wikipedia" - leave blank to turn it off - # by default. 
autocomplete: 'google' - # minimun characters to type before autocompleter starts - autocomplete_min: 4 - # Default search language - leave blank to detect from browser information or - # use codes from 'languages.py' - default_lang: 'auto' - # max_page: 0 # if engine supports paging, 0 means unlimited numbers of pages - # Available languages - # languages: - # - all - # - en - # - en-US - # - de - # - it-IT - # - fr - # - fr-BE - # ban time in seconds after engine errors - ban_time_on_fail: 5 - # max ban time in seconds after engine errors - max_ban_time_on_fail: 120 - suspended_times: - # Engine suspension time after error (in seconds; set to 0 to disable) - # For error "Access denied" and "HTTP error [402, 403]" - SearxEngineAccessDenied: 86400 - # For error "CAPTCHA" - SearxEngineCaptcha: 86400 - # For error "Too many request" and "HTTP error 429" - SearxEngineTooManyRequests: 3600 - # Cloudflare CAPTCHA - cf_SearxEngineCaptcha: 1296000 - cf_SearxEngineAccessDenied: 86400 - # ReCAPTCHA - recaptcha_SearxEngineCaptcha: 604800 - - # remove format to deny access, use lower case. - # formats: [html, csv, json, rss] formats: - html - json server: - # Is overwritten by ${SEARXNG_PORT} and ${SEARXNG_BIND_ADDRESS} - port: 8888 - bind_address: '127.0.0.1' - # public URL of the instance, to ensure correct inbound links. Is overwritten - # by ${SEARXNG_URL}. - base_url: / # "http://example.com/location" - limiter: false # rate limit the number of request on the instance, block some bots - public_instance: false # enable features designed only for public instances - - # If your instance owns a /etc/searxng/settings.yml file, then set the following - # values there. 
- secret_key: 'a2fb23f1b02e6ee83875b09826990de0f6bd908b6638e8c10277d415f6ab852b' # Is overwritten by ${SEARXNG_SECRET} - # Proxying image results through searx - image_proxy: false - # 1.0 and 1.1 are supported - http_protocol_version: '1.0' - # POST queries are more secure as they don't show up in history but may cause - # problems when using Firefox containers - method: 'POST' - default_http_headers: - X-Content-Type-Options: nosniff - X-Download-Options: noopen - X-Robots-Tag: noindex, nofollow - Referrer-Policy: no-referrer - -redis: - # URL to connect redis database. Is overwritten by ${SEARXNG_REDIS_URL}. - # https://docs.searxng.org/admin/settings/settings_redis.html#settings-redis - url: false - -ui: - # Custom static path - leave it blank if you didn't change - static_path: '' - static_use_hash: false - # Custom templates path - leave it blank if you didn't change - templates_path: '' - # query_in_title: When true, the result page's titles contains the query - # it decreases the privacy, since the browser can records the page titles. - query_in_title: false - # infinite_scroll: When true, automatically loads the next page when scrolling to bottom of the current page. - infinite_scroll: false - # ui theme - default_theme: simple - # center the results ? - center_alignment: false - # URL prefix of the internet archive, don't forget trailing slash (if needed). - # cache_url: "https://webcache.googleusercontent.com/search?q=cache:" - # Default interface locale - leave blank to detect from browser information or - # use codes from the 'locales' config section - default_locale: '' - # Open result links in a new tab by default - # results_on_new_tab: false - theme_args: - # style of simple theme: auto, light, dark - simple_style: auto - # Perform search immediately if a category selected. - # Disable to select multiple categories at once and start the search manually. 
- search_on_category_select: true - # Hotkeys: default or vim - hotkeys: default - -# Lock arbitrary settings on the preferences page. To find the ID of the user -# setting you want to lock, check the ID of the form on the page "preferences". -# -# preferences: -# lock: -# - language -# - autocomplete -# - method -# - query_in_title - -# searx supports result proxification using an external service: -# https://github.com/asciimoo/morty uncomment below section if you have running -# morty proxy the key is base64 encoded (keep the !!binary notation) -# Note: since commit af77ec3, morty accepts a base64 encoded key. -# -# result_proxy: -# url: http://127.0.0.1:3000/ -# # the key is a base64 encoded string, the YAML !!binary prefix is optional -# key: !!binary "your_morty_proxy_key" -# # [true|false] enable the "proxy" button next to each result -# proxify_results: true - -# communication with search engines -# -outgoing: - # default timeout in seconds, can be override by engine - request_timeout: 3.0 - # the maximum timeout in seconds - # max_request_timeout: 10.0 - # suffix of searx_useragent, could contain information like an email address - # to the administrator - useragent_suffix: '' - # The maximum number of concurrent connections that may be established. - pool_connections: 100 - # Allow the connection pool to maintain keep-alive connections below this - # point. 
- pool_maxsize: 20 - # See https://www.python-httpx.org/http2/ - enable_http2: true - # uncomment below section if you want to use a custom server certificate - # see https://www.python-httpx.org/advanced/#changing-the-verification-defaults - # and https://www.python-httpx.org/compatibility/#ssl-configuration - # verify: ~/.mitmproxy/mitmproxy-ca-cert.cer - # - # uncomment below section if you want to use a proxyq see: SOCKS proxies - # https://2.python-requests.org/en/latest/user/advanced/#proxies - # are also supported: see - # https://2.python-requests.org/en/latest/user/advanced/#socks - # - # proxies: - # all://: - # - http://proxy1:8080 - # - http://proxy2:8080 - # - # using_tor_proxy: true - # - # Extra seconds to add in order to account for the time taken by the proxy - # - # extra_proxy_timeout: 10.0 - # - # uncomment below section only if you have more than one network interface - # which can be the source of outgoing search requests - # - # source_ips: - # - 1.1.1.1 - # - 1.1.1.2 - # - fe80::/126 - -# External plugin configuration, for more details see -# https://docs.searxng.org/dev/plugins.html -# -# plugins: -# - plugin1 -# - plugin2 -# - ... - -# Comment or un-comment plugin to activate / deactivate by default. -# -# enabled_plugins: -# # these plugins are enabled if nothing is configured .. -# - 'Hash plugin' -# - 'Self Information' -# - 'Tracker URL remover' -# - 'Ahmia blacklist' # activation depends on outgoing.using_tor_proxy -# # these plugins are disabled if nothing is configured .. -# - 'Hostname replace' # see hostname_replace configuration below -# - 'Open Access DOI rewrite' -# - 'Tor check plugin' -# # Read the docs before activate: auto-detection of the language could be -# # detrimental to users expectations / users can activate the plugin in the -# # preferences if they want. 
-# - 'Autodetect search language' - -# Configuration of the "Hostname replace" plugin: -# -# hostname_replace: -# '(.*\.)?youtube\.com$': 'invidious.example.com' -# '(.*\.)?youtu\.be$': 'invidious.example.com' -# '(.*\.)?youtube-noocookie\.com$': 'yotter.example.com' -# '(.*\.)?reddit\.com$': 'teddit.example.com' -# '(.*\.)?redd\.it$': 'teddit.example.com' -# '(www\.)?twitter\.com$': 'nitter.example.com' -# # to remove matching host names from result list, set value to false -# 'spam\.example\.com': false - -checker: - # disable checker when in debug mode - off_when_debug: true - - # use "scheduling: false" to disable scheduling - # scheduling: interval or int - - # to activate the scheduler: - # * uncomment "scheduling" section - # * add "cache2 = name=searxngcache,items=2000,blocks=2000,blocksize=4096,bitmap=1" - # to your uwsgi.ini - - # scheduling: - # start_after: [300, 1800] # delay to start the first run of the checker - # every: [86400, 90000] # how often the checker runs - - # additional tests: only for the YAML anchors (see the engines section) - # - additional_tests: - rosebud: &test_rosebud - matrix: - query: rosebud - lang: en - result_container: - - not_empty - - ['one_title_contains', 'citizen kane'] - test: - - unique_results - - android: &test_android - matrix: - query: ['android'] - lang: ['en', 'de', 'fr', 'zh-CN'] - result_container: - - not_empty - - ['one_title_contains', 'google'] - test: - - unique_results - - # tests: only for the YAML anchors (see the engines section) - tests: - infobox: &tests_infobox - infobox: - matrix: - query: ['linux', 'new york', 'bbc'] - result_container: - - has_infobox - -categories_as_tabs: - general: - images: - videos: - news: - map: - music: - it: - science: - files: - social media: engines: - - name: 9gag - engine: 9gag - shortcut: 9g - disabled: true - - - name: annas archive - engine: annas_archive - disabled: true - shortcut: aa - - # - name: annas articles - # engine: annas_archive - # shortcut: aaa - # 
# https://docs.searxng.org/dev/engines/online/annas_archive.html - # aa_content: 'journal_article' # book_any .. magazine, standards_document - # aa_ext: 'pdf' # pdf, epub, .. - # aa_sort: 'newest' # newest, oldest, largest, smallest - - - name: apk mirror - engine: apkmirror - timeout: 4.0 - shortcut: apkm - disabled: true - - - name: apple app store - engine: apple_app_store - shortcut: aps - disabled: true - - # Requires Tor - - name: ahmia - engine: ahmia - categories: onions - enable_http: true - shortcut: ah - - - name: anaconda - engine: xpath - paging: true - first_page_num: 0 - search_url: https://anaconda.org/search?q={query}&page={pageno} - results_xpath: //tbody/tr - url_xpath: ./td/h5/a[last()]/@href - title_xpath: ./td/h5 - content_xpath: ./td[h5]/text() - categories: it - timeout: 6.0 - shortcut: conda - disabled: true - - - name: arch linux wiki - engine: archlinux - shortcut: al - - - name: artic - engine: artic - shortcut: arc - timeout: 4.0 - - - name: arxiv - engine: arxiv - shortcut: arx - timeout: 4.0 - - - name: ask - engine: ask - shortcut: ask - disabled: true - - # tmp suspended: dh key too small - # - name: base - # engine: base - # shortcut: bs - - - name: bandcamp - engine: bandcamp - shortcut: bc - categories: music - - - name: wikipedia - engine: wikipedia - shortcut: wp - # add "list" to the array to get results in the results list - display_type: ['infobox'] - base_url: 'https://{language}.wikipedia.org/' - categories: [general] - - - name: bilibili - engine: bilibili - shortcut: bil - disabled: true - - - name: bing - engine: bing - shortcut: bi - disabled: true - - - name: bing images - engine: bing_images - shortcut: bii - - - name: bing news - engine: bing_news - shortcut: bin - - - name: bing videos - engine: bing_videos - shortcut: biv - - - name: bitbucket - engine: xpath - paging: true - search_url: https://bitbucket.org/repo/all/{pageno}?name={query} - url_xpath: //article[@class="repo-summary"]//a[@class="repo-link"]/@href 
- title_xpath: //article[@class="repo-summary"]//a[@class="repo-link"] - content_xpath: //article[@class="repo-summary"]/p - categories: [it, repos] - timeout: 4.0 - disabled: true - shortcut: bb - about: - website: https://bitbucket.org/ - wikidata_id: Q2493781 - official_api_documentation: https://developer.atlassian.com/bitbucket - use_official_api: false - require_api_key: false - results: HTML - - - name: bpb - engine: bpb - shortcut: bpb - disabled: true - - - name: btdigg - engine: btdigg - shortcut: bt - disabled: true - - - name: ccc-tv - engine: xpath - paging: false - search_url: https://media.ccc.de/search/?q={query} - url_xpath: //div[@class="caption"]/h3/a/@href - title_xpath: //div[@class="caption"]/h3/a/text() - content_xpath: //div[@class="caption"]/h4/@title - categories: videos - disabled: true - shortcut: c3tv - about: - website: https://media.ccc.de/ - wikidata_id: Q80729951 - official_api_documentation: https://github.com/voc/voctoweb - use_official_api: false - require_api_key: false - results: HTML - # We don't set language: de here because media.ccc.de is not just - # for a German audience. It contains many English videos and many - # German videos have English subtitles. 
- - - name: openverse - engine: openverse - categories: images - shortcut: opv - - - name: chefkoch - engine: chefkoch - shortcut: chef - # to show premium or plus results too: - # skip_premium: false - - # - name: core.ac.uk - # engine: core - # categories: science - # shortcut: cor - # # get your API key from: https://core.ac.uk/api-keys/register/ - # api_key: 'unset' - - - name: crossref - engine: crossref - shortcut: cr - timeout: 30 - disabled: true - - - name: crowdview - engine: json_engine - shortcut: cv - categories: general - paging: false - search_url: https://crowdview-next-js.onrender.com/api/search-v3?query={query} - results_query: results - url_query: link - title_query: title - content_query: snippet - disabled: true - about: - website: https://crowdview.ai/ - - - name: yep - engine: yep - shortcut: yep - categories: general - search_type: web - disabled: true - - - name: yep images - engine: yep - shortcut: yepi - categories: images - search_type: images - disabled: true - - - name: yep news - engine: yep - shortcut: yepn - categories: news - search_type: news - disabled: true - - - name: curlie - engine: xpath - shortcut: cl - categories: general - disabled: true - paging: true - lang_all: '' - search_url: https://curlie.org/search?q={query}&lang={lang}&start={pageno}&stime=92452189 - page_size: 20 - results_xpath: //div[@id="site-list-content"]/div[@class="site-item"] - url_xpath: ./div[@class="title-and-desc"]/a/@href - title_xpath: ./div[@class="title-and-desc"]/a/div - content_xpath: ./div[@class="title-and-desc"]/div[@class="site-descr"] - about: - website: https://curlie.org/ - wikidata_id: Q60715723 - use_official_api: false - require_api_key: false - results: HTML - - - name: currency - engine: currency_convert - categories: general - shortcut: cc - - - name: bahnhof - engine: json_engine - search_url: https://www.bahnhof.de/api/stations/search/{query} - url_prefix: https://www.bahnhof.de/ - url_query: slug - title_query: name - 
content_query: state - shortcut: bf - disabled: true - about: - website: https://www.bahn.de - wikidata_id: Q22811603 - use_official_api: false - require_api_key: false - results: JSON - language: de - - - name: deezer - engine: deezer - shortcut: dz - disabled: true - - - name: destatis - engine: destatis - shortcut: destat - disabled: true - - - name: deviantart - engine: deviantart - shortcut: da - timeout: 3.0 - - - name: ddg definitions - engine: duckduckgo_definitions - shortcut: ddd - weight: 2 - disabled: true - tests: *tests_infobox - - # cloudflare protected - # - name: digbt - # engine: digbt - # shortcut: dbt - # timeout: 6.0 - # disabled: true - - - name: docker hub - engine: docker_hub - shortcut: dh - categories: [it, packages] - - - name: erowid - engine: xpath - paging: true - first_page_num: 0 - page_size: 30 - search_url: https://www.erowid.org/search.php?q={query}&s={pageno} - url_xpath: //dl[@class="results-list"]/dt[@class="result-title"]/a/@href - title_xpath: //dl[@class="results-list"]/dt[@class="result-title"]/a/text() - content_xpath: //dl[@class="results-list"]/dd[@class="result-details"] - categories: [] - shortcut: ew - disabled: true - about: - website: https://www.erowid.org/ - wikidata_id: Q1430691 - official_api_documentation: - use_official_api: false - require_api_key: false - results: HTML - - # - name: elasticsearch - # shortcut: es - # engine: elasticsearch - # base_url: http://localhost:9200 - # username: elastic - # password: changeme - # index: my-index - # # available options: match, simple_query_string, term, terms, custom - # query_type: match - # # if query_type is set to custom, provide your query here - # #custom_query_json: {"query":{"match_all": {}}} - # #show_metadata: false - # disabled: true - - - name: wikidata - engine: wikidata - shortcut: wd - timeout: 3.0 - weight: 2 - # add "list" to the array to get results in the results list - display_type: ['infobox'] - tests: *tests_infobox - categories: [general] - - 
- name: duckduckgo - engine: duckduckgo - shortcut: ddg - - - name: duckduckgo images - engine: duckduckgo_extra - categories: [images, web] - ddg_category: images - shortcut: ddi - disabled: true - - - name: duckduckgo videos - engine: duckduckgo_extra - categories: [videos, web] - ddg_category: videos - shortcut: ddv - disabled: true - - - name: duckduckgo news - engine: duckduckgo_extra - categories: [news, web] - ddg_category: news - shortcut: ddn - disabled: true - - - name: duckduckgo weather - engine: duckduckgo_weather - shortcut: ddw - disabled: true - - - name: apple maps - engine: apple_maps - shortcut: apm - disabled: true - timeout: 5.0 - - - name: emojipedia - engine: emojipedia - timeout: 4.0 - shortcut: em - disabled: true - - - name: tineye - engine: tineye - shortcut: tin - timeout: 9.0 - disabled: true - - - name: etymonline - engine: xpath - paging: true - search_url: https://etymonline.com/search?page={pageno}&q={query} - url_xpath: //a[contains(@class, "word__name--")]/@href - title_xpath: //a[contains(@class, "word__name--")] - content_xpath: //section[contains(@class, "word__defination")] - first_page_num: 1 - shortcut: et - categories: [dictionaries] - about: - website: https://www.etymonline.com/ - wikidata_id: Q1188617 - official_api_documentation: - use_official_api: false - require_api_key: false - results: HTML - - # - name: ebay - # engine: ebay - # shortcut: eb - # base_url: 'https://www.ebay.com' - # disabled: true - # timeout: 5 - - - name: 1x - engine: www1x - shortcut: 1x - timeout: 3.0 - disabled: true - - - name: fdroid - engine: fdroid - shortcut: fd - disabled: true - - - name: flickr - categories: images - shortcut: fl - # You can use the engine using the official stable API, but you need an API - # key, see: https://www.flickr.com/services/apps/create/ - # engine: flickr - # api_key: 'apikey' # required! 
- # Or you can use the html non-stable engine, activated by default - engine: flickr_noapi - - - name: free software directory - engine: mediawiki - shortcut: fsd - categories: [it, software wikis] - base_url: https://directory.fsf.org/ - search_type: title - timeout: 5.0 - disabled: true - about: - website: https://directory.fsf.org/ - wikidata_id: Q2470288 - - # - name: freesound - # engine: freesound - # shortcut: fnd - # disabled: true - # timeout: 15.0 - # API key required, see: https://freesound.org/docs/api/overview.html - # api_key: MyAPIkey - - - name: frinkiac - engine: frinkiac - shortcut: frk - disabled: true - - - name: fyyd - engine: fyyd - shortcut: fy - timeout: 8.0 - disabled: true - - - name: genius - engine: genius - shortcut: gen - - - name: gentoo - engine: gentoo - shortcut: ge - timeout: 10.0 - - - name: gitlab - engine: json_engine - paging: true - search_url: https://gitlab.com/api/v4/projects?search={query}&page={pageno} - url_query: web_url - title_query: name_with_namespace - content_query: description - page_size: 20 - categories: [it, repos] - shortcut: gl - timeout: 10.0 - disabled: true - about: - website: https://about.gitlab.com/ - wikidata_id: Q16639197 - official_api_documentation: https://docs.gitlab.com/ee/api/ - use_official_api: false - require_api_key: false - results: JSON - - - name: github - engine: github - shortcut: gh - - # This a Gitea service. If you would like to use a different instance, - # change codeberg.org to URL of the desired Gitea host. Or you can create a - # new engine by copying this and changing the name, shortcut and search_url. 
- - - name: codeberg - engine: json_engine - search_url: https://codeberg.org/api/v1/repos/search?q={query}&limit=10 - url_query: html_url - title_query: name - content_query: description - categories: [it, repos] - shortcut: cb - disabled: true - about: - website: https://codeberg.org/ - wikidata_id: - official_api_documentation: https://try.gitea.io/api/swagger - use_official_api: false - require_api_key: false - results: JSON - - - name: goodreads - engine: goodreads - shortcut: good - timeout: 4.0 - disabled: true - - - name: google - engine: google - shortcut: go - # additional_tests: - # android: *test_android - - - name: google images - engine: google_images - shortcut: goi - # additional_tests: - # android: *test_android - # dali: - # matrix: - # query: ['Dali Christ'] - # lang: ['en', 'de', 'fr', 'zh-CN'] - # result_container: - # - ['one_title_contains', 'Salvador'] - - - name: google news - engine: google_news - shortcut: gon - # additional_tests: - # android: *test_android - - - name: google videos - engine: google_videos - shortcut: gov - # additional_tests: - # android: *test_android - - - name: google scholar - engine: google_scholar - shortcut: gos - - - name: google play apps - engine: google_play - categories: [files, apps] - shortcut: gpa - play_categ: apps - disabled: true - - - name: google play movies - engine: google_play - categories: videos - shortcut: gpm - play_categ: movies - disabled: true - - - name: material icons - engine: material_icons - categories: images - shortcut: mi - disabled: true - - - name: gpodder - engine: json_engine - shortcut: gpod - timeout: 4.0 - paging: false - search_url: https://gpodder.net/search.json?q={query} - url_query: url - title_query: title - content_query: description - page_size: 19 - categories: music - disabled: true - about: - website: https://gpodder.net - wikidata_id: Q3093354 - official_api_documentation: https://gpoddernet.readthedocs.io/en/latest/api/ - use_official_api: false - 
requires_api_key: false - results: JSON - - - name: habrahabr - engine: xpath - paging: true - search_url: https://habr.com/en/search/page{pageno}/?q={query} - results_xpath: //article[contains(@class, "tm-articles-list__item")] - url_xpath: .//a[@class="tm-title__link"]/@href - title_xpath: .//a[@class="tm-title__link"] - content_xpath: .//div[contains(@class, "article-formatted-body")] - categories: it - timeout: 4.0 - disabled: true - shortcut: habr - about: - website: https://habr.com/ - wikidata_id: Q4494434 - official_api_documentation: https://habr.com/en/docs/help/api/ - use_official_api: false - require_api_key: false - results: HTML - - - name: hackernews - engine: hackernews - shortcut: hn - disabled: true - - - name: hoogle - engine: xpath - paging: true - search_url: https://hoogle.haskell.org/?hoogle={query}&start={pageno} - results_xpath: '//div[@class="result"]' - title_xpath: './/div[@class="ans"]//a' - url_xpath: './/div[@class="ans"]//a/@href' - content_xpath: './/div[@class="from"]' - page_size: 20 - categories: [it, packages] - shortcut: ho - about: - website: https://hoogle.haskell.org/ - wikidata_id: Q34010 - official_api_documentation: https://hackage.haskell.org/api - use_official_api: false - require_api_key: false - results: JSON - - - name: imdb - engine: imdb - shortcut: imdb - timeout: 6.0 - disabled: true - - - name: imgur - engine: imgur - shortcut: img - disabled: true - - - name: ina - engine: ina - shortcut: in - timeout: 6.0 - disabled: true - - - name: invidious - engine: invidious - # Instanes will be selected randomly, see https://api.invidious.io/ for - # instances that are stable (good uptime) and close to you. 
- base_url: - - https://invidious.io.lol - - https://invidious.fdn.fr - - https://yt.artemislena.eu - - https://invidious.tiekoetter.com - - https://invidious.flokinet.to - - https://vid.puffyan.us - - https://invidious.privacydev.net - - https://inv.tux.pizza - shortcut: iv - timeout: 3.0 - disabled: true - - - name: jisho - engine: jisho - shortcut: js - timeout: 3.0 - disabled: true - - - name: kickass - engine: kickass - base_url: - - https://kickasstorrents.to - - https://kickasstorrents.cr - - https://kickasstorrent.cr - - https://kickass.sx - - https://kat.am - shortcut: kc - timeout: 4.0 - - - name: lemmy communities - engine: lemmy - lemmy_type: Communities - shortcut: leco - - - name: lemmy users - engine: lemmy - network: lemmy communities - lemmy_type: Users - shortcut: leus - - - name: lemmy posts - engine: lemmy - network: lemmy communities - lemmy_type: Posts - shortcut: lepo - - - name: lemmy comments - engine: lemmy - network: lemmy communities - lemmy_type: Comments - shortcut: lecom - - - name: library genesis - engine: xpath - # search_url: https://libgen.is/search.php?req={query} - search_url: https://libgen.rs/search.php?req={query} - url_xpath: //a[contains(@href,"book/index.php?md5")]/@href - title_xpath: //a[contains(@href,"book/")]/text()[1] - content_xpath: //td/a[1][contains(@href,"=author")]/text() - categories: files - timeout: 7.0 - disabled: true - shortcut: lg - about: - website: https://libgen.fun/ - wikidata_id: Q22017206 - official_api_documentation: - use_official_api: false - require_api_key: false - results: HTML - - - name: z-library - engine: zlibrary - shortcut: zlib - categories: files - timeout: 7.0 - - - name: library of congress - engine: loc - shortcut: loc - categories: images - - - name: lingva - engine: lingva - shortcut: lv - # set lingva instance in url, by default it will use the official instance - # url: https://lingva.thedaviddelta.com - - - name: lobste.rs - engine: xpath - search_url: 
https://lobste.rs/search?utf8=%E2%9C%93&q={query}&what=stories&order=relevance - results_xpath: //li[contains(@class, "story")] - url_xpath: .//a[@class="u-url"]/@href - title_xpath: .//a[@class="u-url"] - content_xpath: .//a[@class="domain"] - categories: it - shortcut: lo - timeout: 5.0 - disabled: true - about: - website: https://lobste.rs/ - wikidata_id: Q60762874 - official_api_documentation: - use_official_api: false - require_api_key: false - results: HTML - - - name: mastodon users - engine: mastodon - mastodon_type: accounts - base_url: https://mastodon.social - shortcut: mau - - - name: mastodon hashtags - engine: mastodon - mastodon_type: hashtags - base_url: https://mastodon.social - shortcut: mah - - # - name: matrixrooms - # engine: mrs - # # https://docs.searxng.org/dev/engines/online/mrs.html - # # base_url: https://mrs-api-host - # shortcut: mtrx - # disabled: true - - - name: mdn - shortcut: mdn - engine: json_engine - categories: [it] - paging: true - search_url: https://developer.mozilla.org/api/v1/search?q={query}&page={pageno} - results_query: documents - url_query: mdn_url - url_prefix: https://developer.mozilla.org - title_query: title - content_query: summary - about: - website: https://developer.mozilla.org - wikidata_id: Q3273508 - official_api_documentation: null - use_official_api: false - require_api_key: false - results: JSON - - - name: metacpan - engine: metacpan - shortcut: cpan - disabled: true - number_of_results: 20 - - # - name: meilisearch - # engine: meilisearch - # shortcut: mes - # enable_http: true - # base_url: http://localhost:7700 - # index: my-index - - - name: mixcloud - engine: mixcloud - shortcut: mc - - # MongoDB engine - # Required dependency: pymongo - # - name: mymongo - # engine: mongodb - # shortcut: md - # exact_match_only: false - # host: '127.0.0.1' - # port: 27017 - # enable_http: true - # results_per_page: 20 - # database: 'business' - # collection: 'reviews' # name of the db collection - # key: 'name' # 
key in the collection to search for - - - name: mozhi - engine: mozhi - base_url: - - https://mozhi.aryak.me - - https://translate.bus-hit.me - - https://nyc1.mz.ggtyler.dev - # mozhi_engine: google - see https://mozhi.aryak.me for supported engines - timeout: 4.0 - shortcut: mz - disabled: true - - - name: mwmbl - engine: mwmbl - # api_url: https://api.mwmbl.org - shortcut: mwm - disabled: true - - - name: npm - engine: json_engine - paging: true - first_page_num: 0 - search_url: https://api.npms.io/v2/search?q={query}&size=25&from={pageno} - results_query: results - url_query: package/links/npm - title_query: package/name - content_query: package/description - page_size: 25 - categories: [it, packages] - disabled: true - timeout: 5.0 - shortcut: npm - about: - website: https://npms.io/ - wikidata_id: Q7067518 - official_api_documentation: https://api-docs.npms.io/ - use_official_api: false - require_api_key: false - results: JSON - - - name: nyaa - engine: nyaa - shortcut: nt - disabled: true - - - name: mankier - engine: json_engine - search_url: https://www.mankier.com/api/v2/mans/?q={query} - results_query: results - url_query: url - title_query: name - content_query: description - categories: it - shortcut: man - about: - website: https://www.mankier.com/ - official_api_documentation: https://www.mankier.com/api - use_official_api: true - require_api_key: false - results: JSON - - - name: odysee - engine: odysee - shortcut: od - disabled: true - - - name: openairedatasets - engine: json_engine - paging: true - search_url: https://api.openaire.eu/search/datasets?format=json&page={pageno}&size=10&title={query} - results_query: response/results/result - url_query: metadata/oaf:entity/oaf:result/children/instance/webresource/url/$ - title_query: metadata/oaf:entity/oaf:result/title/$ - content_query: metadata/oaf:entity/oaf:result/description/$ - content_html_to_text: true - categories: 'science' - shortcut: oad - timeout: 5.0 - about: - website: 
https://www.openaire.eu/ - wikidata_id: Q25106053 - official_api_documentation: https://api.openaire.eu/ - use_official_api: false - require_api_key: false - results: JSON - - - name: openairepublications - engine: json_engine - paging: true - search_url: https://api.openaire.eu/search/publications?format=json&page={pageno}&size=10&title={query} - results_query: response/results/result - url_query: metadata/oaf:entity/oaf:result/children/instance/webresource/url/$ - title_query: metadata/oaf:entity/oaf:result/title/$ - content_query: metadata/oaf:entity/oaf:result/description/$ - content_html_to_text: true - categories: science - shortcut: oap - timeout: 5.0 - about: - website: https://www.openaire.eu/ - wikidata_id: Q25106053 - official_api_documentation: https://api.openaire.eu/ - use_official_api: false - require_api_key: false - results: JSON - - # - name: opensemanticsearch - # engine: opensemantic - # shortcut: oss - # base_url: 'http://localhost:8983/solr/opensemanticsearch/' - - - name: openstreetmap - engine: openstreetmap - shortcut: osm - - - name: openrepos - engine: xpath - paging: true - search_url: https://openrepos.net/search/node/{query}?page={pageno} - url_xpath: //li[@class="search-result"]//h3[@class="title"]/a/@href - title_xpath: //li[@class="search-result"]//h3[@class="title"]/a - content_xpath: //li[@class="search-result"]//div[@class="search-snippet-info"]//p[@class="search-snippet"] - categories: files - timeout: 4.0 - disabled: true - shortcut: or - about: - website: https://openrepos.net/ - wikidata_id: - official_api_documentation: - use_official_api: false - require_api_key: false - results: HTML - - - name: packagist - engine: json_engine - paging: true - search_url: https://packagist.org/search.json?q={query}&page={pageno} - results_query: results - url_query: url - title_query: name - content_query: description - categories: [it, packages] - disabled: true - timeout: 5.0 - shortcut: pack - about: - website: https://packagist.org - 
wikidata_id: Q108311377 - official_api_documentation: https://packagist.org/apidoc - use_official_api: true - require_api_key: false - results: JSON - - - name: pdbe - engine: pdbe - shortcut: pdb - # Hide obsolete PDB entries. Default is not to hide obsolete structures - # hide_obsolete: false - - - name: photon - engine: photon - shortcut: ph - - - name: pinterest - engine: pinterest - shortcut: pin - - - name: piped - engine: piped - shortcut: ppd - categories: videos - piped_filter: videos - timeout: 3.0 - - # URL to use as link and for embeds - frontend_url: https://srv.piped.video - # Instance will be selected randomly, for more see https://piped-instances.kavin.rocks/ - backend_url: - - https://pipedapi.kavin.rocks - - https://pipedapi-libre.kavin.rocks - - https://pipedapi.adminforge.de - - - name: piped.music - engine: piped - network: piped - shortcut: ppdm - categories: music - piped_filter: music_songs - timeout: 3.0 - - - name: piratebay - engine: piratebay - shortcut: tpb - # You may need to change this URL to a proxy if piratebay is blocked in your - # country - url: https://thepiratebay.org/ - timeout: 3.0 - - - name: podcastindex - engine: podcastindex - shortcut: podcast - - # Required dependency: psychopg2 - # - name: postgresql - # engine: postgresql - # database: postgres - # username: postgres - # password: postgres - # limit: 10 - # query_str: 'SELECT * from my_table WHERE my_column = %(query)s' - # shortcut : psql - - - name: presearch - engine: presearch - search_type: search - categories: [general, web] - shortcut: ps - timeout: 4.0 - disabled: true - - - name: presearch images - engine: presearch - network: presearch - search_type: images - categories: [images, web] - timeout: 4.0 - shortcut: psimg - disabled: true - - - name: presearch videos - engine: presearch - network: presearch - search_type: videos - categories: [general, web] - timeout: 4.0 - shortcut: psvid - disabled: true - - - name: presearch news - engine: presearch - 
network: presearch - search_type: news - categories: [news, web] - timeout: 4.0 - shortcut: psnews - disabled: true - - - name: pub.dev - engine: xpath - shortcut: pd - search_url: https://pub.dev/packages?q={query}&page={pageno} - paging: true - results_xpath: //div[contains(@class,"packages-item")] - url_xpath: ./div/h3/a/@href - title_xpath: ./div/h3/a - content_xpath: ./div/div/div[contains(@class,"packages-description")]/span - categories: [packages, it] - timeout: 3.0 - disabled: true - first_page_num: 1 - about: - website: https://pub.dev/ - official_api_documentation: https://pub.dev/help/api - use_official_api: false - require_api_key: false - results: HTML - - - name: pubmed - engine: pubmed - shortcut: pub - timeout: 3.0 - - - name: pypi - shortcut: pypi - engine: xpath - paging: true - search_url: https://pypi.org/search/?q={query}&page={pageno} - results_xpath: /html/body/main/div/div/div/form/div/ul/li/a[@class="package-snippet"] - url_xpath: ./@href - title_xpath: ./h3/span[@class="package-snippet__name"] - content_xpath: ./p - suggestion_xpath: /html/body/main/div/div/div/form/div/div[@class="callout-block"]/p/span/a[@class="link"] - first_page_num: 1 - categories: [it, packages] - about: - website: https://pypi.org - wikidata_id: Q2984686 - official_api_documentation: https://warehouse.readthedocs.io/api-reference/index.html - use_official_api: false - require_api_key: false - results: HTML - - - name: qwant - qwant_categ: web - engine: qwant - shortcut: qw - categories: [general, web] - additional_tests: - rosebud: *test_rosebud - - - name: qwant news - qwant_categ: news - engine: qwant - shortcut: qwn - categories: news - network: qwant - - - name: qwant images - qwant_categ: images - engine: qwant - shortcut: qwi - categories: [images, web] - network: qwant - - - name: qwant videos - qwant_categ: videos - engine: qwant - shortcut: qwv - categories: [videos, web] - network: qwant - - # - name: library - # engine: recoll - # shortcut: lib - # 
base_url: 'https://recoll.example.org/' - # search_dir: '' - # mount_prefix: /export - # dl_prefix: 'https://download.example.org' - # timeout: 30.0 - # categories: files - # disabled: true - - # - name: recoll library reference - # engine: recoll - # base_url: 'https://recoll.example.org/' - # search_dir: reference - # mount_prefix: /export - # dl_prefix: 'https://download.example.org' - # shortcut: libr - # timeout: 30.0 - # categories: files - # disabled: true - - - name: radio browser - engine: radio_browser - shortcut: rb - - - name: reddit - engine: reddit - shortcut: re - page_size: 25 - - - name: rottentomatoes - engine: rottentomatoes - shortcut: rt - disabled: true - - # Required dependency: redis - # - name: myredis - # shortcut : rds - # engine: redis_server - # exact_match_only: false - # host: '127.0.0.1' - # port: 6379 - # enable_http: true - # password: '' - # db: 0 - - # tmp suspended: bad certificate - # - name: scanr structures - # shortcut: scs - # engine: scanr_structures - # disabled: true - - - name: sepiasearch - engine: sepiasearch - shortcut: sep - - - name: soundcloud - engine: soundcloud - shortcut: sc - - - name: stackoverflow - engine: stackexchange - shortcut: st - api_site: 'stackoverflow' - categories: [it, q&a] - - - name: askubuntu - engine: stackexchange - shortcut: ubuntu - api_site: 'askubuntu' - categories: [it, q&a] - - - name: internetarchivescholar - engine: internet_archive_scholar - shortcut: ias - timeout: 5.0 - - - name: superuser - engine: stackexchange - shortcut: su - api_site: 'superuser' - categories: [it, q&a] - - - name: searchcode code - engine: searchcode_code - shortcut: scc - disabled: true - - # - name: searx - # engine: searx_engine - # shortcut: se - # instance_urls : - # - http://127.0.0.1:8888/ - # - ... 
- # disabled: true - - - name: semantic scholar - engine: semantic_scholar - disabled: true - shortcut: se - - # Spotify needs API credentials - # - name: spotify - # engine: spotify - # shortcut: stf - # api_client_id: ******* - # api_client_secret: ******* - - # - name: solr - # engine: solr - # shortcut: slr - # base_url: http://localhost:8983 - # collection: collection_name - # sort: '' # sorting: asc or desc - # field_list: '' # comma separated list of field names to display on the UI - # default_fields: '' # default field to query - # query_fields: '' # query fields - # enable_http: true - - # - name: springer nature - # engine: springer - # # get your API key from: https://dev.springernature.com/signup - # # working API key, for test & debug: "a69685087d07eca9f13db62f65b8f601" - # api_key: 'unset' - # shortcut: springer - # timeout: 15.0 - - - name: startpage - engine: startpage - shortcut: sp - timeout: 6.0 - disabled: true - additional_tests: - rosebud: *test_rosebud - - - name: tokyotoshokan - engine: tokyotoshokan - shortcut: tt - timeout: 6.0 - disabled: true - - - name: solidtorrents - engine: solidtorrents - shortcut: solid - timeout: 4.0 - base_url: - - https://solidtorrents.to - - https://bitsearch.to - - # For this demo of the sqlite engine download: - # https://liste.mediathekview.de/filmliste-v2.db.bz2 - # and unpack into searx/data/filmliste-v2.db - # Query to test: "!demo concert" - # - # - name: demo - # engine: sqlite - # shortcut: demo - # categories: general - # result_template: default.html - # database: searx/data/filmliste-v2.db - # query_str: >- - # SELECT title || ' (' || time(duration, 'unixepoch') || ')' AS title, - # COALESCE( NULLIF(url_video_hd,''), NULLIF(url_video_sd,''), url_video) AS url, - # description AS content - # FROM film - # WHERE title LIKE :wildcard OR description LIKE :wildcard - # ORDER BY duration DESC - - - name: tagesschau - engine: tagesschau - # when set to false, display URLs from Tagesschau, and not the 
actual source - # (e.g. NDR, WDR, SWR, HR, ...) - use_source_url: true - shortcut: ts - disabled: true - - - name: tmdb - engine: xpath - paging: true - categories: movies - search_url: https://www.themoviedb.org/search?page={pageno}&query={query} - results_xpath: //div[contains(@class,"movie") or contains(@class,"tv")]//div[contains(@class,"card")] - url_xpath: .//div[contains(@class,"poster")]/a/@href - thumbnail_xpath: .//img/@src - title_xpath: .//div[contains(@class,"title")]//h2 - content_xpath: .//div[contains(@class,"overview")] - shortcut: tm - disabled: true - - # Requires Tor - - name: torch - engine: xpath - paging: true - search_url: http://xmh57jrknzkhv6y3ls3ubitzfqnkrwxhopf5aygthi7d6rplyvk3noyd.onion/cgi-bin/omega/omega?P={query}&DEFAULTOP=and - results_xpath: //table//tr - url_xpath: ./td[2]/a - title_xpath: ./td[2]/b - content_xpath: ./td[2]/small - categories: onions - enable_http: true - shortcut: tch - - # torznab engine lets you query any torznab compatible indexer. Using this - # engine in combination with Jackett opens the possibility to query a lot of - # public and private indexers directly from SearXNG. 
More details at: - # https://docs.searxng.org/dev/engines/online/torznab.html - # - # - name: Torznab EZTV - # engine: torznab - # shortcut: eztv - # base_url: http://localhost:9117/api/v2.0/indexers/eztv/results/torznab - # enable_http: true # if using localhost - # api_key: xxxxxxxxxxxxxxx - # show_magnet_links: true - # show_torrent_files: false - # # https://github.com/Jackett/Jackett/wiki/Jackett-Categories - # torznab_categories: # optional - # - 2000 - # - 5000 - - # tmp suspended - too slow, too many errors - # - name: urbandictionary - # engine : xpath - # search_url : https://www.urbandictionary.com/define.php?term={query} - # url_xpath : //*[@class="word"]/@href - # title_xpath : //*[@class="def-header"] - # content_xpath: //*[@class="meaning"] - # shortcut: ud - - - name: unsplash - engine: unsplash - shortcut: us - - - name: yandex music - engine: yandex_music - shortcut: ydm - disabled: true - # https://yandex.com/support/music/access.html - inactive: true - - - name: yahoo - engine: yahoo - shortcut: yh - disabled: true - - - name: yahoo news - engine: yahoo_news - shortcut: yhn - - - name: youtube - shortcut: yt - # You can use the engine using the official stable API, but you need an API - # key See: https://console.developers.google.com/project - # - # engine: youtube_api - # api_key: 'apikey' # required! 
- # - # Or you can use the html non-stable engine, activated by default - engine: youtube_noapi - - - name: dailymotion - engine: dailymotion - shortcut: dm - - - name: vimeo - engine: vimeo - shortcut: vm - - - name: wiby - engine: json_engine - paging: true - search_url: https://wiby.me/json/?q={query}&p={pageno} - url_query: URL - title_query: Title - content_query: Snippet - categories: [general, web] - shortcut: wib - disabled: true - about: - website: https://wiby.me/ - - - name: alexandria - engine: json_engine - shortcut: alx - categories: general - paging: true - search_url: https://api.alexandria.org/?a=1&q={query}&p={pageno} - results_query: results - title_query: title - url_query: url - content_query: snippet - timeout: 1.5 - disabled: true - about: - website: https://alexandria.org/ - official_api_documentation: https://github.com/alexandria-org/alexandria-api/raw/master/README.md - use_official_api: true - require_api_key: false - results: JSON - - - name: wikibooks - engine: mediawiki - weight: 0.5 - shortcut: wb - categories: [general, wikimedia] - base_url: 'https://{language}.wikibooks.org/' - search_type: text - disabled: true - about: - website: https://www.wikibooks.org/ - wikidata_id: Q367 - - - name: wikinews - engine: mediawiki - shortcut: wn - categories: [news, wikimedia] - base_url: 'https://{language}.wikinews.org/' - search_type: text - srsort: create_timestamp_desc - about: - website: https://www.wikinews.org/ - wikidata_id: Q964 - - - name: wikiquote - engine: mediawiki - weight: 0.5 - shortcut: wq - categories: [general, wikimedia] - base_url: 'https://{language}.wikiquote.org/' - search_type: text - disabled: true - additional_tests: - rosebud: *test_rosebud - about: - website: https://www.wikiquote.org/ - wikidata_id: Q369 - - - name: wikisource - engine: mediawiki - weight: 0.5 - shortcut: ws - categories: [general, wikimedia] - base_url: 'https://{language}.wikisource.org/' - search_type: text - disabled: true - about: - 
website: https://www.wikisource.org/ - wikidata_id: Q263 - - - name: wikispecies - engine: mediawiki - shortcut: wsp - categories: [general, science, wikimedia] - base_url: 'https://species.wikimedia.org/' - search_type: text - disabled: true - about: - website: https://species.wikimedia.org/ - wikidata_id: Q13679 - - - name: wiktionary - engine: mediawiki - shortcut: wt - categories: [dictionaries, wikimedia] - base_url: 'https://{language}.wiktionary.org/' - search_type: text - about: - website: https://www.wiktionary.org/ - wikidata_id: Q151 - - - name: wikiversity - engine: mediawiki - weight: 0.5 - shortcut: wv - categories: [general, wikimedia] - base_url: 'https://{language}.wikiversity.org/' - search_type: text - disabled: true - about: - website: https://www.wikiversity.org/ - wikidata_id: Q370 - - - name: wikivoyage - engine: mediawiki - weight: 0.5 - shortcut: wy - categories: [general, wikimedia] - base_url: 'https://{language}.wikivoyage.org/' - search_type: text - disabled: true - about: - website: https://www.wikivoyage.org/ - wikidata_id: Q373 - - - name: wikicommons.images - engine: wikicommons - shortcut: wc - categories: images - number_of_results: 10 - - name: wolframalpha - shortcut: wa - # You can use the engine using the official stable API, but you need an API - # key. 
See: https://products.wolframalpha.com/api/ - # - # engine: wolframalpha_api - # api_key: '' - # - # Or you can use the html non-stable engine, activated by default - engine: wolframalpha_noapi - timeout: 6.0 - categories: general disabled: false - - - name: dictzone - engine: dictzone - shortcut: dc - - - name: mymemory translated - engine: translated - shortcut: tl - timeout: 5.0 - # You can use without an API key, but you are limited to 1000 words/day - # See: https://mymemory.translated.net/doc/usagelimits.php - # api_key: '' - - # Required dependency: mysql-connector-python - # - name: mysql - # engine: mysql_server - # database: mydatabase - # username: user - # password: pass - # limit: 10 - # query_str: 'SELECT * from mytable WHERE fieldname=%(query)s' - # shortcut: mysql - - - name: 1337x - engine: 1337x - shortcut: 1337x - disabled: true - - - name: duden - engine: duden - shortcut: du - disabled: true - - - name: seznam - shortcut: szn - engine: seznam - disabled: true - - # - name: deepl - # engine: deepl - # shortcut: dpl - # # You can use the engine using the official stable API, but you need an API key - # # See: https://www.deepl.com/pro-api?cta=header-pro-api - # api_key: '' # required! 
- # timeout: 5.0 - # disabled: true - - - name: mojeek - shortcut: mjk - engine: xpath - paging: true - categories: [general, web] - search_url: https://www.mojeek.com/search?q={query}&s={pageno}&lang={lang}&lb={lang} - results_xpath: //ul[@class="results-standard"]/li/a[@class="ob"] - url_xpath: ./@href - title_xpath: ../h2/a - content_xpath: ..//p[@class="s"] - suggestion_xpath: //div[@class="top-info"]/p[@class="top-info spell"]/em/a - first_page_num: 0 - page_size: 10 - max_page: 100 - disabled: true - about: - website: https://www.mojeek.com/ - wikidata_id: Q60747299 - official_api_documentation: https://www.mojeek.com/services/api.html/ - use_official_api: false - require_api_key: false - results: HTML - - - name: moviepilot - engine: moviepilot - shortcut: mp - disabled: true - - - name: naver - shortcut: nvr - categories: [general, web] - engine: xpath - paging: true - search_url: https://search.naver.com/search.naver?where=webkr&sm=osp_hty&ie=UTF-8&query={query}&start={pageno} - url_xpath: //a[@class="link_tit"]/@href - title_xpath: //a[@class="link_tit"] - content_xpath: //a[@class="total_dsc"]/div - first_page_num: 1 - page_size: 10 - disabled: true - about: - website: https://www.naver.com/ - wikidata_id: Q485639 - official_api_documentation: https://developers.naver.com/docs/nmt/examples/ - use_official_api: false - require_api_key: false - results: HTML - language: ko - - - name: rubygems - shortcut: rbg - engine: xpath - paging: true - search_url: https://rubygems.org/search?page={pageno}&query={query} - results_xpath: /html/body/main/div/a[@class="gems__gem"] - url_xpath: ./@href - title_xpath: ./span/h2 - content_xpath: ./span/p - suggestion_xpath: /html/body/main/div/div[@class="search__suggestions"]/p/a - first_page_num: 1 - categories: [it, packages] - disabled: true - about: - website: https://rubygems.org/ - wikidata_id: Q1853420 - official_api_documentation: https://guides.rubygems.org/rubygems-org-api/ - use_official_api: false - 
require_api_key: false - results: HTML - - - name: peertube - engine: peertube - shortcut: ptb - paging: true - # alternatives see: https://instances.joinpeertube.org/instances - # base_url: https://tube.4aem.com - categories: videos - disabled: true - timeout: 6.0 - - - name: mediathekviewweb - engine: mediathekviewweb - shortcut: mvw - disabled: true - - - name: yacy - engine: yacy - categories: general - search_type: text - base_url: https://yacy.searchlab.eu - shortcut: ya - disabled: true - # required if you aren't using HTTPS for your local yacy instance - # https://docs.searxng.org/dev/engines/online/yacy.html - # enable_http: true - # timeout: 3.0 - # search_mode: 'global' - - - name: yacy images - engine: yacy - categories: images - search_type: image - base_url: https://yacy.searchlab.eu - shortcut: yai - disabled: true - - - name: rumble - engine: rumble - shortcut: ru - base_url: https://rumble.com/ - paging: true - categories: videos - disabled: true - - - name: livespace - engine: livespace - shortcut: ls - categories: videos - disabled: true - timeout: 5.0 - - - name: wordnik - engine: wordnik - shortcut: def - base_url: https://www.wordnik.com/ - categories: [dictionaries] - timeout: 5.0 - - - name: woxikon.de synonyme - engine: xpath - shortcut: woxi - categories: [dictionaries] - timeout: 5.0 - disabled: true - search_url: https://synonyme.woxikon.de/synonyme/{query}.php - url_xpath: //div[@class="upper-synonyms"]/a/@href - content_xpath: //div[@class="synonyms-list-group"] - title_xpath: //div[@class="upper-synonyms"]/a - no_result_for_http_status: [404] - about: - website: https://www.woxikon.de/ - wikidata_id: # No Wikidata ID - use_official_api: false - require_api_key: false - results: HTML - language: de - - - name: seekr news - engine: seekr - shortcut: senews - categories: news - seekr_category: news - disabled: true - - - name: seekr images - engine: seekr - network: seekr news - shortcut: seimg - categories: images - seekr_category: 
images - disabled: true - - - name: seekr videos - engine: seekr - network: seekr news - shortcut: sevid - categories: videos - seekr_category: videos - disabled: true - - - name: sjp.pwn - engine: sjp - shortcut: sjp - base_url: https://sjp.pwn.pl/ - timeout: 5.0 - disabled: true - - - name: stract - engine: stract - shortcut: str - disabled: true - - - name: svgrepo - engine: svgrepo - shortcut: svg - timeout: 10.0 - disabled: true - - - name: tootfinder - engine: tootfinder - shortcut: toot - - - name: wallhaven - engine: wallhaven - # api_key: abcdefghijklmnopqrstuvwxyz - shortcut: wh - - # wikimini: online encyclopedia for children - # The fulltext and title parameter is necessary for Wikimini because - # sometimes it will not show the results and redirect instead - - name: wikimini - engine: xpath - shortcut: wkmn - search_url: https://fr.wikimini.org/w/index.php?search={query}&title=Sp%C3%A9cial%3ASearch&fulltext=Search - url_xpath: //li/div[@class="mw-search-result-heading"]/a/@href - title_xpath: //li//div[@class="mw-search-result-heading"]/a - content_xpath: //li/div[@class="searchresult"] - categories: general - disabled: true - about: - website: https://wikimini.org/ - wikidata_id: Q3568032 - use_official_api: false - require_api_key: false - results: HTML - language: fr - - - name: wttr.in - engine: wttr - shortcut: wttr - timeout: 9.0 - - - name: yummly - engine: yummly - shortcut: yum - disabled: true - - - name: brave - engine: brave - shortcut: br - time_range_support: true - paging: true - categories: [general, web] - brave_category: search - # brave_spellcheck: true - - - name: brave.images - engine: brave - network: brave - shortcut: brimg - categories: [images, web] - brave_category: images - - - name: brave.videos - engine: brave - network: brave - shortcut: brvid - categories: [videos, web] - brave_category: videos - - - name: brave.news - engine: brave - network: brave - shortcut: brnews - categories: news - brave_category: news - - # - 
name: brave.goggles - # engine: brave - # network: brave - # shortcut: brgog - # time_range_support: true - # paging: true - # categories: [general, web] - # brave_category: goggles - # Goggles: # required! This should be a URL ending in .goggle - - - name: lib.rs - shortcut: lrs - engine: xpath - search_url: https://lib.rs/search?q={query} - results_xpath: /html/body/main/div/ol/li/a - url_xpath: ./@href - title_xpath: ./div[@class="h"]/h4 - content_xpath: ./div[@class="h"]/p - categories: [it, packages] - disabled: true - about: - website: https://lib.rs - wikidata_id: Q113486010 - use_official_api: false - require_api_key: false - results: HTML - - - name: sourcehut - shortcut: srht - engine: xpath - paging: true - search_url: https://sr.ht/projects?page={pageno}&search={query} - results_xpath: (//div[@class="event-list"])[1]/div[@class="event"] - url_xpath: ./h4/a[2]/@href - title_xpath: ./h4/a[2] - content_xpath: ./p - first_page_num: 1 - categories: [it, repos] - disabled: true - about: - website: https://sr.ht - wikidata_id: Q78514485 - official_api_documentation: https://man.sr.ht/ - use_official_api: false - require_api_key: false - results: HTML - - - name: goo - shortcut: goo - engine: xpath - paging: true - search_url: https://search.goo.ne.jp/web.jsp?MT={query}&FR={pageno}0 - url_xpath: //div[@class="result"]/p[@class='title fsL1']/a/@href - title_xpath: //div[@class="result"]/p[@class='title fsL1']/a - content_xpath: //p[contains(@class,'url fsM')]/following-sibling::p - first_page_num: 0 - categories: [general, web] - disabled: true - timeout: 4.0 - about: - website: https://search.goo.ne.jp - wikidata_id: Q249044 - use_official_api: false - require_api_key: false - results: HTML - language: ja - - - name: bt4g - engine: bt4g - shortcut: bt4g - - - name: pkg.go.dev - engine: xpath - shortcut: pgo - search_url: https://pkg.go.dev/search?limit=100&m=package&q={query} - results_xpath: 
/html/body/main/div[contains(@class,"SearchResults")]/div[not(@class)]/div[@class="SearchSnippet"] - url_xpath: ./div[@class="SearchSnippet-headerContainer"]/h2/a/@href - title_xpath: ./div[@class="SearchSnippet-headerContainer"]/h2/a - content_xpath: ./p[@class="SearchSnippet-synopsis"] - categories: [packages, it] - timeout: 3.0 - disabled: true - about: - website: https://pkg.go.dev/ - use_official_api: false - require_api_key: false - results: HTML - -# Doku engine lets you access to any Doku wiki instance: -# A public one or a privete/corporate one. -# - name: ubuntuwiki -# engine: doku -# shortcut: uw -# base_url: 'https://doc.ubuntu-fr.org' - -# Be careful when enabling this engine if you are -# running a public instance. Do not expose any sensitive -# information. You can restrict access by configuring a list -# of access tokens under tokens. -# - name: git grep -# engine: command -# command: ['git', 'grep', '{{QUERY}}'] -# shortcut: gg -# tokens: [] -# disabled: true -# delimiter: -# chars: ':' -# keys: ['filepath', 'code'] - -# Be careful when enabling this engine if you are -# running a public instance. Do not expose any sensitive -# information. You can restrict access by configuring a list -# of access tokens under tokens. -# - name: locate -# engine: command -# command: ['locate', '{{QUERY}}'] -# shortcut: loc -# tokens: [] -# disabled: true -# delimiter: -# chars: ' ' -# keys: ['line'] - -# Be careful when enabling this engine if you are -# running a public instance. Do not expose any sensitive -# information. You can restrict access by configuring a list -# of access tokens under tokens. -# - name: find -# engine: command -# command: ['find', '.', '-name', '{{QUERY}}'] -# query_type: path -# shortcut: fnd -# tokens: [] -# disabled: true -# delimiter: -# chars: ' ' -# keys: ['line'] - -# Be careful when enabling this engine if you are -# running a public instance. Do not expose any sensitive -# information. 
You can restrict access by configuring a list -# of access tokens under tokens. -# - name: pattern search in files -# engine: command -# command: ['fgrep', '{{QUERY}}'] -# shortcut: fgr -# tokens: [] -# disabled: true -# delimiter: -# chars: ' ' -# keys: ['line'] - -# Be careful when enabling this engine if you are -# running a public instance. Do not expose any sensitive -# information. You can restrict access by configuring a list -# of access tokens under tokens. -# - name: regex search in files -# engine: command -# command: ['grep', '{{QUERY}}'] -# shortcut: gr -# tokens: [] -# disabled: true -# delimiter: -# chars: ' ' -# keys: ['line'] - -doi_resolvers: - oadoi.org: 'https://oadoi.org/' - doi.org: 'https://doi.org/' - doai.io: 'https://dissem.in/' - sci-hub.se: 'https://sci-hub.se/' - sci-hub.st: 'https://sci-hub.st/' - sci-hub.ru: 'https://sci-hub.ru/' - -default_doi_resolver: 'oadoi.org' diff --git a/src/agents/academicSearchAgent.ts b/src/agents/academicSearchAgent.ts deleted file mode 100644 index d797119..0000000 --- a/src/agents/academicSearchAgent.ts +++ /dev/null @@ -1,265 +0,0 @@ -import { BaseMessage } from '@langchain/core/messages'; -import { - PromptTemplate, - ChatPromptTemplate, - MessagesPlaceholder, -} from '@langchain/core/prompts'; -import { - RunnableSequence, - RunnableMap, - RunnableLambda, -} from '@langchain/core/runnables'; -import { StringOutputParser } from '@langchain/core/output_parsers'; -import { Document } from '@langchain/core/documents'; -import { searchSearxng } from '../lib/searxng'; -import type { StreamEvent } from '@langchain/core/tracers/log_stream'; -import type { BaseChatModel } from '@langchain/core/language_models/chat_models'; -import type { Embeddings } from '@langchain/core/embeddings'; -import formatChatHistoryAsString from '../utils/formatHistory'; -import eventEmitter from 'events'; -import computeSimilarity from '../utils/computeSimilarity'; -import logger from '../utils/logger'; - -const 
basicAcademicSearchRetrieverPrompt = ` -You will be given a conversation below and a follow up question. You need to rephrase the follow-up question if needed so it is a standalone question that can be used by the LLM to search the web for information. -If it is a writing task or a simple hi, hello rather than a question, you need to return \`not_needed\` as the response. - -Example: -1. Follow up question: How does stable diffusion work? -Rephrased: Stable diffusion working - -2. Follow up question: What is linear algebra? -Rephrased: Linear algebra - -3. Follow up question: What is the third law of thermodynamics? -Rephrased: Third law of thermodynamics - -Conversation: -{chat_history} - -Follow up question: {query} -Rephrased question: -`; - -const basicAcademicSearchResponsePrompt = ` - You are Perplexica, an AI model who is expert at searching the web and answering user's queries. You are set on focus mode 'Academic', this means you will be searching for academic papers and articles on the web. - - Generate a response that is informative and relevant to the user's query based on provided context (the context consits of search results containing a brief description of the content of that page). - You must use this context to answer the user's query in the best way possible. Use an unbaised and journalistic tone in your response. Do not repeat the text. - You must not tell the user to open any link or visit any website to get the answer. You must provide the answer in the response itself. If the user asks for links you can provide them. - Your responses should be medium to long in length be informative and relevant to the user's query. You can use markdowns to format your response. You should use bullet points to list the information. Make sure the answer is not short and is informative. - You have to cite the answer using [number] notation. You must cite the sentences with their relevent context number. 
You must cite each and every part of the answer so the user can know where the information is coming from. - Place these citations at the end of that particular sentence. You can cite the same sentence multiple times if it is relevant to the user's query like [number1][number2]. - However you do not need to cite it using the same number. You can use different numbers to cite the same sentence multiple times. The number refers to the number of the search result (passed in the context) used to generate that part of the answer. - - Anything inside the following \`context\` HTML block provided below is for your knowledge returned by the search engine and is not shared by the user. You have to answer question on the basis of it and cite the relevant information from it but you do not have to - talk about the context in your response. - - - {context} - - - If you think there's nothing relevant in the search results, you can say that 'Hmm, sorry I could not find any relevant information on this topic. Would you like me to search again or ask something else?'. - Anything between the \`context\` is retrieved from a search engine and is not a part of the conversation with the user. 
Today's date is ${new Date().toISOString()} -`; - -const strParser = new StringOutputParser(); - -const handleStream = async ( - stream: AsyncGenerator, - emitter: eventEmitter, -) => { - for await (const event of stream) { - if ( - event.event === 'on_chain_end' && - event.name === 'FinalSourceRetriever' - ) { - emitter.emit( - 'data', - JSON.stringify({ type: 'sources', data: event.data.output }), - ); - } - if ( - event.event === 'on_chain_stream' && - event.name === 'FinalResponseGenerator' - ) { - emitter.emit( - 'data', - JSON.stringify({ type: 'response', data: event.data.chunk }), - ); - } - if ( - event.event === 'on_chain_end' && - event.name === 'FinalResponseGenerator' - ) { - emitter.emit('end'); - } - } -}; - -type BasicChainInput = { - chat_history: BaseMessage[]; - query: string; -}; - -const createBasicAcademicSearchRetrieverChain = (llm: BaseChatModel) => { - return RunnableSequence.from([ - PromptTemplate.fromTemplate(basicAcademicSearchRetrieverPrompt), - llm, - strParser, - RunnableLambda.from(async (input: string) => { - if (input === 'not_needed') { - return { query: '', docs: [] }; - } - - const res = await searchSearxng(input, { - language: 'en', - engines: [ - 'arxiv', - 'google scholar', - 'internetarchivescholar', - 'pubmed', - ], - }); - - const documents = res.results.map( - (result) => - new Document({ - pageContent: result.content, - metadata: { - title: result.title, - url: result.url, - ...(result.img_src && { img_src: result.img_src }), - }, - }), - ); - - return { query: input, docs: documents }; - }), - ]); -}; - -const createBasicAcademicSearchAnsweringChain = ( - llm: BaseChatModel, - embeddings: Embeddings, -) => { - const basicAcademicSearchRetrieverChain = - createBasicAcademicSearchRetrieverChain(llm); - - const processDocs = async (docs: Document[]) => { - return docs - .map((_, index) => `${index + 1}. 
${docs[index].pageContent}`) - .join('\n'); - }; - - const rerankDocs = async ({ - query, - docs, - }: { - query: string; - docs: Document[]; - }) => { - if (docs.length === 0) { - return docs; - } - - const docsWithContent = docs.filter( - (doc) => doc.pageContent && doc.pageContent.length > 0, - ); - - const [docEmbeddings, queryEmbedding] = await Promise.all([ - embeddings.embedDocuments(docsWithContent.map((doc) => doc.pageContent)), - embeddings.embedQuery(query), - ]); - - const similarity = docEmbeddings.map((docEmbedding, i) => { - const sim = computeSimilarity(queryEmbedding, docEmbedding); - - return { - index: i, - similarity: sim, - }; - }); - - const sortedDocs = similarity - .sort((a, b) => b.similarity - a.similarity) - .slice(0, 15) - .map((sim) => docsWithContent[sim.index]); - - return sortedDocs; - }; - - return RunnableSequence.from([ - RunnableMap.from({ - query: (input: BasicChainInput) => input.query, - chat_history: (input: BasicChainInput) => input.chat_history, - context: RunnableSequence.from([ - (input) => ({ - query: input.query, - chat_history: formatChatHistoryAsString(input.chat_history), - }), - basicAcademicSearchRetrieverChain - .pipe(rerankDocs) - .withConfig({ - runName: 'FinalSourceRetriever', - }) - .pipe(processDocs), - ]), - }), - ChatPromptTemplate.fromMessages([ - ['system', basicAcademicSearchResponsePrompt], - new MessagesPlaceholder('chat_history'), - ['user', '{query}'], - ]), - llm, - strParser, - ]).withConfig({ - runName: 'FinalResponseGenerator', - }); -}; - -const basicAcademicSearch = ( - query: string, - history: BaseMessage[], - llm: BaseChatModel, - embeddings: Embeddings, -) => { - const emitter = new eventEmitter(); - - try { - const basicAcademicSearchAnsweringChain = - createBasicAcademicSearchAnsweringChain(llm, embeddings); - - const stream = basicAcademicSearchAnsweringChain.streamEvents( - { - chat_history: history, - query: query, - }, - { - version: 'v1', - }, - ); - - handleStream(stream, emitter); 
- } catch (err) { - emitter.emit( - 'error', - JSON.stringify({ data: 'An error has occurred please try again later' }), - ); - logger.error(`Error in academic search: ${err}`); - } - - return emitter; -}; - -const handleAcademicSearch = ( - message: string, - history: BaseMessage[], - llm: BaseChatModel, - embeddings: Embeddings, -) => { - const emitter = basicAcademicSearch(message, history, llm, embeddings); - return emitter; -}; - -export default handleAcademicSearch; diff --git a/src/agents/redditSearchAgent.ts b/src/agents/redditSearchAgent.ts deleted file mode 100644 index 578e2bb..0000000 --- a/src/agents/redditSearchAgent.ts +++ /dev/null @@ -1,260 +0,0 @@ -import { BaseMessage } from '@langchain/core/messages'; -import { - PromptTemplate, - ChatPromptTemplate, - MessagesPlaceholder, -} from '@langchain/core/prompts'; -import { - RunnableSequence, - RunnableMap, - RunnableLambda, -} from '@langchain/core/runnables'; -import { StringOutputParser } from '@langchain/core/output_parsers'; -import { Document } from '@langchain/core/documents'; -import { searchSearxng } from '../lib/searxng'; -import type { StreamEvent } from '@langchain/core/tracers/log_stream'; -import type { BaseChatModel } from '@langchain/core/language_models/chat_models'; -import type { Embeddings } from '@langchain/core/embeddings'; -import formatChatHistoryAsString from '../utils/formatHistory'; -import eventEmitter from 'events'; -import computeSimilarity from '../utils/computeSimilarity'; -import logger from '../utils/logger'; - -const basicRedditSearchRetrieverPrompt = ` -You will be given a conversation below and a follow up question. You need to rephrase the follow-up question if needed so it is a standalone question that can be used by the LLM to search the web for information. -If it is a writing task or a simple hi, hello rather than a question, you need to return \`not_needed\` as the response. - -Example: -1. 
Follow up question: Which company is most likely to create an AGI -Rephrased: Which company is most likely to create an AGI - -2. Follow up question: Is Earth flat? -Rephrased: Is Earth flat? - -3. Follow up question: Is there life on Mars? -Rephrased: Is there life on Mars? - -Conversation: -{chat_history} - -Follow up question: {query} -Rephrased question: -`; - -const basicRedditSearchResponsePrompt = ` - You are Perplexica, an AI model who is expert at searching the web and answering user's queries. You are set on focus mode 'Reddit', this means you will be searching for information, opinions and discussions on the web using Reddit. - - Generate a response that is informative and relevant to the user's query based on provided context (the context consits of search results containing a brief description of the content of that page). - You must use this context to answer the user's query in the best way possible. Use an unbaised and journalistic tone in your response. Do not repeat the text. - You must not tell the user to open any link or visit any website to get the answer. You must provide the answer in the response itself. If the user asks for links you can provide them. - Your responses should be medium to long in length be informative and relevant to the user's query. You can use markdowns to format your response. You should use bullet points to list the information. Make sure the answer is not short and is informative. - You have to cite the answer using [number] notation. You must cite the sentences with their relevent context number. You must cite each and every part of the answer so the user can know where the information is coming from. - Place these citations at the end of that particular sentence. You can cite the same sentence multiple times if it is relevant to the user's query like [number1][number2]. - However you do not need to cite it using the same number. You can use different numbers to cite the same sentence multiple times. 
The number refers to the number of the search result (passed in the context) used to generate that part of the answer. - - Anything inside the following \`context\` HTML block provided below is for your knowledge returned by Reddit and is not shared by the user. You have to answer question on the basis of it and cite the relevant information from it but you do not have to - talk about the context in your response. - - - {context} - - - If you think there's nothing relevant in the search results, you can say that 'Hmm, sorry I could not find any relevant information on this topic. Would you like me to search again or ask something else?'. - Anything between the \`context\` is retrieved from Reddit and is not a part of the conversation with the user. Today's date is ${new Date().toISOString()} -`; - -const strParser = new StringOutputParser(); - -const handleStream = async ( - stream: AsyncGenerator, - emitter: eventEmitter, -) => { - for await (const event of stream) { - if ( - event.event === 'on_chain_end' && - event.name === 'FinalSourceRetriever' - ) { - emitter.emit( - 'data', - JSON.stringify({ type: 'sources', data: event.data.output }), - ); - } - if ( - event.event === 'on_chain_stream' && - event.name === 'FinalResponseGenerator' - ) { - emitter.emit( - 'data', - JSON.stringify({ type: 'response', data: event.data.chunk }), - ); - } - if ( - event.event === 'on_chain_end' && - event.name === 'FinalResponseGenerator' - ) { - emitter.emit('end'); - } - } -}; - -type BasicChainInput = { - chat_history: BaseMessage[]; - query: string; -}; - -const createBasicRedditSearchRetrieverChain = (llm: BaseChatModel) => { - return RunnableSequence.from([ - PromptTemplate.fromTemplate(basicRedditSearchRetrieverPrompt), - llm, - strParser, - RunnableLambda.from(async (input: string) => { - if (input === 'not_needed') { - return { query: '', docs: [] }; - } - - const res = await searchSearxng(input, { - language: 'en', - engines: ['reddit'], - }); - - const documents = 
res.results.map( - (result) => - new Document({ - pageContent: result.content ? result.content : result.title, - metadata: { - title: result.title, - url: result.url, - ...(result.img_src && { img_src: result.img_src }), - }, - }), - ); - - return { query: input, docs: documents }; - }), - ]); -}; - -const createBasicRedditSearchAnsweringChain = ( - llm: BaseChatModel, - embeddings: Embeddings, -) => { - const basicRedditSearchRetrieverChain = - createBasicRedditSearchRetrieverChain(llm); - - const processDocs = async (docs: Document[]) => { - return docs - .map((_, index) => `${index + 1}. ${docs[index].pageContent}`) - .join('\n'); - }; - - const rerankDocs = async ({ - query, - docs, - }: { - query: string; - docs: Document[]; - }) => { - if (docs.length === 0) { - return docs; - } - - const docsWithContent = docs.filter( - (doc) => doc.pageContent && doc.pageContent.length > 0, - ); - - const [docEmbeddings, queryEmbedding] = await Promise.all([ - embeddings.embedDocuments(docsWithContent.map((doc) => doc.pageContent)), - embeddings.embedQuery(query), - ]); - - const similarity = docEmbeddings.map((docEmbedding, i) => { - const sim = computeSimilarity(queryEmbedding, docEmbedding); - - return { - index: i, - similarity: sim, - }; - }); - - const sortedDocs = similarity - .filter((sim) => sim.similarity > 0.3) - .sort((a, b) => b.similarity - a.similarity) - .slice(0, 15) - .map((sim) => docsWithContent[sim.index]); - - return sortedDocs; - }; - - return RunnableSequence.from([ - RunnableMap.from({ - query: (input: BasicChainInput) => input.query, - chat_history: (input: BasicChainInput) => input.chat_history, - context: RunnableSequence.from([ - (input) => ({ - query: input.query, - chat_history: formatChatHistoryAsString(input.chat_history), - }), - basicRedditSearchRetrieverChain - .pipe(rerankDocs) - .withConfig({ - runName: 'FinalSourceRetriever', - }) - .pipe(processDocs), - ]), - }), - ChatPromptTemplate.fromMessages([ - ['system', 
basicRedditSearchResponsePrompt], - new MessagesPlaceholder('chat_history'), - ['user', '{query}'], - ]), - llm, - strParser, - ]).withConfig({ - runName: 'FinalResponseGenerator', - }); -}; - -const basicRedditSearch = ( - query: string, - history: BaseMessage[], - llm: BaseChatModel, - embeddings: Embeddings, -) => { - const emitter = new eventEmitter(); - - try { - const basicRedditSearchAnsweringChain = - createBasicRedditSearchAnsweringChain(llm, embeddings); - const stream = basicRedditSearchAnsweringChain.streamEvents( - { - chat_history: history, - query: query, - }, - { - version: 'v1', - }, - ); - - handleStream(stream, emitter); - } catch (err) { - emitter.emit( - 'error', - JSON.stringify({ data: 'An error has occurred please try again later' }), - ); - logger.error(`Error in RedditSearch: ${err}`); - } - - return emitter; -}; - -const handleRedditSearch = ( - message: string, - history: BaseMessage[], - llm: BaseChatModel, - embeddings: Embeddings, -) => { - const emitter = basicRedditSearch(message, history, llm, embeddings); - return emitter; -}; - -export default handleRedditSearch; diff --git a/src/agents/videoSearchAgent.ts b/src/agents/videoSearchAgent.ts deleted file mode 100644 index cdd1ac0..0000000 --- a/src/agents/videoSearchAgent.ts +++ /dev/null @@ -1,90 +0,0 @@ -import { - RunnableSequence, - RunnableMap, - RunnableLambda, -} from '@langchain/core/runnables'; -import { PromptTemplate } from '@langchain/core/prompts'; -import formatChatHistoryAsString from '../utils/formatHistory'; -import { BaseMessage } from '@langchain/core/messages'; -import { StringOutputParser } from '@langchain/core/output_parsers'; -import { searchSearxng } from '../lib/searxng'; -import type { BaseChatModel } from '@langchain/core/language_models/chat_models'; - -const VideoSearchChainPrompt = ` - You will be given a conversation below and a follow up question. 
You need to rephrase the follow-up question so it is a standalone question that can be used by the LLM to search Youtube for videos. - You need to make sure the rephrased question agrees with the conversation and is relevant to the conversation. - - Example: - 1. Follow up question: How does a car work? - Rephrased: How does a car work? - - 2. Follow up question: What is the theory of relativity? - Rephrased: What is theory of relativity - - 3. Follow up question: How does an AC work? - Rephrased: How does an AC work - - Conversation: - {chat_history} - - Follow up question: {query} - Rephrased question: - `; - -type VideoSearchChainInput = { - chat_history: BaseMessage[]; - query: string; -}; - -const strParser = new StringOutputParser(); - -const createVideoSearchChain = (llm: BaseChatModel) => { - return RunnableSequence.from([ - RunnableMap.from({ - chat_history: (input: VideoSearchChainInput) => { - return formatChatHistoryAsString(input.chat_history); - }, - query: (input: VideoSearchChainInput) => { - return input.query; - }, - }), - PromptTemplate.fromTemplate(VideoSearchChainPrompt), - llm, - strParser, - RunnableLambda.from(async (input: string) => { - const res = await searchSearxng(input, { - engines: ['youtube'], - }); - - const videos = []; - - res.results.forEach((result) => { - if ( - result.thumbnail && - result.url && - result.title && - result.iframe_src - ) { - videos.push({ - img_src: result.thumbnail, - url: result.url, - title: result.title, - iframe_src: result.iframe_src, - }); - } - }); - - return videos.slice(0, 10); - }), - ]); -}; - -const handleVideoSearch = ( - input: VideoSearchChainInput, - llm: BaseChatModel, -) => { - const VideoSearchChain = createVideoSearchChain(llm); - return VideoSearchChain.invoke(input); -}; - -export default handleVideoSearch; diff --git a/src/agents/webSearchAgent.ts b/src/agents/webSearchAgent.ts deleted file mode 100644 index 8d0ffb1..0000000 --- a/src/agents/webSearchAgent.ts +++ /dev/null @@ -1,371 
+0,0 @@ -import { BaseMessage } from '@langchain/core/messages'; -import { - PromptTemplate, - ChatPromptTemplate, - MessagesPlaceholder, -} from '@langchain/core/prompts'; -import { - RunnableSequence, - RunnableMap, - RunnableLambda, -} from '@langchain/core/runnables'; -import { StringOutputParser } from '@langchain/core/output_parsers'; -import { Document } from '@langchain/core/documents'; -import { searchSearxng } from '../lib/searxng'; -import type { StreamEvent } from '@langchain/core/tracers/log_stream'; -import type { BaseChatModel } from '@langchain/core/language_models/chat_models'; -import type { Embeddings } from '@langchain/core/embeddings'; -import formatChatHistoryAsString from '../utils/formatHistory'; -import eventEmitter from 'events'; -import computeSimilarity from '../utils/computeSimilarity'; -import logger from '../utils/logger'; -import LineListOutputParser from '../lib/outputParsers/listLineOutputParser'; -import { getDocumentsFromLinks } from '../lib/linkDocument'; -import LineOutputParser from '../lib/outputParsers/lineOutputParser'; - -const basicSearchRetrieverPrompt = ` -You will be given a conversation below and a follow up question. You need to rephrase the follow-up question if needed so it is a standalone question that can be used by the LLM to search the web for information. -If it is a writing task or a simple hi, hello rather than a question, you need to return \`not_needed\` as the response. -If the question contains some links and asks to answer from those links or even if they don't you need to return the links inside 'links' XML block and the question inside 'question' XML block. If there are no links then you need to return the question without any XML block. -If the user asks to summarrize the content from some links you need to return \`Summarize\` as the question inside the 'question' XML block and the links inside the 'links' XML block. - -Example: -1. Follow up question: What is the capital of France? 
-Rephrased question: \`Capital of france\` - -2. Follow up question: What is the population of New York City? -Rephrased question: \`Population of New York City\` - -3. Follow up question: What is Docker? -Rephrased question: \`What is Docker\` - -4. Follow up question: Can you tell me what is X from https://example.com -Rephrased question: \` - -Can you tell me what is X? - - - -https://example.com - -\` - -5. Follow up question: Summarize the content from https://example.com -Rephrased question: \` - -Summarize - - - -https://example.com - -\` - -Conversation: -{chat_history} - -Follow up question: {query} -Rephrased question: -`; - -const basicWebSearchResponsePrompt = ` - You are Perplexica, an AI model who is expert at searching the web and answering user's queries. You are also an expert at summarizing web pages or documents and searching for content in them. - - Generate a response that is informative and relevant to the user's query based on provided context (the context consits of search results containing a brief description of the content of that page). - You must use this context to answer the user's query in the best way possible. Use an unbaised and journalistic tone in your response. Do not repeat the text. - You must not tell the user to open any link or visit any website to get the answer. You must provide the answer in the response itself. If the user asks for links you can provide them. - If the query contains some links and the user asks to answer from those links you will be provided the entire content of the page inside the \`context\` XML block. You can then use this content to answer the user's query. - If the user asks to summarize content from some links, you will be provided the entire content of the page inside the \`context\` XML block. You can then use this content to summarize the text. 
The content provided inside the \`context\` block will be already summarized by another model so you just need to use that content to answer the user's query. - Your responses should be medium to long in length be informative and relevant to the user's query. You can use markdowns to format your response. You should use bullet points to list the information. Make sure the answer is not short and is informative. - You have to cite the answer using [number] notation. You must cite the sentences with their relevent context number. You must cite each and every part of the answer so the user can know where the information is coming from. - Place these citations at the end of that particular sentence. You can cite the same sentence multiple times if it is relevant to the user's query like [number1][number2]. - However you do not need to cite it using the same number. You can use different numbers to cite the same sentence multiple times. The number refers to the number of the search result (passed in the context) used to generate that part of the answer. - - Anything inside the following \`context\` HTML block provided below is for your knowledge returned by the search engine and is not shared by the user. You have to answer question on the basis of it and cite the relevant information from it but you do not have to - talk about the context in your response. - - - {context} - - - If you think there's nothing relevant in the search results, you can say that 'Hmm, sorry I could not find any relevant information on this topic. Would you like me to search again or ask something else?'. You do not need to do this for summarization tasks. - Anything between the \`context\` is retrieved from a search engine and is not a part of the conversation with the user. 
Today's date is ${new Date().toISOString()} -`; - -const strParser = new StringOutputParser(); - -const handleStream = async ( - stream: AsyncGenerator, - emitter: eventEmitter, -) => { - for await (const event of stream) { - if ( - event.event === 'on_chain_end' && - event.name === 'FinalSourceRetriever' - ) { - emitter.emit( - 'data', - JSON.stringify({ type: 'sources', data: event.data.output }), - ); - } - if ( - event.event === 'on_chain_stream' && - event.name === 'FinalResponseGenerator' - ) { - emitter.emit( - 'data', - JSON.stringify({ type: 'response', data: event.data.chunk }), - ); - } - if ( - event.event === 'on_chain_end' && - event.name === 'FinalResponseGenerator' - ) { - emitter.emit('end'); - } - } -}; - -type BasicChainInput = { - chat_history: BaseMessage[]; - query: string; -}; - -const createBasicWebSearchRetrieverChain = (llm: BaseChatModel) => { - return RunnableSequence.from([ - PromptTemplate.fromTemplate(basicSearchRetrieverPrompt), - llm, - strParser, - RunnableLambda.from(async (input: string) => { - if (input === 'not_needed') { - return { query: '', docs: [] }; - } - - const linksOutputParser = new LineListOutputParser({ - key: 'links', - }); - - const questionOutputParser = new LineOutputParser({ - key: 'question', - }); - - const links = await linksOutputParser.parse(input); - let question = await questionOutputParser.parse(input); - - if (links.length > 0) { - if (question.length === 0) { - question = 'Summarize'; - } - - let docs = [] - - const linkDocs = await getDocumentsFromLinks({ links }); - - const docGroups: Document[] = []; - - linkDocs.map((doc) => { - const URLDocExists = docGroups.find((d) => d.metadata.url === doc.metadata.url && d.metadata.totalDocs < 10); - - if (!URLDocExists) { - docGroups.push({ - ...doc, - metadata: { - ...doc.metadata, - totalDocs: 1 - } - }); - } - - const docIndex = docGroups.findIndex((d) => d.metadata.url === doc.metadata.url && d.metadata.totalDocs < 10); - - if (docIndex !== -1) { - 
docGroups[docIndex].pageContent = docGroups[docIndex].pageContent + `\n\n` + doc.pageContent; - docGroups[docIndex].metadata.totalDocs += 1; - } - }) - - await Promise.all(docGroups.map(async (doc) => { - const res = await llm.invoke(` - You are a text summarizer. You need to summarize the text provided inside the \`text\` XML block. - You need to summarize the text into 1 or 2 sentences capturing the main idea of the text. - You need to make sure that you don't miss any point while summarizing the text. - You will also be given a \`query\` XML block which will contain the query of the user. Try to answer the query in the summary from the text provided. - If the query says Summarize then you just need to summarize the text without answering the query. - Only return the summarized text without any other messages, text or XML block. - - - ${question} - - - - ${doc.pageContent} - - - Make sure to answer the query in the summary. - `); - - const document = new Document({ - pageContent: res.content as string, - metadata: { - title: doc.metadata.title, - url: doc.metadata.url, - }, - }) - - docs.push(document) - })) - - return { query: question, docs: docs }; - } else { - const res = await searchSearxng(input, { - language: 'en', - }); - - const documents = res.results.map( - (result) => - new Document({ - pageContent: result.content, - metadata: { - title: result.title, - url: result.url, - ...(result.img_src && { img_src: result.img_src }), - }, - }), - ); - - return { query: input, docs: documents }; - } - }), - ]); -}; - -const createBasicWebSearchAnsweringChain = ( - llm: BaseChatModel, - embeddings: Embeddings, -) => { - const basicWebSearchRetrieverChain = createBasicWebSearchRetrieverChain(llm); - - const processDocs = async (docs: Document[]) => { - return docs - .map((_, index) => `${index + 1}. 
${docs[index].pageContent}`) - .join('\n'); - }; - - const rerankDocs = async ({ - query, - docs, - }: { - query: string; - docs: Document[]; - }) => { - if (docs.length === 0) { - return docs; - } - - if (query === 'Summarize') { - return docs; - } - - const docsWithContent = docs.filter( - (doc) => doc.pageContent && doc.pageContent.length > 0, - ); - - const [docEmbeddings, queryEmbedding] = await Promise.all([ - embeddings.embedDocuments(docsWithContent.map((doc) => doc.pageContent)), - embeddings.embedQuery(query), - ]); - - const similarity = docEmbeddings.map((docEmbedding, i) => { - const sim = computeSimilarity(queryEmbedding, docEmbedding); - - return { - index: i, - similarity: sim, - }; - }); - - const sortedDocs = similarity - .filter((sim) => sim.similarity > 0.5) - .sort((a, b) => b.similarity - a.similarity) - .slice(0, 15) - .map((sim) => docsWithContent[sim.index]); - - return sortedDocs; - }; - - return RunnableSequence.from([ - RunnableMap.from({ - query: (input: BasicChainInput) => input.query, - chat_history: (input: BasicChainInput) => input.chat_history, - context: RunnableSequence.from([ - (input) => ({ - query: input.query, - chat_history: formatChatHistoryAsString(input.chat_history), - }), - basicWebSearchRetrieverChain - .pipe(rerankDocs) - .withConfig({ - runName: 'FinalSourceRetriever', - }) - .pipe(processDocs), - ]), - }), - ChatPromptTemplate.fromMessages([ - ['system', basicWebSearchResponsePrompt], - new MessagesPlaceholder('chat_history'), - ['user', '{query}'], - ]), - llm, - strParser, - ]).withConfig({ - runName: 'FinalResponseGenerator', - }); -}; - -const basicWebSearch = ( - query: string, - history: BaseMessage[], - llm: BaseChatModel, - embeddings: Embeddings, -) => { - const emitter = new eventEmitter(); - - try { - const basicWebSearchAnsweringChain = createBasicWebSearchAnsweringChain( - llm, - embeddings, - ); - - const stream = basicWebSearchAnsweringChain.streamEvents( - { - chat_history: history, - query: query, - 
}, - { - version: 'v1', - }, - ); - - handleStream(stream, emitter); - } catch (err) { - emitter.emit( - 'error', - JSON.stringify({ data: 'An error has occurred please try again later' }), - ); - logger.error(`Error in websearch: ${err}`); - } - - return emitter; -}; - -const handleWebSearch = ( - message: string, - history: BaseMessage[], - llm: BaseChatModel, - embeddings: Embeddings, -) => { - const emitter = basicWebSearch(message, history, llm, embeddings); - return emitter; -}; - -export default handleWebSearch; diff --git a/src/agents/wolframAlphaSearchAgent.ts b/src/agents/wolframAlphaSearchAgent.ts deleted file mode 100644 index b80fcf3..0000000 --- a/src/agents/wolframAlphaSearchAgent.ts +++ /dev/null @@ -1,219 +0,0 @@ -import { BaseMessage } from '@langchain/core/messages'; -import { - PromptTemplate, - ChatPromptTemplate, - MessagesPlaceholder, -} from '@langchain/core/prompts'; -import { - RunnableSequence, - RunnableMap, - RunnableLambda, -} from '@langchain/core/runnables'; -import { StringOutputParser } from '@langchain/core/output_parsers'; -import { Document } from '@langchain/core/documents'; -import { searchSearxng } from '../lib/searxng'; -import type { StreamEvent } from '@langchain/core/tracers/log_stream'; -import type { BaseChatModel } from '@langchain/core/language_models/chat_models'; -import type { Embeddings } from '@langchain/core/embeddings'; -import formatChatHistoryAsString from '../utils/formatHistory'; -import eventEmitter from 'events'; -import logger from '../utils/logger'; - -const basicWolframAlphaSearchRetrieverPrompt = ` -You will be given a conversation below and a follow up question. You need to rephrase the follow-up question if needed so it is a standalone question that can be used by the LLM to search the web for information. -If it is a writing task or a simple hi, hello rather than a question, you need to return \`not_needed\` as the response. - -Example: -1. Follow up question: What is the atomic radius of S? 
-Rephrased: Atomic radius of S - -2. Follow up question: What is linear algebra? -Rephrased: Linear algebra - -3. Follow up question: What is the third law of thermodynamics? -Rephrased: Third law of thermodynamics - -Conversation: -{chat_history} - -Follow up question: {query} -Rephrased question: -`; - -const basicWolframAlphaSearchResponsePrompt = ` - You are Perplexica, an AI model who is expert at searching the web and answering user's queries. You are set on focus mode 'Wolfram Alpha', this means you will be searching for information on the web using Wolfram Alpha. It is a computational knowledge engine that can answer factual queries and perform computations. - - Generate a response that is informative and relevant to the user's query based on provided context (the context consits of search results containing a brief description of the content of that page). - You must use this context to answer the user's query in the best way possible. Use an unbaised and journalistic tone in your response. Do not repeat the text. - You must not tell the user to open any link or visit any website to get the answer. You must provide the answer in the response itself. If the user asks for links you can provide them. - Your responses should be medium to long in length be informative and relevant to the user's query. You can use markdowns to format your response. You should use bullet points to list the information. Make sure the answer is not short and is informative. - You have to cite the answer using [number] notation. You must cite the sentences with their relevent context number. You must cite each and every part of the answer so the user can know where the information is coming from. - Place these citations at the end of that particular sentence. You can cite the same sentence multiple times if it is relevant to the user's query like [number1][number2]. - However you do not need to cite it using the same number. 
You can use different numbers to cite the same sentence multiple times. The number refers to the number of the search result (passed in the context) used to generate that part of the answer. - - Anything inside the following \`context\` HTML block provided below is for your knowledge returned by Wolfram Alpha and is not shared by the user. You have to answer question on the basis of it and cite the relevant information from it but you do not have to - talk about the context in your response. - - - {context} - - - If you think there's nothing relevant in the search results, you can say that 'Hmm, sorry I could not find any relevant information on this topic. Would you like me to search again or ask something else?'. - Anything between the \`context\` is retrieved from Wolfram Alpha and is not a part of the conversation with the user. Today's date is ${new Date().toISOString()} -`; - -const strParser = new StringOutputParser(); - -const handleStream = async ( - stream: AsyncGenerator, - emitter: eventEmitter, -) => { - for await (const event of stream) { - if ( - event.event === 'on_chain_end' && - event.name === 'FinalSourceRetriever' - ) { - emitter.emit( - 'data', - JSON.stringify({ type: 'sources', data: event.data.output }), - ); - } - if ( - event.event === 'on_chain_stream' && - event.name === 'FinalResponseGenerator' - ) { - emitter.emit( - 'data', - JSON.stringify({ type: 'response', data: event.data.chunk }), - ); - } - if ( - event.event === 'on_chain_end' && - event.name === 'FinalResponseGenerator' - ) { - emitter.emit('end'); - } - } -}; - -type BasicChainInput = { - chat_history: BaseMessage[]; - query: string; -}; - -const createBasicWolframAlphaSearchRetrieverChain = (llm: BaseChatModel) => { - return RunnableSequence.from([ - PromptTemplate.fromTemplate(basicWolframAlphaSearchRetrieverPrompt), - llm, - strParser, - RunnableLambda.from(async (input: string) => { - if (input === 'not_needed') { - return { query: '', docs: [] }; - } - - const res = 
await searchSearxng(input, { - language: 'en', - engines: ['wolframalpha'], - }); - - const documents = res.results.map( - (result) => - new Document({ - pageContent: result.content, - metadata: { - title: result.title, - url: result.url, - ...(result.img_src && { img_src: result.img_src }), - }, - }), - ); - - return { query: input, docs: documents }; - }), - ]); -}; - -const createBasicWolframAlphaSearchAnsweringChain = (llm: BaseChatModel) => { - const basicWolframAlphaSearchRetrieverChain = - createBasicWolframAlphaSearchRetrieverChain(llm); - - const processDocs = (docs: Document[]) => { - return docs - .map((_, index) => `${index + 1}. ${docs[index].pageContent}`) - .join('\n'); - }; - - return RunnableSequence.from([ - RunnableMap.from({ - query: (input: BasicChainInput) => input.query, - chat_history: (input: BasicChainInput) => input.chat_history, - context: RunnableSequence.from([ - (input) => ({ - query: input.query, - chat_history: formatChatHistoryAsString(input.chat_history), - }), - basicWolframAlphaSearchRetrieverChain - .pipe(({ query, docs }) => { - return docs; - }) - .withConfig({ - runName: 'FinalSourceRetriever', - }) - .pipe(processDocs), - ]), - }), - ChatPromptTemplate.fromMessages([ - ['system', basicWolframAlphaSearchResponsePrompt], - new MessagesPlaceholder('chat_history'), - ['user', '{query}'], - ]), - llm, - strParser, - ]).withConfig({ - runName: 'FinalResponseGenerator', - }); -}; - -const basicWolframAlphaSearch = ( - query: string, - history: BaseMessage[], - llm: BaseChatModel, -) => { - const emitter = new eventEmitter(); - - try { - const basicWolframAlphaSearchAnsweringChain = - createBasicWolframAlphaSearchAnsweringChain(llm); - const stream = basicWolframAlphaSearchAnsweringChain.streamEvents( - { - chat_history: history, - query: query, - }, - { - version: 'v1', - }, - ); - - handleStream(stream, emitter); - } catch (err) { - emitter.emit( - 'error', - JSON.stringify({ data: 'An error has occurred please try again later' 
}), - ); - logger.error(`Error in WolframAlphaSearch: ${err}`); - } - - return emitter; -}; - -const handleWolframAlphaSearch = ( - message: string, - history: BaseMessage[], - llm: BaseChatModel, - embeddings: Embeddings, -) => { - const emitter = basicWolframAlphaSearch(message, history, llm); - return emitter; -}; - -export default handleWolframAlphaSearch; diff --git a/src/agents/writingAssistant.ts b/src/agents/writingAssistant.ts deleted file mode 100644 index 7c2cb49..0000000 --- a/src/agents/writingAssistant.ts +++ /dev/null @@ -1,90 +0,0 @@ -import { BaseMessage } from '@langchain/core/messages'; -import { - ChatPromptTemplate, - MessagesPlaceholder, -} from '@langchain/core/prompts'; -import { RunnableSequence } from '@langchain/core/runnables'; -import { StringOutputParser } from '@langchain/core/output_parsers'; -import type { StreamEvent } from '@langchain/core/tracers/log_stream'; -import eventEmitter from 'events'; -import type { BaseChatModel } from '@langchain/core/language_models/chat_models'; -import type { Embeddings } from '@langchain/core/embeddings'; -import logger from '../utils/logger'; - -const writingAssistantPrompt = ` -You are Perplexica, an AI model who is expert at searching the web and answering user's queries. You are currently set on focus mode 'Writing Assistant', this means you will be helping the user write a response to a given query. -Since you are a writing assistant, you would not perform web searches. If you think you lack information to answer the query, you can ask the user for more information or suggest them to switch to a different focus mode. 
-`; - -const strParser = new StringOutputParser(); - -const handleStream = async ( - stream: AsyncGenerator, - emitter: eventEmitter, -) => { - for await (const event of stream) { - if ( - event.event === 'on_chain_stream' && - event.name === 'FinalResponseGenerator' - ) { - emitter.emit( - 'data', - JSON.stringify({ type: 'response', data: event.data.chunk }), - ); - } - if ( - event.event === 'on_chain_end' && - event.name === 'FinalResponseGenerator' - ) { - emitter.emit('end'); - } - } -}; - -const createWritingAssistantChain = (llm: BaseChatModel) => { - return RunnableSequence.from([ - ChatPromptTemplate.fromMessages([ - ['system', writingAssistantPrompt], - new MessagesPlaceholder('chat_history'), - ['user', '{query}'], - ]), - llm, - strParser, - ]).withConfig({ - runName: 'FinalResponseGenerator', - }); -}; - -const handleWritingAssistant = ( - query: string, - history: BaseMessage[], - llm: BaseChatModel, - embeddings: Embeddings, -) => { - const emitter = new eventEmitter(); - - try { - const writingAssistantChain = createWritingAssistantChain(llm); - const stream = writingAssistantChain.streamEvents( - { - chat_history: history, - query: query, - }, - { - version: 'v1', - }, - ); - - handleStream(stream, emitter); - } catch (err) { - emitter.emit( - 'error', - JSON.stringify({ data: 'An error has occurred please try again later' }), - ); - logger.error(`Error in writing assistant: ${err}`); - } - - return emitter; -}; - -export default handleWritingAssistant; diff --git a/src/agents/youtubeSearchAgent.ts b/src/agents/youtubeSearchAgent.ts deleted file mode 100644 index 4133157..0000000 --- a/src/agents/youtubeSearchAgent.ts +++ /dev/null @@ -1,261 +0,0 @@ -import { BaseMessage } from '@langchain/core/messages'; -import { - PromptTemplate, - ChatPromptTemplate, - MessagesPlaceholder, -} from '@langchain/core/prompts'; -import { - RunnableSequence, - RunnableMap, - RunnableLambda, -} from '@langchain/core/runnables'; -import { StringOutputParser } from 
'@langchain/core/output_parsers'; -import { Document } from '@langchain/core/documents'; -import { searchSearxng } from '../lib/searxng'; -import type { StreamEvent } from '@langchain/core/tracers/log_stream'; -import type { BaseChatModel } from '@langchain/core/language_models/chat_models'; -import type { Embeddings } from '@langchain/core/embeddings'; -import formatChatHistoryAsString from '../utils/formatHistory'; -import eventEmitter from 'events'; -import computeSimilarity from '../utils/computeSimilarity'; -import logger from '../utils/logger'; - -const basicYoutubeSearchRetrieverPrompt = ` -You will be given a conversation below and a follow up question. You need to rephrase the follow-up question if needed so it is a standalone question that can be used by the LLM to search the web for information. -If it is a writing task or a simple hi, hello rather than a question, you need to return \`not_needed\` as the response. - -Example: -1. Follow up question: How does an A.C work? -Rephrased: A.C working - -2. Follow up question: Linear algebra explanation video -Rephrased: What is linear algebra? - -3. Follow up question: What is theory of relativity? -Rephrased: What is theory of relativity? - -Conversation: -{chat_history} - -Follow up question: {query} -Rephrased question: -`; - -const basicYoutubeSearchResponsePrompt = ` - You are Perplexica, an AI model who is expert at searching the web and answering user's queries. You are set on focus mode 'Youtube', this means you will be searching for videos on the web using Youtube and providing information based on the video's transcript. - - Generate a response that is informative and relevant to the user's query based on provided context (the context consits of search results containing a brief description of the content of that page). - You must use this context to answer the user's query in the best way possible. Use an unbaised and journalistic tone in your response. Do not repeat the text. 
- You must not tell the user to open any link or visit any website to get the answer. You must provide the answer in the response itself. If the user asks for links you can provide them. - Your responses should be medium to long in length be informative and relevant to the user's query. You can use markdowns to format your response. You should use bullet points to list the information. Make sure the answer is not short and is informative. - You have to cite the answer using [number] notation. You must cite the sentences with their relevent context number. You must cite each and every part of the answer so the user can know where the information is coming from. - Place these citations at the end of that particular sentence. You can cite the same sentence multiple times if it is relevant to the user's query like [number1][number2]. - However you do not need to cite it using the same number. You can use different numbers to cite the same sentence multiple times. The number refers to the number of the search result (passed in the context) used to generate that part of the answer. - - Anything inside the following \`context\` HTML block provided below is for your knowledge returned by Youtube and is not shared by the user. You have to answer question on the basis of it and cite the relevant information from it but you do not have to - talk about the context in your response. - - - {context} - - - If you think there's nothing relevant in the search results, you can say that 'Hmm, sorry I could not find any relevant information on this topic. Would you like me to search again or ask something else?'. - Anything between the \`context\` is retrieved from Youtube and is not a part of the conversation with the user. 
Today's date is ${new Date().toISOString()} -`; - -const strParser = new StringOutputParser(); - -const handleStream = async ( - stream: AsyncGenerator, - emitter: eventEmitter, -) => { - for await (const event of stream) { - if ( - event.event === 'on_chain_end' && - event.name === 'FinalSourceRetriever' - ) { - emitter.emit( - 'data', - JSON.stringify({ type: 'sources', data: event.data.output }), - ); - } - if ( - event.event === 'on_chain_stream' && - event.name === 'FinalResponseGenerator' - ) { - emitter.emit( - 'data', - JSON.stringify({ type: 'response', data: event.data.chunk }), - ); - } - if ( - event.event === 'on_chain_end' && - event.name === 'FinalResponseGenerator' - ) { - emitter.emit('end'); - } - } -}; - -type BasicChainInput = { - chat_history: BaseMessage[]; - query: string; -}; - -const createBasicYoutubeSearchRetrieverChain = (llm: BaseChatModel) => { - return RunnableSequence.from([ - PromptTemplate.fromTemplate(basicYoutubeSearchRetrieverPrompt), - llm, - strParser, - RunnableLambda.from(async (input: string) => { - if (input === 'not_needed') { - return { query: '', docs: [] }; - } - - const res = await searchSearxng(input, { - language: 'en', - engines: ['youtube'], - }); - - const documents = res.results.map( - (result) => - new Document({ - pageContent: result.content ? result.content : result.title, - metadata: { - title: result.title, - url: result.url, - ...(result.img_src && { img_src: result.img_src }), - }, - }), - ); - - return { query: input, docs: documents }; - }), - ]); -}; - -const createBasicYoutubeSearchAnsweringChain = ( - llm: BaseChatModel, - embeddings: Embeddings, -) => { - const basicYoutubeSearchRetrieverChain = - createBasicYoutubeSearchRetrieverChain(llm); - - const processDocs = async (docs: Document[]) => { - return docs - .map((_, index) => `${index + 1}. 
${docs[index].pageContent}`) - .join('\n'); - }; - - const rerankDocs = async ({ - query, - docs, - }: { - query: string; - docs: Document[]; - }) => { - if (docs.length === 0) { - return docs; - } - - const docsWithContent = docs.filter( - (doc) => doc.pageContent && doc.pageContent.length > 0, - ); - - const [docEmbeddings, queryEmbedding] = await Promise.all([ - embeddings.embedDocuments(docsWithContent.map((doc) => doc.pageContent)), - embeddings.embedQuery(query), - ]); - - const similarity = docEmbeddings.map((docEmbedding, i) => { - const sim = computeSimilarity(queryEmbedding, docEmbedding); - - return { - index: i, - similarity: sim, - }; - }); - - const sortedDocs = similarity - .filter((sim) => sim.similarity > 0.3) - .sort((a, b) => b.similarity - a.similarity) - .slice(0, 15) - .map((sim) => docsWithContent[sim.index]); - - return sortedDocs; - }; - - return RunnableSequence.from([ - RunnableMap.from({ - query: (input: BasicChainInput) => input.query, - chat_history: (input: BasicChainInput) => input.chat_history, - context: RunnableSequence.from([ - (input) => ({ - query: input.query, - chat_history: formatChatHistoryAsString(input.chat_history), - }), - basicYoutubeSearchRetrieverChain - .pipe(rerankDocs) - .withConfig({ - runName: 'FinalSourceRetriever', - }) - .pipe(processDocs), - ]), - }), - ChatPromptTemplate.fromMessages([ - ['system', basicYoutubeSearchResponsePrompt], - new MessagesPlaceholder('chat_history'), - ['user', '{query}'], - ]), - llm, - strParser, - ]).withConfig({ - runName: 'FinalResponseGenerator', - }); -}; - -const basicYoutubeSearch = ( - query: string, - history: BaseMessage[], - llm: BaseChatModel, - embeddings: Embeddings, -) => { - const emitter = new eventEmitter(); - - try { - const basicYoutubeSearchAnsweringChain = - createBasicYoutubeSearchAnsweringChain(llm, embeddings); - - const stream = basicYoutubeSearchAnsweringChain.streamEvents( - { - chat_history: history, - query: query, - }, - { - version: 'v1', - }, - ); 
- - handleStream(stream, emitter); - } catch (err) { - emitter.emit( - 'error', - JSON.stringify({ data: 'An error has occurred please try again later' }), - ); - logger.error(`Error in youtube search: ${err}`); - } - - return emitter; -}; - -const handleYoutubeSearch = ( - message: string, - history: BaseMessage[], - llm: BaseChatModel, - embeddings: Embeddings, -) => { - const emitter = basicYoutubeSearch(message, history, llm, embeddings); - return emitter; -}; - -export default handleYoutubeSearch; diff --git a/src/app.ts b/src/app.ts deleted file mode 100644 index 9c80baa..0000000 --- a/src/app.ts +++ /dev/null @@ -1,38 +0,0 @@ -import { startWebSocketServer } from './websocket'; -import express from 'express'; -import cors from 'cors'; -import http from 'http'; -import routes from './routes'; -import { getPort } from './config'; -import logger from './utils/logger'; - -const port = getPort(); - -const app = express(); -const server = http.createServer(app); - -const corsOptions = { - origin: '*', -}; - -app.use(cors(corsOptions)); -app.use(express.json()); - -app.use('/api', routes); -app.get('/api', (_, res) => { - res.status(200).json({ status: 'ok' }); -}); - -server.listen(port, () => { - logger.info(`Server is running on port ${port}`); -}); - -startWebSocketServer(server); - -process.on('uncaughtException', (err, origin) => { - logger.error(`Uncaught Exception at ${origin}: ${err}`) -}) - -process.on('unhandledRejection', (reason, promise) => { - logger.error(`Unhandled Rejection at: ${promise}, reason: ${reason}`) -}) \ No newline at end of file diff --git a/src/app/api/chat/route.ts b/src/app/api/chat/route.ts new file mode 100644 index 0000000..ba88da6 --- /dev/null +++ b/src/app/api/chat/route.ts @@ -0,0 +1,310 @@ +import crypto from 'crypto'; +import { AIMessage, BaseMessage, HumanMessage } from '@langchain/core/messages'; +import { EventEmitter } from 'stream'; +import { + getAvailableChatModelProviders, + getAvailableEmbeddingModelProviders, +} 
from '@/lib/providers'; +import db from '@/lib/db'; +import { chats, messages as messagesSchema } from '@/lib/db/schema'; +import { and, eq, gt } from 'drizzle-orm'; +import { getFileDetails } from '@/lib/utils/files'; +import { BaseChatModel } from '@langchain/core/language_models/chat_models'; +import { ChatOpenAI } from '@langchain/openai'; +import { + getCustomOpenaiApiKey, + getCustomOpenaiApiUrl, + getCustomOpenaiModelName, +} from '@/lib/config'; +import { searchHandlers } from '@/lib/search'; + +export const runtime = 'nodejs'; +export const dynamic = 'force-dynamic'; + +type Message = { + messageId: string; + chatId: string; + content: string; +}; + +type ChatModel = { + provider: string; + name: string; +}; + +type EmbeddingModel = { + provider: string; + name: string; +}; + +type Body = { + message: Message; + optimizationMode: 'speed' | 'balanced' | 'quality'; + focusMode: string; + history: Array<[string, string]>; + files: Array<string>; + chatModel: ChatModel; + embeddingModel: EmbeddingModel; + systemInstructions: string; +}; + +const handleEmitterEvents = async ( + stream: EventEmitter, + writer: WritableStreamDefaultWriter, + encoder: TextEncoder, + aiMessageId: string, + chatId: string, +) => { + let recievedMessage = ''; + let sources: any[] = []; + + stream.on('data', (data) => { + const parsedData = JSON.parse(data); + if (parsedData.type === 'response') { + writer.write( + encoder.encode( + JSON.stringify({ + type: 'message', + data: parsedData.data, + messageId: aiMessageId, + }) + '\n', + ), + ); + + recievedMessage += parsedData.data; + } else if (parsedData.type === 'sources') { + writer.write( + encoder.encode( + JSON.stringify({ + type: 'sources', + data: parsedData.data, + messageId: aiMessageId, + }) + '\n', + ), + ); + + sources = parsedData.data; + } + }); + stream.on('end', () => { + writer.write( + encoder.encode( + JSON.stringify({ + type: 'messageEnd', + messageId: aiMessageId, + }) + '\n', + ), + ); + writer.close(); + + 
db.insert(messagesSchema) + .values({ + content: recievedMessage, + chatId: chatId, + messageId: aiMessageId, + role: 'assistant', + metadata: JSON.stringify({ + createdAt: new Date(), + ...(sources && sources.length > 0 && { sources }), + }), + }) + .execute(); + }); + stream.on('error', (data) => { + const parsedData = JSON.parse(data); + writer.write( + encoder.encode( + JSON.stringify({ + type: 'error', + data: parsedData.data, + }), + ), + ); + writer.close(); + }); +}; + +const handleHistorySave = async ( + message: Message, + humanMessageId: string, + focusMode: string, + files: string[], +) => { + const chat = await db.query.chats.findFirst({ + where: eq(chats.id, message.chatId), + }); + + const fileData = files.map(getFileDetails); + + if (!chat) { + await db + .insert(chats) + .values({ + id: message.chatId, + title: message.content, + createdAt: new Date().toString(), + focusMode: focusMode, + files: fileData, + }) + .execute(); + } else if (JSON.stringify(chat.files ?? []) != JSON.stringify(fileData)) { + await db + .update(chats) + .set({ files: fileData }) + .where(eq(chats.id, message.chatId)) + .execute(); // query builders are lazy: without execute() the update is never sent + } + + const messageExists = await db.query.messages.findFirst({ + where: eq(messagesSchema.messageId, humanMessageId), + }); + + if (!messageExists) { + await db + .insert(messagesSchema) + .values({ + content: message.content, + chatId: message.chatId, + messageId: humanMessageId, + role: 'user', + metadata: JSON.stringify({ + createdAt: new Date(), + }), + }) + .execute(); + } else { + await db + .delete(messagesSchema) + .where( + and( + gt(messagesSchema.id, messageExists.id), + eq(messagesSchema.chatId, message.chatId), + ), + ) + .execute(); + } +}; + +export const POST = async (req: Request) => { + try { + const body = (await req.json()) as Body; + const { message } = body; + + if (message.content === '') { + return Response.json( + { + message: 'Please provide a message to process', + }, + { status: 400 }, + ); + } + + const 
[chatModelProviders, embeddingModelProviders] = await Promise.all([ + getAvailableChatModelProviders(), + getAvailableEmbeddingModelProviders(), + ]); + + const chatModelProvider = + chatModelProviders[ + body.chatModel?.provider || Object.keys(chatModelProviders)[0] + ]; + const chatModel = + chatModelProvider[ + body.chatModel?.name || Object.keys(chatModelProvider)[0] + ]; + + const embeddingProvider = + embeddingModelProviders[ + body.embeddingModel?.provider || Object.keys(embeddingModelProviders)[0] + ]; + const embeddingModel = + embeddingProvider[ + body.embeddingModel?.name || Object.keys(embeddingProvider)[0] + ]; + + let llm: BaseChatModel | undefined; + let embedding = embeddingModel.model; + + if (body.chatModel?.provider === 'custom_openai') { + llm = new ChatOpenAI({ + apiKey: getCustomOpenaiApiKey(), + modelName: getCustomOpenaiModelName(), + temperature: 0.7, + configuration: { + baseURL: getCustomOpenaiApiUrl(), + }, + }) as unknown as BaseChatModel; + } else if (chatModelProvider && chatModel) { + llm = chatModel.model; + } + + if (!llm) { + return Response.json({ error: 'Invalid chat model' }, { status: 400 }); + } + + if (!embedding) { + return Response.json( + { error: 'Invalid embedding model' }, + { status: 400 }, + ); + } + + const humanMessageId = + message.messageId ?? 
crypto.randomBytes(7).toString('hex'); + const aiMessageId = crypto.randomBytes(7).toString('hex'); + + const history: BaseMessage[] = body.history.map((msg) => { + if (msg[0] === 'human') { + return new HumanMessage({ + content: msg[1], + }); + } else { + return new AIMessage({ + content: msg[1], + }); + } + }); + + const handler = searchHandlers[body.focusMode]; + + if (!handler) { + return Response.json( + { + message: 'Invalid focus mode', + }, + { status: 400 }, + ); + } + + const stream = await handler.searchAndAnswer( + message.content, + history, + llm, + embedding, + body.optimizationMode, + body.files, + body.systemInstructions, + ); + + const responseStream = new TransformStream(); + const writer = responseStream.writable.getWriter(); + const encoder = new TextEncoder(); + + handleEmitterEvents(stream, writer, encoder, aiMessageId, message.chatId); + handleHistorySave(message, humanMessageId, body.focusMode, body.files); + + return new Response(responseStream.readable, { + headers: { + 'Content-Type': 'text/event-stream', + Connection: 'keep-alive', + 'Cache-Control': 'no-cache, no-transform', + }, + }); + } catch (err) { + console.error('An error occurred while processing chat request:', err); + return Response.json( + { message: 'An error occurred while processing chat request' }, + { status: 500 }, + ); + } +}; diff --git a/src/app/api/chats/[id]/route.ts b/src/app/api/chats/[id]/route.ts new file mode 100644 index 0000000..6891454 --- /dev/null +++ b/src/app/api/chats/[id]/route.ts @@ -0,0 +1,69 @@ +import db from '@/lib/db'; +import { chats, messages } from '@/lib/db/schema'; +import { eq } from 'drizzle-orm'; + +export const GET = async ( + req: Request, + { params }: { params: Promise<{ id: string }> }, +) => { + try { + const { id } = await params; + + const chatExists = await db.query.chats.findFirst({ + where: eq(chats.id, id), + }); + + if (!chatExists) { + return Response.json({ message: 'Chat not found' }, { status: 404 }); + } + + const 
chatMessages = await db.query.messages.findMany({ + where: eq(messages.chatId, id), + }); + + return Response.json( + { + chat: chatExists, + messages: chatMessages, + }, + { status: 200 }, + ); + } catch (err) { + console.error('Error in getting chat by id: ', err); + return Response.json( + { message: 'An error has occurred.' }, + { status: 500 }, + ); + } +}; + +export const DELETE = async ( + req: Request, + { params }: { params: Promise<{ id: string }> }, +) => { + try { + const { id } = await params; + + const chatExists = await db.query.chats.findFirst({ + where: eq(chats.id, id), + }); + + if (!chatExists) { + return Response.json({ message: 'Chat not found' }, { status: 404 }); + } + + await db.delete(chats).where(eq(chats.id, id)).execute(); + await db.delete(messages).where(eq(messages.chatId, id)).execute(); + + return Response.json( + { message: 'Chat deleted successfully' }, + { status: 200 }, + ); + } catch (err) { + console.error('Error in deleting chat by id: ', err); + return Response.json( + { message: 'An error has occurred.' }, + { status: 500 }, + ); + } +}; diff --git a/src/app/api/chats/route.ts b/src/app/api/chats/route.ts new file mode 100644 index 0000000..986a192 --- /dev/null +++ b/src/app/api/chats/route.ts @@ -0,0 +1,15 @@ +import db from '@/lib/db'; + +export const GET = async (req: Request) => { + try { + let chats = await db.query.chats.findMany(); + chats = chats.reverse(); + return Response.json({ chats: chats }, { status: 200 }); + } catch (err) { + console.error('Error in getting chats: ', err); + return Response.json( + { message: 'An error has occurred.' 
}, + { status: 500 }, + ); + } +}; diff --git a/src/app/api/config/route.ts b/src/app/api/config/route.ts new file mode 100644 index 0000000..f117cce --- /dev/null +++ b/src/app/api/config/route.ts @@ -0,0 +1,127 @@ +import { + getAnthropicApiKey, + getCustomOpenaiApiKey, + getCustomOpenaiApiUrl, + getCustomOpenaiModelName, + getGeminiApiKey, + getGroqApiKey, + getOllamaApiEndpoint, + getOpenaiApiKey, + getDeepseekApiKey, + getAimlApiKey, + getLMStudioApiEndpoint, + updateConfig, + getOllamaApiKey, +} from '@/lib/config'; +import { + getAvailableChatModelProviders, + getAvailableEmbeddingModelProviders, +} from '@/lib/providers'; + +export const GET = async (req: Request) => { + try { + const config: Record<string, any> = {}; + + const [chatModelProviders, embeddingModelProviders] = await Promise.all([ + getAvailableChatModelProviders(), + getAvailableEmbeddingModelProviders(), + ]); + + config['chatModelProviders'] = {}; + config['embeddingModelProviders'] = {}; + + for (const provider in chatModelProviders) { + config['chatModelProviders'][provider] = Object.keys( + chatModelProviders[provider], + ).map((model) => { + return { + name: model, + displayName: chatModelProviders[provider][model].displayName, + }; + }); + } + + for (const provider in embeddingModelProviders) { + config['embeddingModelProviders'][provider] = Object.keys( + embeddingModelProviders[provider], + ).map((model) => { + return { + name: model, + displayName: embeddingModelProviders[provider][model].displayName, + }; + }); + } + + config['openaiApiKey'] = getOpenaiApiKey(); + config['ollamaApiUrl'] = getOllamaApiEndpoint(); + config['ollamaApiKey'] = getOllamaApiKey(); + config['lmStudioApiUrl'] = getLMStudioApiEndpoint(); + config['anthropicApiKey'] = getAnthropicApiKey(); + config['groqApiKey'] = getGroqApiKey(); + config['geminiApiKey'] = getGeminiApiKey(); + config['deepseekApiKey'] = getDeepseekApiKey(); + config['aimlApiKey'] = getAimlApiKey(); + config['customOpenaiApiUrl'] = 
getCustomOpenaiApiUrl(); + config['customOpenaiApiKey'] = getCustomOpenaiApiKey(); + config['customOpenaiModelName'] = getCustomOpenaiModelName(); + + return Response.json({ ...config }, { status: 200 }); + } catch (err) { + console.error('An error occurred while getting config:', err); + return Response.json( + { message: 'An error occurred while getting config' }, + { status: 500 }, + ); + } +}; + +export const POST = async (req: Request) => { + try { + const config = await req.json(); + + const updatedConfig = { + MODELS: { + OPENAI: { + API_KEY: config.openaiApiKey, + }, + GROQ: { + API_KEY: config.groqApiKey, + }, + ANTHROPIC: { + API_KEY: config.anthropicApiKey, + }, + GEMINI: { + API_KEY: config.geminiApiKey, + }, + OLLAMA: { + API_URL: config.ollamaApiUrl, + API_KEY: config.ollamaApiKey, + }, + DEEPSEEK: { + API_KEY: config.deepseekApiKey, + }, + AIMLAPI: { + API_KEY: config.aimlApiKey, + }, + LM_STUDIO: { + API_URL: config.lmStudioApiUrl, + }, + CUSTOM_OPENAI: { + API_URL: config.customOpenaiApiUrl, + API_KEY: config.customOpenaiApiKey, + MODEL_NAME: config.customOpenaiModelName, + }, + }, + }; + + updateConfig(updatedConfig); + + return Response.json({ message: 'Config updated' }, { status: 200 }); + } catch (err) { + console.error('An error occurred while updating config:', err); + return Response.json( + { message: 'An error occurred while updating config' }, + { status: 500 }, + ); + } +}; diff --git a/src/app/api/discover/route.ts b/src/app/api/discover/route.ts new file mode 100644 index 0000000..415aee8 --- /dev/null +++ b/src/app/api/discover/route.ts @@ -0,0 +1,98 @@ +import { searchSearxng } from '@/lib/searxng'; + +const websitesForTopic = { + tech: { + query: ['technology news', 'latest tech', 'AI', 'science and innovation'], + links: ['techcrunch.com', 'wired.com', 'theverge.com'], + }, + finance: { + query: ['finance news', 'economy', 'stock market', 'investing'], + links: ['bloomberg.com', 'cnbc.com', 'marketwatch.com'], + }, + art: { + 
query: ['art news', 'culture', 'modern art', 'cultural events'], + links: ['artnews.com', 'hyperallergic.com', 'theartnewspaper.com'], + }, + sports: { + query: ['sports news', 'latest sports', 'cricket football tennis'], + links: ['espn.com', 'bbc.com/sport', 'skysports.com'], + }, + entertainment: { + query: ['entertainment news', 'movies', 'TV shows', 'celebrities'], + links: ['hollywoodreporter.com', 'variety.com', 'deadline.com'], + }, +}; + +type Topic = keyof typeof websitesForTopic; + +export const GET = async (req: Request) => { + try { + const params = new URL(req.url).searchParams; + + const mode: 'normal' | 'preview' = + (params.get('mode') as 'normal' | 'preview') || 'normal'; + const topic: Topic = (params.get('topic') as Topic) || 'tech'; + + const selectedTopic = websitesForTopic[topic]; + + let data = []; + + if (mode === 'normal') { + const seenUrls = new Set(); + + data = ( + await Promise.all( + selectedTopic.links.flatMap((link) => + selectedTopic.query.map(async (query) => { + return ( + await searchSearxng(`site:${link} ${query}`, { + engines: ['bing news'], + pageno: 1, + language: 'en', + }) + ).results; + }), + ), + ) + ) + .flat() + .filter((item) => { + const url = item.url?.toLowerCase().trim(); + if (seenUrls.has(url)) return false; + seenUrls.add(url); + return true; + }) + .sort(() => Math.random() - 0.5); + } else { + data = ( + await searchSearxng( + `site:${selectedTopic.links[Math.floor(Math.random() * selectedTopic.links.length)]} ${selectedTopic.query[Math.floor(Math.random() * selectedTopic.query.length)]}`, + { + engines: ['bing news'], + pageno: 1, + language: 'en', + }, + ) + ).results; + } + + return Response.json( + { + blogs: data, + }, + { + status: 200, + }, + ); + } catch (err) { + console.error(`An error occurred in discover route: ${err}`); + return Response.json( + { + message: 'An error has occurred', + }, + { + status: 500, + }, + ); + } +}; diff --git a/src/app/api/images/route.ts b/src/app/api/images/route.ts 
new file mode 100644 index 0000000..e02854d --- /dev/null +++ b/src/app/api/images/route.ts @@ -0,0 +1,83 @@ +import handleImageSearch from '@/lib/chains/imageSearchAgent'; +import { + getCustomOpenaiApiKey, + getCustomOpenaiApiUrl, + getCustomOpenaiModelName, +} from '@/lib/config'; +import { getAvailableChatModelProviders } from '@/lib/providers'; +import { BaseChatModel } from '@langchain/core/language_models/chat_models'; +import { AIMessage, BaseMessage, HumanMessage } from '@langchain/core/messages'; +import { ChatOpenAI } from '@langchain/openai'; + +interface ChatModel { + provider: string; + model: string; +} + +interface ImageSearchBody { + query: string; + chatHistory: any[]; + chatModel?: ChatModel; +} + +export const POST = async (req: Request) => { + try { + const body: ImageSearchBody = await req.json(); + + const chatHistory = body.chatHistory + .map((msg: any) => { + if (msg.role === 'user') { + return new HumanMessage(msg.content); + } else if (msg.role === 'assistant') { + return new AIMessage(msg.content); + } + }) + .filter((msg) => msg !== undefined) as BaseMessage[]; + + const chatModelProviders = await getAvailableChatModelProviders(); + + const chatModelProvider = + chatModelProviders[ + body.chatModel?.provider || Object.keys(chatModelProviders)[0] + ]; + const chatModel = + chatModelProvider[ + body.chatModel?.model || Object.keys(chatModelProvider)[0] + ]; + + let llm: BaseChatModel | undefined; + + if (body.chatModel?.provider === 'custom_openai') { + llm = new ChatOpenAI({ + apiKey: getCustomOpenaiApiKey(), + modelName: getCustomOpenaiModelName(), + temperature: 0.7, + configuration: { + baseURL: getCustomOpenaiApiUrl(), + }, + }) as unknown as BaseChatModel; + } else if (chatModelProvider && chatModel) { + llm = chatModel.model; + } + + if (!llm) { + return Response.json({ error: 'Invalid chat model' }, { status: 400 }); + } + + const images = await handleImageSearch( + { + chat_history: chatHistory, + query: body.query, + }, + llm, + 
); + + return Response.json({ images }, { status: 200 }); + } catch (err) { + console.error(`An error occurred while searching images: ${err}`); + return Response.json( + { message: 'An error occurred while searching images' }, + { status: 500 }, + ); + } +}; diff --git a/src/app/api/models/route.ts b/src/app/api/models/route.ts new file mode 100644 index 0000000..04a6949 --- /dev/null +++ b/src/app/api/models/route.ts @@ -0,0 +1,47 @@ +import { + getAvailableChatModelProviders, + getAvailableEmbeddingModelProviders, +} from '@/lib/providers'; + +export const GET = async (req: Request) => { + try { + const [chatModelProviders, embeddingModelProviders] = await Promise.all([ + getAvailableChatModelProviders(), + getAvailableEmbeddingModelProviders(), + ]); + + Object.keys(chatModelProviders).forEach((provider) => { + Object.keys(chatModelProviders[provider]).forEach((model) => { + delete (chatModelProviders[provider][model] as { model?: unknown }) + .model; + }); + }); + + Object.keys(embeddingModelProviders).forEach((provider) => { + Object.keys(embeddingModelProviders[provider]).forEach((model) => { + delete (embeddingModelProviders[provider][model] as { model?: unknown }) + .model; + }); + }); + + return Response.json( + { + chatModelProviders, + embeddingModelProviders, + }, + { + status: 200, + }, + ); + } catch (err) { + console.error('An error occurred while fetching models', err); + return Response.json( + { + message: 'An error has occurred.', + }, + { + status: 500, + }, + ); + } +}; diff --git a/src/app/api/search/route.ts b/src/app/api/search/route.ts new file mode 100644 index 0000000..5f752ec --- /dev/null +++ b/src/app/api/search/route.ts @@ -0,0 +1,269 @@ +import type { BaseChatModel } from '@langchain/core/language_models/chat_models'; +import type { Embeddings } from '@langchain/core/embeddings'; +import { ChatOpenAI } from '@langchain/openai'; +import { + getAvailableChatModelProviders, + getAvailableEmbeddingModelProviders, +} from 
'@/lib/providers'; +import { AIMessage, BaseMessage, HumanMessage } from '@langchain/core/messages'; +import { MetaSearchAgentType } from '@/lib/search/metaSearchAgent'; +import { + getCustomOpenaiApiKey, + getCustomOpenaiApiUrl, + getCustomOpenaiModelName, +} from '@/lib/config'; +import { searchHandlers } from '@/lib/search'; + +interface chatModel { + provider: string; + name: string; + customOpenAIKey?: string; + customOpenAIBaseURL?: string; +} + +interface embeddingModel { + provider: string; + name: string; +} + +interface ChatRequestBody { + optimizationMode: 'speed' | 'balanced'; + focusMode: string; + chatModel?: chatModel; + embeddingModel?: embeddingModel; + query: string; + history: Array<[string, string]>; + stream?: boolean; + systemInstructions?: string; +} + +export const POST = async (req: Request) => { + try { + const body: ChatRequestBody = await req.json(); + + if (!body.focusMode || !body.query) { + return Response.json( + { message: 'Missing focus mode or query' }, + { status: 400 }, + ); + } + + body.history = body.history || []; + body.optimizationMode = body.optimizationMode || 'balanced'; + body.stream = body.stream || false; + + const history: BaseMessage[] = body.history.map((msg) => { + return msg[0] === 'human' + ? 
new HumanMessage({ content: msg[1] }) + : new AIMessage({ content: msg[1] }); + }); + + const [chatModelProviders, embeddingModelProviders] = await Promise.all([ + getAvailableChatModelProviders(), + getAvailableEmbeddingModelProviders(), + ]); + + const chatModelProvider = + body.chatModel?.provider || Object.keys(chatModelProviders)[0]; + const chatModel = + body.chatModel?.name || + Object.keys(chatModelProviders[chatModelProvider])[0]; + + const embeddingModelProvider = + body.embeddingModel?.provider || Object.keys(embeddingModelProviders)[0]; + const embeddingModel = + body.embeddingModel?.name || + Object.keys(embeddingModelProviders[embeddingModelProvider])[0]; + + let llm: BaseChatModel | undefined; + let embeddings: Embeddings | undefined; + + if (body.chatModel?.provider === 'custom_openai') { + llm = new ChatOpenAI({ + modelName: body.chatModel?.name || getCustomOpenaiModelName(), + apiKey: body.chatModel?.customOpenAIKey || getCustomOpenaiApiKey(), + temperature: 0.7, + configuration: { + baseURL: + body.chatModel?.customOpenAIBaseURL || getCustomOpenaiApiUrl(), + }, + }) as unknown as BaseChatModel; + } else if ( + chatModelProviders[chatModelProvider] && + chatModelProviders[chatModelProvider][chatModel] + ) { + llm = chatModelProviders[chatModelProvider][chatModel] + .model as unknown as BaseChatModel | undefined; + } + + if ( + embeddingModelProviders[embeddingModelProvider] && + embeddingModelProviders[embeddingModelProvider][embeddingModel] + ) { + embeddings = embeddingModelProviders[embeddingModelProvider][ + embeddingModel + ].model as Embeddings | undefined; + } + + if (!llm || !embeddings) { + return Response.json( + { message: 'Invalid model selected' }, + { status: 400 }, + ); + } + + const searchHandler: MetaSearchAgentType = searchHandlers[body.focusMode]; + + if (!searchHandler) { + return Response.json({ message: 'Invalid focus mode' }, { status: 400 }); + } + + const emitter = await searchHandler.searchAndAnswer( + body.query, + 
history, + llm, + embeddings, + body.optimizationMode, + [], + body.systemInstructions || '', + ); + + if (!body.stream) { + return new Promise( + ( + resolve: (value: Response) => void, + reject: (value: Response) => void, + ) => { + let message = ''; + let sources: any[] = []; + + emitter.on('data', (data: string) => { + try { + const parsedData = JSON.parse(data); + if (parsedData.type === 'response') { + message += parsedData.data; + } else if (parsedData.type === 'sources') { + sources = parsedData.data; + } + } catch (error) { + reject( + Response.json( + { message: 'Error parsing data' }, + { status: 500 }, + ), + ); + } + }); + + emitter.on('end', () => { + resolve(Response.json({ message, sources }, { status: 200 })); + }); + + emitter.on('error', (error: any) => { + reject( + Response.json( + { message: 'Search error', error }, + { status: 500 }, + ), + ); + }); + }, + ); + } + + const encoder = new TextEncoder(); + + const abortController = new AbortController(); + const { signal } = abortController; + + const stream = new ReadableStream({ + start(controller) { + let sources: any[] = []; + + controller.enqueue( + encoder.encode( + JSON.stringify({ + type: 'init', + data: 'Stream connected', + }) + '\n', + ), + ); + + signal.addEventListener('abort', () => { + emitter.removeAllListeners(); + + try { + controller.close(); + } catch (error) {} + }); + + emitter.on('data', (data: string) => { + if (signal.aborted) return; + + try { + const parsedData = JSON.parse(data); + + if (parsedData.type === 'response') { + controller.enqueue( + encoder.encode( + JSON.stringify({ + type: 'response', + data: parsedData.data, + }) + '\n', + ), + ); + } else if (parsedData.type === 'sources') { + sources = parsedData.data; + controller.enqueue( + encoder.encode( + JSON.stringify({ + type: 'sources', + data: sources, + }) + '\n', + ), + ); + } + } catch (error) { + controller.error(error); + } + }); + + emitter.on('end', () => { + if (signal.aborted) return; + + 
controller.enqueue( + encoder.encode( + JSON.stringify({ + type: 'done', + }) + '\n', + ), + ); + controller.close(); + }); + + emitter.on('error', (error: any) => { + if (signal.aborted) return; + + controller.error(error); + }); + }, + cancel() { + abortController.abort(); + }, + }); + + return new Response(stream, { + headers: { + 'Content-Type': 'text/event-stream', + 'Cache-Control': 'no-cache, no-transform', + Connection: 'keep-alive', + }, + }); + } catch (err: any) { + console.error(`Error in getting search results: ${err.message}`); + return Response.json( + { message: 'An error has occurred.' }, + { status: 500 }, + ); + } +}; diff --git a/src/app/api/suggestions/route.ts b/src/app/api/suggestions/route.ts new file mode 100644 index 0000000..99179d2 --- /dev/null +++ b/src/app/api/suggestions/route.ts @@ -0,0 +1,81 @@ +import generateSuggestions from '@/lib/chains/suggestionGeneratorAgent'; +import { + getCustomOpenaiApiKey, + getCustomOpenaiApiUrl, + getCustomOpenaiModelName, +} from '@/lib/config'; +import { getAvailableChatModelProviders } from '@/lib/providers'; +import { BaseChatModel } from '@langchain/core/language_models/chat_models'; +import { AIMessage, BaseMessage, HumanMessage } from '@langchain/core/messages'; +import { ChatOpenAI } from '@langchain/openai'; + +interface ChatModel { + provider: string; + model: string; +} + +interface SuggestionsGenerationBody { + chatHistory: any[]; + chatModel?: ChatModel; +} + +export const POST = async (req: Request) => { + try { + const body: SuggestionsGenerationBody = await req.json(); + + const chatHistory = body.chatHistory + .map((msg: any) => { + if (msg.role === 'user') { + return new HumanMessage(msg.content); + } else if (msg.role === 'assistant') { + return new AIMessage(msg.content); + } + }) + .filter((msg) => msg !== undefined) as BaseMessage[]; + + const chatModelProviders = await getAvailableChatModelProviders(); + + const chatModelProvider = + chatModelProviders[ + 
body.chatModel?.provider || Object.keys(chatModelProviders)[0] + ]; + const chatModel = + chatModelProvider[ + body.chatModel?.model || Object.keys(chatModelProvider)[0] + ]; + + let llm: BaseChatModel | undefined; + + if (body.chatModel?.provider === 'custom_openai') { + llm = new ChatOpenAI({ + apiKey: getCustomOpenaiApiKey(), + modelName: getCustomOpenaiModelName(), + temperature: 0.7, + configuration: { + baseURL: getCustomOpenaiApiUrl(), + }, + }) as unknown as BaseChatModel; + } else if (chatModelProvider && chatModel) { + llm = chatModel.model; + } + + if (!llm) { + return Response.json({ error: 'Invalid chat model' }, { status: 400 }); + } + + const suggestions = await generateSuggestions( + { + chat_history: chatHistory, + }, + llm, + ); + + return Response.json({ suggestions }, { status: 200 }); + } catch (err) { + console.error(`An error occurred while generating suggestions: ${err}`); + return Response.json( + { message: 'An error occurred while generating suggestions' }, + { status: 500 }, + ); + } +}; diff --git a/src/app/api/uploads/route.ts b/src/app/api/uploads/route.ts new file mode 100644 index 0000000..9fbaf2d --- /dev/null +++ b/src/app/api/uploads/route.ts @@ -0,0 +1,134 @@ +import { NextResponse } from 'next/server'; +import fs from 'fs'; +import path from 'path'; +import crypto from 'crypto'; +import { getAvailableEmbeddingModelProviders } from '@/lib/providers'; +import { PDFLoader } from '@langchain/community/document_loaders/fs/pdf'; +import { DocxLoader } from '@langchain/community/document_loaders/fs/docx'; +import { RecursiveCharacterTextSplitter } from '@langchain/textsplitters'; +import { Document } from 'langchain/document'; + +interface FileRes { + fileName: string; + fileExtension: string; + fileId: string; +} + +const uploadDir = path.join(process.cwd(), 'uploads'); + +if (!fs.existsSync(uploadDir)) { + fs.mkdirSync(uploadDir, { recursive: true }); +} + +const splitter = new RecursiveCharacterTextSplitter({ + chunkSize: 500, + 
chunkOverlap: 100, +}); + +export async function POST(req: Request) { + try { + const formData = await req.formData(); + + const files = formData.getAll('files') as File[]; + const embedding_model = formData.get('embedding_model'); + const embedding_model_provider = formData.get('embedding_model_provider'); + + if (!embedding_model || !embedding_model_provider) { + return NextResponse.json( + { message: 'Missing embedding model or provider' }, + { status: 400 }, + ); + } + + const embeddingModels = await getAvailableEmbeddingModelProviders(); + const provider = + embedding_model_provider ?? Object.keys(embeddingModels)[0]; + const embeddingModel = + embedding_model ?? Object.keys(embeddingModels[provider as string])[0]; + + let embeddingsModel = + embeddingModels[provider as string]?.[embeddingModel as string]?.model; + if (!embeddingsModel) { + return NextResponse.json( + { message: 'Invalid embedding model selected' }, + { status: 400 }, + ); + } + + const processedFiles: FileRes[] = []; + + await Promise.all( + files.map(async (file: any) => { + const fileExtension = file.name.split('.').pop(); + if (!['pdf', 'docx', 'txt'].includes(fileExtension!)) { + return NextResponse.json( + { message: 'File type not supported' }, + { status: 400 }, + ); + } + + const uniqueFileName = `${crypto.randomBytes(16).toString('hex')}.${fileExtension}`; + const filePath = path.join(uploadDir, uniqueFileName); + + const buffer = Buffer.from(await file.arrayBuffer()); + fs.writeFileSync(filePath, new Uint8Array(buffer)); + + let docs: any[] = []; + if (fileExtension === 'pdf') { + const loader = new PDFLoader(filePath); + docs = await loader.load(); + } else if (fileExtension === 'docx') { + const loader = new DocxLoader(filePath); + docs = await loader.load(); + } else if (fileExtension === 'txt') { + const text = fs.readFileSync(filePath, 'utf-8'); + docs = [ + new Document({ pageContent: text, metadata: { title: file.name } }), + ]; + } + + const splitted = await 
splitter.splitDocuments(docs); + + const extractedDataPath = filePath.replace(/\.\w+$/, '-extracted.json'); + fs.writeFileSync( + extractedDataPath, + JSON.stringify({ + title: file.name, + contents: splitted.map((doc) => doc.pageContent), + }), + ); + + const embeddings = await embeddingsModel.embedDocuments( + splitted.map((doc) => doc.pageContent), + ); + const embeddingsDataPath = filePath.replace( + /\.\w+$/, + '-embeddings.json', + ); + fs.writeFileSync( + embeddingsDataPath, + JSON.stringify({ + title: file.name, + embeddings, + }), + ); + + processedFiles.push({ + fileName: file.name, + fileExtension: fileExtension, + fileId: uniqueFileName.replace(/\.\w+$/, ''), + }); + }), + ); + + return NextResponse.json({ + files: processedFiles, + }); + } catch (error) { + console.error('Error uploading file:', error); + return NextResponse.json( + { message: 'An error has occurred.' }, + { status: 500 }, + ); + } +} diff --git a/src/app/api/videos/route.ts b/src/app/api/videos/route.ts new file mode 100644 index 0000000..7e8288b --- /dev/null +++ b/src/app/api/videos/route.ts @@ -0,0 +1,83 @@ +import handleVideoSearch from '@/lib/chains/videoSearchAgent'; +import { + getCustomOpenaiApiKey, + getCustomOpenaiApiUrl, + getCustomOpenaiModelName, +} from '@/lib/config'; +import { getAvailableChatModelProviders } from '@/lib/providers'; +import { BaseChatModel } from '@langchain/core/language_models/chat_models'; +import { AIMessage, BaseMessage, HumanMessage } from '@langchain/core/messages'; +import { ChatOpenAI } from '@langchain/openai'; + +interface ChatModel { + provider: string; + model: string; +} + +interface VideoSearchBody { + query: string; + chatHistory: any[]; + chatModel?: ChatModel; +} + +export const POST = async (req: Request) => { + try { + const body: VideoSearchBody = await req.json(); + + const chatHistory = body.chatHistory + .map((msg: any) => { + if (msg.role === 'user') { + return new HumanMessage(msg.content); + } else if (msg.role === 
/**
 * Label and icon for each weather code handled by this route. `icon` is the
 * icon base name; unless `fixedIcon` is set, `-day` / `-night` is appended.
 */
const WEATHER_CODES: Record<
  number,
  { condition: string; icon: string; fixedIcon?: boolean }
> = {
  0: { condition: 'Clear', icon: 'clear' },
  1: { condition: 'Mainly Clear', icon: 'cloudy-1' },
  2: { condition: 'Partly Cloudy', icon: 'cloudy-1' },
  3: { condition: 'Cloudy', icon: 'cloudy-1' },
  45: { condition: 'Fog', icon: 'fog' },
  48: { condition: 'Fog', icon: 'fog' },
  51: { condition: 'Light Drizzle', icon: 'rainy-1' },
  53: { condition: 'Moderate Drizzle', icon: 'rainy-1' },
  55: { condition: 'Dense Drizzle', icon: 'rainy-1' },
  56: { condition: 'Light Freezing Drizzle', icon: 'frost' },
  57: { condition: 'Dense Freezing Drizzle', icon: 'frost' },
  61: { condition: 'Slight Rain', icon: 'rainy-2' },
  63: { condition: 'Moderate Rain', icon: 'rainy-2' },
  65: { condition: 'Heavy Rain', icon: 'rainy-2' },
  66: {
    condition: 'Light Freezing Rain',
    icon: 'rain-and-sleet-mix',
    fixedIcon: true,
  },
  67: {
    condition: 'Heavy Freezing Rain',
    icon: 'rain-and-sleet-mix',
    fixedIcon: true,
  },
  71: { condition: 'Slight Snow Fall', icon: 'snowy-2' },
  73: { condition: 'Moderate Snow Fall', icon: 'snowy-2' },
  75: { condition: 'Heavy Snow Fall', icon: 'snowy-2' },
  77: { condition: 'Snow', icon: 'snowy-1' },
  80: { condition: 'Slight Rain Showers', icon: 'rainy-3' },
  81: { condition: 'Moderate Rain Showers', icon: 'rainy-3' },
  82: { condition: 'Heavy Rain Showers', icon: 'rainy-3' },
  85: { condition: 'Slight Snow Showers', icon: 'snowy-3' },
  86: { condition: 'Moderate Snow Showers', icon: 'snowy-3' },
  87: { condition: 'Heavy Snow Showers', icon: 'snowy-3' },
  95: { condition: 'Thunderstorm', icon: 'scattered-thunderstorms' },
  96: {
    condition: 'Thunderstorm with Slight Hail',
    icon: 'severe-thunderstorm',
    fixedIcon: true,
  },
  99: {
    condition: 'Thunderstorm with Heavy Hail',
    icon: 'severe-thunderstorm',
    fixedIcon: true,
  },
};

/** Returns a finite number for numeric input (number or numeric string), else `undefined`. */
const toCoordinate = (value: unknown): number | undefined => {
  if (typeof value === 'number') {
    return Number.isFinite(value) ? value : undefined;
  }
  if (typeof value === 'string' && value.trim() !== '') {
    const parsed = Number(value);
    return Number.isFinite(parsed) ? parsed : undefined;
  }
  return undefined;
};

/**
 * Weather handler: fetches the current conditions for a coordinate from the
 * Open-Meteo forecast API and maps them to the widget payload.
 *
 * Request body: `{ lat, lng, measureUnit }`. Any `measureUnit` other than
 * `'Metric'` is treated as imperial (Fahrenheit, mph).
 *
 * Responds with the weather payload on success, with 400 when `lat` / `lng`
 * are missing or not numeric (0 is a valid coordinate), and with 500 when the
 * upstream request fails or anything unexpected happens.
 */
export const POST = async (req: Request) => {
  try {
    const body: {
      lat: number;
      lng: number;
      measureUnit: 'Imperial' | 'Metric';
    } = await req.json();

    const latitude = toCoordinate(body?.lat);
    const longitude = toCoordinate(body?.lng);

    if (latitude === undefined || longitude === undefined) {
      return Response.json(
        {
          message: 'Invalid request.',
        },
        { status: 400 },
      );
    }

    const isMetric = body.measureUnit === 'Metric';

    // Build the query from validated numbers so nothing from the request body
    // is interpolated into the URL verbatim.
    const url = new URL('https://api.open-meteo.com/v1/forecast');
    url.searchParams.set('latitude', String(latitude));
    url.searchParams.set('longitude', String(longitude));
    url.searchParams.set(
      'current',
      'weather_code,temperature_2m,is_day,relative_humidity_2m,wind_speed_10m',
    );
    url.searchParams.set('timezone', 'auto');
    if (isMetric) {
      // The API reports km/h unless told otherwise; request m/s so the value
      // matches the `windSpeedUnit` label returned below.
      url.searchParams.set('wind_speed_unit', 'ms');
    } else {
      url.searchParams.set('temperature_unit', 'fahrenheit');
      url.searchParams.set('wind_speed_unit', 'mph');
    }

    const res = await fetch(url);
    const data = await res.json();

    if (!res.ok || data.error) {
      console.error(`Error fetching weather data: ${data.reason}`);
      return Response.json(
        {
          message: 'An error has occurred.',
        },
        { status: 500 },
      );
    }

    const dayOrNight = data.current.is_day === 1 ? 'day' : 'night';

    // Unknown codes are reported as clear, as the original default branch did.
    const info = WEATHER_CODES[data.current.weather_code as number] ?? {
      condition: 'Clear',
      icon: 'clear',
    };

    const weather: {
      temperature: number;
      condition: string;
      humidity: number;
      windSpeed: number;
      icon: string;
      temperatureUnit: 'C' | 'F';
      windSpeedUnit: 'm/s' | 'mph';
    } = {
      temperature: data.current.temperature_2m,
      condition: info.condition,
      humidity: data.current.relative_humidity_2m,
      windSpeed: data.current.wind_speed_10m,
      icon: info.fixedIcon ? info.icon : `${info.icon}-${dayOrNight}`,
      temperatureUnit: isMetric ? 'C' : 'F',
      windSpeedUnit: isMetric ? 'm/s' : 'mph',
    };

    return Response.json(weather);
  } catch (err) {
    console.error('An error occurred while getting weather data', err);
    return Response.json(
      {
        message: 'An error has occurred.',
      },
      {
        status: 500,
      },
    );
  }
};
Page; diff --git a/src/app/discover/page.tsx b/src/app/discover/page.tsx new file mode 100644 index 0000000..8e20e50 --- /dev/null +++ b/src/app/discover/page.tsx @@ -0,0 +1,158 @@ +'use client'; + +import { Search } from 'lucide-react'; +import { useEffect, useState } from 'react'; +import Link from 'next/link'; +import { toast } from 'sonner'; +import { cn } from '@/lib/utils'; + +interface Discover { + title: string; + content: string; + url: string; + thumbnail: string; +} + +const topics: { key: string; display: string }[] = [ + { + display: 'Tech & Science', + key: 'tech', + }, + { + display: 'Finance', + key: 'finance', + }, + { + display: 'Art & Culture', + key: 'art', + }, + { + display: 'Sports', + key: 'sports', + }, + { + display: 'Entertainment', + key: 'entertainment', + }, +]; + +const Page = () => { + const [discover, setDiscover] = useState(null); + const [loading, setLoading] = useState(true); + const [activeTopic, setActiveTopic] = useState(topics[0].key); + + const fetchArticles = async (topic: string) => { + setLoading(true); + try { + const res = await fetch(`/api/discover?topic=${topic}`, { + method: 'GET', + headers: { + 'Content-Type': 'application/json', + }, + }); + + const data = await res.json(); + + if (!res.ok) { + throw new Error(data.message); + } + + data.blogs = data.blogs.filter((blog: Discover) => blog.thumbnail); + + setDiscover(data.blogs); + } catch (err: any) { + console.error('Error fetching data:', err.message); + toast.error('Error fetching data'); + } finally { + setLoading(false); + } + }; + + useEffect(() => { + fetchArticles(activeTopic); + }, [activeTopic]); + + return ( + <> +
+
+
+ +

Discover

+
+
+
+ +
+ {topics.map((t, i) => ( +
setActiveTopic(t.key)} + > + {t.display} +
+ ))} +
+ + {loading ? ( +
+ +
+ ) : ( +
+ {discover && + discover?.map((item, i) => ( + + {item.title} +
+
+ {item.title.slice(0, 100)}... +
+

+ {item.content.slice(0, 100)}... +

+
+ + ))} +
+ )} +
+ + ); +}; + +export default Page; diff --git a/ui/app/favicon.ico b/src/app/favicon.ico similarity index 100% rename from ui/app/favicon.ico rename to src/app/favicon.ico diff --git a/ui/app/globals.css b/src/app/globals.css similarity index 63% rename from ui/app/globals.css rename to src/app/globals.css index f75daca..6bdc1a8 100644 --- a/ui/app/globals.css +++ b/src/app/globals.css @@ -11,3 +11,11 @@ display: none; } } + +@media screen and (-webkit-min-device-pixel-ratio: 0) { + select, + textarea, + input { + font-size: 16px !important; + } +} diff --git a/ui/app/layout.tsx b/src/app/layout.tsx similarity index 100% rename from ui/app/layout.tsx rename to src/app/layout.tsx diff --git a/ui/app/library/layout.tsx b/src/app/library/layout.tsx similarity index 100% rename from ui/app/library/layout.tsx rename to src/app/library/layout.tsx diff --git a/ui/app/library/page.tsx b/src/app/library/page.tsx similarity index 84% rename from ui/app/library/page.tsx rename to src/app/library/page.tsx index 8294fc1..9c40b2b 100644 --- a/ui/app/library/page.tsx +++ b/src/app/library/page.tsx @@ -1,7 +1,7 @@ 'use client'; import DeleteChat from '@/components/DeleteChat'; -import { formatTimeDifference } from '@/lib/utils'; +import { cn, formatTimeDifference } from '@/lib/utils'; import { BookOpenText, ClockIcon, Delete, ScanEye } from 'lucide-react'; import Link from 'next/link'; import { useEffect, useState } from 'react'; @@ -21,7 +21,7 @@ const Page = () => { const fetchChats = async () => { setLoading(true); - const res = await fetch(`${process.env.NEXT_PUBLIC_API_URL}/chats`, { + const res = await fetch(`/api/chats`, { method: 'GET', headers: { 'Content-Type': 'application/json', @@ -58,13 +58,12 @@ const Page = () => { ) : (
-
-
+
+
-

- Library -

+

Library

+
{chats.length === 0 && (
@@ -74,10 +73,15 @@ const Page = () => {
)} {chats.length > 0 && ( -
+
{chats.map((chat, i) => (
{ return (
- + + +
); diff --git a/src/app/settings/page.tsx b/src/app/settings/page.tsx new file mode 100644 index 0000000..6fb8255 --- /dev/null +++ b/src/app/settings/page.tsx @@ -0,0 +1,963 @@ +'use client'; + +import { Settings as SettingsIcon, ArrowLeft, Loader2 } from 'lucide-react'; +import { useEffect, useState } from 'react'; +import { cn } from '@/lib/utils'; +import { Switch } from '@headlessui/react'; +import ThemeSwitcher from '@/components/theme/Switcher'; +import { ImagesIcon, VideoIcon } from 'lucide-react'; +import Link from 'next/link'; +import { PROVIDER_METADATA } from '@/lib/providers'; + +interface SettingsType { + chatModelProviders: { + [key: string]: [Record]; + }; + embeddingModelProviders: { + [key: string]: [Record]; + }; + openaiApiKey: string; + groqApiKey: string; + anthropicApiKey: string; + geminiApiKey: string; + ollamaApiUrl: string; + ollamaApiKey: string; + lmStudioApiUrl: string; + deepseekApiKey: string; + aimlApiKey: string; + customOpenaiApiKey: string; + customOpenaiApiUrl: string; + customOpenaiModelName: string; +} + +interface InputProps extends React.InputHTMLAttributes { + isSaving?: boolean; + onSave?: (value: string) => void; +} + +const Input = ({ className, isSaving, onSave, ...restProps }: InputProps) => { + return ( +
+ onSave?.(e.target.value)} + /> + {isSaving && ( +
+ +
+ )} +
+ ); +}; + +interface TextareaProps extends React.InputHTMLAttributes { + isSaving?: boolean; + onSave?: (value: string) => void; +} + +const Textarea = ({ + className, + isSaving, + onSave, + ...restProps +}: TextareaProps) => { + return ( +
+