feat(i18n): Refactor prompts and search agent to enhance language handling and formatting instructions

- Updated academicSearch, redditSearch, webSearch, wolframAlpha, writingAssistant, and youtubeSearch prompts to improve clarity and consistency in formatting instructions.
- Added language handling instructions to writingAssistant and other prompts for better localization support.
- Modified MetaSearchAgent to include locale and language parameters for improved prompt generation and language-specific responses.

# Conflicts:
#	Dockerfile
This commit is contained in:
wei840222 2025-08-17 17:38:47 +08:00
parent 9a772d6abe
commit f8896b0f7b
28 changed files with 318 additions and 258 deletions

View file

@ -7,7 +7,6 @@ RUN yarn install --frozen-lockfile --network-timeout 600000
COPY tsconfig.json next.config.mjs next-env.d.ts postcss.config.js drizzle.config.ts tailwind.config.ts ./
COPY src ./src
COPY messages ./messages
COPY public ./public
RUN mkdir -p /home/perplexica/data

View file

@ -34,6 +34,7 @@ The API accepts a JSON object in the request body, where you define the focus mo
["assistant", "I am doing well, how can I help you today?"]
],
"systemInstructions": "Focus on providing technical details about Perplexica's architecture.",
"locale": "en-US",
"stream": false
}
```
@ -66,6 +67,8 @@ The API accepts a JSON object in the request body, where you define the focus mo
- **`systemInstructions`** (string, optional): Custom instructions provided by the user to guide the AI's response. These instructions are treated as user preferences and have lower priority than the system's core instructions. For example, you can specify a particular writing style, format, or focus area.
- **`locale`** (string, optional): Specifies a custom locale for the search operation. If not provided, the default locale (`en-US`) is used. This can be useful for tailoring search results to a specific language or region. Format: an IETF BCP 47 language tag, i.e. an ISO 639-1 language code optionally followed by a hyphen and an ISO 3166-1 alpha-2 region code (e.g., `en-US`, `zh-TW`, `de`). See https://www.rfc-editor.org/rfc/bcp/bcp47.txt.
- **`history`** (array, optional): An array of message pairs representing the conversation history. Each pair consists of a role (either 'human' or 'assistant') and the message content. This allows the system to use the context of the conversation to refine results. Example:
```json

View file

@ -50,6 +50,7 @@ type Body = {
chatModel: ChatModel;
embeddingModel: EmbeddingModel;
systemInstructions: string;
locale: string;
};
const handleEmitterEvents = async (
@ -280,6 +281,7 @@ export const POST = async (req: Request) => {
body.optimizationMode,
body.files,
body.systemInstructions,
body.locale,
);
const responseStream = new TransformStream();

View file

@ -13,6 +13,7 @@ import {
getCustomOpenaiModelName,
} from '@/lib/config';
import { searchHandlers } from '@/lib/search';
import { DEFAULT_LOCALE } from '@/i18n/locales';
interface chatModel {
provider: string;
@ -35,6 +36,7 @@ interface ChatRequestBody {
history: Array<[string, string]>;
stream?: boolean;
systemInstructions?: string;
locale?: string;
}
export const POST = async (req: Request) => {
@ -126,6 +128,7 @@ export const POST = async (req: Request) => {
body.optimizationMode,
[],
body.systemInstructions || '',
body.locale || DEFAULT_LOCALE,
);
if (!body.stream) {

View file

@ -8,6 +8,7 @@ import { getAvailableChatModelProviders } from '@/lib/providers';
import { BaseChatModel } from '@langchain/core/language_models/chat_models';
import { AIMessage, BaseMessage, HumanMessage } from '@langchain/core/messages';
import { ChatOpenAI } from '@langchain/openai';
import { DEFAULT_LOCALE } from '@/i18n/locales';
interface ChatModel {
provider: string;
@ -17,6 +18,7 @@ interface ChatModel {
interface SuggestionsGenerationBody {
chatHistory: any[];
chatModel?: ChatModel;
locale?: string;
}
export const POST = async (req: Request) => {
@ -66,6 +68,7 @@ export const POST = async (req: Request) => {
const suggestions = await generateSuggestions(
{
chat_history: chatHistory,
locale: body.locale || DEFAULT_LOCALE,
},
llm,
);

View file

@ -1,15 +1,14 @@
'use client';
import { Settings as SettingsIcon, ArrowLeft, Loader2 } from 'lucide-react';
import { useCallback, useEffect, useState } from 'react';
import { useEffect, useState } from 'react';
import { cn } from '@/lib/utils';
import { Switch } from '@headlessui/react';
import ThemeSwitcher from '@/components/theme/Switcher';
import LocaleSwitcher from '@/components/LocaleSwitcher';
import { ImagesIcon, VideoIcon } from 'lucide-react';
import Link from 'next/link';
import { PROVIDER_METADATA } from '@/lib/providers';
import LocaleSwitcher from '@/components/LocaleSwitcher';
import { getPromptLanguageName } from '@/i18n/locales';
import { useLocale, useTranslations } from 'next-intl';
interface SettingsType {
@ -216,8 +215,7 @@ const Page = () => {
localStorage.getItem('autoVideoSearch') === 'true',
);
const stored = localStorage.getItem('systemInstructions') || '';
setSystemInstructions(stripPrefixedPrompt(stored));
setSystemInstructions(localStorage.getItem('systemInstructions')!);
setMeasureUnit(
localStorage.getItem('measureUnit')! as 'Imperial' | 'Metric',
@ -229,37 +227,6 @@ const Page = () => {
fetchConfig();
}, []);
// Remove prefix for UI display if it exists in stored value
const stripPrefixedPrompt = (text: string) => {
const trimmed = (text || '').trim();
const starts = 'Always respond to all non-code content and explanations in';
if (trimmed.startsWith(starts)) {
const parts = trimmed.split('\n\n');
// Drop the first block (prefix paragraph and rules)
const rest = parts.slice(1).join('\n\n');
return rest || '';
}
return trimmed;
};
const buildPrefixedPrompt = useCallback((base: string, loc: string) => {
const langName = getPromptLanguageName(loc);
const prefix = `Always respond to all non-code content and explanations in ${langName}.\nRules:\n1. All descriptions, explanations, and example clarifications must be in ${langName}.\n2. Any content inside code blocks and code comments must be entirely in English.\n3. For language-specific or technical terms, use the original term in that specific language (do not translate it).`;
const trimmed = (base || '').trim();
// If already starts with the prefix (by simple inclusion of first sentence), avoid duplicating
if (
trimmed.startsWith(
`Always respond to all non-code content and explanations in`,
)
) {
// If locale changed, replace the existing first paragraph block
const parts = trimmed.split('\n\n');
const rest = parts.slice(1).join('\n\n');
return `${prefix}${rest ? '\n\n' + rest : ''}`;
}
return prefix + (trimmed ? `\n\n${trimmed}` : '');
}, []);
const saveConfig = async (key: string, value: any) => {
setSavingStates((prev) => ({ ...prev, [key]: true }));
@ -495,16 +462,7 @@ const Page = () => {
<p className="text-black/70 dark:text-white/70 text-sm">
{t('preferences.language')}
</p>
<LocaleSwitcher
onChange={(nextLocale) => {
// Rebuild and persist with new locale prefix; keep UI clean
const prefixed = buildPrefixedPrompt(
systemInstructions,
nextLocale,
);
saveConfig('systemInstructions', prefixed);
}}
/>
<LocaleSwitcher />
</div>
</SettingsSection>
@ -602,12 +560,7 @@ const Page = () => {
onChange={(e) => {
setSystemInstructions(e.target.value);
}}
onSave={(value) => {
const prefixed = buildPrefixedPrompt(value, locale);
// Keep UI as user input without prefix
setSystemInstructions(value);
saveConfig('systemInstructions', prefixed);
}}
onSave={(value) => saveConfig('systemInstructions', value)}
placeholder={t('systemInstructions.placeholder')}
/>
</div>
@ -857,7 +810,6 @@ const Page = () => {
</p>
<Input
type="text"
placeholder={t('api.ollamaApiUrl')}
value={config.ollamaApiUrl}
isSaving={savingStates['ollamaApiUrl']}
onChange={(e) => {
@ -966,7 +918,6 @@ const Page = () => {
</p>
<Input
type="text"
placeholder={t('api.lmStudioApiUrl')}
value={config.lmStudioApiUrl}
isSaving={savingStates['lmStudioApiUrl']}
onChange={(e) => {

View file

@ -7,7 +7,7 @@ import Chat from './Chat';
import EmptyChat from './EmptyChat';
import crypto from 'crypto';
import { toast } from 'sonner';
import { useTranslations } from 'next-intl';
import { useLocale, useTranslations } from 'next-intl';
import { useSearchParams } from 'next/navigation';
import { getSuggestions } from '@/lib/actions';
import { Settings } from 'lucide-react';
@ -264,6 +264,7 @@ const loadMessages = async (
const ChatWindow = ({ id }: { id?: string }) => {
const t = useTranslations();
const locale = useLocale();
const searchParams = useSearchParams();
const initialMessage = searchParams.get('q');
@ -473,7 +474,7 @@ const ChatWindow = ({ id }: { id?: string }) => {
lastMsg.sources.length > 0 &&
!lastMsg.suggestions
) {
const suggestions = await getSuggestions(messagesRef.current);
const suggestions = await getSuggestions(messagesRef.current, locale);
setMessages((prev) =>
prev.map((msg) => {
if (msg.messageId === lastMsg.messageId) {
@ -516,6 +517,7 @@ const ChatWindow = ({ id }: { id?: string }) => {
provider: embeddingModelProvider.provider,
},
systemInstructions: localStorage.getItem('systemInstructions'),
locale: locale,
}),
});

View file

@ -1,3 +1,4 @@
// IETF BCP 47 language tags: {ISO 639-1}[-{ISO 3166-1 alpha-2}] (the region part is optional, e.g. 'de'). See https://www.rfc-editor.org/rfc/bcp/bcp47.txt
export const LOCALES = [
'en-US',
'en-GB',
@ -10,6 +11,7 @@ export const LOCALES = [
'fr-CA',
'de',
] as const;
export type AppLocale = (typeof LOCALES)[number];
// Default locale for fallbacks
@ -19,7 +21,7 @@ export const DEFAULT_LOCALE: AppLocale = 'en-US';
export const LOCALE_LABELS: Record<AppLocale, string> = {
'en-US': 'English (US)',
'en-GB': 'English (UK)',
'zh-TW': '繁體中文',
'zh-TW': '繁體中文(台灣)',
'zh-HK': '繁體中文(香港)',
'zh-CN': '简体中文',
ja: '日本語',

View file

@ -110,6 +110,6 @@ export default getRequestConfig(async () => {
return {
locale,
messages: (await import(`../../messages/${locale}.json`)).default,
messages: (await import(`./${locale}.json`)).default,
};
});

View file

@ -1,6 +1,9 @@
import { Message } from '@/components/ChatWindow';
export const getSuggestions = async (chatHisory: Message[]) => {
export const getSuggestions = async (
chatHistory: Message[],
locale?: string,
) => {
const chatModel = localStorage.getItem('chatModel');
const chatModelProvider = localStorage.getItem('chatModelProvider');
@ -13,7 +16,7 @@ export const getSuggestions = async (chatHisory: Message[]) => {
'Content-Type': 'application/json',
},
body: JSON.stringify({
chatHistory: chatHisory,
chatHistory: chatHistory,
chatModel: {
provider: chatModelProvider,
model: chatModel,
@ -22,6 +25,7 @@ export const getSuggestions = async (chatHisory: Message[]) => {
customOpenAIBaseURL,
}),
},
locale,
}),
});

View file

@ -5,14 +5,30 @@ import formatChatHistoryAsString from '../utils/formatHistory';
import { BaseMessage } from '@langchain/core/messages';
import { BaseChatModel } from '@langchain/core/language_models/chat_models';
import { ChatOpenAI } from '@langchain/openai';
import { getPromptLanguageName } from '@/i18n/locales';
const suggestionGeneratorPrompt = `
You are an AI suggestion generator for an AI powered search engine. You will be given a conversation below. You need to generate 4-5 suggestions based on the conversation. The suggestion should be relevant to the conversation that can be used by the user to ask the chat model for more information.
You need to make sure the suggestions are relevant to the conversation and are helpful to the user. Keep a note that the user might use these suggestions to ask a chat model for more information.
Make sure the suggestions are medium in length and are informative and relevant to the conversation.
You are an AI suggestion generator for an AI powered search engine.
Provide these suggestions separated by newlines between the XML tags <suggestions> and </suggestions>. For example:
You need to meet these requirements:
- You will be given a conversation below. You need to generate 4-5 suggestions based on the conversation.
- The suggestion should be relevant to the conversation that can be used by the user to ask the chat model for more information.
- You need to make sure the suggestions are relevant to the conversation and are helpful to the user. Keep a note that the user might use these suggestions to ask a chat model for more information.
### Language Instructions
- **Language Definition**: Interpret "{language}" as a combination of language and optional region.
- Format: "language (region)" or "language(region)" (e.g., "English (US)", "繁體中文(台灣)").
- The main language indicates the linguistic system (e.g., English, 繁體中文, 日本語).
- The region in parentheses indicates the regional variant or locale style (e.g., US, UK, 台灣, 香港, France).
- **Primary Language**: Use "{language}" for all non-code content, including explanations, descriptions, and examples.
- **Regional Variants**: Adjust word choice, spelling, and style according to the region specified in "{language}" (e.g., 繁體中文(台灣)使用台灣用語, 繁體中文(香港)使用香港用語; English (US) uses "color", English (UK) uses "colour").
- **Code and Comments**: All code blocks and code comments must be entirely in "English (US)".
- **Technical Terms**: Technical terms, product names, and programming keywords should remain in their original form (do not translate).
- **Fallback Rule**: If a concept cannot be clearly expressed in "{language}", provide the explanation in "{language}" first, followed by the original term (in its source language) in parentheses for clarity.
### Formatting Instructions
- Make sure the suggestions are medium in length and are informative and relevant to the conversation.
- Provide these suggestions separated by newlines between the XML tags <suggestions> and </suggestions>. For example:
<suggestions>
Tell me more about SpaceX and their recent projects
What is the latest news on SpaceX?
@ -25,6 +41,7 @@ Conversation:
type SuggestionGeneratorInput = {
chat_history: BaseMessage[];
locale: string;
};
const outputParser = new ListLineOutputParser({
@ -36,6 +53,8 @@ const createSuggestionGeneratorChain = (llm: BaseChatModel) => {
RunnableMap.from({
chat_history: (input: SuggestionGeneratorInput) =>
formatChatHistoryAsString(input.chat_history),
language: (input: SuggestionGeneratorInput) =>
getPromptLanguageName(input.locale),
}),
PromptTemplate.fromTemplate(suggestionGeneratorPrompt),
llm,

View file

@ -55,6 +55,17 @@ export const academicSearchResponsePrompt = `
These instructions are shared to you by the user and not by the system. You will have to follow them but give them less priority than the above instructions. If the user has provided specific instructions or preferences, incorporate them into your response while adhering to the overall guidelines.
{systemInstructions}
### Language Instructions
- **Language Definition**: Interpret "{language}" as a combination of language and optional region.
- Format: "language (region)" or "language(region)" (e.g., "English (US)", "繁體中文(台灣)").
- The main language indicates the linguistic system (e.g., English, 繁體中文, 日本語).
- The region in parentheses indicates the regional variant or locale style (e.g., US, UK, 台灣, 香港, France).
- **Primary Language**: Use "{language}" for all non-code content, including explanations, descriptions, and examples.
- **Regional Variants**: Adjust word choice, spelling, and style according to the region specified in "{language}" (e.g., 繁體中文(台灣)使用台灣用語, 繁體中文(香港)使用香港用語; English (US) uses "color", English (UK) uses "colour").
- **Code and Comments**: All code blocks and code comments must be entirely in "English (US)".
- **Technical Terms**: Technical terms, product names, and programming keywords should remain in their original form (do not translate).
- **Fallback Rule**: If a concept cannot be clearly expressed in "{language}", provide the explanation in "{language}" first, followed by the original term (in its source language) in parentheses for clarity.
### Example Output
- Begin with a brief introduction summarizing the event or query topic.
- Follow with detailed sections under clear headings, covering all aspects of the query if possible.

View file

@ -55,6 +55,17 @@ export const redditSearchResponsePrompt = `
These instructions are shared to you by the user and not by the system. You will have to follow them but give them less priority than the above instructions. If the user has provided specific instructions or preferences, incorporate them into your response while adhering to the overall guidelines.
{systemInstructions}
### Language Instructions
- **Language Definition**: Interpret "{language}" as a combination of language and optional region.
- Format: "language (region)" or "language(region)" (e.g., "English (US)", "繁體中文(台灣)").
- The main language indicates the linguistic system (e.g., English, 繁體中文, 日本語).
- The region in parentheses indicates the regional variant or locale style (e.g., US, UK, 台灣, 香港, France).
- **Primary Language**: Use "{language}" for all non-code content, including explanations, descriptions, and examples.
- **Regional Variants**: Adjust word choice, spelling, and style according to the region specified in "{language}" (e.g., 繁體中文(台灣)使用台灣用語, 繁體中文(香港)使用香港用語; English (US) uses "color", English (UK) uses "colour").
- **Code and Comments**: All code blocks and code comments must be entirely in "English (US)".
- **Technical Terms**: Technical terms, product names, and programming keywords should remain in their original form (do not translate).
- **Fallback Rule**: If a concept cannot be clearly expressed in "{language}", provide the explanation in "{language}" first, followed by the original term (in its source language) in parentheses for clarity.
### Example Output
- Begin with a brief introduction summarizing the event or query topic.
- Follow with detailed sections under clear headings, covering all aspects of the query if possible.

View file

@ -96,6 +96,17 @@ export const webSearchResponsePrompt = `
These instructions are shared to you by the user and not by the system. You will have to follow them but give them less priority than the above instructions. If the user has provided specific instructions or preferences, incorporate them into your response while adhering to the overall guidelines.
{systemInstructions}
### Language Instructions
- **Language Definition**: Interpret "{language}" as a combination of language and optional region.
- Format: "language (region)" or "language(region)" (e.g., "English (US)", "繁體中文(台灣)").
- The main language indicates the linguistic system (e.g., English, 繁體中文, 日本語).
- The region in parentheses indicates the regional variant or locale style (e.g., US, UK, 台灣, 香港, France).
- **Primary Language**: Use "{language}" for all non-code content, including explanations, descriptions, and examples.
- **Regional Variants**: Adjust word choice, spelling, and style according to the region specified in "{language}" (e.g., 繁體中文(台灣)使用台灣用語, 繁體中文(香港)使用香港用語; English (US) uses "color", English (UK) uses "colour").
- **Code and Comments**: All code blocks and code comments must be entirely in "English (US)".
- **Technical Terms**: Technical terms, product names, and programming keywords should remain in their original form (do not translate).
- **Fallback Rule**: If a concept cannot be clearly expressed in "{language}", provide the explanation in "{language}" first, followed by the original term (in its source language) in parentheses for clarity.
### Example Output
- Begin with a brief introduction summarizing the event or query topic.
- Follow with detailed sections under clear headings, covering all aspects of the query if possible.

View file

@ -55,6 +55,17 @@ export const wolframAlphaSearchResponsePrompt = `
These instructions are shared to you by the user and not by the system. You will have to follow them but give them less priority than the above instructions. If the user has provided specific instructions or preferences, incorporate them into your response while adhering to the overall guidelines.
{systemInstructions}
### Language Instructions
- **Language Definition**: Interpret "{language}" as a combination of language and optional region.
- Format: "language (region)" or "language(region)" (e.g., "English (US)", "繁體中文(台灣)").
- The main language indicates the linguistic system (e.g., English, 繁體中文, 日本語).
- The region in parentheses indicates the regional variant or locale style (e.g., US, UK, 台灣, 香港, France).
- **Primary Language**: Use "{language}" for all non-code content, including explanations, descriptions, and examples.
- **Regional Variants**: Adjust word choice, spelling, and style according to the region specified in "{language}" (e.g., 繁體中文(台灣)使用台灣用語, 繁體中文(香港)使用香港用語; English (US) uses "color", English (UK) uses "colour").
- **Code and Comments**: All code blocks and code comments must be entirely in "English (US)".
- **Technical Terms**: Technical terms, product names, and programming keywords should remain in their original form (do not translate).
- **Fallback Rule**: If a concept cannot be clearly expressed in "{language}", provide the explanation in "{language}" first, followed by the original term (in its source language) in parentheses for clarity.
### Example Output
- Begin with a brief introduction summarizing the event or query topic.
- Follow with detailed sections under clear headings, covering all aspects of the query if possible.

View file

@ -11,6 +11,17 @@ However you do not need to cite it using the same number. You can use different
These instructions are shared to you by the user and not by the system. You will have to follow them but give them less priority than the above instructions. If the user has provided specific instructions or preferences, incorporate them into your response while adhering to the overall guidelines.
{systemInstructions}
### Language Instructions
- **Language Definition**: Interpret "{language}" as a combination of language and optional region.
- Format: "language (region)" or "language(region)" (e.g., "English (US)", "繁體中文(台灣)").
- The main language indicates the linguistic system (e.g., English, 繁體中文, 日本語).
- The region in parentheses indicates the regional variant or locale style (e.g., US, UK, 台灣, 香港, France).
- **Primary Language**: Use "{language}" for all non-code content, including explanations, descriptions, and examples.
- **Regional Variants**: Adjust word choice, spelling, and style according to the region specified in "{language}" (e.g., 繁體中文(台灣)使用台灣用語, 繁體中文(香港)使用香港用語; English (US) uses "color", English (UK) uses "colour").
- **Code and Comments**: All code blocks and code comments must be entirely in "English (US)".
- **Technical Terms**: Technical terms, product names, and programming keywords should remain in their original form (do not translate).
- **Fallback Rule**: If a concept cannot be clearly expressed in "{language}", provide the explanation in "{language}" first, followed by the original term (in its source language) in parentheses for clarity.
<context>
{context}
</context>

View file

@ -51,10 +51,21 @@ export const youtubeSearchResponsePrompt = `
- If no relevant information is found, say: "Hmm, sorry I could not find any relevant information on this topic. Would you like me to search again or ask something else?" Be transparent about limitations and suggest alternatives or ways to reframe the query.
- You are set on focus mode 'Youtube', this means you will be searching for videos on the web using Youtube and providing information based on the video's transcript
### User instructions
### User Instructions
These instructions are shared to you by the user and not by the system. You will have to follow them but give them less priority than the above instructions. If the user has provided specific instructions or preferences, incorporate them into your response while adhering to the overall guidelines.
{systemInstructions}
### Language Instructions
- **Language Definition**: Interpret "{language}" as a combination of language and optional region.
- Format: "language (region)" or "language(region)" (e.g., "English (US)", "繁體中文(台灣)").
- The main language indicates the linguistic system (e.g., English, 繁體中文, 日本語).
- The region in parentheses indicates the regional variant or locale style (e.g., US, UK, 台灣, 香港, France).
- **Primary Language**: Use "{language}" for all non-code content, including explanations, descriptions, and examples.
- **Regional Variants**: Adjust word choice, spelling, and style according to the region specified in "{language}" (e.g., 繁體中文(台灣)使用台灣用語, 繁體中文(香港)使用香港用語; English (US) uses "color", English (UK) uses "colour").
- **Code and Comments**: All code blocks and code comments must be entirely in "English (US)".
- **Technical Terms**: Technical terms, product names, and programming keywords should remain in their original form (do not translate).
- **Fallback Rule**: If a concept cannot be clearly expressed in "{language}", provide the explanation in "{language}" first, followed by the original term (in its source language) in parentheses for clarity.
### Example Output
- Begin with a brief introduction summarizing the event or query topic.
- Follow with detailed sections under clear headings, covering all aspects of the query if possible.

View file

@ -25,6 +25,7 @@ import computeSimilarity from '../utils/computeSimilarity';
import formatChatHistoryAsString from '../utils/formatHistory';
import eventEmitter from 'events';
import { StreamEvent } from '@langchain/core/tracers/log_stream';
import { getPromptLanguageName } from '@/i18n/locales';
export interface MetaSearchAgentType {
searchAndAnswer: (
@ -35,6 +36,7 @@ export interface MetaSearchAgentType {
optimizationMode: 'speed' | 'balanced' | 'quality',
fileIds: string[],
systemInstructions: string,
locale: string,
) => Promise<eventEmitter>;
}
@ -241,10 +243,12 @@ class MetaSearchAgent implements MetaSearchAgentType {
embeddings: Embeddings,
optimizationMode: 'speed' | 'balanced' | 'quality',
systemInstructions: string,
language: string,
) {
return RunnableSequence.from([
RunnableMap.from({
systemInstructions: () => systemInstructions,
language: () => language,
query: (input: BasicChainInput) => input.query,
chat_history: (input: BasicChainInput) => input.chat_history,
date: () => new Date().toISOString(),
@ -475,6 +479,7 @@ class MetaSearchAgent implements MetaSearchAgentType {
optimizationMode: 'speed' | 'balanced' | 'quality',
fileIds: string[],
systemInstructions: string,
locale: string,
) {
const emitter = new eventEmitter();
@ -484,6 +489,7 @@ class MetaSearchAgent implements MetaSearchAgentType {
embeddings,
optimizationMode,
systemInstructions,
getPromptLanguageName(locale),
);
const stream = answeringChain.streamEvents(