Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
151 changes: 151 additions & 0 deletions src/components/SettingsPage.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,12 @@ import LanguageSelector from "./ui/LanguageSelector";
import { Skeleton } from "./ui/skeleton";
import { Progress } from "./ui/progress";
import { useToast } from "./ui/Toast";
import {
DropdownMenu,
DropdownMenuTrigger,
DropdownMenuContent,
DropdownMenuItem,
} from "./ui/dropdown-menu";
import { useTheme } from "../hooks/useTheme";
import type { GpuDevice, LocalTranscriptionProvider } from "../types/electron";
import logger from "../utils/logger";
Expand All @@ -84,6 +90,72 @@ import { useSettingsStore } from "../stores/settingsStore";
/**
 * Render an amount given in hundredths of a currency unit (e.g. cents) as a
 * currency string.
 *
 * The value is divided by 100 and formatted with the runtime's default locale
 * (`undefined` locale argument) using the supplied ISO currency code.
 */
const formatAmount = (cents: number, currency: string) => {
  const majorUnits = cents / 100;
  const currencyFormat: Intl.NumberFormatOptions = { style: "currency", currency };
  return majorUnits.toLocaleString(undefined, currencyFormat);
};

/**
 * Heuristic estimate of how many Whisper tokens `text` will consume.
 *
 * Each Unicode code point contributes a fixed weight:
 *  - U+3000–U+9FFF, U+F900–U+FAFF, U+FF00–U+FFEF (CJK ranges): 2.2
 *  - U+0400–U+04FF (Cyrillic): 0.5
 *  - anything else (Latin / other): 0.25
 *
 * @param text - The prompt text to measure.
 * @returns The summed weights, rounded to the nearest integer.
 */
function estimateTokens(text: string): number {
  const inRange = (value: number, low: number, high: number): boolean =>
    value >= low && value <= high;

  const weightOf = (codePoint: number): number => {
    const isCjk =
      inRange(codePoint, 0x3000, 0x9fff) ||
      inRange(codePoint, 0xf900, 0xfaff) ||
      inRange(codePoint, 0xff00, 0xffef);
    if (isCjk) return 2.2;
    return inRange(codePoint, 0x0400, 0x04ff) ? 0.5 : 0.25;
  };

  // Array.from walks the string by code point, so astral characters count once.
  const total = Array.from(text).reduce(
    (sum, symbol) => sum + weightOf(symbol.codePointAt(0) ?? 0),
    0,
  );
  return Math.round(total);
}

/**
 * Upper bound, in estimated tokens, for the custom transcription prompt.
 *
 * ~half of Whisper's 224-token initial_prompt window, leaving room for Custom Dictionary.
 */
const TOKEN_BUDGET = 112;

/**
 * Built-in example prompts for the transcription-prompt setting, keyed by language code.
 *
 * Each entry carries:
 *  - `label`: the name shown for the preset (written in that language), and
 *  - `prompt`: a short sample passage in that language that uses varied punctuation.
 *
 * The punctuation and spacing inside each `prompt` are part of the data — do not
 * normalize quotes, dashes or spaces when editing.
 */
const TRANSCRIPTION_PROMPT_PRESETS: Record<string, { label: string; prompt: string }> = {
en: {
label: "English",
prompt: 'Hello! How are you? He said: "Let\'s do this today — while we have time." Of course, it\'s not that simple.',
},
es: {
label: "Español",
prompt: '¡Hola! ¿Cómo estás? Él dijo: "Hagámoslo hoy — mientras tengamos tiempo." Claro, no es tan sencillo.',
},
fr: {
label: "Français",
prompt: 'Bonjour ! Comment allez-vous ? Il a dit : « Faisons-le aujourd\'hui — tant qu\'on a le temps. » Ce n\'est pas si simple.',
},
de: {
label: "Deutsch",
prompt: 'Hallo! Wie geht es Ihnen? Er sagte: „Machen wir es heute — solange wir Zeit haben." So einfach ist es nicht.',
},
pt: {
label: "Português",
prompt: 'Olá! Como você está? Ele disse: "Vamos fazer isso hoje — enquanto temos tempo." Não é tão simples.',
},
it: {
label: "Italiano",
prompt: 'Ciao! Come stai? Ha detto: "Facciamolo oggi — finché abbiamo tempo." Non è così semplice.',
},
ru: {
label: "Русский",
prompt: 'Привет! Как дела? Он сказал: «Сделаем это сегодня — пока есть время». Конечно, не всё так просто; нужно учесть погоду.',
},
ja: {
label: "日本語",
prompt: 'こんにちは!元気ですか?「今日やりましょう。」もちろん、簡単ではない。',
},
"zh-CN": {
label: "中文(简体)",
prompt: '你好!你怎么样?他说:"今天就做吧。"当然,事情没那么简单。',
},
"zh-TW": {
label: "中文(繁體)",
prompt: '你好!你怎麼樣?他說:「今天就做吧。」當然,事情沒那麼簡單。',
},
};

export type SettingsSectionType =
| "account"
| "plansBilling"
Expand Down Expand Up @@ -183,6 +255,8 @@ interface TranscriptionSectionProps {
setCustomTranscriptionApiKey: (key: string) => void;
cloudTranscriptionBaseUrl?: string;
setCloudTranscriptionBaseUrl: (url: string) => void;
customTranscriptionPrompt: string;
setCustomTranscriptionPrompt: (value: string) => void;
toast: (opts: {
title: string;
description: string;
Expand Down Expand Up @@ -218,6 +292,8 @@ function TranscriptionSection({
setCustomTranscriptionApiKey,
cloudTranscriptionBaseUrl,
setCloudTranscriptionBaseUrl,
customTranscriptionPrompt,
setCustomTranscriptionPrompt,
toast,
}: TranscriptionSectionProps) {
const { t } = useTranslation();
Expand Down Expand Up @@ -398,6 +474,77 @@ function TranscriptionSection({
/>
)}
<GpuDeviceSelector purpose="transcription" />

{/* Transcription Prompt */}
<SectionHeader
title={t("settingsPage.transcription.transcriptionPrompt.title")}
description={t("settingsPage.transcription.transcriptionPrompt.description")}
/>
<SettingsPanel>
<textarea
className="w-full rounded-md border border-input bg-background px-3 py-2 text-sm ring-offset-background placeholder:text-muted-foreground focus-visible:outline-none focus-visible:ring-2 focus-visible:ring-ring focus-visible:ring-offset-2 resize-y"
rows={4}
value={customTranscriptionPrompt}
onChange={(e) => {
  const nextValue = e.target.value;
  const nextTokens = estimateTokens(nextValue);

  // Normal case: accept any edit that stays within the token budget.
  const fitsBudget = nextTokens <= TOKEN_BUDGET;

  // Recovery case: if the stored prompt is already over budget, still accept
  // edits that shorten it without raising the estimate. Otherwise every
  // keystroke (including Backspace) would be rejected until the text was
  // cleared in one go, and the field would appear frozen.
  const isShrinking =
    nextValue.length < customTranscriptionPrompt.length &&
    nextTokens <= estimateTokens(customTranscriptionPrompt);

  if (fitsBudget || isShrinking) {
    setCustomTranscriptionPrompt(nextValue);
  }
}}
placeholder={t("settingsPage.transcription.transcriptionPrompt.placeholder")}
/>
<div className="flex items-center justify-between mt-1.5">
<DropdownMenu>
<DropdownMenuTrigger asChild>
<Button variant="outline" size="sm">
{t("settingsPage.transcription.transcriptionPrompt.insertPreset")}
</Button>
</DropdownMenuTrigger>
<DropdownMenuContent>
{Object.entries(TRANSCRIPTION_PROMPT_PRESETS).map(([code, { label }]) => (
<DropdownMenuItem
key={code}
onClick={() =>
setCustomTranscriptionPrompt(TRANSCRIPTION_PROMPT_PRESETS[code].prompt)
}
>
{label}
</DropdownMenuItem>
))}
</DropdownMenuContent>
</DropdownMenu>
{(() => {
const pct = Math.min(
Math.round((estimateTokens(customTranscriptionPrompt) / TOKEN_BUDGET) * 100),
100,
);
return (
<div className="flex items-center gap-2 min-w-[120px]">
<div className="h-1.5 flex-1 rounded-full bg-muted overflow-hidden">
<div
className={cn(
"h-full rounded-full transition-all",
pct < 80
? "bg-muted-foreground/40"
: pct < 95
? "bg-yellow-500"
: "bg-destructive",
)}
style={{ width: `${pct}%` }}
/>
</div>
<span
className={cn(
"text-xs tabular-nums text-muted-foreground/70 w-8 text-right",
pct >= 95 && "text-destructive",
)}
>
{pct}%
</span>
</div>
);
})()}
</div>
</SettingsPanel>
</div>
);
}
Expand Down Expand Up @@ -787,6 +934,8 @@ export default function SettingsPage({ activeSection = "general" }: SettingsPage
setDataRetentionEnabled,
customDictionary,
setCustomDictionary,
customTranscriptionPrompt,
setCustomTranscriptionPrompt,
noteFilesEnabled,
setNoteFilesEnabled,
noteFilesPath,
Expand Down Expand Up @@ -3043,6 +3192,8 @@ EOF`,
setCustomTranscriptionApiKey={setCustomTranscriptionApiKey}
cloudTranscriptionBaseUrl={cloudTranscriptionBaseUrl}
setCloudTranscriptionBaseUrl={setCloudTranscriptionBaseUrl}
customTranscriptionPrompt={customTranscriptionPrompt}
setCustomTranscriptionPrompt={setCustomTranscriptionPrompt}
toast={toast}
/>
);
Expand Down
55 changes: 37 additions & 18 deletions src/helpers/audioManager.js
Original file line number Diff line number Diff line change
Expand Up @@ -161,6 +161,25 @@ registerProcessor("pcm-streaming-processor", PCMStreamingProcessor);
return words.length > 0 ? words.join(", ") : null;
}

/**
* Build a combined transcription prompt: custom dictionary words + user's transcription prompt.
* @returns {string|null}
*/
buildTranscriptionPrompt() {
const parts = [];

// Dictionary words FIRST — truncated first by Whisper's 224-token window
const dict = this.getCustomDictionaryPrompt();
if (dict) parts.push(dict);

// Custom prompt LAST — Whisper truncates initial_prompt from the LEFT (keeps rightmost tokens),
// so the custom prompt at the end survives truncation. See: whisper.cpp tokenize logic.
const customPrompt = (getSettings().customTranscriptionPrompt || "").trim().replace(/\s+/g, " ");
if (customPrompt) parts.push(customPrompt);

return parts.length > 0 ? parts.join(" ") : null;
}

setCallbacks({
onStateChange,
onError,
Expand Down Expand Up @@ -585,10 +604,10 @@ registerProcessor("pcm-streaming-processor", PCMStreamingProcessor);
options.language = language;
}

// Add custom dictionary as initial prompt to help Whisper recognize specific words
const dictionaryPrompt = this.getCustomDictionaryPrompt();
if (dictionaryPrompt) {
options.initialPrompt = dictionaryPrompt;
// Add custom dictionary + transcription prompt as initial prompt
const transcriptionPrompt = this.buildTranscriptionPrompt();
if (transcriptionPrompt) {
options.initialPrompt = transcriptionPrompt;
}

logger.debug(
Expand Down Expand Up @@ -1182,8 +1201,8 @@ registerProcessor("pcm-streaming-processor", PCMStreamingProcessor);
opts.sendLogs = "false";
}

const dictionaryPrompt = this.getCustomDictionaryPrompt();
if (dictionaryPrompt) opts.prompt = dictionaryPrompt;
const transcriptionPrompt = this.buildTranscriptionPrompt();
if (transcriptionPrompt) opts.prompt = transcriptionPrompt;

// Use withSessionRefresh to handle AUTH_EXPIRED automatically
const transcriptionStart = performance.now();
Expand Down Expand Up @@ -1342,28 +1361,28 @@ registerProcessor("pcm-streaming-processor", PCMStreamingProcessor);
formData.append("language", language);
}

// Add custom dictionary as prompt hint for cloud transcription
// Add custom dictionary + transcription prompt as prompt hint
// Groq Whisper API limits prompt to 896 chars; OpenAI ~900 chars.
// Truncate at last comma boundary so we never send a partial word.
const MAX_PROMPT_CHARS = provider === "groq" ? 896 : 900;
let dictionaryPrompt = this.getCustomDictionaryPrompt();
if (dictionaryPrompt) {
if (dictionaryPrompt.length > MAX_PROMPT_CHARS) {
const originalLength = dictionaryPrompt.length;
const truncated = dictionaryPrompt.slice(0, MAX_PROMPT_CHARS);
let transcriptionPrompt = this.buildTranscriptionPrompt();
if (transcriptionPrompt) {
if (transcriptionPrompt.length > MAX_PROMPT_CHARS) {
const originalLength = transcriptionPrompt.length;
const truncated = transcriptionPrompt.slice(0, MAX_PROMPT_CHARS);
const lastComma = truncated.lastIndexOf(",");
dictionaryPrompt = lastComma > 0 ? truncated.slice(0, lastComma) : truncated;
transcriptionPrompt = lastComma > 0 ? truncated.slice(0, lastComma) : truncated;
logger.debug(
"Custom dictionary prompt truncated",
"Transcription prompt truncated",
{
originalLength,
truncatedLength: dictionaryPrompt.length,
truncatedLength: transcriptionPrompt.length,
maxChars: MAX_PROMPT_CHARS,
},
"transcription"
);
}
formData.append("prompt", dictionaryPrompt);
formData.append("prompt", transcriptionPrompt);
}

const shouldStream = this.shouldStreamTranscription(model, provider);
Expand All @@ -1385,8 +1404,8 @@ registerProcessor("pcm-streaming-processor", PCMStreamingProcessor);
const audioBuffer = await optimizedAudio.arrayBuffer();
const proxyData = { audioBuffer, model, language };

if (dictionaryPrompt) {
const tokens = dictionaryPrompt
if (transcriptionPrompt) {
const tokens = transcriptionPrompt
.split(",")
.flatMap((entry) => entry.trim().split(/\s+/))
.filter(Boolean)
Expand Down
3 changes: 3 additions & 0 deletions src/hooks/useSettings.ts
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ export interface TranscriptionSettings {
cloudTranscriptionBaseUrl?: string;
cloudTranscriptionMode: string;
customDictionary: string[];
customTranscriptionPrompt: string;
assemblyAiStreaming: boolean;
}

Expand Down Expand Up @@ -179,8 +180,10 @@ function useSettingsInternal() {
cloudTranscriptionMode: store.cloudTranscriptionMode,
cloudReasoningMode: store.cloudReasoningMode,
customDictionary: store.customDictionary,
customTranscriptionPrompt: store.customTranscriptionPrompt,
assemblyAiStreaming: store.assemblyAiStreaming,
setAssemblyAiStreaming: store.setAssemblyAiStreaming,
setCustomTranscriptionPrompt: store.setCustomTranscriptionPrompt,
useReasoningModel: store.useReasoningModel,
reasoningModel: store.reasoningModel,
reasoningProvider: store.reasoningProvider,
Expand Down
6 changes: 6 additions & 0 deletions src/locales/de/translation.json
Original file line number Diff line number Diff line change
Expand Up @@ -1395,6 +1395,12 @@
"title": "Zu benutzerdefinierter Einrichtung gewechselt"
}
},
"transcriptionPrompt": {
"title": "Transkriptions-Prompt",
"description": "Whisper ahmt den Formatierungsstil dieses Prompts nach — verwenden Sie vielfältige Zeichensetzung (Kommas, Gedankenstriche, Anführungszeichen), um interpunktierten Text zu erhalten. Im \"auto\"-Modus beeinflusst die Prompt-Sprache auch die Spracherkennung. Teilt das Token-Budget mit dem benutzerdefinierten Wörterbuch — ein kürzerer Prompt lässt mehr Platz für Wörterbuch-Wörter.",
"placeholder": "Geben Sie einen gut interpunktierten Absatz ein, um den Transkriptionsstil zu steuern...",
"insertPreset": "Vorlage einfügen"
},
"gpuDevice": {
"title": "Transkriptions-GPU",
"description": "GPU für lokale Spracherkennung"
Expand Down
6 changes: 6 additions & 0 deletions src/locales/en/translation.json
Original file line number Diff line number Diff line change
Expand Up @@ -1443,6 +1443,12 @@
"title": "Switched to Custom Setup"
}
},
"transcriptionPrompt": {
"title": "Transcription Prompt",
"description": "Whisper mimics the formatting style of this prompt — include varied punctuation (commas, dashes, quotes) to get punctuated output. In \"auto\" mode, the prompt language also affects language detection. Shares a token budget with Custom Dictionary — a shorter prompt leaves more room for dictionary words.",
"placeholder": "Enter a well-punctuated paragraph to guide transcription style...",
"insertPreset": "Insert preset"
},
"gpuDevice": {
"title": "Transcription GPU",
"description": "GPU used for local speech-to-text"
Expand Down
6 changes: 6 additions & 0 deletions src/locales/es/translation.json
Original file line number Diff line number Diff line change
Expand Up @@ -1395,6 +1395,12 @@
"title": "Cambiado a configuración personalizada"
}
},
"transcriptionPrompt": {
"title": "Prompt de transcripción",
"description": "Whisper imita el estilo de formato de este prompt — incluye puntuación variada (comas, guiones, comillas) para obtener texto puntuado. En modo \"auto\", el idioma del prompt también afecta la detección de idioma. Comparte el presupuesto de tokens con el diccionario personalizado — un prompt más corto deja más espacio para las palabras del diccionario.",
"placeholder": "Escribe un párrafo bien puntuado para guiar el estilo de transcripción...",
"insertPreset": "Insertar preajuste"
},
"gpuDevice": {
"title": "GPU de transcripción",
"description": "GPU usada para transcripción local"
Expand Down
6 changes: 6 additions & 0 deletions src/locales/fr/translation.json
Original file line number Diff line number Diff line change
Expand Up @@ -1395,6 +1395,12 @@
"title": "Basculé vers la configuration personnalisée"
}
},
"transcriptionPrompt": {
"title": "Prompt de transcription",
"description": "Whisper imite le style de formatage de ce prompt — incluez une ponctuation variée (virgules, tirets, guillemets) pour obtenir un texte ponctué. En mode « auto », la langue du prompt influence aussi la détection de langue. Partage le budget de tokens avec le dictionnaire personnalisé — un prompt plus court laisse plus de place aux mots du dictionnaire.",
"placeholder": "Entrez un paragraphe bien ponctué pour guider le style de transcription...",
"insertPreset": "Insérer un modèle"
},
"gpuDevice": {
"title": "GPU de transcription",
"description": "GPU pour la transcription locale"
Expand Down
6 changes: 6 additions & 0 deletions src/locales/it/translation.json
Original file line number Diff line number Diff line change
Expand Up @@ -1395,6 +1395,12 @@
"title": "Passato a configurazione personalizzata"
}
},
"transcriptionPrompt": {
"title": "Prompt di trascrizione",
"description": "Whisper imita lo stile di formattazione di questo prompt — includi punteggiatura varia (virgole, trattini, virgolette) per ottenere testo punteggiato. In modalità \"auto\", la lingua del prompt influenza anche il rilevamento della lingua. Condivide il budget di token con il dizionario personalizzato — un prompt più corto lascia più spazio per le parole del dizionario.",
"placeholder": "Inserisci un paragrafo ben punteggiato per guidare lo stile di trascrizione...",
"insertPreset": "Inserisci modello"
},
"gpuDevice": {
"title": "GPU trascrizione",
"description": "GPU usata per la trascrizione locale"
Expand Down
6 changes: 6 additions & 0 deletions src/locales/ja/translation.json
Original file line number Diff line number Diff line change
Expand Up @@ -1395,6 +1395,12 @@
"title": "カスタム設定に切り替えました"
}
},
"transcriptionPrompt": {
"title": "文字起こしプロンプト",
"description": "Whisperはこのプロンプトの書式スタイルを模倣します — 多様な句読点(読点、ダッシュ、引用符)を含めると、句読点付きの出力が得られます。「auto」モードでは、プロンプトの言語が言語検出にも影響します。カスタム辞書とトークン予算を共有します — プロンプトを短くすると辞書の単語により多くの余裕が生まれます。",
"placeholder": "文字起こしスタイルをガイドする句読点付きの段落を入力...",
"insertPreset": "プリセットを挿入"
},
"gpuDevice": {
"title": "文字起こしGPU",
"description": "ローカル音声認識に使用するGPU"
Expand Down
Loading
Loading