From 5382f8ecdbaaf5144ccca3b213260b9d1f315376 Mon Sep 17 00:00:00 2001 From: Egor Date: Sun, 5 Apr 2026 12:11:18 +0300 Subject: [PATCH] feat: add custom transcription prompt with token-aware budget MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add user-editable "Transcription Prompt" textarea in Settings → Transcription with dropdown presets for 10 languages. Whisper copies the formatting style of this prompt, so a well-punctuated paragraph nudges it to produce punctuated output. - Empty by default (avoids language bias in auto-detect mode) - "Insert preset" dropdown: en, es, fr, de, pt, it, ru, ja, zh-CN, zh-TW - Each preset uses native punctuation (Russian «ёлочки», German „Gänsefüßchen", etc.) - Token-aware budget with estimateTokens() heuristic (CJK ×2.2, Cyrillic ×0.5, Latin ×0.25) — progress bar replaces flat character limit - Budget capped at ~112 tokens (~half of Whisper's 224-token window), leaving room for Custom Dictionary - Dictionary words prepended automatically (truncated first by Whisper's 224-token window; left-truncation documented in code) - i18n: all 10 locales updated Co-Authored-By: Claude Opus 4.6 (1M context) --- src/components/SettingsPage.tsx | 151 +++++++++++++++++++++++++++++ src/helpers/audioManager.js | 55 +++++++---- src/hooks/useSettings.ts | 3 + src/locales/de/translation.json | 6 ++ src/locales/en/translation.json | 6 ++ src/locales/es/translation.json | 6 ++ src/locales/fr/translation.json | 6 ++ src/locales/it/translation.json | 6 ++ src/locales/ja/translation.json | 6 ++ src/locales/pt/translation.json | 6 ++ src/locales/ru/translation.json | 6 ++ src/locales/zh-CN/translation.json | 6 ++ src/locales/zh-TW/translation.json | 6 ++ src/stores/settingsStore.ts | 2 + 14 files changed, 253 insertions(+), 18 deletions(-) diff --git a/src/components/SettingsPage.tsx b/src/components/SettingsPage.tsx index 226621e35..2607cc7b9 100644 --- a/src/components/SettingsPage.tsx +++ 
/**
 * Estimate how many Whisper tokens `text` will occupy.
 *
 * This is a per-character heuristic, not a tokenizer. Each Unicode code point
 * contributes a fixed weight and the sum is rounded to the nearest integer:
 *   - CJK (U+3000–U+9FFF, U+F900–U+FAFF, U+FF00–U+FFEF, U+20000–U+3FFFF) ≈ 2.2 tokens
 *   - Cyrillic (U+0400–U+04FF) ≈ 0.5 tokens
 *   - Latin / everything else ≈ 0.25 tokens
 *
 * @param text Prompt text to measure. Iterated by code point, so a
 *   supplementary-plane character (one surrogate pair) is counted once.
 * @returns The estimated token count; 0 for an empty string.
 */
function estimateTokens(text: string): number {
  let tokens = 0;
  for (const ch of text) {
    // `for...of` never yields an empty string, so the fallback is only there
    // to satisfy the type checker without a non-null assertion.
    const code = ch.codePointAt(0) ?? 0;
    if (
      (code >= 0x3000 && code <= 0x9fff) ||
      (code >= 0xf900 && code <= 0xfaff) ||
      (code >= 0xff00 && code <= 0xffef) ||
      // Planes 2–3 hold the CJK ideograph extensions (Extension B onwards).
      // Without this range they would be weighted like Latin letters.
      (code >= 0x20000 && code <= 0x3ffff)
    ) {
      tokens += 2.2; // CJK ideographs
    } else if (code >= 0x0400 && code <= 0x04ff) {
      tokens += 0.5; // Cyrillic
    } else {
      tokens += 0.25; // Latin / other
    }
  }
  return Math.round(tokens);
}

/** ~half of Whisper's 224-token initial_prompt window, leaving room for Custom Dictionary */
const TOKEN_BUDGET = 112;

/**
 * Ready-made transcription prompts, keyed by locale code.
 *
 * `label` is the language name written in that language; `prompt` is a short,
 * fully punctuated sample paragraph in that language.
 */
const TRANSCRIPTION_PROMPT_PRESETS: Record<string, { label: string; prompt: string }> = {
  en: {
    label: "English",
    prompt: 'Hello! How are you? He said: "Let\'s do this today — while we have time." Of course, it\'s not that simple.',
  },
  es: {
    label: "Español",
    prompt: '¡Hola! ¿Cómo estás? Él dijo: "Hagámoslo hoy — mientras tengamos tiempo." Claro, no es tan sencillo.',
  },
  fr: {
    label: "Français",
    prompt: 'Bonjour ! Comment allez-vous ? Il a dit : « Faisons-le aujourd\'hui — tant qu\'on a le temps. » Ce n\'est pas si simple.',
  },
  de: {
    label: "Deutsch",
    prompt: 'Hallo! Wie geht es Ihnen? Er sagte: „Machen wir es heute — solange wir Zeit haben." So einfach ist es nicht.',
  },
  pt: {
    label: "Português",
    prompt: 'Olá! Como você está? Ele disse: "Vamos fazer isso hoje — enquanto temos tempo." Não é tão simples.',
  },
  it: {
    label: "Italiano",
    prompt: 'Ciao! Come stai? Ha detto: "Facciamolo oggi — finché abbiamo tempo." Non è così semplice.',
  },
  ru: {
    label: "Русский",
    prompt: 'Привет! Как дела? Он сказал: «Сделаем это сегодня — пока есть время». Конечно, не всё так просто; нужно учесть погоду.',
  },
  ja: {
    label: "日本語",
    prompt: 'こんにちは!元気ですか?「今日やりましょう。」もちろん、簡単ではない。',
  },
  "zh-CN": {
    label: "中文(简体)",
    prompt: '你好!你怎么样?他说:"今天就做吧。"当然,事情没那么简单。',
  },
  "zh-TW": {
    label: "中文(繁體)",
    prompt: '你好!你怎麼樣?他說:「今天就做吧。」當然,事情沒那麼簡單。',
  },
};