OpenWhispr · egsok · Apr 5, 2026
diff --git a/src/components/SettingsPage.tsx b/src/components/SettingsPage.tsx
@@ -70,6 +70,12 @@ import LanguageSelector from "./ui/LanguageSelector";
 import { Skeleton } from "./ui/skeleton";
 import { Progress } from "./ui/progress";
 import { useToast } from "./ui/Toast";
+import {
+  DropdownMenu,
+  DropdownMenuTrigger,
+  DropdownMenuContent,
+  DropdownMenuItem,
+} from "./ui/dropdown-menu";
 import { useTheme } from "../hooks/useTheme";
 import type { GpuDevice, LocalTranscriptionProvider } from "../types/electron";
 import logger from "../utils/logger";
@@ -84,6 +90,72 @@ import { useSettingsStore } from "../stores/settingsStore";
 const formatAmount = (cents: number, currency: string) =>
   (cents / 100).toLocaleString(undefined, { style: "currency", currency });
 
+/** Estimate Whisper token count — CJK (kana, ideographs, Hangul, fullwidth) ≈ 2.2 tokens/char, Cyrillic ≈ 0.5, Latin/other ≈ 0.25 */
+function estimateTokens(text: string): number {
+  let tokens = 0;
+  // for...of walks whole code points, so an astral-plane ideograph is weighed once, not once per surrogate.
+  for (const ch of text) {
+    const code = ch.codePointAt(0) ?? 0; // ch is never empty here; ?? 0 only satisfies the type checker
+    // Heavy scripts: each character is budgeted at 2.2 tokens.
+    const isCjk =
+      (code >= 0x3000 && code <= 0x9fff) || // CJK punctuation, kana, unified ideographs
+      (code >= 0xac00 && code <= 0xd7af) || // Hangul syllables — previously counted as Latin
+      (code >= 0xf900 && code <= 0xfaff) || // CJK compatibility ideographs
+      (code >= 0xff00 && code <= 0xffef) || // halfwidth / fullwidth forms
+      (code >= 0x20000 && code <= 0x3ffff); // supplementary ideographic planes (Ext. B and later)
+    const isCyrillic = code >= 0x0400 && code <= 0x04ff;
+    // Weights are added in input order, so results for previously covered text are unchanged.
+    tokens += isCjk ? 2.2 : isCyrillic ? 0.5 : 0.25;
+  }
+  return Math.round(tokens);
+}
+
+/** Token cap for the custom prompt: half of Whisper's 224-token initial_prompt window, leaving room for Custom Dictionary */
+const TOKEN_BUDGET = 112;
+/** Sample well-punctuated sentences keyed by language code; the "insert preset" menu lists each label and replaces the prompt with the chosen entry. */
+const TRANSCRIPTION_PROMPT_PRESETS: Record<string, { label: string; prompt: string }> = {
+  en: {
+    label: "English",
+    prompt: 'Hello! How are you? He said: "Let\'s do this today — while we have time." Of course, it\'s not that simple.',
+  },
+  es: {
+    label: "Español",
+    prompt: '¡Hola! ¿Cómo estás? Él dijo: "Hagámoslo hoy — mientras tengamos tiempo." Claro, no es tan sencillo.',
+  },
+  fr: {
+    label: "Français",
+    prompt: 'Bonjour ! Comment allez-vous ? Il a dit : « Faisons-le aujourd\'hui — tant qu\'on a le temps. » Ce n\'est pas si simple.',
+  },
+  de: {
+    label: "Deutsch",
+    prompt: 'Hallo! Wie geht es Ihnen? Er sagte: „Machen wir es heute — solange wir Zeit haben.“ So einfach ist es nicht.', // closes with “ (U+201C) to pair with the opening „
+  },
+  pt: {
+    label: "Português",
+    prompt: 'Olá! Como você está? Ele disse: "Vamos fazer isso hoje — enquanto temos tempo." Não é tão simples.',
+  },
+  it: {
+    label: "Italiano",
+    prompt: 'Ciao! Come stai? Ha detto: "Facciamolo oggi — finché abbiamo tempo." Non è così semplice.',
+  },
+  ru: {
+    label: "Русский",
+    prompt: 'Привет! Как дела? Он сказал: «Сделаем это сегодня — пока есть время». Конечно, не всё так просто; нужно учесть погоду.',
+  },
+  ja: {
+    label: "日本語",
+    prompt: 'こんにちは！元気ですか？「今日やりましょう。」もちろん、簡単ではない。',
+  },
+  "zh-CN": {
+    label: "中文（简体）",
+    prompt: '你好！你怎么样？他说："今天就做吧。"当然，事情没那么简单。',
+  },
+  "zh-TW": {
+    label: "中文（繁體）",
+    prompt: '你好！你怎麼樣？他說：「今天就做吧。」當然，事情沒那麼簡單。',
+  },
+};
+
 export type SettingsSectionType =
   | "account"
   | "plansBilling"
@@ -183,6 +255,8 @@ interface TranscriptionSectionProps {
   setCustomTranscriptionApiKey: (key: string) => void;
   cloudTranscriptionBaseUrl?: string;
   setCloudTranscriptionBaseUrl: (url: string) => void;
+  customTranscriptionPrompt: string;
+  setCustomTranscriptionPrompt: (value: string) => void;
   toast: (opts: {
     title: string;
     description: string;
@@ -218,6 +292,8 @@ function TranscriptionSection({
   setCustomTranscriptionApiKey,
   cloudTranscriptionBaseUrl,
   setCloudTranscriptionBaseUrl,
+  customTranscriptionPrompt,
+  setCustomTranscriptionPrompt,
   toast,
 }: TranscriptionSectionProps) {
   const { t } = useTranslation();
@@ -398,6 +474,77 @@ function TranscriptionSection({
         />
       )}
       <GpuDeviceSelector purpose="transcription" />
+
+      {/* Transcription Prompt */}
+      <SectionHeader
+        title={t("settingsPage.transcription.transcriptionPrompt.title")}
+        description={t("settingsPage.transcription.transcriptionPrompt.description")}
+      />
+      <SettingsPanel>
+        <textarea
+          className="w-full rounded-md border border-input bg-background px-3 py-2 text-sm ring-offset-background placeholder:text-muted-foreground focus-visible:outline-none focus-visible:ring-2 focus-visible:ring-ring focus-visible:ring-offset-2 resize-y"
+          rows={4}
+          value={customTranscriptionPrompt}
+          onChange={(e) => {
+            if (estimateTokens(e.target.value) <= TOKEN_BUDGET) {
+              setCustomTranscriptionPrompt(e.target.value);
+            }
+          }}
+          placeholder={t("settingsPage.transcription.transcriptionPrompt.placeholder")}
+        />
+        <div className="flex items-center justify-between mt-1.5">
+          <DropdownMenu>
+            <DropdownMenuTrigger asChild>
+              <Button variant="outline" size="sm">
+                {t("settingsPage.transcription.transcriptionPrompt.insertPreset")}
+              </Button>
+            </DropdownMenuTrigger>
+            <DropdownMenuContent>
+              {Object.entries(TRANSCRIPTION_PROMPT_PRESETS).map(([code, { label }]) => (
+                <DropdownMenuItem
+                  key={code}
+                  onClick={() =>
+                    setCustomTranscriptionPrompt(TRANSCRIPTION_PROMPT_PRESETS[code].prompt)
+                  }
+                >
+                  {label}
+                </DropdownMenuItem>
+              ))}
+            </DropdownMenuContent>
+          </DropdownMenu>
+          {(() => {
+            const pct = Math.min(
+              Math.round((estimateTokens(customTranscriptionPrompt) / TOKEN_BUDGET) * 100),
+              100,
+            );
+            return (
+              <div className="flex items-center gap-2 min-w-[120px]">
+                <div className="h-1.5 flex-1 rounded-full bg-muted overflow-hidden">
+                  <div
+                    className={cn(
+                      "h-full rounded-full transition-all",
+                      pct < 80
+                        ? "bg-muted-foreground/40"
+                        : pct < 95
+                          ? "bg-yellow-500"
+                          : "bg-destructive",
+                    )}
+                    style={{ width: `${pct}%` }}
+                  />
+                </div>
+                <span
+                  className={cn(
+                    "text-xs tabular-nums text-muted-foreground/70 w-8 text-right",
+                    pct >= 95 && "text-destructive",
+                  )}
+                >
+                  {pct}%
+                </span>
+              </div>
+            );
+          })()}
+        </div>
+      </SettingsPanel>
     </div>
   );
 }
@@ -787,6 +934,8 @@ export default function SettingsPage({ activeSection = "general" }: SettingsPage
     setDataRetentionEnabled,
     customDictionary,
     setCustomDictionary,
+    customTranscriptionPrompt,
+    setCustomTranscriptionPrompt,
     noteFilesEnabled,
     setNoteFilesEnabled,
     noteFilesPath,
@@ -3043,6 +3192,8 @@ EOF`,
             setCustomTranscriptionApiKey={setCustomTranscriptionApiKey}
             cloudTranscriptionBaseUrl={cloudTranscriptionBaseUrl}
             setCloudTranscriptionBaseUrl={setCloudTranscriptionBaseUrl}
+            customTranscriptionPrompt={customTranscriptionPrompt}
+            setCustomTranscriptionPrompt={setCustomTranscriptionPrompt}
             toast={toast}
           />
         );

diff --git a/src/helpers/audioManager.js b/src/helpers/audioManager.js
@@ -161,6 +161,25 @@ registerProcessor("pcm-streaming-processor", PCMStreamingProcessor);
     return words.length > 0 ? words.join(", ") : null;
   }
 
+  /**
+   * Build a combined transcription prompt: custom dictionary words + user's transcription prompt.
+   * @returns {string|null}
+   */
+  buildTranscriptionPrompt() {
+    const parts = [];
+
+    // Dictionary words FIRST — truncated first by Whisper's 224-token window
+    const dict = this.getCustomDictionaryPrompt();
+    if (dict) parts.push(dict);
+
+    // Custom prompt LAST — Whisper truncates initial_prompt from the LEFT (keeps rightmost tokens),
+    // so the custom prompt at the end survives truncation. See: whisper.cpp tokenize logic.
+    const customPrompt = (getSettings().customTranscriptionPrompt || "").trim().replace(/\s+/g, " ");
+    if (customPrompt) parts.push(customPrompt);
+
+    return parts.length > 0 ? parts.join(" ") : null;
+  }
+
   setCallbacks({
     onStateChange,
     onError,
@@ -585,10 +604,10 @@ registerProcessor("pcm-streaming-processor", PCMStreamingProcessor);
         options.language = language;
       }
 
-      // Add custom dictionary as initial prompt to help Whisper recognize specific words
-      const dictionaryPrompt = this.getCustomDictionaryPrompt();
-      if (dictionaryPrompt) {
-        options.initialPrompt = dictionaryPrompt;
+      // Add custom dictionary + transcription prompt as initial prompt
+      const transcriptionPrompt = this.buildTranscriptionPrompt();
+      if (transcriptionPrompt) {
+        options.initialPrompt = transcriptionPrompt;
       }
 
       logger.debug(
@@ -1182,8 +1201,8 @@ registerProcessor("pcm-streaming-processor", PCMStreamingProcessor);
       opts.sendLogs = "false";
     }
 
-    const dictionaryPrompt = this.getCustomDictionaryPrompt();
-    if (dictionaryPrompt) opts.prompt = dictionaryPrompt;
+    const transcriptionPrompt = this.buildTranscriptionPrompt();
+    if (transcriptionPrompt) opts.prompt = transcriptionPrompt;
 
     // Use withSessionRefresh to handle AUTH_EXPIRED automatically
     const transcriptionStart = performance.now();
@@ -1342,28 +1361,28 @@ registerProcessor("pcm-streaming-processor", PCMStreamingProcessor);
         formData.append("language", language);
       }
 
-      // Add custom dictionary as prompt hint for cloud transcription
+      // Add custom dictionary + transcription prompt as prompt hint
       // Groq Whisper API limits prompt to 896 chars; OpenAI ~900 chars.
       // Truncate at last comma boundary so we never send a partial word.
       const MAX_PROMPT_CHARS = provider === "groq" ? 896 : 900;
-      let dictionaryPrompt = this.getCustomDictionaryPrompt();
-      if (dictionaryPrompt) {
-        if (dictionaryPrompt.length > MAX_PROMPT_CHARS) {
-          const originalLength = dictionaryPrompt.length;
-          const truncated = dictionaryPrompt.slice(0, MAX_PROMPT_CHARS);
+      let transcriptionPrompt = this.buildTranscriptionPrompt();
+      if (transcriptionPrompt) {
+        if (transcriptionPrompt.length > MAX_PROMPT_CHARS) {
+          const originalLength = transcriptionPrompt.length;
+          const truncated = transcriptionPrompt.slice(0, MAX_PROMPT_CHARS);
           const lastComma = truncated.lastIndexOf(",");
-          dictionaryPrompt = lastComma > 0 ? truncated.slice(0, lastComma) : truncated;
+          transcriptionPrompt = lastComma > 0 ? truncated.slice(0, lastComma) : truncated;
           logger.debug(
-            "Custom dictionary prompt truncated",
+            "Transcription prompt truncated",
             {
               originalLength,
-              truncatedLength: dictionaryPrompt.length,
+              truncatedLength: transcriptionPrompt.length,
               maxChars: MAX_PROMPT_CHARS,
             },
             "transcription"
           );
         }
-        formData.append("prompt", dictionaryPrompt);
+        formData.append("prompt", transcriptionPrompt);
       }
 
       const shouldStream = this.shouldStreamTranscription(model, provider);
@@ -1385,8 +1404,8 @@ registerProcessor("pcm-streaming-processor", PCMStreamingProcessor);
         const audioBuffer = await optimizedAudio.arrayBuffer();
         const proxyData = { audioBuffer, model, language };
 
-        if (dictionaryPrompt) {
-          const tokens = dictionaryPrompt
+        if (transcriptionPrompt) {
+          const tokens = transcriptionPrompt
             .split(",")
             .flatMap((entry) => entry.trim().split(/\s+/))
             .filter(Boolean)

diff --git a/src/hooks/useSettings.ts b/src/hooks/useSettings.ts
@@ -19,6 +19,7 @@ export interface TranscriptionSettings {
   cloudTranscriptionBaseUrl?: string;
   cloudTranscriptionMode: string;
   customDictionary: string[];
+  customTranscriptionPrompt: string;
   assemblyAiStreaming: boolean;
 }
 
@@ -179,8 +180,10 @@ function useSettingsInternal() {
     cloudTranscriptionMode: store.cloudTranscriptionMode,
     cloudReasoningMode: store.cloudReasoningMode,
     customDictionary: store.customDictionary,
+    customTranscriptionPrompt: store.customTranscriptionPrompt,
     assemblyAiStreaming: store.assemblyAiStreaming,
     setAssemblyAiStreaming: store.setAssemblyAiStreaming,
+    setCustomTranscriptionPrompt: store.setCustomTranscriptionPrompt,
     useReasoningModel: store.useReasoningModel,
     reasoningModel: store.reasoningModel,
     reasoningProvider: store.reasoningProvider,

diff --git a/src/locales/de/translation.json b/src/locales/de/translation.json
@@ -1395,6 +1395,12 @@
           "title": "Zu benutzerdefinierter Einrichtung gewechselt"
         }
       },
+      "transcriptionPrompt": {
+        "title": "Transkriptions-Prompt",
+        "description": "Whisper ahmt den Formatierungsstil dieses Prompts nach — verwenden Sie vielfältige Zeichensetzung (Kommas, Gedankenstriche, Anführungszeichen), um interpunktierten Text zu erhalten. Im \"auto\"-Modus beeinflusst die Prompt-Sprache auch die Spracherkennung. Teilt das Token-Budget mit dem benutzerdefinierten Wörterbuch — ein kürzerer Prompt lässt mehr Platz für Wörterbuch-Wörter.",
+        "placeholder": "Geben Sie einen gut interpunktierten Absatz ein, um den Transkriptionsstil zu steuern...",
+        "insertPreset": "Vorlage einfügen"
+      },
       "gpuDevice": {
         "title": "Transkriptions-GPU",
         "description": "GPU für lokale Spracherkennung"

diff --git a/src/locales/en/translation.json b/src/locales/en/translation.json
@@ -1443,6 +1443,12 @@
           "title": "Switched to Custom Setup"
         }
       },
+      "transcriptionPrompt": {
+        "title": "Transcription Prompt",
+        "description": "Whisper mimics the formatting style of this prompt — include varied punctuation (commas, dashes, quotes) to get punctuated output. In \"auto\" mode, the prompt language also affects language detection. Shares a token budget with Custom Dictionary — a shorter prompt leaves more room for dictionary words.",
+        "placeholder": "Enter a well-punctuated paragraph to guide transcription style...",
+        "insertPreset": "Insert preset"
+      },
       "gpuDevice": {
         "title": "Transcription GPU",
         "description": "GPU used for local speech-to-text"

diff --git a/src/locales/es/translation.json b/src/locales/es/translation.json
@@ -1395,6 +1395,12 @@
           "title": "Cambiado a configuración personalizada"
         }
       },
+      "transcriptionPrompt": {
+        "title": "Prompt de transcripción",
+        "description": "Whisper imita el estilo de formato de este prompt — incluye puntuación variada (comas, guiones, comillas) para obtener texto puntuado. En modo \"auto\", el idioma del prompt también afecta la detección de idioma. Comparte el presupuesto de tokens con el diccionario personalizado — un prompt más corto deja más espacio para las palabras del diccionario.",
+        "placeholder": "Escribe un párrafo bien puntuado para guiar el estilo de transcripción...",
+        "insertPreset": "Insertar preajuste"
+      },
       "gpuDevice": {
         "title": "GPU de transcripción",
         "description": "GPU usada para transcripción local"

diff --git a/src/locales/fr/translation.json b/src/locales/fr/translation.json
@@ -1395,6 +1395,12 @@
           "title": "Basculé vers la configuration personnalisée"
         }
       },
+      "transcriptionPrompt": {
+        "title": "Prompt de transcription",
+        "description": "Whisper imite le style de formatage de ce prompt — incluez une ponctuation variée (virgules, tirets, guillemets) pour obtenir un texte ponctué. En mode « auto », la langue du prompt influence aussi la détection de langue. Partage le budget de tokens avec le dictionnaire personnalisé — un prompt plus court laisse plus de place aux mots du dictionnaire.",
+        "placeholder": "Entrez un paragraphe bien ponctué pour guider le style de transcription...",
+        "insertPreset": "Insérer un modèle"
+      },
       "gpuDevice": {
         "title": "GPU de transcription",
         "description": "GPU pour la transcription locale"

diff --git a/src/locales/it/translation.json b/src/locales/it/translation.json
@@ -1395,6 +1395,12 @@
           "title": "Passato a configurazione personalizzata"
         }
       },
+      "transcriptionPrompt": {
+        "title": "Prompt di trascrizione",
+        "description": "Whisper imita lo stile di formattazione di questo prompt — includi punteggiatura varia (virgole, trattini, virgolette) per ottenere testo punteggiato. In modalità \"auto\", la lingua del prompt influenza anche il rilevamento della lingua. Condivide il budget di token con il dizionario personalizzato — un prompt più corto lascia più spazio per le parole del dizionario.",
+        "placeholder": "Inserisci un paragrafo ben punteggiato per guidare lo stile di trascrizione...",
+        "insertPreset": "Inserisci modello"
+      },
       "gpuDevice": {
         "title": "GPU trascrizione",
         "description": "GPU usata per la trascrizione locale"

diff --git a/src/locales/ja/translation.json b/src/locales/ja/translation.json
@@ -1395,6 +1395,12 @@
           "title": "カスタム設定に切り替えました"
         }
       },
+      "transcriptionPrompt": {
+        "title": "文字起こしプロンプト",
+        "description": "Whisperはこのプロンプトの書式スタイルを模倣します — 多様な句読点（読点、ダッシュ、引用符）を含めると、句読点付きの出力が得られます。「auto」モードでは、プロンプトの言語が言語検出にも影響します。カスタム辞書とトークン予算を共有します — プロンプトを短くすると辞書の単語により多くの余裕が生まれます。",
+        "placeholder": "文字起こしスタイルをガイドする句読点付きの段落を入力...",
+        "insertPreset": "プリセットを挿入"
+      },
       "gpuDevice": {
         "title": "文字起こしGPU",
         "description": "ローカル音声認識に使用するGPU"