diff --git a/src/voice-typing/VoiceTypingApp.tsx b/src/voice-typing/VoiceTypingApp.tsx index e725970..24b15a6 100644 --- a/src/voice-typing/VoiceTypingApp.tsx +++ b/src/voice-typing/VoiceTypingApp.tsx @@ -71,6 +71,9 @@ export const VoiceTypingApp = () => { const finalsRef = useRef>(new Map()); const interimRef = useRef(""); + // Cache the Simplified→Traditional conversion per final segment so a long + // dictation doesn't re-convert the whole transcript on every incoming token. + const convertedRef = useRef>(new Map()); // Stable per-position keys for the waveform bars (values shift, positions don't). const barKeys = useRef(Array.from({ length: BAR_COUNT }, (_, i) => `bar-${i}`)); @@ -91,15 +94,27 @@ export const VoiceTypingApp = () => { }, []); // Recompute display text from the raw refs, convert, and publish to the host. + // Final segments are converted once and cached; only the live interim tail is + // converted every token, so cost stays flat no matter how long the dictation. const publish = useRef(async () => {}); publish.current = async () => { - const ordered = [...finalsRef.current.entries()] - .sort((a, b) => idIndex(a[0]) - idIndex(b[0])) - .map(([, v]) => v); - const raw = ordered.join("") + interimRef.current; - const converted = (await toTraditional(raw)).trim(); - setText(converted); - emit("voicetyping://text", { text: converted }).catch(() => {}); + const entries = [...finalsRef.current.entries()].sort((a, b) => idIndex(a[0]) - idIndex(b[0])); + let finals = ""; + for (const [id, raw] of entries) { + const cached = convertedRef.current.get(id); + let conv: string; + if (cached && cached.raw === raw) { + conv = cached.conv; + } else { + conv = await toTraditional(raw); + convertedRef.current.set(id, { raw, conv }); + } + finals += conv; + } + const interim = interimRef.current ? await toTraditional(interimRef.current) : ""; + const full = (finals + interim).trim(); + setText(full); + emit("voicetyping://text", { text: full }).catch(() => {}); }; useEffect(() => { @@ -130,6 +145,7 @@ export const VoiceTypingApp = () => { const { phase: p, message } = e.payload; if (p === "start") { finalsRef.current.clear(); + convertedRef.current.clear(); interimRef.current = ""; setText(""); setError(null); @@ -172,11 +188,14 @@ export const VoiceTypingApp = () => { text) for high contrast against whatever's behind the overlay. */} {bubble && (
- {bubble} + {/* Bottom-anchored + clipped: the newest words stay visible while a + long dictation scrolls older lines off the top, so the preview + never outgrows the fixed overlay window. */} + {bubble}
)}