diff --git a/server/utils.test.ts b/server/utils.test.ts
index 61edc67..649ea5c 100644
--- a/server/utils.test.ts
+++ b/server/utils.test.ts
@@ -58,14 +58,37 @@ describe("charWidth", () => {
     expect(charWidth("🐼")).toBe(2);
   });
 
-  test("special ASCII art characters = 1", () => {
-    // Characters used in pet ASCII art
-    expect(charWidth("·")).toBe(1); // middle dot (beaver eyes)
-    expect(charWidth("◉")).toBe(1); // bullseye (lion eyes)
-    expect(charWidth("•")).toBe(1); // bullet (golden/elephant eyes)
-    expect(charWidth("–")).toBe(1); // en dash
-    expect(charWidth("…")).toBe(1); // ellipsis
-    expect(charWidth("°")).toBe(1); // degree (beaver slap)
+  test("special ASCII art characters = 1 on non-CJK locales", () => {
+    // Characters used in pet ASCII art. Pass cjk: false explicitly so the test
+    // is deterministic regardless of where the suite happens to run.
+    expect(charWidth("·", false)).toBe(1); // middle dot (beaver eyes)
+    expect(charWidth("◉", false)).toBe(1); // bullseye (lion eyes)
+    expect(charWidth("•", false)).toBe(1); // bullet (golden/elephant eyes)
+    expect(charWidth("–", false)).toBe(1); // en dash
+    expect(charWidth("…", false)).toBe(1); // ellipsis
+    expect(charWidth("°", false)).toBe(1); // degree (beaver slap)
+  });
+
+  test("Ambiguous-width art characters = 2 on CJK locales", () => {
+    // Same characters render double-wide on Chinese/Japanese/Korean Windows
+    // Terminal, iTerm2 in CJK locale, etc. Without this the right border of
+    // any padded box drifts on those terminals.
+    expect(charWidth("◉", true)).toBe(2); // bullseye
+    expect(charWidth("—", true)).toBe(2); // em dash
+    expect(charWidth("✦", true)).toBe(2); // four-pointed star
+    expect(charWidth("✧", true)).toBe(2); // four-pointed star outline
+    expect(charWidth("→", true)).toBe(2); // rightwards arrow
+    expect(charWidth("★", true)).toBe(2); // black star
+  });
+
+  test("box-drawing stays 1 even on CJK locales", () => {
+    // Every mainstream terminal special-cases box drawings to 1-wide for TUI
+    // sanity, even under a CJK locale. Borders would double otherwise.
+    expect(charWidth("─", true)).toBe(1);
+    expect(charWidth("│", true)).toBe(1);
+    expect(charWidth("╭", true)).toBe(1);
+    expect(charWidth("╮", true)).toBe(1);
+    expect(charWidth("├", true)).toBe(1);
   });
 
   test("braille blank = 1", () => {
diff --git a/server/utils.ts b/server/utils.ts
index 0d23931..19722b2 100644
--- a/server/utils.ts
+++ b/server/utils.ts
@@ -2,9 +2,54 @@
  * Shared utility functions — display width, padding, etc.
  */
 
-// ─── CJK display width (CJK = 2 cols, ASCII = 1) ─────────────────────────
+// ─── CJK locale detection ────────────────────────────────────────────────
+//
+// On East Asian terminals (Chinese/Japanese/Korean Windows Terminal, iTerm2 in
+// CJK locale, etc.) Unicode "Ambiguous" East Asian Width characters render as
+// 2 columns instead of 1. The pet art uses a few of these (`◉`, `—`, `✦`, …)
+// so without locale awareness the right border of any padded box drifts on
+// CJK terminals — see issue #?.
+//
+// Detection order:
+//   1. POSIX env vars (LC_ALL / LC_CTYPE / LANG / LANGUAGE)
+//   2. Intl.DateTimeFormat resolved locale — works on Windows where the env
+//      vars are usually unset but the system locale is e.g. "zh-CN".
 
-export function charWidth(ch: string): number {
+// Matches a locale tag whose leading language subtag is zh, ja or ko. A
+// negative lookahead is used rather than `\b`: `_` is a regex word character,
+// so `\b` never matches inside POSIX values such as "zh_CN.UTF-8".
+const CJK_LOCALE_RE = /^(zh|ja|ko)(?![a-z])/i;
+
+/** Returns true if any checked locale env var, or the Intl default locale, starts with zh/ja/ko. */
+function detectCjkLocale(): boolean {
+  for (const v of [
+    process.env.LC_ALL,
+    process.env.LC_CTYPE,
+    process.env.LANG,
+    process.env.LANGUAGE,
+  ]) {
+    if (v && CJK_LOCALE_RE.test(v)) return true;
+  }
+  try {
+    const loc = Intl.DateTimeFormat().resolvedOptions().locale || "";
+    if (CJK_LOCALE_RE.test(loc)) return true;
+  } catch {
+    // Some restricted runtimes throw — fall through to false.
+  }
+  return false;
+}
+
+/** Result of detectCjkLocale(), computed once at module load; default for the `cjk` parameters below. */
+export const IS_CJK_LOCALE = detectCjkLocale();
+
+// ─── Display width ───────────────────────────────────────────────────────
+//
+// CJK = 2 cols, ASCII = 1, with locale-aware handling of Ambiguous-width
+// characters. Box-drawing chars (0x2500–0x257F) are intentionally excluded
+// from the Ambiguous-as-wide branch because every mainstream terminal
+// special-cases them to 1 column for TUI sanity even under a CJK locale.
+
+export function charWidth(ch: string, cjk: boolean = IS_CJK_LOCALE): number {
   const code = ch.codePointAt(0) ?? 0;
   if (code < 32 || (code >= 0x7f && code < 0xa0)) return 0;
   if (
@@ -22,15 +67,29 @@ export function charWidth(ch: string): number {
     (code >= 0x1f300 && code <= 0x1f9ff) ||
     (code >= 0x1fa00 && code <= 0x1faff)
   ) return 2;
+  if (cjk) {
+    if (
+      (code >= 0x2010 && code <= 0x2027) || // General Punctuation: em/en dash, quotes
+      (code >= 0x2030 && code <= 0x205e) || // More General Punctuation
+      (code >= 0x2150 && code <= 0x218f) || // Number Forms
+      (code >= 0x2190 && code <= 0x21ff) || // Arrows
+      (code >= 0x2200 && code <= 0x22ff) || // Mathematical Operators
+      (code >= 0x2580 && code <= 0x25ff) || // Block Elements + Geometric Shapes (covers ◉)
+      (code >= 0x2600 && code <= 0x26ff) || // Misc Symbols
+      (code >= 0x2700 && code <= 0x27bf) // Dingbats (covers ✦ ✧)
+    ) return 2;
+  }
   return 1;
 }
 
-export function stringWidth(str: string): number {
+/** Display width of `str`: sum of charWidth over its code points after removing `ESC[...m` sequences. */
+export function stringWidth(str: string, cjk: boolean = IS_CJK_LOCALE): number {
   return Array.from(str.replace(/\x1b\[[0-9;]*m/g, ""))
-    .reduce((sum, ch) => sum + charWidth(ch), 0);
+    .reduce((sum, ch) => sum + charWidth(ch, cjk), 0);
 }
 
-export function padDisplay(str: string, targetWidth: number): string {
-  const pad = Math.max(0, targetWidth - stringWidth(str));
+/** Appends spaces until `str` reaches `targetWidth` display columns; wider strings are returned unchanged. */
+export function padDisplay(str: string, targetWidth: number, cjk: boolean = IS_CJK_LOCALE): string {
+  const pad = Math.max(0, targetWidth - stringWidth(str, cjk));
   return str + " ".repeat(pad);
 }