From bcd0e3d51723af6d497a0aa038edb6ab81ea4aa9 Mon Sep 17 00:00:00 2001
From: nanami-he <270413913+nanami-he@users.noreply.github.com>
Date: Wed, 29 Apr 2026 19:56:18 +0900
Subject: [PATCH] fix(server): CJK-aware charWidth so pet boxes align on Chinese/JP/KR terminals
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Pet cards use box-drawing borders with content padded by stringWidth().
A handful of art characters — `◉` (U+25C9), `—` (U+2014), `✦/✧`, `★`,
`Λ`, `×` — fall in Unicode's "Ambiguous East Asian Width" class. On
CJK-locale terminals (Chinese/Japanese/Korean Windows Terminal, iTerm2
under zh/ja/ko, etc.) those render as 2 columns, but charWidth() was
returning 1 — so padding under-counted and the right `│` drifted past
the top `╮`. Reproduced visually on Win11 with system code page 936.

Detection order:
  1. POSIX env vars (LC_ALL / LC_CTYPE / LANG / LANGUAGE) — covers Mac
     and Linux.
  2. Intl.DateTimeFormat resolved locale — covers Windows, where the env
     vars are usually unset but the system locale is e.g. "zh-CN".

When the locale resolves to zh/ja/ko, charWidth() additionally treats
General Punctuation (U+2010–U+2027, U+2030–U+205E), Number Forms,
Arrows, Mathematical Operators, Block Elements, Geometric Shapes,
Miscellaneous Symbols, and Dingbats as 2 cols. The lowest range added
starts at U+2010, so `Λ` (U+039B) and `×` (U+00D7) from the list above
are not covered by the new branch. Box-drawing (0x2500–0x257F) is
intentionally excluded because every mainstream terminal special-cases
those to 1-wide for TUI sanity even under a CJK locale — including them
would double the border width.

charWidth/stringWidth/padDisplay now take an optional explicit `cjk`
parameter (defaulting to the auto-detected locale) so tests are
deterministic regardless of where the suite happens to run.

Surfaced via /pet browse on Win11 zh-CN — a user report showed the
Owl/Labrador/Lion/etc. cards visibly broken.
--- server/utils.test.ts | 39 +++++++++++++++++++++------ server/utils.ts | 64 +++++++++++++++++++++++++++++++++++++++----- 2 files changed, 89 insertions(+), 14 deletions(-) diff --git a/server/utils.test.ts b/server/utils.test.ts index 61edc67..649ea5c 100644 --- a/server/utils.test.ts +++ b/server/utils.test.ts @@ -58,14 +58,37 @@ describe("charWidth", () => { expect(charWidth("🐼")).toBe(2); }); - test("special ASCII art characters = 1", () => { - // Characters used in pet ASCII art - expect(charWidth("·")).toBe(1); // middle dot (beaver eyes) - expect(charWidth("◉")).toBe(1); // bullseye (lion eyes) - expect(charWidth("•")).toBe(1); // bullet (golden/elephant eyes) - expect(charWidth("–")).toBe(1); // en dash - expect(charWidth("…")).toBe(1); // ellipsis - expect(charWidth("°")).toBe(1); // degree (beaver slap) + test("special ASCII art characters = 1 on non-CJK locales", () => { + // Characters used in pet ASCII art. Pass cjk: false explicitly so the test + // is deterministic regardless of where the suite happens to run. + expect(charWidth("·", false)).toBe(1); // middle dot (beaver eyes) + expect(charWidth("◉", false)).toBe(1); // bullseye (lion eyes) + expect(charWidth("•", false)).toBe(1); // bullet (golden/elephant eyes) + expect(charWidth("–", false)).toBe(1); // en dash + expect(charWidth("…", false)).toBe(1); // ellipsis + expect(charWidth("°", false)).toBe(1); // degree (beaver slap) + }); + + test("Ambiguous-width art characters = 2 on CJK locales", () => { + // Same characters render double-wide on Chinese/Japanese/Korean Windows + // Terminal, iTerm2 in CJK locale, etc. Without this the right border of + // any padded box drifts on those terminals. 
+ expect(charWidth("◉", true)).toBe(2); // bullseye + expect(charWidth("—", true)).toBe(2); // em dash + expect(charWidth("✦", true)).toBe(2); // four-pointed star + expect(charWidth("✧", true)).toBe(2); // four-pointed star outline + expect(charWidth("→", true)).toBe(2); // rightwards arrow + expect(charWidth("★", true)).toBe(2); // black star + }); + + test("box-drawing stays 1 even on CJK locales", () => { + // Every mainstream terminal special-cases box drawings to 1-wide for TUI + // sanity, even under a CJK locale. Borders would double otherwise. + expect(charWidth("─", true)).toBe(1); + expect(charWidth("│", true)).toBe(1); + expect(charWidth("╭", true)).toBe(1); + expect(charWidth("╮", true)).toBe(1); + expect(charWidth("├", true)).toBe(1); }); test("braille blank = 1", () => { diff --git a/server/utils.ts b/server/utils.ts index 0d23931..19722b2 100644 --- a/server/utils.ts +++ b/server/utils.ts @@ -2,9 +2,49 @@ * Shared utility functions — display width, padding, etc. */ -// ─── CJK display width (CJK = 2 cols, ASCII = 1) ───────────────────────── +// ─── CJK locale detection ───────────────────────────────────────────────── +// +// On East Asian terminals (Chinese/Japanese/Korean Windows Terminal, iTerm2 in +// CJK locale, etc.) Unicode "Ambiguous" East Asian Width characters render as +// 2 columns instead of 1. The pet art uses a few of these (`◉`, `—`, `✦`, …) +// so without locale awareness the right border of any padded box drifts on +// CJK terminals — see issue #?. +// +// Detection order: +// 1. POSIX env vars (LC_ALL / LC_CTYPE / LANG / LANGUAGE) +// 2. Intl.DateTimeFormat resolved locale — works on Windows where the env +// vars are usually unset but the system locale is e.g. "zh-CN". 
-export function charWidth(ch: string): number { +const CJK_LOCALE_RE = /^(zh|ja|ko)\b/i; + +function detectCjkLocale(): boolean { + for (const v of [ + process.env.LC_ALL, + process.env.LC_CTYPE, + process.env.LANG, + process.env.LANGUAGE, + ]) { + if (v && CJK_LOCALE_RE.test(v)) return true; + } + try { + const loc = Intl.DateTimeFormat().resolvedOptions().locale || ""; + if (CJK_LOCALE_RE.test(loc)) return true; + } catch { + // Some restricted runtimes throw — fall through to false. + } + return false; +} + +export const IS_CJK_LOCALE = detectCjkLocale(); + +// ─── Display width ──────────────────────────────────────────────────────── +// +// CJK = 2 cols, ASCII = 1, with locale-aware handling of Ambiguous-width +// characters. Box-drawing chars (0x2500–0x257F) are intentionally excluded +// from the Ambiguous-as-wide branch because every mainstream terminal +// special-cases them to 1 column for TUI sanity even under a CJK locale. + +export function charWidth(ch: string, cjk: boolean = IS_CJK_LOCALE): number { const code = ch.codePointAt(0) ?? 
0; if (code < 32 || (code >= 0x7f && code < 0xa0)) return 0; if ( @@ -22,15 +62,27 @@ export function charWidth(ch: string): number { (code >= 0x1f300 && code <= 0x1f9ff) || (code >= 0x1fa00 && code <= 0x1faff) ) return 2; + if (cjk) { + if ( + (code >= 0x2010 && code <= 0x2027) || // General Punctuation: em/en dash, quotes + (code >= 0x2030 && code <= 0x205e) || // More General Punctuation + (code >= 0x2150 && code <= 0x218f) || // Number Forms + (code >= 0x2190 && code <= 0x21ff) || // Arrows + (code >= 0x2200 && code <= 0x22ff) || // Mathematical Operators + (code >= 0x2580 && code <= 0x25ff) || // Block Elements + Geometric Shapes (covers ◉) + (code >= 0x2600 && code <= 0x26ff) || // Misc Symbols + (code >= 0x2700 && code <= 0x27bf) // Dingbats (covers ✦ ✧) + ) return 2; + } return 1; } -export function stringWidth(str: string): number { +export function stringWidth(str: string, cjk: boolean = IS_CJK_LOCALE): number { return Array.from(str.replace(/\x1b\[[0-9;]*m/g, "")) - .reduce((sum, ch) => sum + charWidth(ch), 0); + .reduce((sum, ch) => sum + charWidth(ch, cjk), 0); } -export function padDisplay(str: string, targetWidth: number): string { - const pad = Math.max(0, targetWidth - stringWidth(str)); +export function padDisplay(str: string, targetWidth: number, cjk: boolean = IS_CJK_LOCALE): string { + const pad = Math.max(0, targetWidth - stringWidth(str, cjk)); return str + " ".repeat(pad); }