Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 23 additions & 5 deletions bin/update-tables.py
Original file line number Diff line number Diff line change
Expand Up @@ -1246,12 +1246,17 @@ def parse_wchar_codepoint(wchar: str) -> int:

@dataclass(frozen=True)
class TerminalOverrides:
"""Per-terminal codepoint override ranges for narrower/wider measurements."""
"""Per-terminal codepoint override ranges for narrower/wider/zeroer measurements."""
narrower: list[tuple[str, str, str]]
wider: list[tuple[str, str, str]]
zeroer: list[tuple[str, str, str]]
narrow_zeroer: list[tuple[str, str, str]]

def items(self) -> list[tuple[str, list[tuple[str, str, str]]]]:
return [('narrower', self.narrower), ('wider', self.wider)]
return [('narrower', self.narrower),
('wider', self.wider),
('zeroer', self.zeroer),
('narrow_zeroer', self.narrow_zeroer)]


@dataclass(frozen=True)
Expand All @@ -1269,7 +1274,9 @@ def dedup_override_table(
terminal_refs: dict[str, str] = {}
for term_name, overrides in table.items():
key = (tuple(overrides.narrower),
tuple(overrides.wider))
tuple(overrides.wider),
tuple(overrides.zeroer),
tuple(overrides.narrow_zeroer))
hash_key = hashlib.sha256(repr(key).encode()).hexdigest()[:8]
if hash_key not in shared_sets:
shared_sets[hash_key] = overrides
Expand Down Expand Up @@ -1405,11 +1412,14 @@ def make_single_override(

Returns a dict mapping canonical_name to 'TerminalOverrides'.

- 'zeroer' means terminal measured 0, but wcwidth measured 2,
- 'narrower' means terminal measured 1, but wcwidth measured 2,
- 'wider' means terminal measured 2, but wcwidth measured 1.
"""
zeroer: dict[str, set[int]] = {}
narrower: dict[str, set[int]] = {}
wider: dict[str, set[int]] = {}
narrow_zeroer_map: dict[str, set[int]] = {}

for _, canonical, doc in load_ucs_detect_yaml():
test_results = doc.get('test_results', {})
Expand All @@ -1420,7 +1430,11 @@ def make_single_override(
ucs = parse_wchar_codepoint(wchar)
term_w = entry['measured_by_terminal']
wc_w = entry['measured_by_wcwidth']
if term_w == 1 and wc_w == 2:
if term_w == 0 and wc_w == 2:
zeroer.setdefault(canonical, set()).add(ucs)
elif term_w == 0 and wc_w == 1 and category == 'narrow_results':
narrow_zeroer_map.setdefault(canonical, set()).add(ucs)
elif term_w == 1 and wc_w == 2:
narrower.setdefault(canonical, set()).add(ucs)
# 'wider' entries in emoji_vs16_results are from the vs16n baseline test
# (base character measured without VS16, expected width 1). Kitty rendering
Expand All @@ -1431,11 +1445,14 @@ def make_single_override(
wider.setdefault(canonical, set()).add(ucs)

result: dict[str, TerminalOverrides] = {}
all_names = sorted(set(narrower.keys()) | set(wider.keys()))
all_names = sorted(set(zeroer.keys()) | set(narrower.keys()) | set(wider.keys())
| set(narrow_zeroer_map.keys()))
for name in all_names:
result[name] = TerminalOverrides(
narrower=values_to_hex_ranges(narrower.get(name, set())),
wider=values_to_hex_ranges(wider.get(name, set())),
zeroer=values_to_hex_ranges(zeroer.get(name, set())),
narrow_zeroer=values_to_hex_ranges(narrow_zeroer_map.get(name, set())),
)
result = {
name: data for name, data in result.items()
Expand Down Expand Up @@ -1809,6 +1826,7 @@ def get_codegen_definitions() -> Iterator[RenderDefinition]:
_make_merged_category('SFZ_OVERRIDES', make_single_override('sfz_results', kt)),
_make_merged_category('VS16_OVERRIDES', make_single_override('emoji_vs16_results', kt)),
_make_merged_category('VS15_OVERRIDES', make_single_override('emoji_vs15_results', kt)),
_make_merged_category('NARROW_OVERRIDES', make_single_override('narrow_results', kt)),
])
yield from fetch_override_grapheme_data(kt)
yield TermProgramTableRenderDef.new()
Expand Down
2 changes: 1 addition & 1 deletion code_templates/grapheme_override_per_terminal.py.j2
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ Terminals: {{ terminals|join(', ') }}
"""
# pylint: skip-file
GRAPHEMES = {
{%- for grapheme_str, terminal_width in graphemes.items() %}
{%- for grapheme_str, terminal_width in graphemes | dictsort %}
'{{ grapheme_str }}': {{ terminal_width }},
{%- endfor %}
}
2 changes: 1 addition & 1 deletion code_templates/grapheme_registry.py.j2
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ This code generated by python wcwidth using ucs-detect project data.
"""
# pylint: skip-file
_REGISTRY = {
{%- for terminal, hash_key in registry.items() %}
{%- for terminal, hash_key in registry | dictsort %}
{{ terminal.__repr__() }}: {{ hash_key.__repr__() }},
{%- endfor %}
}
Expand Down
11 changes: 7 additions & 4 deletions code_templates/table_overrides.py.j2
Original file line number Diff line number Diff line change
Expand Up @@ -4,30 +4,33 @@ Terminal override tables keyed by canonical terminal software name.
This code generated by python wcwidth using ucs-detect project data.
"""
# pylint: skip-file
from __future__ import annotations
{%- for category in categories %}
{%- set varname = category.variable_name -%}
{%- set shared_sets = category.shared_sets -%}
{%- set terminal_refs = category.terminal_refs -%}
{%- set set_terminals = category.set_terminals -%}
{%- if shared_sets %}
{%- for hash_key, overrides in shared_sets.items() %}
{%- for hash_key, overrides in shared_sets | dictsort %}
{%- set terminals = set_terminals.get(hash_key, ()) %}

# For terminals: {{ terminals | join(', ') }}
_SET_{{ varname }}_{{ hash_key | upper }} = {
_SET_{{ varname }}_{{ hash_key | upper }}: dict[str, tuple[tuple[int, int], ...]] = {
{%- for direction, ranges in overrides.items() %}
{%- if ranges %}
'{{ direction }}': (
{%- for hex_start, hex_end, txt_description in ranges %}
({{ hex_start }}, {{ hex_end }},), # {{ txt_description }}
{%- endfor %}
),
{%- endif %}
{%- endfor %}
}
{%- endfor %}
{%- endif %}

{{ varname }} = {
{%- for term_name, hash_key in terminal_refs.items() %}
{{ varname }}: dict[str, dict[str, tuple[tuple[int, int], ...]]] = {
{%- for term_name, hash_key in terminal_refs | dictsort %}
'{{ term_name }}': _SET_{{ varname }}_{{ hash_key | upper }},
{%- endfor %}
}
Expand Down
16 changes: 11 additions & 5 deletions docs/intro.rst
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ See also:
- `Grapheme Clusters and Terminal Emulators`_
- `terminal-unicode-core.tex`_
- `State of Terminal Emulators in 2025`_
- `Report of terminals supporting Graphemes (2027)`_
- `Perfecting Terminal Character Width Using Correction Tables (2026)`_

The `jquast/ucs-detect`_ project publishes automatic results of compliance to our standard for Wide
character, Languages, grapheme clustering, complex or combining scripts, emojis, zero-width joiner,
Expand Down Expand Up @@ -417,9 +417,10 @@ possible timeout, slow network, or non-response when working with "dumb terminal
Corrections
-----------

Corrections are automatically applied depending on detected or given terminal software name
Corrections may be automatically applied depending on the detected or given terminal software name
beginning with wcwidth release 0.8.0. This allows correcting widths for terminal software that
differs from the standard. These corrections are sourced from the `jquast/ucs-detect`_ project.
differs from the python wcwidth standard_. These corrections are sourced from the
`jquast/ucs-detect`_ project.

The ``term_program`` parameter is available on all width-measuring functions: `wcstwidth()`_,
`width()`_, `ljust()`_, `rjust()`_, `center()`_, `wrap()`_, and `clip()`_.
Expand Down Expand Up @@ -653,10 +654,14 @@ languages:
History
=======

0.8.1 *2026-06-08*
* **Improved** `wcstwidth()`_ with new ``zeroer``, ``narrow_wider``, and ``narrow_zeroer``
Corrections_. `PR #226`_

0.8.0 *2026-06-05*
* **New** support for Variation Selector 15 Emojis as narrow, `Issue #211`_.
* **New** argument, ``term_program`` for `wcstwidth()`_, `width()`_, `clip()`_, `wrap()`_,
`ljust()`_, `rjust()`_, and `center()`_. ``False`` disables corrections; ``True``
`ljust()`_, `rjust()`_, and `center()`_. ``False`` disables Corrections_; ``True``
auto-detects by ``TERM_PROGRAM`` or ``TERM``; string values accept canonical names matching
`list_term_programs()`_. `wcstwidth()`_ defaults to ``True``; all other functions
default to ``False``.
Expand Down Expand Up @@ -901,6 +906,7 @@ https://www.cl.cam.ac.uk/~mgk25/ucs/wcwidth.c::
.. _`PR #221`: https://github.com/jquast/wcwidth/pull/221
.. _`PR #223`: https://github.com/jquast/wcwidth/pull/223
.. _`PR #224`: https://github.com/jquast/wcwidth/pull/224
.. _`PR #226`: https://github.com/jquast/wcwidth/pull/226
.. _`Issue #101`: https://github.com/jquast/wcwidth/issues/101
.. _`Issue #155`: https://github.com/jquast/wcwidth/issues/155
.. _`Issue #190`: https://github.com/jquast/wcwidth/issues/190
Expand Down Expand Up @@ -980,7 +986,7 @@ https://www.cl.cam.ac.uk/~mgk25/ucs/wcwidth.c::
.. _`Grapheme Clusters and Terminal Emulators`: https://mitchellh.com/writing/grapheme-clusters-in-terminals
.. _`terminal-unicode-core.tex`: https://github.com/contour-terminal/terminal-unicode-core/blob/master/spec/terminal-unicode-core.tex
.. _`State of Terminal Emulators in 2025`: https://www.jeffquast.com/post/state-of-terminal-emulation-2025/
.. _`Report of terminals supporting Graphemes (2027)`: https://ucs-detect.readthedocs.io/results.html#terminal-features
.. _`Perfecting Terminal Character Width Using Correction Tables (2026)`: https://www.jeffquast.com/post/perfecting-terminal-character-width-using-correction-tables/
.. _XTVERSION: https://vtdn.dev/docs/dcs/xtversion/
.. _ENQ: https://documentation.help/PuTTY/config-answerback.html
.. _detectable: https://ucs-detect.readthedocs.io/results.html#terminal-identification
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ requires = [ "hatchling" ]

[project]
name = "wcwidth"
version = "0.8.0" # don't forget to also update wcwidth/__init__.py:__version__
version = "0.8.2" # don't forget to also update wcwidth/__init__.py:__version__
description = "Measures the displayed width of unicode strings in a terminal"
readme = "README.rst"
keywords = [
Expand Down
99 changes: 98 additions & 1 deletion tests/test_term_overrides.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,11 @@
# local
import wcwidth
import wcwidth.table_grapheme_overrides as grapheme_overrides
from wcwidth._constants import _merge_ranges, resolve_terminal, list_term_programs
from wcwidth._constants import (_EMPTY_OVERRIDES,
_merge_ranges,
resolve_terminal,
get_term_overrides,
list_term_programs)
from wcwidth.table_overrides import VS15_OVERRIDES


Expand Down Expand Up @@ -362,6 +366,99 @@ def test_sfz_override_foot():
assert wcwidth.wcstwidth('\U0001F3FB', term_program='foot') == 1


@pytest.mark.parametrize('term_program,expected', [
('kitty', 0),
('bobcat', 0),
(False, 2),
])
def test_sfz_zeroer(term_program, expected):
"""Standalone Fitzpatrick modifiers zeroed per terminal."""
assert wcwidth.wcswidth('\U0001F3FB') == 2
assert wcwidth.wcstwidth('\U0001F3FB', term_program=term_program) == expected


@pytest.mark.parametrize('kwargs,expected', [
({}, 0),
({'control_codes': 'ignore'}, 0),
])
def test_width_zeroer(kwargs, expected):
"""Width() zeroes standalone Fitzpatrick modifiers for kitty."""
assert wcwidth.width('\U0001F3FB', term_program='kitty', **kwargs) == expected


def test_empty_overrides_includes_zeroer():
"""_EMPTY_OVERRIDES has six empty tuple fields."""
assert _EMPTY_OVERRIDES.narrower == ()
assert _EMPTY_OVERRIDES.vs16_narrower == ()
assert _EMPTY_OVERRIDES.vs15_wider == ()
assert _EMPTY_OVERRIDES.zeroer == ()
assert _EMPTY_OVERRIDES.narrow_wider == ()
assert _EMPTY_OVERRIDES.narrow_zeroer == ()


def test_get_term_overrides_returns_empty_when_no_overrides():
"""get_term_overrides returns _EMPTY_OVERRIDES when terminal has no override data."""
get_term_overrides.cache_clear()
overrides = get_term_overrides('no-such-terminal')
assert overrides is _EMPTY_OVERRIDES


def test_get_term_overrides_reads_narrow_zeroer_key():
"""get_term_overrides reads 'narrow_zeroer' key from NARROW_OVERRIDES."""
get_term_overrides.cache_clear()
overrides = get_term_overrides('kitty')
assert len(overrides.narrow_zeroer) == 9
assert overrides.narrow_zeroer[0] == (0x00AD, 0x00AD)


def test_get_term_overrides_narrow_wider_still_empty():
"""get_term_overrides narrow_wider is empty when no 'wider' entries exist."""
get_term_overrides.cache_clear()
overrides = get_term_overrides('konsole')
assert overrides.narrow_wider == ()


@pytest.mark.parametrize('codepoint', [
'\u00ad',
'\u0600',
'\u0605',
'\u06dd',
'\u070f',
'\u0890',
'\u0891',
'\u08e2',
'\U000110bd',
'\U000110cd',
])
def test_narrow_zeroer_cf_codepoints(codepoint):
"""Cf format characters are zeroed by kitty/konsole/wezterm narrow_zeroer."""
assert wcwidth.wcswidth(codepoint) == 1
assert wcwidth.wcstwidth(codepoint, term_program='kitty') == 0
assert wcwidth.wcstwidth(codepoint, term_program='konsole') == 0
assert wcwidth.wcstwidth(codepoint, term_program='wezterm') == 0


@pytest.mark.parametrize('func', [wcwidth.wcstwidth, wcwidth.width])
def test_narrow_zeroer_not_applied_for_other_terminals(func):
"""Terminals without narrow overrides keep width 1 for Cf characters."""
assert func('\u0600', term_program='xterm') == 1
assert func('\u0600', term_program='ghostty') == 1
assert func('\u0600', term_program='') == 1


@pytest.mark.parametrize('func,term_program,expected', [
(wcwidth.wcstwidth, 'kitty', 0),
(wcwidth.width, 'kitty', 0),
(wcwidth.wcstwidth, 'konsole', 0),
(wcwidth.width, 'konsole', 0),
(wcwidth.wcstwidth, 'wezterm', 0),
(wcwidth.width, 'wezterm', 0),
])
def test_narrow_zeroer_width(func, term_program, expected):
"""Width() matches wcstwidth() for narrow_zeroer overrides."""
assert func('\u0600', term_program=term_program) == expected


@pytest.mark.parametrize('value,expected', [
(' KITTY ', 'kitty'),
(' ', None),
Expand Down
2 changes: 1 addition & 1 deletion ucs-detect
Submodule ucs-detect updated 958 files
2 changes: 1 addition & 1 deletion wcwidth/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,4 +78,4 @@
# Using 'hatchling', it does not seem to provide the pyproject.toml nicety, "dynamic = ['version']"
# like flit_core, maybe there is some better way but for now we have to duplicate it in both places
# Prefer the installed distribution version when available (helps test environments)
__version__ = '0.8.0' # don't forget to also update pyproject.toml:version
__version__ = '0.8.2' # don't forget to also update pyproject.toml:version
21 changes: 17 additions & 4 deletions wcwidth/_constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,8 @@
SRI_OVERRIDES,
VS15_OVERRIDES,
VS16_OVERRIDES,
WIDE_OVERRIDES)
WIDE_OVERRIDES,
NARROW_OVERRIDES)
from .unicode_versions import list_versions
from .table_term_programs import ALIASES, KNOWN_TERMINALS

Expand Down Expand Up @@ -97,9 +98,12 @@ class TerminalOverrides(NamedTuple):
narrower: _RangeTuple
vs16_narrower: _RangeTuple
vs15_wider: _RangeTuple
zeroer: _RangeTuple
narrow_wider: _RangeTuple
narrow_zeroer: _RangeTuple


_EMPTY_OVERRIDES = TerminalOverrides((), (), ())
_EMPTY_OVERRIDES = TerminalOverrides((), (), (), (), (), ())


@lru_cache(maxsize=32)
Expand All @@ -113,14 +117,23 @@ def get_term_overrides(term_canonical: str) -> TerminalOverrides:
)
vs16_narrower = VS16_OVERRIDES.get(term_canonical, {}).get('narrower', ())
vs15_wider = VS15_OVERRIDES.get(term_canonical, {}).get('wider', ())
zeroer = _merge_ranges(
WIDE_OVERRIDES.get(term_canonical, {}).get('zeroer', ()),
SRI_OVERRIDES.get(term_canonical, {}).get('zeroer', ()),
SFZ_OVERRIDES.get(term_canonical, {}).get('zeroer', ()),
)
narrow_wider = NARROW_OVERRIDES.get(term_canonical, {}).get('wider', ())
narrow_zeroer = NARROW_OVERRIDES.get(term_canonical, {}).get('narrow_zeroer', ())
# vs15_narrower intentionally excluded: no known terminal narrows VS15
# vs16_wider intentionally excluded: any 'wider' entries in emoji_vs16_results
# ucs-detect YAML are from the vs16n baseline test (base char without VS16),
# not actual VS16 correction data.

if not (narrower or vs16_narrower or vs15_wider):
if not (narrower or vs16_narrower or vs15_wider or zeroer
or narrow_wider or narrow_zeroer):
return _EMPTY_OVERRIDES
return TerminalOverrides(narrower, vs16_narrower, vs15_wider)
return TerminalOverrides(narrower, vs16_narrower, vs15_wider, zeroer,
narrow_wider, narrow_zeroer)


@lru_cache(maxsize=32)
Expand Down
Loading
Loading