From e496b44f9bb838bf743b01f8f6148a9f650312dc Mon Sep 17 00:00:00 2001 From: Romain Beauxis Date: Thu, 5 Jun 2025 07:58:10 -0500 Subject: [PATCH 01/24] Bump version, tag. --- CHANGES.md | 2 +- dune-project | 2 +- sedlex.opam | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/CHANGES.md b/CHANGES.md index 67f8477..bb1e2b2 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,4 +1,4 @@ -# dev +# 3.6 (2025-01-05) - Fixed one of the ranges implementing Implement Corrigendum #1: UTF-8 Shortest Form for 4-bytes long characters (#171) diff --git a/dune-project b/dune-project index a5de262..4aec9de 100644 --- a/dune-project +++ b/dune-project @@ -1,5 +1,5 @@ (lang dune 3.0) -(version 3.5) +(version 3.6) (name sedlex) (source (github ocaml-community/sedlex)) (license MIT) diff --git a/sedlex.opam b/sedlex.opam index 45dd539..fdde84f 100644 --- a/sedlex.opam +++ b/sedlex.opam @@ -1,6 +1,6 @@ # This file is generated by dune, edit dune-project instead opam-version: "2.0" -version: "3.5" +version: "3.6" synopsis: "An OCaml lexer generator for Unicode" description: """ sedlex is a lexer generator for OCaml. 
It is similar to ocamllex, but supports From 7fae55f1acb4e509f616f855ac7db68deaad9ec8 Mon Sep 17 00:00:00 2001 From: Romain Beauxis Date: Thu, 11 Sep 2025 15:22:41 -0500 Subject: [PATCH 02/24] Update to Unicode 17.0.0 (#172) --- CHANGES.md | 3 + dune-project | 2 +- examples/regressions.ml | 2 +- examples/unicode_old.ml | 1910 ++++++++++++++++++----------------- sedlex.opam | 2 +- src/generator/data/base_url | 2 +- src/syntax/unicode.ml | 1567 ++++++++++++++-------------- 7 files changed, 1778 insertions(+), 1710 deletions(-) diff --git a/CHANGES.md b/CHANGES.md index bb1e2b2..ff42aec 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,3 +1,6 @@ +# 3.7 (unreleased) +- Update to unicode 17.0.0 + # 3.6 (2025-01-05) - Fixed one of the ranges implementing Implement Corrigendum #1: UTF-8 Shortest Form diff --git a/dune-project b/dune-project index 4aec9de..c678939 100644 --- a/dune-project +++ b/dune-project @@ -1,5 +1,5 @@ (lang dune 3.0) -(version 3.6) +(version 3.7) (name sedlex) (source (github ocaml-community/sedlex)) (license MIT) diff --git a/examples/regressions.ml b/examples/regressions.ml index 387818d..03ab9d6 100644 --- a/examples/regressions.ml +++ b/examples/regressions.ml @@ -3,7 +3,7 @@ module CSet = Sedlex_ppx.Sedlex_cset module Unicode = Sedlex_ppx.Unicode -let test_versions = ("15.0.0", "16.0.0") +let test_versions = ("16.0.0", "17.0.0") let regressions = [ (* Example *) diff --git a/examples/unicode_old.ml b/examples/unicode_old.ml index a269af9..7baa14e 100644 --- a/examples/unicode_old.ml +++ b/examples/unicode_old.ml @@ -4,7 +4,8 @@ (* Edit gen_unicode.ml.inc instead. 
*) -let version = "15.0.0" + +let version = "16.0.0" module Categories = struct @@ -23,7 +24,7 @@ module Categories = struct 0x530, 0x530; 0x557, 0x558; 0x58b, 0x58c; 0x590, 0x590; 0x5c8, 0x5cf; 0x5eb, 0x5ee; 0x5f5, 0x5ff; 0x70e, 0x70e; 0x74b, 0x74c; 0x7b2, 0x7bf; 0x7fb, 0x7fc; 0x82e, 0x82f; 0x83f, 0x83f; 0x85c, 0x85d; 0x85f, 0x85f; - 0x86b, 0x86f; 0x88f, 0x88f; 0x892, 0x897; 0x984, 0x984; 0x98d, 0x98e; + 0x86b, 0x86f; 0x88f, 0x88f; 0x892, 0x896; 0x984, 0x984; 0x98d, 0x98e; 0x991, 0x992; 0x9a9, 0x9a9; 0x9b1, 0x9b1; 0x9b3, 0x9b5; 0x9ba, 0x9bb; 0x9c5, 0x9c6; 0x9c9, 0x9ca; 0x9cf, 0x9d6; 0x9d8, 0x9db; 0x9de, 0x9de; 0x9e4, 0x9e5; 0x9ff, 0xa00; 0xa04, 0xa04; 0xa0b, 0xa0e; 0xa11, 0xa12; @@ -61,106 +62,111 @@ module Categories = struct 0x1879, 0x187f; 0x18ab, 0x18af; 0x18f6, 0x18ff; 0x191f, 0x191f; 0x192c, 0x192f; 0x193c, 0x193f; 0x1941, 0x1943; 0x196e, 0x196f; 0x1975, 0x197f; 0x19ac, 0x19af; 0x19ca, 0x19cf; 0x19db, 0x19dd; 0x1a1c, 0x1a1d; 0x1a5f, 0x1a5f; 0x1a7d, 0x1a7e; - 0x1a8a, 0x1a8f; 0x1a9a, 0x1a9f; 0x1aae, 0x1aaf; 0x1acf, 0x1aff; 0x1b4d, 0x1b4f; - 0x1b7f, 0x1b7f; 0x1bf4, 0x1bfb; 0x1c38, 0x1c3a; 0x1c4a, 0x1c4c; 0x1c89, 0x1c8f; - 0x1cbb, 0x1cbc; 0x1cc8, 0x1ccf; 0x1cfb, 0x1cff; 0x1f16, 0x1f17; 0x1f1e, 0x1f1f; - 0x1f46, 0x1f47; 0x1f4e, 0x1f4f; 0x1f58, 0x1f58; 0x1f5a, 0x1f5a; 0x1f5c, 0x1f5c; - 0x1f5e, 0x1f5e; 0x1f7e, 0x1f7f; 0x1fb5, 0x1fb5; 0x1fc5, 0x1fc5; 0x1fd4, 0x1fd5; - 0x1fdc, 0x1fdc; 0x1ff0, 0x1ff1; 0x1ff5, 0x1ff5; 0x1fff, 0x1fff; 0x2065, 0x2065; - 0x2072, 0x2073; 0x208f, 0x208f; 0x209d, 0x209f; 0x20c1, 0x20cf; 0x20f1, 0x20ff; - 0x218c, 0x218f; 0x2427, 0x243f; 0x244b, 0x245f; 0x2b74, 0x2b75; 0x2b96, 0x2b96; - 0x2cf4, 0x2cf8; 0x2d26, 0x2d26; 0x2d28, 0x2d2c; 0x2d2e, 0x2d2f; 0x2d68, 0x2d6e; - 0x2d71, 0x2d7e; 0x2d97, 0x2d9f; 0x2da7, 0x2da7; 0x2daf, 0x2daf; 0x2db7, 0x2db7; - 0x2dbf, 0x2dbf; 0x2dc7, 0x2dc7; 0x2dcf, 0x2dcf; 0x2dd7, 0x2dd7; 0x2ddf, 0x2ddf; - 0x2e5e, 0x2e7f; 0x2e9a, 0x2e9a; 0x2ef4, 0x2eff; 0x2fd6, 0x2fef; 0x2ffc, 0x2fff; - 0x3040, 0x3040; 0x3097, 0x3098; 
0x3100, 0x3104; 0x3130, 0x3130; 0x318f, 0x318f; - 0x31e4, 0x31ef; 0x321f, 0x321f; 0xa48d, 0xa48f; 0xa4c7, 0xa4cf; 0xa62c, 0xa63f; - 0xa6f8, 0xa6ff; 0xa7cb, 0xa7cf; 0xa7d2, 0xa7d2; 0xa7d4, 0xa7d4; 0xa7da, 0xa7f1; - 0xa82d, 0xa82f; 0xa83a, 0xa83f; 0xa878, 0xa87f; 0xa8c6, 0xa8cd; 0xa8da, 0xa8df; - 0xa954, 0xa95e; 0xa97d, 0xa97f; 0xa9ce, 0xa9ce; 0xa9da, 0xa9dd; 0xa9ff, 0xa9ff; - 0xaa37, 0xaa3f; 0xaa4e, 0xaa4f; 0xaa5a, 0xaa5b; 0xaac3, 0xaada; 0xaaf7, 0xab00; - 0xab07, 0xab08; 0xab0f, 0xab10; 0xab17, 0xab1f; 0xab27, 0xab27; 0xab2f, 0xab2f; - 0xab6c, 0xab6f; 0xabee, 0xabef; 0xabfa, 0xabff; 0xd7a4, 0xd7af; 0xd7c7, 0xd7ca; - 0xd7fc, 0xd7ff; 0xfa6e, 0xfa6f; 0xfada, 0xfaff; 0xfb07, 0xfb12; 0xfb18, 0xfb1c; - 0xfb37, 0xfb37; 0xfb3d, 0xfb3d; 0xfb3f, 0xfb3f; 0xfb42, 0xfb42; 0xfb45, 0xfb45; - 0xfbc3, 0xfbd2; 0xfd90, 0xfd91; 0xfdc8, 0xfdce; 0xfdd0, 0xfdef; 0xfe1a, 0xfe1f; - 0xfe53, 0xfe53; 0xfe67, 0xfe67; 0xfe6c, 0xfe6f; 0xfe75, 0xfe75; 0xfefd, 0xfefe; - 0xff00, 0xff00; 0xffbf, 0xffc1; 0xffc8, 0xffc9; 0xffd0, 0xffd1; 0xffd8, 0xffd9; - 0xffdd, 0xffdf; 0xffe7, 0xffe7; 0xffef, 0xfff8; 0xfffe, 0xffff; 0x1000c, 0x1000c; - 0x10027, 0x10027; 0x1003b, 0x1003b; 0x1003e, 0x1003e; 0x1004e, 0x1004f; 0x1005e, 0x1007f; - 0x100fb, 0x100ff; 0x10103, 0x10106; 0x10134, 0x10136; 0x1018f, 0x1018f; 0x1019d, 0x1019f; - 0x101a1, 0x101cf; 0x101fe, 0x1027f; 0x1029d, 0x1029f; 0x102d1, 0x102df; 0x102fc, 0x102ff; - 0x10324, 0x1032c; 0x1034b, 0x1034f; 0x1037b, 0x1037f; 0x1039e, 0x1039e; 0x103c4, 0x103c7; - 0x103d6, 0x103ff; 0x1049e, 0x1049f; 0x104aa, 0x104af; 0x104d4, 0x104d7; 0x104fc, 0x104ff; - 0x10528, 0x1052f; 0x10564, 0x1056e; 0x1057b, 0x1057b; 0x1058b, 0x1058b; 0x10593, 0x10593; - 0x10596, 0x10596; 0x105a2, 0x105a2; 0x105b2, 0x105b2; 0x105ba, 0x105ba; 0x105bd, 0x105ff; - 0x10737, 0x1073f; 0x10756, 0x1075f; 0x10768, 0x1077f; 0x10786, 0x10786; 0x107b1, 0x107b1; - 0x107bb, 0x107ff; 0x10806, 0x10807; 0x10809, 0x10809; 0x10836, 0x10836; 0x10839, 0x1083b; - 0x1083d, 0x1083e; 0x10856, 0x10856; 0x1089f, 0x108a6; 
0x108b0, 0x108df; 0x108f3, 0x108f3; - 0x108f6, 0x108fa; 0x1091c, 0x1091e; 0x1093a, 0x1093e; 0x10940, 0x1097f; 0x109b8, 0x109bb; - 0x109d0, 0x109d1; 0x10a04, 0x10a04; 0x10a07, 0x10a0b; 0x10a14, 0x10a14; 0x10a18, 0x10a18; - 0x10a36, 0x10a37; 0x10a3b, 0x10a3e; 0x10a49, 0x10a4f; 0x10a59, 0x10a5f; 0x10aa0, 0x10abf; - 0x10ae7, 0x10aea; 0x10af7, 0x10aff; 0x10b36, 0x10b38; 0x10b56, 0x10b57; 0x10b73, 0x10b77; - 0x10b92, 0x10b98; 0x10b9d, 0x10ba8; 0x10bb0, 0x10bff; 0x10c49, 0x10c7f; 0x10cb3, 0x10cbf; - 0x10cf3, 0x10cf9; 0x10d28, 0x10d2f; 0x10d3a, 0x10e5f; 0x10e7f, 0x10e7f; 0x10eaa, 0x10eaa; - 0x10eae, 0x10eaf; 0x10eb2, 0x10efc; 0x10f28, 0x10f2f; 0x10f5a, 0x10f6f; 0x10f8a, 0x10faf; - 0x10fcc, 0x10fdf; 0x10ff7, 0x10fff; 0x1104e, 0x11051; 0x11076, 0x1107e; 0x110c3, 0x110cc; - 0x110ce, 0x110cf; 0x110e9, 0x110ef; 0x110fa, 0x110ff; 0x11135, 0x11135; 0x11148, 0x1114f; - 0x11177, 0x1117f; 0x111e0, 0x111e0; 0x111f5, 0x111ff; 0x11212, 0x11212; 0x11242, 0x1127f; - 0x11287, 0x11287; 0x11289, 0x11289; 0x1128e, 0x1128e; 0x1129e, 0x1129e; 0x112aa, 0x112af; - 0x112eb, 0x112ef; 0x112fa, 0x112ff; 0x11304, 0x11304; 0x1130d, 0x1130e; 0x11311, 0x11312; - 0x11329, 0x11329; 0x11331, 0x11331; 0x11334, 0x11334; 0x1133a, 0x1133a; 0x11345, 0x11346; - 0x11349, 0x1134a; 0x1134e, 0x1134f; 0x11351, 0x11356; 0x11358, 0x1135c; 0x11364, 0x11365; - 0x1136d, 0x1136f; 0x11375, 0x113ff; 0x1145c, 0x1145c; 0x11462, 0x1147f; 0x114c8, 0x114cf; - 0x114da, 0x1157f; 0x115b6, 0x115b7; 0x115de, 0x115ff; 0x11645, 0x1164f; 0x1165a, 0x1165f; - 0x1166d, 0x1167f; 0x116ba, 0x116bf; 0x116ca, 0x116ff; 0x1171b, 0x1171c; 0x1172c, 0x1172f; + 0x1a8a, 0x1a8f; 0x1a9a, 0x1a9f; 0x1aae, 0x1aaf; 0x1acf, 0x1aff; 0x1b4d, 0x1b4d; + 0x1bf4, 0x1bfb; 0x1c38, 0x1c3a; 0x1c4a, 0x1c4c; 0x1c8b, 0x1c8f; 0x1cbb, 0x1cbc; + 0x1cc8, 0x1ccf; 0x1cfb, 0x1cff; 0x1f16, 0x1f17; 0x1f1e, 0x1f1f; 0x1f46, 0x1f47; + 0x1f4e, 0x1f4f; 0x1f58, 0x1f58; 0x1f5a, 0x1f5a; 0x1f5c, 0x1f5c; 0x1f5e, 0x1f5e; + 0x1f7e, 0x1f7f; 0x1fb5, 0x1fb5; 0x1fc5, 0x1fc5; 0x1fd4, 0x1fd5; 
0x1fdc, 0x1fdc; + 0x1ff0, 0x1ff1; 0x1ff5, 0x1ff5; 0x1fff, 0x1fff; 0x2065, 0x2065; 0x2072, 0x2073; + 0x208f, 0x208f; 0x209d, 0x209f; 0x20c1, 0x20cf; 0x20f1, 0x20ff; 0x218c, 0x218f; + 0x242a, 0x243f; 0x244b, 0x245f; 0x2b74, 0x2b75; 0x2b96, 0x2b96; 0x2cf4, 0x2cf8; + 0x2d26, 0x2d26; 0x2d28, 0x2d2c; 0x2d2e, 0x2d2f; 0x2d68, 0x2d6e; 0x2d71, 0x2d7e; + 0x2d97, 0x2d9f; 0x2da7, 0x2da7; 0x2daf, 0x2daf; 0x2db7, 0x2db7; 0x2dbf, 0x2dbf; + 0x2dc7, 0x2dc7; 0x2dcf, 0x2dcf; 0x2dd7, 0x2dd7; 0x2ddf, 0x2ddf; 0x2e5e, 0x2e7f; + 0x2e9a, 0x2e9a; 0x2ef4, 0x2eff; 0x2fd6, 0x2fef; 0x3040, 0x3040; 0x3097, 0x3098; + 0x3100, 0x3104; 0x3130, 0x3130; 0x318f, 0x318f; 0x31e6, 0x31ee; 0x321f, 0x321f; + 0xa48d, 0xa48f; 0xa4c7, 0xa4cf; 0xa62c, 0xa63f; 0xa6f8, 0xa6ff; 0xa7ce, 0xa7cf; + 0xa7d2, 0xa7d2; 0xa7d4, 0xa7d4; 0xa7dd, 0xa7f1; 0xa82d, 0xa82f; 0xa83a, 0xa83f; + 0xa878, 0xa87f; 0xa8c6, 0xa8cd; 0xa8da, 0xa8df; 0xa954, 0xa95e; 0xa97d, 0xa97f; + 0xa9ce, 0xa9ce; 0xa9da, 0xa9dd; 0xa9ff, 0xa9ff; 0xaa37, 0xaa3f; 0xaa4e, 0xaa4f; + 0xaa5a, 0xaa5b; 0xaac3, 0xaada; 0xaaf7, 0xab00; 0xab07, 0xab08; 0xab0f, 0xab10; + 0xab17, 0xab1f; 0xab27, 0xab27; 0xab2f, 0xab2f; 0xab6c, 0xab6f; 0xabee, 0xabef; + 0xabfa, 0xabff; 0xd7a4, 0xd7af; 0xd7c7, 0xd7ca; 0xd7fc, 0xd7ff; 0xfa6e, 0xfa6f; + 0xfada, 0xfaff; 0xfb07, 0xfb12; 0xfb18, 0xfb1c; 0xfb37, 0xfb37; 0xfb3d, 0xfb3d; + 0xfb3f, 0xfb3f; 0xfb42, 0xfb42; 0xfb45, 0xfb45; 0xfbc3, 0xfbd2; 0xfd90, 0xfd91; + 0xfdc8, 0xfdce; 0xfdd0, 0xfdef; 0xfe1a, 0xfe1f; 0xfe53, 0xfe53; 0xfe67, 0xfe67; + 0xfe6c, 0xfe6f; 0xfe75, 0xfe75; 0xfefd, 0xfefe; 0xff00, 0xff00; 0xffbf, 0xffc1; + 0xffc8, 0xffc9; 0xffd0, 0xffd1; 0xffd8, 0xffd9; 0xffdd, 0xffdf; 0xffe7, 0xffe7; + 0xffef, 0xfff8; 0xfffe, 0xffff; 0x1000c, 0x1000c; 0x10027, 0x10027; 0x1003b, 0x1003b; + 0x1003e, 0x1003e; 0x1004e, 0x1004f; 0x1005e, 0x1007f; 0x100fb, 0x100ff; 0x10103, 0x10106; + 0x10134, 0x10136; 0x1018f, 0x1018f; 0x1019d, 0x1019f; 0x101a1, 0x101cf; 0x101fe, 0x1027f; + 0x1029d, 0x1029f; 0x102d1, 0x102df; 0x102fc, 0x102ff; 0x10324, 
0x1032c; 0x1034b, 0x1034f; + 0x1037b, 0x1037f; 0x1039e, 0x1039e; 0x103c4, 0x103c7; 0x103d6, 0x103ff; 0x1049e, 0x1049f; + 0x104aa, 0x104af; 0x104d4, 0x104d7; 0x104fc, 0x104ff; 0x10528, 0x1052f; 0x10564, 0x1056e; + 0x1057b, 0x1057b; 0x1058b, 0x1058b; 0x10593, 0x10593; 0x10596, 0x10596; 0x105a2, 0x105a2; + 0x105b2, 0x105b2; 0x105ba, 0x105ba; 0x105bd, 0x105bf; 0x105f4, 0x105ff; 0x10737, 0x1073f; + 0x10756, 0x1075f; 0x10768, 0x1077f; 0x10786, 0x10786; 0x107b1, 0x107b1; 0x107bb, 0x107ff; + 0x10806, 0x10807; 0x10809, 0x10809; 0x10836, 0x10836; 0x10839, 0x1083b; 0x1083d, 0x1083e; + 0x10856, 0x10856; 0x1089f, 0x108a6; 0x108b0, 0x108df; 0x108f3, 0x108f3; 0x108f6, 0x108fa; + 0x1091c, 0x1091e; 0x1093a, 0x1093e; 0x10940, 0x1097f; 0x109b8, 0x109bb; 0x109d0, 0x109d1; + 0x10a04, 0x10a04; 0x10a07, 0x10a0b; 0x10a14, 0x10a14; 0x10a18, 0x10a18; 0x10a36, 0x10a37; + 0x10a3b, 0x10a3e; 0x10a49, 0x10a4f; 0x10a59, 0x10a5f; 0x10aa0, 0x10abf; 0x10ae7, 0x10aea; + 0x10af7, 0x10aff; 0x10b36, 0x10b38; 0x10b56, 0x10b57; 0x10b73, 0x10b77; 0x10b92, 0x10b98; + 0x10b9d, 0x10ba8; 0x10bb0, 0x10bff; 0x10c49, 0x10c7f; 0x10cb3, 0x10cbf; 0x10cf3, 0x10cf9; + 0x10d28, 0x10d2f; 0x10d3a, 0x10d3f; 0x10d66, 0x10d68; 0x10d86, 0x10d8d; 0x10d90, 0x10e5f; + 0x10e7f, 0x10e7f; 0x10eaa, 0x10eaa; 0x10eae, 0x10eaf; 0x10eb2, 0x10ec1; 0x10ec5, 0x10efb; + 0x10f28, 0x10f2f; 0x10f5a, 0x10f6f; 0x10f8a, 0x10faf; 0x10fcc, 0x10fdf; 0x10ff7, 0x10fff; + 0x1104e, 0x11051; 0x11076, 0x1107e; 0x110c3, 0x110cc; 0x110ce, 0x110cf; 0x110e9, 0x110ef; + 0x110fa, 0x110ff; 0x11135, 0x11135; 0x11148, 0x1114f; 0x11177, 0x1117f; 0x111e0, 0x111e0; + 0x111f5, 0x111ff; 0x11212, 0x11212; 0x11242, 0x1127f; 0x11287, 0x11287; 0x11289, 0x11289; + 0x1128e, 0x1128e; 0x1129e, 0x1129e; 0x112aa, 0x112af; 0x112eb, 0x112ef; 0x112fa, 0x112ff; + 0x11304, 0x11304; 0x1130d, 0x1130e; 0x11311, 0x11312; 0x11329, 0x11329; 0x11331, 0x11331; + 0x11334, 0x11334; 0x1133a, 0x1133a; 0x11345, 0x11346; 0x11349, 0x1134a; 0x1134e, 0x1134f; + 0x11351, 0x11356; 0x11358, 0x1135c; 
0x11364, 0x11365; 0x1136d, 0x1136f; 0x11375, 0x1137f; + 0x1138a, 0x1138a; 0x1138c, 0x1138d; 0x1138f, 0x1138f; 0x113b6, 0x113b6; 0x113c1, 0x113c1; + 0x113c3, 0x113c4; 0x113c6, 0x113c6; 0x113cb, 0x113cb; 0x113d6, 0x113d6; 0x113d9, 0x113e0; + 0x113e3, 0x113ff; 0x1145c, 0x1145c; 0x11462, 0x1147f; 0x114c8, 0x114cf; 0x114da, 0x1157f; + 0x115b6, 0x115b7; 0x115de, 0x115ff; 0x11645, 0x1164f; 0x1165a, 0x1165f; 0x1166d, 0x1167f; + 0x116ba, 0x116bf; 0x116ca, 0x116cf; 0x116e4, 0x116ff; 0x1171b, 0x1171c; 0x1172c, 0x1172f; 0x11747, 0x117ff; 0x1183c, 0x1189f; 0x118f3, 0x118fe; 0x11907, 0x11908; 0x1190a, 0x1190b; 0x11914, 0x11914; 0x11917, 0x11917; 0x11936, 0x11936; 0x11939, 0x1193a; 0x11947, 0x1194f; 0x1195a, 0x1199f; 0x119a8, 0x119a9; 0x119d8, 0x119d9; 0x119e5, 0x119ff; 0x11a48, 0x11a4f; - 0x11aa3, 0x11aaf; 0x11af9, 0x11aff; 0x11b0a, 0x11bff; 0x11c09, 0x11c09; 0x11c37, 0x11c37; - 0x11c46, 0x11c4f; 0x11c6d, 0x11c6f; 0x11c90, 0x11c91; 0x11ca8, 0x11ca8; 0x11cb7, 0x11cff; - 0x11d07, 0x11d07; 0x11d0a, 0x11d0a; 0x11d37, 0x11d39; 0x11d3b, 0x11d3b; 0x11d3e, 0x11d3e; - 0x11d48, 0x11d4f; 0x11d5a, 0x11d5f; 0x11d66, 0x11d66; 0x11d69, 0x11d69; 0x11d8f, 0x11d8f; - 0x11d92, 0x11d92; 0x11d99, 0x11d9f; 0x11daa, 0x11edf; 0x11ef9, 0x11eff; 0x11f11, 0x11f11; - 0x11f3b, 0x11f3d; 0x11f5a, 0x11faf; 0x11fb1, 0x11fbf; 0x11ff2, 0x11ffe; 0x1239a, 0x123ff; - 0x1246f, 0x1246f; 0x12475, 0x1247f; 0x12544, 0x12f8f; 0x12ff3, 0x12fff; 0x13456, 0x143ff; - 0x14647, 0x167ff; 0x16a39, 0x16a3f; 0x16a5f, 0x16a5f; 0x16a6a, 0x16a6d; 0x16abf, 0x16abf; - 0x16aca, 0x16acf; 0x16aee, 0x16aef; 0x16af6, 0x16aff; 0x16b46, 0x16b4f; 0x16b5a, 0x16b5a; - 0x16b62, 0x16b62; 0x16b78, 0x16b7c; 0x16b90, 0x16e3f; 0x16e9b, 0x16eff; 0x16f4b, 0x16f4e; + 0x11aa3, 0x11aaf; 0x11af9, 0x11aff; 0x11b0a, 0x11bbf; 0x11be2, 0x11bef; 0x11bfa, 0x11bff; + 0x11c09, 0x11c09; 0x11c37, 0x11c37; 0x11c46, 0x11c4f; 0x11c6d, 0x11c6f; 0x11c90, 0x11c91; + 0x11ca8, 0x11ca8; 0x11cb7, 0x11cff; 0x11d07, 0x11d07; 0x11d0a, 0x11d0a; 0x11d37, 0x11d39; + 0x11d3b, 0x11d3b; 
0x11d3e, 0x11d3e; 0x11d48, 0x11d4f; 0x11d5a, 0x11d5f; 0x11d66, 0x11d66; + 0x11d69, 0x11d69; 0x11d8f, 0x11d8f; 0x11d92, 0x11d92; 0x11d99, 0x11d9f; 0x11daa, 0x11edf; + 0x11ef9, 0x11eff; 0x11f11, 0x11f11; 0x11f3b, 0x11f3d; 0x11f5b, 0x11faf; 0x11fb1, 0x11fbf; + 0x11ff2, 0x11ffe; 0x1239a, 0x123ff; 0x1246f, 0x1246f; 0x12475, 0x1247f; 0x12544, 0x12f8f; + 0x12ff3, 0x12fff; 0x13456, 0x1345f; 0x143fb, 0x143ff; 0x14647, 0x160ff; 0x1613a, 0x167ff; + 0x16a39, 0x16a3f; 0x16a5f, 0x16a5f; 0x16a6a, 0x16a6d; 0x16abf, 0x16abf; 0x16aca, 0x16acf; + 0x16aee, 0x16aef; 0x16af6, 0x16aff; 0x16b46, 0x16b4f; 0x16b5a, 0x16b5a; 0x16b62, 0x16b62; + 0x16b78, 0x16b7c; 0x16b90, 0x16d3f; 0x16d7a, 0x16e3f; 0x16e9b, 0x16eff; 0x16f4b, 0x16f4e; 0x16f88, 0x16f8e; 0x16fa0, 0x16fdf; 0x16fe5, 0x16fef; 0x16ff2, 0x16fff; 0x187f8, 0x187ff; - 0x18cd6, 0x18cff; 0x18d09, 0x1afef; 0x1aff4, 0x1aff4; 0x1affc, 0x1affc; 0x1afff, 0x1afff; + 0x18cd6, 0x18cfe; 0x18d09, 0x1afef; 0x1aff4, 0x1aff4; 0x1affc, 0x1affc; 0x1afff, 0x1afff; 0x1b123, 0x1b131; 0x1b133, 0x1b14f; 0x1b153, 0x1b154; 0x1b156, 0x1b163; 0x1b168, 0x1b16f; 0x1b2fc, 0x1bbff; 0x1bc6b, 0x1bc6f; 0x1bc7d, 0x1bc7f; 0x1bc89, 0x1bc8f; 0x1bc9a, 0x1bc9b; - 0x1bca4, 0x1ceff; 0x1cf2e, 0x1cf2f; 0x1cf47, 0x1cf4f; 0x1cfc4, 0x1cfff; 0x1d0f6, 0x1d0ff; - 0x1d127, 0x1d128; 0x1d1eb, 0x1d1ff; 0x1d246, 0x1d2bf; 0x1d2d4, 0x1d2df; 0x1d2f4, 0x1d2ff; - 0x1d357, 0x1d35f; 0x1d379, 0x1d3ff; 0x1d455, 0x1d455; 0x1d49d, 0x1d49d; 0x1d4a0, 0x1d4a1; - 0x1d4a3, 0x1d4a4; 0x1d4a7, 0x1d4a8; 0x1d4ad, 0x1d4ad; 0x1d4ba, 0x1d4ba; 0x1d4bc, 0x1d4bc; - 0x1d4c4, 0x1d4c4; 0x1d506, 0x1d506; 0x1d50b, 0x1d50c; 0x1d515, 0x1d515; 0x1d51d, 0x1d51d; - 0x1d53a, 0x1d53a; 0x1d53f, 0x1d53f; 0x1d545, 0x1d545; 0x1d547, 0x1d549; 0x1d551, 0x1d551; - 0x1d6a6, 0x1d6a7; 0x1d7cc, 0x1d7cd; 0x1da8c, 0x1da9a; 0x1daa0, 0x1daa0; 0x1dab0, 0x1deff; - 0x1df1f, 0x1df24; 0x1df2b, 0x1dfff; 0x1e007, 0x1e007; 0x1e019, 0x1e01a; 0x1e022, 0x1e022; - 0x1e025, 0x1e025; 0x1e02b, 0x1e02f; 0x1e06e, 0x1e08e; 0x1e090, 0x1e0ff; 0x1e12d, 0x1e12f; - 
0x1e13e, 0x1e13f; 0x1e14a, 0x1e14d; 0x1e150, 0x1e28f; 0x1e2af, 0x1e2bf; 0x1e2fa, 0x1e2fe; - 0x1e300, 0x1e4cf; 0x1e4fa, 0x1e7df; 0x1e7e7, 0x1e7e7; 0x1e7ec, 0x1e7ec; 0x1e7ef, 0x1e7ef; - 0x1e7ff, 0x1e7ff; 0x1e8c5, 0x1e8c6; 0x1e8d7, 0x1e8ff; 0x1e94c, 0x1e94f; 0x1e95a, 0x1e95d; - 0x1e960, 0x1ec70; 0x1ecb5, 0x1ed00; 0x1ed3e, 0x1edff; 0x1ee04, 0x1ee04; 0x1ee20, 0x1ee20; - 0x1ee23, 0x1ee23; 0x1ee25, 0x1ee26; 0x1ee28, 0x1ee28; 0x1ee33, 0x1ee33; 0x1ee38, 0x1ee38; - 0x1ee3a, 0x1ee3a; 0x1ee3c, 0x1ee41; 0x1ee43, 0x1ee46; 0x1ee48, 0x1ee48; 0x1ee4a, 0x1ee4a; - 0x1ee4c, 0x1ee4c; 0x1ee50, 0x1ee50; 0x1ee53, 0x1ee53; 0x1ee55, 0x1ee56; 0x1ee58, 0x1ee58; - 0x1ee5a, 0x1ee5a; 0x1ee5c, 0x1ee5c; 0x1ee5e, 0x1ee5e; 0x1ee60, 0x1ee60; 0x1ee63, 0x1ee63; - 0x1ee65, 0x1ee66; 0x1ee6b, 0x1ee6b; 0x1ee73, 0x1ee73; 0x1ee78, 0x1ee78; 0x1ee7d, 0x1ee7d; - 0x1ee7f, 0x1ee7f; 0x1ee8a, 0x1ee8a; 0x1ee9c, 0x1eea0; 0x1eea4, 0x1eea4; 0x1eeaa, 0x1eeaa; - 0x1eebc, 0x1eeef; 0x1eef2, 0x1efff; 0x1f02c, 0x1f02f; 0x1f094, 0x1f09f; 0x1f0af, 0x1f0b0; - 0x1f0c0, 0x1f0c0; 0x1f0d0, 0x1f0d0; 0x1f0f6, 0x1f0ff; 0x1f1ae, 0x1f1e5; 0x1f203, 0x1f20f; - 0x1f23c, 0x1f23f; 0x1f249, 0x1f24f; 0x1f252, 0x1f25f; 0x1f266, 0x1f2ff; 0x1f6d8, 0x1f6db; - 0x1f6ed, 0x1f6ef; 0x1f6fd, 0x1f6ff; 0x1f777, 0x1f77a; 0x1f7da, 0x1f7df; 0x1f7ec, 0x1f7ef; - 0x1f7f1, 0x1f7ff; 0x1f80c, 0x1f80f; 0x1f848, 0x1f84f; 0x1f85a, 0x1f85f; 0x1f888, 0x1f88f; - 0x1f8ae, 0x1f8af; 0x1f8b2, 0x1f8ff; 0x1fa54, 0x1fa5f; 0x1fa6e, 0x1fa6f; 0x1fa7d, 0x1fa7f; - 0x1fa89, 0x1fa8f; 0x1fabe, 0x1fabe; 0x1fac6, 0x1facd; 0x1fadc, 0x1fadf; 0x1fae9, 0x1faef; - 0x1faf9, 0x1faff; 0x1fb93, 0x1fb93; 0x1fbcb, 0x1fbef; 0x1fbfa, 0x1ffff; 0x2a6e0, 0x2a6ff; - 0x2b73a, 0x2b73f; 0x2b81e, 0x2b81f; 0x2cea2, 0x2ceaf; 0x2ebe1, 0x2f7ff; 0x2fa1e, 0x2ffff; - 0x3134b, 0x3134f; 0x323b0, 0xe0000; 0xe0002, 0xe001f; 0xe0080, 0xe00ff; 0xe01f0, 0xeffff; - 0xffffe, 0xfffff; 0x10fffe, 0x10ffff] + 0x1bca4, 0x1cbff; 0x1ccfa, 0x1ccff; 0x1ceb4, 0x1ceff; 0x1cf2e, 0x1cf2f; 0x1cf47, 0x1cf4f; + 0x1cfc4, 0x1cfff; 0x1d0f6, 
0x1d0ff; 0x1d127, 0x1d128; 0x1d1eb, 0x1d1ff; 0x1d246, 0x1d2bf; + 0x1d2d4, 0x1d2df; 0x1d2f4, 0x1d2ff; 0x1d357, 0x1d35f; 0x1d379, 0x1d3ff; 0x1d455, 0x1d455; + 0x1d49d, 0x1d49d; 0x1d4a0, 0x1d4a1; 0x1d4a3, 0x1d4a4; 0x1d4a7, 0x1d4a8; 0x1d4ad, 0x1d4ad; + 0x1d4ba, 0x1d4ba; 0x1d4bc, 0x1d4bc; 0x1d4c4, 0x1d4c4; 0x1d506, 0x1d506; 0x1d50b, 0x1d50c; + 0x1d515, 0x1d515; 0x1d51d, 0x1d51d; 0x1d53a, 0x1d53a; 0x1d53f, 0x1d53f; 0x1d545, 0x1d545; + 0x1d547, 0x1d549; 0x1d551, 0x1d551; 0x1d6a6, 0x1d6a7; 0x1d7cc, 0x1d7cd; 0x1da8c, 0x1da9a; + 0x1daa0, 0x1daa0; 0x1dab0, 0x1deff; 0x1df1f, 0x1df24; 0x1df2b, 0x1dfff; 0x1e007, 0x1e007; + 0x1e019, 0x1e01a; 0x1e022, 0x1e022; 0x1e025, 0x1e025; 0x1e02b, 0x1e02f; 0x1e06e, 0x1e08e; + 0x1e090, 0x1e0ff; 0x1e12d, 0x1e12f; 0x1e13e, 0x1e13f; 0x1e14a, 0x1e14d; 0x1e150, 0x1e28f; + 0x1e2af, 0x1e2bf; 0x1e2fa, 0x1e2fe; 0x1e300, 0x1e4cf; 0x1e4fa, 0x1e5cf; 0x1e5fb, 0x1e5fe; + 0x1e600, 0x1e7df; 0x1e7e7, 0x1e7e7; 0x1e7ec, 0x1e7ec; 0x1e7ef, 0x1e7ef; 0x1e7ff, 0x1e7ff; + 0x1e8c5, 0x1e8c6; 0x1e8d7, 0x1e8ff; 0x1e94c, 0x1e94f; 0x1e95a, 0x1e95d; 0x1e960, 0x1ec70; + 0x1ecb5, 0x1ed00; 0x1ed3e, 0x1edff; 0x1ee04, 0x1ee04; 0x1ee20, 0x1ee20; 0x1ee23, 0x1ee23; + 0x1ee25, 0x1ee26; 0x1ee28, 0x1ee28; 0x1ee33, 0x1ee33; 0x1ee38, 0x1ee38; 0x1ee3a, 0x1ee3a; + 0x1ee3c, 0x1ee41; 0x1ee43, 0x1ee46; 0x1ee48, 0x1ee48; 0x1ee4a, 0x1ee4a; 0x1ee4c, 0x1ee4c; + 0x1ee50, 0x1ee50; 0x1ee53, 0x1ee53; 0x1ee55, 0x1ee56; 0x1ee58, 0x1ee58; 0x1ee5a, 0x1ee5a; + 0x1ee5c, 0x1ee5c; 0x1ee5e, 0x1ee5e; 0x1ee60, 0x1ee60; 0x1ee63, 0x1ee63; 0x1ee65, 0x1ee66; + 0x1ee6b, 0x1ee6b; 0x1ee73, 0x1ee73; 0x1ee78, 0x1ee78; 0x1ee7d, 0x1ee7d; 0x1ee7f, 0x1ee7f; + 0x1ee8a, 0x1ee8a; 0x1ee9c, 0x1eea0; 0x1eea4, 0x1eea4; 0x1eeaa, 0x1eeaa; 0x1eebc, 0x1eeef; + 0x1eef2, 0x1efff; 0x1f02c, 0x1f02f; 0x1f094, 0x1f09f; 0x1f0af, 0x1f0b0; 0x1f0c0, 0x1f0c0; + 0x1f0d0, 0x1f0d0; 0x1f0f6, 0x1f0ff; 0x1f1ae, 0x1f1e5; 0x1f203, 0x1f20f; 0x1f23c, 0x1f23f; + 0x1f249, 0x1f24f; 0x1f252, 0x1f25f; 0x1f266, 0x1f2ff; 0x1f6d8, 0x1f6db; 0x1f6ed, 0x1f6ef; + 
0x1f6fd, 0x1f6ff; 0x1f777, 0x1f77a; 0x1f7da, 0x1f7df; 0x1f7ec, 0x1f7ef; 0x1f7f1, 0x1f7ff; + 0x1f80c, 0x1f80f; 0x1f848, 0x1f84f; 0x1f85a, 0x1f85f; 0x1f888, 0x1f88f; 0x1f8ae, 0x1f8af; + 0x1f8bc, 0x1f8bf; 0x1f8c2, 0x1f8ff; 0x1fa54, 0x1fa5f; 0x1fa6e, 0x1fa6f; 0x1fa7d, 0x1fa7f; + 0x1fa8a, 0x1fa8e; 0x1fac7, 0x1facd; 0x1fadd, 0x1fade; 0x1faea, 0x1faef; 0x1faf9, 0x1faff; + 0x1fb93, 0x1fb93; 0x1fbfa, 0x1ffff; 0x2a6e0, 0x2a6ff; 0x2b73a, 0x2b73f; 0x2b81e, 0x2b81f; + 0x2cea2, 0x2ceaf; 0x2ebe1, 0x2ebef; 0x2ee5e, 0x2f7ff; 0x2fa1e, 0x2ffff; 0x3134b, 0x3134f; + 0x323b0, 0xe0000; 0xe0002, 0xe001f; 0xe0080, 0xe00ff; 0xe01f0, 0xeffff; 0xffffe, 0xfffff; + 0x10fffe, 0x10ffff] let co = Sedlex_utils.Cset.of_list [0xe000, 0xf8ff; 0xf0000, 0xffffd; 0x100000, 0x10fffd] @@ -223,84 +229,85 @@ module Categories = struct 0x515, 0x515; 0x517, 0x517; 0x519, 0x519; 0x51b, 0x51b; 0x51d, 0x51d; 0x51f, 0x51f; 0x521, 0x521; 0x523, 0x523; 0x525, 0x525; 0x527, 0x527; 0x529, 0x529; 0x52b, 0x52b; 0x52d, 0x52d; 0x52f, 0x52f; 0x560, 0x588; - 0x10d0, 0x10fa; 0x10fd, 0x10ff; 0x13f8, 0x13fd; 0x1c80, 0x1c88; 0x1d00, 0x1d2b; - 0x1d6b, 0x1d77; 0x1d79, 0x1d9a; 0x1e01, 0x1e01; 0x1e03, 0x1e03; 0x1e05, 0x1e05; - 0x1e07, 0x1e07; 0x1e09, 0x1e09; 0x1e0b, 0x1e0b; 0x1e0d, 0x1e0d; 0x1e0f, 0x1e0f; - 0x1e11, 0x1e11; 0x1e13, 0x1e13; 0x1e15, 0x1e15; 0x1e17, 0x1e17; 0x1e19, 0x1e19; - 0x1e1b, 0x1e1b; 0x1e1d, 0x1e1d; 0x1e1f, 0x1e1f; 0x1e21, 0x1e21; 0x1e23, 0x1e23; - 0x1e25, 0x1e25; 0x1e27, 0x1e27; 0x1e29, 0x1e29; 0x1e2b, 0x1e2b; 0x1e2d, 0x1e2d; - 0x1e2f, 0x1e2f; 0x1e31, 0x1e31; 0x1e33, 0x1e33; 0x1e35, 0x1e35; 0x1e37, 0x1e37; - 0x1e39, 0x1e39; 0x1e3b, 0x1e3b; 0x1e3d, 0x1e3d; 0x1e3f, 0x1e3f; 0x1e41, 0x1e41; - 0x1e43, 0x1e43; 0x1e45, 0x1e45; 0x1e47, 0x1e47; 0x1e49, 0x1e49; 0x1e4b, 0x1e4b; - 0x1e4d, 0x1e4d; 0x1e4f, 0x1e4f; 0x1e51, 0x1e51; 0x1e53, 0x1e53; 0x1e55, 0x1e55; - 0x1e57, 0x1e57; 0x1e59, 0x1e59; 0x1e5b, 0x1e5b; 0x1e5d, 0x1e5d; 0x1e5f, 0x1e5f; - 0x1e61, 0x1e61; 0x1e63, 0x1e63; 0x1e65, 0x1e65; 0x1e67, 0x1e67; 0x1e69, 0x1e69; - 
0x1e6b, 0x1e6b; 0x1e6d, 0x1e6d; 0x1e6f, 0x1e6f; 0x1e71, 0x1e71; 0x1e73, 0x1e73; - 0x1e75, 0x1e75; 0x1e77, 0x1e77; 0x1e79, 0x1e79; 0x1e7b, 0x1e7b; 0x1e7d, 0x1e7d; - 0x1e7f, 0x1e7f; 0x1e81, 0x1e81; 0x1e83, 0x1e83; 0x1e85, 0x1e85; 0x1e87, 0x1e87; - 0x1e89, 0x1e89; 0x1e8b, 0x1e8b; 0x1e8d, 0x1e8d; 0x1e8f, 0x1e8f; 0x1e91, 0x1e91; - 0x1e93, 0x1e93; 0x1e95, 0x1e9d; 0x1e9f, 0x1e9f; 0x1ea1, 0x1ea1; 0x1ea3, 0x1ea3; - 0x1ea5, 0x1ea5; 0x1ea7, 0x1ea7; 0x1ea9, 0x1ea9; 0x1eab, 0x1eab; 0x1ead, 0x1ead; - 0x1eaf, 0x1eaf; 0x1eb1, 0x1eb1; 0x1eb3, 0x1eb3; 0x1eb5, 0x1eb5; 0x1eb7, 0x1eb7; - 0x1eb9, 0x1eb9; 0x1ebb, 0x1ebb; 0x1ebd, 0x1ebd; 0x1ebf, 0x1ebf; 0x1ec1, 0x1ec1; - 0x1ec3, 0x1ec3; 0x1ec5, 0x1ec5; 0x1ec7, 0x1ec7; 0x1ec9, 0x1ec9; 0x1ecb, 0x1ecb; - 0x1ecd, 0x1ecd; 0x1ecf, 0x1ecf; 0x1ed1, 0x1ed1; 0x1ed3, 0x1ed3; 0x1ed5, 0x1ed5; - 0x1ed7, 0x1ed7; 0x1ed9, 0x1ed9; 0x1edb, 0x1edb; 0x1edd, 0x1edd; 0x1edf, 0x1edf; - 0x1ee1, 0x1ee1; 0x1ee3, 0x1ee3; 0x1ee5, 0x1ee5; 0x1ee7, 0x1ee7; 0x1ee9, 0x1ee9; - 0x1eeb, 0x1eeb; 0x1eed, 0x1eed; 0x1eef, 0x1eef; 0x1ef1, 0x1ef1; 0x1ef3, 0x1ef3; - 0x1ef5, 0x1ef5; 0x1ef7, 0x1ef7; 0x1ef9, 0x1ef9; 0x1efb, 0x1efb; 0x1efd, 0x1efd; - 0x1eff, 0x1f07; 0x1f10, 0x1f15; 0x1f20, 0x1f27; 0x1f30, 0x1f37; 0x1f40, 0x1f45; - 0x1f50, 0x1f57; 0x1f60, 0x1f67; 0x1f70, 0x1f7d; 0x1f80, 0x1f87; 0x1f90, 0x1f97; - 0x1fa0, 0x1fa7; 0x1fb0, 0x1fb4; 0x1fb6, 0x1fb7; 0x1fbe, 0x1fbe; 0x1fc2, 0x1fc4; - 0x1fc6, 0x1fc7; 0x1fd0, 0x1fd3; 0x1fd6, 0x1fd7; 0x1fe0, 0x1fe7; 0x1ff2, 0x1ff4; - 0x1ff6, 0x1ff7; 0x210a, 0x210a; 0x210e, 0x210f; 0x2113, 0x2113; 0x212f, 0x212f; - 0x2134, 0x2134; 0x2139, 0x2139; 0x213c, 0x213d; 0x2146, 0x2149; 0x214e, 0x214e; - 0x2184, 0x2184; 0x2c30, 0x2c5f; 0x2c61, 0x2c61; 0x2c65, 0x2c66; 0x2c68, 0x2c68; - 0x2c6a, 0x2c6a; 0x2c6c, 0x2c6c; 0x2c71, 0x2c71; 0x2c73, 0x2c74; 0x2c76, 0x2c7b; - 0x2c81, 0x2c81; 0x2c83, 0x2c83; 0x2c85, 0x2c85; 0x2c87, 0x2c87; 0x2c89, 0x2c89; - 0x2c8b, 0x2c8b; 0x2c8d, 0x2c8d; 0x2c8f, 0x2c8f; 0x2c91, 0x2c91; 0x2c93, 0x2c93; - 0x2c95, 0x2c95; 0x2c97, 0x2c97; 
0x2c99, 0x2c99; 0x2c9b, 0x2c9b; 0x2c9d, 0x2c9d; - 0x2c9f, 0x2c9f; 0x2ca1, 0x2ca1; 0x2ca3, 0x2ca3; 0x2ca5, 0x2ca5; 0x2ca7, 0x2ca7; - 0x2ca9, 0x2ca9; 0x2cab, 0x2cab; 0x2cad, 0x2cad; 0x2caf, 0x2caf; 0x2cb1, 0x2cb1; - 0x2cb3, 0x2cb3; 0x2cb5, 0x2cb5; 0x2cb7, 0x2cb7; 0x2cb9, 0x2cb9; 0x2cbb, 0x2cbb; - 0x2cbd, 0x2cbd; 0x2cbf, 0x2cbf; 0x2cc1, 0x2cc1; 0x2cc3, 0x2cc3; 0x2cc5, 0x2cc5; - 0x2cc7, 0x2cc7; 0x2cc9, 0x2cc9; 0x2ccb, 0x2ccb; 0x2ccd, 0x2ccd; 0x2ccf, 0x2ccf; - 0x2cd1, 0x2cd1; 0x2cd3, 0x2cd3; 0x2cd5, 0x2cd5; 0x2cd7, 0x2cd7; 0x2cd9, 0x2cd9; - 0x2cdb, 0x2cdb; 0x2cdd, 0x2cdd; 0x2cdf, 0x2cdf; 0x2ce1, 0x2ce1; 0x2ce3, 0x2ce4; - 0x2cec, 0x2cec; 0x2cee, 0x2cee; 0x2cf3, 0x2cf3; 0x2d00, 0x2d25; 0x2d27, 0x2d27; - 0x2d2d, 0x2d2d; 0xa641, 0xa641; 0xa643, 0xa643; 0xa645, 0xa645; 0xa647, 0xa647; - 0xa649, 0xa649; 0xa64b, 0xa64b; 0xa64d, 0xa64d; 0xa64f, 0xa64f; 0xa651, 0xa651; - 0xa653, 0xa653; 0xa655, 0xa655; 0xa657, 0xa657; 0xa659, 0xa659; 0xa65b, 0xa65b; - 0xa65d, 0xa65d; 0xa65f, 0xa65f; 0xa661, 0xa661; 0xa663, 0xa663; 0xa665, 0xa665; - 0xa667, 0xa667; 0xa669, 0xa669; 0xa66b, 0xa66b; 0xa66d, 0xa66d; 0xa681, 0xa681; - 0xa683, 0xa683; 0xa685, 0xa685; 0xa687, 0xa687; 0xa689, 0xa689; 0xa68b, 0xa68b; - 0xa68d, 0xa68d; 0xa68f, 0xa68f; 0xa691, 0xa691; 0xa693, 0xa693; 0xa695, 0xa695; - 0xa697, 0xa697; 0xa699, 0xa699; 0xa69b, 0xa69b; 0xa723, 0xa723; 0xa725, 0xa725; - 0xa727, 0xa727; 0xa729, 0xa729; 0xa72b, 0xa72b; 0xa72d, 0xa72d; 0xa72f, 0xa731; - 0xa733, 0xa733; 0xa735, 0xa735; 0xa737, 0xa737; 0xa739, 0xa739; 0xa73b, 0xa73b; - 0xa73d, 0xa73d; 0xa73f, 0xa73f; 0xa741, 0xa741; 0xa743, 0xa743; 0xa745, 0xa745; - 0xa747, 0xa747; 0xa749, 0xa749; 0xa74b, 0xa74b; 0xa74d, 0xa74d; 0xa74f, 0xa74f; - 0xa751, 0xa751; 0xa753, 0xa753; 0xa755, 0xa755; 0xa757, 0xa757; 0xa759, 0xa759; - 0xa75b, 0xa75b; 0xa75d, 0xa75d; 0xa75f, 0xa75f; 0xa761, 0xa761; 0xa763, 0xa763; - 0xa765, 0xa765; 0xa767, 0xa767; 0xa769, 0xa769; 0xa76b, 0xa76b; 0xa76d, 0xa76d; - 0xa76f, 0xa76f; 0xa771, 0xa778; 0xa77a, 0xa77a; 0xa77c, 0xa77c; 
0xa77f, 0xa77f; - 0xa781, 0xa781; 0xa783, 0xa783; 0xa785, 0xa785; 0xa787, 0xa787; 0xa78c, 0xa78c; - 0xa78e, 0xa78e; 0xa791, 0xa791; 0xa793, 0xa795; 0xa797, 0xa797; 0xa799, 0xa799; - 0xa79b, 0xa79b; 0xa79d, 0xa79d; 0xa79f, 0xa79f; 0xa7a1, 0xa7a1; 0xa7a3, 0xa7a3; - 0xa7a5, 0xa7a5; 0xa7a7, 0xa7a7; 0xa7a9, 0xa7a9; 0xa7af, 0xa7af; 0xa7b5, 0xa7b5; - 0xa7b7, 0xa7b7; 0xa7b9, 0xa7b9; 0xa7bb, 0xa7bb; 0xa7bd, 0xa7bd; 0xa7bf, 0xa7bf; - 0xa7c1, 0xa7c1; 0xa7c3, 0xa7c3; 0xa7c8, 0xa7c8; 0xa7ca, 0xa7ca; 0xa7d1, 0xa7d1; - 0xa7d3, 0xa7d3; 0xa7d5, 0xa7d5; 0xa7d7, 0xa7d7; 0xa7d9, 0xa7d9; 0xa7f6, 0xa7f6; - 0xa7fa, 0xa7fa; 0xab30, 0xab5a; 0xab60, 0xab68; 0xab70, 0xabbf; 0xfb00, 0xfb06; - 0xfb13, 0xfb17; 0xff41, 0xff5a; 0x10428, 0x1044f; 0x104d8, 0x104fb; 0x10597, 0x105a1; - 0x105a3, 0x105b1; 0x105b3, 0x105b9; 0x105bb, 0x105bc; 0x10cc0, 0x10cf2; 0x118c0, 0x118df; - 0x16e60, 0x16e7f; 0x1d41a, 0x1d433; 0x1d44e, 0x1d454; 0x1d456, 0x1d467; 0x1d482, 0x1d49b; - 0x1d4b6, 0x1d4b9; 0x1d4bb, 0x1d4bb; 0x1d4bd, 0x1d4c3; 0x1d4c5, 0x1d4cf; 0x1d4ea, 0x1d503; - 0x1d51e, 0x1d537; 0x1d552, 0x1d56b; 0x1d586, 0x1d59f; 0x1d5ba, 0x1d5d3; 0x1d5ee, 0x1d607; - 0x1d622, 0x1d63b; 0x1d656, 0x1d66f; 0x1d68a, 0x1d6a5; 0x1d6c2, 0x1d6da; 0x1d6dc, 0x1d6e1; - 0x1d6fc, 0x1d714; 0x1d716, 0x1d71b; 0x1d736, 0x1d74e; 0x1d750, 0x1d755; 0x1d770, 0x1d788; - 0x1d78a, 0x1d78f; 0x1d7aa, 0x1d7c2; 0x1d7c4, 0x1d7c9; 0x1d7cb, 0x1d7cb; 0x1df00, 0x1df09; - 0x1df0b, 0x1df1e; 0x1df25, 0x1df2a; 0x1e922, 0x1e943] + 0x10d0, 0x10fa; 0x10fd, 0x10ff; 0x13f8, 0x13fd; 0x1c80, 0x1c88; 0x1c8a, 0x1c8a; + 0x1d00, 0x1d2b; 0x1d6b, 0x1d77; 0x1d79, 0x1d9a; 0x1e01, 0x1e01; 0x1e03, 0x1e03; + 0x1e05, 0x1e05; 0x1e07, 0x1e07; 0x1e09, 0x1e09; 0x1e0b, 0x1e0b; 0x1e0d, 0x1e0d; + 0x1e0f, 0x1e0f; 0x1e11, 0x1e11; 0x1e13, 0x1e13; 0x1e15, 0x1e15; 0x1e17, 0x1e17; + 0x1e19, 0x1e19; 0x1e1b, 0x1e1b; 0x1e1d, 0x1e1d; 0x1e1f, 0x1e1f; 0x1e21, 0x1e21; + 0x1e23, 0x1e23; 0x1e25, 0x1e25; 0x1e27, 0x1e27; 0x1e29, 0x1e29; 0x1e2b, 0x1e2b; + 0x1e2d, 0x1e2d; 0x1e2f, 0x1e2f; 0x1e31, 
0x1e31; 0x1e33, 0x1e33; 0x1e35, 0x1e35; + 0x1e37, 0x1e37; 0x1e39, 0x1e39; 0x1e3b, 0x1e3b; 0x1e3d, 0x1e3d; 0x1e3f, 0x1e3f; + 0x1e41, 0x1e41; 0x1e43, 0x1e43; 0x1e45, 0x1e45; 0x1e47, 0x1e47; 0x1e49, 0x1e49; + 0x1e4b, 0x1e4b; 0x1e4d, 0x1e4d; 0x1e4f, 0x1e4f; 0x1e51, 0x1e51; 0x1e53, 0x1e53; + 0x1e55, 0x1e55; 0x1e57, 0x1e57; 0x1e59, 0x1e59; 0x1e5b, 0x1e5b; 0x1e5d, 0x1e5d; + 0x1e5f, 0x1e5f; 0x1e61, 0x1e61; 0x1e63, 0x1e63; 0x1e65, 0x1e65; 0x1e67, 0x1e67; + 0x1e69, 0x1e69; 0x1e6b, 0x1e6b; 0x1e6d, 0x1e6d; 0x1e6f, 0x1e6f; 0x1e71, 0x1e71; + 0x1e73, 0x1e73; 0x1e75, 0x1e75; 0x1e77, 0x1e77; 0x1e79, 0x1e79; 0x1e7b, 0x1e7b; + 0x1e7d, 0x1e7d; 0x1e7f, 0x1e7f; 0x1e81, 0x1e81; 0x1e83, 0x1e83; 0x1e85, 0x1e85; + 0x1e87, 0x1e87; 0x1e89, 0x1e89; 0x1e8b, 0x1e8b; 0x1e8d, 0x1e8d; 0x1e8f, 0x1e8f; + 0x1e91, 0x1e91; 0x1e93, 0x1e93; 0x1e95, 0x1e9d; 0x1e9f, 0x1e9f; 0x1ea1, 0x1ea1; + 0x1ea3, 0x1ea3; 0x1ea5, 0x1ea5; 0x1ea7, 0x1ea7; 0x1ea9, 0x1ea9; 0x1eab, 0x1eab; + 0x1ead, 0x1ead; 0x1eaf, 0x1eaf; 0x1eb1, 0x1eb1; 0x1eb3, 0x1eb3; 0x1eb5, 0x1eb5; + 0x1eb7, 0x1eb7; 0x1eb9, 0x1eb9; 0x1ebb, 0x1ebb; 0x1ebd, 0x1ebd; 0x1ebf, 0x1ebf; + 0x1ec1, 0x1ec1; 0x1ec3, 0x1ec3; 0x1ec5, 0x1ec5; 0x1ec7, 0x1ec7; 0x1ec9, 0x1ec9; + 0x1ecb, 0x1ecb; 0x1ecd, 0x1ecd; 0x1ecf, 0x1ecf; 0x1ed1, 0x1ed1; 0x1ed3, 0x1ed3; + 0x1ed5, 0x1ed5; 0x1ed7, 0x1ed7; 0x1ed9, 0x1ed9; 0x1edb, 0x1edb; 0x1edd, 0x1edd; + 0x1edf, 0x1edf; 0x1ee1, 0x1ee1; 0x1ee3, 0x1ee3; 0x1ee5, 0x1ee5; 0x1ee7, 0x1ee7; + 0x1ee9, 0x1ee9; 0x1eeb, 0x1eeb; 0x1eed, 0x1eed; 0x1eef, 0x1eef; 0x1ef1, 0x1ef1; + 0x1ef3, 0x1ef3; 0x1ef5, 0x1ef5; 0x1ef7, 0x1ef7; 0x1ef9, 0x1ef9; 0x1efb, 0x1efb; + 0x1efd, 0x1efd; 0x1eff, 0x1f07; 0x1f10, 0x1f15; 0x1f20, 0x1f27; 0x1f30, 0x1f37; + 0x1f40, 0x1f45; 0x1f50, 0x1f57; 0x1f60, 0x1f67; 0x1f70, 0x1f7d; 0x1f80, 0x1f87; + 0x1f90, 0x1f97; 0x1fa0, 0x1fa7; 0x1fb0, 0x1fb4; 0x1fb6, 0x1fb7; 0x1fbe, 0x1fbe; + 0x1fc2, 0x1fc4; 0x1fc6, 0x1fc7; 0x1fd0, 0x1fd3; 0x1fd6, 0x1fd7; 0x1fe0, 0x1fe7; + 0x1ff2, 0x1ff4; 0x1ff6, 0x1ff7; 0x210a, 0x210a; 0x210e, 0x210f; 0x2113, 
0x2113; + 0x212f, 0x212f; 0x2134, 0x2134; 0x2139, 0x2139; 0x213c, 0x213d; 0x2146, 0x2149; + 0x214e, 0x214e; 0x2184, 0x2184; 0x2c30, 0x2c5f; 0x2c61, 0x2c61; 0x2c65, 0x2c66; + 0x2c68, 0x2c68; 0x2c6a, 0x2c6a; 0x2c6c, 0x2c6c; 0x2c71, 0x2c71; 0x2c73, 0x2c74; + 0x2c76, 0x2c7b; 0x2c81, 0x2c81; 0x2c83, 0x2c83; 0x2c85, 0x2c85; 0x2c87, 0x2c87; + 0x2c89, 0x2c89; 0x2c8b, 0x2c8b; 0x2c8d, 0x2c8d; 0x2c8f, 0x2c8f; 0x2c91, 0x2c91; + 0x2c93, 0x2c93; 0x2c95, 0x2c95; 0x2c97, 0x2c97; 0x2c99, 0x2c99; 0x2c9b, 0x2c9b; + 0x2c9d, 0x2c9d; 0x2c9f, 0x2c9f; 0x2ca1, 0x2ca1; 0x2ca3, 0x2ca3; 0x2ca5, 0x2ca5; + 0x2ca7, 0x2ca7; 0x2ca9, 0x2ca9; 0x2cab, 0x2cab; 0x2cad, 0x2cad; 0x2caf, 0x2caf; + 0x2cb1, 0x2cb1; 0x2cb3, 0x2cb3; 0x2cb5, 0x2cb5; 0x2cb7, 0x2cb7; 0x2cb9, 0x2cb9; + 0x2cbb, 0x2cbb; 0x2cbd, 0x2cbd; 0x2cbf, 0x2cbf; 0x2cc1, 0x2cc1; 0x2cc3, 0x2cc3; + 0x2cc5, 0x2cc5; 0x2cc7, 0x2cc7; 0x2cc9, 0x2cc9; 0x2ccb, 0x2ccb; 0x2ccd, 0x2ccd; + 0x2ccf, 0x2ccf; 0x2cd1, 0x2cd1; 0x2cd3, 0x2cd3; 0x2cd5, 0x2cd5; 0x2cd7, 0x2cd7; + 0x2cd9, 0x2cd9; 0x2cdb, 0x2cdb; 0x2cdd, 0x2cdd; 0x2cdf, 0x2cdf; 0x2ce1, 0x2ce1; + 0x2ce3, 0x2ce4; 0x2cec, 0x2cec; 0x2cee, 0x2cee; 0x2cf3, 0x2cf3; 0x2d00, 0x2d25; + 0x2d27, 0x2d27; 0x2d2d, 0x2d2d; 0xa641, 0xa641; 0xa643, 0xa643; 0xa645, 0xa645; + 0xa647, 0xa647; 0xa649, 0xa649; 0xa64b, 0xa64b; 0xa64d, 0xa64d; 0xa64f, 0xa64f; + 0xa651, 0xa651; 0xa653, 0xa653; 0xa655, 0xa655; 0xa657, 0xa657; 0xa659, 0xa659; + 0xa65b, 0xa65b; 0xa65d, 0xa65d; 0xa65f, 0xa65f; 0xa661, 0xa661; 0xa663, 0xa663; + 0xa665, 0xa665; 0xa667, 0xa667; 0xa669, 0xa669; 0xa66b, 0xa66b; 0xa66d, 0xa66d; + 0xa681, 0xa681; 0xa683, 0xa683; 0xa685, 0xa685; 0xa687, 0xa687; 0xa689, 0xa689; + 0xa68b, 0xa68b; 0xa68d, 0xa68d; 0xa68f, 0xa68f; 0xa691, 0xa691; 0xa693, 0xa693; + 0xa695, 0xa695; 0xa697, 0xa697; 0xa699, 0xa699; 0xa69b, 0xa69b; 0xa723, 0xa723; + 0xa725, 0xa725; 0xa727, 0xa727; 0xa729, 0xa729; 0xa72b, 0xa72b; 0xa72d, 0xa72d; + 0xa72f, 0xa731; 0xa733, 0xa733; 0xa735, 0xa735; 0xa737, 0xa737; 0xa739, 0xa739; + 0xa73b, 0xa73b; 
0xa73d, 0xa73d; 0xa73f, 0xa73f; 0xa741, 0xa741; 0xa743, 0xa743; + 0xa745, 0xa745; 0xa747, 0xa747; 0xa749, 0xa749; 0xa74b, 0xa74b; 0xa74d, 0xa74d; + 0xa74f, 0xa74f; 0xa751, 0xa751; 0xa753, 0xa753; 0xa755, 0xa755; 0xa757, 0xa757; + 0xa759, 0xa759; 0xa75b, 0xa75b; 0xa75d, 0xa75d; 0xa75f, 0xa75f; 0xa761, 0xa761; + 0xa763, 0xa763; 0xa765, 0xa765; 0xa767, 0xa767; 0xa769, 0xa769; 0xa76b, 0xa76b; + 0xa76d, 0xa76d; 0xa76f, 0xa76f; 0xa771, 0xa778; 0xa77a, 0xa77a; 0xa77c, 0xa77c; + 0xa77f, 0xa77f; 0xa781, 0xa781; 0xa783, 0xa783; 0xa785, 0xa785; 0xa787, 0xa787; + 0xa78c, 0xa78c; 0xa78e, 0xa78e; 0xa791, 0xa791; 0xa793, 0xa795; 0xa797, 0xa797; + 0xa799, 0xa799; 0xa79b, 0xa79b; 0xa79d, 0xa79d; 0xa79f, 0xa79f; 0xa7a1, 0xa7a1; + 0xa7a3, 0xa7a3; 0xa7a5, 0xa7a5; 0xa7a7, 0xa7a7; 0xa7a9, 0xa7a9; 0xa7af, 0xa7af; + 0xa7b5, 0xa7b5; 0xa7b7, 0xa7b7; 0xa7b9, 0xa7b9; 0xa7bb, 0xa7bb; 0xa7bd, 0xa7bd; + 0xa7bf, 0xa7bf; 0xa7c1, 0xa7c1; 0xa7c3, 0xa7c3; 0xa7c8, 0xa7c8; 0xa7ca, 0xa7ca; + 0xa7cd, 0xa7cd; 0xa7d1, 0xa7d1; 0xa7d3, 0xa7d3; 0xa7d5, 0xa7d5; 0xa7d7, 0xa7d7; + 0xa7d9, 0xa7d9; 0xa7db, 0xa7db; 0xa7f6, 0xa7f6; 0xa7fa, 0xa7fa; 0xab30, 0xab5a; + 0xab60, 0xab68; 0xab70, 0xabbf; 0xfb00, 0xfb06; 0xfb13, 0xfb17; 0xff41, 0xff5a; + 0x10428, 0x1044f; 0x104d8, 0x104fb; 0x10597, 0x105a1; 0x105a3, 0x105b1; 0x105b3, 0x105b9; + 0x105bb, 0x105bc; 0x10cc0, 0x10cf2; 0x10d70, 0x10d85; 0x118c0, 0x118df; 0x16e60, 0x16e7f; + 0x1d41a, 0x1d433; 0x1d44e, 0x1d454; 0x1d456, 0x1d467; 0x1d482, 0x1d49b; 0x1d4b6, 0x1d4b9; + 0x1d4bb, 0x1d4bb; 0x1d4bd, 0x1d4c3; 0x1d4c5, 0x1d4cf; 0x1d4ea, 0x1d503; 0x1d51e, 0x1d537; + 0x1d552, 0x1d56b; 0x1d586, 0x1d59f; 0x1d5ba, 0x1d5d3; 0x1d5ee, 0x1d607; 0x1d622, 0x1d63b; + 0x1d656, 0x1d66f; 0x1d68a, 0x1d6a5; 0x1d6c2, 0x1d6da; 0x1d6dc, 0x1d6e1; 0x1d6fc, 0x1d714; + 0x1d716, 0x1d71b; 0x1d736, 0x1d74e; 0x1d750, 0x1d755; 0x1d770, 0x1d788; 0x1d78a, 0x1d78f; + 0x1d7aa, 0x1d7c2; 0x1d7c4, 0x1d7c9; 0x1d7cb, 0x1d7cb; 0x1df00, 0x1df09; 0x1df0b, 0x1df1e; + 0x1df25, 0x1df2a; 0x1e922, 0x1e943] let lm = 
Sedlex_utils.Cset.of_list [0x2b0, 0x2c1; 0x2c6, 0x2d1; 0x2e0, 0x2e4; 0x2ec, 0x2ec; 0x2ee, 0x2ee; @@ -315,9 +322,9 @@ module Categories = struct 0xa788, 0xa788; 0xa7f2, 0xa7f4; 0xa7f8, 0xa7f9; 0xa9cf, 0xa9cf; 0xa9e6, 0xa9e6; 0xaa70, 0xaa70; 0xaadd, 0xaadd; 0xaaf3, 0xaaf4; 0xab5c, 0xab5f; 0xab69, 0xab69; 0xff70, 0xff70; 0xff9e, 0xff9f; 0x10780, 0x10785; 0x10787, 0x107b0; 0x107b2, 0x107ba; - 0x16b40, 0x16b43; 0x16f93, 0x16f9f; 0x16fe0, 0x16fe1; 0x16fe3, 0x16fe3; 0x1aff0, 0x1aff3; - 0x1aff5, 0x1affb; 0x1affd, 0x1affe; 0x1e030, 0x1e06d; 0x1e137, 0x1e13d; 0x1e4eb, 0x1e4eb; - 0x1e94b, 0x1e94b] + 0x10d4e, 0x10d4e; 0x10d6f, 0x10d6f; 0x16b40, 0x16b43; 0x16d40, 0x16d42; 0x16d6b, 0x16d6c; + 0x16f93, 0x16f9f; 0x16fe0, 0x16fe1; 0x16fe3, 0x16fe3; 0x1aff0, 0x1aff3; 0x1aff5, 0x1affb; + 0x1affd, 0x1affe; 0x1e030, 0x1e06d; 0x1e137, 0x1e13d; 0x1e4eb, 0x1e4eb; 0x1e94b, 0x1e94b] let lo = Sedlex_utils.Cset.of_list [0xaa, 0xaa; 0xba, 0xba; 0x1bb, 0x1bb; 0x1c0, 0x1c3; 0x294, 0x294; @@ -381,47 +388,51 @@ module Categories = struct 0x10000, 0x1000b; 0x1000d, 0x10026; 0x10028, 0x1003a; 0x1003c, 0x1003d; 0x1003f, 0x1004d; 0x10050, 0x1005d; 0x10080, 0x100fa; 0x10280, 0x1029c; 0x102a0, 0x102d0; 0x10300, 0x1031f; 0x1032d, 0x10340; 0x10342, 0x10349; 0x10350, 0x10375; 0x10380, 0x1039d; 0x103a0, 0x103c3; - 0x103c8, 0x103cf; 0x10450, 0x1049d; 0x10500, 0x10527; 0x10530, 0x10563; 0x10600, 0x10736; - 0x10740, 0x10755; 0x10760, 0x10767; 0x10800, 0x10805; 0x10808, 0x10808; 0x1080a, 0x10835; - 0x10837, 0x10838; 0x1083c, 0x1083c; 0x1083f, 0x10855; 0x10860, 0x10876; 0x10880, 0x1089e; - 0x108e0, 0x108f2; 0x108f4, 0x108f5; 0x10900, 0x10915; 0x10920, 0x10939; 0x10980, 0x109b7; - 0x109be, 0x109bf; 0x10a00, 0x10a00; 0x10a10, 0x10a13; 0x10a15, 0x10a17; 0x10a19, 0x10a35; - 0x10a60, 0x10a7c; 0x10a80, 0x10a9c; 0x10ac0, 0x10ac7; 0x10ac9, 0x10ae4; 0x10b00, 0x10b35; - 0x10b40, 0x10b55; 0x10b60, 0x10b72; 0x10b80, 0x10b91; 0x10c00, 0x10c48; 0x10d00, 0x10d23; - 0x10e80, 0x10ea9; 0x10eb0, 0x10eb1; 0x10f00, 0x10f1c; 0x10f27, 
0x10f27; 0x10f30, 0x10f45; - 0x10f70, 0x10f81; 0x10fb0, 0x10fc4; 0x10fe0, 0x10ff6; 0x11003, 0x11037; 0x11071, 0x11072; - 0x11075, 0x11075; 0x11083, 0x110af; 0x110d0, 0x110e8; 0x11103, 0x11126; 0x11144, 0x11144; - 0x11147, 0x11147; 0x11150, 0x11172; 0x11176, 0x11176; 0x11183, 0x111b2; 0x111c1, 0x111c4; - 0x111da, 0x111da; 0x111dc, 0x111dc; 0x11200, 0x11211; 0x11213, 0x1122b; 0x1123f, 0x11240; - 0x11280, 0x11286; 0x11288, 0x11288; 0x1128a, 0x1128d; 0x1128f, 0x1129d; 0x1129f, 0x112a8; - 0x112b0, 0x112de; 0x11305, 0x1130c; 0x1130f, 0x11310; 0x11313, 0x11328; 0x1132a, 0x11330; - 0x11332, 0x11333; 0x11335, 0x11339; 0x1133d, 0x1133d; 0x11350, 0x11350; 0x1135d, 0x11361; - 0x11400, 0x11434; 0x11447, 0x1144a; 0x1145f, 0x11461; 0x11480, 0x114af; 0x114c4, 0x114c5; - 0x114c7, 0x114c7; 0x11580, 0x115ae; 0x115d8, 0x115db; 0x11600, 0x1162f; 0x11644, 0x11644; - 0x11680, 0x116aa; 0x116b8, 0x116b8; 0x11700, 0x1171a; 0x11740, 0x11746; 0x11800, 0x1182b; - 0x118ff, 0x11906; 0x11909, 0x11909; 0x1190c, 0x11913; 0x11915, 0x11916; 0x11918, 0x1192f; - 0x1193f, 0x1193f; 0x11941, 0x11941; 0x119a0, 0x119a7; 0x119aa, 0x119d0; 0x119e1, 0x119e1; - 0x119e3, 0x119e3; 0x11a00, 0x11a00; 0x11a0b, 0x11a32; 0x11a3a, 0x11a3a; 0x11a50, 0x11a50; - 0x11a5c, 0x11a89; 0x11a9d, 0x11a9d; 0x11ab0, 0x11af8; 0x11c00, 0x11c08; 0x11c0a, 0x11c2e; - 0x11c40, 0x11c40; 0x11c72, 0x11c8f; 0x11d00, 0x11d06; 0x11d08, 0x11d09; 0x11d0b, 0x11d30; - 0x11d46, 0x11d46; 0x11d60, 0x11d65; 0x11d67, 0x11d68; 0x11d6a, 0x11d89; 0x11d98, 0x11d98; - 0x11ee0, 0x11ef2; 0x11f02, 0x11f02; 0x11f04, 0x11f10; 0x11f12, 0x11f33; 0x11fb0, 0x11fb0; - 0x12000, 0x12399; 0x12480, 0x12543; 0x12f90, 0x12ff0; 0x13000, 0x1342f; 0x13441, 0x13446; - 0x14400, 0x14646; 0x16800, 0x16a38; 0x16a40, 0x16a5e; 0x16a70, 0x16abe; 0x16ad0, 0x16aed; - 0x16b00, 0x16b2f; 0x16b63, 0x16b77; 0x16b7d, 0x16b8f; 0x16f00, 0x16f4a; 0x16f50, 0x16f50; - 0x17000, 0x187f7; 0x18800, 0x18cd5; 0x18d00, 0x18d08; 0x1b000, 0x1b122; 0x1b132, 0x1b132; + 0x103c8, 0x103cf; 0x10450, 0x1049d; 
0x10500, 0x10527; 0x10530, 0x10563; 0x105c0, 0x105f3; + 0x10600, 0x10736; 0x10740, 0x10755; 0x10760, 0x10767; 0x10800, 0x10805; 0x10808, 0x10808; + 0x1080a, 0x10835; 0x10837, 0x10838; 0x1083c, 0x1083c; 0x1083f, 0x10855; 0x10860, 0x10876; + 0x10880, 0x1089e; 0x108e0, 0x108f2; 0x108f4, 0x108f5; 0x10900, 0x10915; 0x10920, 0x10939; + 0x10980, 0x109b7; 0x109be, 0x109bf; 0x10a00, 0x10a00; 0x10a10, 0x10a13; 0x10a15, 0x10a17; + 0x10a19, 0x10a35; 0x10a60, 0x10a7c; 0x10a80, 0x10a9c; 0x10ac0, 0x10ac7; 0x10ac9, 0x10ae4; + 0x10b00, 0x10b35; 0x10b40, 0x10b55; 0x10b60, 0x10b72; 0x10b80, 0x10b91; 0x10c00, 0x10c48; + 0x10d00, 0x10d23; 0x10d4a, 0x10d4d; 0x10d4f, 0x10d4f; 0x10e80, 0x10ea9; 0x10eb0, 0x10eb1; + 0x10ec2, 0x10ec4; 0x10f00, 0x10f1c; 0x10f27, 0x10f27; 0x10f30, 0x10f45; 0x10f70, 0x10f81; + 0x10fb0, 0x10fc4; 0x10fe0, 0x10ff6; 0x11003, 0x11037; 0x11071, 0x11072; 0x11075, 0x11075; + 0x11083, 0x110af; 0x110d0, 0x110e8; 0x11103, 0x11126; 0x11144, 0x11144; 0x11147, 0x11147; + 0x11150, 0x11172; 0x11176, 0x11176; 0x11183, 0x111b2; 0x111c1, 0x111c4; 0x111da, 0x111da; + 0x111dc, 0x111dc; 0x11200, 0x11211; 0x11213, 0x1122b; 0x1123f, 0x11240; 0x11280, 0x11286; + 0x11288, 0x11288; 0x1128a, 0x1128d; 0x1128f, 0x1129d; 0x1129f, 0x112a8; 0x112b0, 0x112de; + 0x11305, 0x1130c; 0x1130f, 0x11310; 0x11313, 0x11328; 0x1132a, 0x11330; 0x11332, 0x11333; + 0x11335, 0x11339; 0x1133d, 0x1133d; 0x11350, 0x11350; 0x1135d, 0x11361; 0x11380, 0x11389; + 0x1138b, 0x1138b; 0x1138e, 0x1138e; 0x11390, 0x113b5; 0x113b7, 0x113b7; 0x113d1, 0x113d1; + 0x113d3, 0x113d3; 0x11400, 0x11434; 0x11447, 0x1144a; 0x1145f, 0x11461; 0x11480, 0x114af; + 0x114c4, 0x114c5; 0x114c7, 0x114c7; 0x11580, 0x115ae; 0x115d8, 0x115db; 0x11600, 0x1162f; + 0x11644, 0x11644; 0x11680, 0x116aa; 0x116b8, 0x116b8; 0x11700, 0x1171a; 0x11740, 0x11746; + 0x11800, 0x1182b; 0x118ff, 0x11906; 0x11909, 0x11909; 0x1190c, 0x11913; 0x11915, 0x11916; + 0x11918, 0x1192f; 0x1193f, 0x1193f; 0x11941, 0x11941; 0x119a0, 0x119a7; 0x119aa, 0x119d0; + 0x119e1, 
0x119e1; 0x119e3, 0x119e3; 0x11a00, 0x11a00; 0x11a0b, 0x11a32; 0x11a3a, 0x11a3a; + 0x11a50, 0x11a50; 0x11a5c, 0x11a89; 0x11a9d, 0x11a9d; 0x11ab0, 0x11af8; 0x11bc0, 0x11be0; + 0x11c00, 0x11c08; 0x11c0a, 0x11c2e; 0x11c40, 0x11c40; 0x11c72, 0x11c8f; 0x11d00, 0x11d06; + 0x11d08, 0x11d09; 0x11d0b, 0x11d30; 0x11d46, 0x11d46; 0x11d60, 0x11d65; 0x11d67, 0x11d68; + 0x11d6a, 0x11d89; 0x11d98, 0x11d98; 0x11ee0, 0x11ef2; 0x11f02, 0x11f02; 0x11f04, 0x11f10; + 0x11f12, 0x11f33; 0x11fb0, 0x11fb0; 0x12000, 0x12399; 0x12480, 0x12543; 0x12f90, 0x12ff0; + 0x13000, 0x1342f; 0x13441, 0x13446; 0x13460, 0x143fa; 0x14400, 0x14646; 0x16100, 0x1611d; + 0x16800, 0x16a38; 0x16a40, 0x16a5e; 0x16a70, 0x16abe; 0x16ad0, 0x16aed; 0x16b00, 0x16b2f; + 0x16b63, 0x16b77; 0x16b7d, 0x16b8f; 0x16d43, 0x16d6a; 0x16f00, 0x16f4a; 0x16f50, 0x16f50; + 0x17000, 0x187f7; 0x18800, 0x18cd5; 0x18cff, 0x18d08; 0x1b000, 0x1b122; 0x1b132, 0x1b132; 0x1b150, 0x1b152; 0x1b155, 0x1b155; 0x1b164, 0x1b167; 0x1b170, 0x1b2fb; 0x1bc00, 0x1bc6a; 0x1bc70, 0x1bc7c; 0x1bc80, 0x1bc88; 0x1bc90, 0x1bc99; 0x1df0a, 0x1df0a; 0x1e100, 0x1e12c; - 0x1e14e, 0x1e14e; 0x1e290, 0x1e2ad; 0x1e2c0, 0x1e2eb; 0x1e4d0, 0x1e4ea; 0x1e7e0, 0x1e7e6; - 0x1e7e8, 0x1e7eb; 0x1e7ed, 0x1e7ee; 0x1e7f0, 0x1e7fe; 0x1e800, 0x1e8c4; 0x1ee00, 0x1ee03; - 0x1ee05, 0x1ee1f; 0x1ee21, 0x1ee22; 0x1ee24, 0x1ee24; 0x1ee27, 0x1ee27; 0x1ee29, 0x1ee32; - 0x1ee34, 0x1ee37; 0x1ee39, 0x1ee39; 0x1ee3b, 0x1ee3b; 0x1ee42, 0x1ee42; 0x1ee47, 0x1ee47; - 0x1ee49, 0x1ee49; 0x1ee4b, 0x1ee4b; 0x1ee4d, 0x1ee4f; 0x1ee51, 0x1ee52; 0x1ee54, 0x1ee54; - 0x1ee57, 0x1ee57; 0x1ee59, 0x1ee59; 0x1ee5b, 0x1ee5b; 0x1ee5d, 0x1ee5d; 0x1ee5f, 0x1ee5f; - 0x1ee61, 0x1ee62; 0x1ee64, 0x1ee64; 0x1ee67, 0x1ee6a; 0x1ee6c, 0x1ee72; 0x1ee74, 0x1ee77; - 0x1ee79, 0x1ee7c; 0x1ee7e, 0x1ee7e; 0x1ee80, 0x1ee89; 0x1ee8b, 0x1ee9b; 0x1eea1, 0x1eea3; - 0x1eea5, 0x1eea9; 0x1eeab, 0x1eebb; 0x20000, 0x2a6df; 0x2a700, 0x2b739; 0x2b740, 0x2b81d; - 0x2b820, 0x2cea1; 0x2ceb0, 0x2ebe0; 0x2f800, 0x2fa1d; 0x30000, 0x3134a; 0x31350, 
0x323af] + 0x1e14e, 0x1e14e; 0x1e290, 0x1e2ad; 0x1e2c0, 0x1e2eb; 0x1e4d0, 0x1e4ea; 0x1e5d0, 0x1e5ed; + 0x1e5f0, 0x1e5f0; 0x1e7e0, 0x1e7e6; 0x1e7e8, 0x1e7eb; 0x1e7ed, 0x1e7ee; 0x1e7f0, 0x1e7fe; + 0x1e800, 0x1e8c4; 0x1ee00, 0x1ee03; 0x1ee05, 0x1ee1f; 0x1ee21, 0x1ee22; 0x1ee24, 0x1ee24; + 0x1ee27, 0x1ee27; 0x1ee29, 0x1ee32; 0x1ee34, 0x1ee37; 0x1ee39, 0x1ee39; 0x1ee3b, 0x1ee3b; + 0x1ee42, 0x1ee42; 0x1ee47, 0x1ee47; 0x1ee49, 0x1ee49; 0x1ee4b, 0x1ee4b; 0x1ee4d, 0x1ee4f; + 0x1ee51, 0x1ee52; 0x1ee54, 0x1ee54; 0x1ee57, 0x1ee57; 0x1ee59, 0x1ee59; 0x1ee5b, 0x1ee5b; + 0x1ee5d, 0x1ee5d; 0x1ee5f, 0x1ee5f; 0x1ee61, 0x1ee62; 0x1ee64, 0x1ee64; 0x1ee67, 0x1ee6a; + 0x1ee6c, 0x1ee72; 0x1ee74, 0x1ee77; 0x1ee79, 0x1ee7c; 0x1ee7e, 0x1ee7e; 0x1ee80, 0x1ee89; + 0x1ee8b, 0x1ee9b; 0x1eea1, 0x1eea3; 0x1eea5, 0x1eea9; 0x1eeab, 0x1eebb; 0x20000, 0x2a6df; + 0x2a700, 0x2b739; 0x2b740, 0x2b81d; 0x2b820, 0x2cea1; 0x2ceb0, 0x2ebe0; 0x2ebf0, 0x2ee5d; + 0x2f800, 0x2fa1d; 0x30000, 0x3134a; 0x31350, 0x323af] let lt = Sedlex_utils.Cset.of_list [0x1c5, 0x1c5; 0x1c8, 0x1c8; 0x1cb, 0x1cb; 0x1f2, 0x1f2; 0x1f88, 0x1f8f; @@ -483,74 +494,75 @@ module Categories = struct 0x51a, 0x51a; 0x51c, 0x51c; 0x51e, 0x51e; 0x520, 0x520; 0x522, 0x522; 0x524, 0x524; 0x526, 0x526; 0x528, 0x528; 0x52a, 0x52a; 0x52c, 0x52c; 0x52e, 0x52e; 0x531, 0x556; 0x10a0, 0x10c5; 0x10c7, 0x10c7; 0x10cd, 0x10cd; - 0x13a0, 0x13f5; 0x1c90, 0x1cba; 0x1cbd, 0x1cbf; 0x1e00, 0x1e00; 0x1e02, 0x1e02; - 0x1e04, 0x1e04; 0x1e06, 0x1e06; 0x1e08, 0x1e08; 0x1e0a, 0x1e0a; 0x1e0c, 0x1e0c; - 0x1e0e, 0x1e0e; 0x1e10, 0x1e10; 0x1e12, 0x1e12; 0x1e14, 0x1e14; 0x1e16, 0x1e16; - 0x1e18, 0x1e18; 0x1e1a, 0x1e1a; 0x1e1c, 0x1e1c; 0x1e1e, 0x1e1e; 0x1e20, 0x1e20; - 0x1e22, 0x1e22; 0x1e24, 0x1e24; 0x1e26, 0x1e26; 0x1e28, 0x1e28; 0x1e2a, 0x1e2a; - 0x1e2c, 0x1e2c; 0x1e2e, 0x1e2e; 0x1e30, 0x1e30; 0x1e32, 0x1e32; 0x1e34, 0x1e34; - 0x1e36, 0x1e36; 0x1e38, 0x1e38; 0x1e3a, 0x1e3a; 0x1e3c, 0x1e3c; 0x1e3e, 0x1e3e; - 0x1e40, 0x1e40; 0x1e42, 0x1e42; 0x1e44, 0x1e44; 0x1e46, 0x1e46; 
0x1e48, 0x1e48; - 0x1e4a, 0x1e4a; 0x1e4c, 0x1e4c; 0x1e4e, 0x1e4e; 0x1e50, 0x1e50; 0x1e52, 0x1e52; - 0x1e54, 0x1e54; 0x1e56, 0x1e56; 0x1e58, 0x1e58; 0x1e5a, 0x1e5a; 0x1e5c, 0x1e5c; - 0x1e5e, 0x1e5e; 0x1e60, 0x1e60; 0x1e62, 0x1e62; 0x1e64, 0x1e64; 0x1e66, 0x1e66; - 0x1e68, 0x1e68; 0x1e6a, 0x1e6a; 0x1e6c, 0x1e6c; 0x1e6e, 0x1e6e; 0x1e70, 0x1e70; - 0x1e72, 0x1e72; 0x1e74, 0x1e74; 0x1e76, 0x1e76; 0x1e78, 0x1e78; 0x1e7a, 0x1e7a; - 0x1e7c, 0x1e7c; 0x1e7e, 0x1e7e; 0x1e80, 0x1e80; 0x1e82, 0x1e82; 0x1e84, 0x1e84; - 0x1e86, 0x1e86; 0x1e88, 0x1e88; 0x1e8a, 0x1e8a; 0x1e8c, 0x1e8c; 0x1e8e, 0x1e8e; - 0x1e90, 0x1e90; 0x1e92, 0x1e92; 0x1e94, 0x1e94; 0x1e9e, 0x1e9e; 0x1ea0, 0x1ea0; - 0x1ea2, 0x1ea2; 0x1ea4, 0x1ea4; 0x1ea6, 0x1ea6; 0x1ea8, 0x1ea8; 0x1eaa, 0x1eaa; - 0x1eac, 0x1eac; 0x1eae, 0x1eae; 0x1eb0, 0x1eb0; 0x1eb2, 0x1eb2; 0x1eb4, 0x1eb4; - 0x1eb6, 0x1eb6; 0x1eb8, 0x1eb8; 0x1eba, 0x1eba; 0x1ebc, 0x1ebc; 0x1ebe, 0x1ebe; - 0x1ec0, 0x1ec0; 0x1ec2, 0x1ec2; 0x1ec4, 0x1ec4; 0x1ec6, 0x1ec6; 0x1ec8, 0x1ec8; - 0x1eca, 0x1eca; 0x1ecc, 0x1ecc; 0x1ece, 0x1ece; 0x1ed0, 0x1ed0; 0x1ed2, 0x1ed2; - 0x1ed4, 0x1ed4; 0x1ed6, 0x1ed6; 0x1ed8, 0x1ed8; 0x1eda, 0x1eda; 0x1edc, 0x1edc; - 0x1ede, 0x1ede; 0x1ee0, 0x1ee0; 0x1ee2, 0x1ee2; 0x1ee4, 0x1ee4; 0x1ee6, 0x1ee6; - 0x1ee8, 0x1ee8; 0x1eea, 0x1eea; 0x1eec, 0x1eec; 0x1eee, 0x1eee; 0x1ef0, 0x1ef0; - 0x1ef2, 0x1ef2; 0x1ef4, 0x1ef4; 0x1ef6, 0x1ef6; 0x1ef8, 0x1ef8; 0x1efa, 0x1efa; - 0x1efc, 0x1efc; 0x1efe, 0x1efe; 0x1f08, 0x1f0f; 0x1f18, 0x1f1d; 0x1f28, 0x1f2f; - 0x1f38, 0x1f3f; 0x1f48, 0x1f4d; 0x1f59, 0x1f59; 0x1f5b, 0x1f5b; 0x1f5d, 0x1f5d; - 0x1f5f, 0x1f5f; 0x1f68, 0x1f6f; 0x1fb8, 0x1fbb; 0x1fc8, 0x1fcb; 0x1fd8, 0x1fdb; - 0x1fe8, 0x1fec; 0x1ff8, 0x1ffb; 0x2102, 0x2102; 0x2107, 0x2107; 0x210b, 0x210d; - 0x2110, 0x2112; 0x2115, 0x2115; 0x2119, 0x211d; 0x2124, 0x2124; 0x2126, 0x2126; - 0x2128, 0x2128; 0x212a, 0x212d; 0x2130, 0x2133; 0x213e, 0x213f; 0x2145, 0x2145; - 0x2183, 0x2183; 0x2c00, 0x2c2f; 0x2c60, 0x2c60; 0x2c62, 0x2c64; 0x2c67, 0x2c67; - 0x2c69, 
0x2c69; 0x2c6b, 0x2c6b; 0x2c6d, 0x2c70; 0x2c72, 0x2c72; 0x2c75, 0x2c75; - 0x2c7e, 0x2c80; 0x2c82, 0x2c82; 0x2c84, 0x2c84; 0x2c86, 0x2c86; 0x2c88, 0x2c88; - 0x2c8a, 0x2c8a; 0x2c8c, 0x2c8c; 0x2c8e, 0x2c8e; 0x2c90, 0x2c90; 0x2c92, 0x2c92; - 0x2c94, 0x2c94; 0x2c96, 0x2c96; 0x2c98, 0x2c98; 0x2c9a, 0x2c9a; 0x2c9c, 0x2c9c; - 0x2c9e, 0x2c9e; 0x2ca0, 0x2ca0; 0x2ca2, 0x2ca2; 0x2ca4, 0x2ca4; 0x2ca6, 0x2ca6; - 0x2ca8, 0x2ca8; 0x2caa, 0x2caa; 0x2cac, 0x2cac; 0x2cae, 0x2cae; 0x2cb0, 0x2cb0; - 0x2cb2, 0x2cb2; 0x2cb4, 0x2cb4; 0x2cb6, 0x2cb6; 0x2cb8, 0x2cb8; 0x2cba, 0x2cba; - 0x2cbc, 0x2cbc; 0x2cbe, 0x2cbe; 0x2cc0, 0x2cc0; 0x2cc2, 0x2cc2; 0x2cc4, 0x2cc4; - 0x2cc6, 0x2cc6; 0x2cc8, 0x2cc8; 0x2cca, 0x2cca; 0x2ccc, 0x2ccc; 0x2cce, 0x2cce; - 0x2cd0, 0x2cd0; 0x2cd2, 0x2cd2; 0x2cd4, 0x2cd4; 0x2cd6, 0x2cd6; 0x2cd8, 0x2cd8; - 0x2cda, 0x2cda; 0x2cdc, 0x2cdc; 0x2cde, 0x2cde; 0x2ce0, 0x2ce0; 0x2ce2, 0x2ce2; - 0x2ceb, 0x2ceb; 0x2ced, 0x2ced; 0x2cf2, 0x2cf2; 0xa640, 0xa640; 0xa642, 0xa642; - 0xa644, 0xa644; 0xa646, 0xa646; 0xa648, 0xa648; 0xa64a, 0xa64a; 0xa64c, 0xa64c; - 0xa64e, 0xa64e; 0xa650, 0xa650; 0xa652, 0xa652; 0xa654, 0xa654; 0xa656, 0xa656; - 0xa658, 0xa658; 0xa65a, 0xa65a; 0xa65c, 0xa65c; 0xa65e, 0xa65e; 0xa660, 0xa660; - 0xa662, 0xa662; 0xa664, 0xa664; 0xa666, 0xa666; 0xa668, 0xa668; 0xa66a, 0xa66a; - 0xa66c, 0xa66c; 0xa680, 0xa680; 0xa682, 0xa682; 0xa684, 0xa684; 0xa686, 0xa686; - 0xa688, 0xa688; 0xa68a, 0xa68a; 0xa68c, 0xa68c; 0xa68e, 0xa68e; 0xa690, 0xa690; - 0xa692, 0xa692; 0xa694, 0xa694; 0xa696, 0xa696; 0xa698, 0xa698; 0xa69a, 0xa69a; - 0xa722, 0xa722; 0xa724, 0xa724; 0xa726, 0xa726; 0xa728, 0xa728; 0xa72a, 0xa72a; - 0xa72c, 0xa72c; 0xa72e, 0xa72e; 0xa732, 0xa732; 0xa734, 0xa734; 0xa736, 0xa736; - 0xa738, 0xa738; 0xa73a, 0xa73a; 0xa73c, 0xa73c; 0xa73e, 0xa73e; 0xa740, 0xa740; - 0xa742, 0xa742; 0xa744, 0xa744; 0xa746, 0xa746; 0xa748, 0xa748; 0xa74a, 0xa74a; - 0xa74c, 0xa74c; 0xa74e, 0xa74e; 0xa750, 0xa750; 0xa752, 0xa752; 0xa754, 0xa754; - 0xa756, 0xa756; 0xa758, 0xa758; 0xa75a, 
0xa75a; 0xa75c, 0xa75c; 0xa75e, 0xa75e; - 0xa760, 0xa760; 0xa762, 0xa762; 0xa764, 0xa764; 0xa766, 0xa766; 0xa768, 0xa768; - 0xa76a, 0xa76a; 0xa76c, 0xa76c; 0xa76e, 0xa76e; 0xa779, 0xa779; 0xa77b, 0xa77b; - 0xa77d, 0xa77e; 0xa780, 0xa780; 0xa782, 0xa782; 0xa784, 0xa784; 0xa786, 0xa786; - 0xa78b, 0xa78b; 0xa78d, 0xa78d; 0xa790, 0xa790; 0xa792, 0xa792; 0xa796, 0xa796; - 0xa798, 0xa798; 0xa79a, 0xa79a; 0xa79c, 0xa79c; 0xa79e, 0xa79e; 0xa7a0, 0xa7a0; - 0xa7a2, 0xa7a2; 0xa7a4, 0xa7a4; 0xa7a6, 0xa7a6; 0xa7a8, 0xa7a8; 0xa7aa, 0xa7ae; - 0xa7b0, 0xa7b4; 0xa7b6, 0xa7b6; 0xa7b8, 0xa7b8; 0xa7ba, 0xa7ba; 0xa7bc, 0xa7bc; - 0xa7be, 0xa7be; 0xa7c0, 0xa7c0; 0xa7c2, 0xa7c2; 0xa7c4, 0xa7c7; 0xa7c9, 0xa7c9; - 0xa7d0, 0xa7d0; 0xa7d6, 0xa7d6; 0xa7d8, 0xa7d8; 0xa7f5, 0xa7f5; 0xff21, 0xff3a; - 0x10400, 0x10427; 0x104b0, 0x104d3; 0x10570, 0x1057a; 0x1057c, 0x1058a; 0x1058c, 0x10592; - 0x10594, 0x10595; 0x10c80, 0x10cb2; 0x118a0, 0x118bf; 0x16e40, 0x16e5f; 0x1d400, 0x1d419; + 0x13a0, 0x13f5; 0x1c89, 0x1c89; 0x1c90, 0x1cba; 0x1cbd, 0x1cbf; 0x1e00, 0x1e00; + 0x1e02, 0x1e02; 0x1e04, 0x1e04; 0x1e06, 0x1e06; 0x1e08, 0x1e08; 0x1e0a, 0x1e0a; + 0x1e0c, 0x1e0c; 0x1e0e, 0x1e0e; 0x1e10, 0x1e10; 0x1e12, 0x1e12; 0x1e14, 0x1e14; + 0x1e16, 0x1e16; 0x1e18, 0x1e18; 0x1e1a, 0x1e1a; 0x1e1c, 0x1e1c; 0x1e1e, 0x1e1e; + 0x1e20, 0x1e20; 0x1e22, 0x1e22; 0x1e24, 0x1e24; 0x1e26, 0x1e26; 0x1e28, 0x1e28; + 0x1e2a, 0x1e2a; 0x1e2c, 0x1e2c; 0x1e2e, 0x1e2e; 0x1e30, 0x1e30; 0x1e32, 0x1e32; + 0x1e34, 0x1e34; 0x1e36, 0x1e36; 0x1e38, 0x1e38; 0x1e3a, 0x1e3a; 0x1e3c, 0x1e3c; + 0x1e3e, 0x1e3e; 0x1e40, 0x1e40; 0x1e42, 0x1e42; 0x1e44, 0x1e44; 0x1e46, 0x1e46; + 0x1e48, 0x1e48; 0x1e4a, 0x1e4a; 0x1e4c, 0x1e4c; 0x1e4e, 0x1e4e; 0x1e50, 0x1e50; + 0x1e52, 0x1e52; 0x1e54, 0x1e54; 0x1e56, 0x1e56; 0x1e58, 0x1e58; 0x1e5a, 0x1e5a; + 0x1e5c, 0x1e5c; 0x1e5e, 0x1e5e; 0x1e60, 0x1e60; 0x1e62, 0x1e62; 0x1e64, 0x1e64; + 0x1e66, 0x1e66; 0x1e68, 0x1e68; 0x1e6a, 0x1e6a; 0x1e6c, 0x1e6c; 0x1e6e, 0x1e6e; + 0x1e70, 0x1e70; 0x1e72, 0x1e72; 0x1e74, 0x1e74; 
0x1e76, 0x1e76; 0x1e78, 0x1e78; + 0x1e7a, 0x1e7a; 0x1e7c, 0x1e7c; 0x1e7e, 0x1e7e; 0x1e80, 0x1e80; 0x1e82, 0x1e82; + 0x1e84, 0x1e84; 0x1e86, 0x1e86; 0x1e88, 0x1e88; 0x1e8a, 0x1e8a; 0x1e8c, 0x1e8c; + 0x1e8e, 0x1e8e; 0x1e90, 0x1e90; 0x1e92, 0x1e92; 0x1e94, 0x1e94; 0x1e9e, 0x1e9e; + 0x1ea0, 0x1ea0; 0x1ea2, 0x1ea2; 0x1ea4, 0x1ea4; 0x1ea6, 0x1ea6; 0x1ea8, 0x1ea8; + 0x1eaa, 0x1eaa; 0x1eac, 0x1eac; 0x1eae, 0x1eae; 0x1eb0, 0x1eb0; 0x1eb2, 0x1eb2; + 0x1eb4, 0x1eb4; 0x1eb6, 0x1eb6; 0x1eb8, 0x1eb8; 0x1eba, 0x1eba; 0x1ebc, 0x1ebc; + 0x1ebe, 0x1ebe; 0x1ec0, 0x1ec0; 0x1ec2, 0x1ec2; 0x1ec4, 0x1ec4; 0x1ec6, 0x1ec6; + 0x1ec8, 0x1ec8; 0x1eca, 0x1eca; 0x1ecc, 0x1ecc; 0x1ece, 0x1ece; 0x1ed0, 0x1ed0; + 0x1ed2, 0x1ed2; 0x1ed4, 0x1ed4; 0x1ed6, 0x1ed6; 0x1ed8, 0x1ed8; 0x1eda, 0x1eda; + 0x1edc, 0x1edc; 0x1ede, 0x1ede; 0x1ee0, 0x1ee0; 0x1ee2, 0x1ee2; 0x1ee4, 0x1ee4; + 0x1ee6, 0x1ee6; 0x1ee8, 0x1ee8; 0x1eea, 0x1eea; 0x1eec, 0x1eec; 0x1eee, 0x1eee; + 0x1ef0, 0x1ef0; 0x1ef2, 0x1ef2; 0x1ef4, 0x1ef4; 0x1ef6, 0x1ef6; 0x1ef8, 0x1ef8; + 0x1efa, 0x1efa; 0x1efc, 0x1efc; 0x1efe, 0x1efe; 0x1f08, 0x1f0f; 0x1f18, 0x1f1d; + 0x1f28, 0x1f2f; 0x1f38, 0x1f3f; 0x1f48, 0x1f4d; 0x1f59, 0x1f59; 0x1f5b, 0x1f5b; + 0x1f5d, 0x1f5d; 0x1f5f, 0x1f5f; 0x1f68, 0x1f6f; 0x1fb8, 0x1fbb; 0x1fc8, 0x1fcb; + 0x1fd8, 0x1fdb; 0x1fe8, 0x1fec; 0x1ff8, 0x1ffb; 0x2102, 0x2102; 0x2107, 0x2107; + 0x210b, 0x210d; 0x2110, 0x2112; 0x2115, 0x2115; 0x2119, 0x211d; 0x2124, 0x2124; + 0x2126, 0x2126; 0x2128, 0x2128; 0x212a, 0x212d; 0x2130, 0x2133; 0x213e, 0x213f; + 0x2145, 0x2145; 0x2183, 0x2183; 0x2c00, 0x2c2f; 0x2c60, 0x2c60; 0x2c62, 0x2c64; + 0x2c67, 0x2c67; 0x2c69, 0x2c69; 0x2c6b, 0x2c6b; 0x2c6d, 0x2c70; 0x2c72, 0x2c72; + 0x2c75, 0x2c75; 0x2c7e, 0x2c80; 0x2c82, 0x2c82; 0x2c84, 0x2c84; 0x2c86, 0x2c86; + 0x2c88, 0x2c88; 0x2c8a, 0x2c8a; 0x2c8c, 0x2c8c; 0x2c8e, 0x2c8e; 0x2c90, 0x2c90; + 0x2c92, 0x2c92; 0x2c94, 0x2c94; 0x2c96, 0x2c96; 0x2c98, 0x2c98; 0x2c9a, 0x2c9a; + 0x2c9c, 0x2c9c; 0x2c9e, 0x2c9e; 0x2ca0, 0x2ca0; 0x2ca2, 0x2ca2; 0x2ca4, 0x2ca4; 
+ 0x2ca6, 0x2ca6; 0x2ca8, 0x2ca8; 0x2caa, 0x2caa; 0x2cac, 0x2cac; 0x2cae, 0x2cae; + 0x2cb0, 0x2cb0; 0x2cb2, 0x2cb2; 0x2cb4, 0x2cb4; 0x2cb6, 0x2cb6; 0x2cb8, 0x2cb8; + 0x2cba, 0x2cba; 0x2cbc, 0x2cbc; 0x2cbe, 0x2cbe; 0x2cc0, 0x2cc0; 0x2cc2, 0x2cc2; + 0x2cc4, 0x2cc4; 0x2cc6, 0x2cc6; 0x2cc8, 0x2cc8; 0x2cca, 0x2cca; 0x2ccc, 0x2ccc; + 0x2cce, 0x2cce; 0x2cd0, 0x2cd0; 0x2cd2, 0x2cd2; 0x2cd4, 0x2cd4; 0x2cd6, 0x2cd6; + 0x2cd8, 0x2cd8; 0x2cda, 0x2cda; 0x2cdc, 0x2cdc; 0x2cde, 0x2cde; 0x2ce0, 0x2ce0; + 0x2ce2, 0x2ce2; 0x2ceb, 0x2ceb; 0x2ced, 0x2ced; 0x2cf2, 0x2cf2; 0xa640, 0xa640; + 0xa642, 0xa642; 0xa644, 0xa644; 0xa646, 0xa646; 0xa648, 0xa648; 0xa64a, 0xa64a; + 0xa64c, 0xa64c; 0xa64e, 0xa64e; 0xa650, 0xa650; 0xa652, 0xa652; 0xa654, 0xa654; + 0xa656, 0xa656; 0xa658, 0xa658; 0xa65a, 0xa65a; 0xa65c, 0xa65c; 0xa65e, 0xa65e; + 0xa660, 0xa660; 0xa662, 0xa662; 0xa664, 0xa664; 0xa666, 0xa666; 0xa668, 0xa668; + 0xa66a, 0xa66a; 0xa66c, 0xa66c; 0xa680, 0xa680; 0xa682, 0xa682; 0xa684, 0xa684; + 0xa686, 0xa686; 0xa688, 0xa688; 0xa68a, 0xa68a; 0xa68c, 0xa68c; 0xa68e, 0xa68e; + 0xa690, 0xa690; 0xa692, 0xa692; 0xa694, 0xa694; 0xa696, 0xa696; 0xa698, 0xa698; + 0xa69a, 0xa69a; 0xa722, 0xa722; 0xa724, 0xa724; 0xa726, 0xa726; 0xa728, 0xa728; + 0xa72a, 0xa72a; 0xa72c, 0xa72c; 0xa72e, 0xa72e; 0xa732, 0xa732; 0xa734, 0xa734; + 0xa736, 0xa736; 0xa738, 0xa738; 0xa73a, 0xa73a; 0xa73c, 0xa73c; 0xa73e, 0xa73e; + 0xa740, 0xa740; 0xa742, 0xa742; 0xa744, 0xa744; 0xa746, 0xa746; 0xa748, 0xa748; + 0xa74a, 0xa74a; 0xa74c, 0xa74c; 0xa74e, 0xa74e; 0xa750, 0xa750; 0xa752, 0xa752; + 0xa754, 0xa754; 0xa756, 0xa756; 0xa758, 0xa758; 0xa75a, 0xa75a; 0xa75c, 0xa75c; + 0xa75e, 0xa75e; 0xa760, 0xa760; 0xa762, 0xa762; 0xa764, 0xa764; 0xa766, 0xa766; + 0xa768, 0xa768; 0xa76a, 0xa76a; 0xa76c, 0xa76c; 0xa76e, 0xa76e; 0xa779, 0xa779; + 0xa77b, 0xa77b; 0xa77d, 0xa77e; 0xa780, 0xa780; 0xa782, 0xa782; 0xa784, 0xa784; + 0xa786, 0xa786; 0xa78b, 0xa78b; 0xa78d, 0xa78d; 0xa790, 0xa790; 0xa792, 0xa792; + 0xa796, 0xa796; 0xa798, 
0xa798; 0xa79a, 0xa79a; 0xa79c, 0xa79c; 0xa79e, 0xa79e; + 0xa7a0, 0xa7a0; 0xa7a2, 0xa7a2; 0xa7a4, 0xa7a4; 0xa7a6, 0xa7a6; 0xa7a8, 0xa7a8; + 0xa7aa, 0xa7ae; 0xa7b0, 0xa7b4; 0xa7b6, 0xa7b6; 0xa7b8, 0xa7b8; 0xa7ba, 0xa7ba; + 0xa7bc, 0xa7bc; 0xa7be, 0xa7be; 0xa7c0, 0xa7c0; 0xa7c2, 0xa7c2; 0xa7c4, 0xa7c7; + 0xa7c9, 0xa7c9; 0xa7cb, 0xa7cc; 0xa7d0, 0xa7d0; 0xa7d6, 0xa7d6; 0xa7d8, 0xa7d8; + 0xa7da, 0xa7da; 0xa7dc, 0xa7dc; 0xa7f5, 0xa7f5; 0xff21, 0xff3a; 0x10400, 0x10427; + 0x104b0, 0x104d3; 0x10570, 0x1057a; 0x1057c, 0x1058a; 0x1058c, 0x10592; 0x10594, 0x10595; + 0x10c80, 0x10cb2; 0x10d50, 0x10d65; 0x118a0, 0x118bf; 0x16e40, 0x16e5f; 0x1d400, 0x1d419; 0x1d434, 0x1d44d; 0x1d468, 0x1d481; 0x1d49c, 0x1d49c; 0x1d49e, 0x1d49f; 0x1d4a2, 0x1d4a2; 0x1d4a5, 0x1d4a6; 0x1d4a9, 0x1d4ac; 0x1d4ae, 0x1d4b5; 0x1d4d0, 0x1d4e9; 0x1d504, 0x1d505; 0x1d507, 0x1d50a; 0x1d50d, 0x1d514; 0x1d516, 0x1d51c; 0x1d538, 0x1d539; 0x1d53b, 0x1d53e; @@ -586,17 +598,18 @@ module Categories = struct 0x110b0, 0x110b2; 0x110b7, 0x110b8; 0x1112c, 0x1112c; 0x11145, 0x11146; 0x11182, 0x11182; 0x111b3, 0x111b5; 0x111bf, 0x111c0; 0x111ce, 0x111ce; 0x1122c, 0x1122e; 0x11232, 0x11233; 0x11235, 0x11235; 0x112e0, 0x112e2; 0x11302, 0x11303; 0x1133e, 0x1133f; 0x11341, 0x11344; - 0x11347, 0x11348; 0x1134b, 0x1134d; 0x11357, 0x11357; 0x11362, 0x11363; 0x11435, 0x11437; - 0x11440, 0x11441; 0x11445, 0x11445; 0x114b0, 0x114b2; 0x114b9, 0x114b9; 0x114bb, 0x114be; - 0x114c1, 0x114c1; 0x115af, 0x115b1; 0x115b8, 0x115bb; 0x115be, 0x115be; 0x11630, 0x11632; - 0x1163b, 0x1163c; 0x1163e, 0x1163e; 0x116ac, 0x116ac; 0x116ae, 0x116af; 0x116b6, 0x116b6; - 0x11720, 0x11721; 0x11726, 0x11726; 0x1182c, 0x1182e; 0x11838, 0x11838; 0x11930, 0x11935; - 0x11937, 0x11938; 0x1193d, 0x1193d; 0x11940, 0x11940; 0x11942, 0x11942; 0x119d1, 0x119d3; - 0x119dc, 0x119df; 0x119e4, 0x119e4; 0x11a39, 0x11a39; 0x11a57, 0x11a58; 0x11a97, 0x11a97; - 0x11c2f, 0x11c2f; 0x11c3e, 0x11c3e; 0x11ca9, 0x11ca9; 0x11cb1, 0x11cb1; 0x11cb4, 0x11cb4; - 0x11d8a, 0x11d8e; 
0x11d93, 0x11d94; 0x11d96, 0x11d96; 0x11ef5, 0x11ef6; 0x11f03, 0x11f03; - 0x11f34, 0x11f35; 0x11f3e, 0x11f3f; 0x11f41, 0x11f41; 0x16f51, 0x16f87; 0x16ff0, 0x16ff1; - 0x1d165, 0x1d166; 0x1d16d, 0x1d172] + 0x11347, 0x11348; 0x1134b, 0x1134d; 0x11357, 0x11357; 0x11362, 0x11363; 0x113b8, 0x113ba; + 0x113c2, 0x113c2; 0x113c5, 0x113c5; 0x113c7, 0x113ca; 0x113cc, 0x113cd; 0x113cf, 0x113cf; + 0x11435, 0x11437; 0x11440, 0x11441; 0x11445, 0x11445; 0x114b0, 0x114b2; 0x114b9, 0x114b9; + 0x114bb, 0x114be; 0x114c1, 0x114c1; 0x115af, 0x115b1; 0x115b8, 0x115bb; 0x115be, 0x115be; + 0x11630, 0x11632; 0x1163b, 0x1163c; 0x1163e, 0x1163e; 0x116ac, 0x116ac; 0x116ae, 0x116af; + 0x116b6, 0x116b6; 0x1171e, 0x1171e; 0x11720, 0x11721; 0x11726, 0x11726; 0x1182c, 0x1182e; + 0x11838, 0x11838; 0x11930, 0x11935; 0x11937, 0x11938; 0x1193d, 0x1193d; 0x11940, 0x11940; + 0x11942, 0x11942; 0x119d1, 0x119d3; 0x119dc, 0x119df; 0x119e4, 0x119e4; 0x11a39, 0x11a39; + 0x11a57, 0x11a58; 0x11a97, 0x11a97; 0x11c2f, 0x11c2f; 0x11c3e, 0x11c3e; 0x11ca9, 0x11ca9; + 0x11cb1, 0x11cb1; 0x11cb4, 0x11cb4; 0x11d8a, 0x11d8e; 0x11d93, 0x11d94; 0x11d96, 0x11d96; + 0x11ef5, 0x11ef6; 0x11f03, 0x11f03; 0x11f34, 0x11f35; 0x11f3e, 0x11f3f; 0x11f41, 0x11f41; + 0x1612a, 0x1612c; 0x16f51, 0x16f87; 0x16ff0, 0x16ff1; 0x1d165, 0x1d166; 0x1d16d, 0x1d172] let me = Sedlex_utils.Cset.of_list [0x488, 0x489; 0x1abe, 0x1abe; 0x20dd, 0x20e0; 0x20e2, 0x20e4; 0xa670, 0xa672] @@ -606,7 +619,7 @@ module Categories = struct 0x5c4, 0x5c5; 0x5c7, 0x5c7; 0x610, 0x61a; 0x64b, 0x65f; 0x670, 0x670; 0x6d6, 0x6dc; 0x6df, 0x6e4; 0x6e7, 0x6e8; 0x6ea, 0x6ed; 0x711, 0x711; 0x730, 0x74a; 0x7a6, 0x7b0; 0x7eb, 0x7f3; 0x7fd, 0x7fd; 0x816, 0x819; - 0x81b, 0x823; 0x825, 0x827; 0x829, 0x82d; 0x859, 0x85b; 0x898, 0x89f; + 0x81b, 0x823; 0x825, 0x827; 0x829, 0x82d; 0x859, 0x85b; 0x897, 0x89f; 0x8ca, 0x8e1; 0x8e3, 0x902; 0x93a, 0x93a; 0x93c, 0x93c; 0x941, 0x948; 0x94d, 0x94d; 0x951, 0x957; 0x962, 0x963; 0x981, 0x981; 0x9bc, 0x9bc; 0x9c1, 0x9c4; 0x9cd, 0x9cd; 0x9e2, 
0x9e3; 0x9fe, 0x9fe; 0xa01, 0xa02; @@ -646,32 +659,34 @@ module Categories = struct 0xaaf6, 0xaaf6; 0xabe5, 0xabe5; 0xabe8, 0xabe8; 0xabed, 0xabed; 0xfb1e, 0xfb1e; 0xfe00, 0xfe0f; 0xfe20, 0xfe2f; 0x101fd, 0x101fd; 0x102e0, 0x102e0; 0x10376, 0x1037a; 0x10a01, 0x10a03; 0x10a05, 0x10a06; 0x10a0c, 0x10a0f; 0x10a38, 0x10a3a; 0x10a3f, 0x10a3f; - 0x10ae5, 0x10ae6; 0x10d24, 0x10d27; 0x10eab, 0x10eac; 0x10efd, 0x10eff; 0x10f46, 0x10f50; - 0x10f82, 0x10f85; 0x11001, 0x11001; 0x11038, 0x11046; 0x11070, 0x11070; 0x11073, 0x11074; - 0x1107f, 0x11081; 0x110b3, 0x110b6; 0x110b9, 0x110ba; 0x110c2, 0x110c2; 0x11100, 0x11102; - 0x11127, 0x1112b; 0x1112d, 0x11134; 0x11173, 0x11173; 0x11180, 0x11181; 0x111b6, 0x111be; - 0x111c9, 0x111cc; 0x111cf, 0x111cf; 0x1122f, 0x11231; 0x11234, 0x11234; 0x11236, 0x11237; - 0x1123e, 0x1123e; 0x11241, 0x11241; 0x112df, 0x112df; 0x112e3, 0x112ea; 0x11300, 0x11301; - 0x1133b, 0x1133c; 0x11340, 0x11340; 0x11366, 0x1136c; 0x11370, 0x11374; 0x11438, 0x1143f; - 0x11442, 0x11444; 0x11446, 0x11446; 0x1145e, 0x1145e; 0x114b3, 0x114b8; 0x114ba, 0x114ba; - 0x114bf, 0x114c0; 0x114c2, 0x114c3; 0x115b2, 0x115b5; 0x115bc, 0x115bd; 0x115bf, 0x115c0; - 0x115dc, 0x115dd; 0x11633, 0x1163a; 0x1163d, 0x1163d; 0x1163f, 0x11640; 0x116ab, 0x116ab; - 0x116ad, 0x116ad; 0x116b0, 0x116b5; 0x116b7, 0x116b7; 0x1171d, 0x1171f; 0x11722, 0x11725; - 0x11727, 0x1172b; 0x1182f, 0x11837; 0x11839, 0x1183a; 0x1193b, 0x1193c; 0x1193e, 0x1193e; - 0x11943, 0x11943; 0x119d4, 0x119d7; 0x119da, 0x119db; 0x119e0, 0x119e0; 0x11a01, 0x11a0a; - 0x11a33, 0x11a38; 0x11a3b, 0x11a3e; 0x11a47, 0x11a47; 0x11a51, 0x11a56; 0x11a59, 0x11a5b; - 0x11a8a, 0x11a96; 0x11a98, 0x11a99; 0x11c30, 0x11c36; 0x11c38, 0x11c3d; 0x11c3f, 0x11c3f; - 0x11c92, 0x11ca7; 0x11caa, 0x11cb0; 0x11cb2, 0x11cb3; 0x11cb5, 0x11cb6; 0x11d31, 0x11d36; - 0x11d3a, 0x11d3a; 0x11d3c, 0x11d3d; 0x11d3f, 0x11d45; 0x11d47, 0x11d47; 0x11d90, 0x11d91; - 0x11d95, 0x11d95; 0x11d97, 0x11d97; 0x11ef3, 0x11ef4; 0x11f00, 0x11f01; 0x11f36, 0x11f3a; - 
0x11f40, 0x11f40; 0x11f42, 0x11f42; 0x13440, 0x13440; 0x13447, 0x13455; 0x16af0, 0x16af4; + 0x10ae5, 0x10ae6; 0x10d24, 0x10d27; 0x10d69, 0x10d6d; 0x10eab, 0x10eac; 0x10efc, 0x10eff; + 0x10f46, 0x10f50; 0x10f82, 0x10f85; 0x11001, 0x11001; 0x11038, 0x11046; 0x11070, 0x11070; + 0x11073, 0x11074; 0x1107f, 0x11081; 0x110b3, 0x110b6; 0x110b9, 0x110ba; 0x110c2, 0x110c2; + 0x11100, 0x11102; 0x11127, 0x1112b; 0x1112d, 0x11134; 0x11173, 0x11173; 0x11180, 0x11181; + 0x111b6, 0x111be; 0x111c9, 0x111cc; 0x111cf, 0x111cf; 0x1122f, 0x11231; 0x11234, 0x11234; + 0x11236, 0x11237; 0x1123e, 0x1123e; 0x11241, 0x11241; 0x112df, 0x112df; 0x112e3, 0x112ea; + 0x11300, 0x11301; 0x1133b, 0x1133c; 0x11340, 0x11340; 0x11366, 0x1136c; 0x11370, 0x11374; + 0x113bb, 0x113c0; 0x113ce, 0x113ce; 0x113d0, 0x113d0; 0x113d2, 0x113d2; 0x113e1, 0x113e2; + 0x11438, 0x1143f; 0x11442, 0x11444; 0x11446, 0x11446; 0x1145e, 0x1145e; 0x114b3, 0x114b8; + 0x114ba, 0x114ba; 0x114bf, 0x114c0; 0x114c2, 0x114c3; 0x115b2, 0x115b5; 0x115bc, 0x115bd; + 0x115bf, 0x115c0; 0x115dc, 0x115dd; 0x11633, 0x1163a; 0x1163d, 0x1163d; 0x1163f, 0x11640; + 0x116ab, 0x116ab; 0x116ad, 0x116ad; 0x116b0, 0x116b5; 0x116b7, 0x116b7; 0x1171d, 0x1171d; + 0x1171f, 0x1171f; 0x11722, 0x11725; 0x11727, 0x1172b; 0x1182f, 0x11837; 0x11839, 0x1183a; + 0x1193b, 0x1193c; 0x1193e, 0x1193e; 0x11943, 0x11943; 0x119d4, 0x119d7; 0x119da, 0x119db; + 0x119e0, 0x119e0; 0x11a01, 0x11a0a; 0x11a33, 0x11a38; 0x11a3b, 0x11a3e; 0x11a47, 0x11a47; + 0x11a51, 0x11a56; 0x11a59, 0x11a5b; 0x11a8a, 0x11a96; 0x11a98, 0x11a99; 0x11c30, 0x11c36; + 0x11c38, 0x11c3d; 0x11c3f, 0x11c3f; 0x11c92, 0x11ca7; 0x11caa, 0x11cb0; 0x11cb2, 0x11cb3; + 0x11cb5, 0x11cb6; 0x11d31, 0x11d36; 0x11d3a, 0x11d3a; 0x11d3c, 0x11d3d; 0x11d3f, 0x11d45; + 0x11d47, 0x11d47; 0x11d90, 0x11d91; 0x11d95, 0x11d95; 0x11d97, 0x11d97; 0x11ef3, 0x11ef4; + 0x11f00, 0x11f01; 0x11f36, 0x11f3a; 0x11f40, 0x11f40; 0x11f42, 0x11f42; 0x11f5a, 0x11f5a; + 0x13440, 0x13440; 0x13447, 0x13455; 0x1611e, 0x16129; 0x1612d, 
0x1612f; 0x16af0, 0x16af4; 0x16b30, 0x16b36; 0x16f4f, 0x16f4f; 0x16f8f, 0x16f92; 0x16fe4, 0x16fe4; 0x1bc9d, 0x1bc9e; 0x1cf00, 0x1cf2d; 0x1cf30, 0x1cf46; 0x1d167, 0x1d169; 0x1d17b, 0x1d182; 0x1d185, 0x1d18b; 0x1d1aa, 0x1d1ad; 0x1d242, 0x1d244; 0x1da00, 0x1da36; 0x1da3b, 0x1da6c; 0x1da75, 0x1da75; 0x1da84, 0x1da84; 0x1da9b, 0x1da9f; 0x1daa1, 0x1daaf; 0x1e000, 0x1e006; 0x1e008, 0x1e018; 0x1e01b, 0x1e021; 0x1e023, 0x1e024; 0x1e026, 0x1e02a; 0x1e08f, 0x1e08f; 0x1e130, 0x1e136; - 0x1e2ae, 0x1e2ae; 0x1e2ec, 0x1e2ef; 0x1e4ec, 0x1e4ef; 0x1e8d0, 0x1e8d6; 0x1e944, 0x1e94a; - 0xe0100, 0xe01ef] + 0x1e2ae, 0x1e2ae; 0x1e2ec, 0x1e2ef; 0x1e4ec, 0x1e4ef; 0x1e5ee, 0x1e5ef; 0x1e8d0, 0x1e8d6; + 0x1e944, 0x1e94a; 0xe0100, 0xe01ef] let nd = Sedlex_utils.Cset.of_list [0x30, 0x39; 0x660, 0x669; 0x6f0, 0x6f9; 0x7c0, 0x7c9; 0x966, 0x96f; @@ -681,12 +696,14 @@ module Categories = struct 0x1810, 0x1819; 0x1946, 0x194f; 0x19d0, 0x19d9; 0x1a80, 0x1a89; 0x1a90, 0x1a99; 0x1b50, 0x1b59; 0x1bb0, 0x1bb9; 0x1c40, 0x1c49; 0x1c50, 0x1c59; 0xa620, 0xa629; 0xa8d0, 0xa8d9; 0xa900, 0xa909; 0xa9d0, 0xa9d9; 0xa9f0, 0xa9f9; 0xaa50, 0xaa59; - 0xabf0, 0xabf9; 0xff10, 0xff19; 0x104a0, 0x104a9; 0x10d30, 0x10d39; 0x11066, 0x1106f; - 0x110f0, 0x110f9; 0x11136, 0x1113f; 0x111d0, 0x111d9; 0x112f0, 0x112f9; 0x11450, 0x11459; - 0x114d0, 0x114d9; 0x11650, 0x11659; 0x116c0, 0x116c9; 0x11730, 0x11739; 0x118e0, 0x118e9; - 0x11950, 0x11959; 0x11c50, 0x11c59; 0x11d50, 0x11d59; 0x11da0, 0x11da9; 0x11f50, 0x11f59; - 0x16a60, 0x16a69; 0x16ac0, 0x16ac9; 0x16b50, 0x16b59; 0x1d7ce, 0x1d7ff; 0x1e140, 0x1e149; - 0x1e2f0, 0x1e2f9; 0x1e4f0, 0x1e4f9; 0x1e950, 0x1e959; 0x1fbf0, 0x1fbf9] + 0xabf0, 0xabf9; 0xff10, 0xff19; 0x104a0, 0x104a9; 0x10d30, 0x10d39; 0x10d40, 0x10d49; + 0x11066, 0x1106f; 0x110f0, 0x110f9; 0x11136, 0x1113f; 0x111d0, 0x111d9; 0x112f0, 0x112f9; + 0x11450, 0x11459; 0x114d0, 0x114d9; 0x11650, 0x11659; 0x116c0, 0x116c9; 0x116d0, 0x116e3; + 0x11730, 0x11739; 0x118e0, 0x118e9; 0x11950, 0x11959; 0x11bf0, 0x11bf9; 0x11c50, 
0x11c59; + 0x11d50, 0x11d59; 0x11da0, 0x11da9; 0x11f50, 0x11f59; 0x16130, 0x16139; 0x16a60, 0x16a69; + 0x16ac0, 0x16ac9; 0x16b50, 0x16b59; 0x16d70, 0x16d79; 0x1ccf0, 0x1ccf9; 0x1d7ce, 0x1d7ff; + 0x1e140, 0x1e149; 0x1e2f0, 0x1e2f9; 0x1e4f0, 0x1e4f9; 0x1e5f1, 0x1e5fa; 0x1e950, 0x1e959; + 0x1fbf0, 0x1fbf9] let nl = Sedlex_utils.Cset.of_list [0x16ee, 0x16f0; 0x2160, 0x2182; 0x2185, 0x2188; 0x3007, 0x3007; 0x3021, 0x3029; @@ -718,7 +735,7 @@ module Categories = struct [0x2d, 0x2d; 0x58a, 0x58a; 0x5be, 0x5be; 0x1400, 0x1400; 0x1806, 0x1806; 0x2010, 0x2015; 0x2e17, 0x2e17; 0x2e1a, 0x2e1a; 0x2e3a, 0x2e3b; 0x2e40, 0x2e40; 0x2e5d, 0x2e5d; 0x301c, 0x301c; 0x3030, 0x3030; 0x30a0, 0x30a0; 0xfe31, 0xfe32; - 0xfe58, 0xfe58; 0xfe63, 0xfe63; 0xff0d, 0xff0d; 0x10ead, 0x10ead] + 0xfe58, 0xfe58; 0xfe63, 0xfe63; 0xff0d, 0xff0d; 0x10d6e, 0x10d6e; 0x10ead, 0x10ead] let pe = Sedlex_utils.Cset.of_list [0x29, 0x29; 0x5d, 0x5d; 0x7d, 0x7d; 0xf3b, 0xf3b; 0xf3d, 0xf3d; @@ -759,33 +776,34 @@ module Categories = struct 0xf04, 0xf12; 0xf14, 0xf14; 0xf85, 0xf85; 0xfd0, 0xfd4; 0xfd9, 0xfda; 0x104a, 0x104f; 0x10fb, 0x10fb; 0x1360, 0x1368; 0x166e, 0x166e; 0x16eb, 0x16ed; 0x1735, 0x1736; 0x17d4, 0x17d6; 0x17d8, 0x17da; 0x1800, 0x1805; 0x1807, 0x180a; - 0x1944, 0x1945; 0x1a1e, 0x1a1f; 0x1aa0, 0x1aa6; 0x1aa8, 0x1aad; 0x1b5a, 0x1b60; - 0x1b7d, 0x1b7e; 0x1bfc, 0x1bff; 0x1c3b, 0x1c3f; 0x1c7e, 0x1c7f; 0x1cc0, 0x1cc7; - 0x1cd3, 0x1cd3; 0x2016, 0x2017; 0x2020, 0x2027; 0x2030, 0x2038; 0x203b, 0x203e; - 0x2041, 0x2043; 0x2047, 0x2051; 0x2053, 0x2053; 0x2055, 0x205e; 0x2cf9, 0x2cfc; - 0x2cfe, 0x2cff; 0x2d70, 0x2d70; 0x2e00, 0x2e01; 0x2e06, 0x2e08; 0x2e0b, 0x2e0b; - 0x2e0e, 0x2e16; 0x2e18, 0x2e19; 0x2e1b, 0x2e1b; 0x2e1e, 0x2e1f; 0x2e2a, 0x2e2e; - 0x2e30, 0x2e39; 0x2e3c, 0x2e3f; 0x2e41, 0x2e41; 0x2e43, 0x2e4f; 0x2e52, 0x2e54; - 0x3001, 0x3003; 0x303d, 0x303d; 0x30fb, 0x30fb; 0xa4fe, 0xa4ff; 0xa60d, 0xa60f; - 0xa673, 0xa673; 0xa67e, 0xa67e; 0xa6f2, 0xa6f7; 0xa874, 0xa877; 0xa8ce, 0xa8cf; - 0xa8f8, 0xa8fa; 0xa8fc, 
0xa8fc; 0xa92e, 0xa92f; 0xa95f, 0xa95f; 0xa9c1, 0xa9cd; - 0xa9de, 0xa9df; 0xaa5c, 0xaa5f; 0xaade, 0xaadf; 0xaaf0, 0xaaf1; 0xabeb, 0xabeb; - 0xfe10, 0xfe16; 0xfe19, 0xfe19; 0xfe30, 0xfe30; 0xfe45, 0xfe46; 0xfe49, 0xfe4c; - 0xfe50, 0xfe52; 0xfe54, 0xfe57; 0xfe5f, 0xfe61; 0xfe68, 0xfe68; 0xfe6a, 0xfe6b; - 0xff01, 0xff03; 0xff05, 0xff07; 0xff0a, 0xff0a; 0xff0c, 0xff0c; 0xff0e, 0xff0f; - 0xff1a, 0xff1b; 0xff1f, 0xff20; 0xff3c, 0xff3c; 0xff61, 0xff61; 0xff64, 0xff65; - 0x10100, 0x10102; 0x1039f, 0x1039f; 0x103d0, 0x103d0; 0x1056f, 0x1056f; 0x10857, 0x10857; - 0x1091f, 0x1091f; 0x1093f, 0x1093f; 0x10a50, 0x10a58; 0x10a7f, 0x10a7f; 0x10af0, 0x10af6; - 0x10b39, 0x10b3f; 0x10b99, 0x10b9c; 0x10f55, 0x10f59; 0x10f86, 0x10f89; 0x11047, 0x1104d; - 0x110bb, 0x110bc; 0x110be, 0x110c1; 0x11140, 0x11143; 0x11174, 0x11175; 0x111c5, 0x111c8; - 0x111cd, 0x111cd; 0x111db, 0x111db; 0x111dd, 0x111df; 0x11238, 0x1123d; 0x112a9, 0x112a9; - 0x1144b, 0x1144f; 0x1145a, 0x1145b; 0x1145d, 0x1145d; 0x114c6, 0x114c6; 0x115c1, 0x115d7; - 0x11641, 0x11643; 0x11660, 0x1166c; 0x116b9, 0x116b9; 0x1173c, 0x1173e; 0x1183b, 0x1183b; - 0x11944, 0x11946; 0x119e2, 0x119e2; 0x11a3f, 0x11a46; 0x11a9a, 0x11a9c; 0x11a9e, 0x11aa2; - 0x11b00, 0x11b09; 0x11c41, 0x11c45; 0x11c70, 0x11c71; 0x11ef7, 0x11ef8; 0x11f43, 0x11f4f; - 0x11fff, 0x11fff; 0x12470, 0x12474; 0x12ff1, 0x12ff2; 0x16a6e, 0x16a6f; 0x16af5, 0x16af5; - 0x16b37, 0x16b3b; 0x16b44, 0x16b44; 0x16e97, 0x16e9a; 0x16fe2, 0x16fe2; 0x1bc9f, 0x1bc9f; - 0x1da87, 0x1da8b; 0x1e95e, 0x1e95f] + 0x1944, 0x1945; 0x1a1e, 0x1a1f; 0x1aa0, 0x1aa6; 0x1aa8, 0x1aad; 0x1b4e, 0x1b4f; + 0x1b5a, 0x1b60; 0x1b7d, 0x1b7f; 0x1bfc, 0x1bff; 0x1c3b, 0x1c3f; 0x1c7e, 0x1c7f; + 0x1cc0, 0x1cc7; 0x1cd3, 0x1cd3; 0x2016, 0x2017; 0x2020, 0x2027; 0x2030, 0x2038; + 0x203b, 0x203e; 0x2041, 0x2043; 0x2047, 0x2051; 0x2053, 0x2053; 0x2055, 0x205e; + 0x2cf9, 0x2cfc; 0x2cfe, 0x2cff; 0x2d70, 0x2d70; 0x2e00, 0x2e01; 0x2e06, 0x2e08; + 0x2e0b, 0x2e0b; 0x2e0e, 0x2e16; 0x2e18, 0x2e19; 0x2e1b, 0x2e1b; 0x2e1e, 
0x2e1f; + 0x2e2a, 0x2e2e; 0x2e30, 0x2e39; 0x2e3c, 0x2e3f; 0x2e41, 0x2e41; 0x2e43, 0x2e4f; + 0x2e52, 0x2e54; 0x3001, 0x3003; 0x303d, 0x303d; 0x30fb, 0x30fb; 0xa4fe, 0xa4ff; + 0xa60d, 0xa60f; 0xa673, 0xa673; 0xa67e, 0xa67e; 0xa6f2, 0xa6f7; 0xa874, 0xa877; + 0xa8ce, 0xa8cf; 0xa8f8, 0xa8fa; 0xa8fc, 0xa8fc; 0xa92e, 0xa92f; 0xa95f, 0xa95f; + 0xa9c1, 0xa9cd; 0xa9de, 0xa9df; 0xaa5c, 0xaa5f; 0xaade, 0xaadf; 0xaaf0, 0xaaf1; + 0xabeb, 0xabeb; 0xfe10, 0xfe16; 0xfe19, 0xfe19; 0xfe30, 0xfe30; 0xfe45, 0xfe46; + 0xfe49, 0xfe4c; 0xfe50, 0xfe52; 0xfe54, 0xfe57; 0xfe5f, 0xfe61; 0xfe68, 0xfe68; + 0xfe6a, 0xfe6b; 0xff01, 0xff03; 0xff05, 0xff07; 0xff0a, 0xff0a; 0xff0c, 0xff0c; + 0xff0e, 0xff0f; 0xff1a, 0xff1b; 0xff1f, 0xff20; 0xff3c, 0xff3c; 0xff61, 0xff61; + 0xff64, 0xff65; 0x10100, 0x10102; 0x1039f, 0x1039f; 0x103d0, 0x103d0; 0x1056f, 0x1056f; + 0x10857, 0x10857; 0x1091f, 0x1091f; 0x1093f, 0x1093f; 0x10a50, 0x10a58; 0x10a7f, 0x10a7f; + 0x10af0, 0x10af6; 0x10b39, 0x10b3f; 0x10b99, 0x10b9c; 0x10f55, 0x10f59; 0x10f86, 0x10f89; + 0x11047, 0x1104d; 0x110bb, 0x110bc; 0x110be, 0x110c1; 0x11140, 0x11143; 0x11174, 0x11175; + 0x111c5, 0x111c8; 0x111cd, 0x111cd; 0x111db, 0x111db; 0x111dd, 0x111df; 0x11238, 0x1123d; + 0x112a9, 0x112a9; 0x113d4, 0x113d5; 0x113d7, 0x113d8; 0x1144b, 0x1144f; 0x1145a, 0x1145b; + 0x1145d, 0x1145d; 0x114c6, 0x114c6; 0x115c1, 0x115d7; 0x11641, 0x11643; 0x11660, 0x1166c; + 0x116b9, 0x116b9; 0x1173c, 0x1173e; 0x1183b, 0x1183b; 0x11944, 0x11946; 0x119e2, 0x119e2; + 0x11a3f, 0x11a46; 0x11a9a, 0x11a9c; 0x11a9e, 0x11aa2; 0x11b00, 0x11b09; 0x11be1, 0x11be1; + 0x11c41, 0x11c45; 0x11c70, 0x11c71; 0x11ef7, 0x11ef8; 0x11f43, 0x11f4f; 0x11fff, 0x11fff; + 0x12470, 0x12474; 0x12ff1, 0x12ff2; 0x16a6e, 0x16a6f; 0x16af5, 0x16af5; 0x16b37, 0x16b3b; + 0x16b44, 0x16b44; 0x16d6d, 0x16d6f; 0x16e97, 0x16e9a; 0x16fe2, 0x16fe2; 0x1bc9f, 0x1bc9f; + 0x1da87, 0x1da8b; 0x1e5ff, 0x1e5ff; 0x1e95e, 0x1e95f] let ps = Sedlex_utils.Cset.of_list [0x28, 0x28; 0x5b, 0x5b; 0x7b, 0x7b; 0xf3a, 0xf3a; 0xf3c, 
0xf3c; @@ -832,9 +850,9 @@ module Categories = struct 0x27c0, 0x27c4; 0x27c7, 0x27e5; 0x27f0, 0x27ff; 0x2900, 0x2982; 0x2999, 0x29d7; 0x29dc, 0x29fb; 0x29fe, 0x2aff; 0x2b30, 0x2b44; 0x2b47, 0x2b4c; 0xfb29, 0xfb29; 0xfe62, 0xfe62; 0xfe64, 0xfe66; 0xff0b, 0xff0b; 0xff1c, 0xff1e; 0xff5c, 0xff5c; - 0xff5e, 0xff5e; 0xffe2, 0xffe2; 0xffe9, 0xffec; 0x1d6c1, 0x1d6c1; 0x1d6db, 0x1d6db; - 0x1d6fb, 0x1d6fb; 0x1d715, 0x1d715; 0x1d735, 0x1d735; 0x1d74f, 0x1d74f; 0x1d76f, 0x1d76f; - 0x1d789, 0x1d789; 0x1d7a9, 0x1d7a9; 0x1d7c3, 0x1d7c3; 0x1eef0, 0x1eef1] + 0xff5e, 0xff5e; 0xffe2, 0xffe2; 0xffe9, 0xffec; 0x10d8e, 0x10d8f; 0x1d6c1, 0x1d6c1; + 0x1d6db, 0x1d6db; 0x1d6fb, 0x1d6fb; 0x1d715, 0x1d715; 0x1d735, 0x1d735; 0x1d74f, 0x1d74f; + 0x1d76f, 0x1d76f; 0x1d789, 0x1d789; 0x1d7a9, 0x1d7a9; 0x1d7c3, 0x1d7c3; 0x1eef0, 0x1eef1] let so = Sedlex_utils.Cset.of_list [0xa6, 0xa6; 0xa9, 0xa9; 0xae, 0xae; 0xb0, 0xb0; 0x482, 0x482; @@ -850,30 +868,31 @@ module Categories = struct 0x218a, 0x218b; 0x2195, 0x2199; 0x219c, 0x219f; 0x21a1, 0x21a2; 0x21a4, 0x21a5; 0x21a7, 0x21ad; 0x21af, 0x21cd; 0x21d0, 0x21d1; 0x21d3, 0x21d3; 0x21d5, 0x21f3; 0x2300, 0x2307; 0x230c, 0x231f; 0x2322, 0x2328; 0x232b, 0x237b; 0x237d, 0x239a; - 0x23b4, 0x23db; 0x23e2, 0x2426; 0x2440, 0x244a; 0x249c, 0x24e9; 0x2500, 0x25b6; + 0x23b4, 0x23db; 0x23e2, 0x2429; 0x2440, 0x244a; 0x249c, 0x24e9; 0x2500, 0x25b6; 0x25b8, 0x25c0; 0x25c2, 0x25f7; 0x2600, 0x266e; 0x2670, 0x2767; 0x2794, 0x27bf; 0x2800, 0x28ff; 0x2b00, 0x2b2f; 0x2b45, 0x2b46; 0x2b4d, 0x2b73; 0x2b76, 0x2b95; 0x2b97, 0x2bff; 0x2ce5, 0x2cea; 0x2e50, 0x2e51; 0x2e80, 0x2e99; 0x2e9b, 0x2ef3; - 0x2f00, 0x2fd5; 0x2ff0, 0x2ffb; 0x3004, 0x3004; 0x3012, 0x3013; 0x3020, 0x3020; - 0x3036, 0x3037; 0x303e, 0x303f; 0x3190, 0x3191; 0x3196, 0x319f; 0x31c0, 0x31e3; - 0x3200, 0x321e; 0x322a, 0x3247; 0x3250, 0x3250; 0x3260, 0x327f; 0x328a, 0x32b0; - 0x32c0, 0x33ff; 0x4dc0, 0x4dff; 0xa490, 0xa4c6; 0xa828, 0xa82b; 0xa836, 0xa837; - 0xa839, 0xa839; 0xaa77, 0xaa79; 0xfd40, 0xfd4f; 0xfdcf, 
0xfdcf; 0xfdfd, 0xfdff; - 0xffe4, 0xffe4; 0xffe8, 0xffe8; 0xffed, 0xffee; 0xfffc, 0xfffd; 0x10137, 0x1013f; - 0x10179, 0x10189; 0x1018c, 0x1018e; 0x10190, 0x1019c; 0x101a0, 0x101a0; 0x101d0, 0x101fc; - 0x10877, 0x10878; 0x10ac8, 0x10ac8; 0x1173f, 0x1173f; 0x11fd5, 0x11fdc; 0x11fe1, 0x11ff1; - 0x16b3c, 0x16b3f; 0x16b45, 0x16b45; 0x1bc9c, 0x1bc9c; 0x1cf50, 0x1cfc3; 0x1d000, 0x1d0f5; - 0x1d100, 0x1d126; 0x1d129, 0x1d164; 0x1d16a, 0x1d16c; 0x1d183, 0x1d184; 0x1d18c, 0x1d1a9; - 0x1d1ae, 0x1d1ea; 0x1d200, 0x1d241; 0x1d245, 0x1d245; 0x1d300, 0x1d356; 0x1d800, 0x1d9ff; - 0x1da37, 0x1da3a; 0x1da6d, 0x1da74; 0x1da76, 0x1da83; 0x1da85, 0x1da86; 0x1e14f, 0x1e14f; - 0x1ecac, 0x1ecac; 0x1ed2e, 0x1ed2e; 0x1f000, 0x1f02b; 0x1f030, 0x1f093; 0x1f0a0, 0x1f0ae; - 0x1f0b1, 0x1f0bf; 0x1f0c1, 0x1f0cf; 0x1f0d1, 0x1f0f5; 0x1f10d, 0x1f1ad; 0x1f1e6, 0x1f202; - 0x1f210, 0x1f23b; 0x1f240, 0x1f248; 0x1f250, 0x1f251; 0x1f260, 0x1f265; 0x1f300, 0x1f3fa; - 0x1f400, 0x1f6d7; 0x1f6dc, 0x1f6ec; 0x1f6f0, 0x1f6fc; 0x1f700, 0x1f776; 0x1f77b, 0x1f7d9; - 0x1f7e0, 0x1f7eb; 0x1f7f0, 0x1f7f0; 0x1f800, 0x1f80b; 0x1f810, 0x1f847; 0x1f850, 0x1f859; - 0x1f860, 0x1f887; 0x1f890, 0x1f8ad; 0x1f8b0, 0x1f8b1; 0x1f900, 0x1fa53; 0x1fa60, 0x1fa6d; - 0x1fa70, 0x1fa7c; 0x1fa80, 0x1fa88; 0x1fa90, 0x1fabd; 0x1fabf, 0x1fac5; 0x1face, 0x1fadb; - 0x1fae0, 0x1fae8; 0x1faf0, 0x1faf8; 0x1fb00, 0x1fb92; 0x1fb94, 0x1fbca] + 0x2f00, 0x2fd5; 0x2ff0, 0x2fff; 0x3004, 0x3004; 0x3012, 0x3013; 0x3020, 0x3020; + 0x3036, 0x3037; 0x303e, 0x303f; 0x3190, 0x3191; 0x3196, 0x319f; 0x31c0, 0x31e5; + 0x31ef, 0x31ef; 0x3200, 0x321e; 0x322a, 0x3247; 0x3250, 0x3250; 0x3260, 0x327f; + 0x328a, 0x32b0; 0x32c0, 0x33ff; 0x4dc0, 0x4dff; 0xa490, 0xa4c6; 0xa828, 0xa82b; + 0xa836, 0xa837; 0xa839, 0xa839; 0xaa77, 0xaa79; 0xfd40, 0xfd4f; 0xfdcf, 0xfdcf; + 0xfdfd, 0xfdff; 0xffe4, 0xffe4; 0xffe8, 0xffe8; 0xffed, 0xffee; 0xfffc, 0xfffd; + 0x10137, 0x1013f; 0x10179, 0x10189; 0x1018c, 0x1018e; 0x10190, 0x1019c; 0x101a0, 0x101a0; + 0x101d0, 0x101fc; 0x10877, 0x10878; 
0x10ac8, 0x10ac8; 0x1173f, 0x1173f; 0x11fd5, 0x11fdc; + 0x11fe1, 0x11ff1; 0x16b3c, 0x16b3f; 0x16b45, 0x16b45; 0x1bc9c, 0x1bc9c; 0x1cc00, 0x1ccef; + 0x1cd00, 0x1ceb3; 0x1cf50, 0x1cfc3; 0x1d000, 0x1d0f5; 0x1d100, 0x1d126; 0x1d129, 0x1d164; + 0x1d16a, 0x1d16c; 0x1d183, 0x1d184; 0x1d18c, 0x1d1a9; 0x1d1ae, 0x1d1ea; 0x1d200, 0x1d241; + 0x1d245, 0x1d245; 0x1d300, 0x1d356; 0x1d800, 0x1d9ff; 0x1da37, 0x1da3a; 0x1da6d, 0x1da74; + 0x1da76, 0x1da83; 0x1da85, 0x1da86; 0x1e14f, 0x1e14f; 0x1ecac, 0x1ecac; 0x1ed2e, 0x1ed2e; + 0x1f000, 0x1f02b; 0x1f030, 0x1f093; 0x1f0a0, 0x1f0ae; 0x1f0b1, 0x1f0bf; 0x1f0c1, 0x1f0cf; + 0x1f0d1, 0x1f0f5; 0x1f10d, 0x1f1ad; 0x1f1e6, 0x1f202; 0x1f210, 0x1f23b; 0x1f240, 0x1f248; + 0x1f250, 0x1f251; 0x1f260, 0x1f265; 0x1f300, 0x1f3fa; 0x1f400, 0x1f6d7; 0x1f6dc, 0x1f6ec; + 0x1f6f0, 0x1f6fc; 0x1f700, 0x1f776; 0x1f77b, 0x1f7d9; 0x1f7e0, 0x1f7eb; 0x1f7f0, 0x1f7f0; + 0x1f800, 0x1f80b; 0x1f810, 0x1f847; 0x1f850, 0x1f859; 0x1f860, 0x1f887; 0x1f890, 0x1f8ad; + 0x1f8b0, 0x1f8bb; 0x1f8c0, 0x1f8c1; 0x1f900, 0x1fa53; 0x1fa60, 0x1fa6d; 0x1fa70, 0x1fa7c; + 0x1fa80, 0x1fa89; 0x1fa8f, 0x1fac6; 0x1face, 0x1fadc; 0x1fadf, 0x1fae9; 0x1faf0, 0x1faf8; + 0x1fb00, 0x1fb92; 0x1fb94, 0x1fbef] let zl = Sedlex_utils.Cset.of_list [0x2028, 0x2028] @@ -925,7 +944,7 @@ module Properties = struct let alphabetic = Sedlex_utils.Cset.of_list [0x41, 0x5a; 0x61, 0x7a; 0xaa, 0xaa; 0xb5, 0xb5; 0xba, 0xba; 0xc0, 0xd6; 0xd8, 0xf6; 0xf8, 0x2c1; 0x2c6, 0x2d1; 0x2e0, 0x2e4; - 0x2ec, 0x2ec; 0x2ee, 0x2ee; 0x345, 0x345; 0x370, 0x374; 0x376, 0x377; + 0x2ec, 0x2ec; 0x2ee, 0x2ee; 0x345, 0x345; 0x363, 0x374; 0x376, 0x377; 0x37a, 0x37d; 0x37f, 0x37f; 0x386, 0x386; 0x388, 0x38a; 0x38c, 0x38c; 0x38e, 0x3a1; 0x3a3, 0x3f5; 0x3f7, 0x481; 0x48a, 0x52f; 0x531, 0x556; 0x559, 0x559; 0x560, 0x588; 0x5b0, 0x5bd; 0x5bf, 0x5bf; 0x5c1, 0x5c2; @@ -933,142 +952,147 @@ module Properties = struct 0x620, 0x657; 0x659, 0x65f; 0x66e, 0x6d3; 0x6d5, 0x6dc; 0x6e1, 0x6e8; 0x6ed, 0x6ef; 0x6fa, 0x6fc; 0x6ff, 0x6ff; 0x710, 0x73f; 
0x74d, 0x7b1; 0x7ca, 0x7ea; 0x7f4, 0x7f5; 0x7fa, 0x7fa; 0x800, 0x817; 0x81a, 0x82c; - 0x840, 0x858; 0x860, 0x86a; 0x870, 0x887; 0x889, 0x88e; 0x8a0, 0x8c9; - 0x8d4, 0x8df; 0x8e3, 0x8e9; 0x8f0, 0x93b; 0x93d, 0x94c; 0x94e, 0x950; - 0x955, 0x963; 0x971, 0x983; 0x985, 0x98c; 0x98f, 0x990; 0x993, 0x9a8; - 0x9aa, 0x9b0; 0x9b2, 0x9b2; 0x9b6, 0x9b9; 0x9bd, 0x9c4; 0x9c7, 0x9c8; - 0x9cb, 0x9cc; 0x9ce, 0x9ce; 0x9d7, 0x9d7; 0x9dc, 0x9dd; 0x9df, 0x9e3; - 0x9f0, 0x9f1; 0x9fc, 0x9fc; 0xa01, 0xa03; 0xa05, 0xa0a; 0xa0f, 0xa10; - 0xa13, 0xa28; 0xa2a, 0xa30; 0xa32, 0xa33; 0xa35, 0xa36; 0xa38, 0xa39; - 0xa3e, 0xa42; 0xa47, 0xa48; 0xa4b, 0xa4c; 0xa51, 0xa51; 0xa59, 0xa5c; - 0xa5e, 0xa5e; 0xa70, 0xa75; 0xa81, 0xa83; 0xa85, 0xa8d; 0xa8f, 0xa91; - 0xa93, 0xaa8; 0xaaa, 0xab0; 0xab2, 0xab3; 0xab5, 0xab9; 0xabd, 0xac5; - 0xac7, 0xac9; 0xacb, 0xacc; 0xad0, 0xad0; 0xae0, 0xae3; 0xaf9, 0xafc; - 0xb01, 0xb03; 0xb05, 0xb0c; 0xb0f, 0xb10; 0xb13, 0xb28; 0xb2a, 0xb30; - 0xb32, 0xb33; 0xb35, 0xb39; 0xb3d, 0xb44; 0xb47, 0xb48; 0xb4b, 0xb4c; - 0xb56, 0xb57; 0xb5c, 0xb5d; 0xb5f, 0xb63; 0xb71, 0xb71; 0xb82, 0xb83; - 0xb85, 0xb8a; 0xb8e, 0xb90; 0xb92, 0xb95; 0xb99, 0xb9a; 0xb9c, 0xb9c; - 0xb9e, 0xb9f; 0xba3, 0xba4; 0xba8, 0xbaa; 0xbae, 0xbb9; 0xbbe, 0xbc2; - 0xbc6, 0xbc8; 0xbca, 0xbcc; 0xbd0, 0xbd0; 0xbd7, 0xbd7; 0xc00, 0xc0c; - 0xc0e, 0xc10; 0xc12, 0xc28; 0xc2a, 0xc39; 0xc3d, 0xc44; 0xc46, 0xc48; - 0xc4a, 0xc4c; 0xc55, 0xc56; 0xc58, 0xc5a; 0xc5d, 0xc5d; 0xc60, 0xc63; - 0xc80, 0xc83; 0xc85, 0xc8c; 0xc8e, 0xc90; 0xc92, 0xca8; 0xcaa, 0xcb3; - 0xcb5, 0xcb9; 0xcbd, 0xcc4; 0xcc6, 0xcc8; 0xcca, 0xccc; 0xcd5, 0xcd6; - 0xcdd, 0xcde; 0xce0, 0xce3; 0xcf1, 0xcf3; 0xd00, 0xd0c; 0xd0e, 0xd10; - 0xd12, 0xd3a; 0xd3d, 0xd44; 0xd46, 0xd48; 0xd4a, 0xd4c; 0xd4e, 0xd4e; - 0xd54, 0xd57; 0xd5f, 0xd63; 0xd7a, 0xd7f; 0xd81, 0xd83; 0xd85, 0xd96; - 0xd9a, 0xdb1; 0xdb3, 0xdbb; 0xdbd, 0xdbd; 0xdc0, 0xdc6; 0xdcf, 0xdd4; - 0xdd6, 0xdd6; 0xdd8, 0xddf; 0xdf2, 0xdf3; 0xe01, 0xe3a; 0xe40, 0xe46; - 0xe4d, 0xe4d; 0xe81, 0xe82; 0xe84, 0xe84; 
0xe86, 0xe8a; 0xe8c, 0xea3; - 0xea5, 0xea5; 0xea7, 0xeb9; 0xebb, 0xebd; 0xec0, 0xec4; 0xec6, 0xec6; - 0xecd, 0xecd; 0xedc, 0xedf; 0xf00, 0xf00; 0xf40, 0xf47; 0xf49, 0xf6c; - 0xf71, 0xf83; 0xf88, 0xf97; 0xf99, 0xfbc; 0x1000, 0x1036; 0x1038, 0x1038; - 0x103b, 0x103f; 0x1050, 0x108f; 0x109a, 0x109d; 0x10a0, 0x10c5; 0x10c7, 0x10c7; - 0x10cd, 0x10cd; 0x10d0, 0x10fa; 0x10fc, 0x1248; 0x124a, 0x124d; 0x1250, 0x1256; - 0x1258, 0x1258; 0x125a, 0x125d; 0x1260, 0x1288; 0x128a, 0x128d; 0x1290, 0x12b0; - 0x12b2, 0x12b5; 0x12b8, 0x12be; 0x12c0, 0x12c0; 0x12c2, 0x12c5; 0x12c8, 0x12d6; - 0x12d8, 0x1310; 0x1312, 0x1315; 0x1318, 0x135a; 0x1380, 0x138f; 0x13a0, 0x13f5; - 0x13f8, 0x13fd; 0x1401, 0x166c; 0x166f, 0x167f; 0x1681, 0x169a; 0x16a0, 0x16ea; - 0x16ee, 0x16f8; 0x1700, 0x1713; 0x171f, 0x1733; 0x1740, 0x1753; 0x1760, 0x176c; - 0x176e, 0x1770; 0x1772, 0x1773; 0x1780, 0x17b3; 0x17b6, 0x17c8; 0x17d7, 0x17d7; - 0x17dc, 0x17dc; 0x1820, 0x1878; 0x1880, 0x18aa; 0x18b0, 0x18f5; 0x1900, 0x191e; - 0x1920, 0x192b; 0x1930, 0x1938; 0x1950, 0x196d; 0x1970, 0x1974; 0x1980, 0x19ab; - 0x19b0, 0x19c9; 0x1a00, 0x1a1b; 0x1a20, 0x1a5e; 0x1a61, 0x1a74; 0x1aa7, 0x1aa7; - 0x1abf, 0x1ac0; 0x1acc, 0x1ace; 0x1b00, 0x1b33; 0x1b35, 0x1b43; 0x1b45, 0x1b4c; - 0x1b80, 0x1ba9; 0x1bac, 0x1baf; 0x1bba, 0x1be5; 0x1be7, 0x1bf1; 0x1c00, 0x1c36; - 0x1c4d, 0x1c4f; 0x1c5a, 0x1c7d; 0x1c80, 0x1c88; 0x1c90, 0x1cba; 0x1cbd, 0x1cbf; - 0x1ce9, 0x1cec; 0x1cee, 0x1cf3; 0x1cf5, 0x1cf6; 0x1cfa, 0x1cfa; 0x1d00, 0x1dbf; - 0x1de7, 0x1df4; 0x1e00, 0x1f15; 0x1f18, 0x1f1d; 0x1f20, 0x1f45; 0x1f48, 0x1f4d; - 0x1f50, 0x1f57; 0x1f59, 0x1f59; 0x1f5b, 0x1f5b; 0x1f5d, 0x1f5d; 0x1f5f, 0x1f7d; - 0x1f80, 0x1fb4; 0x1fb6, 0x1fbc; 0x1fbe, 0x1fbe; 0x1fc2, 0x1fc4; 0x1fc6, 0x1fcc; - 0x1fd0, 0x1fd3; 0x1fd6, 0x1fdb; 0x1fe0, 0x1fec; 0x1ff2, 0x1ff4; 0x1ff6, 0x1ffc; - 0x2071, 0x2071; 0x207f, 0x207f; 0x2090, 0x209c; 0x2102, 0x2102; 0x2107, 0x2107; - 0x210a, 0x2113; 0x2115, 0x2115; 0x2119, 0x211d; 0x2124, 0x2124; 0x2126, 0x2126; - 0x2128, 0x2128; 0x212a, 
0x212d; 0x212f, 0x2139; 0x213c, 0x213f; 0x2145, 0x2149; - 0x214e, 0x214e; 0x2160, 0x2188; 0x24b6, 0x24e9; 0x2c00, 0x2ce4; 0x2ceb, 0x2cee; - 0x2cf2, 0x2cf3; 0x2d00, 0x2d25; 0x2d27, 0x2d27; 0x2d2d, 0x2d2d; 0x2d30, 0x2d67; - 0x2d6f, 0x2d6f; 0x2d80, 0x2d96; 0x2da0, 0x2da6; 0x2da8, 0x2dae; 0x2db0, 0x2db6; - 0x2db8, 0x2dbe; 0x2dc0, 0x2dc6; 0x2dc8, 0x2dce; 0x2dd0, 0x2dd6; 0x2dd8, 0x2dde; - 0x2de0, 0x2dff; 0x2e2f, 0x2e2f; 0x3005, 0x3007; 0x3021, 0x3029; 0x3031, 0x3035; - 0x3038, 0x303c; 0x3041, 0x3096; 0x309d, 0x309f; 0x30a1, 0x30fa; 0x30fc, 0x30ff; - 0x3105, 0x312f; 0x3131, 0x318e; 0x31a0, 0x31bf; 0x31f0, 0x31ff; 0x3400, 0x4dbf; - 0x4e00, 0xa48c; 0xa4d0, 0xa4fd; 0xa500, 0xa60c; 0xa610, 0xa61f; 0xa62a, 0xa62b; - 0xa640, 0xa66e; 0xa674, 0xa67b; 0xa67f, 0xa6ef; 0xa717, 0xa71f; 0xa722, 0xa788; - 0xa78b, 0xa7ca; 0xa7d0, 0xa7d1; 0xa7d3, 0xa7d3; 0xa7d5, 0xa7d9; 0xa7f2, 0xa805; - 0xa807, 0xa827; 0xa840, 0xa873; 0xa880, 0xa8c3; 0xa8c5, 0xa8c5; 0xa8f2, 0xa8f7; - 0xa8fb, 0xa8fb; 0xa8fd, 0xa8ff; 0xa90a, 0xa92a; 0xa930, 0xa952; 0xa960, 0xa97c; - 0xa980, 0xa9b2; 0xa9b4, 0xa9bf; 0xa9cf, 0xa9cf; 0xa9e0, 0xa9ef; 0xa9fa, 0xa9fe; - 0xaa00, 0xaa36; 0xaa40, 0xaa4d; 0xaa60, 0xaa76; 0xaa7a, 0xaabe; 0xaac0, 0xaac0; - 0xaac2, 0xaac2; 0xaadb, 0xaadd; 0xaae0, 0xaaef; 0xaaf2, 0xaaf5; 0xab01, 0xab06; - 0xab09, 0xab0e; 0xab11, 0xab16; 0xab20, 0xab26; 0xab28, 0xab2e; 0xab30, 0xab5a; - 0xab5c, 0xab69; 0xab70, 0xabea; 0xac00, 0xd7a3; 0xd7b0, 0xd7c6; 0xd7cb, 0xd7fb; - 0xf900, 0xfa6d; 0xfa70, 0xfad9; 0xfb00, 0xfb06; 0xfb13, 0xfb17; 0xfb1d, 0xfb28; - 0xfb2a, 0xfb36; 0xfb38, 0xfb3c; 0xfb3e, 0xfb3e; 0xfb40, 0xfb41; 0xfb43, 0xfb44; - 0xfb46, 0xfbb1; 0xfbd3, 0xfd3d; 0xfd50, 0xfd8f; 0xfd92, 0xfdc7; 0xfdf0, 0xfdfb; - 0xfe70, 0xfe74; 0xfe76, 0xfefc; 0xff21, 0xff3a; 0xff41, 0xff5a; 0xff66, 0xffbe; - 0xffc2, 0xffc7; 0xffca, 0xffcf; 0xffd2, 0xffd7; 0xffda, 0xffdc; 0x10000, 0x1000b; - 0x1000d, 0x10026; 0x10028, 0x1003a; 0x1003c, 0x1003d; 0x1003f, 0x1004d; 0x10050, 0x1005d; - 0x10080, 0x100fa; 0x10140, 0x10174; 
0x10280, 0x1029c; 0x102a0, 0x102d0; 0x10300, 0x1031f; - 0x1032d, 0x1034a; 0x10350, 0x1037a; 0x10380, 0x1039d; 0x103a0, 0x103c3; 0x103c8, 0x103cf; - 0x103d1, 0x103d5; 0x10400, 0x1049d; 0x104b0, 0x104d3; 0x104d8, 0x104fb; 0x10500, 0x10527; - 0x10530, 0x10563; 0x10570, 0x1057a; 0x1057c, 0x1058a; 0x1058c, 0x10592; 0x10594, 0x10595; - 0x10597, 0x105a1; 0x105a3, 0x105b1; 0x105b3, 0x105b9; 0x105bb, 0x105bc; 0x10600, 0x10736; - 0x10740, 0x10755; 0x10760, 0x10767; 0x10780, 0x10785; 0x10787, 0x107b0; 0x107b2, 0x107ba; - 0x10800, 0x10805; 0x10808, 0x10808; 0x1080a, 0x10835; 0x10837, 0x10838; 0x1083c, 0x1083c; - 0x1083f, 0x10855; 0x10860, 0x10876; 0x10880, 0x1089e; 0x108e0, 0x108f2; 0x108f4, 0x108f5; - 0x10900, 0x10915; 0x10920, 0x10939; 0x10980, 0x109b7; 0x109be, 0x109bf; 0x10a00, 0x10a03; - 0x10a05, 0x10a06; 0x10a0c, 0x10a13; 0x10a15, 0x10a17; 0x10a19, 0x10a35; 0x10a60, 0x10a7c; - 0x10a80, 0x10a9c; 0x10ac0, 0x10ac7; 0x10ac9, 0x10ae4; 0x10b00, 0x10b35; 0x10b40, 0x10b55; - 0x10b60, 0x10b72; 0x10b80, 0x10b91; 0x10c00, 0x10c48; 0x10c80, 0x10cb2; 0x10cc0, 0x10cf2; - 0x10d00, 0x10d27; 0x10e80, 0x10ea9; 0x10eab, 0x10eac; 0x10eb0, 0x10eb1; 0x10f00, 0x10f1c; - 0x10f27, 0x10f27; 0x10f30, 0x10f45; 0x10f70, 0x10f81; 0x10fb0, 0x10fc4; 0x10fe0, 0x10ff6; - 0x11000, 0x11045; 0x11071, 0x11075; 0x11080, 0x110b8; 0x110c2, 0x110c2; 0x110d0, 0x110e8; - 0x11100, 0x11132; 0x11144, 0x11147; 0x11150, 0x11172; 0x11176, 0x11176; 0x11180, 0x111bf; - 0x111c1, 0x111c4; 0x111ce, 0x111cf; 0x111da, 0x111da; 0x111dc, 0x111dc; 0x11200, 0x11211; - 0x11213, 0x11234; 0x11237, 0x11237; 0x1123e, 0x11241; 0x11280, 0x11286; 0x11288, 0x11288; - 0x1128a, 0x1128d; 0x1128f, 0x1129d; 0x1129f, 0x112a8; 0x112b0, 0x112e8; 0x11300, 0x11303; - 0x11305, 0x1130c; 0x1130f, 0x11310; 0x11313, 0x11328; 0x1132a, 0x11330; 0x11332, 0x11333; - 0x11335, 0x11339; 0x1133d, 0x11344; 0x11347, 0x11348; 0x1134b, 0x1134c; 0x11350, 0x11350; - 0x11357, 0x11357; 0x1135d, 0x11363; 0x11400, 0x11441; 0x11443, 0x11445; 0x11447, 0x1144a; - 0x1145f, 
0x11461; 0x11480, 0x114c1; 0x114c4, 0x114c5; 0x114c7, 0x114c7; 0x11580, 0x115b5; - 0x115b8, 0x115be; 0x115d8, 0x115dd; 0x11600, 0x1163e; 0x11640, 0x11640; 0x11644, 0x11644; - 0x11680, 0x116b5; 0x116b8, 0x116b8; 0x11700, 0x1171a; 0x1171d, 0x1172a; 0x11740, 0x11746; - 0x11800, 0x11838; 0x118a0, 0x118df; 0x118ff, 0x11906; 0x11909, 0x11909; 0x1190c, 0x11913; - 0x11915, 0x11916; 0x11918, 0x11935; 0x11937, 0x11938; 0x1193b, 0x1193c; 0x1193f, 0x11942; - 0x119a0, 0x119a7; 0x119aa, 0x119d7; 0x119da, 0x119df; 0x119e1, 0x119e1; 0x119e3, 0x119e4; - 0x11a00, 0x11a32; 0x11a35, 0x11a3e; 0x11a50, 0x11a97; 0x11a9d, 0x11a9d; 0x11ab0, 0x11af8; - 0x11c00, 0x11c08; 0x11c0a, 0x11c36; 0x11c38, 0x11c3e; 0x11c40, 0x11c40; 0x11c72, 0x11c8f; - 0x11c92, 0x11ca7; 0x11ca9, 0x11cb6; 0x11d00, 0x11d06; 0x11d08, 0x11d09; 0x11d0b, 0x11d36; - 0x11d3a, 0x11d3a; 0x11d3c, 0x11d3d; 0x11d3f, 0x11d41; 0x11d43, 0x11d43; 0x11d46, 0x11d47; - 0x11d60, 0x11d65; 0x11d67, 0x11d68; 0x11d6a, 0x11d8e; 0x11d90, 0x11d91; 0x11d93, 0x11d96; - 0x11d98, 0x11d98; 0x11ee0, 0x11ef6; 0x11f00, 0x11f10; 0x11f12, 0x11f3a; 0x11f3e, 0x11f40; - 0x11fb0, 0x11fb0; 0x12000, 0x12399; 0x12400, 0x1246e; 0x12480, 0x12543; 0x12f90, 0x12ff0; - 0x13000, 0x1342f; 0x13441, 0x13446; 0x14400, 0x14646; 0x16800, 0x16a38; 0x16a40, 0x16a5e; - 0x16a70, 0x16abe; 0x16ad0, 0x16aed; 0x16b00, 0x16b2f; 0x16b40, 0x16b43; 0x16b63, 0x16b77; - 0x16b7d, 0x16b8f; 0x16e40, 0x16e7f; 0x16f00, 0x16f4a; 0x16f4f, 0x16f87; 0x16f8f, 0x16f9f; - 0x16fe0, 0x16fe1; 0x16fe3, 0x16fe3; 0x16ff0, 0x16ff1; 0x17000, 0x187f7; 0x18800, 0x18cd5; - 0x18d00, 0x18d08; 0x1aff0, 0x1aff3; 0x1aff5, 0x1affb; 0x1affd, 0x1affe; 0x1b000, 0x1b122; - 0x1b132, 0x1b132; 0x1b150, 0x1b152; 0x1b155, 0x1b155; 0x1b164, 0x1b167; 0x1b170, 0x1b2fb; - 0x1bc00, 0x1bc6a; 0x1bc70, 0x1bc7c; 0x1bc80, 0x1bc88; 0x1bc90, 0x1bc99; 0x1bc9e, 0x1bc9e; - 0x1d400, 0x1d454; 0x1d456, 0x1d49c; 0x1d49e, 0x1d49f; 0x1d4a2, 0x1d4a2; 0x1d4a5, 0x1d4a6; - 0x1d4a9, 0x1d4ac; 0x1d4ae, 0x1d4b9; 0x1d4bb, 0x1d4bb; 0x1d4bd, 0x1d4c3; 
0x1d4c5, 0x1d505; - 0x1d507, 0x1d50a; 0x1d50d, 0x1d514; 0x1d516, 0x1d51c; 0x1d51e, 0x1d539; 0x1d53b, 0x1d53e; - 0x1d540, 0x1d544; 0x1d546, 0x1d546; 0x1d54a, 0x1d550; 0x1d552, 0x1d6a5; 0x1d6a8, 0x1d6c0; - 0x1d6c2, 0x1d6da; 0x1d6dc, 0x1d6fa; 0x1d6fc, 0x1d714; 0x1d716, 0x1d734; 0x1d736, 0x1d74e; - 0x1d750, 0x1d76e; 0x1d770, 0x1d788; 0x1d78a, 0x1d7a8; 0x1d7aa, 0x1d7c2; 0x1d7c4, 0x1d7cb; - 0x1df00, 0x1df1e; 0x1df25, 0x1df2a; 0x1e000, 0x1e006; 0x1e008, 0x1e018; 0x1e01b, 0x1e021; - 0x1e023, 0x1e024; 0x1e026, 0x1e02a; 0x1e030, 0x1e06d; 0x1e08f, 0x1e08f; 0x1e100, 0x1e12c; - 0x1e137, 0x1e13d; 0x1e14e, 0x1e14e; 0x1e290, 0x1e2ad; 0x1e2c0, 0x1e2eb; 0x1e4d0, 0x1e4eb; - 0x1e7e0, 0x1e7e6; 0x1e7e8, 0x1e7eb; 0x1e7ed, 0x1e7ee; 0x1e7f0, 0x1e7fe; 0x1e800, 0x1e8c4; - 0x1e900, 0x1e943; 0x1e947, 0x1e947; 0x1e94b, 0x1e94b; 0x1ee00, 0x1ee03; 0x1ee05, 0x1ee1f; - 0x1ee21, 0x1ee22; 0x1ee24, 0x1ee24; 0x1ee27, 0x1ee27; 0x1ee29, 0x1ee32; 0x1ee34, 0x1ee37; - 0x1ee39, 0x1ee39; 0x1ee3b, 0x1ee3b; 0x1ee42, 0x1ee42; 0x1ee47, 0x1ee47; 0x1ee49, 0x1ee49; - 0x1ee4b, 0x1ee4b; 0x1ee4d, 0x1ee4f; 0x1ee51, 0x1ee52; 0x1ee54, 0x1ee54; 0x1ee57, 0x1ee57; - 0x1ee59, 0x1ee59; 0x1ee5b, 0x1ee5b; 0x1ee5d, 0x1ee5d; 0x1ee5f, 0x1ee5f; 0x1ee61, 0x1ee62; - 0x1ee64, 0x1ee64; 0x1ee67, 0x1ee6a; 0x1ee6c, 0x1ee72; 0x1ee74, 0x1ee77; 0x1ee79, 0x1ee7c; - 0x1ee7e, 0x1ee7e; 0x1ee80, 0x1ee89; 0x1ee8b, 0x1ee9b; 0x1eea1, 0x1eea3; 0x1eea5, 0x1eea9; - 0x1eeab, 0x1eebb; 0x1f130, 0x1f149; 0x1f150, 0x1f169; 0x1f170, 0x1f189; 0x20000, 0x2a6df; - 0x2a700, 0x2b739; 0x2b740, 0x2b81d; 0x2b820, 0x2cea1; 0x2ceb0, 0x2ebe0; 0x2f800, 0x2fa1d; + 0x840, 0x858; 0x860, 0x86a; 0x870, 0x887; 0x889, 0x88e; 0x897, 0x897; + 0x8a0, 0x8c9; 0x8d4, 0x8df; 0x8e3, 0x8e9; 0x8f0, 0x93b; 0x93d, 0x94c; + 0x94e, 0x950; 0x955, 0x963; 0x971, 0x983; 0x985, 0x98c; 0x98f, 0x990; + 0x993, 0x9a8; 0x9aa, 0x9b0; 0x9b2, 0x9b2; 0x9b6, 0x9b9; 0x9bd, 0x9c4; + 0x9c7, 0x9c8; 0x9cb, 0x9cc; 0x9ce, 0x9ce; 0x9d7, 0x9d7; 0x9dc, 0x9dd; + 0x9df, 0x9e3; 0x9f0, 0x9f1; 0x9fc, 0x9fc; 0xa01, 0xa03; 
0xa05, 0xa0a; + 0xa0f, 0xa10; 0xa13, 0xa28; 0xa2a, 0xa30; 0xa32, 0xa33; 0xa35, 0xa36; + 0xa38, 0xa39; 0xa3e, 0xa42; 0xa47, 0xa48; 0xa4b, 0xa4c; 0xa51, 0xa51; + 0xa59, 0xa5c; 0xa5e, 0xa5e; 0xa70, 0xa75; 0xa81, 0xa83; 0xa85, 0xa8d; + 0xa8f, 0xa91; 0xa93, 0xaa8; 0xaaa, 0xab0; 0xab2, 0xab3; 0xab5, 0xab9; + 0xabd, 0xac5; 0xac7, 0xac9; 0xacb, 0xacc; 0xad0, 0xad0; 0xae0, 0xae3; + 0xaf9, 0xafc; 0xb01, 0xb03; 0xb05, 0xb0c; 0xb0f, 0xb10; 0xb13, 0xb28; + 0xb2a, 0xb30; 0xb32, 0xb33; 0xb35, 0xb39; 0xb3d, 0xb44; 0xb47, 0xb48; + 0xb4b, 0xb4c; 0xb56, 0xb57; 0xb5c, 0xb5d; 0xb5f, 0xb63; 0xb71, 0xb71; + 0xb82, 0xb83; 0xb85, 0xb8a; 0xb8e, 0xb90; 0xb92, 0xb95; 0xb99, 0xb9a; + 0xb9c, 0xb9c; 0xb9e, 0xb9f; 0xba3, 0xba4; 0xba8, 0xbaa; 0xbae, 0xbb9; + 0xbbe, 0xbc2; 0xbc6, 0xbc8; 0xbca, 0xbcc; 0xbd0, 0xbd0; 0xbd7, 0xbd7; + 0xc00, 0xc0c; 0xc0e, 0xc10; 0xc12, 0xc28; 0xc2a, 0xc39; 0xc3d, 0xc44; + 0xc46, 0xc48; 0xc4a, 0xc4c; 0xc55, 0xc56; 0xc58, 0xc5a; 0xc5d, 0xc5d; + 0xc60, 0xc63; 0xc80, 0xc83; 0xc85, 0xc8c; 0xc8e, 0xc90; 0xc92, 0xca8; + 0xcaa, 0xcb3; 0xcb5, 0xcb9; 0xcbd, 0xcc4; 0xcc6, 0xcc8; 0xcca, 0xccc; + 0xcd5, 0xcd6; 0xcdd, 0xcde; 0xce0, 0xce3; 0xcf1, 0xcf3; 0xd00, 0xd0c; + 0xd0e, 0xd10; 0xd12, 0xd3a; 0xd3d, 0xd44; 0xd46, 0xd48; 0xd4a, 0xd4c; + 0xd4e, 0xd4e; 0xd54, 0xd57; 0xd5f, 0xd63; 0xd7a, 0xd7f; 0xd81, 0xd83; + 0xd85, 0xd96; 0xd9a, 0xdb1; 0xdb3, 0xdbb; 0xdbd, 0xdbd; 0xdc0, 0xdc6; + 0xdcf, 0xdd4; 0xdd6, 0xdd6; 0xdd8, 0xddf; 0xdf2, 0xdf3; 0xe01, 0xe3a; + 0xe40, 0xe46; 0xe4d, 0xe4d; 0xe81, 0xe82; 0xe84, 0xe84; 0xe86, 0xe8a; + 0xe8c, 0xea3; 0xea5, 0xea5; 0xea7, 0xeb9; 0xebb, 0xebd; 0xec0, 0xec4; + 0xec6, 0xec6; 0xecd, 0xecd; 0xedc, 0xedf; 0xf00, 0xf00; 0xf40, 0xf47; + 0xf49, 0xf6c; 0xf71, 0xf83; 0xf88, 0xf97; 0xf99, 0xfbc; 0x1000, 0x1036; + 0x1038, 0x1038; 0x103b, 0x103f; 0x1050, 0x108f; 0x109a, 0x109d; 0x10a0, 0x10c5; + 0x10c7, 0x10c7; 0x10cd, 0x10cd; 0x10d0, 0x10fa; 0x10fc, 0x1248; 0x124a, 0x124d; + 0x1250, 0x1256; 0x1258, 0x1258; 0x125a, 0x125d; 0x1260, 0x1288; 0x128a, 0x128d; + 0x1290, 
0x12b0; 0x12b2, 0x12b5; 0x12b8, 0x12be; 0x12c0, 0x12c0; 0x12c2, 0x12c5; + 0x12c8, 0x12d6; 0x12d8, 0x1310; 0x1312, 0x1315; 0x1318, 0x135a; 0x1380, 0x138f; + 0x13a0, 0x13f5; 0x13f8, 0x13fd; 0x1401, 0x166c; 0x166f, 0x167f; 0x1681, 0x169a; + 0x16a0, 0x16ea; 0x16ee, 0x16f8; 0x1700, 0x1713; 0x171f, 0x1733; 0x1740, 0x1753; + 0x1760, 0x176c; 0x176e, 0x1770; 0x1772, 0x1773; 0x1780, 0x17b3; 0x17b6, 0x17c8; + 0x17d7, 0x17d7; 0x17dc, 0x17dc; 0x1820, 0x1878; 0x1880, 0x18aa; 0x18b0, 0x18f5; + 0x1900, 0x191e; 0x1920, 0x192b; 0x1930, 0x1938; 0x1950, 0x196d; 0x1970, 0x1974; + 0x1980, 0x19ab; 0x19b0, 0x19c9; 0x1a00, 0x1a1b; 0x1a20, 0x1a5e; 0x1a61, 0x1a74; + 0x1aa7, 0x1aa7; 0x1abf, 0x1ac0; 0x1acc, 0x1ace; 0x1b00, 0x1b33; 0x1b35, 0x1b43; + 0x1b45, 0x1b4c; 0x1b80, 0x1ba9; 0x1bac, 0x1baf; 0x1bba, 0x1be5; 0x1be7, 0x1bf1; + 0x1c00, 0x1c36; 0x1c4d, 0x1c4f; 0x1c5a, 0x1c7d; 0x1c80, 0x1c8a; 0x1c90, 0x1cba; + 0x1cbd, 0x1cbf; 0x1ce9, 0x1cec; 0x1cee, 0x1cf3; 0x1cf5, 0x1cf6; 0x1cfa, 0x1cfa; + 0x1d00, 0x1dbf; 0x1dd3, 0x1df4; 0x1e00, 0x1f15; 0x1f18, 0x1f1d; 0x1f20, 0x1f45; + 0x1f48, 0x1f4d; 0x1f50, 0x1f57; 0x1f59, 0x1f59; 0x1f5b, 0x1f5b; 0x1f5d, 0x1f5d; + 0x1f5f, 0x1f7d; 0x1f80, 0x1fb4; 0x1fb6, 0x1fbc; 0x1fbe, 0x1fbe; 0x1fc2, 0x1fc4; + 0x1fc6, 0x1fcc; 0x1fd0, 0x1fd3; 0x1fd6, 0x1fdb; 0x1fe0, 0x1fec; 0x1ff2, 0x1ff4; + 0x1ff6, 0x1ffc; 0x2071, 0x2071; 0x207f, 0x207f; 0x2090, 0x209c; 0x2102, 0x2102; + 0x2107, 0x2107; 0x210a, 0x2113; 0x2115, 0x2115; 0x2119, 0x211d; 0x2124, 0x2124; + 0x2126, 0x2126; 0x2128, 0x2128; 0x212a, 0x212d; 0x212f, 0x2139; 0x213c, 0x213f; + 0x2145, 0x2149; 0x214e, 0x214e; 0x2160, 0x2188; 0x24b6, 0x24e9; 0x2c00, 0x2ce4; + 0x2ceb, 0x2cee; 0x2cf2, 0x2cf3; 0x2d00, 0x2d25; 0x2d27, 0x2d27; 0x2d2d, 0x2d2d; + 0x2d30, 0x2d67; 0x2d6f, 0x2d6f; 0x2d80, 0x2d96; 0x2da0, 0x2da6; 0x2da8, 0x2dae; + 0x2db0, 0x2db6; 0x2db8, 0x2dbe; 0x2dc0, 0x2dc6; 0x2dc8, 0x2dce; 0x2dd0, 0x2dd6; + 0x2dd8, 0x2dde; 0x2de0, 0x2dff; 0x2e2f, 0x2e2f; 0x3005, 0x3007; 0x3021, 0x3029; + 0x3031, 0x3035; 0x3038, 0x303c; 0x3041, 
0x3096; 0x309d, 0x309f; 0x30a1, 0x30fa; + 0x30fc, 0x30ff; 0x3105, 0x312f; 0x3131, 0x318e; 0x31a0, 0x31bf; 0x31f0, 0x31ff; + 0x3400, 0x4dbf; 0x4e00, 0xa48c; 0xa4d0, 0xa4fd; 0xa500, 0xa60c; 0xa610, 0xa61f; + 0xa62a, 0xa62b; 0xa640, 0xa66e; 0xa674, 0xa67b; 0xa67f, 0xa6ef; 0xa717, 0xa71f; + 0xa722, 0xa788; 0xa78b, 0xa7cd; 0xa7d0, 0xa7d1; 0xa7d3, 0xa7d3; 0xa7d5, 0xa7dc; + 0xa7f2, 0xa805; 0xa807, 0xa827; 0xa840, 0xa873; 0xa880, 0xa8c3; 0xa8c5, 0xa8c5; + 0xa8f2, 0xa8f7; 0xa8fb, 0xa8fb; 0xa8fd, 0xa8ff; 0xa90a, 0xa92a; 0xa930, 0xa952; + 0xa960, 0xa97c; 0xa980, 0xa9b2; 0xa9b4, 0xa9bf; 0xa9cf, 0xa9cf; 0xa9e0, 0xa9ef; + 0xa9fa, 0xa9fe; 0xaa00, 0xaa36; 0xaa40, 0xaa4d; 0xaa60, 0xaa76; 0xaa7a, 0xaabe; + 0xaac0, 0xaac0; 0xaac2, 0xaac2; 0xaadb, 0xaadd; 0xaae0, 0xaaef; 0xaaf2, 0xaaf5; + 0xab01, 0xab06; 0xab09, 0xab0e; 0xab11, 0xab16; 0xab20, 0xab26; 0xab28, 0xab2e; + 0xab30, 0xab5a; 0xab5c, 0xab69; 0xab70, 0xabea; 0xac00, 0xd7a3; 0xd7b0, 0xd7c6; + 0xd7cb, 0xd7fb; 0xf900, 0xfa6d; 0xfa70, 0xfad9; 0xfb00, 0xfb06; 0xfb13, 0xfb17; + 0xfb1d, 0xfb28; 0xfb2a, 0xfb36; 0xfb38, 0xfb3c; 0xfb3e, 0xfb3e; 0xfb40, 0xfb41; + 0xfb43, 0xfb44; 0xfb46, 0xfbb1; 0xfbd3, 0xfd3d; 0xfd50, 0xfd8f; 0xfd92, 0xfdc7; + 0xfdf0, 0xfdfb; 0xfe70, 0xfe74; 0xfe76, 0xfefc; 0xff21, 0xff3a; 0xff41, 0xff5a; + 0xff66, 0xffbe; 0xffc2, 0xffc7; 0xffca, 0xffcf; 0xffd2, 0xffd7; 0xffda, 0xffdc; + 0x10000, 0x1000b; 0x1000d, 0x10026; 0x10028, 0x1003a; 0x1003c, 0x1003d; 0x1003f, 0x1004d; + 0x10050, 0x1005d; 0x10080, 0x100fa; 0x10140, 0x10174; 0x10280, 0x1029c; 0x102a0, 0x102d0; + 0x10300, 0x1031f; 0x1032d, 0x1034a; 0x10350, 0x1037a; 0x10380, 0x1039d; 0x103a0, 0x103c3; + 0x103c8, 0x103cf; 0x103d1, 0x103d5; 0x10400, 0x1049d; 0x104b0, 0x104d3; 0x104d8, 0x104fb; + 0x10500, 0x10527; 0x10530, 0x10563; 0x10570, 0x1057a; 0x1057c, 0x1058a; 0x1058c, 0x10592; + 0x10594, 0x10595; 0x10597, 0x105a1; 0x105a3, 0x105b1; 0x105b3, 0x105b9; 0x105bb, 0x105bc; + 0x105c0, 0x105f3; 0x10600, 0x10736; 0x10740, 0x10755; 0x10760, 0x10767; 0x10780, 0x10785; + 
0x10787, 0x107b0; 0x107b2, 0x107ba; 0x10800, 0x10805; 0x10808, 0x10808; 0x1080a, 0x10835; + 0x10837, 0x10838; 0x1083c, 0x1083c; 0x1083f, 0x10855; 0x10860, 0x10876; 0x10880, 0x1089e; + 0x108e0, 0x108f2; 0x108f4, 0x108f5; 0x10900, 0x10915; 0x10920, 0x10939; 0x10980, 0x109b7; + 0x109be, 0x109bf; 0x10a00, 0x10a03; 0x10a05, 0x10a06; 0x10a0c, 0x10a13; 0x10a15, 0x10a17; + 0x10a19, 0x10a35; 0x10a60, 0x10a7c; 0x10a80, 0x10a9c; 0x10ac0, 0x10ac7; 0x10ac9, 0x10ae4; + 0x10b00, 0x10b35; 0x10b40, 0x10b55; 0x10b60, 0x10b72; 0x10b80, 0x10b91; 0x10c00, 0x10c48; + 0x10c80, 0x10cb2; 0x10cc0, 0x10cf2; 0x10d00, 0x10d27; 0x10d4a, 0x10d65; 0x10d69, 0x10d69; + 0x10d6f, 0x10d85; 0x10e80, 0x10ea9; 0x10eab, 0x10eac; 0x10eb0, 0x10eb1; 0x10ec2, 0x10ec4; + 0x10efc, 0x10efc; 0x10f00, 0x10f1c; 0x10f27, 0x10f27; 0x10f30, 0x10f45; 0x10f70, 0x10f81; + 0x10fb0, 0x10fc4; 0x10fe0, 0x10ff6; 0x11000, 0x11045; 0x11071, 0x11075; 0x11080, 0x110b8; + 0x110c2, 0x110c2; 0x110d0, 0x110e8; 0x11100, 0x11132; 0x11144, 0x11147; 0x11150, 0x11172; + 0x11176, 0x11176; 0x11180, 0x111bf; 0x111c1, 0x111c4; 0x111ce, 0x111cf; 0x111da, 0x111da; + 0x111dc, 0x111dc; 0x11200, 0x11211; 0x11213, 0x11234; 0x11237, 0x11237; 0x1123e, 0x11241; + 0x11280, 0x11286; 0x11288, 0x11288; 0x1128a, 0x1128d; 0x1128f, 0x1129d; 0x1129f, 0x112a8; + 0x112b0, 0x112e8; 0x11300, 0x11303; 0x11305, 0x1130c; 0x1130f, 0x11310; 0x11313, 0x11328; + 0x1132a, 0x11330; 0x11332, 0x11333; 0x11335, 0x11339; 0x1133d, 0x11344; 0x11347, 0x11348; + 0x1134b, 0x1134c; 0x11350, 0x11350; 0x11357, 0x11357; 0x1135d, 0x11363; 0x11380, 0x11389; + 0x1138b, 0x1138b; 0x1138e, 0x1138e; 0x11390, 0x113b5; 0x113b7, 0x113c0; 0x113c2, 0x113c2; + 0x113c5, 0x113c5; 0x113c7, 0x113ca; 0x113cc, 0x113cd; 0x113d1, 0x113d1; 0x113d3, 0x113d3; + 0x11400, 0x11441; 0x11443, 0x11445; 0x11447, 0x1144a; 0x1145f, 0x11461; 0x11480, 0x114c1; + 0x114c4, 0x114c5; 0x114c7, 0x114c7; 0x11580, 0x115b5; 0x115b8, 0x115be; 0x115d8, 0x115dd; + 0x11600, 0x1163e; 0x11640, 0x11640; 0x11644, 0x11644; 0x11680, 
0x116b5; 0x116b8, 0x116b8; + 0x11700, 0x1171a; 0x1171d, 0x1172a; 0x11740, 0x11746; 0x11800, 0x11838; 0x118a0, 0x118df; + 0x118ff, 0x11906; 0x11909, 0x11909; 0x1190c, 0x11913; 0x11915, 0x11916; 0x11918, 0x11935; + 0x11937, 0x11938; 0x1193b, 0x1193c; 0x1193f, 0x11942; 0x119a0, 0x119a7; 0x119aa, 0x119d7; + 0x119da, 0x119df; 0x119e1, 0x119e1; 0x119e3, 0x119e4; 0x11a00, 0x11a32; 0x11a35, 0x11a3e; + 0x11a50, 0x11a97; 0x11a9d, 0x11a9d; 0x11ab0, 0x11af8; 0x11bc0, 0x11be0; 0x11c00, 0x11c08; + 0x11c0a, 0x11c36; 0x11c38, 0x11c3e; 0x11c40, 0x11c40; 0x11c72, 0x11c8f; 0x11c92, 0x11ca7; + 0x11ca9, 0x11cb6; 0x11d00, 0x11d06; 0x11d08, 0x11d09; 0x11d0b, 0x11d36; 0x11d3a, 0x11d3a; + 0x11d3c, 0x11d3d; 0x11d3f, 0x11d41; 0x11d43, 0x11d43; 0x11d46, 0x11d47; 0x11d60, 0x11d65; + 0x11d67, 0x11d68; 0x11d6a, 0x11d8e; 0x11d90, 0x11d91; 0x11d93, 0x11d96; 0x11d98, 0x11d98; + 0x11ee0, 0x11ef6; 0x11f00, 0x11f10; 0x11f12, 0x11f3a; 0x11f3e, 0x11f40; 0x11fb0, 0x11fb0; + 0x12000, 0x12399; 0x12400, 0x1246e; 0x12480, 0x12543; 0x12f90, 0x12ff0; 0x13000, 0x1342f; + 0x13441, 0x13446; 0x13460, 0x143fa; 0x14400, 0x14646; 0x16100, 0x1612e; 0x16800, 0x16a38; + 0x16a40, 0x16a5e; 0x16a70, 0x16abe; 0x16ad0, 0x16aed; 0x16b00, 0x16b2f; 0x16b40, 0x16b43; + 0x16b63, 0x16b77; 0x16b7d, 0x16b8f; 0x16d40, 0x16d6c; 0x16e40, 0x16e7f; 0x16f00, 0x16f4a; + 0x16f4f, 0x16f87; 0x16f8f, 0x16f9f; 0x16fe0, 0x16fe1; 0x16fe3, 0x16fe3; 0x16ff0, 0x16ff1; + 0x17000, 0x187f7; 0x18800, 0x18cd5; 0x18cff, 0x18d08; 0x1aff0, 0x1aff3; 0x1aff5, 0x1affb; + 0x1affd, 0x1affe; 0x1b000, 0x1b122; 0x1b132, 0x1b132; 0x1b150, 0x1b152; 0x1b155, 0x1b155; + 0x1b164, 0x1b167; 0x1b170, 0x1b2fb; 0x1bc00, 0x1bc6a; 0x1bc70, 0x1bc7c; 0x1bc80, 0x1bc88; + 0x1bc90, 0x1bc99; 0x1bc9e, 0x1bc9e; 0x1d400, 0x1d454; 0x1d456, 0x1d49c; 0x1d49e, 0x1d49f; + 0x1d4a2, 0x1d4a2; 0x1d4a5, 0x1d4a6; 0x1d4a9, 0x1d4ac; 0x1d4ae, 0x1d4b9; 0x1d4bb, 0x1d4bb; + 0x1d4bd, 0x1d4c3; 0x1d4c5, 0x1d505; 0x1d507, 0x1d50a; 0x1d50d, 0x1d514; 0x1d516, 0x1d51c; + 0x1d51e, 0x1d539; 0x1d53b, 0x1d53e; 
0x1d540, 0x1d544; 0x1d546, 0x1d546; 0x1d54a, 0x1d550; + 0x1d552, 0x1d6a5; 0x1d6a8, 0x1d6c0; 0x1d6c2, 0x1d6da; 0x1d6dc, 0x1d6fa; 0x1d6fc, 0x1d714; + 0x1d716, 0x1d734; 0x1d736, 0x1d74e; 0x1d750, 0x1d76e; 0x1d770, 0x1d788; 0x1d78a, 0x1d7a8; + 0x1d7aa, 0x1d7c2; 0x1d7c4, 0x1d7cb; 0x1df00, 0x1df1e; 0x1df25, 0x1df2a; 0x1e000, 0x1e006; + 0x1e008, 0x1e018; 0x1e01b, 0x1e021; 0x1e023, 0x1e024; 0x1e026, 0x1e02a; 0x1e030, 0x1e06d; + 0x1e08f, 0x1e08f; 0x1e100, 0x1e12c; 0x1e137, 0x1e13d; 0x1e14e, 0x1e14e; 0x1e290, 0x1e2ad; + 0x1e2c0, 0x1e2eb; 0x1e4d0, 0x1e4eb; 0x1e5d0, 0x1e5ed; 0x1e5f0, 0x1e5f0; 0x1e7e0, 0x1e7e6; + 0x1e7e8, 0x1e7eb; 0x1e7ed, 0x1e7ee; 0x1e7f0, 0x1e7fe; 0x1e800, 0x1e8c4; 0x1e900, 0x1e943; + 0x1e947, 0x1e947; 0x1e94b, 0x1e94b; 0x1ee00, 0x1ee03; 0x1ee05, 0x1ee1f; 0x1ee21, 0x1ee22; + 0x1ee24, 0x1ee24; 0x1ee27, 0x1ee27; 0x1ee29, 0x1ee32; 0x1ee34, 0x1ee37; 0x1ee39, 0x1ee39; + 0x1ee3b, 0x1ee3b; 0x1ee42, 0x1ee42; 0x1ee47, 0x1ee47; 0x1ee49, 0x1ee49; 0x1ee4b, 0x1ee4b; + 0x1ee4d, 0x1ee4f; 0x1ee51, 0x1ee52; 0x1ee54, 0x1ee54; 0x1ee57, 0x1ee57; 0x1ee59, 0x1ee59; + 0x1ee5b, 0x1ee5b; 0x1ee5d, 0x1ee5d; 0x1ee5f, 0x1ee5f; 0x1ee61, 0x1ee62; 0x1ee64, 0x1ee64; + 0x1ee67, 0x1ee6a; 0x1ee6c, 0x1ee72; 0x1ee74, 0x1ee77; 0x1ee79, 0x1ee7c; 0x1ee7e, 0x1ee7e; + 0x1ee80, 0x1ee89; 0x1ee8b, 0x1ee9b; 0x1eea1, 0x1eea3; 0x1eea5, 0x1eea9; 0x1eeab, 0x1eebb; + 0x1f130, 0x1f149; 0x1f150, 0x1f169; 0x1f170, 0x1f189; 0x20000, 0x2a6df; 0x2a700, 0x2b739; + 0x2b740, 0x2b81d; 0x2b820, 0x2cea1; 0x2ceb0, 0x2ebe0; 0x2ebf0, 0x2ee5d; 0x2f800, 0x2fa1d; 0x30000, 0x3134a; 0x31350, 0x323af] let ascii_hex_digit = Sedlex_utils.Cset.of_list @@ -1089,7 +1113,7 @@ module Properties = struct 0x5ef, 0x5f2; 0x610, 0x61a; 0x620, 0x669; 0x66e, 0x6d3; 0x6d5, 0x6dc; 0x6df, 0x6e8; 0x6ea, 0x6fc; 0x6ff, 0x6ff; 0x710, 0x74a; 0x74d, 0x7b1; 0x7c0, 0x7f5; 0x7fa, 0x7fa; 0x7fd, 0x7fd; 0x800, 0x82d; 0x840, 0x85b; - 0x860, 0x86a; 0x870, 0x887; 0x889, 0x88e; 0x898, 0x8e1; 0x8e3, 0x963; + 0x860, 0x86a; 0x870, 0x887; 0x889, 0x88e; 0x897, 0x8e1; 
0x8e3, 0x963; 0x966, 0x96f; 0x971, 0x983; 0x985, 0x98c; 0x98f, 0x990; 0x993, 0x9a8; 0x9aa, 0x9b0; 0x9b2, 0x9b2; 0x9b6, 0x9b9; 0x9bc, 0x9c4; 0x9c7, 0x9c8; 0x9cb, 0x9ce; 0x9d7, 0x9d7; 0x9dc, 0x9dd; 0x9df, 0x9e3; 0x9e6, 0x9f1; @@ -1134,24 +1158,24 @@ module Properties = struct 0x19d0, 0x19da; 0x1a00, 0x1a1b; 0x1a20, 0x1a5e; 0x1a60, 0x1a7c; 0x1a7f, 0x1a89; 0x1a90, 0x1a99; 0x1aa7, 0x1aa7; 0x1ab0, 0x1abd; 0x1abf, 0x1ace; 0x1b00, 0x1b4c; 0x1b50, 0x1b59; 0x1b6b, 0x1b73; 0x1b80, 0x1bf3; 0x1c00, 0x1c37; 0x1c40, 0x1c49; - 0x1c4d, 0x1c7d; 0x1c80, 0x1c88; 0x1c90, 0x1cba; 0x1cbd, 0x1cbf; 0x1cd0, 0x1cd2; + 0x1c4d, 0x1c7d; 0x1c80, 0x1c8a; 0x1c90, 0x1cba; 0x1cbd, 0x1cbf; 0x1cd0, 0x1cd2; 0x1cd4, 0x1cfa; 0x1d00, 0x1f15; 0x1f18, 0x1f1d; 0x1f20, 0x1f45; 0x1f48, 0x1f4d; 0x1f50, 0x1f57; 0x1f59, 0x1f59; 0x1f5b, 0x1f5b; 0x1f5d, 0x1f5d; 0x1f5f, 0x1f7d; 0x1f80, 0x1fb4; 0x1fb6, 0x1fbc; 0x1fbe, 0x1fbe; 0x1fc2, 0x1fc4; 0x1fc6, 0x1fcc; 0x1fd0, 0x1fd3; 0x1fd6, 0x1fdb; 0x1fe0, 0x1fec; 0x1ff2, 0x1ff4; 0x1ff6, 0x1ffc; - 0x203f, 0x2040; 0x2054, 0x2054; 0x2071, 0x2071; 0x207f, 0x207f; 0x2090, 0x209c; - 0x20d0, 0x20dc; 0x20e1, 0x20e1; 0x20e5, 0x20f0; 0x2102, 0x2102; 0x2107, 0x2107; - 0x210a, 0x2113; 0x2115, 0x2115; 0x2118, 0x211d; 0x2124, 0x2124; 0x2126, 0x2126; - 0x2128, 0x2128; 0x212a, 0x2139; 0x213c, 0x213f; 0x2145, 0x2149; 0x214e, 0x214e; - 0x2160, 0x2188; 0x2c00, 0x2ce4; 0x2ceb, 0x2cf3; 0x2d00, 0x2d25; 0x2d27, 0x2d27; - 0x2d2d, 0x2d2d; 0x2d30, 0x2d67; 0x2d6f, 0x2d6f; 0x2d7f, 0x2d96; 0x2da0, 0x2da6; - 0x2da8, 0x2dae; 0x2db0, 0x2db6; 0x2db8, 0x2dbe; 0x2dc0, 0x2dc6; 0x2dc8, 0x2dce; - 0x2dd0, 0x2dd6; 0x2dd8, 0x2dde; 0x2de0, 0x2dff; 0x3005, 0x3007; 0x3021, 0x302f; - 0x3031, 0x3035; 0x3038, 0x303c; 0x3041, 0x3096; 0x3099, 0x309f; 0x30a1, 0x30fa; - 0x30fc, 0x30ff; 0x3105, 0x312f; 0x3131, 0x318e; 0x31a0, 0x31bf; 0x31f0, 0x31ff; + 0x200c, 0x200d; 0x203f, 0x2040; 0x2054, 0x2054; 0x2071, 0x2071; 0x207f, 0x207f; + 0x2090, 0x209c; 0x20d0, 0x20dc; 0x20e1, 0x20e1; 0x20e5, 0x20f0; 0x2102, 0x2102; + 0x2107, 
0x2107; 0x210a, 0x2113; 0x2115, 0x2115; 0x2118, 0x211d; 0x2124, 0x2124; + 0x2126, 0x2126; 0x2128, 0x2128; 0x212a, 0x2139; 0x213c, 0x213f; 0x2145, 0x2149; + 0x214e, 0x214e; 0x2160, 0x2188; 0x2c00, 0x2ce4; 0x2ceb, 0x2cf3; 0x2d00, 0x2d25; + 0x2d27, 0x2d27; 0x2d2d, 0x2d2d; 0x2d30, 0x2d67; 0x2d6f, 0x2d6f; 0x2d7f, 0x2d96; + 0x2da0, 0x2da6; 0x2da8, 0x2dae; 0x2db0, 0x2db6; 0x2db8, 0x2dbe; 0x2dc0, 0x2dc6; + 0x2dc8, 0x2dce; 0x2dd0, 0x2dd6; 0x2dd8, 0x2dde; 0x2de0, 0x2dff; 0x3005, 0x3007; + 0x3021, 0x302f; 0x3031, 0x3035; 0x3038, 0x303c; 0x3041, 0x3096; 0x3099, 0x309f; + 0x30a1, 0x30ff; 0x3105, 0x312f; 0x3131, 0x318e; 0x31a0, 0x31bf; 0x31f0, 0x31ff; 0x3400, 0x4dbf; 0x4e00, 0xa48c; 0xa4d0, 0xa4fd; 0xa500, 0xa60c; 0xa610, 0xa62b; 0xa640, 0xa66f; 0xa674, 0xa67d; 0xa67f, 0xa6f1; 0xa717, 0xa71f; 0xa722, 0xa788; - 0xa78b, 0xa7ca; 0xa7d0, 0xa7d1; 0xa7d3, 0xa7d3; 0xa7d5, 0xa7d9; 0xa7f2, 0xa827; + 0xa78b, 0xa7cd; 0xa7d0, 0xa7d1; 0xa7d3, 0xa7d3; 0xa7d5, 0xa7dc; 0xa7f2, 0xa827; 0xa82c, 0xa82c; 0xa840, 0xa873; 0xa880, 0xa8c5; 0xa8d0, 0xa8d9; 0xa8e0, 0xa8f7; 0xa8fb, 0xa8fb; 0xa8fd, 0xa92d; 0xa930, 0xa953; 0xa960, 0xa97c; 0xa980, 0xa9c0; 0xa9cf, 0xa9d9; 0xa9e0, 0xa9fe; 0xaa00, 0xaa36; 0xaa40, 0xaa4d; 0xaa50, 0xaa59; @@ -1163,7 +1187,7 @@ module Properties = struct 0xfb3e, 0xfb3e; 0xfb40, 0xfb41; 0xfb43, 0xfb44; 0xfb46, 0xfbb1; 0xfbd3, 0xfd3d; 0xfd50, 0xfd8f; 0xfd92, 0xfdc7; 0xfdf0, 0xfdfb; 0xfe00, 0xfe0f; 0xfe20, 0xfe2f; 0xfe33, 0xfe34; 0xfe4d, 0xfe4f; 0xfe70, 0xfe74; 0xfe76, 0xfefc; 0xff10, 0xff19; - 0xff21, 0xff3a; 0xff3f, 0xff3f; 0xff41, 0xff5a; 0xff66, 0xffbe; 0xffc2, 0xffc7; + 0xff21, 0xff3a; 0xff3f, 0xff3f; 0xff41, 0xff5a; 0xff65, 0xffbe; 0xffc2, 0xffc7; 0xffca, 0xffcf; 0xffd2, 0xffd7; 0xffda, 0xffdc; 0x10000, 0x1000b; 0x1000d, 0x10026; 0x10028, 0x1003a; 0x1003c, 0x1003d; 0x1003f, 0x1004d; 0x10050, 0x1005d; 0x10080, 0x100fa; 0x10140, 0x10174; 0x101fd, 0x101fd; 0x10280, 0x1029c; 0x102a0, 0x102d0; 0x102e0, 0x102e0; @@ -1171,15 +1195,16 @@ module Properties = struct 0x103c8, 0x103cf; 
0x103d1, 0x103d5; 0x10400, 0x1049d; 0x104a0, 0x104a9; 0x104b0, 0x104d3; 0x104d8, 0x104fb; 0x10500, 0x10527; 0x10530, 0x10563; 0x10570, 0x1057a; 0x1057c, 0x1058a; 0x1058c, 0x10592; 0x10594, 0x10595; 0x10597, 0x105a1; 0x105a3, 0x105b1; 0x105b3, 0x105b9; - 0x105bb, 0x105bc; 0x10600, 0x10736; 0x10740, 0x10755; 0x10760, 0x10767; 0x10780, 0x10785; - 0x10787, 0x107b0; 0x107b2, 0x107ba; 0x10800, 0x10805; 0x10808, 0x10808; 0x1080a, 0x10835; - 0x10837, 0x10838; 0x1083c, 0x1083c; 0x1083f, 0x10855; 0x10860, 0x10876; 0x10880, 0x1089e; - 0x108e0, 0x108f2; 0x108f4, 0x108f5; 0x10900, 0x10915; 0x10920, 0x10939; 0x10980, 0x109b7; - 0x109be, 0x109bf; 0x10a00, 0x10a03; 0x10a05, 0x10a06; 0x10a0c, 0x10a13; 0x10a15, 0x10a17; - 0x10a19, 0x10a35; 0x10a38, 0x10a3a; 0x10a3f, 0x10a3f; 0x10a60, 0x10a7c; 0x10a80, 0x10a9c; - 0x10ac0, 0x10ac7; 0x10ac9, 0x10ae6; 0x10b00, 0x10b35; 0x10b40, 0x10b55; 0x10b60, 0x10b72; - 0x10b80, 0x10b91; 0x10c00, 0x10c48; 0x10c80, 0x10cb2; 0x10cc0, 0x10cf2; 0x10d00, 0x10d27; - 0x10d30, 0x10d39; 0x10e80, 0x10ea9; 0x10eab, 0x10eac; 0x10eb0, 0x10eb1; 0x10efd, 0x10f1c; + 0x105bb, 0x105bc; 0x105c0, 0x105f3; 0x10600, 0x10736; 0x10740, 0x10755; 0x10760, 0x10767; + 0x10780, 0x10785; 0x10787, 0x107b0; 0x107b2, 0x107ba; 0x10800, 0x10805; 0x10808, 0x10808; + 0x1080a, 0x10835; 0x10837, 0x10838; 0x1083c, 0x1083c; 0x1083f, 0x10855; 0x10860, 0x10876; + 0x10880, 0x1089e; 0x108e0, 0x108f2; 0x108f4, 0x108f5; 0x10900, 0x10915; 0x10920, 0x10939; + 0x10980, 0x109b7; 0x109be, 0x109bf; 0x10a00, 0x10a03; 0x10a05, 0x10a06; 0x10a0c, 0x10a13; + 0x10a15, 0x10a17; 0x10a19, 0x10a35; 0x10a38, 0x10a3a; 0x10a3f, 0x10a3f; 0x10a60, 0x10a7c; + 0x10a80, 0x10a9c; 0x10ac0, 0x10ac7; 0x10ac9, 0x10ae6; 0x10b00, 0x10b35; 0x10b40, 0x10b55; + 0x10b60, 0x10b72; 0x10b80, 0x10b91; 0x10c00, 0x10c48; 0x10c80, 0x10cb2; 0x10cc0, 0x10cf2; + 0x10d00, 0x10d27; 0x10d30, 0x10d39; 0x10d40, 0x10d65; 0x10d69, 0x10d6d; 0x10d6f, 0x10d85; + 0x10e80, 0x10ea9; 0x10eab, 0x10eac; 0x10eb0, 0x10eb1; 0x10ec2, 0x10ec4; 0x10efc, 0x10f1c; 
0x10f27, 0x10f27; 0x10f30, 0x10f50; 0x10f70, 0x10f85; 0x10fb0, 0x10fc4; 0x10fe0, 0x10ff6; 0x11000, 0x11046; 0x11066, 0x11075; 0x1107f, 0x110ba; 0x110c2, 0x110c2; 0x110d0, 0x110e8; 0x110f0, 0x110f9; 0x11100, 0x11134; 0x11136, 0x1113f; 0x11144, 0x11147; 0x11150, 0x11173; @@ -1189,49 +1214,53 @@ module Properties = struct 0x11300, 0x11303; 0x11305, 0x1130c; 0x1130f, 0x11310; 0x11313, 0x11328; 0x1132a, 0x11330; 0x11332, 0x11333; 0x11335, 0x11339; 0x1133b, 0x11344; 0x11347, 0x11348; 0x1134b, 0x1134d; 0x11350, 0x11350; 0x11357, 0x11357; 0x1135d, 0x11363; 0x11366, 0x1136c; 0x11370, 0x11374; + 0x11380, 0x11389; 0x1138b, 0x1138b; 0x1138e, 0x1138e; 0x11390, 0x113b5; 0x113b7, 0x113c0; + 0x113c2, 0x113c2; 0x113c5, 0x113c5; 0x113c7, 0x113ca; 0x113cc, 0x113d3; 0x113e1, 0x113e2; 0x11400, 0x1144a; 0x11450, 0x11459; 0x1145e, 0x11461; 0x11480, 0x114c5; 0x114c7, 0x114c7; 0x114d0, 0x114d9; 0x11580, 0x115b5; 0x115b8, 0x115c0; 0x115d8, 0x115dd; 0x11600, 0x11640; - 0x11644, 0x11644; 0x11650, 0x11659; 0x11680, 0x116b8; 0x116c0, 0x116c9; 0x11700, 0x1171a; - 0x1171d, 0x1172b; 0x11730, 0x11739; 0x11740, 0x11746; 0x11800, 0x1183a; 0x118a0, 0x118e9; - 0x118ff, 0x11906; 0x11909, 0x11909; 0x1190c, 0x11913; 0x11915, 0x11916; 0x11918, 0x11935; - 0x11937, 0x11938; 0x1193b, 0x11943; 0x11950, 0x11959; 0x119a0, 0x119a7; 0x119aa, 0x119d7; - 0x119da, 0x119e1; 0x119e3, 0x119e4; 0x11a00, 0x11a3e; 0x11a47, 0x11a47; 0x11a50, 0x11a99; - 0x11a9d, 0x11a9d; 0x11ab0, 0x11af8; 0x11c00, 0x11c08; 0x11c0a, 0x11c36; 0x11c38, 0x11c40; - 0x11c50, 0x11c59; 0x11c72, 0x11c8f; 0x11c92, 0x11ca7; 0x11ca9, 0x11cb6; 0x11d00, 0x11d06; - 0x11d08, 0x11d09; 0x11d0b, 0x11d36; 0x11d3a, 0x11d3a; 0x11d3c, 0x11d3d; 0x11d3f, 0x11d47; - 0x11d50, 0x11d59; 0x11d60, 0x11d65; 0x11d67, 0x11d68; 0x11d6a, 0x11d8e; 0x11d90, 0x11d91; - 0x11d93, 0x11d98; 0x11da0, 0x11da9; 0x11ee0, 0x11ef6; 0x11f00, 0x11f10; 0x11f12, 0x11f3a; - 0x11f3e, 0x11f42; 0x11f50, 0x11f59; 0x11fb0, 0x11fb0; 0x12000, 0x12399; 0x12400, 0x1246e; - 0x12480, 0x12543; 0x12f90, 
0x12ff0; 0x13000, 0x1342f; 0x13440, 0x13455; 0x14400, 0x14646; + 0x11644, 0x11644; 0x11650, 0x11659; 0x11680, 0x116b8; 0x116c0, 0x116c9; 0x116d0, 0x116e3; + 0x11700, 0x1171a; 0x1171d, 0x1172b; 0x11730, 0x11739; 0x11740, 0x11746; 0x11800, 0x1183a; + 0x118a0, 0x118e9; 0x118ff, 0x11906; 0x11909, 0x11909; 0x1190c, 0x11913; 0x11915, 0x11916; + 0x11918, 0x11935; 0x11937, 0x11938; 0x1193b, 0x11943; 0x11950, 0x11959; 0x119a0, 0x119a7; + 0x119aa, 0x119d7; 0x119da, 0x119e1; 0x119e3, 0x119e4; 0x11a00, 0x11a3e; 0x11a47, 0x11a47; + 0x11a50, 0x11a99; 0x11a9d, 0x11a9d; 0x11ab0, 0x11af8; 0x11bc0, 0x11be0; 0x11bf0, 0x11bf9; + 0x11c00, 0x11c08; 0x11c0a, 0x11c36; 0x11c38, 0x11c40; 0x11c50, 0x11c59; 0x11c72, 0x11c8f; + 0x11c92, 0x11ca7; 0x11ca9, 0x11cb6; 0x11d00, 0x11d06; 0x11d08, 0x11d09; 0x11d0b, 0x11d36; + 0x11d3a, 0x11d3a; 0x11d3c, 0x11d3d; 0x11d3f, 0x11d47; 0x11d50, 0x11d59; 0x11d60, 0x11d65; + 0x11d67, 0x11d68; 0x11d6a, 0x11d8e; 0x11d90, 0x11d91; 0x11d93, 0x11d98; 0x11da0, 0x11da9; + 0x11ee0, 0x11ef6; 0x11f00, 0x11f10; 0x11f12, 0x11f3a; 0x11f3e, 0x11f42; 0x11f50, 0x11f5a; + 0x11fb0, 0x11fb0; 0x12000, 0x12399; 0x12400, 0x1246e; 0x12480, 0x12543; 0x12f90, 0x12ff0; + 0x13000, 0x1342f; 0x13440, 0x13455; 0x13460, 0x143fa; 0x14400, 0x14646; 0x16100, 0x16139; 0x16800, 0x16a38; 0x16a40, 0x16a5e; 0x16a60, 0x16a69; 0x16a70, 0x16abe; 0x16ac0, 0x16ac9; 0x16ad0, 0x16aed; 0x16af0, 0x16af4; 0x16b00, 0x16b36; 0x16b40, 0x16b43; 0x16b50, 0x16b59; - 0x16b63, 0x16b77; 0x16b7d, 0x16b8f; 0x16e40, 0x16e7f; 0x16f00, 0x16f4a; 0x16f4f, 0x16f87; - 0x16f8f, 0x16f9f; 0x16fe0, 0x16fe1; 0x16fe3, 0x16fe4; 0x16ff0, 0x16ff1; 0x17000, 0x187f7; - 0x18800, 0x18cd5; 0x18d00, 0x18d08; 0x1aff0, 0x1aff3; 0x1aff5, 0x1affb; 0x1affd, 0x1affe; - 0x1b000, 0x1b122; 0x1b132, 0x1b132; 0x1b150, 0x1b152; 0x1b155, 0x1b155; 0x1b164, 0x1b167; - 0x1b170, 0x1b2fb; 0x1bc00, 0x1bc6a; 0x1bc70, 0x1bc7c; 0x1bc80, 0x1bc88; 0x1bc90, 0x1bc99; - 0x1bc9d, 0x1bc9e; 0x1cf00, 0x1cf2d; 0x1cf30, 0x1cf46; 0x1d165, 0x1d169; 0x1d16d, 0x1d172; - 
0x1d17b, 0x1d182; 0x1d185, 0x1d18b; 0x1d1aa, 0x1d1ad; 0x1d242, 0x1d244; 0x1d400, 0x1d454; - 0x1d456, 0x1d49c; 0x1d49e, 0x1d49f; 0x1d4a2, 0x1d4a2; 0x1d4a5, 0x1d4a6; 0x1d4a9, 0x1d4ac; - 0x1d4ae, 0x1d4b9; 0x1d4bb, 0x1d4bb; 0x1d4bd, 0x1d4c3; 0x1d4c5, 0x1d505; 0x1d507, 0x1d50a; - 0x1d50d, 0x1d514; 0x1d516, 0x1d51c; 0x1d51e, 0x1d539; 0x1d53b, 0x1d53e; 0x1d540, 0x1d544; - 0x1d546, 0x1d546; 0x1d54a, 0x1d550; 0x1d552, 0x1d6a5; 0x1d6a8, 0x1d6c0; 0x1d6c2, 0x1d6da; - 0x1d6dc, 0x1d6fa; 0x1d6fc, 0x1d714; 0x1d716, 0x1d734; 0x1d736, 0x1d74e; 0x1d750, 0x1d76e; - 0x1d770, 0x1d788; 0x1d78a, 0x1d7a8; 0x1d7aa, 0x1d7c2; 0x1d7c4, 0x1d7cb; 0x1d7ce, 0x1d7ff; - 0x1da00, 0x1da36; 0x1da3b, 0x1da6c; 0x1da75, 0x1da75; 0x1da84, 0x1da84; 0x1da9b, 0x1da9f; - 0x1daa1, 0x1daaf; 0x1df00, 0x1df1e; 0x1df25, 0x1df2a; 0x1e000, 0x1e006; 0x1e008, 0x1e018; - 0x1e01b, 0x1e021; 0x1e023, 0x1e024; 0x1e026, 0x1e02a; 0x1e030, 0x1e06d; 0x1e08f, 0x1e08f; - 0x1e100, 0x1e12c; 0x1e130, 0x1e13d; 0x1e140, 0x1e149; 0x1e14e, 0x1e14e; 0x1e290, 0x1e2ae; - 0x1e2c0, 0x1e2f9; 0x1e4d0, 0x1e4f9; 0x1e7e0, 0x1e7e6; 0x1e7e8, 0x1e7eb; 0x1e7ed, 0x1e7ee; - 0x1e7f0, 0x1e7fe; 0x1e800, 0x1e8c4; 0x1e8d0, 0x1e8d6; 0x1e900, 0x1e94b; 0x1e950, 0x1e959; - 0x1ee00, 0x1ee03; 0x1ee05, 0x1ee1f; 0x1ee21, 0x1ee22; 0x1ee24, 0x1ee24; 0x1ee27, 0x1ee27; - 0x1ee29, 0x1ee32; 0x1ee34, 0x1ee37; 0x1ee39, 0x1ee39; 0x1ee3b, 0x1ee3b; 0x1ee42, 0x1ee42; - 0x1ee47, 0x1ee47; 0x1ee49, 0x1ee49; 0x1ee4b, 0x1ee4b; 0x1ee4d, 0x1ee4f; 0x1ee51, 0x1ee52; - 0x1ee54, 0x1ee54; 0x1ee57, 0x1ee57; 0x1ee59, 0x1ee59; 0x1ee5b, 0x1ee5b; 0x1ee5d, 0x1ee5d; - 0x1ee5f, 0x1ee5f; 0x1ee61, 0x1ee62; 0x1ee64, 0x1ee64; 0x1ee67, 0x1ee6a; 0x1ee6c, 0x1ee72; - 0x1ee74, 0x1ee77; 0x1ee79, 0x1ee7c; 0x1ee7e, 0x1ee7e; 0x1ee80, 0x1ee89; 0x1ee8b, 0x1ee9b; - 0x1eea1, 0x1eea3; 0x1eea5, 0x1eea9; 0x1eeab, 0x1eebb; 0x1fbf0, 0x1fbf9; 0x20000, 0x2a6df; - 0x2a700, 0x2b739; 0x2b740, 0x2b81d; 0x2b820, 0x2cea1; 0x2ceb0, 0x2ebe0; 0x2f800, 0x2fa1d; + 0x16b63, 0x16b77; 0x16b7d, 0x16b8f; 0x16d40, 0x16d6c; 0x16d70, 
0x16d79; 0x16e40, 0x16e7f; + 0x16f00, 0x16f4a; 0x16f4f, 0x16f87; 0x16f8f, 0x16f9f; 0x16fe0, 0x16fe1; 0x16fe3, 0x16fe4; + 0x16ff0, 0x16ff1; 0x17000, 0x187f7; 0x18800, 0x18cd5; 0x18cff, 0x18d08; 0x1aff0, 0x1aff3; + 0x1aff5, 0x1affb; 0x1affd, 0x1affe; 0x1b000, 0x1b122; 0x1b132, 0x1b132; 0x1b150, 0x1b152; + 0x1b155, 0x1b155; 0x1b164, 0x1b167; 0x1b170, 0x1b2fb; 0x1bc00, 0x1bc6a; 0x1bc70, 0x1bc7c; + 0x1bc80, 0x1bc88; 0x1bc90, 0x1bc99; 0x1bc9d, 0x1bc9e; 0x1ccf0, 0x1ccf9; 0x1cf00, 0x1cf2d; + 0x1cf30, 0x1cf46; 0x1d165, 0x1d169; 0x1d16d, 0x1d172; 0x1d17b, 0x1d182; 0x1d185, 0x1d18b; + 0x1d1aa, 0x1d1ad; 0x1d242, 0x1d244; 0x1d400, 0x1d454; 0x1d456, 0x1d49c; 0x1d49e, 0x1d49f; + 0x1d4a2, 0x1d4a2; 0x1d4a5, 0x1d4a6; 0x1d4a9, 0x1d4ac; 0x1d4ae, 0x1d4b9; 0x1d4bb, 0x1d4bb; + 0x1d4bd, 0x1d4c3; 0x1d4c5, 0x1d505; 0x1d507, 0x1d50a; 0x1d50d, 0x1d514; 0x1d516, 0x1d51c; + 0x1d51e, 0x1d539; 0x1d53b, 0x1d53e; 0x1d540, 0x1d544; 0x1d546, 0x1d546; 0x1d54a, 0x1d550; + 0x1d552, 0x1d6a5; 0x1d6a8, 0x1d6c0; 0x1d6c2, 0x1d6da; 0x1d6dc, 0x1d6fa; 0x1d6fc, 0x1d714; + 0x1d716, 0x1d734; 0x1d736, 0x1d74e; 0x1d750, 0x1d76e; 0x1d770, 0x1d788; 0x1d78a, 0x1d7a8; + 0x1d7aa, 0x1d7c2; 0x1d7c4, 0x1d7cb; 0x1d7ce, 0x1d7ff; 0x1da00, 0x1da36; 0x1da3b, 0x1da6c; + 0x1da75, 0x1da75; 0x1da84, 0x1da84; 0x1da9b, 0x1da9f; 0x1daa1, 0x1daaf; 0x1df00, 0x1df1e; + 0x1df25, 0x1df2a; 0x1e000, 0x1e006; 0x1e008, 0x1e018; 0x1e01b, 0x1e021; 0x1e023, 0x1e024; + 0x1e026, 0x1e02a; 0x1e030, 0x1e06d; 0x1e08f, 0x1e08f; 0x1e100, 0x1e12c; 0x1e130, 0x1e13d; + 0x1e140, 0x1e149; 0x1e14e, 0x1e14e; 0x1e290, 0x1e2ae; 0x1e2c0, 0x1e2f9; 0x1e4d0, 0x1e4f9; + 0x1e5d0, 0x1e5fa; 0x1e7e0, 0x1e7e6; 0x1e7e8, 0x1e7eb; 0x1e7ed, 0x1e7ee; 0x1e7f0, 0x1e7fe; + 0x1e800, 0x1e8c4; 0x1e8d0, 0x1e8d6; 0x1e900, 0x1e94b; 0x1e950, 0x1e959; 0x1ee00, 0x1ee03; + 0x1ee05, 0x1ee1f; 0x1ee21, 0x1ee22; 0x1ee24, 0x1ee24; 0x1ee27, 0x1ee27; 0x1ee29, 0x1ee32; + 0x1ee34, 0x1ee37; 0x1ee39, 0x1ee39; 0x1ee3b, 0x1ee3b; 0x1ee42, 0x1ee42; 0x1ee47, 0x1ee47; + 0x1ee49, 0x1ee49; 0x1ee4b, 0x1ee4b; 
0x1ee4d, 0x1ee4f; 0x1ee51, 0x1ee52; 0x1ee54, 0x1ee54; + 0x1ee57, 0x1ee57; 0x1ee59, 0x1ee59; 0x1ee5b, 0x1ee5b; 0x1ee5d, 0x1ee5d; 0x1ee5f, 0x1ee5f; + 0x1ee61, 0x1ee62; 0x1ee64, 0x1ee64; 0x1ee67, 0x1ee6a; 0x1ee6c, 0x1ee72; 0x1ee74, 0x1ee77; + 0x1ee79, 0x1ee7c; 0x1ee7e, 0x1ee7e; 0x1ee80, 0x1ee89; 0x1ee8b, 0x1ee9b; 0x1eea1, 0x1eea3; + 0x1eea5, 0x1eea9; 0x1eeab, 0x1eebb; 0x1fbf0, 0x1fbf9; 0x20000, 0x2a6df; 0x2a700, 0x2b739; + 0x2b740, 0x2b81d; 0x2b820, 0x2cea1; 0x2ceb0, 0x2ebe0; 0x2ebf0, 0x2ee5d; 0x2f800, 0x2fa1d; 0x30000, 0x3134a; 0x31350, 0x323af; 0xe0100, 0xe01ef] let id_start = Sedlex_utils.Cset.of_list @@ -1279,7 +1308,7 @@ module Properties = struct 0x1880, 0x18a8; 0x18aa, 0x18aa; 0x18b0, 0x18f5; 0x1900, 0x191e; 0x1950, 0x196d; 0x1970, 0x1974; 0x1980, 0x19ab; 0x19b0, 0x19c9; 0x1a00, 0x1a16; 0x1a20, 0x1a54; 0x1aa7, 0x1aa7; 0x1b05, 0x1b33; 0x1b45, 0x1b4c; 0x1b83, 0x1ba0; 0x1bae, 0x1baf; - 0x1bba, 0x1be5; 0x1c00, 0x1c23; 0x1c4d, 0x1c4f; 0x1c5a, 0x1c7d; 0x1c80, 0x1c88; + 0x1bba, 0x1be5; 0x1c00, 0x1c23; 0x1c4d, 0x1c4f; 0x1c5a, 0x1c7d; 0x1c80, 0x1c8a; 0x1c90, 0x1cba; 0x1cbd, 0x1cbf; 0x1ce9, 0x1cec; 0x1cee, 0x1cf3; 0x1cf5, 0x1cf6; 0x1cfa, 0x1cfa; 0x1d00, 0x1dbf; 0x1e00, 0x1f15; 0x1f18, 0x1f1d; 0x1f20, 0x1f45; 0x1f48, 0x1f4d; 0x1f50, 0x1f57; 0x1f59, 0x1f59; 0x1f5b, 0x1f5b; 0x1f5d, 0x1f5d; @@ -1296,8 +1325,8 @@ module Properties = struct 0x30a1, 0x30fa; 0x30fc, 0x30ff; 0x3105, 0x312f; 0x3131, 0x318e; 0x31a0, 0x31bf; 0x31f0, 0x31ff; 0x3400, 0x4dbf; 0x4e00, 0xa48c; 0xa4d0, 0xa4fd; 0xa500, 0xa60c; 0xa610, 0xa61f; 0xa62a, 0xa62b; 0xa640, 0xa66e; 0xa67f, 0xa69d; 0xa6a0, 0xa6ef; - 0xa717, 0xa71f; 0xa722, 0xa788; 0xa78b, 0xa7ca; 0xa7d0, 0xa7d1; 0xa7d3, 0xa7d3; - 0xa7d5, 0xa7d9; 0xa7f2, 0xa801; 0xa803, 0xa805; 0xa807, 0xa80a; 0xa80c, 0xa822; + 0xa717, 0xa71f; 0xa722, 0xa788; 0xa78b, 0xa7cd; 0xa7d0, 0xa7d1; 0xa7d3, 0xa7d3; + 0xa7d5, 0xa7dc; 0xa7f2, 0xa801; 0xa803, 0xa805; 0xa807, 0xa80a; 0xa80c, 0xa822; 0xa840, 0xa873; 0xa882, 0xa8b3; 0xa8f2, 0xa8f7; 0xa8fb, 0xa8fb; 0xa8fd, 0xa8fe; 
0xa90a, 0xa925; 0xa930, 0xa946; 0xa960, 0xa97c; 0xa984, 0xa9b2; 0xa9cf, 0xa9cf; 0xa9e0, 0xa9e4; 0xa9e6, 0xa9ef; 0xa9fa, 0xa9fe; 0xaa00, 0xaa28; 0xaa40, 0xaa42; @@ -1316,37 +1345,40 @@ module Properties = struct 0x10350, 0x10375; 0x10380, 0x1039d; 0x103a0, 0x103c3; 0x103c8, 0x103cf; 0x103d1, 0x103d5; 0x10400, 0x1049d; 0x104b0, 0x104d3; 0x104d8, 0x104fb; 0x10500, 0x10527; 0x10530, 0x10563; 0x10570, 0x1057a; 0x1057c, 0x1058a; 0x1058c, 0x10592; 0x10594, 0x10595; 0x10597, 0x105a1; - 0x105a3, 0x105b1; 0x105b3, 0x105b9; 0x105bb, 0x105bc; 0x10600, 0x10736; 0x10740, 0x10755; - 0x10760, 0x10767; 0x10780, 0x10785; 0x10787, 0x107b0; 0x107b2, 0x107ba; 0x10800, 0x10805; - 0x10808, 0x10808; 0x1080a, 0x10835; 0x10837, 0x10838; 0x1083c, 0x1083c; 0x1083f, 0x10855; - 0x10860, 0x10876; 0x10880, 0x1089e; 0x108e0, 0x108f2; 0x108f4, 0x108f5; 0x10900, 0x10915; - 0x10920, 0x10939; 0x10980, 0x109b7; 0x109be, 0x109bf; 0x10a00, 0x10a00; 0x10a10, 0x10a13; - 0x10a15, 0x10a17; 0x10a19, 0x10a35; 0x10a60, 0x10a7c; 0x10a80, 0x10a9c; 0x10ac0, 0x10ac7; - 0x10ac9, 0x10ae4; 0x10b00, 0x10b35; 0x10b40, 0x10b55; 0x10b60, 0x10b72; 0x10b80, 0x10b91; - 0x10c00, 0x10c48; 0x10c80, 0x10cb2; 0x10cc0, 0x10cf2; 0x10d00, 0x10d23; 0x10e80, 0x10ea9; - 0x10eb0, 0x10eb1; 0x10f00, 0x10f1c; 0x10f27, 0x10f27; 0x10f30, 0x10f45; 0x10f70, 0x10f81; - 0x10fb0, 0x10fc4; 0x10fe0, 0x10ff6; 0x11003, 0x11037; 0x11071, 0x11072; 0x11075, 0x11075; - 0x11083, 0x110af; 0x110d0, 0x110e8; 0x11103, 0x11126; 0x11144, 0x11144; 0x11147, 0x11147; - 0x11150, 0x11172; 0x11176, 0x11176; 0x11183, 0x111b2; 0x111c1, 0x111c4; 0x111da, 0x111da; - 0x111dc, 0x111dc; 0x11200, 0x11211; 0x11213, 0x1122b; 0x1123f, 0x11240; 0x11280, 0x11286; - 0x11288, 0x11288; 0x1128a, 0x1128d; 0x1128f, 0x1129d; 0x1129f, 0x112a8; 0x112b0, 0x112de; - 0x11305, 0x1130c; 0x1130f, 0x11310; 0x11313, 0x11328; 0x1132a, 0x11330; 0x11332, 0x11333; - 0x11335, 0x11339; 0x1133d, 0x1133d; 0x11350, 0x11350; 0x1135d, 0x11361; 0x11400, 0x11434; - 0x11447, 0x1144a; 0x1145f, 0x11461; 0x11480, 
0x114af; 0x114c4, 0x114c5; 0x114c7, 0x114c7; - 0x11580, 0x115ae; 0x115d8, 0x115db; 0x11600, 0x1162f; 0x11644, 0x11644; 0x11680, 0x116aa; - 0x116b8, 0x116b8; 0x11700, 0x1171a; 0x11740, 0x11746; 0x11800, 0x1182b; 0x118a0, 0x118df; - 0x118ff, 0x11906; 0x11909, 0x11909; 0x1190c, 0x11913; 0x11915, 0x11916; 0x11918, 0x1192f; - 0x1193f, 0x1193f; 0x11941, 0x11941; 0x119a0, 0x119a7; 0x119aa, 0x119d0; 0x119e1, 0x119e1; - 0x119e3, 0x119e3; 0x11a00, 0x11a00; 0x11a0b, 0x11a32; 0x11a3a, 0x11a3a; 0x11a50, 0x11a50; - 0x11a5c, 0x11a89; 0x11a9d, 0x11a9d; 0x11ab0, 0x11af8; 0x11c00, 0x11c08; 0x11c0a, 0x11c2e; - 0x11c40, 0x11c40; 0x11c72, 0x11c8f; 0x11d00, 0x11d06; 0x11d08, 0x11d09; 0x11d0b, 0x11d30; - 0x11d46, 0x11d46; 0x11d60, 0x11d65; 0x11d67, 0x11d68; 0x11d6a, 0x11d89; 0x11d98, 0x11d98; - 0x11ee0, 0x11ef2; 0x11f02, 0x11f02; 0x11f04, 0x11f10; 0x11f12, 0x11f33; 0x11fb0, 0x11fb0; - 0x12000, 0x12399; 0x12400, 0x1246e; 0x12480, 0x12543; 0x12f90, 0x12ff0; 0x13000, 0x1342f; - 0x13441, 0x13446; 0x14400, 0x14646; 0x16800, 0x16a38; 0x16a40, 0x16a5e; 0x16a70, 0x16abe; - 0x16ad0, 0x16aed; 0x16b00, 0x16b2f; 0x16b40, 0x16b43; 0x16b63, 0x16b77; 0x16b7d, 0x16b8f; + 0x105a3, 0x105b1; 0x105b3, 0x105b9; 0x105bb, 0x105bc; 0x105c0, 0x105f3; 0x10600, 0x10736; + 0x10740, 0x10755; 0x10760, 0x10767; 0x10780, 0x10785; 0x10787, 0x107b0; 0x107b2, 0x107ba; + 0x10800, 0x10805; 0x10808, 0x10808; 0x1080a, 0x10835; 0x10837, 0x10838; 0x1083c, 0x1083c; + 0x1083f, 0x10855; 0x10860, 0x10876; 0x10880, 0x1089e; 0x108e0, 0x108f2; 0x108f4, 0x108f5; + 0x10900, 0x10915; 0x10920, 0x10939; 0x10980, 0x109b7; 0x109be, 0x109bf; 0x10a00, 0x10a00; + 0x10a10, 0x10a13; 0x10a15, 0x10a17; 0x10a19, 0x10a35; 0x10a60, 0x10a7c; 0x10a80, 0x10a9c; + 0x10ac0, 0x10ac7; 0x10ac9, 0x10ae4; 0x10b00, 0x10b35; 0x10b40, 0x10b55; 0x10b60, 0x10b72; + 0x10b80, 0x10b91; 0x10c00, 0x10c48; 0x10c80, 0x10cb2; 0x10cc0, 0x10cf2; 0x10d00, 0x10d23; + 0x10d4a, 0x10d65; 0x10d6f, 0x10d85; 0x10e80, 0x10ea9; 0x10eb0, 0x10eb1; 0x10ec2, 0x10ec4; + 0x10f00, 0x10f1c; 
0x10f27, 0x10f27; 0x10f30, 0x10f45; 0x10f70, 0x10f81; 0x10fb0, 0x10fc4; + 0x10fe0, 0x10ff6; 0x11003, 0x11037; 0x11071, 0x11072; 0x11075, 0x11075; 0x11083, 0x110af; + 0x110d0, 0x110e8; 0x11103, 0x11126; 0x11144, 0x11144; 0x11147, 0x11147; 0x11150, 0x11172; + 0x11176, 0x11176; 0x11183, 0x111b2; 0x111c1, 0x111c4; 0x111da, 0x111da; 0x111dc, 0x111dc; + 0x11200, 0x11211; 0x11213, 0x1122b; 0x1123f, 0x11240; 0x11280, 0x11286; 0x11288, 0x11288; + 0x1128a, 0x1128d; 0x1128f, 0x1129d; 0x1129f, 0x112a8; 0x112b0, 0x112de; 0x11305, 0x1130c; + 0x1130f, 0x11310; 0x11313, 0x11328; 0x1132a, 0x11330; 0x11332, 0x11333; 0x11335, 0x11339; + 0x1133d, 0x1133d; 0x11350, 0x11350; 0x1135d, 0x11361; 0x11380, 0x11389; 0x1138b, 0x1138b; + 0x1138e, 0x1138e; 0x11390, 0x113b5; 0x113b7, 0x113b7; 0x113d1, 0x113d1; 0x113d3, 0x113d3; + 0x11400, 0x11434; 0x11447, 0x1144a; 0x1145f, 0x11461; 0x11480, 0x114af; 0x114c4, 0x114c5; + 0x114c7, 0x114c7; 0x11580, 0x115ae; 0x115d8, 0x115db; 0x11600, 0x1162f; 0x11644, 0x11644; + 0x11680, 0x116aa; 0x116b8, 0x116b8; 0x11700, 0x1171a; 0x11740, 0x11746; 0x11800, 0x1182b; + 0x118a0, 0x118df; 0x118ff, 0x11906; 0x11909, 0x11909; 0x1190c, 0x11913; 0x11915, 0x11916; + 0x11918, 0x1192f; 0x1193f, 0x1193f; 0x11941, 0x11941; 0x119a0, 0x119a7; 0x119aa, 0x119d0; + 0x119e1, 0x119e1; 0x119e3, 0x119e3; 0x11a00, 0x11a00; 0x11a0b, 0x11a32; 0x11a3a, 0x11a3a; + 0x11a50, 0x11a50; 0x11a5c, 0x11a89; 0x11a9d, 0x11a9d; 0x11ab0, 0x11af8; 0x11bc0, 0x11be0; + 0x11c00, 0x11c08; 0x11c0a, 0x11c2e; 0x11c40, 0x11c40; 0x11c72, 0x11c8f; 0x11d00, 0x11d06; + 0x11d08, 0x11d09; 0x11d0b, 0x11d30; 0x11d46, 0x11d46; 0x11d60, 0x11d65; 0x11d67, 0x11d68; + 0x11d6a, 0x11d89; 0x11d98, 0x11d98; 0x11ee0, 0x11ef2; 0x11f02, 0x11f02; 0x11f04, 0x11f10; + 0x11f12, 0x11f33; 0x11fb0, 0x11fb0; 0x12000, 0x12399; 0x12400, 0x1246e; 0x12480, 0x12543; + 0x12f90, 0x12ff0; 0x13000, 0x1342f; 0x13441, 0x13446; 0x13460, 0x143fa; 0x14400, 0x14646; + 0x16100, 0x1611d; 0x16800, 0x16a38; 0x16a40, 0x16a5e; 0x16a70, 0x16abe; 0x16ad0, 
0x16aed; + 0x16b00, 0x16b2f; 0x16b40, 0x16b43; 0x16b63, 0x16b77; 0x16b7d, 0x16b8f; 0x16d40, 0x16d6c; 0x16e40, 0x16e7f; 0x16f00, 0x16f4a; 0x16f50, 0x16f50; 0x16f93, 0x16f9f; 0x16fe0, 0x16fe1; - 0x16fe3, 0x16fe3; 0x17000, 0x187f7; 0x18800, 0x18cd5; 0x18d00, 0x18d08; 0x1aff0, 0x1aff3; + 0x16fe3, 0x16fe3; 0x17000, 0x187f7; 0x18800, 0x18cd5; 0x18cff, 0x18d08; 0x1aff0, 0x1aff3; 0x1aff5, 0x1affb; 0x1affd, 0x1affe; 0x1b000, 0x1b122; 0x1b132, 0x1b132; 0x1b150, 0x1b152; 0x1b155, 0x1b155; 0x1b164, 0x1b167; 0x1b170, 0x1b2fb; 0x1bc00, 0x1bc6a; 0x1bc70, 0x1bc7c; 0x1bc80, 0x1bc88; 0x1bc90, 0x1bc99; 0x1d400, 0x1d454; 0x1d456, 0x1d49c; 0x1d49e, 0x1d49f; @@ -1357,16 +1389,17 @@ module Properties = struct 0x1d716, 0x1d734; 0x1d736, 0x1d74e; 0x1d750, 0x1d76e; 0x1d770, 0x1d788; 0x1d78a, 0x1d7a8; 0x1d7aa, 0x1d7c2; 0x1d7c4, 0x1d7cb; 0x1df00, 0x1df1e; 0x1df25, 0x1df2a; 0x1e030, 0x1e06d; 0x1e100, 0x1e12c; 0x1e137, 0x1e13d; 0x1e14e, 0x1e14e; 0x1e290, 0x1e2ad; 0x1e2c0, 0x1e2eb; - 0x1e4d0, 0x1e4eb; 0x1e7e0, 0x1e7e6; 0x1e7e8, 0x1e7eb; 0x1e7ed, 0x1e7ee; 0x1e7f0, 0x1e7fe; - 0x1e800, 0x1e8c4; 0x1e900, 0x1e943; 0x1e94b, 0x1e94b; 0x1ee00, 0x1ee03; 0x1ee05, 0x1ee1f; - 0x1ee21, 0x1ee22; 0x1ee24, 0x1ee24; 0x1ee27, 0x1ee27; 0x1ee29, 0x1ee32; 0x1ee34, 0x1ee37; - 0x1ee39, 0x1ee39; 0x1ee3b, 0x1ee3b; 0x1ee42, 0x1ee42; 0x1ee47, 0x1ee47; 0x1ee49, 0x1ee49; - 0x1ee4b, 0x1ee4b; 0x1ee4d, 0x1ee4f; 0x1ee51, 0x1ee52; 0x1ee54, 0x1ee54; 0x1ee57, 0x1ee57; - 0x1ee59, 0x1ee59; 0x1ee5b, 0x1ee5b; 0x1ee5d, 0x1ee5d; 0x1ee5f, 0x1ee5f; 0x1ee61, 0x1ee62; - 0x1ee64, 0x1ee64; 0x1ee67, 0x1ee6a; 0x1ee6c, 0x1ee72; 0x1ee74, 0x1ee77; 0x1ee79, 0x1ee7c; - 0x1ee7e, 0x1ee7e; 0x1ee80, 0x1ee89; 0x1ee8b, 0x1ee9b; 0x1eea1, 0x1eea3; 0x1eea5, 0x1eea9; - 0x1eeab, 0x1eebb; 0x20000, 0x2a6df; 0x2a700, 0x2b739; 0x2b740, 0x2b81d; 0x2b820, 0x2cea1; - 0x2ceb0, 0x2ebe0; 0x2f800, 0x2fa1d; 0x30000, 0x3134a; 0x31350, 0x323af] + 0x1e4d0, 0x1e4eb; 0x1e5d0, 0x1e5ed; 0x1e5f0, 0x1e5f0; 0x1e7e0, 0x1e7e6; 0x1e7e8, 0x1e7eb; + 0x1e7ed, 0x1e7ee; 0x1e7f0, 0x1e7fe; 
0x1e800, 0x1e8c4; 0x1e900, 0x1e943; 0x1e94b, 0x1e94b; + 0x1ee00, 0x1ee03; 0x1ee05, 0x1ee1f; 0x1ee21, 0x1ee22; 0x1ee24, 0x1ee24; 0x1ee27, 0x1ee27; + 0x1ee29, 0x1ee32; 0x1ee34, 0x1ee37; 0x1ee39, 0x1ee39; 0x1ee3b, 0x1ee3b; 0x1ee42, 0x1ee42; + 0x1ee47, 0x1ee47; 0x1ee49, 0x1ee49; 0x1ee4b, 0x1ee4b; 0x1ee4d, 0x1ee4f; 0x1ee51, 0x1ee52; + 0x1ee54, 0x1ee54; 0x1ee57, 0x1ee57; 0x1ee59, 0x1ee59; 0x1ee5b, 0x1ee5b; 0x1ee5d, 0x1ee5d; + 0x1ee5f, 0x1ee5f; 0x1ee61, 0x1ee62; 0x1ee64, 0x1ee64; 0x1ee67, 0x1ee6a; 0x1ee6c, 0x1ee72; + 0x1ee74, 0x1ee77; 0x1ee79, 0x1ee7c; 0x1ee7e, 0x1ee7e; 0x1ee80, 0x1ee89; 0x1ee8b, 0x1ee9b; + 0x1eea1, 0x1eea3; 0x1eea5, 0x1eea9; 0x1eeab, 0x1eebb; 0x20000, 0x2a6df; 0x2a700, 0x2b739; + 0x2b740, 0x2b81d; 0x2b820, 0x2cea1; 0x2ceb0, 0x2ebe0; 0x2ebf0, 0x2ee5d; 0x2f800, 0x2fa1d; + 0x30000, 0x3134a; 0x31350, 0x323af] let lowercase = Sedlex_utils.Cset.of_list [0x61, 0x7a; 0xaa, 0xaa; 0xb5, 0xb5; 0xba, 0xba; 0xdf, 0xf6; @@ -1424,86 +1457,86 @@ module Properties = struct 0x515, 0x515; 0x517, 0x517; 0x519, 0x519; 0x51b, 0x51b; 0x51d, 0x51d; 0x51f, 0x51f; 0x521, 0x521; 0x523, 0x523; 0x525, 0x525; 0x527, 0x527; 0x529, 0x529; 0x52b, 0x52b; 0x52d, 0x52d; 0x52f, 0x52f; 0x560, 0x588; - 0x10d0, 0x10fa; 0x10fc, 0x10ff; 0x13f8, 0x13fd; 0x1c80, 0x1c88; 0x1d00, 0x1dbf; - 0x1e01, 0x1e01; 0x1e03, 0x1e03; 0x1e05, 0x1e05; 0x1e07, 0x1e07; 0x1e09, 0x1e09; - 0x1e0b, 0x1e0b; 0x1e0d, 0x1e0d; 0x1e0f, 0x1e0f; 0x1e11, 0x1e11; 0x1e13, 0x1e13; - 0x1e15, 0x1e15; 0x1e17, 0x1e17; 0x1e19, 0x1e19; 0x1e1b, 0x1e1b; 0x1e1d, 0x1e1d; - 0x1e1f, 0x1e1f; 0x1e21, 0x1e21; 0x1e23, 0x1e23; 0x1e25, 0x1e25; 0x1e27, 0x1e27; - 0x1e29, 0x1e29; 0x1e2b, 0x1e2b; 0x1e2d, 0x1e2d; 0x1e2f, 0x1e2f; 0x1e31, 0x1e31; - 0x1e33, 0x1e33; 0x1e35, 0x1e35; 0x1e37, 0x1e37; 0x1e39, 0x1e39; 0x1e3b, 0x1e3b; - 0x1e3d, 0x1e3d; 0x1e3f, 0x1e3f; 0x1e41, 0x1e41; 0x1e43, 0x1e43; 0x1e45, 0x1e45; - 0x1e47, 0x1e47; 0x1e49, 0x1e49; 0x1e4b, 0x1e4b; 0x1e4d, 0x1e4d; 0x1e4f, 0x1e4f; - 0x1e51, 0x1e51; 0x1e53, 0x1e53; 0x1e55, 0x1e55; 0x1e57, 0x1e57; 
0x1e59, 0x1e59; - 0x1e5b, 0x1e5b; 0x1e5d, 0x1e5d; 0x1e5f, 0x1e5f; 0x1e61, 0x1e61; 0x1e63, 0x1e63; - 0x1e65, 0x1e65; 0x1e67, 0x1e67; 0x1e69, 0x1e69; 0x1e6b, 0x1e6b; 0x1e6d, 0x1e6d; - 0x1e6f, 0x1e6f; 0x1e71, 0x1e71; 0x1e73, 0x1e73; 0x1e75, 0x1e75; 0x1e77, 0x1e77; - 0x1e79, 0x1e79; 0x1e7b, 0x1e7b; 0x1e7d, 0x1e7d; 0x1e7f, 0x1e7f; 0x1e81, 0x1e81; - 0x1e83, 0x1e83; 0x1e85, 0x1e85; 0x1e87, 0x1e87; 0x1e89, 0x1e89; 0x1e8b, 0x1e8b; - 0x1e8d, 0x1e8d; 0x1e8f, 0x1e8f; 0x1e91, 0x1e91; 0x1e93, 0x1e93; 0x1e95, 0x1e9d; - 0x1e9f, 0x1e9f; 0x1ea1, 0x1ea1; 0x1ea3, 0x1ea3; 0x1ea5, 0x1ea5; 0x1ea7, 0x1ea7; - 0x1ea9, 0x1ea9; 0x1eab, 0x1eab; 0x1ead, 0x1ead; 0x1eaf, 0x1eaf; 0x1eb1, 0x1eb1; - 0x1eb3, 0x1eb3; 0x1eb5, 0x1eb5; 0x1eb7, 0x1eb7; 0x1eb9, 0x1eb9; 0x1ebb, 0x1ebb; - 0x1ebd, 0x1ebd; 0x1ebf, 0x1ebf; 0x1ec1, 0x1ec1; 0x1ec3, 0x1ec3; 0x1ec5, 0x1ec5; - 0x1ec7, 0x1ec7; 0x1ec9, 0x1ec9; 0x1ecb, 0x1ecb; 0x1ecd, 0x1ecd; 0x1ecf, 0x1ecf; - 0x1ed1, 0x1ed1; 0x1ed3, 0x1ed3; 0x1ed5, 0x1ed5; 0x1ed7, 0x1ed7; 0x1ed9, 0x1ed9; - 0x1edb, 0x1edb; 0x1edd, 0x1edd; 0x1edf, 0x1edf; 0x1ee1, 0x1ee1; 0x1ee3, 0x1ee3; - 0x1ee5, 0x1ee5; 0x1ee7, 0x1ee7; 0x1ee9, 0x1ee9; 0x1eeb, 0x1eeb; 0x1eed, 0x1eed; - 0x1eef, 0x1eef; 0x1ef1, 0x1ef1; 0x1ef3, 0x1ef3; 0x1ef5, 0x1ef5; 0x1ef7, 0x1ef7; - 0x1ef9, 0x1ef9; 0x1efb, 0x1efb; 0x1efd, 0x1efd; 0x1eff, 0x1f07; 0x1f10, 0x1f15; - 0x1f20, 0x1f27; 0x1f30, 0x1f37; 0x1f40, 0x1f45; 0x1f50, 0x1f57; 0x1f60, 0x1f67; - 0x1f70, 0x1f7d; 0x1f80, 0x1f87; 0x1f90, 0x1f97; 0x1fa0, 0x1fa7; 0x1fb0, 0x1fb4; - 0x1fb6, 0x1fb7; 0x1fbe, 0x1fbe; 0x1fc2, 0x1fc4; 0x1fc6, 0x1fc7; 0x1fd0, 0x1fd3; - 0x1fd6, 0x1fd7; 0x1fe0, 0x1fe7; 0x1ff2, 0x1ff4; 0x1ff6, 0x1ff7; 0x2071, 0x2071; - 0x207f, 0x207f; 0x2090, 0x209c; 0x210a, 0x210a; 0x210e, 0x210f; 0x2113, 0x2113; - 0x212f, 0x212f; 0x2134, 0x2134; 0x2139, 0x2139; 0x213c, 0x213d; 0x2146, 0x2149; - 0x214e, 0x214e; 0x2170, 0x217f; 0x2184, 0x2184; 0x24d0, 0x24e9; 0x2c30, 0x2c5f; - 0x2c61, 0x2c61; 0x2c65, 0x2c66; 0x2c68, 0x2c68; 0x2c6a, 0x2c6a; 0x2c6c, 0x2c6c; - 0x2c71, 
0x2c71; 0x2c73, 0x2c74; 0x2c76, 0x2c7d; 0x2c81, 0x2c81; 0x2c83, 0x2c83; - 0x2c85, 0x2c85; 0x2c87, 0x2c87; 0x2c89, 0x2c89; 0x2c8b, 0x2c8b; 0x2c8d, 0x2c8d; - 0x2c8f, 0x2c8f; 0x2c91, 0x2c91; 0x2c93, 0x2c93; 0x2c95, 0x2c95; 0x2c97, 0x2c97; - 0x2c99, 0x2c99; 0x2c9b, 0x2c9b; 0x2c9d, 0x2c9d; 0x2c9f, 0x2c9f; 0x2ca1, 0x2ca1; - 0x2ca3, 0x2ca3; 0x2ca5, 0x2ca5; 0x2ca7, 0x2ca7; 0x2ca9, 0x2ca9; 0x2cab, 0x2cab; - 0x2cad, 0x2cad; 0x2caf, 0x2caf; 0x2cb1, 0x2cb1; 0x2cb3, 0x2cb3; 0x2cb5, 0x2cb5; - 0x2cb7, 0x2cb7; 0x2cb9, 0x2cb9; 0x2cbb, 0x2cbb; 0x2cbd, 0x2cbd; 0x2cbf, 0x2cbf; - 0x2cc1, 0x2cc1; 0x2cc3, 0x2cc3; 0x2cc5, 0x2cc5; 0x2cc7, 0x2cc7; 0x2cc9, 0x2cc9; - 0x2ccb, 0x2ccb; 0x2ccd, 0x2ccd; 0x2ccf, 0x2ccf; 0x2cd1, 0x2cd1; 0x2cd3, 0x2cd3; - 0x2cd5, 0x2cd5; 0x2cd7, 0x2cd7; 0x2cd9, 0x2cd9; 0x2cdb, 0x2cdb; 0x2cdd, 0x2cdd; - 0x2cdf, 0x2cdf; 0x2ce1, 0x2ce1; 0x2ce3, 0x2ce4; 0x2cec, 0x2cec; 0x2cee, 0x2cee; - 0x2cf3, 0x2cf3; 0x2d00, 0x2d25; 0x2d27, 0x2d27; 0x2d2d, 0x2d2d; 0xa641, 0xa641; - 0xa643, 0xa643; 0xa645, 0xa645; 0xa647, 0xa647; 0xa649, 0xa649; 0xa64b, 0xa64b; - 0xa64d, 0xa64d; 0xa64f, 0xa64f; 0xa651, 0xa651; 0xa653, 0xa653; 0xa655, 0xa655; - 0xa657, 0xa657; 0xa659, 0xa659; 0xa65b, 0xa65b; 0xa65d, 0xa65d; 0xa65f, 0xa65f; - 0xa661, 0xa661; 0xa663, 0xa663; 0xa665, 0xa665; 0xa667, 0xa667; 0xa669, 0xa669; - 0xa66b, 0xa66b; 0xa66d, 0xa66d; 0xa681, 0xa681; 0xa683, 0xa683; 0xa685, 0xa685; - 0xa687, 0xa687; 0xa689, 0xa689; 0xa68b, 0xa68b; 0xa68d, 0xa68d; 0xa68f, 0xa68f; - 0xa691, 0xa691; 0xa693, 0xa693; 0xa695, 0xa695; 0xa697, 0xa697; 0xa699, 0xa699; - 0xa69b, 0xa69d; 0xa723, 0xa723; 0xa725, 0xa725; 0xa727, 0xa727; 0xa729, 0xa729; - 0xa72b, 0xa72b; 0xa72d, 0xa72d; 0xa72f, 0xa731; 0xa733, 0xa733; 0xa735, 0xa735; - 0xa737, 0xa737; 0xa739, 0xa739; 0xa73b, 0xa73b; 0xa73d, 0xa73d; 0xa73f, 0xa73f; - 0xa741, 0xa741; 0xa743, 0xa743; 0xa745, 0xa745; 0xa747, 0xa747; 0xa749, 0xa749; - 0xa74b, 0xa74b; 0xa74d, 0xa74d; 0xa74f, 0xa74f; 0xa751, 0xa751; 0xa753, 0xa753; - 0xa755, 0xa755; 0xa757, 0xa757; 0xa759, 
0xa759; 0xa75b, 0xa75b; 0xa75d, 0xa75d; - 0xa75f, 0xa75f; 0xa761, 0xa761; 0xa763, 0xa763; 0xa765, 0xa765; 0xa767, 0xa767; - 0xa769, 0xa769; 0xa76b, 0xa76b; 0xa76d, 0xa76d; 0xa76f, 0xa778; 0xa77a, 0xa77a; - 0xa77c, 0xa77c; 0xa77f, 0xa77f; 0xa781, 0xa781; 0xa783, 0xa783; 0xa785, 0xa785; - 0xa787, 0xa787; 0xa78c, 0xa78c; 0xa78e, 0xa78e; 0xa791, 0xa791; 0xa793, 0xa795; - 0xa797, 0xa797; 0xa799, 0xa799; 0xa79b, 0xa79b; 0xa79d, 0xa79d; 0xa79f, 0xa79f; - 0xa7a1, 0xa7a1; 0xa7a3, 0xa7a3; 0xa7a5, 0xa7a5; 0xa7a7, 0xa7a7; 0xa7a9, 0xa7a9; - 0xa7af, 0xa7af; 0xa7b5, 0xa7b5; 0xa7b7, 0xa7b7; 0xa7b9, 0xa7b9; 0xa7bb, 0xa7bb; - 0xa7bd, 0xa7bd; 0xa7bf, 0xa7bf; 0xa7c1, 0xa7c1; 0xa7c3, 0xa7c3; 0xa7c8, 0xa7c8; - 0xa7ca, 0xa7ca; 0xa7d1, 0xa7d1; 0xa7d3, 0xa7d3; 0xa7d5, 0xa7d5; 0xa7d7, 0xa7d7; - 0xa7d9, 0xa7d9; 0xa7f2, 0xa7f4; 0xa7f6, 0xa7f6; 0xa7f8, 0xa7fa; 0xab30, 0xab5a; - 0xab5c, 0xab69; 0xab70, 0xabbf; 0xfb00, 0xfb06; 0xfb13, 0xfb17; 0xff41, 0xff5a; - 0x10428, 0x1044f; 0x104d8, 0x104fb; 0x10597, 0x105a1; 0x105a3, 0x105b1; 0x105b3, 0x105b9; - 0x105bb, 0x105bc; 0x10780, 0x10780; 0x10783, 0x10785; 0x10787, 0x107b0; 0x107b2, 0x107ba; - 0x10cc0, 0x10cf2; 0x118c0, 0x118df; 0x16e60, 0x16e7f; 0x1d41a, 0x1d433; 0x1d44e, 0x1d454; - 0x1d456, 0x1d467; 0x1d482, 0x1d49b; 0x1d4b6, 0x1d4b9; 0x1d4bb, 0x1d4bb; 0x1d4bd, 0x1d4c3; - 0x1d4c5, 0x1d4cf; 0x1d4ea, 0x1d503; 0x1d51e, 0x1d537; 0x1d552, 0x1d56b; 0x1d586, 0x1d59f; - 0x1d5ba, 0x1d5d3; 0x1d5ee, 0x1d607; 0x1d622, 0x1d63b; 0x1d656, 0x1d66f; 0x1d68a, 0x1d6a5; - 0x1d6c2, 0x1d6da; 0x1d6dc, 0x1d6e1; 0x1d6fc, 0x1d714; 0x1d716, 0x1d71b; 0x1d736, 0x1d74e; - 0x1d750, 0x1d755; 0x1d770, 0x1d788; 0x1d78a, 0x1d78f; 0x1d7aa, 0x1d7c2; 0x1d7c4, 0x1d7c9; - 0x1d7cb, 0x1d7cb; 0x1df00, 0x1df09; 0x1df0b, 0x1df1e; 0x1df25, 0x1df2a; 0x1e030, 0x1e06d; - 0x1e922, 0x1e943] + 0x10d0, 0x10fa; 0x10fc, 0x10ff; 0x13f8, 0x13fd; 0x1c80, 0x1c88; 0x1c8a, 0x1c8a; + 0x1d00, 0x1dbf; 0x1e01, 0x1e01; 0x1e03, 0x1e03; 0x1e05, 0x1e05; 0x1e07, 0x1e07; + 0x1e09, 0x1e09; 0x1e0b, 0x1e0b; 0x1e0d, 
0x1e0d; 0x1e0f, 0x1e0f; 0x1e11, 0x1e11; + 0x1e13, 0x1e13; 0x1e15, 0x1e15; 0x1e17, 0x1e17; 0x1e19, 0x1e19; 0x1e1b, 0x1e1b; + 0x1e1d, 0x1e1d; 0x1e1f, 0x1e1f; 0x1e21, 0x1e21; 0x1e23, 0x1e23; 0x1e25, 0x1e25; + 0x1e27, 0x1e27; 0x1e29, 0x1e29; 0x1e2b, 0x1e2b; 0x1e2d, 0x1e2d; 0x1e2f, 0x1e2f; + 0x1e31, 0x1e31; 0x1e33, 0x1e33; 0x1e35, 0x1e35; 0x1e37, 0x1e37; 0x1e39, 0x1e39; + 0x1e3b, 0x1e3b; 0x1e3d, 0x1e3d; 0x1e3f, 0x1e3f; 0x1e41, 0x1e41; 0x1e43, 0x1e43; + 0x1e45, 0x1e45; 0x1e47, 0x1e47; 0x1e49, 0x1e49; 0x1e4b, 0x1e4b; 0x1e4d, 0x1e4d; + 0x1e4f, 0x1e4f; 0x1e51, 0x1e51; 0x1e53, 0x1e53; 0x1e55, 0x1e55; 0x1e57, 0x1e57; + 0x1e59, 0x1e59; 0x1e5b, 0x1e5b; 0x1e5d, 0x1e5d; 0x1e5f, 0x1e5f; 0x1e61, 0x1e61; + 0x1e63, 0x1e63; 0x1e65, 0x1e65; 0x1e67, 0x1e67; 0x1e69, 0x1e69; 0x1e6b, 0x1e6b; + 0x1e6d, 0x1e6d; 0x1e6f, 0x1e6f; 0x1e71, 0x1e71; 0x1e73, 0x1e73; 0x1e75, 0x1e75; + 0x1e77, 0x1e77; 0x1e79, 0x1e79; 0x1e7b, 0x1e7b; 0x1e7d, 0x1e7d; 0x1e7f, 0x1e7f; + 0x1e81, 0x1e81; 0x1e83, 0x1e83; 0x1e85, 0x1e85; 0x1e87, 0x1e87; 0x1e89, 0x1e89; + 0x1e8b, 0x1e8b; 0x1e8d, 0x1e8d; 0x1e8f, 0x1e8f; 0x1e91, 0x1e91; 0x1e93, 0x1e93; + 0x1e95, 0x1e9d; 0x1e9f, 0x1e9f; 0x1ea1, 0x1ea1; 0x1ea3, 0x1ea3; 0x1ea5, 0x1ea5; + 0x1ea7, 0x1ea7; 0x1ea9, 0x1ea9; 0x1eab, 0x1eab; 0x1ead, 0x1ead; 0x1eaf, 0x1eaf; + 0x1eb1, 0x1eb1; 0x1eb3, 0x1eb3; 0x1eb5, 0x1eb5; 0x1eb7, 0x1eb7; 0x1eb9, 0x1eb9; + 0x1ebb, 0x1ebb; 0x1ebd, 0x1ebd; 0x1ebf, 0x1ebf; 0x1ec1, 0x1ec1; 0x1ec3, 0x1ec3; + 0x1ec5, 0x1ec5; 0x1ec7, 0x1ec7; 0x1ec9, 0x1ec9; 0x1ecb, 0x1ecb; 0x1ecd, 0x1ecd; + 0x1ecf, 0x1ecf; 0x1ed1, 0x1ed1; 0x1ed3, 0x1ed3; 0x1ed5, 0x1ed5; 0x1ed7, 0x1ed7; + 0x1ed9, 0x1ed9; 0x1edb, 0x1edb; 0x1edd, 0x1edd; 0x1edf, 0x1edf; 0x1ee1, 0x1ee1; + 0x1ee3, 0x1ee3; 0x1ee5, 0x1ee5; 0x1ee7, 0x1ee7; 0x1ee9, 0x1ee9; 0x1eeb, 0x1eeb; + 0x1eed, 0x1eed; 0x1eef, 0x1eef; 0x1ef1, 0x1ef1; 0x1ef3, 0x1ef3; 0x1ef5, 0x1ef5; + 0x1ef7, 0x1ef7; 0x1ef9, 0x1ef9; 0x1efb, 0x1efb; 0x1efd, 0x1efd; 0x1eff, 0x1f07; + 0x1f10, 0x1f15; 0x1f20, 0x1f27; 0x1f30, 0x1f37; 0x1f40, 0x1f45; 0x1f50, 
0x1f57; + 0x1f60, 0x1f67; 0x1f70, 0x1f7d; 0x1f80, 0x1f87; 0x1f90, 0x1f97; 0x1fa0, 0x1fa7; + 0x1fb0, 0x1fb4; 0x1fb6, 0x1fb7; 0x1fbe, 0x1fbe; 0x1fc2, 0x1fc4; 0x1fc6, 0x1fc7; + 0x1fd0, 0x1fd3; 0x1fd6, 0x1fd7; 0x1fe0, 0x1fe7; 0x1ff2, 0x1ff4; 0x1ff6, 0x1ff7; + 0x2071, 0x2071; 0x207f, 0x207f; 0x2090, 0x209c; 0x210a, 0x210a; 0x210e, 0x210f; + 0x2113, 0x2113; 0x212f, 0x212f; 0x2134, 0x2134; 0x2139, 0x2139; 0x213c, 0x213d; + 0x2146, 0x2149; 0x214e, 0x214e; 0x2170, 0x217f; 0x2184, 0x2184; 0x24d0, 0x24e9; + 0x2c30, 0x2c5f; 0x2c61, 0x2c61; 0x2c65, 0x2c66; 0x2c68, 0x2c68; 0x2c6a, 0x2c6a; + 0x2c6c, 0x2c6c; 0x2c71, 0x2c71; 0x2c73, 0x2c74; 0x2c76, 0x2c7d; 0x2c81, 0x2c81; + 0x2c83, 0x2c83; 0x2c85, 0x2c85; 0x2c87, 0x2c87; 0x2c89, 0x2c89; 0x2c8b, 0x2c8b; + 0x2c8d, 0x2c8d; 0x2c8f, 0x2c8f; 0x2c91, 0x2c91; 0x2c93, 0x2c93; 0x2c95, 0x2c95; + 0x2c97, 0x2c97; 0x2c99, 0x2c99; 0x2c9b, 0x2c9b; 0x2c9d, 0x2c9d; 0x2c9f, 0x2c9f; + 0x2ca1, 0x2ca1; 0x2ca3, 0x2ca3; 0x2ca5, 0x2ca5; 0x2ca7, 0x2ca7; 0x2ca9, 0x2ca9; + 0x2cab, 0x2cab; 0x2cad, 0x2cad; 0x2caf, 0x2caf; 0x2cb1, 0x2cb1; 0x2cb3, 0x2cb3; + 0x2cb5, 0x2cb5; 0x2cb7, 0x2cb7; 0x2cb9, 0x2cb9; 0x2cbb, 0x2cbb; 0x2cbd, 0x2cbd; + 0x2cbf, 0x2cbf; 0x2cc1, 0x2cc1; 0x2cc3, 0x2cc3; 0x2cc5, 0x2cc5; 0x2cc7, 0x2cc7; + 0x2cc9, 0x2cc9; 0x2ccb, 0x2ccb; 0x2ccd, 0x2ccd; 0x2ccf, 0x2ccf; 0x2cd1, 0x2cd1; + 0x2cd3, 0x2cd3; 0x2cd5, 0x2cd5; 0x2cd7, 0x2cd7; 0x2cd9, 0x2cd9; 0x2cdb, 0x2cdb; + 0x2cdd, 0x2cdd; 0x2cdf, 0x2cdf; 0x2ce1, 0x2ce1; 0x2ce3, 0x2ce4; 0x2cec, 0x2cec; + 0x2cee, 0x2cee; 0x2cf3, 0x2cf3; 0x2d00, 0x2d25; 0x2d27, 0x2d27; 0x2d2d, 0x2d2d; + 0xa641, 0xa641; 0xa643, 0xa643; 0xa645, 0xa645; 0xa647, 0xa647; 0xa649, 0xa649; + 0xa64b, 0xa64b; 0xa64d, 0xa64d; 0xa64f, 0xa64f; 0xa651, 0xa651; 0xa653, 0xa653; + 0xa655, 0xa655; 0xa657, 0xa657; 0xa659, 0xa659; 0xa65b, 0xa65b; 0xa65d, 0xa65d; + 0xa65f, 0xa65f; 0xa661, 0xa661; 0xa663, 0xa663; 0xa665, 0xa665; 0xa667, 0xa667; + 0xa669, 0xa669; 0xa66b, 0xa66b; 0xa66d, 0xa66d; 0xa681, 0xa681; 0xa683, 0xa683; + 0xa685, 0xa685; 
0xa687, 0xa687; 0xa689, 0xa689; 0xa68b, 0xa68b; 0xa68d, 0xa68d; + 0xa68f, 0xa68f; 0xa691, 0xa691; 0xa693, 0xa693; 0xa695, 0xa695; 0xa697, 0xa697; + 0xa699, 0xa699; 0xa69b, 0xa69d; 0xa723, 0xa723; 0xa725, 0xa725; 0xa727, 0xa727; + 0xa729, 0xa729; 0xa72b, 0xa72b; 0xa72d, 0xa72d; 0xa72f, 0xa731; 0xa733, 0xa733; + 0xa735, 0xa735; 0xa737, 0xa737; 0xa739, 0xa739; 0xa73b, 0xa73b; 0xa73d, 0xa73d; + 0xa73f, 0xa73f; 0xa741, 0xa741; 0xa743, 0xa743; 0xa745, 0xa745; 0xa747, 0xa747; + 0xa749, 0xa749; 0xa74b, 0xa74b; 0xa74d, 0xa74d; 0xa74f, 0xa74f; 0xa751, 0xa751; + 0xa753, 0xa753; 0xa755, 0xa755; 0xa757, 0xa757; 0xa759, 0xa759; 0xa75b, 0xa75b; + 0xa75d, 0xa75d; 0xa75f, 0xa75f; 0xa761, 0xa761; 0xa763, 0xa763; 0xa765, 0xa765; + 0xa767, 0xa767; 0xa769, 0xa769; 0xa76b, 0xa76b; 0xa76d, 0xa76d; 0xa76f, 0xa778; + 0xa77a, 0xa77a; 0xa77c, 0xa77c; 0xa77f, 0xa77f; 0xa781, 0xa781; 0xa783, 0xa783; + 0xa785, 0xa785; 0xa787, 0xa787; 0xa78c, 0xa78c; 0xa78e, 0xa78e; 0xa791, 0xa791; + 0xa793, 0xa795; 0xa797, 0xa797; 0xa799, 0xa799; 0xa79b, 0xa79b; 0xa79d, 0xa79d; + 0xa79f, 0xa79f; 0xa7a1, 0xa7a1; 0xa7a3, 0xa7a3; 0xa7a5, 0xa7a5; 0xa7a7, 0xa7a7; + 0xa7a9, 0xa7a9; 0xa7af, 0xa7af; 0xa7b5, 0xa7b5; 0xa7b7, 0xa7b7; 0xa7b9, 0xa7b9; + 0xa7bb, 0xa7bb; 0xa7bd, 0xa7bd; 0xa7bf, 0xa7bf; 0xa7c1, 0xa7c1; 0xa7c3, 0xa7c3; + 0xa7c8, 0xa7c8; 0xa7ca, 0xa7ca; 0xa7cd, 0xa7cd; 0xa7d1, 0xa7d1; 0xa7d3, 0xa7d3; + 0xa7d5, 0xa7d5; 0xa7d7, 0xa7d7; 0xa7d9, 0xa7d9; 0xa7db, 0xa7db; 0xa7f2, 0xa7f4; + 0xa7f6, 0xa7f6; 0xa7f8, 0xa7fa; 0xab30, 0xab5a; 0xab5c, 0xab69; 0xab70, 0xabbf; + 0xfb00, 0xfb06; 0xfb13, 0xfb17; 0xff41, 0xff5a; 0x10428, 0x1044f; 0x104d8, 0x104fb; + 0x10597, 0x105a1; 0x105a3, 0x105b1; 0x105b3, 0x105b9; 0x105bb, 0x105bc; 0x10780, 0x10780; + 0x10783, 0x10785; 0x10787, 0x107b0; 0x107b2, 0x107ba; 0x10cc0, 0x10cf2; 0x10d70, 0x10d85; + 0x118c0, 0x118df; 0x16e60, 0x16e7f; 0x1d41a, 0x1d433; 0x1d44e, 0x1d454; 0x1d456, 0x1d467; + 0x1d482, 0x1d49b; 0x1d4b6, 0x1d4b9; 0x1d4bb, 0x1d4bb; 0x1d4bd, 0x1d4c3; 0x1d4c5, 0x1d4cf; + 
0x1d4ea, 0x1d503; 0x1d51e, 0x1d537; 0x1d552, 0x1d56b; 0x1d586, 0x1d59f; 0x1d5ba, 0x1d5d3; + 0x1d5ee, 0x1d607; 0x1d622, 0x1d63b; 0x1d656, 0x1d66f; 0x1d68a, 0x1d6a5; 0x1d6c2, 0x1d6da; + 0x1d6dc, 0x1d6e1; 0x1d6fc, 0x1d714; 0x1d716, 0x1d71b; 0x1d736, 0x1d74e; 0x1d750, 0x1d755; + 0x1d770, 0x1d788; 0x1d78a, 0x1d78f; 0x1d7aa, 0x1d7c2; 0x1d7c4, 0x1d7c9; 0x1d7cb, 0x1d7cb; + 0x1df00, 0x1df09; 0x1df0b, 0x1df1e; 0x1df25, 0x1df2a; 0x1e030, 0x1e06d; 0x1e922, 0x1e943] let math = Sedlex_utils.Cset.of_list [0x2b, 0x2b; 0x3c, 0x3e; 0x5e, 0x5e; 0x7c, 0x7c; 0x7e, 0x7e; @@ -1522,65 +1555,67 @@ module Properties = struct 0x2642, 0x2642; 0x2660, 0x2663; 0x266d, 0x266f; 0x27c0, 0x27ff; 0x2900, 0x2aff; 0x2b30, 0x2b44; 0x2b47, 0x2b4c; 0xfb29, 0xfb29; 0xfe61, 0xfe66; 0xfe68, 0xfe68; 0xff0b, 0xff0b; 0xff1c, 0xff1e; 0xff3c, 0xff3c; 0xff3e, 0xff3e; 0xff5c, 0xff5c; - 0xff5e, 0xff5e; 0xffe2, 0xffe2; 0xffe9, 0xffec; 0x1d400, 0x1d454; 0x1d456, 0x1d49c; - 0x1d49e, 0x1d49f; 0x1d4a2, 0x1d4a2; 0x1d4a5, 0x1d4a6; 0x1d4a9, 0x1d4ac; 0x1d4ae, 0x1d4b9; - 0x1d4bb, 0x1d4bb; 0x1d4bd, 0x1d4c3; 0x1d4c5, 0x1d505; 0x1d507, 0x1d50a; 0x1d50d, 0x1d514; - 0x1d516, 0x1d51c; 0x1d51e, 0x1d539; 0x1d53b, 0x1d53e; 0x1d540, 0x1d544; 0x1d546, 0x1d546; - 0x1d54a, 0x1d550; 0x1d552, 0x1d6a5; 0x1d6a8, 0x1d7cb; 0x1d7ce, 0x1d7ff; 0x1ee00, 0x1ee03; - 0x1ee05, 0x1ee1f; 0x1ee21, 0x1ee22; 0x1ee24, 0x1ee24; 0x1ee27, 0x1ee27; 0x1ee29, 0x1ee32; - 0x1ee34, 0x1ee37; 0x1ee39, 0x1ee39; 0x1ee3b, 0x1ee3b; 0x1ee42, 0x1ee42; 0x1ee47, 0x1ee47; - 0x1ee49, 0x1ee49; 0x1ee4b, 0x1ee4b; 0x1ee4d, 0x1ee4f; 0x1ee51, 0x1ee52; 0x1ee54, 0x1ee54; - 0x1ee57, 0x1ee57; 0x1ee59, 0x1ee59; 0x1ee5b, 0x1ee5b; 0x1ee5d, 0x1ee5d; 0x1ee5f, 0x1ee5f; - 0x1ee61, 0x1ee62; 0x1ee64, 0x1ee64; 0x1ee67, 0x1ee6a; 0x1ee6c, 0x1ee72; 0x1ee74, 0x1ee77; - 0x1ee79, 0x1ee7c; 0x1ee7e, 0x1ee7e; 0x1ee80, 0x1ee89; 0x1ee8b, 0x1ee9b; 0x1eea1, 0x1eea3; - 0x1eea5, 0x1eea9; 0x1eeab, 0x1eebb; 0x1eef0, 0x1eef1] + 0xff5e, 0xff5e; 0xffe2, 0xffe2; 0xffe9, 0xffec; 0x10d8e, 0x10d8f; 0x1d400, 0x1d454; + 
0x1d456, 0x1d49c; 0x1d49e, 0x1d49f; 0x1d4a2, 0x1d4a2; 0x1d4a5, 0x1d4a6; 0x1d4a9, 0x1d4ac; + 0x1d4ae, 0x1d4b9; 0x1d4bb, 0x1d4bb; 0x1d4bd, 0x1d4c3; 0x1d4c5, 0x1d505; 0x1d507, 0x1d50a; + 0x1d50d, 0x1d514; 0x1d516, 0x1d51c; 0x1d51e, 0x1d539; 0x1d53b, 0x1d53e; 0x1d540, 0x1d544; + 0x1d546, 0x1d546; 0x1d54a, 0x1d550; 0x1d552, 0x1d6a5; 0x1d6a8, 0x1d7cb; 0x1d7ce, 0x1d7ff; + 0x1ee00, 0x1ee03; 0x1ee05, 0x1ee1f; 0x1ee21, 0x1ee22; 0x1ee24, 0x1ee24; 0x1ee27, 0x1ee27; + 0x1ee29, 0x1ee32; 0x1ee34, 0x1ee37; 0x1ee39, 0x1ee39; 0x1ee3b, 0x1ee3b; 0x1ee42, 0x1ee42; + 0x1ee47, 0x1ee47; 0x1ee49, 0x1ee49; 0x1ee4b, 0x1ee4b; 0x1ee4d, 0x1ee4f; 0x1ee51, 0x1ee52; + 0x1ee54, 0x1ee54; 0x1ee57, 0x1ee57; 0x1ee59, 0x1ee59; 0x1ee5b, 0x1ee5b; 0x1ee5d, 0x1ee5d; + 0x1ee5f, 0x1ee5f; 0x1ee61, 0x1ee62; 0x1ee64, 0x1ee64; 0x1ee67, 0x1ee6a; 0x1ee6c, 0x1ee72; + 0x1ee74, 0x1ee77; 0x1ee79, 0x1ee7c; 0x1ee7e, 0x1ee7e; 0x1ee80, 0x1ee89; 0x1ee8b, 0x1ee9b; + 0x1eea1, 0x1eea3; 0x1eea5, 0x1eea9; 0x1eeab, 0x1eebb; 0x1eef0, 0x1eef1] let other_alphabetic = Sedlex_utils.Cset.of_list - [0x345, 0x345; 0x5b0, 0x5bd; 0x5bf, 0x5bf; 0x5c1, 0x5c2; 0x5c4, 0x5c5; - 0x5c7, 0x5c7; 0x610, 0x61a; 0x64b, 0x657; 0x659, 0x65f; 0x670, 0x670; - 0x6d6, 0x6dc; 0x6e1, 0x6e4; 0x6e7, 0x6e8; 0x6ed, 0x6ed; 0x711, 0x711; - 0x730, 0x73f; 0x7a6, 0x7b0; 0x816, 0x817; 0x81b, 0x823; 0x825, 0x827; - 0x829, 0x82c; 0x8d4, 0x8df; 0x8e3, 0x8e9; 0x8f0, 0x903; 0x93a, 0x93b; - 0x93e, 0x94c; 0x94e, 0x94f; 0x955, 0x957; 0x962, 0x963; 0x981, 0x983; - 0x9be, 0x9c4; 0x9c7, 0x9c8; 0x9cb, 0x9cc; 0x9d7, 0x9d7; 0x9e2, 0x9e3; - 0xa01, 0xa03; 0xa3e, 0xa42; 0xa47, 0xa48; 0xa4b, 0xa4c; 0xa51, 0xa51; - 0xa70, 0xa71; 0xa75, 0xa75; 0xa81, 0xa83; 0xabe, 0xac5; 0xac7, 0xac9; - 0xacb, 0xacc; 0xae2, 0xae3; 0xafa, 0xafc; 0xb01, 0xb03; 0xb3e, 0xb44; - 0xb47, 0xb48; 0xb4b, 0xb4c; 0xb56, 0xb57; 0xb62, 0xb63; 0xb82, 0xb82; - 0xbbe, 0xbc2; 0xbc6, 0xbc8; 0xbca, 0xbcc; 0xbd7, 0xbd7; 0xc00, 0xc04; - 0xc3e, 0xc44; 0xc46, 0xc48; 0xc4a, 0xc4c; 0xc55, 0xc56; 0xc62, 0xc63; - 0xc81, 0xc83; 
0xcbe, 0xcc4; 0xcc6, 0xcc8; 0xcca, 0xccc; 0xcd5, 0xcd6; - 0xce2, 0xce3; 0xcf3, 0xcf3; 0xd00, 0xd03; 0xd3e, 0xd44; 0xd46, 0xd48; - 0xd4a, 0xd4c; 0xd57, 0xd57; 0xd62, 0xd63; 0xd81, 0xd83; 0xdcf, 0xdd4; - 0xdd6, 0xdd6; 0xdd8, 0xddf; 0xdf2, 0xdf3; 0xe31, 0xe31; 0xe34, 0xe3a; - 0xe4d, 0xe4d; 0xeb1, 0xeb1; 0xeb4, 0xeb9; 0xebb, 0xebc; 0xecd, 0xecd; - 0xf71, 0xf83; 0xf8d, 0xf97; 0xf99, 0xfbc; 0x102b, 0x1036; 0x1038, 0x1038; - 0x103b, 0x103e; 0x1056, 0x1059; 0x105e, 0x1060; 0x1062, 0x1064; 0x1067, 0x106d; - 0x1071, 0x1074; 0x1082, 0x108d; 0x108f, 0x108f; 0x109a, 0x109d; 0x1712, 0x1713; - 0x1732, 0x1733; 0x1752, 0x1753; 0x1772, 0x1773; 0x17b6, 0x17c8; 0x1885, 0x1886; - 0x18a9, 0x18a9; 0x1920, 0x192b; 0x1930, 0x1938; 0x1a17, 0x1a1b; 0x1a55, 0x1a5e; - 0x1a61, 0x1a74; 0x1abf, 0x1ac0; 0x1acc, 0x1ace; 0x1b00, 0x1b04; 0x1b35, 0x1b43; - 0x1b80, 0x1b82; 0x1ba1, 0x1ba9; 0x1bac, 0x1bad; 0x1be7, 0x1bf1; 0x1c24, 0x1c36; - 0x1de7, 0x1df4; 0x24b6, 0x24e9; 0x2de0, 0x2dff; 0xa674, 0xa67b; 0xa69e, 0xa69f; - 0xa802, 0xa802; 0xa80b, 0xa80b; 0xa823, 0xa827; 0xa880, 0xa881; 0xa8b4, 0xa8c3; - 0xa8c5, 0xa8c5; 0xa8ff, 0xa8ff; 0xa926, 0xa92a; 0xa947, 0xa952; 0xa980, 0xa983; - 0xa9b4, 0xa9bf; 0xa9e5, 0xa9e5; 0xaa29, 0xaa36; 0xaa43, 0xaa43; 0xaa4c, 0xaa4d; - 0xaa7b, 0xaa7d; 0xaab0, 0xaab0; 0xaab2, 0xaab4; 0xaab7, 0xaab8; 0xaabe, 0xaabe; - 0xaaeb, 0xaaef; 0xaaf5, 0xaaf5; 0xabe3, 0xabea; 0xfb1e, 0xfb1e; 0x10376, 0x1037a; - 0x10a01, 0x10a03; 0x10a05, 0x10a06; 0x10a0c, 0x10a0f; 0x10d24, 0x10d27; 0x10eab, 0x10eac; - 0x11000, 0x11002; 0x11038, 0x11045; 0x11073, 0x11074; 0x11080, 0x11082; 0x110b0, 0x110b8; - 0x110c2, 0x110c2; 0x11100, 0x11102; 0x11127, 0x11132; 0x11145, 0x11146; 0x11180, 0x11182; - 0x111b3, 0x111bf; 0x111ce, 0x111cf; 0x1122c, 0x11234; 0x11237, 0x11237; 0x1123e, 0x1123e; - 0x11241, 0x11241; 0x112df, 0x112e8; 0x11300, 0x11303; 0x1133e, 0x11344; 0x11347, 0x11348; - 0x1134b, 0x1134c; 0x11357, 0x11357; 0x11362, 0x11363; 0x11435, 0x11441; 0x11443, 0x11445; - 0x114b0, 0x114c1; 0x115af, 0x115b5; 
0x115b8, 0x115be; 0x115dc, 0x115dd; 0x11630, 0x1163e; - 0x11640, 0x11640; 0x116ab, 0x116b5; 0x1171d, 0x1172a; 0x1182c, 0x11838; 0x11930, 0x11935; - 0x11937, 0x11938; 0x1193b, 0x1193c; 0x11940, 0x11940; 0x11942, 0x11942; 0x119d1, 0x119d7; - 0x119da, 0x119df; 0x119e4, 0x119e4; 0x11a01, 0x11a0a; 0x11a35, 0x11a39; 0x11a3b, 0x11a3e; - 0x11a51, 0x11a5b; 0x11a8a, 0x11a97; 0x11c2f, 0x11c36; 0x11c38, 0x11c3e; 0x11c92, 0x11ca7; - 0x11ca9, 0x11cb6; 0x11d31, 0x11d36; 0x11d3a, 0x11d3a; 0x11d3c, 0x11d3d; 0x11d3f, 0x11d41; - 0x11d43, 0x11d43; 0x11d47, 0x11d47; 0x11d8a, 0x11d8e; 0x11d90, 0x11d91; 0x11d93, 0x11d96; - 0x11ef3, 0x11ef6; 0x11f00, 0x11f01; 0x11f03, 0x11f03; 0x11f34, 0x11f3a; 0x11f3e, 0x11f40; + [0x345, 0x345; 0x363, 0x36f; 0x5b0, 0x5bd; 0x5bf, 0x5bf; 0x5c1, 0x5c2; + 0x5c4, 0x5c5; 0x5c7, 0x5c7; 0x610, 0x61a; 0x64b, 0x657; 0x659, 0x65f; + 0x670, 0x670; 0x6d6, 0x6dc; 0x6e1, 0x6e4; 0x6e7, 0x6e8; 0x6ed, 0x6ed; + 0x711, 0x711; 0x730, 0x73f; 0x7a6, 0x7b0; 0x816, 0x817; 0x81b, 0x823; + 0x825, 0x827; 0x829, 0x82c; 0x897, 0x897; 0x8d4, 0x8df; 0x8e3, 0x8e9; + 0x8f0, 0x903; 0x93a, 0x93b; 0x93e, 0x94c; 0x94e, 0x94f; 0x955, 0x957; + 0x962, 0x963; 0x981, 0x983; 0x9be, 0x9c4; 0x9c7, 0x9c8; 0x9cb, 0x9cc; + 0x9d7, 0x9d7; 0x9e2, 0x9e3; 0xa01, 0xa03; 0xa3e, 0xa42; 0xa47, 0xa48; + 0xa4b, 0xa4c; 0xa51, 0xa51; 0xa70, 0xa71; 0xa75, 0xa75; 0xa81, 0xa83; + 0xabe, 0xac5; 0xac7, 0xac9; 0xacb, 0xacc; 0xae2, 0xae3; 0xafa, 0xafc; + 0xb01, 0xb03; 0xb3e, 0xb44; 0xb47, 0xb48; 0xb4b, 0xb4c; 0xb56, 0xb57; + 0xb62, 0xb63; 0xb82, 0xb82; 0xbbe, 0xbc2; 0xbc6, 0xbc8; 0xbca, 0xbcc; + 0xbd7, 0xbd7; 0xc00, 0xc04; 0xc3e, 0xc44; 0xc46, 0xc48; 0xc4a, 0xc4c; + 0xc55, 0xc56; 0xc62, 0xc63; 0xc81, 0xc83; 0xcbe, 0xcc4; 0xcc6, 0xcc8; + 0xcca, 0xccc; 0xcd5, 0xcd6; 0xce2, 0xce3; 0xcf3, 0xcf3; 0xd00, 0xd03; + 0xd3e, 0xd44; 0xd46, 0xd48; 0xd4a, 0xd4c; 0xd57, 0xd57; 0xd62, 0xd63; + 0xd81, 0xd83; 0xdcf, 0xdd4; 0xdd6, 0xdd6; 0xdd8, 0xddf; 0xdf2, 0xdf3; + 0xe31, 0xe31; 0xe34, 0xe3a; 0xe4d, 0xe4d; 0xeb1, 0xeb1; 0xeb4, 0xeb9; + 
0xebb, 0xebc; 0xecd, 0xecd; 0xf71, 0xf83; 0xf8d, 0xf97; 0xf99, 0xfbc; + 0x102b, 0x1036; 0x1038, 0x1038; 0x103b, 0x103e; 0x1056, 0x1059; 0x105e, 0x1060; + 0x1062, 0x1064; 0x1067, 0x106d; 0x1071, 0x1074; 0x1082, 0x108d; 0x108f, 0x108f; + 0x109a, 0x109d; 0x1712, 0x1713; 0x1732, 0x1733; 0x1752, 0x1753; 0x1772, 0x1773; + 0x17b6, 0x17c8; 0x1885, 0x1886; 0x18a9, 0x18a9; 0x1920, 0x192b; 0x1930, 0x1938; + 0x1a17, 0x1a1b; 0x1a55, 0x1a5e; 0x1a61, 0x1a74; 0x1abf, 0x1ac0; 0x1acc, 0x1ace; + 0x1b00, 0x1b04; 0x1b35, 0x1b43; 0x1b80, 0x1b82; 0x1ba1, 0x1ba9; 0x1bac, 0x1bad; + 0x1be7, 0x1bf1; 0x1c24, 0x1c36; 0x1dd3, 0x1df4; 0x24b6, 0x24e9; 0x2de0, 0x2dff; + 0xa674, 0xa67b; 0xa69e, 0xa69f; 0xa802, 0xa802; 0xa80b, 0xa80b; 0xa823, 0xa827; + 0xa880, 0xa881; 0xa8b4, 0xa8c3; 0xa8c5, 0xa8c5; 0xa8ff, 0xa8ff; 0xa926, 0xa92a; + 0xa947, 0xa952; 0xa980, 0xa983; 0xa9b4, 0xa9bf; 0xa9e5, 0xa9e5; 0xaa29, 0xaa36; + 0xaa43, 0xaa43; 0xaa4c, 0xaa4d; 0xaa7b, 0xaa7d; 0xaab0, 0xaab0; 0xaab2, 0xaab4; + 0xaab7, 0xaab8; 0xaabe, 0xaabe; 0xaaeb, 0xaaef; 0xaaf5, 0xaaf5; 0xabe3, 0xabea; + 0xfb1e, 0xfb1e; 0x10376, 0x1037a; 0x10a01, 0x10a03; 0x10a05, 0x10a06; 0x10a0c, 0x10a0f; + 0x10d24, 0x10d27; 0x10d69, 0x10d69; 0x10eab, 0x10eac; 0x10efc, 0x10efc; 0x11000, 0x11002; + 0x11038, 0x11045; 0x11073, 0x11074; 0x11080, 0x11082; 0x110b0, 0x110b8; 0x110c2, 0x110c2; + 0x11100, 0x11102; 0x11127, 0x11132; 0x11145, 0x11146; 0x11180, 0x11182; 0x111b3, 0x111bf; + 0x111ce, 0x111cf; 0x1122c, 0x11234; 0x11237, 0x11237; 0x1123e, 0x1123e; 0x11241, 0x11241; + 0x112df, 0x112e8; 0x11300, 0x11303; 0x1133e, 0x11344; 0x11347, 0x11348; 0x1134b, 0x1134c; + 0x11357, 0x11357; 0x11362, 0x11363; 0x113b8, 0x113c0; 0x113c2, 0x113c2; 0x113c5, 0x113c5; + 0x113c7, 0x113ca; 0x113cc, 0x113cd; 0x11435, 0x11441; 0x11443, 0x11445; 0x114b0, 0x114c1; + 0x115af, 0x115b5; 0x115b8, 0x115be; 0x115dc, 0x115dd; 0x11630, 0x1163e; 0x11640, 0x11640; + 0x116ab, 0x116b5; 0x1171d, 0x1172a; 0x1182c, 0x11838; 0x11930, 0x11935; 0x11937, 0x11938; + 0x1193b, 0x1193c; 
0x11940, 0x11940; 0x11942, 0x11942; 0x119d1, 0x119d7; 0x119da, 0x119df; + 0x119e4, 0x119e4; 0x11a01, 0x11a0a; 0x11a35, 0x11a39; 0x11a3b, 0x11a3e; 0x11a51, 0x11a5b; + 0x11a8a, 0x11a97; 0x11c2f, 0x11c36; 0x11c38, 0x11c3e; 0x11c92, 0x11ca7; 0x11ca9, 0x11cb6; + 0x11d31, 0x11d36; 0x11d3a, 0x11d3a; 0x11d3c, 0x11d3d; 0x11d3f, 0x11d41; 0x11d43, 0x11d43; + 0x11d47, 0x11d47; 0x11d8a, 0x11d8e; 0x11d90, 0x11d91; 0x11d93, 0x11d96; 0x11ef3, 0x11ef6; + 0x11f00, 0x11f01; 0x11f03, 0x11f03; 0x11f34, 0x11f3a; 0x11f3e, 0x11f40; 0x1611e, 0x1612e; 0x16f4f, 0x16f4f; 0x16f51, 0x16f87; 0x16f8f, 0x16f92; 0x16ff0, 0x16ff1; 0x1bc9e, 0x1bc9e; 0x1e000, 0x1e006; 0x1e008, 0x1e018; 0x1e01b, 0x1e021; 0x1e023, 0x1e024; 0x1e026, 0x1e02a; 0x1e08f, 0x1e08f; 0x1e947, 0x1e947; 0x1f130, 0x1f149; 0x1f150, 0x1f169; 0x1f170, 0x1f189] @@ -1681,74 +1716,75 @@ module Properties = struct 0x51a, 0x51a; 0x51c, 0x51c; 0x51e, 0x51e; 0x520, 0x520; 0x522, 0x522; 0x524, 0x524; 0x526, 0x526; 0x528, 0x528; 0x52a, 0x52a; 0x52c, 0x52c; 0x52e, 0x52e; 0x531, 0x556; 0x10a0, 0x10c5; 0x10c7, 0x10c7; 0x10cd, 0x10cd; - 0x13a0, 0x13f5; 0x1c90, 0x1cba; 0x1cbd, 0x1cbf; 0x1e00, 0x1e00; 0x1e02, 0x1e02; - 0x1e04, 0x1e04; 0x1e06, 0x1e06; 0x1e08, 0x1e08; 0x1e0a, 0x1e0a; 0x1e0c, 0x1e0c; - 0x1e0e, 0x1e0e; 0x1e10, 0x1e10; 0x1e12, 0x1e12; 0x1e14, 0x1e14; 0x1e16, 0x1e16; - 0x1e18, 0x1e18; 0x1e1a, 0x1e1a; 0x1e1c, 0x1e1c; 0x1e1e, 0x1e1e; 0x1e20, 0x1e20; - 0x1e22, 0x1e22; 0x1e24, 0x1e24; 0x1e26, 0x1e26; 0x1e28, 0x1e28; 0x1e2a, 0x1e2a; - 0x1e2c, 0x1e2c; 0x1e2e, 0x1e2e; 0x1e30, 0x1e30; 0x1e32, 0x1e32; 0x1e34, 0x1e34; - 0x1e36, 0x1e36; 0x1e38, 0x1e38; 0x1e3a, 0x1e3a; 0x1e3c, 0x1e3c; 0x1e3e, 0x1e3e; - 0x1e40, 0x1e40; 0x1e42, 0x1e42; 0x1e44, 0x1e44; 0x1e46, 0x1e46; 0x1e48, 0x1e48; - 0x1e4a, 0x1e4a; 0x1e4c, 0x1e4c; 0x1e4e, 0x1e4e; 0x1e50, 0x1e50; 0x1e52, 0x1e52; - 0x1e54, 0x1e54; 0x1e56, 0x1e56; 0x1e58, 0x1e58; 0x1e5a, 0x1e5a; 0x1e5c, 0x1e5c; - 0x1e5e, 0x1e5e; 0x1e60, 0x1e60; 0x1e62, 0x1e62; 0x1e64, 0x1e64; 0x1e66, 0x1e66; - 0x1e68, 0x1e68; 0x1e6a, 
0x1e6a; 0x1e6c, 0x1e6c; 0x1e6e, 0x1e6e; 0x1e70, 0x1e70; - 0x1e72, 0x1e72; 0x1e74, 0x1e74; 0x1e76, 0x1e76; 0x1e78, 0x1e78; 0x1e7a, 0x1e7a; - 0x1e7c, 0x1e7c; 0x1e7e, 0x1e7e; 0x1e80, 0x1e80; 0x1e82, 0x1e82; 0x1e84, 0x1e84; - 0x1e86, 0x1e86; 0x1e88, 0x1e88; 0x1e8a, 0x1e8a; 0x1e8c, 0x1e8c; 0x1e8e, 0x1e8e; - 0x1e90, 0x1e90; 0x1e92, 0x1e92; 0x1e94, 0x1e94; 0x1e9e, 0x1e9e; 0x1ea0, 0x1ea0; - 0x1ea2, 0x1ea2; 0x1ea4, 0x1ea4; 0x1ea6, 0x1ea6; 0x1ea8, 0x1ea8; 0x1eaa, 0x1eaa; - 0x1eac, 0x1eac; 0x1eae, 0x1eae; 0x1eb0, 0x1eb0; 0x1eb2, 0x1eb2; 0x1eb4, 0x1eb4; - 0x1eb6, 0x1eb6; 0x1eb8, 0x1eb8; 0x1eba, 0x1eba; 0x1ebc, 0x1ebc; 0x1ebe, 0x1ebe; - 0x1ec0, 0x1ec0; 0x1ec2, 0x1ec2; 0x1ec4, 0x1ec4; 0x1ec6, 0x1ec6; 0x1ec8, 0x1ec8; - 0x1eca, 0x1eca; 0x1ecc, 0x1ecc; 0x1ece, 0x1ece; 0x1ed0, 0x1ed0; 0x1ed2, 0x1ed2; - 0x1ed4, 0x1ed4; 0x1ed6, 0x1ed6; 0x1ed8, 0x1ed8; 0x1eda, 0x1eda; 0x1edc, 0x1edc; - 0x1ede, 0x1ede; 0x1ee0, 0x1ee0; 0x1ee2, 0x1ee2; 0x1ee4, 0x1ee4; 0x1ee6, 0x1ee6; - 0x1ee8, 0x1ee8; 0x1eea, 0x1eea; 0x1eec, 0x1eec; 0x1eee, 0x1eee; 0x1ef0, 0x1ef0; - 0x1ef2, 0x1ef2; 0x1ef4, 0x1ef4; 0x1ef6, 0x1ef6; 0x1ef8, 0x1ef8; 0x1efa, 0x1efa; - 0x1efc, 0x1efc; 0x1efe, 0x1efe; 0x1f08, 0x1f0f; 0x1f18, 0x1f1d; 0x1f28, 0x1f2f; - 0x1f38, 0x1f3f; 0x1f48, 0x1f4d; 0x1f59, 0x1f59; 0x1f5b, 0x1f5b; 0x1f5d, 0x1f5d; - 0x1f5f, 0x1f5f; 0x1f68, 0x1f6f; 0x1fb8, 0x1fbb; 0x1fc8, 0x1fcb; 0x1fd8, 0x1fdb; - 0x1fe8, 0x1fec; 0x1ff8, 0x1ffb; 0x2102, 0x2102; 0x2107, 0x2107; 0x210b, 0x210d; - 0x2110, 0x2112; 0x2115, 0x2115; 0x2119, 0x211d; 0x2124, 0x2124; 0x2126, 0x2126; - 0x2128, 0x2128; 0x212a, 0x212d; 0x2130, 0x2133; 0x213e, 0x213f; 0x2145, 0x2145; - 0x2160, 0x216f; 0x2183, 0x2183; 0x24b6, 0x24cf; 0x2c00, 0x2c2f; 0x2c60, 0x2c60; - 0x2c62, 0x2c64; 0x2c67, 0x2c67; 0x2c69, 0x2c69; 0x2c6b, 0x2c6b; 0x2c6d, 0x2c70; - 0x2c72, 0x2c72; 0x2c75, 0x2c75; 0x2c7e, 0x2c80; 0x2c82, 0x2c82; 0x2c84, 0x2c84; - 0x2c86, 0x2c86; 0x2c88, 0x2c88; 0x2c8a, 0x2c8a; 0x2c8c, 0x2c8c; 0x2c8e, 0x2c8e; - 0x2c90, 0x2c90; 0x2c92, 0x2c92; 0x2c94, 0x2c94; 0x2c96, 
0x2c96; 0x2c98, 0x2c98; - 0x2c9a, 0x2c9a; 0x2c9c, 0x2c9c; 0x2c9e, 0x2c9e; 0x2ca0, 0x2ca0; 0x2ca2, 0x2ca2; - 0x2ca4, 0x2ca4; 0x2ca6, 0x2ca6; 0x2ca8, 0x2ca8; 0x2caa, 0x2caa; 0x2cac, 0x2cac; - 0x2cae, 0x2cae; 0x2cb0, 0x2cb0; 0x2cb2, 0x2cb2; 0x2cb4, 0x2cb4; 0x2cb6, 0x2cb6; - 0x2cb8, 0x2cb8; 0x2cba, 0x2cba; 0x2cbc, 0x2cbc; 0x2cbe, 0x2cbe; 0x2cc0, 0x2cc0; - 0x2cc2, 0x2cc2; 0x2cc4, 0x2cc4; 0x2cc6, 0x2cc6; 0x2cc8, 0x2cc8; 0x2cca, 0x2cca; - 0x2ccc, 0x2ccc; 0x2cce, 0x2cce; 0x2cd0, 0x2cd0; 0x2cd2, 0x2cd2; 0x2cd4, 0x2cd4; - 0x2cd6, 0x2cd6; 0x2cd8, 0x2cd8; 0x2cda, 0x2cda; 0x2cdc, 0x2cdc; 0x2cde, 0x2cde; - 0x2ce0, 0x2ce0; 0x2ce2, 0x2ce2; 0x2ceb, 0x2ceb; 0x2ced, 0x2ced; 0x2cf2, 0x2cf2; - 0xa640, 0xa640; 0xa642, 0xa642; 0xa644, 0xa644; 0xa646, 0xa646; 0xa648, 0xa648; - 0xa64a, 0xa64a; 0xa64c, 0xa64c; 0xa64e, 0xa64e; 0xa650, 0xa650; 0xa652, 0xa652; - 0xa654, 0xa654; 0xa656, 0xa656; 0xa658, 0xa658; 0xa65a, 0xa65a; 0xa65c, 0xa65c; - 0xa65e, 0xa65e; 0xa660, 0xa660; 0xa662, 0xa662; 0xa664, 0xa664; 0xa666, 0xa666; - 0xa668, 0xa668; 0xa66a, 0xa66a; 0xa66c, 0xa66c; 0xa680, 0xa680; 0xa682, 0xa682; - 0xa684, 0xa684; 0xa686, 0xa686; 0xa688, 0xa688; 0xa68a, 0xa68a; 0xa68c, 0xa68c; - 0xa68e, 0xa68e; 0xa690, 0xa690; 0xa692, 0xa692; 0xa694, 0xa694; 0xa696, 0xa696; - 0xa698, 0xa698; 0xa69a, 0xa69a; 0xa722, 0xa722; 0xa724, 0xa724; 0xa726, 0xa726; - 0xa728, 0xa728; 0xa72a, 0xa72a; 0xa72c, 0xa72c; 0xa72e, 0xa72e; 0xa732, 0xa732; - 0xa734, 0xa734; 0xa736, 0xa736; 0xa738, 0xa738; 0xa73a, 0xa73a; 0xa73c, 0xa73c; - 0xa73e, 0xa73e; 0xa740, 0xa740; 0xa742, 0xa742; 0xa744, 0xa744; 0xa746, 0xa746; - 0xa748, 0xa748; 0xa74a, 0xa74a; 0xa74c, 0xa74c; 0xa74e, 0xa74e; 0xa750, 0xa750; - 0xa752, 0xa752; 0xa754, 0xa754; 0xa756, 0xa756; 0xa758, 0xa758; 0xa75a, 0xa75a; - 0xa75c, 0xa75c; 0xa75e, 0xa75e; 0xa760, 0xa760; 0xa762, 0xa762; 0xa764, 0xa764; - 0xa766, 0xa766; 0xa768, 0xa768; 0xa76a, 0xa76a; 0xa76c, 0xa76c; 0xa76e, 0xa76e; - 0xa779, 0xa779; 0xa77b, 0xa77b; 0xa77d, 0xa77e; 0xa780, 0xa780; 0xa782, 0xa782; - 
0xa784, 0xa784; 0xa786, 0xa786; 0xa78b, 0xa78b; 0xa78d, 0xa78d; 0xa790, 0xa790; - 0xa792, 0xa792; 0xa796, 0xa796; 0xa798, 0xa798; 0xa79a, 0xa79a; 0xa79c, 0xa79c; - 0xa79e, 0xa79e; 0xa7a0, 0xa7a0; 0xa7a2, 0xa7a2; 0xa7a4, 0xa7a4; 0xa7a6, 0xa7a6; - 0xa7a8, 0xa7a8; 0xa7aa, 0xa7ae; 0xa7b0, 0xa7b4; 0xa7b6, 0xa7b6; 0xa7b8, 0xa7b8; - 0xa7ba, 0xa7ba; 0xa7bc, 0xa7bc; 0xa7be, 0xa7be; 0xa7c0, 0xa7c0; 0xa7c2, 0xa7c2; - 0xa7c4, 0xa7c7; 0xa7c9, 0xa7c9; 0xa7d0, 0xa7d0; 0xa7d6, 0xa7d6; 0xa7d8, 0xa7d8; - 0xa7f5, 0xa7f5; 0xff21, 0xff3a; 0x10400, 0x10427; 0x104b0, 0x104d3; 0x10570, 0x1057a; - 0x1057c, 0x1058a; 0x1058c, 0x10592; 0x10594, 0x10595; 0x10c80, 0x10cb2; 0x118a0, 0x118bf; + 0x13a0, 0x13f5; 0x1c89, 0x1c89; 0x1c90, 0x1cba; 0x1cbd, 0x1cbf; 0x1e00, 0x1e00; + 0x1e02, 0x1e02; 0x1e04, 0x1e04; 0x1e06, 0x1e06; 0x1e08, 0x1e08; 0x1e0a, 0x1e0a; + 0x1e0c, 0x1e0c; 0x1e0e, 0x1e0e; 0x1e10, 0x1e10; 0x1e12, 0x1e12; 0x1e14, 0x1e14; + 0x1e16, 0x1e16; 0x1e18, 0x1e18; 0x1e1a, 0x1e1a; 0x1e1c, 0x1e1c; 0x1e1e, 0x1e1e; + 0x1e20, 0x1e20; 0x1e22, 0x1e22; 0x1e24, 0x1e24; 0x1e26, 0x1e26; 0x1e28, 0x1e28; + 0x1e2a, 0x1e2a; 0x1e2c, 0x1e2c; 0x1e2e, 0x1e2e; 0x1e30, 0x1e30; 0x1e32, 0x1e32; + 0x1e34, 0x1e34; 0x1e36, 0x1e36; 0x1e38, 0x1e38; 0x1e3a, 0x1e3a; 0x1e3c, 0x1e3c; + 0x1e3e, 0x1e3e; 0x1e40, 0x1e40; 0x1e42, 0x1e42; 0x1e44, 0x1e44; 0x1e46, 0x1e46; + 0x1e48, 0x1e48; 0x1e4a, 0x1e4a; 0x1e4c, 0x1e4c; 0x1e4e, 0x1e4e; 0x1e50, 0x1e50; + 0x1e52, 0x1e52; 0x1e54, 0x1e54; 0x1e56, 0x1e56; 0x1e58, 0x1e58; 0x1e5a, 0x1e5a; + 0x1e5c, 0x1e5c; 0x1e5e, 0x1e5e; 0x1e60, 0x1e60; 0x1e62, 0x1e62; 0x1e64, 0x1e64; + 0x1e66, 0x1e66; 0x1e68, 0x1e68; 0x1e6a, 0x1e6a; 0x1e6c, 0x1e6c; 0x1e6e, 0x1e6e; + 0x1e70, 0x1e70; 0x1e72, 0x1e72; 0x1e74, 0x1e74; 0x1e76, 0x1e76; 0x1e78, 0x1e78; + 0x1e7a, 0x1e7a; 0x1e7c, 0x1e7c; 0x1e7e, 0x1e7e; 0x1e80, 0x1e80; 0x1e82, 0x1e82; + 0x1e84, 0x1e84; 0x1e86, 0x1e86; 0x1e88, 0x1e88; 0x1e8a, 0x1e8a; 0x1e8c, 0x1e8c; + 0x1e8e, 0x1e8e; 0x1e90, 0x1e90; 0x1e92, 0x1e92; 0x1e94, 0x1e94; 0x1e9e, 0x1e9e; + 0x1ea0, 0x1ea0; 
0x1ea2, 0x1ea2; 0x1ea4, 0x1ea4; 0x1ea6, 0x1ea6; 0x1ea8, 0x1ea8; + 0x1eaa, 0x1eaa; 0x1eac, 0x1eac; 0x1eae, 0x1eae; 0x1eb0, 0x1eb0; 0x1eb2, 0x1eb2; + 0x1eb4, 0x1eb4; 0x1eb6, 0x1eb6; 0x1eb8, 0x1eb8; 0x1eba, 0x1eba; 0x1ebc, 0x1ebc; + 0x1ebe, 0x1ebe; 0x1ec0, 0x1ec0; 0x1ec2, 0x1ec2; 0x1ec4, 0x1ec4; 0x1ec6, 0x1ec6; + 0x1ec8, 0x1ec8; 0x1eca, 0x1eca; 0x1ecc, 0x1ecc; 0x1ece, 0x1ece; 0x1ed0, 0x1ed0; + 0x1ed2, 0x1ed2; 0x1ed4, 0x1ed4; 0x1ed6, 0x1ed6; 0x1ed8, 0x1ed8; 0x1eda, 0x1eda; + 0x1edc, 0x1edc; 0x1ede, 0x1ede; 0x1ee0, 0x1ee0; 0x1ee2, 0x1ee2; 0x1ee4, 0x1ee4; + 0x1ee6, 0x1ee6; 0x1ee8, 0x1ee8; 0x1eea, 0x1eea; 0x1eec, 0x1eec; 0x1eee, 0x1eee; + 0x1ef0, 0x1ef0; 0x1ef2, 0x1ef2; 0x1ef4, 0x1ef4; 0x1ef6, 0x1ef6; 0x1ef8, 0x1ef8; + 0x1efa, 0x1efa; 0x1efc, 0x1efc; 0x1efe, 0x1efe; 0x1f08, 0x1f0f; 0x1f18, 0x1f1d; + 0x1f28, 0x1f2f; 0x1f38, 0x1f3f; 0x1f48, 0x1f4d; 0x1f59, 0x1f59; 0x1f5b, 0x1f5b; + 0x1f5d, 0x1f5d; 0x1f5f, 0x1f5f; 0x1f68, 0x1f6f; 0x1fb8, 0x1fbb; 0x1fc8, 0x1fcb; + 0x1fd8, 0x1fdb; 0x1fe8, 0x1fec; 0x1ff8, 0x1ffb; 0x2102, 0x2102; 0x2107, 0x2107; + 0x210b, 0x210d; 0x2110, 0x2112; 0x2115, 0x2115; 0x2119, 0x211d; 0x2124, 0x2124; + 0x2126, 0x2126; 0x2128, 0x2128; 0x212a, 0x212d; 0x2130, 0x2133; 0x213e, 0x213f; + 0x2145, 0x2145; 0x2160, 0x216f; 0x2183, 0x2183; 0x24b6, 0x24cf; 0x2c00, 0x2c2f; + 0x2c60, 0x2c60; 0x2c62, 0x2c64; 0x2c67, 0x2c67; 0x2c69, 0x2c69; 0x2c6b, 0x2c6b; + 0x2c6d, 0x2c70; 0x2c72, 0x2c72; 0x2c75, 0x2c75; 0x2c7e, 0x2c80; 0x2c82, 0x2c82; + 0x2c84, 0x2c84; 0x2c86, 0x2c86; 0x2c88, 0x2c88; 0x2c8a, 0x2c8a; 0x2c8c, 0x2c8c; + 0x2c8e, 0x2c8e; 0x2c90, 0x2c90; 0x2c92, 0x2c92; 0x2c94, 0x2c94; 0x2c96, 0x2c96; + 0x2c98, 0x2c98; 0x2c9a, 0x2c9a; 0x2c9c, 0x2c9c; 0x2c9e, 0x2c9e; 0x2ca0, 0x2ca0; + 0x2ca2, 0x2ca2; 0x2ca4, 0x2ca4; 0x2ca6, 0x2ca6; 0x2ca8, 0x2ca8; 0x2caa, 0x2caa; + 0x2cac, 0x2cac; 0x2cae, 0x2cae; 0x2cb0, 0x2cb0; 0x2cb2, 0x2cb2; 0x2cb4, 0x2cb4; + 0x2cb6, 0x2cb6; 0x2cb8, 0x2cb8; 0x2cba, 0x2cba; 0x2cbc, 0x2cbc; 0x2cbe, 0x2cbe; + 0x2cc0, 0x2cc0; 0x2cc2, 0x2cc2; 0x2cc4, 0x2cc4; 
0x2cc6, 0x2cc6; 0x2cc8, 0x2cc8; + 0x2cca, 0x2cca; 0x2ccc, 0x2ccc; 0x2cce, 0x2cce; 0x2cd0, 0x2cd0; 0x2cd2, 0x2cd2; + 0x2cd4, 0x2cd4; 0x2cd6, 0x2cd6; 0x2cd8, 0x2cd8; 0x2cda, 0x2cda; 0x2cdc, 0x2cdc; + 0x2cde, 0x2cde; 0x2ce0, 0x2ce0; 0x2ce2, 0x2ce2; 0x2ceb, 0x2ceb; 0x2ced, 0x2ced; + 0x2cf2, 0x2cf2; 0xa640, 0xa640; 0xa642, 0xa642; 0xa644, 0xa644; 0xa646, 0xa646; + 0xa648, 0xa648; 0xa64a, 0xa64a; 0xa64c, 0xa64c; 0xa64e, 0xa64e; 0xa650, 0xa650; + 0xa652, 0xa652; 0xa654, 0xa654; 0xa656, 0xa656; 0xa658, 0xa658; 0xa65a, 0xa65a; + 0xa65c, 0xa65c; 0xa65e, 0xa65e; 0xa660, 0xa660; 0xa662, 0xa662; 0xa664, 0xa664; + 0xa666, 0xa666; 0xa668, 0xa668; 0xa66a, 0xa66a; 0xa66c, 0xa66c; 0xa680, 0xa680; + 0xa682, 0xa682; 0xa684, 0xa684; 0xa686, 0xa686; 0xa688, 0xa688; 0xa68a, 0xa68a; + 0xa68c, 0xa68c; 0xa68e, 0xa68e; 0xa690, 0xa690; 0xa692, 0xa692; 0xa694, 0xa694; + 0xa696, 0xa696; 0xa698, 0xa698; 0xa69a, 0xa69a; 0xa722, 0xa722; 0xa724, 0xa724; + 0xa726, 0xa726; 0xa728, 0xa728; 0xa72a, 0xa72a; 0xa72c, 0xa72c; 0xa72e, 0xa72e; + 0xa732, 0xa732; 0xa734, 0xa734; 0xa736, 0xa736; 0xa738, 0xa738; 0xa73a, 0xa73a; + 0xa73c, 0xa73c; 0xa73e, 0xa73e; 0xa740, 0xa740; 0xa742, 0xa742; 0xa744, 0xa744; + 0xa746, 0xa746; 0xa748, 0xa748; 0xa74a, 0xa74a; 0xa74c, 0xa74c; 0xa74e, 0xa74e; + 0xa750, 0xa750; 0xa752, 0xa752; 0xa754, 0xa754; 0xa756, 0xa756; 0xa758, 0xa758; + 0xa75a, 0xa75a; 0xa75c, 0xa75c; 0xa75e, 0xa75e; 0xa760, 0xa760; 0xa762, 0xa762; + 0xa764, 0xa764; 0xa766, 0xa766; 0xa768, 0xa768; 0xa76a, 0xa76a; 0xa76c, 0xa76c; + 0xa76e, 0xa76e; 0xa779, 0xa779; 0xa77b, 0xa77b; 0xa77d, 0xa77e; 0xa780, 0xa780; + 0xa782, 0xa782; 0xa784, 0xa784; 0xa786, 0xa786; 0xa78b, 0xa78b; 0xa78d, 0xa78d; + 0xa790, 0xa790; 0xa792, 0xa792; 0xa796, 0xa796; 0xa798, 0xa798; 0xa79a, 0xa79a; + 0xa79c, 0xa79c; 0xa79e, 0xa79e; 0xa7a0, 0xa7a0; 0xa7a2, 0xa7a2; 0xa7a4, 0xa7a4; + 0xa7a6, 0xa7a6; 0xa7a8, 0xa7a8; 0xa7aa, 0xa7ae; 0xa7b0, 0xa7b4; 0xa7b6, 0xa7b6; + 0xa7b8, 0xa7b8; 0xa7ba, 0xa7ba; 0xa7bc, 0xa7bc; 0xa7be, 0xa7be; 0xa7c0, 0xa7c0; 
+ 0xa7c2, 0xa7c2; 0xa7c4, 0xa7c7; 0xa7c9, 0xa7c9; 0xa7cb, 0xa7cc; 0xa7d0, 0xa7d0; + 0xa7d6, 0xa7d6; 0xa7d8, 0xa7d8; 0xa7da, 0xa7da; 0xa7dc, 0xa7dc; 0xa7f5, 0xa7f5; + 0xff21, 0xff3a; 0x10400, 0x10427; 0x104b0, 0x104d3; 0x10570, 0x1057a; 0x1057c, 0x1058a; + 0x1058c, 0x10592; 0x10594, 0x10595; 0x10c80, 0x10cb2; 0x10d50, 0x10d65; 0x118a0, 0x118bf; 0x16e40, 0x16e5f; 0x1d400, 0x1d419; 0x1d434, 0x1d44d; 0x1d468, 0x1d481; 0x1d49c, 0x1d49c; 0x1d49e, 0x1d49f; 0x1d4a2, 0x1d4a2; 0x1d4a5, 0x1d4a6; 0x1d4a9, 0x1d4ac; 0x1d4ae, 0x1d4b5; 0x1d4d0, 0x1d4e9; 0x1d504, 0x1d505; 0x1d507, 0x1d50a; 0x1d50d, 0x1d514; 0x1d516, 0x1d51c; @@ -1773,7 +1809,7 @@ module Properties = struct 0x5ef, 0x5f2; 0x610, 0x61a; 0x620, 0x669; 0x66e, 0x6d3; 0x6d5, 0x6dc; 0x6df, 0x6e8; 0x6ea, 0x6fc; 0x6ff, 0x6ff; 0x710, 0x74a; 0x74d, 0x7b1; 0x7c0, 0x7f5; 0x7fa, 0x7fa; 0x7fd, 0x7fd; 0x800, 0x82d; 0x840, 0x85b; - 0x860, 0x86a; 0x870, 0x887; 0x889, 0x88e; 0x898, 0x8e1; 0x8e3, 0x963; + 0x860, 0x86a; 0x870, 0x887; 0x889, 0x88e; 0x897, 0x8e1; 0x8e3, 0x963; 0x966, 0x96f; 0x971, 0x983; 0x985, 0x98c; 0x98f, 0x990; 0x993, 0x9a8; 0x9aa, 0x9b0; 0x9b2, 0x9b2; 0x9b6, 0x9b9; 0x9bc, 0x9c4; 0x9c7, 0x9c8; 0x9cb, 0x9ce; 0x9d7, 0x9d7; 0x9dc, 0x9dd; 0x9df, 0x9e3; 0x9e6, 0x9f1; @@ -1818,24 +1854,24 @@ module Properties = struct 0x19d0, 0x19da; 0x1a00, 0x1a1b; 0x1a20, 0x1a5e; 0x1a60, 0x1a7c; 0x1a7f, 0x1a89; 0x1a90, 0x1a99; 0x1aa7, 0x1aa7; 0x1ab0, 0x1abd; 0x1abf, 0x1ace; 0x1b00, 0x1b4c; 0x1b50, 0x1b59; 0x1b6b, 0x1b73; 0x1b80, 0x1bf3; 0x1c00, 0x1c37; 0x1c40, 0x1c49; - 0x1c4d, 0x1c7d; 0x1c80, 0x1c88; 0x1c90, 0x1cba; 0x1cbd, 0x1cbf; 0x1cd0, 0x1cd2; + 0x1c4d, 0x1c7d; 0x1c80, 0x1c8a; 0x1c90, 0x1cba; 0x1cbd, 0x1cbf; 0x1cd0, 0x1cd2; 0x1cd4, 0x1cfa; 0x1d00, 0x1f15; 0x1f18, 0x1f1d; 0x1f20, 0x1f45; 0x1f48, 0x1f4d; 0x1f50, 0x1f57; 0x1f59, 0x1f59; 0x1f5b, 0x1f5b; 0x1f5d, 0x1f5d; 0x1f5f, 0x1f7d; 0x1f80, 0x1fb4; 0x1fb6, 0x1fbc; 0x1fbe, 0x1fbe; 0x1fc2, 0x1fc4; 0x1fc6, 0x1fcc; 0x1fd0, 0x1fd3; 0x1fd6, 0x1fdb; 0x1fe0, 0x1fec; 0x1ff2, 0x1ff4; 0x1ff6, 
0x1ffc; - 0x203f, 0x2040; 0x2054, 0x2054; 0x2071, 0x2071; 0x207f, 0x207f; 0x2090, 0x209c; - 0x20d0, 0x20dc; 0x20e1, 0x20e1; 0x20e5, 0x20f0; 0x2102, 0x2102; 0x2107, 0x2107; - 0x210a, 0x2113; 0x2115, 0x2115; 0x2118, 0x211d; 0x2124, 0x2124; 0x2126, 0x2126; - 0x2128, 0x2128; 0x212a, 0x2139; 0x213c, 0x213f; 0x2145, 0x2149; 0x214e, 0x214e; - 0x2160, 0x2188; 0x2c00, 0x2ce4; 0x2ceb, 0x2cf3; 0x2d00, 0x2d25; 0x2d27, 0x2d27; - 0x2d2d, 0x2d2d; 0x2d30, 0x2d67; 0x2d6f, 0x2d6f; 0x2d7f, 0x2d96; 0x2da0, 0x2da6; - 0x2da8, 0x2dae; 0x2db0, 0x2db6; 0x2db8, 0x2dbe; 0x2dc0, 0x2dc6; 0x2dc8, 0x2dce; - 0x2dd0, 0x2dd6; 0x2dd8, 0x2dde; 0x2de0, 0x2dff; 0x3005, 0x3007; 0x3021, 0x302f; - 0x3031, 0x3035; 0x3038, 0x303c; 0x3041, 0x3096; 0x3099, 0x309a; 0x309d, 0x309f; - 0x30a1, 0x30fa; 0x30fc, 0x30ff; 0x3105, 0x312f; 0x3131, 0x318e; 0x31a0, 0x31bf; + 0x200c, 0x200d; 0x203f, 0x2040; 0x2054, 0x2054; 0x2071, 0x2071; 0x207f, 0x207f; + 0x2090, 0x209c; 0x20d0, 0x20dc; 0x20e1, 0x20e1; 0x20e5, 0x20f0; 0x2102, 0x2102; + 0x2107, 0x2107; 0x210a, 0x2113; 0x2115, 0x2115; 0x2118, 0x211d; 0x2124, 0x2124; + 0x2126, 0x2126; 0x2128, 0x2128; 0x212a, 0x2139; 0x213c, 0x213f; 0x2145, 0x2149; + 0x214e, 0x214e; 0x2160, 0x2188; 0x2c00, 0x2ce4; 0x2ceb, 0x2cf3; 0x2d00, 0x2d25; + 0x2d27, 0x2d27; 0x2d2d, 0x2d2d; 0x2d30, 0x2d67; 0x2d6f, 0x2d6f; 0x2d7f, 0x2d96; + 0x2da0, 0x2da6; 0x2da8, 0x2dae; 0x2db0, 0x2db6; 0x2db8, 0x2dbe; 0x2dc0, 0x2dc6; + 0x2dc8, 0x2dce; 0x2dd0, 0x2dd6; 0x2dd8, 0x2dde; 0x2de0, 0x2dff; 0x3005, 0x3007; + 0x3021, 0x302f; 0x3031, 0x3035; 0x3038, 0x303c; 0x3041, 0x3096; 0x3099, 0x309a; + 0x309d, 0x309f; 0x30a1, 0x30ff; 0x3105, 0x312f; 0x3131, 0x318e; 0x31a0, 0x31bf; 0x31f0, 0x31ff; 0x3400, 0x4dbf; 0x4e00, 0xa48c; 0xa4d0, 0xa4fd; 0xa500, 0xa60c; 0xa610, 0xa62b; 0xa640, 0xa66f; 0xa674, 0xa67d; 0xa67f, 0xa6f1; 0xa717, 0xa71f; - 0xa722, 0xa788; 0xa78b, 0xa7ca; 0xa7d0, 0xa7d1; 0xa7d3, 0xa7d3; 0xa7d5, 0xa7d9; + 0xa722, 0xa788; 0xa78b, 0xa7cd; 0xa7d0, 0xa7d1; 0xa7d3, 0xa7d3; 0xa7d5, 0xa7dc; 0xa7f2, 0xa827; 0xa82c, 
0xa82c; 0xa840, 0xa873; 0xa880, 0xa8c5; 0xa8d0, 0xa8d9; 0xa8e0, 0xa8f7; 0xa8fb, 0xa8fb; 0xa8fd, 0xa92d; 0xa930, 0xa953; 0xa960, 0xa97c; 0xa980, 0xa9c0; 0xa9cf, 0xa9d9; 0xa9e0, 0xa9fe; 0xaa00, 0xaa36; 0xaa40, 0xaa4d; @@ -1849,23 +1885,24 @@ module Properties = struct 0xfe00, 0xfe0f; 0xfe20, 0xfe2f; 0xfe33, 0xfe34; 0xfe4d, 0xfe4f; 0xfe71, 0xfe71; 0xfe73, 0xfe73; 0xfe77, 0xfe77; 0xfe79, 0xfe79; 0xfe7b, 0xfe7b; 0xfe7d, 0xfe7d; 0xfe7f, 0xfefc; 0xff10, 0xff19; 0xff21, 0xff3a; 0xff3f, 0xff3f; 0xff41, 0xff5a; - 0xff66, 0xffbe; 0xffc2, 0xffc7; 0xffca, 0xffcf; 0xffd2, 0xffd7; 0xffda, 0xffdc; + 0xff65, 0xffbe; 0xffc2, 0xffc7; 0xffca, 0xffcf; 0xffd2, 0xffd7; 0xffda, 0xffdc; 0x10000, 0x1000b; 0x1000d, 0x10026; 0x10028, 0x1003a; 0x1003c, 0x1003d; 0x1003f, 0x1004d; 0x10050, 0x1005d; 0x10080, 0x100fa; 0x10140, 0x10174; 0x101fd, 0x101fd; 0x10280, 0x1029c; 0x102a0, 0x102d0; 0x102e0, 0x102e0; 0x10300, 0x1031f; 0x1032d, 0x1034a; 0x10350, 0x1037a; 0x10380, 0x1039d; 0x103a0, 0x103c3; 0x103c8, 0x103cf; 0x103d1, 0x103d5; 0x10400, 0x1049d; 0x104a0, 0x104a9; 0x104b0, 0x104d3; 0x104d8, 0x104fb; 0x10500, 0x10527; 0x10530, 0x10563; 0x10570, 0x1057a; 0x1057c, 0x1058a; 0x1058c, 0x10592; 0x10594, 0x10595; 0x10597, 0x105a1; - 0x105a3, 0x105b1; 0x105b3, 0x105b9; 0x105bb, 0x105bc; 0x10600, 0x10736; 0x10740, 0x10755; - 0x10760, 0x10767; 0x10780, 0x10785; 0x10787, 0x107b0; 0x107b2, 0x107ba; 0x10800, 0x10805; - 0x10808, 0x10808; 0x1080a, 0x10835; 0x10837, 0x10838; 0x1083c, 0x1083c; 0x1083f, 0x10855; - 0x10860, 0x10876; 0x10880, 0x1089e; 0x108e0, 0x108f2; 0x108f4, 0x108f5; 0x10900, 0x10915; - 0x10920, 0x10939; 0x10980, 0x109b7; 0x109be, 0x109bf; 0x10a00, 0x10a03; 0x10a05, 0x10a06; - 0x10a0c, 0x10a13; 0x10a15, 0x10a17; 0x10a19, 0x10a35; 0x10a38, 0x10a3a; 0x10a3f, 0x10a3f; - 0x10a60, 0x10a7c; 0x10a80, 0x10a9c; 0x10ac0, 0x10ac7; 0x10ac9, 0x10ae6; 0x10b00, 0x10b35; - 0x10b40, 0x10b55; 0x10b60, 0x10b72; 0x10b80, 0x10b91; 0x10c00, 0x10c48; 0x10c80, 0x10cb2; - 0x10cc0, 0x10cf2; 0x10d00, 0x10d27; 0x10d30, 
0x10d39; 0x10e80, 0x10ea9; 0x10eab, 0x10eac; - 0x10eb0, 0x10eb1; 0x10efd, 0x10f1c; 0x10f27, 0x10f27; 0x10f30, 0x10f50; 0x10f70, 0x10f85; + 0x105a3, 0x105b1; 0x105b3, 0x105b9; 0x105bb, 0x105bc; 0x105c0, 0x105f3; 0x10600, 0x10736; + 0x10740, 0x10755; 0x10760, 0x10767; 0x10780, 0x10785; 0x10787, 0x107b0; 0x107b2, 0x107ba; + 0x10800, 0x10805; 0x10808, 0x10808; 0x1080a, 0x10835; 0x10837, 0x10838; 0x1083c, 0x1083c; + 0x1083f, 0x10855; 0x10860, 0x10876; 0x10880, 0x1089e; 0x108e0, 0x108f2; 0x108f4, 0x108f5; + 0x10900, 0x10915; 0x10920, 0x10939; 0x10980, 0x109b7; 0x109be, 0x109bf; 0x10a00, 0x10a03; + 0x10a05, 0x10a06; 0x10a0c, 0x10a13; 0x10a15, 0x10a17; 0x10a19, 0x10a35; 0x10a38, 0x10a3a; + 0x10a3f, 0x10a3f; 0x10a60, 0x10a7c; 0x10a80, 0x10a9c; 0x10ac0, 0x10ac7; 0x10ac9, 0x10ae6; + 0x10b00, 0x10b35; 0x10b40, 0x10b55; 0x10b60, 0x10b72; 0x10b80, 0x10b91; 0x10c00, 0x10c48; + 0x10c80, 0x10cb2; 0x10cc0, 0x10cf2; 0x10d00, 0x10d27; 0x10d30, 0x10d39; 0x10d40, 0x10d65; + 0x10d69, 0x10d6d; 0x10d6f, 0x10d85; 0x10e80, 0x10ea9; 0x10eab, 0x10eac; 0x10eb0, 0x10eb1; + 0x10ec2, 0x10ec4; 0x10efc, 0x10f1c; 0x10f27, 0x10f27; 0x10f30, 0x10f50; 0x10f70, 0x10f85; 0x10fb0, 0x10fc4; 0x10fe0, 0x10ff6; 0x11000, 0x11046; 0x11066, 0x11075; 0x1107f, 0x110ba; 0x110c2, 0x110c2; 0x110d0, 0x110e8; 0x110f0, 0x110f9; 0x11100, 0x11134; 0x11136, 0x1113f; 0x11144, 0x11147; 0x11150, 0x11173; 0x11176, 0x11176; 0x11180, 0x111c4; 0x111c9, 0x111cc; @@ -1874,50 +1911,54 @@ module Properties = struct 0x112b0, 0x112ea; 0x112f0, 0x112f9; 0x11300, 0x11303; 0x11305, 0x1130c; 0x1130f, 0x11310; 0x11313, 0x11328; 0x1132a, 0x11330; 0x11332, 0x11333; 0x11335, 0x11339; 0x1133b, 0x11344; 0x11347, 0x11348; 0x1134b, 0x1134d; 0x11350, 0x11350; 0x11357, 0x11357; 0x1135d, 0x11363; - 0x11366, 0x1136c; 0x11370, 0x11374; 0x11400, 0x1144a; 0x11450, 0x11459; 0x1145e, 0x11461; + 0x11366, 0x1136c; 0x11370, 0x11374; 0x11380, 0x11389; 0x1138b, 0x1138b; 0x1138e, 0x1138e; + 0x11390, 0x113b5; 0x113b7, 0x113c0; 0x113c2, 0x113c2; 0x113c5, 0x113c5; 
0x113c7, 0x113ca; + 0x113cc, 0x113d3; 0x113e1, 0x113e2; 0x11400, 0x1144a; 0x11450, 0x11459; 0x1145e, 0x11461; 0x11480, 0x114c5; 0x114c7, 0x114c7; 0x114d0, 0x114d9; 0x11580, 0x115b5; 0x115b8, 0x115c0; 0x115d8, 0x115dd; 0x11600, 0x11640; 0x11644, 0x11644; 0x11650, 0x11659; 0x11680, 0x116b8; - 0x116c0, 0x116c9; 0x11700, 0x1171a; 0x1171d, 0x1172b; 0x11730, 0x11739; 0x11740, 0x11746; - 0x11800, 0x1183a; 0x118a0, 0x118e9; 0x118ff, 0x11906; 0x11909, 0x11909; 0x1190c, 0x11913; - 0x11915, 0x11916; 0x11918, 0x11935; 0x11937, 0x11938; 0x1193b, 0x11943; 0x11950, 0x11959; - 0x119a0, 0x119a7; 0x119aa, 0x119d7; 0x119da, 0x119e1; 0x119e3, 0x119e4; 0x11a00, 0x11a3e; - 0x11a47, 0x11a47; 0x11a50, 0x11a99; 0x11a9d, 0x11a9d; 0x11ab0, 0x11af8; 0x11c00, 0x11c08; - 0x11c0a, 0x11c36; 0x11c38, 0x11c40; 0x11c50, 0x11c59; 0x11c72, 0x11c8f; 0x11c92, 0x11ca7; - 0x11ca9, 0x11cb6; 0x11d00, 0x11d06; 0x11d08, 0x11d09; 0x11d0b, 0x11d36; 0x11d3a, 0x11d3a; - 0x11d3c, 0x11d3d; 0x11d3f, 0x11d47; 0x11d50, 0x11d59; 0x11d60, 0x11d65; 0x11d67, 0x11d68; - 0x11d6a, 0x11d8e; 0x11d90, 0x11d91; 0x11d93, 0x11d98; 0x11da0, 0x11da9; 0x11ee0, 0x11ef6; - 0x11f00, 0x11f10; 0x11f12, 0x11f3a; 0x11f3e, 0x11f42; 0x11f50, 0x11f59; 0x11fb0, 0x11fb0; - 0x12000, 0x12399; 0x12400, 0x1246e; 0x12480, 0x12543; 0x12f90, 0x12ff0; 0x13000, 0x1342f; - 0x13440, 0x13455; 0x14400, 0x14646; 0x16800, 0x16a38; 0x16a40, 0x16a5e; 0x16a60, 0x16a69; + 0x116c0, 0x116c9; 0x116d0, 0x116e3; 0x11700, 0x1171a; 0x1171d, 0x1172b; 0x11730, 0x11739; + 0x11740, 0x11746; 0x11800, 0x1183a; 0x118a0, 0x118e9; 0x118ff, 0x11906; 0x11909, 0x11909; + 0x1190c, 0x11913; 0x11915, 0x11916; 0x11918, 0x11935; 0x11937, 0x11938; 0x1193b, 0x11943; + 0x11950, 0x11959; 0x119a0, 0x119a7; 0x119aa, 0x119d7; 0x119da, 0x119e1; 0x119e3, 0x119e4; + 0x11a00, 0x11a3e; 0x11a47, 0x11a47; 0x11a50, 0x11a99; 0x11a9d, 0x11a9d; 0x11ab0, 0x11af8; + 0x11bc0, 0x11be0; 0x11bf0, 0x11bf9; 0x11c00, 0x11c08; 0x11c0a, 0x11c36; 0x11c38, 0x11c40; + 0x11c50, 0x11c59; 0x11c72, 0x11c8f; 0x11c92, 
0x11ca7; 0x11ca9, 0x11cb6; 0x11d00, 0x11d06; + 0x11d08, 0x11d09; 0x11d0b, 0x11d36; 0x11d3a, 0x11d3a; 0x11d3c, 0x11d3d; 0x11d3f, 0x11d47; + 0x11d50, 0x11d59; 0x11d60, 0x11d65; 0x11d67, 0x11d68; 0x11d6a, 0x11d8e; 0x11d90, 0x11d91; + 0x11d93, 0x11d98; 0x11da0, 0x11da9; 0x11ee0, 0x11ef6; 0x11f00, 0x11f10; 0x11f12, 0x11f3a; + 0x11f3e, 0x11f42; 0x11f50, 0x11f5a; 0x11fb0, 0x11fb0; 0x12000, 0x12399; 0x12400, 0x1246e; + 0x12480, 0x12543; 0x12f90, 0x12ff0; 0x13000, 0x1342f; 0x13440, 0x13455; 0x13460, 0x143fa; + 0x14400, 0x14646; 0x16100, 0x16139; 0x16800, 0x16a38; 0x16a40, 0x16a5e; 0x16a60, 0x16a69; 0x16a70, 0x16abe; 0x16ac0, 0x16ac9; 0x16ad0, 0x16aed; 0x16af0, 0x16af4; 0x16b00, 0x16b36; - 0x16b40, 0x16b43; 0x16b50, 0x16b59; 0x16b63, 0x16b77; 0x16b7d, 0x16b8f; 0x16e40, 0x16e7f; - 0x16f00, 0x16f4a; 0x16f4f, 0x16f87; 0x16f8f, 0x16f9f; 0x16fe0, 0x16fe1; 0x16fe3, 0x16fe4; - 0x16ff0, 0x16ff1; 0x17000, 0x187f7; 0x18800, 0x18cd5; 0x18d00, 0x18d08; 0x1aff0, 0x1aff3; - 0x1aff5, 0x1affb; 0x1affd, 0x1affe; 0x1b000, 0x1b122; 0x1b132, 0x1b132; 0x1b150, 0x1b152; - 0x1b155, 0x1b155; 0x1b164, 0x1b167; 0x1b170, 0x1b2fb; 0x1bc00, 0x1bc6a; 0x1bc70, 0x1bc7c; - 0x1bc80, 0x1bc88; 0x1bc90, 0x1bc99; 0x1bc9d, 0x1bc9e; 0x1cf00, 0x1cf2d; 0x1cf30, 0x1cf46; - 0x1d165, 0x1d169; 0x1d16d, 0x1d172; 0x1d17b, 0x1d182; 0x1d185, 0x1d18b; 0x1d1aa, 0x1d1ad; - 0x1d242, 0x1d244; 0x1d400, 0x1d454; 0x1d456, 0x1d49c; 0x1d49e, 0x1d49f; 0x1d4a2, 0x1d4a2; - 0x1d4a5, 0x1d4a6; 0x1d4a9, 0x1d4ac; 0x1d4ae, 0x1d4b9; 0x1d4bb, 0x1d4bb; 0x1d4bd, 0x1d4c3; - 0x1d4c5, 0x1d505; 0x1d507, 0x1d50a; 0x1d50d, 0x1d514; 0x1d516, 0x1d51c; 0x1d51e, 0x1d539; - 0x1d53b, 0x1d53e; 0x1d540, 0x1d544; 0x1d546, 0x1d546; 0x1d54a, 0x1d550; 0x1d552, 0x1d6a5; - 0x1d6a8, 0x1d6c0; 0x1d6c2, 0x1d6da; 0x1d6dc, 0x1d6fa; 0x1d6fc, 0x1d714; 0x1d716, 0x1d734; - 0x1d736, 0x1d74e; 0x1d750, 0x1d76e; 0x1d770, 0x1d788; 0x1d78a, 0x1d7a8; 0x1d7aa, 0x1d7c2; - 0x1d7c4, 0x1d7cb; 0x1d7ce, 0x1d7ff; 0x1da00, 0x1da36; 0x1da3b, 0x1da6c; 0x1da75, 0x1da75; - 0x1da84, 0x1da84; 
0x1da9b, 0x1da9f; 0x1daa1, 0x1daaf; 0x1df00, 0x1df1e; 0x1df25, 0x1df2a; - 0x1e000, 0x1e006; 0x1e008, 0x1e018; 0x1e01b, 0x1e021; 0x1e023, 0x1e024; 0x1e026, 0x1e02a; - 0x1e030, 0x1e06d; 0x1e08f, 0x1e08f; 0x1e100, 0x1e12c; 0x1e130, 0x1e13d; 0x1e140, 0x1e149; - 0x1e14e, 0x1e14e; 0x1e290, 0x1e2ae; 0x1e2c0, 0x1e2f9; 0x1e4d0, 0x1e4f9; 0x1e7e0, 0x1e7e6; - 0x1e7e8, 0x1e7eb; 0x1e7ed, 0x1e7ee; 0x1e7f0, 0x1e7fe; 0x1e800, 0x1e8c4; 0x1e8d0, 0x1e8d6; - 0x1e900, 0x1e94b; 0x1e950, 0x1e959; 0x1ee00, 0x1ee03; 0x1ee05, 0x1ee1f; 0x1ee21, 0x1ee22; - 0x1ee24, 0x1ee24; 0x1ee27, 0x1ee27; 0x1ee29, 0x1ee32; 0x1ee34, 0x1ee37; 0x1ee39, 0x1ee39; - 0x1ee3b, 0x1ee3b; 0x1ee42, 0x1ee42; 0x1ee47, 0x1ee47; 0x1ee49, 0x1ee49; 0x1ee4b, 0x1ee4b; - 0x1ee4d, 0x1ee4f; 0x1ee51, 0x1ee52; 0x1ee54, 0x1ee54; 0x1ee57, 0x1ee57; 0x1ee59, 0x1ee59; - 0x1ee5b, 0x1ee5b; 0x1ee5d, 0x1ee5d; 0x1ee5f, 0x1ee5f; 0x1ee61, 0x1ee62; 0x1ee64, 0x1ee64; - 0x1ee67, 0x1ee6a; 0x1ee6c, 0x1ee72; 0x1ee74, 0x1ee77; 0x1ee79, 0x1ee7c; 0x1ee7e, 0x1ee7e; - 0x1ee80, 0x1ee89; 0x1ee8b, 0x1ee9b; 0x1eea1, 0x1eea3; 0x1eea5, 0x1eea9; 0x1eeab, 0x1eebb; - 0x1fbf0, 0x1fbf9; 0x20000, 0x2a6df; 0x2a700, 0x2b739; 0x2b740, 0x2b81d; 0x2b820, 0x2cea1; - 0x2ceb0, 0x2ebe0; 0x2f800, 0x2fa1d; 0x30000, 0x3134a; 0x31350, 0x323af; 0xe0100, 0xe01ef] + 0x16b40, 0x16b43; 0x16b50, 0x16b59; 0x16b63, 0x16b77; 0x16b7d, 0x16b8f; 0x16d40, 0x16d6c; + 0x16d70, 0x16d79; 0x16e40, 0x16e7f; 0x16f00, 0x16f4a; 0x16f4f, 0x16f87; 0x16f8f, 0x16f9f; + 0x16fe0, 0x16fe1; 0x16fe3, 0x16fe4; 0x16ff0, 0x16ff1; 0x17000, 0x187f7; 0x18800, 0x18cd5; + 0x18cff, 0x18d08; 0x1aff0, 0x1aff3; 0x1aff5, 0x1affb; 0x1affd, 0x1affe; 0x1b000, 0x1b122; + 0x1b132, 0x1b132; 0x1b150, 0x1b152; 0x1b155, 0x1b155; 0x1b164, 0x1b167; 0x1b170, 0x1b2fb; + 0x1bc00, 0x1bc6a; 0x1bc70, 0x1bc7c; 0x1bc80, 0x1bc88; 0x1bc90, 0x1bc99; 0x1bc9d, 0x1bc9e; + 0x1ccf0, 0x1ccf9; 0x1cf00, 0x1cf2d; 0x1cf30, 0x1cf46; 0x1d165, 0x1d169; 0x1d16d, 0x1d172; + 0x1d17b, 0x1d182; 0x1d185, 0x1d18b; 0x1d1aa, 0x1d1ad; 0x1d242, 0x1d244; 0x1d400, 
0x1d454; + 0x1d456, 0x1d49c; 0x1d49e, 0x1d49f; 0x1d4a2, 0x1d4a2; 0x1d4a5, 0x1d4a6; 0x1d4a9, 0x1d4ac; + 0x1d4ae, 0x1d4b9; 0x1d4bb, 0x1d4bb; 0x1d4bd, 0x1d4c3; 0x1d4c5, 0x1d505; 0x1d507, 0x1d50a; + 0x1d50d, 0x1d514; 0x1d516, 0x1d51c; 0x1d51e, 0x1d539; 0x1d53b, 0x1d53e; 0x1d540, 0x1d544; + 0x1d546, 0x1d546; 0x1d54a, 0x1d550; 0x1d552, 0x1d6a5; 0x1d6a8, 0x1d6c0; 0x1d6c2, 0x1d6da; + 0x1d6dc, 0x1d6fa; 0x1d6fc, 0x1d714; 0x1d716, 0x1d734; 0x1d736, 0x1d74e; 0x1d750, 0x1d76e; + 0x1d770, 0x1d788; 0x1d78a, 0x1d7a8; 0x1d7aa, 0x1d7c2; 0x1d7c4, 0x1d7cb; 0x1d7ce, 0x1d7ff; + 0x1da00, 0x1da36; 0x1da3b, 0x1da6c; 0x1da75, 0x1da75; 0x1da84, 0x1da84; 0x1da9b, 0x1da9f; + 0x1daa1, 0x1daaf; 0x1df00, 0x1df1e; 0x1df25, 0x1df2a; 0x1e000, 0x1e006; 0x1e008, 0x1e018; + 0x1e01b, 0x1e021; 0x1e023, 0x1e024; 0x1e026, 0x1e02a; 0x1e030, 0x1e06d; 0x1e08f, 0x1e08f; + 0x1e100, 0x1e12c; 0x1e130, 0x1e13d; 0x1e140, 0x1e149; 0x1e14e, 0x1e14e; 0x1e290, 0x1e2ae; + 0x1e2c0, 0x1e2f9; 0x1e4d0, 0x1e4f9; 0x1e5d0, 0x1e5fa; 0x1e7e0, 0x1e7e6; 0x1e7e8, 0x1e7eb; + 0x1e7ed, 0x1e7ee; 0x1e7f0, 0x1e7fe; 0x1e800, 0x1e8c4; 0x1e8d0, 0x1e8d6; 0x1e900, 0x1e94b; + 0x1e950, 0x1e959; 0x1ee00, 0x1ee03; 0x1ee05, 0x1ee1f; 0x1ee21, 0x1ee22; 0x1ee24, 0x1ee24; + 0x1ee27, 0x1ee27; 0x1ee29, 0x1ee32; 0x1ee34, 0x1ee37; 0x1ee39, 0x1ee39; 0x1ee3b, 0x1ee3b; + 0x1ee42, 0x1ee42; 0x1ee47, 0x1ee47; 0x1ee49, 0x1ee49; 0x1ee4b, 0x1ee4b; 0x1ee4d, 0x1ee4f; + 0x1ee51, 0x1ee52; 0x1ee54, 0x1ee54; 0x1ee57, 0x1ee57; 0x1ee59, 0x1ee59; 0x1ee5b, 0x1ee5b; + 0x1ee5d, 0x1ee5d; 0x1ee5f, 0x1ee5f; 0x1ee61, 0x1ee62; 0x1ee64, 0x1ee64; 0x1ee67, 0x1ee6a; + 0x1ee6c, 0x1ee72; 0x1ee74, 0x1ee77; 0x1ee79, 0x1ee7c; 0x1ee7e, 0x1ee7e; 0x1ee80, 0x1ee89; + 0x1ee8b, 0x1ee9b; 0x1eea1, 0x1eea3; 0x1eea5, 0x1eea9; 0x1eeab, 0x1eebb; 0x1fbf0, 0x1fbf9; + 0x20000, 0x2a6df; 0x2a700, 0x2b739; 0x2b740, 0x2b81d; 0x2b820, 0x2cea1; 0x2ceb0, 0x2ebe0; + 0x2ebf0, 0x2ee5d; 0x2f800, 0x2fa1d; 0x30000, 0x3134a; 0x31350, 0x323af; 0xe0100, 0xe01ef] let xid_start = Sedlex_utils.Cset.of_list [0x41, 0x5a; 
0x61, 0x7a; 0xaa, 0xaa; 0xb5, 0xb5; 0xba, 0xba; @@ -1964,7 +2005,7 @@ module Properties = struct 0x1880, 0x18a8; 0x18aa, 0x18aa; 0x18b0, 0x18f5; 0x1900, 0x191e; 0x1950, 0x196d; 0x1970, 0x1974; 0x1980, 0x19ab; 0x19b0, 0x19c9; 0x1a00, 0x1a16; 0x1a20, 0x1a54; 0x1aa7, 0x1aa7; 0x1b05, 0x1b33; 0x1b45, 0x1b4c; 0x1b83, 0x1ba0; 0x1bae, 0x1baf; - 0x1bba, 0x1be5; 0x1c00, 0x1c23; 0x1c4d, 0x1c4f; 0x1c5a, 0x1c7d; 0x1c80, 0x1c88; + 0x1bba, 0x1be5; 0x1c00, 0x1c23; 0x1c4d, 0x1c4f; 0x1c5a, 0x1c7d; 0x1c80, 0x1c8a; 0x1c90, 0x1cba; 0x1cbd, 0x1cbf; 0x1ce9, 0x1cec; 0x1cee, 0x1cf3; 0x1cf5, 0x1cf6; 0x1cfa, 0x1cfa; 0x1d00, 0x1dbf; 0x1e00, 0x1f15; 0x1f18, 0x1f1d; 0x1f20, 0x1f45; 0x1f48, 0x1f4d; 0x1f50, 0x1f57; 0x1f59, 0x1f59; 0x1f5b, 0x1f5b; 0x1f5d, 0x1f5d; @@ -1981,8 +2022,8 @@ module Properties = struct 0x30a1, 0x30fa; 0x30fc, 0x30ff; 0x3105, 0x312f; 0x3131, 0x318e; 0x31a0, 0x31bf; 0x31f0, 0x31ff; 0x3400, 0x4dbf; 0x4e00, 0xa48c; 0xa4d0, 0xa4fd; 0xa500, 0xa60c; 0xa610, 0xa61f; 0xa62a, 0xa62b; 0xa640, 0xa66e; 0xa67f, 0xa69d; 0xa6a0, 0xa6ef; - 0xa717, 0xa71f; 0xa722, 0xa788; 0xa78b, 0xa7ca; 0xa7d0, 0xa7d1; 0xa7d3, 0xa7d3; - 0xa7d5, 0xa7d9; 0xa7f2, 0xa801; 0xa803, 0xa805; 0xa807, 0xa80a; 0xa80c, 0xa822; + 0xa717, 0xa71f; 0xa722, 0xa788; 0xa78b, 0xa7cd; 0xa7d0, 0xa7d1; 0xa7d3, 0xa7d3; + 0xa7d5, 0xa7dc; 0xa7f2, 0xa801; 0xa803, 0xa805; 0xa807, 0xa80a; 0xa80c, 0xa822; 0xa840, 0xa873; 0xa882, 0xa8b3; 0xa8f2, 0xa8f7; 0xa8fb, 0xa8fb; 0xa8fd, 0xa8fe; 0xa90a, 0xa925; 0xa930, 0xa946; 0xa960, 0xa97c; 0xa984, 0xa9b2; 0xa9cf, 0xa9cf; 0xa9e0, 0xa9e4; 0xa9e6, 0xa9ef; 0xa9fa, 0xa9fe; 0xaa00, 0xaa28; 0xaa40, 0xaa42; @@ -2003,37 +2044,40 @@ module Properties = struct 0x103c8, 0x103cf; 0x103d1, 0x103d5; 0x10400, 0x1049d; 0x104b0, 0x104d3; 0x104d8, 0x104fb; 0x10500, 0x10527; 0x10530, 0x10563; 0x10570, 0x1057a; 0x1057c, 0x1058a; 0x1058c, 0x10592; 0x10594, 0x10595; 0x10597, 0x105a1; 0x105a3, 0x105b1; 0x105b3, 0x105b9; 0x105bb, 0x105bc; - 0x10600, 0x10736; 0x10740, 0x10755; 0x10760, 0x10767; 0x10780, 0x10785; 
0x10787, 0x107b0; - 0x107b2, 0x107ba; 0x10800, 0x10805; 0x10808, 0x10808; 0x1080a, 0x10835; 0x10837, 0x10838; - 0x1083c, 0x1083c; 0x1083f, 0x10855; 0x10860, 0x10876; 0x10880, 0x1089e; 0x108e0, 0x108f2; - 0x108f4, 0x108f5; 0x10900, 0x10915; 0x10920, 0x10939; 0x10980, 0x109b7; 0x109be, 0x109bf; - 0x10a00, 0x10a00; 0x10a10, 0x10a13; 0x10a15, 0x10a17; 0x10a19, 0x10a35; 0x10a60, 0x10a7c; - 0x10a80, 0x10a9c; 0x10ac0, 0x10ac7; 0x10ac9, 0x10ae4; 0x10b00, 0x10b35; 0x10b40, 0x10b55; - 0x10b60, 0x10b72; 0x10b80, 0x10b91; 0x10c00, 0x10c48; 0x10c80, 0x10cb2; 0x10cc0, 0x10cf2; - 0x10d00, 0x10d23; 0x10e80, 0x10ea9; 0x10eb0, 0x10eb1; 0x10f00, 0x10f1c; 0x10f27, 0x10f27; - 0x10f30, 0x10f45; 0x10f70, 0x10f81; 0x10fb0, 0x10fc4; 0x10fe0, 0x10ff6; 0x11003, 0x11037; - 0x11071, 0x11072; 0x11075, 0x11075; 0x11083, 0x110af; 0x110d0, 0x110e8; 0x11103, 0x11126; - 0x11144, 0x11144; 0x11147, 0x11147; 0x11150, 0x11172; 0x11176, 0x11176; 0x11183, 0x111b2; - 0x111c1, 0x111c4; 0x111da, 0x111da; 0x111dc, 0x111dc; 0x11200, 0x11211; 0x11213, 0x1122b; - 0x1123f, 0x11240; 0x11280, 0x11286; 0x11288, 0x11288; 0x1128a, 0x1128d; 0x1128f, 0x1129d; - 0x1129f, 0x112a8; 0x112b0, 0x112de; 0x11305, 0x1130c; 0x1130f, 0x11310; 0x11313, 0x11328; - 0x1132a, 0x11330; 0x11332, 0x11333; 0x11335, 0x11339; 0x1133d, 0x1133d; 0x11350, 0x11350; - 0x1135d, 0x11361; 0x11400, 0x11434; 0x11447, 0x1144a; 0x1145f, 0x11461; 0x11480, 0x114af; - 0x114c4, 0x114c5; 0x114c7, 0x114c7; 0x11580, 0x115ae; 0x115d8, 0x115db; 0x11600, 0x1162f; - 0x11644, 0x11644; 0x11680, 0x116aa; 0x116b8, 0x116b8; 0x11700, 0x1171a; 0x11740, 0x11746; - 0x11800, 0x1182b; 0x118a0, 0x118df; 0x118ff, 0x11906; 0x11909, 0x11909; 0x1190c, 0x11913; - 0x11915, 0x11916; 0x11918, 0x1192f; 0x1193f, 0x1193f; 0x11941, 0x11941; 0x119a0, 0x119a7; - 0x119aa, 0x119d0; 0x119e1, 0x119e1; 0x119e3, 0x119e3; 0x11a00, 0x11a00; 0x11a0b, 0x11a32; - 0x11a3a, 0x11a3a; 0x11a50, 0x11a50; 0x11a5c, 0x11a89; 0x11a9d, 0x11a9d; 0x11ab0, 0x11af8; - 0x11c00, 0x11c08; 0x11c0a, 0x11c2e; 0x11c40, 
0x11c40; 0x11c72, 0x11c8f; 0x11d00, 0x11d06; - 0x11d08, 0x11d09; 0x11d0b, 0x11d30; 0x11d46, 0x11d46; 0x11d60, 0x11d65; 0x11d67, 0x11d68; - 0x11d6a, 0x11d89; 0x11d98, 0x11d98; 0x11ee0, 0x11ef2; 0x11f02, 0x11f02; 0x11f04, 0x11f10; - 0x11f12, 0x11f33; 0x11fb0, 0x11fb0; 0x12000, 0x12399; 0x12400, 0x1246e; 0x12480, 0x12543; - 0x12f90, 0x12ff0; 0x13000, 0x1342f; 0x13441, 0x13446; 0x14400, 0x14646; 0x16800, 0x16a38; - 0x16a40, 0x16a5e; 0x16a70, 0x16abe; 0x16ad0, 0x16aed; 0x16b00, 0x16b2f; 0x16b40, 0x16b43; - 0x16b63, 0x16b77; 0x16b7d, 0x16b8f; 0x16e40, 0x16e7f; 0x16f00, 0x16f4a; 0x16f50, 0x16f50; + 0x105c0, 0x105f3; 0x10600, 0x10736; 0x10740, 0x10755; 0x10760, 0x10767; 0x10780, 0x10785; + 0x10787, 0x107b0; 0x107b2, 0x107ba; 0x10800, 0x10805; 0x10808, 0x10808; 0x1080a, 0x10835; + 0x10837, 0x10838; 0x1083c, 0x1083c; 0x1083f, 0x10855; 0x10860, 0x10876; 0x10880, 0x1089e; + 0x108e0, 0x108f2; 0x108f4, 0x108f5; 0x10900, 0x10915; 0x10920, 0x10939; 0x10980, 0x109b7; + 0x109be, 0x109bf; 0x10a00, 0x10a00; 0x10a10, 0x10a13; 0x10a15, 0x10a17; 0x10a19, 0x10a35; + 0x10a60, 0x10a7c; 0x10a80, 0x10a9c; 0x10ac0, 0x10ac7; 0x10ac9, 0x10ae4; 0x10b00, 0x10b35; + 0x10b40, 0x10b55; 0x10b60, 0x10b72; 0x10b80, 0x10b91; 0x10c00, 0x10c48; 0x10c80, 0x10cb2; + 0x10cc0, 0x10cf2; 0x10d00, 0x10d23; 0x10d4a, 0x10d65; 0x10d6f, 0x10d85; 0x10e80, 0x10ea9; + 0x10eb0, 0x10eb1; 0x10ec2, 0x10ec4; 0x10f00, 0x10f1c; 0x10f27, 0x10f27; 0x10f30, 0x10f45; + 0x10f70, 0x10f81; 0x10fb0, 0x10fc4; 0x10fe0, 0x10ff6; 0x11003, 0x11037; 0x11071, 0x11072; + 0x11075, 0x11075; 0x11083, 0x110af; 0x110d0, 0x110e8; 0x11103, 0x11126; 0x11144, 0x11144; + 0x11147, 0x11147; 0x11150, 0x11172; 0x11176, 0x11176; 0x11183, 0x111b2; 0x111c1, 0x111c4; + 0x111da, 0x111da; 0x111dc, 0x111dc; 0x11200, 0x11211; 0x11213, 0x1122b; 0x1123f, 0x11240; + 0x11280, 0x11286; 0x11288, 0x11288; 0x1128a, 0x1128d; 0x1128f, 0x1129d; 0x1129f, 0x112a8; + 0x112b0, 0x112de; 0x11305, 0x1130c; 0x1130f, 0x11310; 0x11313, 0x11328; 0x1132a, 0x11330; + 0x11332, 0x11333; 
0x11335, 0x11339; 0x1133d, 0x1133d; 0x11350, 0x11350; 0x1135d, 0x11361; + 0x11380, 0x11389; 0x1138b, 0x1138b; 0x1138e, 0x1138e; 0x11390, 0x113b5; 0x113b7, 0x113b7; + 0x113d1, 0x113d1; 0x113d3, 0x113d3; 0x11400, 0x11434; 0x11447, 0x1144a; 0x1145f, 0x11461; + 0x11480, 0x114af; 0x114c4, 0x114c5; 0x114c7, 0x114c7; 0x11580, 0x115ae; 0x115d8, 0x115db; + 0x11600, 0x1162f; 0x11644, 0x11644; 0x11680, 0x116aa; 0x116b8, 0x116b8; 0x11700, 0x1171a; + 0x11740, 0x11746; 0x11800, 0x1182b; 0x118a0, 0x118df; 0x118ff, 0x11906; 0x11909, 0x11909; + 0x1190c, 0x11913; 0x11915, 0x11916; 0x11918, 0x1192f; 0x1193f, 0x1193f; 0x11941, 0x11941; + 0x119a0, 0x119a7; 0x119aa, 0x119d0; 0x119e1, 0x119e1; 0x119e3, 0x119e3; 0x11a00, 0x11a00; + 0x11a0b, 0x11a32; 0x11a3a, 0x11a3a; 0x11a50, 0x11a50; 0x11a5c, 0x11a89; 0x11a9d, 0x11a9d; + 0x11ab0, 0x11af8; 0x11bc0, 0x11be0; 0x11c00, 0x11c08; 0x11c0a, 0x11c2e; 0x11c40, 0x11c40; + 0x11c72, 0x11c8f; 0x11d00, 0x11d06; 0x11d08, 0x11d09; 0x11d0b, 0x11d30; 0x11d46, 0x11d46; + 0x11d60, 0x11d65; 0x11d67, 0x11d68; 0x11d6a, 0x11d89; 0x11d98, 0x11d98; 0x11ee0, 0x11ef2; + 0x11f02, 0x11f02; 0x11f04, 0x11f10; 0x11f12, 0x11f33; 0x11fb0, 0x11fb0; 0x12000, 0x12399; + 0x12400, 0x1246e; 0x12480, 0x12543; 0x12f90, 0x12ff0; 0x13000, 0x1342f; 0x13441, 0x13446; + 0x13460, 0x143fa; 0x14400, 0x14646; 0x16100, 0x1611d; 0x16800, 0x16a38; 0x16a40, 0x16a5e; + 0x16a70, 0x16abe; 0x16ad0, 0x16aed; 0x16b00, 0x16b2f; 0x16b40, 0x16b43; 0x16b63, 0x16b77; + 0x16b7d, 0x16b8f; 0x16d40, 0x16d6c; 0x16e40, 0x16e7f; 0x16f00, 0x16f4a; 0x16f50, 0x16f50; 0x16f93, 0x16f9f; 0x16fe0, 0x16fe1; 0x16fe3, 0x16fe3; 0x17000, 0x187f7; 0x18800, 0x18cd5; - 0x18d00, 0x18d08; 0x1aff0, 0x1aff3; 0x1aff5, 0x1affb; 0x1affd, 0x1affe; 0x1b000, 0x1b122; + 0x18cff, 0x18d08; 0x1aff0, 0x1aff3; 0x1aff5, 0x1affb; 0x1affd, 0x1affe; 0x1b000, 0x1b122; 0x1b132, 0x1b132; 0x1b150, 0x1b152; 0x1b155, 0x1b155; 0x1b164, 0x1b167; 0x1b170, 0x1b2fb; 0x1bc00, 0x1bc6a; 0x1bc70, 0x1bc7c; 0x1bc80, 0x1bc88; 0x1bc90, 0x1bc99; 0x1d400, 0x1d454; 
0x1d456, 0x1d49c; 0x1d49e, 0x1d49f; 0x1d4a2, 0x1d4a2; 0x1d4a5, 0x1d4a6; 0x1d4a9, 0x1d4ac; @@ -2043,17 +2087,17 @@ module Properties = struct 0x1d6dc, 0x1d6fa; 0x1d6fc, 0x1d714; 0x1d716, 0x1d734; 0x1d736, 0x1d74e; 0x1d750, 0x1d76e; 0x1d770, 0x1d788; 0x1d78a, 0x1d7a8; 0x1d7aa, 0x1d7c2; 0x1d7c4, 0x1d7cb; 0x1df00, 0x1df1e; 0x1df25, 0x1df2a; 0x1e030, 0x1e06d; 0x1e100, 0x1e12c; 0x1e137, 0x1e13d; 0x1e14e, 0x1e14e; - 0x1e290, 0x1e2ad; 0x1e2c0, 0x1e2eb; 0x1e4d0, 0x1e4eb; 0x1e7e0, 0x1e7e6; 0x1e7e8, 0x1e7eb; - 0x1e7ed, 0x1e7ee; 0x1e7f0, 0x1e7fe; 0x1e800, 0x1e8c4; 0x1e900, 0x1e943; 0x1e94b, 0x1e94b; - 0x1ee00, 0x1ee03; 0x1ee05, 0x1ee1f; 0x1ee21, 0x1ee22; 0x1ee24, 0x1ee24; 0x1ee27, 0x1ee27; - 0x1ee29, 0x1ee32; 0x1ee34, 0x1ee37; 0x1ee39, 0x1ee39; 0x1ee3b, 0x1ee3b; 0x1ee42, 0x1ee42; - 0x1ee47, 0x1ee47; 0x1ee49, 0x1ee49; 0x1ee4b, 0x1ee4b; 0x1ee4d, 0x1ee4f; 0x1ee51, 0x1ee52; - 0x1ee54, 0x1ee54; 0x1ee57, 0x1ee57; 0x1ee59, 0x1ee59; 0x1ee5b, 0x1ee5b; 0x1ee5d, 0x1ee5d; - 0x1ee5f, 0x1ee5f; 0x1ee61, 0x1ee62; 0x1ee64, 0x1ee64; 0x1ee67, 0x1ee6a; 0x1ee6c, 0x1ee72; - 0x1ee74, 0x1ee77; 0x1ee79, 0x1ee7c; 0x1ee7e, 0x1ee7e; 0x1ee80, 0x1ee89; 0x1ee8b, 0x1ee9b; - 0x1eea1, 0x1eea3; 0x1eea5, 0x1eea9; 0x1eeab, 0x1eebb; 0x20000, 0x2a6df; 0x2a700, 0x2b739; - 0x2b740, 0x2b81d; 0x2b820, 0x2cea1; 0x2ceb0, 0x2ebe0; 0x2f800, 0x2fa1d; 0x30000, 0x3134a; - 0x31350, 0x323af] + 0x1e290, 0x1e2ad; 0x1e2c0, 0x1e2eb; 0x1e4d0, 0x1e4eb; 0x1e5d0, 0x1e5ed; 0x1e5f0, 0x1e5f0; + 0x1e7e0, 0x1e7e6; 0x1e7e8, 0x1e7eb; 0x1e7ed, 0x1e7ee; 0x1e7f0, 0x1e7fe; 0x1e800, 0x1e8c4; + 0x1e900, 0x1e943; 0x1e94b, 0x1e94b; 0x1ee00, 0x1ee03; 0x1ee05, 0x1ee1f; 0x1ee21, 0x1ee22; + 0x1ee24, 0x1ee24; 0x1ee27, 0x1ee27; 0x1ee29, 0x1ee32; 0x1ee34, 0x1ee37; 0x1ee39, 0x1ee39; + 0x1ee3b, 0x1ee3b; 0x1ee42, 0x1ee42; 0x1ee47, 0x1ee47; 0x1ee49, 0x1ee49; 0x1ee4b, 0x1ee4b; + 0x1ee4d, 0x1ee4f; 0x1ee51, 0x1ee52; 0x1ee54, 0x1ee54; 0x1ee57, 0x1ee57; 0x1ee59, 0x1ee59; + 0x1ee5b, 0x1ee5b; 0x1ee5d, 0x1ee5d; 0x1ee5f, 0x1ee5f; 0x1ee61, 0x1ee62; 0x1ee64, 0x1ee64; + 
0x1ee67, 0x1ee6a; 0x1ee6c, 0x1ee72; 0x1ee74, 0x1ee77; 0x1ee79, 0x1ee7c; 0x1ee7e, 0x1ee7e; + 0x1ee80, 0x1ee89; 0x1ee8b, 0x1ee9b; 0x1eea1, 0x1eea3; 0x1eea5, 0x1eea9; 0x1eeab, 0x1eebb; + 0x20000, 0x2a6df; 0x2a700, 0x2b739; 0x2b740, 0x2b81d; 0x2b820, 0x2cea1; 0x2ceb0, 0x2ebe0; + 0x2ebf0, 0x2ee5d; 0x2f800, 0x2fa1d; 0x30000, 0x3134a; 0x31350, 0x323af] let list = [ ("alphabetic", alphabetic); @@ -2091,11 +2135,15 @@ module Properties = struct - grapheme_extend - grapheme_link - hyphen + - id_compat_math_continue + - id_compat_math_start - ideographic - ids_binary_operator - ids_trinary_operator + - ids_unary_operator - join_control - logical_order_exception + - modifier_combining_mark - noncharacter_code_point - other_default_ignorable_code_point - other_grapheme_extend diff --git a/sedlex.opam b/sedlex.opam index fdde84f..8386103 100644 --- a/sedlex.opam +++ b/sedlex.opam @@ -1,6 +1,6 @@ # This file is generated by dune, edit dune-project instead opam-version: "2.0" -version: "3.6" +version: "3.7" synopsis: "An OCaml lexer generator for Unicode" description: """ sedlex is a lexer generator for OCaml. 
It is similar to ocamllex, but supports diff --git a/src/generator/data/base_url b/src/generator/data/base_url index c87473f..1264fd1 100644 --- a/src/generator/data/base_url +++ b/src/generator/data/base_url @@ -1 +1 @@ -https://www.unicode.org/Public/16.0.0 \ No newline at end of file +https://www.unicode.org/Public/17.0.0 \ No newline at end of file diff --git a/src/syntax/unicode.ml b/src/syntax/unicode.ml index be3e74f..24bcf11 100644 --- a/src/syntax/unicode.ml +++ b/src/syntax/unicode.ml @@ -5,7 +5,7 @@ -let version = "16.0.0" +let version = "17.0.0" module Categories = struct @@ -24,149 +24,149 @@ module Categories = struct 0x530, 0x530; 0x557, 0x558; 0x58b, 0x58c; 0x590, 0x590; 0x5c8, 0x5cf; 0x5eb, 0x5ee; 0x5f5, 0x5ff; 0x70e, 0x70e; 0x74b, 0x74c; 0x7b2, 0x7bf; 0x7fb, 0x7fc; 0x82e, 0x82f; 0x83f, 0x83f; 0x85c, 0x85d; 0x85f, 0x85f; - 0x86b, 0x86f; 0x88f, 0x88f; 0x892, 0x896; 0x984, 0x984; 0x98d, 0x98e; - 0x991, 0x992; 0x9a9, 0x9a9; 0x9b1, 0x9b1; 0x9b3, 0x9b5; 0x9ba, 0x9bb; - 0x9c5, 0x9c6; 0x9c9, 0x9ca; 0x9cf, 0x9d6; 0x9d8, 0x9db; 0x9de, 0x9de; - 0x9e4, 0x9e5; 0x9ff, 0xa00; 0xa04, 0xa04; 0xa0b, 0xa0e; 0xa11, 0xa12; - 0xa29, 0xa29; 0xa31, 0xa31; 0xa34, 0xa34; 0xa37, 0xa37; 0xa3a, 0xa3b; - 0xa3d, 0xa3d; 0xa43, 0xa46; 0xa49, 0xa4a; 0xa4e, 0xa50; 0xa52, 0xa58; - 0xa5d, 0xa5d; 0xa5f, 0xa65; 0xa77, 0xa80; 0xa84, 0xa84; 0xa8e, 0xa8e; - 0xa92, 0xa92; 0xaa9, 0xaa9; 0xab1, 0xab1; 0xab4, 0xab4; 0xaba, 0xabb; - 0xac6, 0xac6; 0xaca, 0xaca; 0xace, 0xacf; 0xad1, 0xadf; 0xae4, 0xae5; - 0xaf2, 0xaf8; 0xb00, 0xb00; 0xb04, 0xb04; 0xb0d, 0xb0e; 0xb11, 0xb12; - 0xb29, 0xb29; 0xb31, 0xb31; 0xb34, 0xb34; 0xb3a, 0xb3b; 0xb45, 0xb46; - 0xb49, 0xb4a; 0xb4e, 0xb54; 0xb58, 0xb5b; 0xb5e, 0xb5e; 0xb64, 0xb65; - 0xb78, 0xb81; 0xb84, 0xb84; 0xb8b, 0xb8d; 0xb91, 0xb91; 0xb96, 0xb98; - 0xb9b, 0xb9b; 0xb9d, 0xb9d; 0xba0, 0xba2; 0xba5, 0xba7; 0xbab, 0xbad; - 0xbba, 0xbbd; 0xbc3, 0xbc5; 0xbc9, 0xbc9; 0xbce, 0xbcf; 0xbd1, 0xbd6; - 0xbd8, 0xbe5; 0xbfb, 0xbff; 0xc0d, 0xc0d; 0xc11, 0xc11; 0xc29, 
0xc29; - 0xc3a, 0xc3b; 0xc45, 0xc45; 0xc49, 0xc49; 0xc4e, 0xc54; 0xc57, 0xc57; - 0xc5b, 0xc5c; 0xc5e, 0xc5f; 0xc64, 0xc65; 0xc70, 0xc76; 0xc8d, 0xc8d; - 0xc91, 0xc91; 0xca9, 0xca9; 0xcb4, 0xcb4; 0xcba, 0xcbb; 0xcc5, 0xcc5; - 0xcc9, 0xcc9; 0xcce, 0xcd4; 0xcd7, 0xcdc; 0xcdf, 0xcdf; 0xce4, 0xce5; - 0xcf0, 0xcf0; 0xcf4, 0xcff; 0xd0d, 0xd0d; 0xd11, 0xd11; 0xd45, 0xd45; - 0xd49, 0xd49; 0xd50, 0xd53; 0xd64, 0xd65; 0xd80, 0xd80; 0xd84, 0xd84; - 0xd97, 0xd99; 0xdb2, 0xdb2; 0xdbc, 0xdbc; 0xdbe, 0xdbf; 0xdc7, 0xdc9; - 0xdcb, 0xdce; 0xdd5, 0xdd5; 0xdd7, 0xdd7; 0xde0, 0xde5; 0xdf0, 0xdf1; - 0xdf5, 0xe00; 0xe3b, 0xe3e; 0xe5c, 0xe80; 0xe83, 0xe83; 0xe85, 0xe85; - 0xe8b, 0xe8b; 0xea4, 0xea4; 0xea6, 0xea6; 0xebe, 0xebf; 0xec5, 0xec5; - 0xec7, 0xec7; 0xecf, 0xecf; 0xeda, 0xedb; 0xee0, 0xeff; 0xf48, 0xf48; - 0xf6d, 0xf70; 0xf98, 0xf98; 0xfbd, 0xfbd; 0xfcd, 0xfcd; 0xfdb, 0xfff; - 0x10c6, 0x10c6; 0x10c8, 0x10cc; 0x10ce, 0x10cf; 0x1249, 0x1249; 0x124e, 0x124f; - 0x1257, 0x1257; 0x1259, 0x1259; 0x125e, 0x125f; 0x1289, 0x1289; 0x128e, 0x128f; - 0x12b1, 0x12b1; 0x12b6, 0x12b7; 0x12bf, 0x12bf; 0x12c1, 0x12c1; 0x12c6, 0x12c7; - 0x12d7, 0x12d7; 0x1311, 0x1311; 0x1316, 0x1317; 0x135b, 0x135c; 0x137d, 0x137f; - 0x139a, 0x139f; 0x13f6, 0x13f7; 0x13fe, 0x13ff; 0x169d, 0x169f; 0x16f9, 0x16ff; - 0x1716, 0x171e; 0x1737, 0x173f; 0x1754, 0x175f; 0x176d, 0x176d; 0x1771, 0x1771; - 0x1774, 0x177f; 0x17de, 0x17df; 0x17ea, 0x17ef; 0x17fa, 0x17ff; 0x181a, 0x181f; - 0x1879, 0x187f; 0x18ab, 0x18af; 0x18f6, 0x18ff; 0x191f, 0x191f; 0x192c, 0x192f; - 0x193c, 0x193f; 0x1941, 0x1943; 0x196e, 0x196f; 0x1975, 0x197f; 0x19ac, 0x19af; - 0x19ca, 0x19cf; 0x19db, 0x19dd; 0x1a1c, 0x1a1d; 0x1a5f, 0x1a5f; 0x1a7d, 0x1a7e; - 0x1a8a, 0x1a8f; 0x1a9a, 0x1a9f; 0x1aae, 0x1aaf; 0x1acf, 0x1aff; 0x1b4d, 0x1b4d; + 0x86b, 0x86f; 0x892, 0x896; 0x984, 0x984; 0x98d, 0x98e; 0x991, 0x992; + 0x9a9, 0x9a9; 0x9b1, 0x9b1; 0x9b3, 0x9b5; 0x9ba, 0x9bb; 0x9c5, 0x9c6; + 0x9c9, 0x9ca; 0x9cf, 0x9d6; 0x9d8, 0x9db; 0x9de, 0x9de; 0x9e4, 0x9e5; + 0x9ff, 
0xa00; 0xa04, 0xa04; 0xa0b, 0xa0e; 0xa11, 0xa12; 0xa29, 0xa29; + 0xa31, 0xa31; 0xa34, 0xa34; 0xa37, 0xa37; 0xa3a, 0xa3b; 0xa3d, 0xa3d; + 0xa43, 0xa46; 0xa49, 0xa4a; 0xa4e, 0xa50; 0xa52, 0xa58; 0xa5d, 0xa5d; + 0xa5f, 0xa65; 0xa77, 0xa80; 0xa84, 0xa84; 0xa8e, 0xa8e; 0xa92, 0xa92; + 0xaa9, 0xaa9; 0xab1, 0xab1; 0xab4, 0xab4; 0xaba, 0xabb; 0xac6, 0xac6; + 0xaca, 0xaca; 0xace, 0xacf; 0xad1, 0xadf; 0xae4, 0xae5; 0xaf2, 0xaf8; + 0xb00, 0xb00; 0xb04, 0xb04; 0xb0d, 0xb0e; 0xb11, 0xb12; 0xb29, 0xb29; + 0xb31, 0xb31; 0xb34, 0xb34; 0xb3a, 0xb3b; 0xb45, 0xb46; 0xb49, 0xb4a; + 0xb4e, 0xb54; 0xb58, 0xb5b; 0xb5e, 0xb5e; 0xb64, 0xb65; 0xb78, 0xb81; + 0xb84, 0xb84; 0xb8b, 0xb8d; 0xb91, 0xb91; 0xb96, 0xb98; 0xb9b, 0xb9b; + 0xb9d, 0xb9d; 0xba0, 0xba2; 0xba5, 0xba7; 0xbab, 0xbad; 0xbba, 0xbbd; + 0xbc3, 0xbc5; 0xbc9, 0xbc9; 0xbce, 0xbcf; 0xbd1, 0xbd6; 0xbd8, 0xbe5; + 0xbfb, 0xbff; 0xc0d, 0xc0d; 0xc11, 0xc11; 0xc29, 0xc29; 0xc3a, 0xc3b; + 0xc45, 0xc45; 0xc49, 0xc49; 0xc4e, 0xc54; 0xc57, 0xc57; 0xc5b, 0xc5b; + 0xc5e, 0xc5f; 0xc64, 0xc65; 0xc70, 0xc76; 0xc8d, 0xc8d; 0xc91, 0xc91; + 0xca9, 0xca9; 0xcb4, 0xcb4; 0xcba, 0xcbb; 0xcc5, 0xcc5; 0xcc9, 0xcc9; + 0xcce, 0xcd4; 0xcd7, 0xcdb; 0xcdf, 0xcdf; 0xce4, 0xce5; 0xcf0, 0xcf0; + 0xcf4, 0xcff; 0xd0d, 0xd0d; 0xd11, 0xd11; 0xd45, 0xd45; 0xd49, 0xd49; + 0xd50, 0xd53; 0xd64, 0xd65; 0xd80, 0xd80; 0xd84, 0xd84; 0xd97, 0xd99; + 0xdb2, 0xdb2; 0xdbc, 0xdbc; 0xdbe, 0xdbf; 0xdc7, 0xdc9; 0xdcb, 0xdce; + 0xdd5, 0xdd5; 0xdd7, 0xdd7; 0xde0, 0xde5; 0xdf0, 0xdf1; 0xdf5, 0xe00; + 0xe3b, 0xe3e; 0xe5c, 0xe80; 0xe83, 0xe83; 0xe85, 0xe85; 0xe8b, 0xe8b; + 0xea4, 0xea4; 0xea6, 0xea6; 0xebe, 0xebf; 0xec5, 0xec5; 0xec7, 0xec7; + 0xecf, 0xecf; 0xeda, 0xedb; 0xee0, 0xeff; 0xf48, 0xf48; 0xf6d, 0xf70; + 0xf98, 0xf98; 0xfbd, 0xfbd; 0xfcd, 0xfcd; 0xfdb, 0xfff; 0x10c6, 0x10c6; + 0x10c8, 0x10cc; 0x10ce, 0x10cf; 0x1249, 0x1249; 0x124e, 0x124f; 0x1257, 0x1257; + 0x1259, 0x1259; 0x125e, 0x125f; 0x1289, 0x1289; 0x128e, 0x128f; 0x12b1, 0x12b1; + 0x12b6, 0x12b7; 0x12bf, 0x12bf; 0x12c1, 
0x12c1; 0x12c6, 0x12c7; 0x12d7, 0x12d7; + 0x1311, 0x1311; 0x1316, 0x1317; 0x135b, 0x135c; 0x137d, 0x137f; 0x139a, 0x139f; + 0x13f6, 0x13f7; 0x13fe, 0x13ff; 0x169d, 0x169f; 0x16f9, 0x16ff; 0x1716, 0x171e; + 0x1737, 0x173f; 0x1754, 0x175f; 0x176d, 0x176d; 0x1771, 0x1771; 0x1774, 0x177f; + 0x17de, 0x17df; 0x17ea, 0x17ef; 0x17fa, 0x17ff; 0x181a, 0x181f; 0x1879, 0x187f; + 0x18ab, 0x18af; 0x18f6, 0x18ff; 0x191f, 0x191f; 0x192c, 0x192f; 0x193c, 0x193f; + 0x1941, 0x1943; 0x196e, 0x196f; 0x1975, 0x197f; 0x19ac, 0x19af; 0x19ca, 0x19cf; + 0x19db, 0x19dd; 0x1a1c, 0x1a1d; 0x1a5f, 0x1a5f; 0x1a7d, 0x1a7e; 0x1a8a, 0x1a8f; + 0x1a9a, 0x1a9f; 0x1aae, 0x1aaf; 0x1ade, 0x1adf; 0x1aec, 0x1aff; 0x1b4d, 0x1b4d; 0x1bf4, 0x1bfb; 0x1c38, 0x1c3a; 0x1c4a, 0x1c4c; 0x1c8b, 0x1c8f; 0x1cbb, 0x1cbc; 0x1cc8, 0x1ccf; 0x1cfb, 0x1cff; 0x1f16, 0x1f17; 0x1f1e, 0x1f1f; 0x1f46, 0x1f47; 0x1f4e, 0x1f4f; 0x1f58, 0x1f58; 0x1f5a, 0x1f5a; 0x1f5c, 0x1f5c; 0x1f5e, 0x1f5e; 0x1f7e, 0x1f7f; 0x1fb5, 0x1fb5; 0x1fc5, 0x1fc5; 0x1fd4, 0x1fd5; 0x1fdc, 0x1fdc; 0x1ff0, 0x1ff1; 0x1ff5, 0x1ff5; 0x1fff, 0x1fff; 0x2065, 0x2065; 0x2072, 0x2073; - 0x208f, 0x208f; 0x209d, 0x209f; 0x20c1, 0x20cf; 0x20f1, 0x20ff; 0x218c, 0x218f; - 0x242a, 0x243f; 0x244b, 0x245f; 0x2b74, 0x2b75; 0x2b96, 0x2b96; 0x2cf4, 0x2cf8; - 0x2d26, 0x2d26; 0x2d28, 0x2d2c; 0x2d2e, 0x2d2f; 0x2d68, 0x2d6e; 0x2d71, 0x2d7e; - 0x2d97, 0x2d9f; 0x2da7, 0x2da7; 0x2daf, 0x2daf; 0x2db7, 0x2db7; 0x2dbf, 0x2dbf; - 0x2dc7, 0x2dc7; 0x2dcf, 0x2dcf; 0x2dd7, 0x2dd7; 0x2ddf, 0x2ddf; 0x2e5e, 0x2e7f; - 0x2e9a, 0x2e9a; 0x2ef4, 0x2eff; 0x2fd6, 0x2fef; 0x3040, 0x3040; 0x3097, 0x3098; - 0x3100, 0x3104; 0x3130, 0x3130; 0x318f, 0x318f; 0x31e6, 0x31ee; 0x321f, 0x321f; - 0xa48d, 0xa48f; 0xa4c7, 0xa4cf; 0xa62c, 0xa63f; 0xa6f8, 0xa6ff; 0xa7ce, 0xa7cf; - 0xa7d2, 0xa7d2; 0xa7d4, 0xa7d4; 0xa7dd, 0xa7f1; 0xa82d, 0xa82f; 0xa83a, 0xa83f; - 0xa878, 0xa87f; 0xa8c6, 0xa8cd; 0xa8da, 0xa8df; 0xa954, 0xa95e; 0xa97d, 0xa97f; - 0xa9ce, 0xa9ce; 0xa9da, 0xa9dd; 0xa9ff, 0xa9ff; 0xaa37, 0xaa3f; 0xaa4e, 0xaa4f; - 
0xaa5a, 0xaa5b; 0xaac3, 0xaada; 0xaaf7, 0xab00; 0xab07, 0xab08; 0xab0f, 0xab10; - 0xab17, 0xab1f; 0xab27, 0xab27; 0xab2f, 0xab2f; 0xab6c, 0xab6f; 0xabee, 0xabef; - 0xabfa, 0xabff; 0xd7a4, 0xd7af; 0xd7c7, 0xd7ca; 0xd7fc, 0xd7ff; 0xfa6e, 0xfa6f; - 0xfada, 0xfaff; 0xfb07, 0xfb12; 0xfb18, 0xfb1c; 0xfb37, 0xfb37; 0xfb3d, 0xfb3d; - 0xfb3f, 0xfb3f; 0xfb42, 0xfb42; 0xfb45, 0xfb45; 0xfbc3, 0xfbd2; 0xfd90, 0xfd91; - 0xfdc8, 0xfdce; 0xfdd0, 0xfdef; 0xfe1a, 0xfe1f; 0xfe53, 0xfe53; 0xfe67, 0xfe67; - 0xfe6c, 0xfe6f; 0xfe75, 0xfe75; 0xfefd, 0xfefe; 0xff00, 0xff00; 0xffbf, 0xffc1; - 0xffc8, 0xffc9; 0xffd0, 0xffd1; 0xffd8, 0xffd9; 0xffdd, 0xffdf; 0xffe7, 0xffe7; - 0xffef, 0xfff8; 0xfffe, 0xffff; 0x1000c, 0x1000c; 0x10027, 0x10027; 0x1003b, 0x1003b; - 0x1003e, 0x1003e; 0x1004e, 0x1004f; 0x1005e, 0x1007f; 0x100fb, 0x100ff; 0x10103, 0x10106; - 0x10134, 0x10136; 0x1018f, 0x1018f; 0x1019d, 0x1019f; 0x101a1, 0x101cf; 0x101fe, 0x1027f; - 0x1029d, 0x1029f; 0x102d1, 0x102df; 0x102fc, 0x102ff; 0x10324, 0x1032c; 0x1034b, 0x1034f; - 0x1037b, 0x1037f; 0x1039e, 0x1039e; 0x103c4, 0x103c7; 0x103d6, 0x103ff; 0x1049e, 0x1049f; - 0x104aa, 0x104af; 0x104d4, 0x104d7; 0x104fc, 0x104ff; 0x10528, 0x1052f; 0x10564, 0x1056e; - 0x1057b, 0x1057b; 0x1058b, 0x1058b; 0x10593, 0x10593; 0x10596, 0x10596; 0x105a2, 0x105a2; - 0x105b2, 0x105b2; 0x105ba, 0x105ba; 0x105bd, 0x105bf; 0x105f4, 0x105ff; 0x10737, 0x1073f; - 0x10756, 0x1075f; 0x10768, 0x1077f; 0x10786, 0x10786; 0x107b1, 0x107b1; 0x107bb, 0x107ff; - 0x10806, 0x10807; 0x10809, 0x10809; 0x10836, 0x10836; 0x10839, 0x1083b; 0x1083d, 0x1083e; - 0x10856, 0x10856; 0x1089f, 0x108a6; 0x108b0, 0x108df; 0x108f3, 0x108f3; 0x108f6, 0x108fa; - 0x1091c, 0x1091e; 0x1093a, 0x1093e; 0x10940, 0x1097f; 0x109b8, 0x109bb; 0x109d0, 0x109d1; - 0x10a04, 0x10a04; 0x10a07, 0x10a0b; 0x10a14, 0x10a14; 0x10a18, 0x10a18; 0x10a36, 0x10a37; - 0x10a3b, 0x10a3e; 0x10a49, 0x10a4f; 0x10a59, 0x10a5f; 0x10aa0, 0x10abf; 0x10ae7, 0x10aea; - 0x10af7, 0x10aff; 0x10b36, 0x10b38; 0x10b56, 0x10b57; 
0x10b73, 0x10b77; 0x10b92, 0x10b98; - 0x10b9d, 0x10ba8; 0x10bb0, 0x10bff; 0x10c49, 0x10c7f; 0x10cb3, 0x10cbf; 0x10cf3, 0x10cf9; - 0x10d28, 0x10d2f; 0x10d3a, 0x10d3f; 0x10d66, 0x10d68; 0x10d86, 0x10d8d; 0x10d90, 0x10e5f; - 0x10e7f, 0x10e7f; 0x10eaa, 0x10eaa; 0x10eae, 0x10eaf; 0x10eb2, 0x10ec1; 0x10ec5, 0x10efb; - 0x10f28, 0x10f2f; 0x10f5a, 0x10f6f; 0x10f8a, 0x10faf; 0x10fcc, 0x10fdf; 0x10ff7, 0x10fff; - 0x1104e, 0x11051; 0x11076, 0x1107e; 0x110c3, 0x110cc; 0x110ce, 0x110cf; 0x110e9, 0x110ef; - 0x110fa, 0x110ff; 0x11135, 0x11135; 0x11148, 0x1114f; 0x11177, 0x1117f; 0x111e0, 0x111e0; - 0x111f5, 0x111ff; 0x11212, 0x11212; 0x11242, 0x1127f; 0x11287, 0x11287; 0x11289, 0x11289; - 0x1128e, 0x1128e; 0x1129e, 0x1129e; 0x112aa, 0x112af; 0x112eb, 0x112ef; 0x112fa, 0x112ff; - 0x11304, 0x11304; 0x1130d, 0x1130e; 0x11311, 0x11312; 0x11329, 0x11329; 0x11331, 0x11331; - 0x11334, 0x11334; 0x1133a, 0x1133a; 0x11345, 0x11346; 0x11349, 0x1134a; 0x1134e, 0x1134f; - 0x11351, 0x11356; 0x11358, 0x1135c; 0x11364, 0x11365; 0x1136d, 0x1136f; 0x11375, 0x1137f; - 0x1138a, 0x1138a; 0x1138c, 0x1138d; 0x1138f, 0x1138f; 0x113b6, 0x113b6; 0x113c1, 0x113c1; - 0x113c3, 0x113c4; 0x113c6, 0x113c6; 0x113cb, 0x113cb; 0x113d6, 0x113d6; 0x113d9, 0x113e0; - 0x113e3, 0x113ff; 0x1145c, 0x1145c; 0x11462, 0x1147f; 0x114c8, 0x114cf; 0x114da, 0x1157f; - 0x115b6, 0x115b7; 0x115de, 0x115ff; 0x11645, 0x1164f; 0x1165a, 0x1165f; 0x1166d, 0x1167f; - 0x116ba, 0x116bf; 0x116ca, 0x116cf; 0x116e4, 0x116ff; 0x1171b, 0x1171c; 0x1172c, 0x1172f; - 0x11747, 0x117ff; 0x1183c, 0x1189f; 0x118f3, 0x118fe; 0x11907, 0x11908; 0x1190a, 0x1190b; - 0x11914, 0x11914; 0x11917, 0x11917; 0x11936, 0x11936; 0x11939, 0x1193a; 0x11947, 0x1194f; - 0x1195a, 0x1199f; 0x119a8, 0x119a9; 0x119d8, 0x119d9; 0x119e5, 0x119ff; 0x11a48, 0x11a4f; - 0x11aa3, 0x11aaf; 0x11af9, 0x11aff; 0x11b0a, 0x11bbf; 0x11be2, 0x11bef; 0x11bfa, 0x11bff; + 0x208f, 0x208f; 0x209d, 0x209f; 0x20c2, 0x20cf; 0x20f1, 0x20ff; 0x218c, 0x218f; + 0x242a, 0x243f; 0x244b, 0x245f; 0x2b74, 
0x2b75; 0x2cf4, 0x2cf8; 0x2d26, 0x2d26; + 0x2d28, 0x2d2c; 0x2d2e, 0x2d2f; 0x2d68, 0x2d6e; 0x2d71, 0x2d7e; 0x2d97, 0x2d9f; + 0x2da7, 0x2da7; 0x2daf, 0x2daf; 0x2db7, 0x2db7; 0x2dbf, 0x2dbf; 0x2dc7, 0x2dc7; + 0x2dcf, 0x2dcf; 0x2dd7, 0x2dd7; 0x2ddf, 0x2ddf; 0x2e5e, 0x2e7f; 0x2e9a, 0x2e9a; + 0x2ef4, 0x2eff; 0x2fd6, 0x2fef; 0x3040, 0x3040; 0x3097, 0x3098; 0x3100, 0x3104; + 0x3130, 0x3130; 0x318f, 0x318f; 0x31e6, 0x31ee; 0x321f, 0x321f; 0xa48d, 0xa48f; + 0xa4c7, 0xa4cf; 0xa62c, 0xa63f; 0xa6f8, 0xa6ff; 0xa7dd, 0xa7f0; 0xa82d, 0xa82f; + 0xa83a, 0xa83f; 0xa878, 0xa87f; 0xa8c6, 0xa8cd; 0xa8da, 0xa8df; 0xa954, 0xa95e; + 0xa97d, 0xa97f; 0xa9ce, 0xa9ce; 0xa9da, 0xa9dd; 0xa9ff, 0xa9ff; 0xaa37, 0xaa3f; + 0xaa4e, 0xaa4f; 0xaa5a, 0xaa5b; 0xaac3, 0xaada; 0xaaf7, 0xab00; 0xab07, 0xab08; + 0xab0f, 0xab10; 0xab17, 0xab1f; 0xab27, 0xab27; 0xab2f, 0xab2f; 0xab6c, 0xab6f; + 0xabee, 0xabef; 0xabfa, 0xabff; 0xd7a4, 0xd7af; 0xd7c7, 0xd7ca; 0xd7fc, 0xd7ff; + 0xfa6e, 0xfa6f; 0xfada, 0xfaff; 0xfb07, 0xfb12; 0xfb18, 0xfb1c; 0xfb37, 0xfb37; + 0xfb3d, 0xfb3d; 0xfb3f, 0xfb3f; 0xfb42, 0xfb42; 0xfb45, 0xfb45; 0xfdd0, 0xfdef; + 0xfe1a, 0xfe1f; 0xfe53, 0xfe53; 0xfe67, 0xfe67; 0xfe6c, 0xfe6f; 0xfe75, 0xfe75; + 0xfefd, 0xfefe; 0xff00, 0xff00; 0xffbf, 0xffc1; 0xffc8, 0xffc9; 0xffd0, 0xffd1; + 0xffd8, 0xffd9; 0xffdd, 0xffdf; 0xffe7, 0xffe7; 0xffef, 0xfff8; 0xfffe, 0xffff; + 0x1000c, 0x1000c; 0x10027, 0x10027; 0x1003b, 0x1003b; 0x1003e, 0x1003e; 0x1004e, 0x1004f; + 0x1005e, 0x1007f; 0x100fb, 0x100ff; 0x10103, 0x10106; 0x10134, 0x10136; 0x1018f, 0x1018f; + 0x1019d, 0x1019f; 0x101a1, 0x101cf; 0x101fe, 0x1027f; 0x1029d, 0x1029f; 0x102d1, 0x102df; + 0x102fc, 0x102ff; 0x10324, 0x1032c; 0x1034b, 0x1034f; 0x1037b, 0x1037f; 0x1039e, 0x1039e; + 0x103c4, 0x103c7; 0x103d6, 0x103ff; 0x1049e, 0x1049f; 0x104aa, 0x104af; 0x104d4, 0x104d7; + 0x104fc, 0x104ff; 0x10528, 0x1052f; 0x10564, 0x1056e; 0x1057b, 0x1057b; 0x1058b, 0x1058b; + 0x10593, 0x10593; 0x10596, 0x10596; 0x105a2, 0x105a2; 0x105b2, 0x105b2; 0x105ba, 0x105ba; + 
0x105bd, 0x105bf; 0x105f4, 0x105ff; 0x10737, 0x1073f; 0x10756, 0x1075f; 0x10768, 0x1077f; + 0x10786, 0x10786; 0x107b1, 0x107b1; 0x107bb, 0x107ff; 0x10806, 0x10807; 0x10809, 0x10809; + 0x10836, 0x10836; 0x10839, 0x1083b; 0x1083d, 0x1083e; 0x10856, 0x10856; 0x1089f, 0x108a6; + 0x108b0, 0x108df; 0x108f3, 0x108f3; 0x108f6, 0x108fa; 0x1091c, 0x1091e; 0x1093a, 0x1093e; + 0x1095a, 0x1097f; 0x109b8, 0x109bb; 0x109d0, 0x109d1; 0x10a04, 0x10a04; 0x10a07, 0x10a0b; + 0x10a14, 0x10a14; 0x10a18, 0x10a18; 0x10a36, 0x10a37; 0x10a3b, 0x10a3e; 0x10a49, 0x10a4f; + 0x10a59, 0x10a5f; 0x10aa0, 0x10abf; 0x10ae7, 0x10aea; 0x10af7, 0x10aff; 0x10b36, 0x10b38; + 0x10b56, 0x10b57; 0x10b73, 0x10b77; 0x10b92, 0x10b98; 0x10b9d, 0x10ba8; 0x10bb0, 0x10bff; + 0x10c49, 0x10c7f; 0x10cb3, 0x10cbf; 0x10cf3, 0x10cf9; 0x10d28, 0x10d2f; 0x10d3a, 0x10d3f; + 0x10d66, 0x10d68; 0x10d86, 0x10d8d; 0x10d90, 0x10e5f; 0x10e7f, 0x10e7f; 0x10eaa, 0x10eaa; + 0x10eae, 0x10eaf; 0x10eb2, 0x10ec1; 0x10ec8, 0x10ecf; 0x10ed9, 0x10ef9; 0x10f28, 0x10f2f; + 0x10f5a, 0x10f6f; 0x10f8a, 0x10faf; 0x10fcc, 0x10fdf; 0x10ff7, 0x10fff; 0x1104e, 0x11051; + 0x11076, 0x1107e; 0x110c3, 0x110cc; 0x110ce, 0x110cf; 0x110e9, 0x110ef; 0x110fa, 0x110ff; + 0x11135, 0x11135; 0x11148, 0x1114f; 0x11177, 0x1117f; 0x111e0, 0x111e0; 0x111f5, 0x111ff; + 0x11212, 0x11212; 0x11242, 0x1127f; 0x11287, 0x11287; 0x11289, 0x11289; 0x1128e, 0x1128e; + 0x1129e, 0x1129e; 0x112aa, 0x112af; 0x112eb, 0x112ef; 0x112fa, 0x112ff; 0x11304, 0x11304; + 0x1130d, 0x1130e; 0x11311, 0x11312; 0x11329, 0x11329; 0x11331, 0x11331; 0x11334, 0x11334; + 0x1133a, 0x1133a; 0x11345, 0x11346; 0x11349, 0x1134a; 0x1134e, 0x1134f; 0x11351, 0x11356; + 0x11358, 0x1135c; 0x11364, 0x11365; 0x1136d, 0x1136f; 0x11375, 0x1137f; 0x1138a, 0x1138a; + 0x1138c, 0x1138d; 0x1138f, 0x1138f; 0x113b6, 0x113b6; 0x113c1, 0x113c1; 0x113c3, 0x113c4; + 0x113c6, 0x113c6; 0x113cb, 0x113cb; 0x113d6, 0x113d6; 0x113d9, 0x113e0; 0x113e3, 0x113ff; + 0x1145c, 0x1145c; 0x11462, 0x1147f; 0x114c8, 0x114cf; 0x114da, 
0x1157f; 0x115b6, 0x115b7; + 0x115de, 0x115ff; 0x11645, 0x1164f; 0x1165a, 0x1165f; 0x1166d, 0x1167f; 0x116ba, 0x116bf; + 0x116ca, 0x116cf; 0x116e4, 0x116ff; 0x1171b, 0x1171c; 0x1172c, 0x1172f; 0x11747, 0x117ff; + 0x1183c, 0x1189f; 0x118f3, 0x118fe; 0x11907, 0x11908; 0x1190a, 0x1190b; 0x11914, 0x11914; + 0x11917, 0x11917; 0x11936, 0x11936; 0x11939, 0x1193a; 0x11947, 0x1194f; 0x1195a, 0x1199f; + 0x119a8, 0x119a9; 0x119d8, 0x119d9; 0x119e5, 0x119ff; 0x11a48, 0x11a4f; 0x11aa3, 0x11aaf; + 0x11af9, 0x11aff; 0x11b0a, 0x11b5f; 0x11b68, 0x11bbf; 0x11be2, 0x11bef; 0x11bfa, 0x11bff; 0x11c09, 0x11c09; 0x11c37, 0x11c37; 0x11c46, 0x11c4f; 0x11c6d, 0x11c6f; 0x11c90, 0x11c91; 0x11ca8, 0x11ca8; 0x11cb7, 0x11cff; 0x11d07, 0x11d07; 0x11d0a, 0x11d0a; 0x11d37, 0x11d39; 0x11d3b, 0x11d3b; 0x11d3e, 0x11d3e; 0x11d48, 0x11d4f; 0x11d5a, 0x11d5f; 0x11d66, 0x11d66; - 0x11d69, 0x11d69; 0x11d8f, 0x11d8f; 0x11d92, 0x11d92; 0x11d99, 0x11d9f; 0x11daa, 0x11edf; - 0x11ef9, 0x11eff; 0x11f11, 0x11f11; 0x11f3b, 0x11f3d; 0x11f5b, 0x11faf; 0x11fb1, 0x11fbf; - 0x11ff2, 0x11ffe; 0x1239a, 0x123ff; 0x1246f, 0x1246f; 0x12475, 0x1247f; 0x12544, 0x12f8f; - 0x12ff3, 0x12fff; 0x13456, 0x1345f; 0x143fb, 0x143ff; 0x14647, 0x160ff; 0x1613a, 0x167ff; - 0x16a39, 0x16a3f; 0x16a5f, 0x16a5f; 0x16a6a, 0x16a6d; 0x16abf, 0x16abf; 0x16aca, 0x16acf; - 0x16aee, 0x16aef; 0x16af6, 0x16aff; 0x16b46, 0x16b4f; 0x16b5a, 0x16b5a; 0x16b62, 0x16b62; - 0x16b78, 0x16b7c; 0x16b90, 0x16d3f; 0x16d7a, 0x16e3f; 0x16e9b, 0x16eff; 0x16f4b, 0x16f4e; - 0x16f88, 0x16f8e; 0x16fa0, 0x16fdf; 0x16fe5, 0x16fef; 0x16ff2, 0x16fff; 0x187f8, 0x187ff; - 0x18cd6, 0x18cfe; 0x18d09, 0x1afef; 0x1aff4, 0x1aff4; 0x1affc, 0x1affc; 0x1afff, 0x1afff; - 0x1b123, 0x1b131; 0x1b133, 0x1b14f; 0x1b153, 0x1b154; 0x1b156, 0x1b163; 0x1b168, 0x1b16f; - 0x1b2fc, 0x1bbff; 0x1bc6b, 0x1bc6f; 0x1bc7d, 0x1bc7f; 0x1bc89, 0x1bc8f; 0x1bc9a, 0x1bc9b; - 0x1bca4, 0x1cbff; 0x1ccfa, 0x1ccff; 0x1ceb4, 0x1ceff; 0x1cf2e, 0x1cf2f; 0x1cf47, 0x1cf4f; - 0x1cfc4, 0x1cfff; 0x1d0f6, 0x1d0ff; 0x1d127, 
0x1d128; 0x1d1eb, 0x1d1ff; 0x1d246, 0x1d2bf; - 0x1d2d4, 0x1d2df; 0x1d2f4, 0x1d2ff; 0x1d357, 0x1d35f; 0x1d379, 0x1d3ff; 0x1d455, 0x1d455; - 0x1d49d, 0x1d49d; 0x1d4a0, 0x1d4a1; 0x1d4a3, 0x1d4a4; 0x1d4a7, 0x1d4a8; 0x1d4ad, 0x1d4ad; - 0x1d4ba, 0x1d4ba; 0x1d4bc, 0x1d4bc; 0x1d4c4, 0x1d4c4; 0x1d506, 0x1d506; 0x1d50b, 0x1d50c; - 0x1d515, 0x1d515; 0x1d51d, 0x1d51d; 0x1d53a, 0x1d53a; 0x1d53f, 0x1d53f; 0x1d545, 0x1d545; - 0x1d547, 0x1d549; 0x1d551, 0x1d551; 0x1d6a6, 0x1d6a7; 0x1d7cc, 0x1d7cd; 0x1da8c, 0x1da9a; - 0x1daa0, 0x1daa0; 0x1dab0, 0x1deff; 0x1df1f, 0x1df24; 0x1df2b, 0x1dfff; 0x1e007, 0x1e007; - 0x1e019, 0x1e01a; 0x1e022, 0x1e022; 0x1e025, 0x1e025; 0x1e02b, 0x1e02f; 0x1e06e, 0x1e08e; - 0x1e090, 0x1e0ff; 0x1e12d, 0x1e12f; 0x1e13e, 0x1e13f; 0x1e14a, 0x1e14d; 0x1e150, 0x1e28f; - 0x1e2af, 0x1e2bf; 0x1e2fa, 0x1e2fe; 0x1e300, 0x1e4cf; 0x1e4fa, 0x1e5cf; 0x1e5fb, 0x1e5fe; - 0x1e600, 0x1e7df; 0x1e7e7, 0x1e7e7; 0x1e7ec, 0x1e7ec; 0x1e7ef, 0x1e7ef; 0x1e7ff, 0x1e7ff; - 0x1e8c5, 0x1e8c6; 0x1e8d7, 0x1e8ff; 0x1e94c, 0x1e94f; 0x1e95a, 0x1e95d; 0x1e960, 0x1ec70; - 0x1ecb5, 0x1ed00; 0x1ed3e, 0x1edff; 0x1ee04, 0x1ee04; 0x1ee20, 0x1ee20; 0x1ee23, 0x1ee23; - 0x1ee25, 0x1ee26; 0x1ee28, 0x1ee28; 0x1ee33, 0x1ee33; 0x1ee38, 0x1ee38; 0x1ee3a, 0x1ee3a; - 0x1ee3c, 0x1ee41; 0x1ee43, 0x1ee46; 0x1ee48, 0x1ee48; 0x1ee4a, 0x1ee4a; 0x1ee4c, 0x1ee4c; - 0x1ee50, 0x1ee50; 0x1ee53, 0x1ee53; 0x1ee55, 0x1ee56; 0x1ee58, 0x1ee58; 0x1ee5a, 0x1ee5a; - 0x1ee5c, 0x1ee5c; 0x1ee5e, 0x1ee5e; 0x1ee60, 0x1ee60; 0x1ee63, 0x1ee63; 0x1ee65, 0x1ee66; - 0x1ee6b, 0x1ee6b; 0x1ee73, 0x1ee73; 0x1ee78, 0x1ee78; 0x1ee7d, 0x1ee7d; 0x1ee7f, 0x1ee7f; - 0x1ee8a, 0x1ee8a; 0x1ee9c, 0x1eea0; 0x1eea4, 0x1eea4; 0x1eeaa, 0x1eeaa; 0x1eebc, 0x1eeef; - 0x1eef2, 0x1efff; 0x1f02c, 0x1f02f; 0x1f094, 0x1f09f; 0x1f0af, 0x1f0b0; 0x1f0c0, 0x1f0c0; - 0x1f0d0, 0x1f0d0; 0x1f0f6, 0x1f0ff; 0x1f1ae, 0x1f1e5; 0x1f203, 0x1f20f; 0x1f23c, 0x1f23f; - 0x1f249, 0x1f24f; 0x1f252, 0x1f25f; 0x1f266, 0x1f2ff; 0x1f6d8, 0x1f6db; 0x1f6ed, 0x1f6ef; - 0x1f6fd, 0x1f6ff; 
0x1f777, 0x1f77a; 0x1f7da, 0x1f7df; 0x1f7ec, 0x1f7ef; 0x1f7f1, 0x1f7ff; - 0x1f80c, 0x1f80f; 0x1f848, 0x1f84f; 0x1f85a, 0x1f85f; 0x1f888, 0x1f88f; 0x1f8ae, 0x1f8af; - 0x1f8bc, 0x1f8bf; 0x1f8c2, 0x1f8ff; 0x1fa54, 0x1fa5f; 0x1fa6e, 0x1fa6f; 0x1fa7d, 0x1fa7f; - 0x1fa8a, 0x1fa8e; 0x1fac7, 0x1facd; 0x1fadd, 0x1fade; 0x1faea, 0x1faef; 0x1faf9, 0x1faff; - 0x1fb93, 0x1fb93; 0x1fbfa, 0x1ffff; 0x2a6e0, 0x2a6ff; 0x2b73a, 0x2b73f; 0x2b81e, 0x2b81f; - 0x2cea2, 0x2ceaf; 0x2ebe1, 0x2ebef; 0x2ee5e, 0x2f7ff; 0x2fa1e, 0x2ffff; 0x3134b, 0x3134f; - 0x323b0, 0xe0000; 0xe0002, 0xe001f; 0xe0080, 0xe00ff; 0xe01f0, 0xeffff; 0xffffe, 0xfffff; - 0x10fffe, 0x10ffff] + 0x11d69, 0x11d69; 0x11d8f, 0x11d8f; 0x11d92, 0x11d92; 0x11d99, 0x11d9f; 0x11daa, 0x11daf; + 0x11ddc, 0x11ddf; 0x11dea, 0x11edf; 0x11ef9, 0x11eff; 0x11f11, 0x11f11; 0x11f3b, 0x11f3d; + 0x11f5b, 0x11faf; 0x11fb1, 0x11fbf; 0x11ff2, 0x11ffe; 0x1239a, 0x123ff; 0x1246f, 0x1246f; + 0x12475, 0x1247f; 0x12544, 0x12f8f; 0x12ff3, 0x12fff; 0x13456, 0x1345f; 0x143fb, 0x143ff; + 0x14647, 0x160ff; 0x1613a, 0x167ff; 0x16a39, 0x16a3f; 0x16a5f, 0x16a5f; 0x16a6a, 0x16a6d; + 0x16abf, 0x16abf; 0x16aca, 0x16acf; 0x16aee, 0x16aef; 0x16af6, 0x16aff; 0x16b46, 0x16b4f; + 0x16b5a, 0x16b5a; 0x16b62, 0x16b62; 0x16b78, 0x16b7c; 0x16b90, 0x16d3f; 0x16d7a, 0x16e3f; + 0x16e9b, 0x16e9f; 0x16eb9, 0x16eba; 0x16ed4, 0x16eff; 0x16f4b, 0x16f4e; 0x16f88, 0x16f8e; + 0x16fa0, 0x16fdf; 0x16fe5, 0x16fef; 0x16ff7, 0x16fff; 0x18cd6, 0x18cfe; 0x18d1f, 0x18d7f; + 0x18df3, 0x1afef; 0x1aff4, 0x1aff4; 0x1affc, 0x1affc; 0x1afff, 0x1afff; 0x1b123, 0x1b131; + 0x1b133, 0x1b14f; 0x1b153, 0x1b154; 0x1b156, 0x1b163; 0x1b168, 0x1b16f; 0x1b2fc, 0x1bbff; + 0x1bc6b, 0x1bc6f; 0x1bc7d, 0x1bc7f; 0x1bc89, 0x1bc8f; 0x1bc9a, 0x1bc9b; 0x1bca4, 0x1cbff; + 0x1ccfd, 0x1ccff; 0x1ceb4, 0x1ceb9; 0x1ced1, 0x1cedf; 0x1cef1, 0x1ceff; 0x1cf2e, 0x1cf2f; + 0x1cf47, 0x1cf4f; 0x1cfc4, 0x1cfff; 0x1d0f6, 0x1d0ff; 0x1d127, 0x1d128; 0x1d1eb, 0x1d1ff; + 0x1d246, 0x1d2bf; 0x1d2d4, 0x1d2df; 0x1d2f4, 0x1d2ff; 0x1d357, 
0x1d35f; 0x1d379, 0x1d3ff; + 0x1d455, 0x1d455; 0x1d49d, 0x1d49d; 0x1d4a0, 0x1d4a1; 0x1d4a3, 0x1d4a4; 0x1d4a7, 0x1d4a8; + 0x1d4ad, 0x1d4ad; 0x1d4ba, 0x1d4ba; 0x1d4bc, 0x1d4bc; 0x1d4c4, 0x1d4c4; 0x1d506, 0x1d506; + 0x1d50b, 0x1d50c; 0x1d515, 0x1d515; 0x1d51d, 0x1d51d; 0x1d53a, 0x1d53a; 0x1d53f, 0x1d53f; + 0x1d545, 0x1d545; 0x1d547, 0x1d549; 0x1d551, 0x1d551; 0x1d6a6, 0x1d6a7; 0x1d7cc, 0x1d7cd; + 0x1da8c, 0x1da9a; 0x1daa0, 0x1daa0; 0x1dab0, 0x1deff; 0x1df1f, 0x1df24; 0x1df2b, 0x1dfff; + 0x1e007, 0x1e007; 0x1e019, 0x1e01a; 0x1e022, 0x1e022; 0x1e025, 0x1e025; 0x1e02b, 0x1e02f; + 0x1e06e, 0x1e08e; 0x1e090, 0x1e0ff; 0x1e12d, 0x1e12f; 0x1e13e, 0x1e13f; 0x1e14a, 0x1e14d; + 0x1e150, 0x1e28f; 0x1e2af, 0x1e2bf; 0x1e2fa, 0x1e2fe; 0x1e300, 0x1e4cf; 0x1e4fa, 0x1e5cf; + 0x1e5fb, 0x1e5fe; 0x1e600, 0x1e6bf; 0x1e6df, 0x1e6df; 0x1e6f6, 0x1e6fd; 0x1e700, 0x1e7df; + 0x1e7e7, 0x1e7e7; 0x1e7ec, 0x1e7ec; 0x1e7ef, 0x1e7ef; 0x1e7ff, 0x1e7ff; 0x1e8c5, 0x1e8c6; + 0x1e8d7, 0x1e8ff; 0x1e94c, 0x1e94f; 0x1e95a, 0x1e95d; 0x1e960, 0x1ec70; 0x1ecb5, 0x1ed00; + 0x1ed3e, 0x1edff; 0x1ee04, 0x1ee04; 0x1ee20, 0x1ee20; 0x1ee23, 0x1ee23; 0x1ee25, 0x1ee26; + 0x1ee28, 0x1ee28; 0x1ee33, 0x1ee33; 0x1ee38, 0x1ee38; 0x1ee3a, 0x1ee3a; 0x1ee3c, 0x1ee41; + 0x1ee43, 0x1ee46; 0x1ee48, 0x1ee48; 0x1ee4a, 0x1ee4a; 0x1ee4c, 0x1ee4c; 0x1ee50, 0x1ee50; + 0x1ee53, 0x1ee53; 0x1ee55, 0x1ee56; 0x1ee58, 0x1ee58; 0x1ee5a, 0x1ee5a; 0x1ee5c, 0x1ee5c; + 0x1ee5e, 0x1ee5e; 0x1ee60, 0x1ee60; 0x1ee63, 0x1ee63; 0x1ee65, 0x1ee66; 0x1ee6b, 0x1ee6b; + 0x1ee73, 0x1ee73; 0x1ee78, 0x1ee78; 0x1ee7d, 0x1ee7d; 0x1ee7f, 0x1ee7f; 0x1ee8a, 0x1ee8a; + 0x1ee9c, 0x1eea0; 0x1eea4, 0x1eea4; 0x1eeaa, 0x1eeaa; 0x1eebc, 0x1eeef; 0x1eef2, 0x1efff; + 0x1f02c, 0x1f02f; 0x1f094, 0x1f09f; 0x1f0af, 0x1f0b0; 0x1f0c0, 0x1f0c0; 0x1f0d0, 0x1f0d0; + 0x1f0f6, 0x1f0ff; 0x1f1ae, 0x1f1e5; 0x1f203, 0x1f20f; 0x1f23c, 0x1f23f; 0x1f249, 0x1f24f; + 0x1f252, 0x1f25f; 0x1f266, 0x1f2ff; 0x1f6d9, 0x1f6db; 0x1f6ed, 0x1f6ef; 0x1f6fd, 0x1f6ff; + 0x1f7da, 0x1f7df; 0x1f7ec, 0x1f7ef; 
0x1f7f1, 0x1f7ff; 0x1f80c, 0x1f80f; 0x1f848, 0x1f84f; + 0x1f85a, 0x1f85f; 0x1f888, 0x1f88f; 0x1f8ae, 0x1f8af; 0x1f8bc, 0x1f8bf; 0x1f8c2, 0x1f8cf; + 0x1f8d9, 0x1f8ff; 0x1fa58, 0x1fa5f; 0x1fa6e, 0x1fa6f; 0x1fa7d, 0x1fa7f; 0x1fa8b, 0x1fa8d; + 0x1fac7, 0x1fac7; 0x1fac9, 0x1facc; 0x1fadd, 0x1fade; 0x1faeb, 0x1faee; 0x1faf9, 0x1faff; + 0x1fb93, 0x1fb93; 0x1fbfb, 0x1ffff; 0x2a6e0, 0x2a6ff; 0x2b81e, 0x2b81f; 0x2ceae, 0x2ceaf; + 0x2ebe1, 0x2ebef; 0x2ee5e, 0x2f7ff; 0x2fa1e, 0x2ffff; 0x3134b, 0x3134f; 0x3347a, 0xe0000; + 0xe0002, 0xe001f; 0xe0080, 0xe00ff; 0xe01f0, 0xeffff; 0xffffe, 0xfffff; 0x10fffe, 0x10ffff] let co = Sedlex_cset.of_list [0xe000, 0xf8ff; 0xf0000, 0xffffd; 0x100000, 0x10fffd] @@ -204,7 +204,7 @@ module Categories = struct 0x227, 0x227; 0x229, 0x229; 0x22b, 0x22b; 0x22d, 0x22d; 0x22f, 0x22f; 0x231, 0x231; 0x233, 0x239; 0x23c, 0x23c; 0x23f, 0x240; 0x242, 0x242; 0x247, 0x247; 0x249, 0x249; 0x24b, 0x24b; 0x24d, 0x24d; 0x24f, 0x293; - 0x295, 0x2af; 0x371, 0x371; 0x373, 0x373; 0x377, 0x377; 0x37b, 0x37d; + 0x296, 0x2af; 0x371, 0x371; 0x373, 0x373; 0x377, 0x377; 0x37b, 0x37d; 0x390, 0x390; 0x3ac, 0x3ce; 0x3d0, 0x3d1; 0x3d5, 0x3d7; 0x3d9, 0x3d9; 0x3db, 0x3db; 0x3dd, 0x3dd; 0x3df, 0x3df; 0x3e1, 0x3e1; 0x3e3, 0x3e3; 0x3e5, 0x3e5; 0x3e7, 0x3e7; 0x3e9, 0x3e9; 0x3eb, 0x3eb; 0x3ed, 0x3ed; @@ -296,18 +296,18 @@ module Categories = struct 0xa7a3, 0xa7a3; 0xa7a5, 0xa7a5; 0xa7a7, 0xa7a7; 0xa7a9, 0xa7a9; 0xa7af, 0xa7af; 0xa7b5, 0xa7b5; 0xa7b7, 0xa7b7; 0xa7b9, 0xa7b9; 0xa7bb, 0xa7bb; 0xa7bd, 0xa7bd; 0xa7bf, 0xa7bf; 0xa7c1, 0xa7c1; 0xa7c3, 0xa7c3; 0xa7c8, 0xa7c8; 0xa7ca, 0xa7ca; - 0xa7cd, 0xa7cd; 0xa7d1, 0xa7d1; 0xa7d3, 0xa7d3; 0xa7d5, 0xa7d5; 0xa7d7, 0xa7d7; - 0xa7d9, 0xa7d9; 0xa7db, 0xa7db; 0xa7f6, 0xa7f6; 0xa7fa, 0xa7fa; 0xab30, 0xab5a; - 0xab60, 0xab68; 0xab70, 0xabbf; 0xfb00, 0xfb06; 0xfb13, 0xfb17; 0xff41, 0xff5a; - 0x10428, 0x1044f; 0x104d8, 0x104fb; 0x10597, 0x105a1; 0x105a3, 0x105b1; 0x105b3, 0x105b9; - 0x105bb, 0x105bc; 0x10cc0, 0x10cf2; 0x10d70, 0x10d85; 0x118c0, 
0x118df; 0x16e60, 0x16e7f; - 0x1d41a, 0x1d433; 0x1d44e, 0x1d454; 0x1d456, 0x1d467; 0x1d482, 0x1d49b; 0x1d4b6, 0x1d4b9; - 0x1d4bb, 0x1d4bb; 0x1d4bd, 0x1d4c3; 0x1d4c5, 0x1d4cf; 0x1d4ea, 0x1d503; 0x1d51e, 0x1d537; - 0x1d552, 0x1d56b; 0x1d586, 0x1d59f; 0x1d5ba, 0x1d5d3; 0x1d5ee, 0x1d607; 0x1d622, 0x1d63b; - 0x1d656, 0x1d66f; 0x1d68a, 0x1d6a5; 0x1d6c2, 0x1d6da; 0x1d6dc, 0x1d6e1; 0x1d6fc, 0x1d714; - 0x1d716, 0x1d71b; 0x1d736, 0x1d74e; 0x1d750, 0x1d755; 0x1d770, 0x1d788; 0x1d78a, 0x1d78f; - 0x1d7aa, 0x1d7c2; 0x1d7c4, 0x1d7c9; 0x1d7cb, 0x1d7cb; 0x1df00, 0x1df09; 0x1df0b, 0x1df1e; - 0x1df25, 0x1df2a; 0x1e922, 0x1e943] + 0xa7cd, 0xa7cd; 0xa7cf, 0xa7cf; 0xa7d1, 0xa7d1; 0xa7d3, 0xa7d3; 0xa7d5, 0xa7d5; + 0xa7d7, 0xa7d7; 0xa7d9, 0xa7d9; 0xa7db, 0xa7db; 0xa7f6, 0xa7f6; 0xa7fa, 0xa7fa; + 0xab30, 0xab5a; 0xab60, 0xab68; 0xab70, 0xabbf; 0xfb00, 0xfb06; 0xfb13, 0xfb17; + 0xff41, 0xff5a; 0x10428, 0x1044f; 0x104d8, 0x104fb; 0x10597, 0x105a1; 0x105a3, 0x105b1; + 0x105b3, 0x105b9; 0x105bb, 0x105bc; 0x10cc0, 0x10cf2; 0x10d70, 0x10d85; 0x118c0, 0x118df; + 0x16e60, 0x16e7f; 0x16ebb, 0x16ed3; 0x1d41a, 0x1d433; 0x1d44e, 0x1d454; 0x1d456, 0x1d467; + 0x1d482, 0x1d49b; 0x1d4b6, 0x1d4b9; 0x1d4bb, 0x1d4bb; 0x1d4bd, 0x1d4c3; 0x1d4c5, 0x1d4cf; + 0x1d4ea, 0x1d503; 0x1d51e, 0x1d537; 0x1d552, 0x1d56b; 0x1d586, 0x1d59f; 0x1d5ba, 0x1d5d3; + 0x1d5ee, 0x1d607; 0x1d622, 0x1d63b; 0x1d656, 0x1d66f; 0x1d68a, 0x1d6a5; 0x1d6c2, 0x1d6da; + 0x1d6dc, 0x1d6e1; 0x1d6fc, 0x1d714; 0x1d716, 0x1d71b; 0x1d736, 0x1d74e; 0x1d750, 0x1d755; + 0x1d770, 0x1d788; 0x1d78a, 0x1d78f; 0x1d7aa, 0x1d7c2; 0x1d7c4, 0x1d7c9; 0x1d7cb, 0x1d7cb; + 0x1df00, 0x1df09; 0x1df0b, 0x1df1e; 0x1df25, 0x1df2a; 0x1e922, 0x1e943] let lm = Sedlex_cset.of_list [0x2b0, 0x2c1; 0x2c6, 0x2d1; 0x2e0, 0x2e4; 0x2ec, 0x2ec; 0x2ee, 0x2ee; @@ -319,19 +319,20 @@ module Categories = struct 0x2c7c, 0x2c7d; 0x2d6f, 0x2d6f; 0x2e2f, 0x2e2f; 0x3005, 0x3005; 0x3031, 0x3035; 0x303b, 0x303b; 0x309d, 0x309e; 0x30fc, 0x30fe; 0xa015, 0xa015; 0xa4f8, 0xa4fd; 0xa60c, 0xa60c; 
0xa67f, 0xa67f; 0xa69c, 0xa69d; 0xa717, 0xa71f; 0xa770, 0xa770; - 0xa788, 0xa788; 0xa7f2, 0xa7f4; 0xa7f8, 0xa7f9; 0xa9cf, 0xa9cf; 0xa9e6, 0xa9e6; + 0xa788, 0xa788; 0xa7f1, 0xa7f4; 0xa7f8, 0xa7f9; 0xa9cf, 0xa9cf; 0xa9e6, 0xa9e6; 0xaa70, 0xaa70; 0xaadd, 0xaadd; 0xaaf3, 0xaaf4; 0xab5c, 0xab5f; 0xab69, 0xab69; 0xff70, 0xff70; 0xff9e, 0xff9f; 0x10780, 0x10785; 0x10787, 0x107b0; 0x107b2, 0x107ba; - 0x10d4e, 0x10d4e; 0x10d6f, 0x10d6f; 0x16b40, 0x16b43; 0x16d40, 0x16d42; 0x16d6b, 0x16d6c; - 0x16f93, 0x16f9f; 0x16fe0, 0x16fe1; 0x16fe3, 0x16fe3; 0x1aff0, 0x1aff3; 0x1aff5, 0x1affb; - 0x1affd, 0x1affe; 0x1e030, 0x1e06d; 0x1e137, 0x1e13d; 0x1e4eb, 0x1e4eb; 0x1e94b, 0x1e94b] + 0x10d4e, 0x10d4e; 0x10d6f, 0x10d6f; 0x10ec5, 0x10ec5; 0x11dd9, 0x11dd9; 0x16b40, 0x16b43; + 0x16d40, 0x16d42; 0x16d6b, 0x16d6c; 0x16f93, 0x16f9f; 0x16fe0, 0x16fe1; 0x16fe3, 0x16fe3; + 0x16ff2, 0x16ff3; 0x1aff0, 0x1aff3; 0x1aff5, 0x1affb; 0x1affd, 0x1affe; 0x1e030, 0x1e06d; + 0x1e137, 0x1e13d; 0x1e4eb, 0x1e4eb; 0x1e6ff, 0x1e6ff; 0x1e94b, 0x1e94b] let lo = Sedlex_cset.of_list - [0xaa, 0xaa; 0xba, 0xba; 0x1bb, 0x1bb; 0x1c0, 0x1c3; 0x294, 0x294; + [0xaa, 0xaa; 0xba, 0xba; 0x1bb, 0x1bb; 0x1c0, 0x1c3; 0x294, 0x295; 0x5d0, 0x5ea; 0x5ef, 0x5f2; 0x620, 0x63f; 0x641, 0x64a; 0x66e, 0x66f; 0x671, 0x6d3; 0x6d5, 0x6d5; 0x6ee, 0x6ef; 0x6fa, 0x6fc; 0x6ff, 0x6ff; 0x710, 0x710; 0x712, 0x72f; 0x74d, 0x7a5; 0x7b1, 0x7b1; 0x7ca, 0x7ea; - 0x800, 0x815; 0x840, 0x858; 0x860, 0x86a; 0x870, 0x887; 0x889, 0x88e; + 0x800, 0x815; 0x840, 0x858; 0x860, 0x86a; 0x870, 0x887; 0x889, 0x88f; 0x8a0, 0x8c8; 0x904, 0x939; 0x93d, 0x93d; 0x950, 0x950; 0x958, 0x961; 0x972, 0x980; 0x985, 0x98c; 0x98f, 0x990; 0x993, 0x9a8; 0x9aa, 0x9b0; 0x9b2, 0x9b2; 0x9b6, 0x9b9; 0x9bd, 0x9bd; 0x9ce, 0x9ce; 0x9dc, 0x9dd; @@ -345,8 +346,8 @@ module Categories = struct 0xb8e, 0xb90; 0xb92, 0xb95; 0xb99, 0xb9a; 0xb9c, 0xb9c; 0xb9e, 0xb9f; 0xba3, 0xba4; 0xba8, 0xbaa; 0xbae, 0xbb9; 0xbd0, 0xbd0; 0xc05, 0xc0c; 0xc0e, 0xc10; 0xc12, 0xc28; 0xc2a, 0xc39; 0xc3d, 0xc3d; 
0xc58, 0xc5a; - 0xc5d, 0xc5d; 0xc60, 0xc61; 0xc80, 0xc80; 0xc85, 0xc8c; 0xc8e, 0xc90; - 0xc92, 0xca8; 0xcaa, 0xcb3; 0xcb5, 0xcb9; 0xcbd, 0xcbd; 0xcdd, 0xcde; + 0xc5c, 0xc5d; 0xc60, 0xc61; 0xc80, 0xc80; 0xc85, 0xc8c; 0xc8e, 0xc90; + 0xc92, 0xca8; 0xcaa, 0xcb3; 0xcb5, 0xcb9; 0xcbd, 0xcbd; 0xcdc, 0xcde; 0xce0, 0xce1; 0xcf1, 0xcf2; 0xd04, 0xd0c; 0xd0e, 0xd10; 0xd12, 0xd3a; 0xd3d, 0xd3d; 0xd4e, 0xd4e; 0xd54, 0xd56; 0xd5f, 0xd61; 0xd7a, 0xd7f; 0xd85, 0xd96; 0xd9a, 0xdb1; 0xdb3, 0xdbb; 0xdbd, 0xdbd; 0xdc0, 0xdc6; @@ -392,38 +393,40 @@ module Categories = struct 0x10600, 0x10736; 0x10740, 0x10755; 0x10760, 0x10767; 0x10800, 0x10805; 0x10808, 0x10808; 0x1080a, 0x10835; 0x10837, 0x10838; 0x1083c, 0x1083c; 0x1083f, 0x10855; 0x10860, 0x10876; 0x10880, 0x1089e; 0x108e0, 0x108f2; 0x108f4, 0x108f5; 0x10900, 0x10915; 0x10920, 0x10939; - 0x10980, 0x109b7; 0x109be, 0x109bf; 0x10a00, 0x10a00; 0x10a10, 0x10a13; 0x10a15, 0x10a17; - 0x10a19, 0x10a35; 0x10a60, 0x10a7c; 0x10a80, 0x10a9c; 0x10ac0, 0x10ac7; 0x10ac9, 0x10ae4; - 0x10b00, 0x10b35; 0x10b40, 0x10b55; 0x10b60, 0x10b72; 0x10b80, 0x10b91; 0x10c00, 0x10c48; - 0x10d00, 0x10d23; 0x10d4a, 0x10d4d; 0x10d4f, 0x10d4f; 0x10e80, 0x10ea9; 0x10eb0, 0x10eb1; - 0x10ec2, 0x10ec4; 0x10f00, 0x10f1c; 0x10f27, 0x10f27; 0x10f30, 0x10f45; 0x10f70, 0x10f81; - 0x10fb0, 0x10fc4; 0x10fe0, 0x10ff6; 0x11003, 0x11037; 0x11071, 0x11072; 0x11075, 0x11075; - 0x11083, 0x110af; 0x110d0, 0x110e8; 0x11103, 0x11126; 0x11144, 0x11144; 0x11147, 0x11147; - 0x11150, 0x11172; 0x11176, 0x11176; 0x11183, 0x111b2; 0x111c1, 0x111c4; 0x111da, 0x111da; - 0x111dc, 0x111dc; 0x11200, 0x11211; 0x11213, 0x1122b; 0x1123f, 0x11240; 0x11280, 0x11286; - 0x11288, 0x11288; 0x1128a, 0x1128d; 0x1128f, 0x1129d; 0x1129f, 0x112a8; 0x112b0, 0x112de; - 0x11305, 0x1130c; 0x1130f, 0x11310; 0x11313, 0x11328; 0x1132a, 0x11330; 0x11332, 0x11333; - 0x11335, 0x11339; 0x1133d, 0x1133d; 0x11350, 0x11350; 0x1135d, 0x11361; 0x11380, 0x11389; - 0x1138b, 0x1138b; 0x1138e, 0x1138e; 0x11390, 0x113b5; 0x113b7, 
0x113b7; 0x113d1, 0x113d1; - 0x113d3, 0x113d3; 0x11400, 0x11434; 0x11447, 0x1144a; 0x1145f, 0x11461; 0x11480, 0x114af; - 0x114c4, 0x114c5; 0x114c7, 0x114c7; 0x11580, 0x115ae; 0x115d8, 0x115db; 0x11600, 0x1162f; - 0x11644, 0x11644; 0x11680, 0x116aa; 0x116b8, 0x116b8; 0x11700, 0x1171a; 0x11740, 0x11746; - 0x11800, 0x1182b; 0x118ff, 0x11906; 0x11909, 0x11909; 0x1190c, 0x11913; 0x11915, 0x11916; - 0x11918, 0x1192f; 0x1193f, 0x1193f; 0x11941, 0x11941; 0x119a0, 0x119a7; 0x119aa, 0x119d0; - 0x119e1, 0x119e1; 0x119e3, 0x119e3; 0x11a00, 0x11a00; 0x11a0b, 0x11a32; 0x11a3a, 0x11a3a; - 0x11a50, 0x11a50; 0x11a5c, 0x11a89; 0x11a9d, 0x11a9d; 0x11ab0, 0x11af8; 0x11bc0, 0x11be0; - 0x11c00, 0x11c08; 0x11c0a, 0x11c2e; 0x11c40, 0x11c40; 0x11c72, 0x11c8f; 0x11d00, 0x11d06; - 0x11d08, 0x11d09; 0x11d0b, 0x11d30; 0x11d46, 0x11d46; 0x11d60, 0x11d65; 0x11d67, 0x11d68; - 0x11d6a, 0x11d89; 0x11d98, 0x11d98; 0x11ee0, 0x11ef2; 0x11f02, 0x11f02; 0x11f04, 0x11f10; - 0x11f12, 0x11f33; 0x11fb0, 0x11fb0; 0x12000, 0x12399; 0x12480, 0x12543; 0x12f90, 0x12ff0; - 0x13000, 0x1342f; 0x13441, 0x13446; 0x13460, 0x143fa; 0x14400, 0x14646; 0x16100, 0x1611d; - 0x16800, 0x16a38; 0x16a40, 0x16a5e; 0x16a70, 0x16abe; 0x16ad0, 0x16aed; 0x16b00, 0x16b2f; - 0x16b63, 0x16b77; 0x16b7d, 0x16b8f; 0x16d43, 0x16d6a; 0x16f00, 0x16f4a; 0x16f50, 0x16f50; - 0x17000, 0x187f7; 0x18800, 0x18cd5; 0x18cff, 0x18d08; 0x1b000, 0x1b122; 0x1b132, 0x1b132; - 0x1b150, 0x1b152; 0x1b155, 0x1b155; 0x1b164, 0x1b167; 0x1b170, 0x1b2fb; 0x1bc00, 0x1bc6a; - 0x1bc70, 0x1bc7c; 0x1bc80, 0x1bc88; 0x1bc90, 0x1bc99; 0x1df0a, 0x1df0a; 0x1e100, 0x1e12c; - 0x1e14e, 0x1e14e; 0x1e290, 0x1e2ad; 0x1e2c0, 0x1e2eb; 0x1e4d0, 0x1e4ea; 0x1e5d0, 0x1e5ed; - 0x1e5f0, 0x1e5f0; 0x1e7e0, 0x1e7e6; 0x1e7e8, 0x1e7eb; 0x1e7ed, 0x1e7ee; 0x1e7f0, 0x1e7fe; + 0x10940, 0x10959; 0x10980, 0x109b7; 0x109be, 0x109bf; 0x10a00, 0x10a00; 0x10a10, 0x10a13; + 0x10a15, 0x10a17; 0x10a19, 0x10a35; 0x10a60, 0x10a7c; 0x10a80, 0x10a9c; 0x10ac0, 0x10ac7; + 0x10ac9, 0x10ae4; 0x10b00, 0x10b35; 
0x10b40, 0x10b55; 0x10b60, 0x10b72; 0x10b80, 0x10b91; + 0x10c00, 0x10c48; 0x10d00, 0x10d23; 0x10d4a, 0x10d4d; 0x10d4f, 0x10d4f; 0x10e80, 0x10ea9; + 0x10eb0, 0x10eb1; 0x10ec2, 0x10ec4; 0x10ec6, 0x10ec7; 0x10f00, 0x10f1c; 0x10f27, 0x10f27; + 0x10f30, 0x10f45; 0x10f70, 0x10f81; 0x10fb0, 0x10fc4; 0x10fe0, 0x10ff6; 0x11003, 0x11037; + 0x11071, 0x11072; 0x11075, 0x11075; 0x11083, 0x110af; 0x110d0, 0x110e8; 0x11103, 0x11126; + 0x11144, 0x11144; 0x11147, 0x11147; 0x11150, 0x11172; 0x11176, 0x11176; 0x11183, 0x111b2; + 0x111c1, 0x111c4; 0x111da, 0x111da; 0x111dc, 0x111dc; 0x11200, 0x11211; 0x11213, 0x1122b; + 0x1123f, 0x11240; 0x11280, 0x11286; 0x11288, 0x11288; 0x1128a, 0x1128d; 0x1128f, 0x1129d; + 0x1129f, 0x112a8; 0x112b0, 0x112de; 0x11305, 0x1130c; 0x1130f, 0x11310; 0x11313, 0x11328; + 0x1132a, 0x11330; 0x11332, 0x11333; 0x11335, 0x11339; 0x1133d, 0x1133d; 0x11350, 0x11350; + 0x1135d, 0x11361; 0x11380, 0x11389; 0x1138b, 0x1138b; 0x1138e, 0x1138e; 0x11390, 0x113b5; + 0x113b7, 0x113b7; 0x113d1, 0x113d1; 0x113d3, 0x113d3; 0x11400, 0x11434; 0x11447, 0x1144a; + 0x1145f, 0x11461; 0x11480, 0x114af; 0x114c4, 0x114c5; 0x114c7, 0x114c7; 0x11580, 0x115ae; + 0x115d8, 0x115db; 0x11600, 0x1162f; 0x11644, 0x11644; 0x11680, 0x116aa; 0x116b8, 0x116b8; + 0x11700, 0x1171a; 0x11740, 0x11746; 0x11800, 0x1182b; 0x118ff, 0x11906; 0x11909, 0x11909; + 0x1190c, 0x11913; 0x11915, 0x11916; 0x11918, 0x1192f; 0x1193f, 0x1193f; 0x11941, 0x11941; + 0x119a0, 0x119a7; 0x119aa, 0x119d0; 0x119e1, 0x119e1; 0x119e3, 0x119e3; 0x11a00, 0x11a00; + 0x11a0b, 0x11a32; 0x11a3a, 0x11a3a; 0x11a50, 0x11a50; 0x11a5c, 0x11a89; 0x11a9d, 0x11a9d; + 0x11ab0, 0x11af8; 0x11bc0, 0x11be0; 0x11c00, 0x11c08; 0x11c0a, 0x11c2e; 0x11c40, 0x11c40; + 0x11c72, 0x11c8f; 0x11d00, 0x11d06; 0x11d08, 0x11d09; 0x11d0b, 0x11d30; 0x11d46, 0x11d46; + 0x11d60, 0x11d65; 0x11d67, 0x11d68; 0x11d6a, 0x11d89; 0x11d98, 0x11d98; 0x11db0, 0x11dd8; + 0x11dda, 0x11ddb; 0x11ee0, 0x11ef2; 0x11f02, 0x11f02; 0x11f04, 0x11f10; 0x11f12, 0x11f33; + 0x11fb0, 
0x11fb0; 0x12000, 0x12399; 0x12480, 0x12543; 0x12f90, 0x12ff0; 0x13000, 0x1342f; + 0x13441, 0x13446; 0x13460, 0x143fa; 0x14400, 0x14646; 0x16100, 0x1611d; 0x16800, 0x16a38; + 0x16a40, 0x16a5e; 0x16a70, 0x16abe; 0x16ad0, 0x16aed; 0x16b00, 0x16b2f; 0x16b63, 0x16b77; + 0x16b7d, 0x16b8f; 0x16d43, 0x16d6a; 0x16f00, 0x16f4a; 0x16f50, 0x16f50; 0x17000, 0x18cd5; + 0x18cff, 0x18d1e; 0x18d80, 0x18df2; 0x1b000, 0x1b122; 0x1b132, 0x1b132; 0x1b150, 0x1b152; + 0x1b155, 0x1b155; 0x1b164, 0x1b167; 0x1b170, 0x1b2fb; 0x1bc00, 0x1bc6a; 0x1bc70, 0x1bc7c; + 0x1bc80, 0x1bc88; 0x1bc90, 0x1bc99; 0x1df0a, 0x1df0a; 0x1e100, 0x1e12c; 0x1e14e, 0x1e14e; + 0x1e290, 0x1e2ad; 0x1e2c0, 0x1e2eb; 0x1e4d0, 0x1e4ea; 0x1e5d0, 0x1e5ed; 0x1e5f0, 0x1e5f0; + 0x1e6c0, 0x1e6de; 0x1e6e0, 0x1e6e2; 0x1e6e4, 0x1e6e5; 0x1e6e7, 0x1e6ed; 0x1e6f0, 0x1e6f4; + 0x1e6fe, 0x1e6fe; 0x1e7e0, 0x1e7e6; 0x1e7e8, 0x1e7eb; 0x1e7ed, 0x1e7ee; 0x1e7f0, 0x1e7fe; 0x1e800, 0x1e8c4; 0x1ee00, 0x1ee03; 0x1ee05, 0x1ee1f; 0x1ee21, 0x1ee22; 0x1ee24, 0x1ee24; 0x1ee27, 0x1ee27; 0x1ee29, 0x1ee32; 0x1ee34, 0x1ee37; 0x1ee39, 0x1ee39; 0x1ee3b, 0x1ee3b; 0x1ee42, 0x1ee42; 0x1ee47, 0x1ee47; 0x1ee49, 0x1ee49; 0x1ee4b, 0x1ee4b; 0x1ee4d, 0x1ee4f; @@ -431,8 +434,8 @@ module Categories = struct 0x1ee5d, 0x1ee5d; 0x1ee5f, 0x1ee5f; 0x1ee61, 0x1ee62; 0x1ee64, 0x1ee64; 0x1ee67, 0x1ee6a; 0x1ee6c, 0x1ee72; 0x1ee74, 0x1ee77; 0x1ee79, 0x1ee7c; 0x1ee7e, 0x1ee7e; 0x1ee80, 0x1ee89; 0x1ee8b, 0x1ee9b; 0x1eea1, 0x1eea3; 0x1eea5, 0x1eea9; 0x1eeab, 0x1eebb; 0x20000, 0x2a6df; - 0x2a700, 0x2b739; 0x2b740, 0x2b81d; 0x2b820, 0x2cea1; 0x2ceb0, 0x2ebe0; 0x2ebf0, 0x2ee5d; - 0x2f800, 0x2fa1d; 0x30000, 0x3134a; 0x31350, 0x323af] + 0x2a700, 0x2b81d; 0x2b820, 0x2cead; 0x2ceb0, 0x2ebe0; 0x2ebf0, 0x2ee5d; 0x2f800, 0x2fa1d; + 0x30000, 0x3134a; 0x31350, 0x33479] let lt = Sedlex_cset.of_list [0x1c5, 0x1c5; 0x1c8, 0x1c8; 0x1cb, 0x1cb; 0x1f2, 0x1f2; 0x1f88, 0x1f8f; @@ -559,17 +562,17 @@ module Categories = struct 0xa7a0, 0xa7a0; 0xa7a2, 0xa7a2; 0xa7a4, 0xa7a4; 0xa7a6, 0xa7a6; 0xa7a8, 
0xa7a8; 0xa7aa, 0xa7ae; 0xa7b0, 0xa7b4; 0xa7b6, 0xa7b6; 0xa7b8, 0xa7b8; 0xa7ba, 0xa7ba; 0xa7bc, 0xa7bc; 0xa7be, 0xa7be; 0xa7c0, 0xa7c0; 0xa7c2, 0xa7c2; 0xa7c4, 0xa7c7; - 0xa7c9, 0xa7c9; 0xa7cb, 0xa7cc; 0xa7d0, 0xa7d0; 0xa7d6, 0xa7d6; 0xa7d8, 0xa7d8; - 0xa7da, 0xa7da; 0xa7dc, 0xa7dc; 0xa7f5, 0xa7f5; 0xff21, 0xff3a; 0x10400, 0x10427; - 0x104b0, 0x104d3; 0x10570, 0x1057a; 0x1057c, 0x1058a; 0x1058c, 0x10592; 0x10594, 0x10595; - 0x10c80, 0x10cb2; 0x10d50, 0x10d65; 0x118a0, 0x118bf; 0x16e40, 0x16e5f; 0x1d400, 0x1d419; - 0x1d434, 0x1d44d; 0x1d468, 0x1d481; 0x1d49c, 0x1d49c; 0x1d49e, 0x1d49f; 0x1d4a2, 0x1d4a2; - 0x1d4a5, 0x1d4a6; 0x1d4a9, 0x1d4ac; 0x1d4ae, 0x1d4b5; 0x1d4d0, 0x1d4e9; 0x1d504, 0x1d505; - 0x1d507, 0x1d50a; 0x1d50d, 0x1d514; 0x1d516, 0x1d51c; 0x1d538, 0x1d539; 0x1d53b, 0x1d53e; - 0x1d540, 0x1d544; 0x1d546, 0x1d546; 0x1d54a, 0x1d550; 0x1d56c, 0x1d585; 0x1d5a0, 0x1d5b9; - 0x1d5d4, 0x1d5ed; 0x1d608, 0x1d621; 0x1d63c, 0x1d655; 0x1d670, 0x1d689; 0x1d6a8, 0x1d6c0; - 0x1d6e2, 0x1d6fa; 0x1d71c, 0x1d734; 0x1d756, 0x1d76e; 0x1d790, 0x1d7a8; 0x1d7ca, 0x1d7ca; - 0x1e900, 0x1e921] + 0xa7c9, 0xa7c9; 0xa7cb, 0xa7cc; 0xa7ce, 0xa7ce; 0xa7d0, 0xa7d0; 0xa7d2, 0xa7d2; + 0xa7d4, 0xa7d4; 0xa7d6, 0xa7d6; 0xa7d8, 0xa7d8; 0xa7da, 0xa7da; 0xa7dc, 0xa7dc; + 0xa7f5, 0xa7f5; 0xff21, 0xff3a; 0x10400, 0x10427; 0x104b0, 0x104d3; 0x10570, 0x1057a; + 0x1057c, 0x1058a; 0x1058c, 0x10592; 0x10594, 0x10595; 0x10c80, 0x10cb2; 0x10d50, 0x10d65; + 0x118a0, 0x118bf; 0x16e40, 0x16e5f; 0x16ea0, 0x16eb8; 0x1d400, 0x1d419; 0x1d434, 0x1d44d; + 0x1d468, 0x1d481; 0x1d49c, 0x1d49c; 0x1d49e, 0x1d49f; 0x1d4a2, 0x1d4a2; 0x1d4a5, 0x1d4a6; + 0x1d4a9, 0x1d4ac; 0x1d4ae, 0x1d4b5; 0x1d4d0, 0x1d4e9; 0x1d504, 0x1d505; 0x1d507, 0x1d50a; + 0x1d50d, 0x1d514; 0x1d516, 0x1d51c; 0x1d538, 0x1d539; 0x1d53b, 0x1d53e; 0x1d540, 0x1d544; + 0x1d546, 0x1d546; 0x1d54a, 0x1d550; 0x1d56c, 0x1d585; 0x1d5a0, 0x1d5b9; 0x1d5d4, 0x1d5ed; + 0x1d608, 0x1d621; 0x1d63c, 0x1d655; 0x1d670, 0x1d689; 0x1d6a8, 0x1d6c0; 0x1d6e2, 0x1d6fa; + 0x1d71c, 
0x1d734; 0x1d756, 0x1d76e; 0x1d790, 0x1d7a8; 0x1d7ca, 0x1d7ca; 0x1e900, 0x1e921] let mc = Sedlex_cset.of_list [0x903, 0x903; 0x93b, 0x93b; 0x93e, 0x940; 0x949, 0x94c; 0x94e, 0x94f; @@ -606,10 +609,11 @@ module Categories = struct 0x116b6, 0x116b6; 0x1171e, 0x1171e; 0x11720, 0x11721; 0x11726, 0x11726; 0x1182c, 0x1182e; 0x11838, 0x11838; 0x11930, 0x11935; 0x11937, 0x11938; 0x1193d, 0x1193d; 0x11940, 0x11940; 0x11942, 0x11942; 0x119d1, 0x119d3; 0x119dc, 0x119df; 0x119e4, 0x119e4; 0x11a39, 0x11a39; - 0x11a57, 0x11a58; 0x11a97, 0x11a97; 0x11c2f, 0x11c2f; 0x11c3e, 0x11c3e; 0x11ca9, 0x11ca9; - 0x11cb1, 0x11cb1; 0x11cb4, 0x11cb4; 0x11d8a, 0x11d8e; 0x11d93, 0x11d94; 0x11d96, 0x11d96; - 0x11ef5, 0x11ef6; 0x11f03, 0x11f03; 0x11f34, 0x11f35; 0x11f3e, 0x11f3f; 0x11f41, 0x11f41; - 0x1612a, 0x1612c; 0x16f51, 0x16f87; 0x16ff0, 0x16ff1; 0x1d165, 0x1d166; 0x1d16d, 0x1d172] + 0x11a57, 0x11a58; 0x11a97, 0x11a97; 0x11b61, 0x11b61; 0x11b65, 0x11b65; 0x11b67, 0x11b67; + 0x11c2f, 0x11c2f; 0x11c3e, 0x11c3e; 0x11ca9, 0x11ca9; 0x11cb1, 0x11cb1; 0x11cb4, 0x11cb4; + 0x11d8a, 0x11d8e; 0x11d93, 0x11d94; 0x11d96, 0x11d96; 0x11ef5, 0x11ef6; 0x11f03, 0x11f03; + 0x11f34, 0x11f35; 0x11f3e, 0x11f3f; 0x11f41, 0x11f41; 0x1612a, 0x1612c; 0x16f51, 0x16f87; + 0x16ff0, 0x16ff1; 0x1d165, 0x1d166; 0x1d16d, 0x1d172] let me = Sedlex_cset.of_list [0x488, 0x489; 0x1abe, 0x1abe; 0x20dd, 0x20e0; 0x20e2, 0x20e4; 0xa670, 0xa672] @@ -643,50 +647,51 @@ module Categories = struct 0x180f, 0x180f; 0x1885, 0x1886; 0x18a9, 0x18a9; 0x1920, 0x1922; 0x1927, 0x1928; 0x1932, 0x1932; 0x1939, 0x193b; 0x1a17, 0x1a18; 0x1a1b, 0x1a1b; 0x1a56, 0x1a56; 0x1a58, 0x1a5e; 0x1a60, 0x1a60; 0x1a62, 0x1a62; 0x1a65, 0x1a6c; 0x1a73, 0x1a7c; - 0x1a7f, 0x1a7f; 0x1ab0, 0x1abd; 0x1abf, 0x1ace; 0x1b00, 0x1b03; 0x1b34, 0x1b34; - 0x1b36, 0x1b3a; 0x1b3c, 0x1b3c; 0x1b42, 0x1b42; 0x1b6b, 0x1b73; 0x1b80, 0x1b81; - 0x1ba2, 0x1ba5; 0x1ba8, 0x1ba9; 0x1bab, 0x1bad; 0x1be6, 0x1be6; 0x1be8, 0x1be9; - 0x1bed, 0x1bed; 0x1bef, 0x1bf1; 0x1c2c, 0x1c33; 0x1c36, 
0x1c37; 0x1cd0, 0x1cd2; - 0x1cd4, 0x1ce0; 0x1ce2, 0x1ce8; 0x1ced, 0x1ced; 0x1cf4, 0x1cf4; 0x1cf8, 0x1cf9; - 0x1dc0, 0x1dff; 0x20d0, 0x20dc; 0x20e1, 0x20e1; 0x20e5, 0x20f0; 0x2cef, 0x2cf1; - 0x2d7f, 0x2d7f; 0x2de0, 0x2dff; 0x302a, 0x302d; 0x3099, 0x309a; 0xa66f, 0xa66f; - 0xa674, 0xa67d; 0xa69e, 0xa69f; 0xa6f0, 0xa6f1; 0xa802, 0xa802; 0xa806, 0xa806; - 0xa80b, 0xa80b; 0xa825, 0xa826; 0xa82c, 0xa82c; 0xa8c4, 0xa8c5; 0xa8e0, 0xa8f1; - 0xa8ff, 0xa8ff; 0xa926, 0xa92d; 0xa947, 0xa951; 0xa980, 0xa982; 0xa9b3, 0xa9b3; - 0xa9b6, 0xa9b9; 0xa9bc, 0xa9bd; 0xa9e5, 0xa9e5; 0xaa29, 0xaa2e; 0xaa31, 0xaa32; - 0xaa35, 0xaa36; 0xaa43, 0xaa43; 0xaa4c, 0xaa4c; 0xaa7c, 0xaa7c; 0xaab0, 0xaab0; - 0xaab2, 0xaab4; 0xaab7, 0xaab8; 0xaabe, 0xaabf; 0xaac1, 0xaac1; 0xaaec, 0xaaed; - 0xaaf6, 0xaaf6; 0xabe5, 0xabe5; 0xabe8, 0xabe8; 0xabed, 0xabed; 0xfb1e, 0xfb1e; - 0xfe00, 0xfe0f; 0xfe20, 0xfe2f; 0x101fd, 0x101fd; 0x102e0, 0x102e0; 0x10376, 0x1037a; - 0x10a01, 0x10a03; 0x10a05, 0x10a06; 0x10a0c, 0x10a0f; 0x10a38, 0x10a3a; 0x10a3f, 0x10a3f; - 0x10ae5, 0x10ae6; 0x10d24, 0x10d27; 0x10d69, 0x10d6d; 0x10eab, 0x10eac; 0x10efc, 0x10eff; - 0x10f46, 0x10f50; 0x10f82, 0x10f85; 0x11001, 0x11001; 0x11038, 0x11046; 0x11070, 0x11070; - 0x11073, 0x11074; 0x1107f, 0x11081; 0x110b3, 0x110b6; 0x110b9, 0x110ba; 0x110c2, 0x110c2; - 0x11100, 0x11102; 0x11127, 0x1112b; 0x1112d, 0x11134; 0x11173, 0x11173; 0x11180, 0x11181; - 0x111b6, 0x111be; 0x111c9, 0x111cc; 0x111cf, 0x111cf; 0x1122f, 0x11231; 0x11234, 0x11234; - 0x11236, 0x11237; 0x1123e, 0x1123e; 0x11241, 0x11241; 0x112df, 0x112df; 0x112e3, 0x112ea; - 0x11300, 0x11301; 0x1133b, 0x1133c; 0x11340, 0x11340; 0x11366, 0x1136c; 0x11370, 0x11374; - 0x113bb, 0x113c0; 0x113ce, 0x113ce; 0x113d0, 0x113d0; 0x113d2, 0x113d2; 0x113e1, 0x113e2; - 0x11438, 0x1143f; 0x11442, 0x11444; 0x11446, 0x11446; 0x1145e, 0x1145e; 0x114b3, 0x114b8; - 0x114ba, 0x114ba; 0x114bf, 0x114c0; 0x114c2, 0x114c3; 0x115b2, 0x115b5; 0x115bc, 0x115bd; - 0x115bf, 0x115c0; 0x115dc, 0x115dd; 0x11633, 0x1163a; 
0x1163d, 0x1163d; 0x1163f, 0x11640; - 0x116ab, 0x116ab; 0x116ad, 0x116ad; 0x116b0, 0x116b5; 0x116b7, 0x116b7; 0x1171d, 0x1171d; - 0x1171f, 0x1171f; 0x11722, 0x11725; 0x11727, 0x1172b; 0x1182f, 0x11837; 0x11839, 0x1183a; - 0x1193b, 0x1193c; 0x1193e, 0x1193e; 0x11943, 0x11943; 0x119d4, 0x119d7; 0x119da, 0x119db; - 0x119e0, 0x119e0; 0x11a01, 0x11a0a; 0x11a33, 0x11a38; 0x11a3b, 0x11a3e; 0x11a47, 0x11a47; - 0x11a51, 0x11a56; 0x11a59, 0x11a5b; 0x11a8a, 0x11a96; 0x11a98, 0x11a99; 0x11c30, 0x11c36; - 0x11c38, 0x11c3d; 0x11c3f, 0x11c3f; 0x11c92, 0x11ca7; 0x11caa, 0x11cb0; 0x11cb2, 0x11cb3; - 0x11cb5, 0x11cb6; 0x11d31, 0x11d36; 0x11d3a, 0x11d3a; 0x11d3c, 0x11d3d; 0x11d3f, 0x11d45; - 0x11d47, 0x11d47; 0x11d90, 0x11d91; 0x11d95, 0x11d95; 0x11d97, 0x11d97; 0x11ef3, 0x11ef4; - 0x11f00, 0x11f01; 0x11f36, 0x11f3a; 0x11f40, 0x11f40; 0x11f42, 0x11f42; 0x11f5a, 0x11f5a; - 0x13440, 0x13440; 0x13447, 0x13455; 0x1611e, 0x16129; 0x1612d, 0x1612f; 0x16af0, 0x16af4; - 0x16b30, 0x16b36; 0x16f4f, 0x16f4f; 0x16f8f, 0x16f92; 0x16fe4, 0x16fe4; 0x1bc9d, 0x1bc9e; - 0x1cf00, 0x1cf2d; 0x1cf30, 0x1cf46; 0x1d167, 0x1d169; 0x1d17b, 0x1d182; 0x1d185, 0x1d18b; - 0x1d1aa, 0x1d1ad; 0x1d242, 0x1d244; 0x1da00, 0x1da36; 0x1da3b, 0x1da6c; 0x1da75, 0x1da75; - 0x1da84, 0x1da84; 0x1da9b, 0x1da9f; 0x1daa1, 0x1daaf; 0x1e000, 0x1e006; 0x1e008, 0x1e018; - 0x1e01b, 0x1e021; 0x1e023, 0x1e024; 0x1e026, 0x1e02a; 0x1e08f, 0x1e08f; 0x1e130, 0x1e136; - 0x1e2ae, 0x1e2ae; 0x1e2ec, 0x1e2ef; 0x1e4ec, 0x1e4ef; 0x1e5ee, 0x1e5ef; 0x1e8d0, 0x1e8d6; - 0x1e944, 0x1e94a; 0xe0100, 0xe01ef] + 0x1a7f, 0x1a7f; 0x1ab0, 0x1abd; 0x1abf, 0x1add; 0x1ae0, 0x1aeb; 0x1b00, 0x1b03; + 0x1b34, 0x1b34; 0x1b36, 0x1b3a; 0x1b3c, 0x1b3c; 0x1b42, 0x1b42; 0x1b6b, 0x1b73; + 0x1b80, 0x1b81; 0x1ba2, 0x1ba5; 0x1ba8, 0x1ba9; 0x1bab, 0x1bad; 0x1be6, 0x1be6; + 0x1be8, 0x1be9; 0x1bed, 0x1bed; 0x1bef, 0x1bf1; 0x1c2c, 0x1c33; 0x1c36, 0x1c37; + 0x1cd0, 0x1cd2; 0x1cd4, 0x1ce0; 0x1ce2, 0x1ce8; 0x1ced, 0x1ced; 0x1cf4, 0x1cf4; + 0x1cf8, 0x1cf9; 0x1dc0, 0x1dff; 0x20d0, 
0x20dc; 0x20e1, 0x20e1; 0x20e5, 0x20f0; + 0x2cef, 0x2cf1; 0x2d7f, 0x2d7f; 0x2de0, 0x2dff; 0x302a, 0x302d; 0x3099, 0x309a; + 0xa66f, 0xa66f; 0xa674, 0xa67d; 0xa69e, 0xa69f; 0xa6f0, 0xa6f1; 0xa802, 0xa802; + 0xa806, 0xa806; 0xa80b, 0xa80b; 0xa825, 0xa826; 0xa82c, 0xa82c; 0xa8c4, 0xa8c5; + 0xa8e0, 0xa8f1; 0xa8ff, 0xa8ff; 0xa926, 0xa92d; 0xa947, 0xa951; 0xa980, 0xa982; + 0xa9b3, 0xa9b3; 0xa9b6, 0xa9b9; 0xa9bc, 0xa9bd; 0xa9e5, 0xa9e5; 0xaa29, 0xaa2e; + 0xaa31, 0xaa32; 0xaa35, 0xaa36; 0xaa43, 0xaa43; 0xaa4c, 0xaa4c; 0xaa7c, 0xaa7c; + 0xaab0, 0xaab0; 0xaab2, 0xaab4; 0xaab7, 0xaab8; 0xaabe, 0xaabf; 0xaac1, 0xaac1; + 0xaaec, 0xaaed; 0xaaf6, 0xaaf6; 0xabe5, 0xabe5; 0xabe8, 0xabe8; 0xabed, 0xabed; + 0xfb1e, 0xfb1e; 0xfe00, 0xfe0f; 0xfe20, 0xfe2f; 0x101fd, 0x101fd; 0x102e0, 0x102e0; + 0x10376, 0x1037a; 0x10a01, 0x10a03; 0x10a05, 0x10a06; 0x10a0c, 0x10a0f; 0x10a38, 0x10a3a; + 0x10a3f, 0x10a3f; 0x10ae5, 0x10ae6; 0x10d24, 0x10d27; 0x10d69, 0x10d6d; 0x10eab, 0x10eac; + 0x10efa, 0x10eff; 0x10f46, 0x10f50; 0x10f82, 0x10f85; 0x11001, 0x11001; 0x11038, 0x11046; + 0x11070, 0x11070; 0x11073, 0x11074; 0x1107f, 0x11081; 0x110b3, 0x110b6; 0x110b9, 0x110ba; + 0x110c2, 0x110c2; 0x11100, 0x11102; 0x11127, 0x1112b; 0x1112d, 0x11134; 0x11173, 0x11173; + 0x11180, 0x11181; 0x111b6, 0x111be; 0x111c9, 0x111cc; 0x111cf, 0x111cf; 0x1122f, 0x11231; + 0x11234, 0x11234; 0x11236, 0x11237; 0x1123e, 0x1123e; 0x11241, 0x11241; 0x112df, 0x112df; + 0x112e3, 0x112ea; 0x11300, 0x11301; 0x1133b, 0x1133c; 0x11340, 0x11340; 0x11366, 0x1136c; + 0x11370, 0x11374; 0x113bb, 0x113c0; 0x113ce, 0x113ce; 0x113d0, 0x113d0; 0x113d2, 0x113d2; + 0x113e1, 0x113e2; 0x11438, 0x1143f; 0x11442, 0x11444; 0x11446, 0x11446; 0x1145e, 0x1145e; + 0x114b3, 0x114b8; 0x114ba, 0x114ba; 0x114bf, 0x114c0; 0x114c2, 0x114c3; 0x115b2, 0x115b5; + 0x115bc, 0x115bd; 0x115bf, 0x115c0; 0x115dc, 0x115dd; 0x11633, 0x1163a; 0x1163d, 0x1163d; + 0x1163f, 0x11640; 0x116ab, 0x116ab; 0x116ad, 0x116ad; 0x116b0, 0x116b5; 0x116b7, 0x116b7; + 0x1171d, 0x1171d; 
0x1171f, 0x1171f; 0x11722, 0x11725; 0x11727, 0x1172b; 0x1182f, 0x11837; + 0x11839, 0x1183a; 0x1193b, 0x1193c; 0x1193e, 0x1193e; 0x11943, 0x11943; 0x119d4, 0x119d7; + 0x119da, 0x119db; 0x119e0, 0x119e0; 0x11a01, 0x11a0a; 0x11a33, 0x11a38; 0x11a3b, 0x11a3e; + 0x11a47, 0x11a47; 0x11a51, 0x11a56; 0x11a59, 0x11a5b; 0x11a8a, 0x11a96; 0x11a98, 0x11a99; + 0x11b60, 0x11b60; 0x11b62, 0x11b64; 0x11b66, 0x11b66; 0x11c30, 0x11c36; 0x11c38, 0x11c3d; + 0x11c3f, 0x11c3f; 0x11c92, 0x11ca7; 0x11caa, 0x11cb0; 0x11cb2, 0x11cb3; 0x11cb5, 0x11cb6; + 0x11d31, 0x11d36; 0x11d3a, 0x11d3a; 0x11d3c, 0x11d3d; 0x11d3f, 0x11d45; 0x11d47, 0x11d47; + 0x11d90, 0x11d91; 0x11d95, 0x11d95; 0x11d97, 0x11d97; 0x11ef3, 0x11ef4; 0x11f00, 0x11f01; + 0x11f36, 0x11f3a; 0x11f40, 0x11f40; 0x11f42, 0x11f42; 0x11f5a, 0x11f5a; 0x13440, 0x13440; + 0x13447, 0x13455; 0x1611e, 0x16129; 0x1612d, 0x1612f; 0x16af0, 0x16af4; 0x16b30, 0x16b36; + 0x16f4f, 0x16f4f; 0x16f8f, 0x16f92; 0x16fe4, 0x16fe4; 0x1bc9d, 0x1bc9e; 0x1cf00, 0x1cf2d; + 0x1cf30, 0x1cf46; 0x1d167, 0x1d169; 0x1d17b, 0x1d182; 0x1d185, 0x1d18b; 0x1d1aa, 0x1d1ad; + 0x1d242, 0x1d244; 0x1da00, 0x1da36; 0x1da3b, 0x1da6c; 0x1da75, 0x1da75; 0x1da84, 0x1da84; + 0x1da9b, 0x1da9f; 0x1daa1, 0x1daaf; 0x1e000, 0x1e006; 0x1e008, 0x1e018; 0x1e01b, 0x1e021; + 0x1e023, 0x1e024; 0x1e026, 0x1e02a; 0x1e08f, 0x1e08f; 0x1e130, 0x1e136; 0x1e2ae, 0x1e2ae; + 0x1e2ec, 0x1e2ef; 0x1e4ec, 0x1e4ef; 0x1e5ee, 0x1e5ef; 0x1e6e3, 0x1e6e3; 0x1e6e6, 0x1e6e6; + 0x1e6ee, 0x1e6ef; 0x1e6f5, 0x1e6f5; 0x1e8d0, 0x1e8d6; 0x1e944, 0x1e94a; 0xe0100, 0xe01ef] let nd = Sedlex_cset.of_list [0x30, 0x39; 0x660, 0x669; 0x6f0, 0x6f9; 0x7c0, 0x7c9; 0x966, 0x96f; @@ -700,15 +705,15 @@ module Categories = struct 0x11066, 0x1106f; 0x110f0, 0x110f9; 0x11136, 0x1113f; 0x111d0, 0x111d9; 0x112f0, 0x112f9; 0x11450, 0x11459; 0x114d0, 0x114d9; 0x11650, 0x11659; 0x116c0, 0x116c9; 0x116d0, 0x116e3; 0x11730, 0x11739; 0x118e0, 0x118e9; 0x11950, 0x11959; 0x11bf0, 0x11bf9; 0x11c50, 0x11c59; - 0x11d50, 0x11d59; 0x11da0, 0x11da9; 
0x11f50, 0x11f59; 0x16130, 0x16139; 0x16a60, 0x16a69; - 0x16ac0, 0x16ac9; 0x16b50, 0x16b59; 0x16d70, 0x16d79; 0x1ccf0, 0x1ccf9; 0x1d7ce, 0x1d7ff; - 0x1e140, 0x1e149; 0x1e2f0, 0x1e2f9; 0x1e4f0, 0x1e4f9; 0x1e5f1, 0x1e5fa; 0x1e950, 0x1e959; - 0x1fbf0, 0x1fbf9] + 0x11d50, 0x11d59; 0x11da0, 0x11da9; 0x11de0, 0x11de9; 0x11f50, 0x11f59; 0x16130, 0x16139; + 0x16a60, 0x16a69; 0x16ac0, 0x16ac9; 0x16b50, 0x16b59; 0x16d70, 0x16d79; 0x1ccf0, 0x1ccf9; + 0x1d7ce, 0x1d7ff; 0x1e140, 0x1e149; 0x1e2f0, 0x1e2f9; 0x1e4f0, 0x1e4f9; 0x1e5f1, 0x1e5fa; + 0x1e950, 0x1e959; 0x1fbf0, 0x1fbf9] let nl = Sedlex_cset.of_list [0x16ee, 0x16f0; 0x2160, 0x2182; 0x2185, 0x2188; 0x3007, 0x3007; 0x3021, 0x3029; 0x3038, 0x303a; 0xa6e6, 0xa6ef; 0x10140, 0x10174; 0x10341, 0x10341; 0x1034a, 0x1034a; - 0x103d1, 0x103d5; 0x12400, 0x1246e] + 0x103d1, 0x103d5; 0x12400, 0x1246e; 0x16ff4, 0x16ff6] let no = Sedlex_cset.of_list [0xb2, 0xb3; 0xb9, 0xb9; 0xbc, 0xbe; 0x9f4, 0x9f9; 0xb72, 0xb77; @@ -793,17 +798,17 @@ module Categories = struct 0xff0e, 0xff0f; 0xff1a, 0xff1b; 0xff1f, 0xff20; 0xff3c, 0xff3c; 0xff61, 0xff61; 0xff64, 0xff65; 0x10100, 0x10102; 0x1039f, 0x1039f; 0x103d0, 0x103d0; 0x1056f, 0x1056f; 0x10857, 0x10857; 0x1091f, 0x1091f; 0x1093f, 0x1093f; 0x10a50, 0x10a58; 0x10a7f, 0x10a7f; - 0x10af0, 0x10af6; 0x10b39, 0x10b3f; 0x10b99, 0x10b9c; 0x10f55, 0x10f59; 0x10f86, 0x10f89; - 0x11047, 0x1104d; 0x110bb, 0x110bc; 0x110be, 0x110c1; 0x11140, 0x11143; 0x11174, 0x11175; - 0x111c5, 0x111c8; 0x111cd, 0x111cd; 0x111db, 0x111db; 0x111dd, 0x111df; 0x11238, 0x1123d; - 0x112a9, 0x112a9; 0x113d4, 0x113d5; 0x113d7, 0x113d8; 0x1144b, 0x1144f; 0x1145a, 0x1145b; - 0x1145d, 0x1145d; 0x114c6, 0x114c6; 0x115c1, 0x115d7; 0x11641, 0x11643; 0x11660, 0x1166c; - 0x116b9, 0x116b9; 0x1173c, 0x1173e; 0x1183b, 0x1183b; 0x11944, 0x11946; 0x119e2, 0x119e2; - 0x11a3f, 0x11a46; 0x11a9a, 0x11a9c; 0x11a9e, 0x11aa2; 0x11b00, 0x11b09; 0x11be1, 0x11be1; - 0x11c41, 0x11c45; 0x11c70, 0x11c71; 0x11ef7, 0x11ef8; 0x11f43, 0x11f4f; 0x11fff, 0x11fff; 
- 0x12470, 0x12474; 0x12ff1, 0x12ff2; 0x16a6e, 0x16a6f; 0x16af5, 0x16af5; 0x16b37, 0x16b3b; - 0x16b44, 0x16b44; 0x16d6d, 0x16d6f; 0x16e97, 0x16e9a; 0x16fe2, 0x16fe2; 0x1bc9f, 0x1bc9f; - 0x1da87, 0x1da8b; 0x1e5ff, 0x1e5ff; 0x1e95e, 0x1e95f] + 0x10af0, 0x10af6; 0x10b39, 0x10b3f; 0x10b99, 0x10b9c; 0x10ed0, 0x10ed0; 0x10f55, 0x10f59; + 0x10f86, 0x10f89; 0x11047, 0x1104d; 0x110bb, 0x110bc; 0x110be, 0x110c1; 0x11140, 0x11143; + 0x11174, 0x11175; 0x111c5, 0x111c8; 0x111cd, 0x111cd; 0x111db, 0x111db; 0x111dd, 0x111df; + 0x11238, 0x1123d; 0x112a9, 0x112a9; 0x113d4, 0x113d5; 0x113d7, 0x113d8; 0x1144b, 0x1144f; + 0x1145a, 0x1145b; 0x1145d, 0x1145d; 0x114c6, 0x114c6; 0x115c1, 0x115d7; 0x11641, 0x11643; + 0x11660, 0x1166c; 0x116b9, 0x116b9; 0x1173c, 0x1173e; 0x1183b, 0x1183b; 0x11944, 0x11946; + 0x119e2, 0x119e2; 0x11a3f, 0x11a46; 0x11a9a, 0x11a9c; 0x11a9e, 0x11aa2; 0x11b00, 0x11b09; + 0x11be1, 0x11be1; 0x11c41, 0x11c45; 0x11c70, 0x11c71; 0x11ef7, 0x11ef8; 0x11f43, 0x11f4f; + 0x11fff, 0x11fff; 0x12470, 0x12474; 0x12ff1, 0x12ff2; 0x16a6e, 0x16a6f; 0x16af5, 0x16af5; + 0x16b37, 0x16b3b; 0x16b44, 0x16b44; 0x16d6d, 0x16d6f; 0x16e97, 0x16e9a; 0x16fe2, 0x16fe2; + 0x1bc9f, 0x1bc9f; 0x1da87, 0x1da8b; 0x1e5ff, 0x1e5ff; 0x1e95e, 0x1e95f] let ps = Sedlex_cset.of_list [0x28, 0x28; 0x5b, 0x5b; 0x7b, 0x7b; 0xf3a, 0xf3a; 0xf3c, 0xf3c; @@ -826,7 +831,7 @@ module Categories = struct let sc = Sedlex_cset.of_list [0x24, 0x24; 0xa2, 0xa5; 0x58f, 0x58f; 0x60b, 0x60b; 0x7fe, 0x7ff; 0x9f2, 0x9f3; 0x9fb, 0x9fb; 0xaf1, 0xaf1; 0xbf9, 0xbf9; 0xe3f, 0xe3f; - 0x17db, 0x17db; 0x20a0, 0x20c0; 0xa838, 0xa838; 0xfdfc, 0xfdfc; 0xfe69, 0xfe69; + 0x17db, 0x17db; 0x20a0, 0x20c1; 0xa838, 0xa838; 0xfdfc, 0xfdfc; 0xfe69, 0xfe69; 0xff04, 0xff04; 0xffe0, 0xffe1; 0xffe5, 0xffe6; 0x11fdd, 0x11fe0; 0x1e2ff, 0x1e2ff; 0x1ecb0, 0x1ecb0] @@ -850,9 +855,10 @@ module Categories = struct 0x27c0, 0x27c4; 0x27c7, 0x27e5; 0x27f0, 0x27ff; 0x2900, 0x2982; 0x2999, 0x29d7; 0x29dc, 0x29fb; 0x29fe, 0x2aff; 0x2b30, 0x2b44; 0x2b47, 0x2b4c; 
0xfb29, 0xfb29; 0xfe62, 0xfe62; 0xfe64, 0xfe66; 0xff0b, 0xff0b; 0xff1c, 0xff1e; 0xff5c, 0xff5c; - 0xff5e, 0xff5e; 0xffe2, 0xffe2; 0xffe9, 0xffec; 0x10d8e, 0x10d8f; 0x1d6c1, 0x1d6c1; - 0x1d6db, 0x1d6db; 0x1d6fb, 0x1d6fb; 0x1d715, 0x1d715; 0x1d735, 0x1d735; 0x1d74f, 0x1d74f; - 0x1d76f, 0x1d76f; 0x1d789, 0x1d789; 0x1d7a9, 0x1d7a9; 0x1d7c3, 0x1d7c3; 0x1eef0, 0x1eef1] + 0xff5e, 0xff5e; 0xffe2, 0xffe2; 0xffe9, 0xffec; 0x10d8e, 0x10d8f; 0x1cef0, 0x1cef0; + 0x1d6c1, 0x1d6c1; 0x1d6db, 0x1d6db; 0x1d6fb, 0x1d6fb; 0x1d715, 0x1d715; 0x1d735, 0x1d735; + 0x1d74f, 0x1d74f; 0x1d76f, 0x1d76f; 0x1d789, 0x1d789; 0x1d7a9, 0x1d7a9; 0x1d7c3, 0x1d7c3; + 0x1eef0, 0x1eef1; 0x1f8d0, 0x1f8d8] let so = Sedlex_cset.of_list [0xa6, 0xa6; 0xa9, 0xa9; 0xae, 0xae; 0xb0, 0xb0; 0x482, 0x482; @@ -870,29 +876,30 @@ module Categories = struct 0x2300, 0x2307; 0x230c, 0x231f; 0x2322, 0x2328; 0x232b, 0x237b; 0x237d, 0x239a; 0x23b4, 0x23db; 0x23e2, 0x2429; 0x2440, 0x244a; 0x249c, 0x24e9; 0x2500, 0x25b6; 0x25b8, 0x25c0; 0x25c2, 0x25f7; 0x2600, 0x266e; 0x2670, 0x2767; 0x2794, 0x27bf; - 0x2800, 0x28ff; 0x2b00, 0x2b2f; 0x2b45, 0x2b46; 0x2b4d, 0x2b73; 0x2b76, 0x2b95; - 0x2b97, 0x2bff; 0x2ce5, 0x2cea; 0x2e50, 0x2e51; 0x2e80, 0x2e99; 0x2e9b, 0x2ef3; - 0x2f00, 0x2fd5; 0x2ff0, 0x2fff; 0x3004, 0x3004; 0x3012, 0x3013; 0x3020, 0x3020; - 0x3036, 0x3037; 0x303e, 0x303f; 0x3190, 0x3191; 0x3196, 0x319f; 0x31c0, 0x31e5; - 0x31ef, 0x31ef; 0x3200, 0x321e; 0x322a, 0x3247; 0x3250, 0x3250; 0x3260, 0x327f; - 0x328a, 0x32b0; 0x32c0, 0x33ff; 0x4dc0, 0x4dff; 0xa490, 0xa4c6; 0xa828, 0xa82b; - 0xa836, 0xa837; 0xa839, 0xa839; 0xaa77, 0xaa79; 0xfd40, 0xfd4f; 0xfdcf, 0xfdcf; - 0xfdfd, 0xfdff; 0xffe4, 0xffe4; 0xffe8, 0xffe8; 0xffed, 0xffee; 0xfffc, 0xfffd; - 0x10137, 0x1013f; 0x10179, 0x10189; 0x1018c, 0x1018e; 0x10190, 0x1019c; 0x101a0, 0x101a0; - 0x101d0, 0x101fc; 0x10877, 0x10878; 0x10ac8, 0x10ac8; 0x1173f, 0x1173f; 0x11fd5, 0x11fdc; - 0x11fe1, 0x11ff1; 0x16b3c, 0x16b3f; 0x16b45, 0x16b45; 0x1bc9c, 0x1bc9c; 0x1cc00, 0x1ccef; - 0x1cd00, 
0x1ceb3; 0x1cf50, 0x1cfc3; 0x1d000, 0x1d0f5; 0x1d100, 0x1d126; 0x1d129, 0x1d164; + 0x2800, 0x28ff; 0x2b00, 0x2b2f; 0x2b45, 0x2b46; 0x2b4d, 0x2b73; 0x2b76, 0x2bff; + 0x2ce5, 0x2cea; 0x2e50, 0x2e51; 0x2e80, 0x2e99; 0x2e9b, 0x2ef3; 0x2f00, 0x2fd5; + 0x2ff0, 0x2fff; 0x3004, 0x3004; 0x3012, 0x3013; 0x3020, 0x3020; 0x3036, 0x3037; + 0x303e, 0x303f; 0x3190, 0x3191; 0x3196, 0x319f; 0x31c0, 0x31e5; 0x31ef, 0x31ef; + 0x3200, 0x321e; 0x322a, 0x3247; 0x3250, 0x3250; 0x3260, 0x327f; 0x328a, 0x32b0; + 0x32c0, 0x33ff; 0x4dc0, 0x4dff; 0xa490, 0xa4c6; 0xa828, 0xa82b; 0xa836, 0xa837; + 0xa839, 0xa839; 0xaa77, 0xaa79; 0xfbc3, 0xfbd2; 0xfd40, 0xfd4f; 0xfd90, 0xfd91; + 0xfdc8, 0xfdcf; 0xfdfd, 0xfdff; 0xffe4, 0xffe4; 0xffe8, 0xffe8; 0xffed, 0xffee; + 0xfffc, 0xfffd; 0x10137, 0x1013f; 0x10179, 0x10189; 0x1018c, 0x1018e; 0x10190, 0x1019c; + 0x101a0, 0x101a0; 0x101d0, 0x101fc; 0x10877, 0x10878; 0x10ac8, 0x10ac8; 0x10ed1, 0x10ed8; + 0x1173f, 0x1173f; 0x11fd5, 0x11fdc; 0x11fe1, 0x11ff1; 0x16b3c, 0x16b3f; 0x16b45, 0x16b45; + 0x1bc9c, 0x1bc9c; 0x1cc00, 0x1ccef; 0x1ccfa, 0x1ccfc; 0x1cd00, 0x1ceb3; 0x1ceba, 0x1ced0; + 0x1cee0, 0x1ceef; 0x1cf50, 0x1cfc3; 0x1d000, 0x1d0f5; 0x1d100, 0x1d126; 0x1d129, 0x1d164; 0x1d16a, 0x1d16c; 0x1d183, 0x1d184; 0x1d18c, 0x1d1a9; 0x1d1ae, 0x1d1ea; 0x1d200, 0x1d241; 0x1d245, 0x1d245; 0x1d300, 0x1d356; 0x1d800, 0x1d9ff; 0x1da37, 0x1da3a; 0x1da6d, 0x1da74; 0x1da76, 0x1da83; 0x1da85, 0x1da86; 0x1e14f, 0x1e14f; 0x1ecac, 0x1ecac; 0x1ed2e, 0x1ed2e; 0x1f000, 0x1f02b; 0x1f030, 0x1f093; 0x1f0a0, 0x1f0ae; 0x1f0b1, 0x1f0bf; 0x1f0c1, 0x1f0cf; 0x1f0d1, 0x1f0f5; 0x1f10d, 0x1f1ad; 0x1f1e6, 0x1f202; 0x1f210, 0x1f23b; 0x1f240, 0x1f248; - 0x1f250, 0x1f251; 0x1f260, 0x1f265; 0x1f300, 0x1f3fa; 0x1f400, 0x1f6d7; 0x1f6dc, 0x1f6ec; - 0x1f6f0, 0x1f6fc; 0x1f700, 0x1f776; 0x1f77b, 0x1f7d9; 0x1f7e0, 0x1f7eb; 0x1f7f0, 0x1f7f0; - 0x1f800, 0x1f80b; 0x1f810, 0x1f847; 0x1f850, 0x1f859; 0x1f860, 0x1f887; 0x1f890, 0x1f8ad; - 0x1f8b0, 0x1f8bb; 0x1f8c0, 0x1f8c1; 0x1f900, 0x1fa53; 0x1fa60, 0x1fa6d; 
0x1fa70, 0x1fa7c; - 0x1fa80, 0x1fa89; 0x1fa8f, 0x1fac6; 0x1face, 0x1fadc; 0x1fadf, 0x1fae9; 0x1faf0, 0x1faf8; - 0x1fb00, 0x1fb92; 0x1fb94, 0x1fbef] + 0x1f250, 0x1f251; 0x1f260, 0x1f265; 0x1f300, 0x1f3fa; 0x1f400, 0x1f6d8; 0x1f6dc, 0x1f6ec; + 0x1f6f0, 0x1f6fc; 0x1f700, 0x1f7d9; 0x1f7e0, 0x1f7eb; 0x1f7f0, 0x1f7f0; 0x1f800, 0x1f80b; + 0x1f810, 0x1f847; 0x1f850, 0x1f859; 0x1f860, 0x1f887; 0x1f890, 0x1f8ad; 0x1f8b0, 0x1f8bb; + 0x1f8c0, 0x1f8c1; 0x1f900, 0x1fa57; 0x1fa60, 0x1fa6d; 0x1fa70, 0x1fa7c; 0x1fa80, 0x1fa8a; + 0x1fa8e, 0x1fac6; 0x1fac8, 0x1fac8; 0x1facd, 0x1fadc; 0x1fadf, 0x1faea; 0x1faef, 0x1faf8; + 0x1fb00, 0x1fb92; 0x1fb94, 0x1fbef; 0x1fbfa, 0x1fbfa] let zl = Sedlex_cset.of_list [0x2028, 0x2028] @@ -952,7 +959,7 @@ module Properties = struct 0x620, 0x657; 0x659, 0x65f; 0x66e, 0x6d3; 0x6d5, 0x6dc; 0x6e1, 0x6e8; 0x6ed, 0x6ef; 0x6fa, 0x6fc; 0x6ff, 0x6ff; 0x710, 0x73f; 0x74d, 0x7b1; 0x7ca, 0x7ea; 0x7f4, 0x7f5; 0x7fa, 0x7fa; 0x800, 0x817; 0x81a, 0x82c; - 0x840, 0x858; 0x860, 0x86a; 0x870, 0x887; 0x889, 0x88e; 0x897, 0x897; + 0x840, 0x858; 0x860, 0x86a; 0x870, 0x887; 0x889, 0x88f; 0x897, 0x897; 0x8a0, 0x8c9; 0x8d4, 0x8df; 0x8e3, 0x8e9; 0x8f0, 0x93b; 0x93d, 0x94c; 0x94e, 0x950; 0x955, 0x963; 0x971, 0x983; 0x985, 0x98c; 0x98f, 0x990; 0x993, 0x9a8; 0x9aa, 0x9b0; 0x9b2, 0x9b2; 0x9b6, 0x9b9; 0x9bd, 0x9c4; @@ -970,10 +977,10 @@ module Properties = struct 0xb9c, 0xb9c; 0xb9e, 0xb9f; 0xba3, 0xba4; 0xba8, 0xbaa; 0xbae, 0xbb9; 0xbbe, 0xbc2; 0xbc6, 0xbc8; 0xbca, 0xbcc; 0xbd0, 0xbd0; 0xbd7, 0xbd7; 0xc00, 0xc0c; 0xc0e, 0xc10; 0xc12, 0xc28; 0xc2a, 0xc39; 0xc3d, 0xc44; - 0xc46, 0xc48; 0xc4a, 0xc4c; 0xc55, 0xc56; 0xc58, 0xc5a; 0xc5d, 0xc5d; + 0xc46, 0xc48; 0xc4a, 0xc4c; 0xc55, 0xc56; 0xc58, 0xc5a; 0xc5c, 0xc5d; 0xc60, 0xc63; 0xc80, 0xc83; 0xc85, 0xc8c; 0xc8e, 0xc90; 0xc92, 0xca8; 0xcaa, 0xcb3; 0xcb5, 0xcb9; 0xcbd, 0xcc4; 0xcc6, 0xcc8; 0xcca, 0xccc; - 0xcd5, 0xcd6; 0xcdd, 0xcde; 0xce0, 0xce3; 0xcf1, 0xcf3; 0xd00, 0xd0c; + 0xcd5, 0xcd6; 0xcdc, 0xcde; 0xce0, 0xce3; 0xcf1, 0xcf3; 
0xd00, 0xd0c; 0xd0e, 0xd10; 0xd12, 0xd3a; 0xd3d, 0xd44; 0xd46, 0xd48; 0xd4a, 0xd4c; 0xd4e, 0xd4e; 0xd54, 0xd57; 0xd5f, 0xd63; 0xd7a, 0xd7f; 0xd81, 0xd83; 0xd85, 0xd96; 0xd9a, 0xdb1; 0xdb3, 0xdbb; 0xdbd, 0xdbd; 0xdc0, 0xdc6; @@ -1013,76 +1020,77 @@ module Properties = struct 0x30fc, 0x30ff; 0x3105, 0x312f; 0x3131, 0x318e; 0x31a0, 0x31bf; 0x31f0, 0x31ff; 0x3400, 0x4dbf; 0x4e00, 0xa48c; 0xa4d0, 0xa4fd; 0xa500, 0xa60c; 0xa610, 0xa61f; 0xa62a, 0xa62b; 0xa640, 0xa66e; 0xa674, 0xa67b; 0xa67f, 0xa6ef; 0xa717, 0xa71f; - 0xa722, 0xa788; 0xa78b, 0xa7cd; 0xa7d0, 0xa7d1; 0xa7d3, 0xa7d3; 0xa7d5, 0xa7dc; - 0xa7f2, 0xa805; 0xa807, 0xa827; 0xa840, 0xa873; 0xa880, 0xa8c3; 0xa8c5, 0xa8c5; - 0xa8f2, 0xa8f7; 0xa8fb, 0xa8fb; 0xa8fd, 0xa8ff; 0xa90a, 0xa92a; 0xa930, 0xa952; - 0xa960, 0xa97c; 0xa980, 0xa9b2; 0xa9b4, 0xa9bf; 0xa9cf, 0xa9cf; 0xa9e0, 0xa9ef; - 0xa9fa, 0xa9fe; 0xaa00, 0xaa36; 0xaa40, 0xaa4d; 0xaa60, 0xaa76; 0xaa7a, 0xaabe; - 0xaac0, 0xaac0; 0xaac2, 0xaac2; 0xaadb, 0xaadd; 0xaae0, 0xaaef; 0xaaf2, 0xaaf5; - 0xab01, 0xab06; 0xab09, 0xab0e; 0xab11, 0xab16; 0xab20, 0xab26; 0xab28, 0xab2e; - 0xab30, 0xab5a; 0xab5c, 0xab69; 0xab70, 0xabea; 0xac00, 0xd7a3; 0xd7b0, 0xd7c6; - 0xd7cb, 0xd7fb; 0xf900, 0xfa6d; 0xfa70, 0xfad9; 0xfb00, 0xfb06; 0xfb13, 0xfb17; - 0xfb1d, 0xfb28; 0xfb2a, 0xfb36; 0xfb38, 0xfb3c; 0xfb3e, 0xfb3e; 0xfb40, 0xfb41; - 0xfb43, 0xfb44; 0xfb46, 0xfbb1; 0xfbd3, 0xfd3d; 0xfd50, 0xfd8f; 0xfd92, 0xfdc7; - 0xfdf0, 0xfdfb; 0xfe70, 0xfe74; 0xfe76, 0xfefc; 0xff21, 0xff3a; 0xff41, 0xff5a; - 0xff66, 0xffbe; 0xffc2, 0xffc7; 0xffca, 0xffcf; 0xffd2, 0xffd7; 0xffda, 0xffdc; - 0x10000, 0x1000b; 0x1000d, 0x10026; 0x10028, 0x1003a; 0x1003c, 0x1003d; 0x1003f, 0x1004d; - 0x10050, 0x1005d; 0x10080, 0x100fa; 0x10140, 0x10174; 0x10280, 0x1029c; 0x102a0, 0x102d0; - 0x10300, 0x1031f; 0x1032d, 0x1034a; 0x10350, 0x1037a; 0x10380, 0x1039d; 0x103a0, 0x103c3; - 0x103c8, 0x103cf; 0x103d1, 0x103d5; 0x10400, 0x1049d; 0x104b0, 0x104d3; 0x104d8, 0x104fb; - 0x10500, 0x10527; 0x10530, 0x10563; 0x10570, 
0x1057a; 0x1057c, 0x1058a; 0x1058c, 0x10592; - 0x10594, 0x10595; 0x10597, 0x105a1; 0x105a3, 0x105b1; 0x105b3, 0x105b9; 0x105bb, 0x105bc; - 0x105c0, 0x105f3; 0x10600, 0x10736; 0x10740, 0x10755; 0x10760, 0x10767; 0x10780, 0x10785; - 0x10787, 0x107b0; 0x107b2, 0x107ba; 0x10800, 0x10805; 0x10808, 0x10808; 0x1080a, 0x10835; - 0x10837, 0x10838; 0x1083c, 0x1083c; 0x1083f, 0x10855; 0x10860, 0x10876; 0x10880, 0x1089e; - 0x108e0, 0x108f2; 0x108f4, 0x108f5; 0x10900, 0x10915; 0x10920, 0x10939; 0x10980, 0x109b7; - 0x109be, 0x109bf; 0x10a00, 0x10a03; 0x10a05, 0x10a06; 0x10a0c, 0x10a13; 0x10a15, 0x10a17; - 0x10a19, 0x10a35; 0x10a60, 0x10a7c; 0x10a80, 0x10a9c; 0x10ac0, 0x10ac7; 0x10ac9, 0x10ae4; - 0x10b00, 0x10b35; 0x10b40, 0x10b55; 0x10b60, 0x10b72; 0x10b80, 0x10b91; 0x10c00, 0x10c48; - 0x10c80, 0x10cb2; 0x10cc0, 0x10cf2; 0x10d00, 0x10d27; 0x10d4a, 0x10d65; 0x10d69, 0x10d69; - 0x10d6f, 0x10d85; 0x10e80, 0x10ea9; 0x10eab, 0x10eac; 0x10eb0, 0x10eb1; 0x10ec2, 0x10ec4; - 0x10efc, 0x10efc; 0x10f00, 0x10f1c; 0x10f27, 0x10f27; 0x10f30, 0x10f45; 0x10f70, 0x10f81; - 0x10fb0, 0x10fc4; 0x10fe0, 0x10ff6; 0x11000, 0x11045; 0x11071, 0x11075; 0x11080, 0x110b8; - 0x110c2, 0x110c2; 0x110d0, 0x110e8; 0x11100, 0x11132; 0x11144, 0x11147; 0x11150, 0x11172; - 0x11176, 0x11176; 0x11180, 0x111bf; 0x111c1, 0x111c4; 0x111ce, 0x111cf; 0x111da, 0x111da; - 0x111dc, 0x111dc; 0x11200, 0x11211; 0x11213, 0x11234; 0x11237, 0x11237; 0x1123e, 0x11241; - 0x11280, 0x11286; 0x11288, 0x11288; 0x1128a, 0x1128d; 0x1128f, 0x1129d; 0x1129f, 0x112a8; - 0x112b0, 0x112e8; 0x11300, 0x11303; 0x11305, 0x1130c; 0x1130f, 0x11310; 0x11313, 0x11328; - 0x1132a, 0x11330; 0x11332, 0x11333; 0x11335, 0x11339; 0x1133d, 0x11344; 0x11347, 0x11348; - 0x1134b, 0x1134c; 0x11350, 0x11350; 0x11357, 0x11357; 0x1135d, 0x11363; 0x11380, 0x11389; - 0x1138b, 0x1138b; 0x1138e, 0x1138e; 0x11390, 0x113b5; 0x113b7, 0x113c0; 0x113c2, 0x113c2; - 0x113c5, 0x113c5; 0x113c7, 0x113ca; 0x113cc, 0x113cd; 0x113d1, 0x113d1; 0x113d3, 0x113d3; - 0x11400, 0x11441; 
0x11443, 0x11445; 0x11447, 0x1144a; 0x1145f, 0x11461; 0x11480, 0x114c1; - 0x114c4, 0x114c5; 0x114c7, 0x114c7; 0x11580, 0x115b5; 0x115b8, 0x115be; 0x115d8, 0x115dd; - 0x11600, 0x1163e; 0x11640, 0x11640; 0x11644, 0x11644; 0x11680, 0x116b5; 0x116b8, 0x116b8; - 0x11700, 0x1171a; 0x1171d, 0x1172a; 0x11740, 0x11746; 0x11800, 0x11838; 0x118a0, 0x118df; - 0x118ff, 0x11906; 0x11909, 0x11909; 0x1190c, 0x11913; 0x11915, 0x11916; 0x11918, 0x11935; - 0x11937, 0x11938; 0x1193b, 0x1193c; 0x1193f, 0x11942; 0x119a0, 0x119a7; 0x119aa, 0x119d7; - 0x119da, 0x119df; 0x119e1, 0x119e1; 0x119e3, 0x119e4; 0x11a00, 0x11a32; 0x11a35, 0x11a3e; - 0x11a50, 0x11a97; 0x11a9d, 0x11a9d; 0x11ab0, 0x11af8; 0x11bc0, 0x11be0; 0x11c00, 0x11c08; - 0x11c0a, 0x11c36; 0x11c38, 0x11c3e; 0x11c40, 0x11c40; 0x11c72, 0x11c8f; 0x11c92, 0x11ca7; - 0x11ca9, 0x11cb6; 0x11d00, 0x11d06; 0x11d08, 0x11d09; 0x11d0b, 0x11d36; 0x11d3a, 0x11d3a; - 0x11d3c, 0x11d3d; 0x11d3f, 0x11d41; 0x11d43, 0x11d43; 0x11d46, 0x11d47; 0x11d60, 0x11d65; - 0x11d67, 0x11d68; 0x11d6a, 0x11d8e; 0x11d90, 0x11d91; 0x11d93, 0x11d96; 0x11d98, 0x11d98; + 0xa722, 0xa788; 0xa78b, 0xa7dc; 0xa7f1, 0xa805; 0xa807, 0xa827; 0xa840, 0xa873; + 0xa880, 0xa8c3; 0xa8c5, 0xa8c5; 0xa8f2, 0xa8f7; 0xa8fb, 0xa8fb; 0xa8fd, 0xa8ff; + 0xa90a, 0xa92a; 0xa930, 0xa952; 0xa960, 0xa97c; 0xa980, 0xa9b2; 0xa9b4, 0xa9bf; + 0xa9cf, 0xa9cf; 0xa9e0, 0xa9ef; 0xa9fa, 0xa9fe; 0xaa00, 0xaa36; 0xaa40, 0xaa4d; + 0xaa60, 0xaa76; 0xaa7a, 0xaabe; 0xaac0, 0xaac0; 0xaac2, 0xaac2; 0xaadb, 0xaadd; + 0xaae0, 0xaaef; 0xaaf2, 0xaaf5; 0xab01, 0xab06; 0xab09, 0xab0e; 0xab11, 0xab16; + 0xab20, 0xab26; 0xab28, 0xab2e; 0xab30, 0xab5a; 0xab5c, 0xab69; 0xab70, 0xabea; + 0xac00, 0xd7a3; 0xd7b0, 0xd7c6; 0xd7cb, 0xd7fb; 0xf900, 0xfa6d; 0xfa70, 0xfad9; + 0xfb00, 0xfb06; 0xfb13, 0xfb17; 0xfb1d, 0xfb28; 0xfb2a, 0xfb36; 0xfb38, 0xfb3c; + 0xfb3e, 0xfb3e; 0xfb40, 0xfb41; 0xfb43, 0xfb44; 0xfb46, 0xfbb1; 0xfbd3, 0xfd3d; + 0xfd50, 0xfd8f; 0xfd92, 0xfdc7; 0xfdf0, 0xfdfb; 0xfe70, 0xfe74; 0xfe76, 0xfefc; + 0xff21, 
0xff3a; 0xff41, 0xff5a; 0xff66, 0xffbe; 0xffc2, 0xffc7; 0xffca, 0xffcf; + 0xffd2, 0xffd7; 0xffda, 0xffdc; 0x10000, 0x1000b; 0x1000d, 0x10026; 0x10028, 0x1003a; + 0x1003c, 0x1003d; 0x1003f, 0x1004d; 0x10050, 0x1005d; 0x10080, 0x100fa; 0x10140, 0x10174; + 0x10280, 0x1029c; 0x102a0, 0x102d0; 0x10300, 0x1031f; 0x1032d, 0x1034a; 0x10350, 0x1037a; + 0x10380, 0x1039d; 0x103a0, 0x103c3; 0x103c8, 0x103cf; 0x103d1, 0x103d5; 0x10400, 0x1049d; + 0x104b0, 0x104d3; 0x104d8, 0x104fb; 0x10500, 0x10527; 0x10530, 0x10563; 0x10570, 0x1057a; + 0x1057c, 0x1058a; 0x1058c, 0x10592; 0x10594, 0x10595; 0x10597, 0x105a1; 0x105a3, 0x105b1; + 0x105b3, 0x105b9; 0x105bb, 0x105bc; 0x105c0, 0x105f3; 0x10600, 0x10736; 0x10740, 0x10755; + 0x10760, 0x10767; 0x10780, 0x10785; 0x10787, 0x107b0; 0x107b2, 0x107ba; 0x10800, 0x10805; + 0x10808, 0x10808; 0x1080a, 0x10835; 0x10837, 0x10838; 0x1083c, 0x1083c; 0x1083f, 0x10855; + 0x10860, 0x10876; 0x10880, 0x1089e; 0x108e0, 0x108f2; 0x108f4, 0x108f5; 0x10900, 0x10915; + 0x10920, 0x10939; 0x10940, 0x10959; 0x10980, 0x109b7; 0x109be, 0x109bf; 0x10a00, 0x10a03; + 0x10a05, 0x10a06; 0x10a0c, 0x10a13; 0x10a15, 0x10a17; 0x10a19, 0x10a35; 0x10a60, 0x10a7c; + 0x10a80, 0x10a9c; 0x10ac0, 0x10ac7; 0x10ac9, 0x10ae4; 0x10b00, 0x10b35; 0x10b40, 0x10b55; + 0x10b60, 0x10b72; 0x10b80, 0x10b91; 0x10c00, 0x10c48; 0x10c80, 0x10cb2; 0x10cc0, 0x10cf2; + 0x10d00, 0x10d27; 0x10d4a, 0x10d65; 0x10d69, 0x10d69; 0x10d6f, 0x10d85; 0x10e80, 0x10ea9; + 0x10eab, 0x10eac; 0x10eb0, 0x10eb1; 0x10ec2, 0x10ec7; 0x10efa, 0x10efc; 0x10f00, 0x10f1c; + 0x10f27, 0x10f27; 0x10f30, 0x10f45; 0x10f70, 0x10f81; 0x10fb0, 0x10fc4; 0x10fe0, 0x10ff6; + 0x11000, 0x11045; 0x11071, 0x11075; 0x11080, 0x110b8; 0x110c2, 0x110c2; 0x110d0, 0x110e8; + 0x11100, 0x11132; 0x11144, 0x11147; 0x11150, 0x11172; 0x11176, 0x11176; 0x11180, 0x111bf; + 0x111c1, 0x111c4; 0x111ce, 0x111cf; 0x111da, 0x111da; 0x111dc, 0x111dc; 0x11200, 0x11211; + 0x11213, 0x11234; 0x11237, 0x11237; 0x1123e, 0x11241; 0x11280, 0x11286; 0x11288, 0x11288; 
+ 0x1128a, 0x1128d; 0x1128f, 0x1129d; 0x1129f, 0x112a8; 0x112b0, 0x112e8; 0x11300, 0x11303; + 0x11305, 0x1130c; 0x1130f, 0x11310; 0x11313, 0x11328; 0x1132a, 0x11330; 0x11332, 0x11333; + 0x11335, 0x11339; 0x1133d, 0x11344; 0x11347, 0x11348; 0x1134b, 0x1134c; 0x11350, 0x11350; + 0x11357, 0x11357; 0x1135d, 0x11363; 0x11380, 0x11389; 0x1138b, 0x1138b; 0x1138e, 0x1138e; + 0x11390, 0x113b5; 0x113b7, 0x113c0; 0x113c2, 0x113c2; 0x113c5, 0x113c5; 0x113c7, 0x113ca; + 0x113cc, 0x113cd; 0x113d1, 0x113d1; 0x113d3, 0x113d3; 0x11400, 0x11441; 0x11443, 0x11445; + 0x11447, 0x1144a; 0x1145f, 0x11461; 0x11480, 0x114c1; 0x114c4, 0x114c5; 0x114c7, 0x114c7; + 0x11580, 0x115b5; 0x115b8, 0x115be; 0x115d8, 0x115dd; 0x11600, 0x1163e; 0x11640, 0x11640; + 0x11644, 0x11644; 0x11680, 0x116b5; 0x116b8, 0x116b8; 0x11700, 0x1171a; 0x1171d, 0x1172a; + 0x11740, 0x11746; 0x11800, 0x11838; 0x118a0, 0x118df; 0x118ff, 0x11906; 0x11909, 0x11909; + 0x1190c, 0x11913; 0x11915, 0x11916; 0x11918, 0x11935; 0x11937, 0x11938; 0x1193b, 0x1193c; + 0x1193f, 0x11942; 0x119a0, 0x119a7; 0x119aa, 0x119d7; 0x119da, 0x119df; 0x119e1, 0x119e1; + 0x119e3, 0x119e4; 0x11a00, 0x11a32; 0x11a35, 0x11a3e; 0x11a50, 0x11a97; 0x11a9d, 0x11a9d; + 0x11ab0, 0x11af8; 0x11b60, 0x11b67; 0x11bc0, 0x11be0; 0x11c00, 0x11c08; 0x11c0a, 0x11c36; + 0x11c38, 0x11c3e; 0x11c40, 0x11c40; 0x11c72, 0x11c8f; 0x11c92, 0x11ca7; 0x11ca9, 0x11cb6; + 0x11d00, 0x11d06; 0x11d08, 0x11d09; 0x11d0b, 0x11d36; 0x11d3a, 0x11d3a; 0x11d3c, 0x11d3d; + 0x11d3f, 0x11d41; 0x11d43, 0x11d43; 0x11d46, 0x11d47; 0x11d60, 0x11d65; 0x11d67, 0x11d68; + 0x11d6a, 0x11d8e; 0x11d90, 0x11d91; 0x11d93, 0x11d96; 0x11d98, 0x11d98; 0x11db0, 0x11ddb; 0x11ee0, 0x11ef6; 0x11f00, 0x11f10; 0x11f12, 0x11f3a; 0x11f3e, 0x11f40; 0x11fb0, 0x11fb0; 0x12000, 0x12399; 0x12400, 0x1246e; 0x12480, 0x12543; 0x12f90, 0x12ff0; 0x13000, 0x1342f; 0x13441, 0x13446; 0x13460, 0x143fa; 0x14400, 0x14646; 0x16100, 0x1612e; 0x16800, 0x16a38; 0x16a40, 0x16a5e; 0x16a70, 0x16abe; 0x16ad0, 0x16aed; 0x16b00, 0x16b2f; 
0x16b40, 0x16b43; - 0x16b63, 0x16b77; 0x16b7d, 0x16b8f; 0x16d40, 0x16d6c; 0x16e40, 0x16e7f; 0x16f00, 0x16f4a; - 0x16f4f, 0x16f87; 0x16f8f, 0x16f9f; 0x16fe0, 0x16fe1; 0x16fe3, 0x16fe3; 0x16ff0, 0x16ff1; - 0x17000, 0x187f7; 0x18800, 0x18cd5; 0x18cff, 0x18d08; 0x1aff0, 0x1aff3; 0x1aff5, 0x1affb; - 0x1affd, 0x1affe; 0x1b000, 0x1b122; 0x1b132, 0x1b132; 0x1b150, 0x1b152; 0x1b155, 0x1b155; - 0x1b164, 0x1b167; 0x1b170, 0x1b2fb; 0x1bc00, 0x1bc6a; 0x1bc70, 0x1bc7c; 0x1bc80, 0x1bc88; - 0x1bc90, 0x1bc99; 0x1bc9e, 0x1bc9e; 0x1d400, 0x1d454; 0x1d456, 0x1d49c; 0x1d49e, 0x1d49f; - 0x1d4a2, 0x1d4a2; 0x1d4a5, 0x1d4a6; 0x1d4a9, 0x1d4ac; 0x1d4ae, 0x1d4b9; 0x1d4bb, 0x1d4bb; - 0x1d4bd, 0x1d4c3; 0x1d4c5, 0x1d505; 0x1d507, 0x1d50a; 0x1d50d, 0x1d514; 0x1d516, 0x1d51c; - 0x1d51e, 0x1d539; 0x1d53b, 0x1d53e; 0x1d540, 0x1d544; 0x1d546, 0x1d546; 0x1d54a, 0x1d550; - 0x1d552, 0x1d6a5; 0x1d6a8, 0x1d6c0; 0x1d6c2, 0x1d6da; 0x1d6dc, 0x1d6fa; 0x1d6fc, 0x1d714; - 0x1d716, 0x1d734; 0x1d736, 0x1d74e; 0x1d750, 0x1d76e; 0x1d770, 0x1d788; 0x1d78a, 0x1d7a8; - 0x1d7aa, 0x1d7c2; 0x1d7c4, 0x1d7cb; 0x1df00, 0x1df1e; 0x1df25, 0x1df2a; 0x1e000, 0x1e006; - 0x1e008, 0x1e018; 0x1e01b, 0x1e021; 0x1e023, 0x1e024; 0x1e026, 0x1e02a; 0x1e030, 0x1e06d; - 0x1e08f, 0x1e08f; 0x1e100, 0x1e12c; 0x1e137, 0x1e13d; 0x1e14e, 0x1e14e; 0x1e290, 0x1e2ad; - 0x1e2c0, 0x1e2eb; 0x1e4d0, 0x1e4eb; 0x1e5d0, 0x1e5ed; 0x1e5f0, 0x1e5f0; 0x1e7e0, 0x1e7e6; + 0x16b63, 0x16b77; 0x16b7d, 0x16b8f; 0x16d40, 0x16d6c; 0x16e40, 0x16e7f; 0x16ea0, 0x16eb8; + 0x16ebb, 0x16ed3; 0x16f00, 0x16f4a; 0x16f4f, 0x16f87; 0x16f8f, 0x16f9f; 0x16fe0, 0x16fe1; + 0x16fe3, 0x16fe3; 0x16ff0, 0x16ff6; 0x17000, 0x18cd5; 0x18cff, 0x18d1e; 0x18d80, 0x18df2; + 0x1aff0, 0x1aff3; 0x1aff5, 0x1affb; 0x1affd, 0x1affe; 0x1b000, 0x1b122; 0x1b132, 0x1b132; + 0x1b150, 0x1b152; 0x1b155, 0x1b155; 0x1b164, 0x1b167; 0x1b170, 0x1b2fb; 0x1bc00, 0x1bc6a; + 0x1bc70, 0x1bc7c; 0x1bc80, 0x1bc88; 0x1bc90, 0x1bc99; 0x1bc9e, 0x1bc9e; 0x1d400, 0x1d454; + 0x1d456, 0x1d49c; 0x1d49e, 0x1d49f; 0x1d4a2, 
0x1d4a2; 0x1d4a5, 0x1d4a6; 0x1d4a9, 0x1d4ac; + 0x1d4ae, 0x1d4b9; 0x1d4bb, 0x1d4bb; 0x1d4bd, 0x1d4c3; 0x1d4c5, 0x1d505; 0x1d507, 0x1d50a; + 0x1d50d, 0x1d514; 0x1d516, 0x1d51c; 0x1d51e, 0x1d539; 0x1d53b, 0x1d53e; 0x1d540, 0x1d544; + 0x1d546, 0x1d546; 0x1d54a, 0x1d550; 0x1d552, 0x1d6a5; 0x1d6a8, 0x1d6c0; 0x1d6c2, 0x1d6da; + 0x1d6dc, 0x1d6fa; 0x1d6fc, 0x1d714; 0x1d716, 0x1d734; 0x1d736, 0x1d74e; 0x1d750, 0x1d76e; + 0x1d770, 0x1d788; 0x1d78a, 0x1d7a8; 0x1d7aa, 0x1d7c2; 0x1d7c4, 0x1d7cb; 0x1df00, 0x1df1e; + 0x1df25, 0x1df2a; 0x1e000, 0x1e006; 0x1e008, 0x1e018; 0x1e01b, 0x1e021; 0x1e023, 0x1e024; + 0x1e026, 0x1e02a; 0x1e030, 0x1e06d; 0x1e08f, 0x1e08f; 0x1e100, 0x1e12c; 0x1e137, 0x1e13d; + 0x1e14e, 0x1e14e; 0x1e290, 0x1e2ad; 0x1e2c0, 0x1e2eb; 0x1e4d0, 0x1e4eb; 0x1e5d0, 0x1e5ed; + 0x1e5f0, 0x1e5f0; 0x1e6c0, 0x1e6de; 0x1e6e0, 0x1e6f5; 0x1e6fe, 0x1e6ff; 0x1e7e0, 0x1e7e6; 0x1e7e8, 0x1e7eb; 0x1e7ed, 0x1e7ee; 0x1e7f0, 0x1e7fe; 0x1e800, 0x1e8c4; 0x1e900, 0x1e943; 0x1e947, 0x1e947; 0x1e94b, 0x1e94b; 0x1ee00, 0x1ee03; 0x1ee05, 0x1ee1f; 0x1ee21, 0x1ee22; 0x1ee24, 0x1ee24; 0x1ee27, 0x1ee27; 0x1ee29, 0x1ee32; 0x1ee34, 0x1ee37; 0x1ee39, 0x1ee39; @@ -1091,9 +1099,9 @@ module Properties = struct 0x1ee5b, 0x1ee5b; 0x1ee5d, 0x1ee5d; 0x1ee5f, 0x1ee5f; 0x1ee61, 0x1ee62; 0x1ee64, 0x1ee64; 0x1ee67, 0x1ee6a; 0x1ee6c, 0x1ee72; 0x1ee74, 0x1ee77; 0x1ee79, 0x1ee7c; 0x1ee7e, 0x1ee7e; 0x1ee80, 0x1ee89; 0x1ee8b, 0x1ee9b; 0x1eea1, 0x1eea3; 0x1eea5, 0x1eea9; 0x1eeab, 0x1eebb; - 0x1f130, 0x1f149; 0x1f150, 0x1f169; 0x1f170, 0x1f189; 0x20000, 0x2a6df; 0x2a700, 0x2b739; - 0x2b740, 0x2b81d; 0x2b820, 0x2cea1; 0x2ceb0, 0x2ebe0; 0x2ebf0, 0x2ee5d; 0x2f800, 0x2fa1d; - 0x30000, 0x3134a; 0x31350, 0x323af] + 0x1f130, 0x1f149; 0x1f150, 0x1f169; 0x1f170, 0x1f189; 0x20000, 0x2a6df; 0x2a700, 0x2b81d; + 0x2b820, 0x2cead; 0x2ceb0, 0x2ebe0; 0x2ebf0, 0x2ee5d; 0x2f800, 0x2fa1d; 0x30000, 0x3134a; + 0x31350, 0x33479] let ascii_hex_digit = Sedlex_cset.of_list [0x30, 0x39; 0x41, 0x46; 0x61, 0x66] @@ -1113,7 +1121,7 @@ module 
Properties = struct 0x5ef, 0x5f2; 0x610, 0x61a; 0x620, 0x669; 0x66e, 0x6d3; 0x6d5, 0x6dc; 0x6df, 0x6e8; 0x6ea, 0x6fc; 0x6ff, 0x6ff; 0x710, 0x74a; 0x74d, 0x7b1; 0x7c0, 0x7f5; 0x7fa, 0x7fa; 0x7fd, 0x7fd; 0x800, 0x82d; 0x840, 0x85b; - 0x860, 0x86a; 0x870, 0x887; 0x889, 0x88e; 0x897, 0x8e1; 0x8e3, 0x963; + 0x860, 0x86a; 0x870, 0x887; 0x889, 0x88f; 0x897, 0x8e1; 0x8e3, 0x963; 0x966, 0x96f; 0x971, 0x983; 0x985, 0x98c; 0x98f, 0x990; 0x993, 0x9a8; 0x9aa, 0x9b0; 0x9b2, 0x9b2; 0x9b6, 0x9b9; 0x9bc, 0x9c4; 0x9c7, 0x9c8; 0x9cb, 0x9ce; 0x9d7, 0x9d7; 0x9dc, 0x9dd; 0x9df, 0x9e3; 0x9e6, 0x9f1; @@ -1131,10 +1139,10 @@ module Properties = struct 0xba8, 0xbaa; 0xbae, 0xbb9; 0xbbe, 0xbc2; 0xbc6, 0xbc8; 0xbca, 0xbcd; 0xbd0, 0xbd0; 0xbd7, 0xbd7; 0xbe6, 0xbef; 0xc00, 0xc0c; 0xc0e, 0xc10; 0xc12, 0xc28; 0xc2a, 0xc39; 0xc3c, 0xc44; 0xc46, 0xc48; 0xc4a, 0xc4d; - 0xc55, 0xc56; 0xc58, 0xc5a; 0xc5d, 0xc5d; 0xc60, 0xc63; 0xc66, 0xc6f; + 0xc55, 0xc56; 0xc58, 0xc5a; 0xc5c, 0xc5d; 0xc60, 0xc63; 0xc66, 0xc6f; 0xc80, 0xc83; 0xc85, 0xc8c; 0xc8e, 0xc90; 0xc92, 0xca8; 0xcaa, 0xcb3; 0xcb5, 0xcb9; 0xcbc, 0xcc4; 0xcc6, 0xcc8; 0xcca, 0xccd; 0xcd5, 0xcd6; - 0xcdd, 0xcde; 0xce0, 0xce3; 0xce6, 0xcef; 0xcf1, 0xcf3; 0xd00, 0xd0c; + 0xcdc, 0xcde; 0xce0, 0xce3; 0xce6, 0xcef; 0xcf1, 0xcf3; 0xd00, 0xd0c; 0xd0e, 0xd10; 0xd12, 0xd44; 0xd46, 0xd48; 0xd4a, 0xd4e; 0xd54, 0xd57; 0xd5f, 0xd63; 0xd66, 0xd6f; 0xd7a, 0xd7f; 0xd81, 0xd83; 0xd85, 0xd96; 0xd9a, 0xdb1; 0xdb3, 0xdbb; 0xdbd, 0xdbd; 0xdc0, 0xdc6; 0xdca, 0xdca; @@ -1156,112 +1164,113 @@ module Properties = struct 0x1820, 0x1878; 0x1880, 0x18aa; 0x18b0, 0x18f5; 0x1900, 0x191e; 0x1920, 0x192b; 0x1930, 0x193b; 0x1946, 0x196d; 0x1970, 0x1974; 0x1980, 0x19ab; 0x19b0, 0x19c9; 0x19d0, 0x19da; 0x1a00, 0x1a1b; 0x1a20, 0x1a5e; 0x1a60, 0x1a7c; 0x1a7f, 0x1a89; - 0x1a90, 0x1a99; 0x1aa7, 0x1aa7; 0x1ab0, 0x1abd; 0x1abf, 0x1ace; 0x1b00, 0x1b4c; - 0x1b50, 0x1b59; 0x1b6b, 0x1b73; 0x1b80, 0x1bf3; 0x1c00, 0x1c37; 0x1c40, 0x1c49; - 0x1c4d, 0x1c7d; 0x1c80, 0x1c8a; 0x1c90, 0x1cba; 0x1cbd, 
0x1cbf; 0x1cd0, 0x1cd2; - 0x1cd4, 0x1cfa; 0x1d00, 0x1f15; 0x1f18, 0x1f1d; 0x1f20, 0x1f45; 0x1f48, 0x1f4d; - 0x1f50, 0x1f57; 0x1f59, 0x1f59; 0x1f5b, 0x1f5b; 0x1f5d, 0x1f5d; 0x1f5f, 0x1f7d; - 0x1f80, 0x1fb4; 0x1fb6, 0x1fbc; 0x1fbe, 0x1fbe; 0x1fc2, 0x1fc4; 0x1fc6, 0x1fcc; - 0x1fd0, 0x1fd3; 0x1fd6, 0x1fdb; 0x1fe0, 0x1fec; 0x1ff2, 0x1ff4; 0x1ff6, 0x1ffc; - 0x200c, 0x200d; 0x203f, 0x2040; 0x2054, 0x2054; 0x2071, 0x2071; 0x207f, 0x207f; - 0x2090, 0x209c; 0x20d0, 0x20dc; 0x20e1, 0x20e1; 0x20e5, 0x20f0; 0x2102, 0x2102; - 0x2107, 0x2107; 0x210a, 0x2113; 0x2115, 0x2115; 0x2118, 0x211d; 0x2124, 0x2124; - 0x2126, 0x2126; 0x2128, 0x2128; 0x212a, 0x2139; 0x213c, 0x213f; 0x2145, 0x2149; - 0x214e, 0x214e; 0x2160, 0x2188; 0x2c00, 0x2ce4; 0x2ceb, 0x2cf3; 0x2d00, 0x2d25; - 0x2d27, 0x2d27; 0x2d2d, 0x2d2d; 0x2d30, 0x2d67; 0x2d6f, 0x2d6f; 0x2d7f, 0x2d96; - 0x2da0, 0x2da6; 0x2da8, 0x2dae; 0x2db0, 0x2db6; 0x2db8, 0x2dbe; 0x2dc0, 0x2dc6; - 0x2dc8, 0x2dce; 0x2dd0, 0x2dd6; 0x2dd8, 0x2dde; 0x2de0, 0x2dff; 0x3005, 0x3007; - 0x3021, 0x302f; 0x3031, 0x3035; 0x3038, 0x303c; 0x3041, 0x3096; 0x3099, 0x309f; - 0x30a1, 0x30ff; 0x3105, 0x312f; 0x3131, 0x318e; 0x31a0, 0x31bf; 0x31f0, 0x31ff; - 0x3400, 0x4dbf; 0x4e00, 0xa48c; 0xa4d0, 0xa4fd; 0xa500, 0xa60c; 0xa610, 0xa62b; - 0xa640, 0xa66f; 0xa674, 0xa67d; 0xa67f, 0xa6f1; 0xa717, 0xa71f; 0xa722, 0xa788; - 0xa78b, 0xa7cd; 0xa7d0, 0xa7d1; 0xa7d3, 0xa7d3; 0xa7d5, 0xa7dc; 0xa7f2, 0xa827; - 0xa82c, 0xa82c; 0xa840, 0xa873; 0xa880, 0xa8c5; 0xa8d0, 0xa8d9; 0xa8e0, 0xa8f7; - 0xa8fb, 0xa8fb; 0xa8fd, 0xa92d; 0xa930, 0xa953; 0xa960, 0xa97c; 0xa980, 0xa9c0; - 0xa9cf, 0xa9d9; 0xa9e0, 0xa9fe; 0xaa00, 0xaa36; 0xaa40, 0xaa4d; 0xaa50, 0xaa59; - 0xaa60, 0xaa76; 0xaa7a, 0xaac2; 0xaadb, 0xaadd; 0xaae0, 0xaaef; 0xaaf2, 0xaaf6; - 0xab01, 0xab06; 0xab09, 0xab0e; 0xab11, 0xab16; 0xab20, 0xab26; 0xab28, 0xab2e; - 0xab30, 0xab5a; 0xab5c, 0xab69; 0xab70, 0xabea; 0xabec, 0xabed; 0xabf0, 0xabf9; - 0xac00, 0xd7a3; 0xd7b0, 0xd7c6; 0xd7cb, 0xd7fb; 0xf900, 0xfa6d; 0xfa70, 0xfad9; - 
0xfb00, 0xfb06; 0xfb13, 0xfb17; 0xfb1d, 0xfb28; 0xfb2a, 0xfb36; 0xfb38, 0xfb3c; - 0xfb3e, 0xfb3e; 0xfb40, 0xfb41; 0xfb43, 0xfb44; 0xfb46, 0xfbb1; 0xfbd3, 0xfd3d; - 0xfd50, 0xfd8f; 0xfd92, 0xfdc7; 0xfdf0, 0xfdfb; 0xfe00, 0xfe0f; 0xfe20, 0xfe2f; - 0xfe33, 0xfe34; 0xfe4d, 0xfe4f; 0xfe70, 0xfe74; 0xfe76, 0xfefc; 0xff10, 0xff19; - 0xff21, 0xff3a; 0xff3f, 0xff3f; 0xff41, 0xff5a; 0xff65, 0xffbe; 0xffc2, 0xffc7; - 0xffca, 0xffcf; 0xffd2, 0xffd7; 0xffda, 0xffdc; 0x10000, 0x1000b; 0x1000d, 0x10026; - 0x10028, 0x1003a; 0x1003c, 0x1003d; 0x1003f, 0x1004d; 0x10050, 0x1005d; 0x10080, 0x100fa; - 0x10140, 0x10174; 0x101fd, 0x101fd; 0x10280, 0x1029c; 0x102a0, 0x102d0; 0x102e0, 0x102e0; - 0x10300, 0x1031f; 0x1032d, 0x1034a; 0x10350, 0x1037a; 0x10380, 0x1039d; 0x103a0, 0x103c3; - 0x103c8, 0x103cf; 0x103d1, 0x103d5; 0x10400, 0x1049d; 0x104a0, 0x104a9; 0x104b0, 0x104d3; - 0x104d8, 0x104fb; 0x10500, 0x10527; 0x10530, 0x10563; 0x10570, 0x1057a; 0x1057c, 0x1058a; - 0x1058c, 0x10592; 0x10594, 0x10595; 0x10597, 0x105a1; 0x105a3, 0x105b1; 0x105b3, 0x105b9; - 0x105bb, 0x105bc; 0x105c0, 0x105f3; 0x10600, 0x10736; 0x10740, 0x10755; 0x10760, 0x10767; - 0x10780, 0x10785; 0x10787, 0x107b0; 0x107b2, 0x107ba; 0x10800, 0x10805; 0x10808, 0x10808; - 0x1080a, 0x10835; 0x10837, 0x10838; 0x1083c, 0x1083c; 0x1083f, 0x10855; 0x10860, 0x10876; - 0x10880, 0x1089e; 0x108e0, 0x108f2; 0x108f4, 0x108f5; 0x10900, 0x10915; 0x10920, 0x10939; - 0x10980, 0x109b7; 0x109be, 0x109bf; 0x10a00, 0x10a03; 0x10a05, 0x10a06; 0x10a0c, 0x10a13; - 0x10a15, 0x10a17; 0x10a19, 0x10a35; 0x10a38, 0x10a3a; 0x10a3f, 0x10a3f; 0x10a60, 0x10a7c; - 0x10a80, 0x10a9c; 0x10ac0, 0x10ac7; 0x10ac9, 0x10ae6; 0x10b00, 0x10b35; 0x10b40, 0x10b55; - 0x10b60, 0x10b72; 0x10b80, 0x10b91; 0x10c00, 0x10c48; 0x10c80, 0x10cb2; 0x10cc0, 0x10cf2; - 0x10d00, 0x10d27; 0x10d30, 0x10d39; 0x10d40, 0x10d65; 0x10d69, 0x10d6d; 0x10d6f, 0x10d85; - 0x10e80, 0x10ea9; 0x10eab, 0x10eac; 0x10eb0, 0x10eb1; 0x10ec2, 0x10ec4; 0x10efc, 0x10f1c; - 0x10f27, 0x10f27; 0x10f30, 
0x10f50; 0x10f70, 0x10f85; 0x10fb0, 0x10fc4; 0x10fe0, 0x10ff6; - 0x11000, 0x11046; 0x11066, 0x11075; 0x1107f, 0x110ba; 0x110c2, 0x110c2; 0x110d0, 0x110e8; - 0x110f0, 0x110f9; 0x11100, 0x11134; 0x11136, 0x1113f; 0x11144, 0x11147; 0x11150, 0x11173; - 0x11176, 0x11176; 0x11180, 0x111c4; 0x111c9, 0x111cc; 0x111ce, 0x111da; 0x111dc, 0x111dc; - 0x11200, 0x11211; 0x11213, 0x11237; 0x1123e, 0x11241; 0x11280, 0x11286; 0x11288, 0x11288; - 0x1128a, 0x1128d; 0x1128f, 0x1129d; 0x1129f, 0x112a8; 0x112b0, 0x112ea; 0x112f0, 0x112f9; - 0x11300, 0x11303; 0x11305, 0x1130c; 0x1130f, 0x11310; 0x11313, 0x11328; 0x1132a, 0x11330; - 0x11332, 0x11333; 0x11335, 0x11339; 0x1133b, 0x11344; 0x11347, 0x11348; 0x1134b, 0x1134d; - 0x11350, 0x11350; 0x11357, 0x11357; 0x1135d, 0x11363; 0x11366, 0x1136c; 0x11370, 0x11374; - 0x11380, 0x11389; 0x1138b, 0x1138b; 0x1138e, 0x1138e; 0x11390, 0x113b5; 0x113b7, 0x113c0; - 0x113c2, 0x113c2; 0x113c5, 0x113c5; 0x113c7, 0x113ca; 0x113cc, 0x113d3; 0x113e1, 0x113e2; - 0x11400, 0x1144a; 0x11450, 0x11459; 0x1145e, 0x11461; 0x11480, 0x114c5; 0x114c7, 0x114c7; - 0x114d0, 0x114d9; 0x11580, 0x115b5; 0x115b8, 0x115c0; 0x115d8, 0x115dd; 0x11600, 0x11640; - 0x11644, 0x11644; 0x11650, 0x11659; 0x11680, 0x116b8; 0x116c0, 0x116c9; 0x116d0, 0x116e3; - 0x11700, 0x1171a; 0x1171d, 0x1172b; 0x11730, 0x11739; 0x11740, 0x11746; 0x11800, 0x1183a; - 0x118a0, 0x118e9; 0x118ff, 0x11906; 0x11909, 0x11909; 0x1190c, 0x11913; 0x11915, 0x11916; - 0x11918, 0x11935; 0x11937, 0x11938; 0x1193b, 0x11943; 0x11950, 0x11959; 0x119a0, 0x119a7; - 0x119aa, 0x119d7; 0x119da, 0x119e1; 0x119e3, 0x119e4; 0x11a00, 0x11a3e; 0x11a47, 0x11a47; - 0x11a50, 0x11a99; 0x11a9d, 0x11a9d; 0x11ab0, 0x11af8; 0x11bc0, 0x11be0; 0x11bf0, 0x11bf9; + 0x1a90, 0x1a99; 0x1aa7, 0x1aa7; 0x1ab0, 0x1abd; 0x1abf, 0x1add; 0x1ae0, 0x1aeb; + 0x1b00, 0x1b4c; 0x1b50, 0x1b59; 0x1b6b, 0x1b73; 0x1b80, 0x1bf3; 0x1c00, 0x1c37; + 0x1c40, 0x1c49; 0x1c4d, 0x1c7d; 0x1c80, 0x1c8a; 0x1c90, 0x1cba; 0x1cbd, 0x1cbf; + 0x1cd0, 0x1cd2; 0x1cd4, 0x1cfa; 
0x1d00, 0x1f15; 0x1f18, 0x1f1d; 0x1f20, 0x1f45; + 0x1f48, 0x1f4d; 0x1f50, 0x1f57; 0x1f59, 0x1f59; 0x1f5b, 0x1f5b; 0x1f5d, 0x1f5d; + 0x1f5f, 0x1f7d; 0x1f80, 0x1fb4; 0x1fb6, 0x1fbc; 0x1fbe, 0x1fbe; 0x1fc2, 0x1fc4; + 0x1fc6, 0x1fcc; 0x1fd0, 0x1fd3; 0x1fd6, 0x1fdb; 0x1fe0, 0x1fec; 0x1ff2, 0x1ff4; + 0x1ff6, 0x1ffc; 0x200c, 0x200d; 0x203f, 0x2040; 0x2054, 0x2054; 0x2071, 0x2071; + 0x207f, 0x207f; 0x2090, 0x209c; 0x20d0, 0x20dc; 0x20e1, 0x20e1; 0x20e5, 0x20f0; + 0x2102, 0x2102; 0x2107, 0x2107; 0x210a, 0x2113; 0x2115, 0x2115; 0x2118, 0x211d; + 0x2124, 0x2124; 0x2126, 0x2126; 0x2128, 0x2128; 0x212a, 0x2139; 0x213c, 0x213f; + 0x2145, 0x2149; 0x214e, 0x214e; 0x2160, 0x2188; 0x2c00, 0x2ce4; 0x2ceb, 0x2cf3; + 0x2d00, 0x2d25; 0x2d27, 0x2d27; 0x2d2d, 0x2d2d; 0x2d30, 0x2d67; 0x2d6f, 0x2d6f; + 0x2d7f, 0x2d96; 0x2da0, 0x2da6; 0x2da8, 0x2dae; 0x2db0, 0x2db6; 0x2db8, 0x2dbe; + 0x2dc0, 0x2dc6; 0x2dc8, 0x2dce; 0x2dd0, 0x2dd6; 0x2dd8, 0x2dde; 0x2de0, 0x2dff; + 0x3005, 0x3007; 0x3021, 0x302f; 0x3031, 0x3035; 0x3038, 0x303c; 0x3041, 0x3096; + 0x3099, 0x309f; 0x30a1, 0x30ff; 0x3105, 0x312f; 0x3131, 0x318e; 0x31a0, 0x31bf; + 0x31f0, 0x31ff; 0x3400, 0x4dbf; 0x4e00, 0xa48c; 0xa4d0, 0xa4fd; 0xa500, 0xa60c; + 0xa610, 0xa62b; 0xa640, 0xa66f; 0xa674, 0xa67d; 0xa67f, 0xa6f1; 0xa717, 0xa71f; + 0xa722, 0xa788; 0xa78b, 0xa7dc; 0xa7f1, 0xa827; 0xa82c, 0xa82c; 0xa840, 0xa873; + 0xa880, 0xa8c5; 0xa8d0, 0xa8d9; 0xa8e0, 0xa8f7; 0xa8fb, 0xa8fb; 0xa8fd, 0xa92d; + 0xa930, 0xa953; 0xa960, 0xa97c; 0xa980, 0xa9c0; 0xa9cf, 0xa9d9; 0xa9e0, 0xa9fe; + 0xaa00, 0xaa36; 0xaa40, 0xaa4d; 0xaa50, 0xaa59; 0xaa60, 0xaa76; 0xaa7a, 0xaac2; + 0xaadb, 0xaadd; 0xaae0, 0xaaef; 0xaaf2, 0xaaf6; 0xab01, 0xab06; 0xab09, 0xab0e; + 0xab11, 0xab16; 0xab20, 0xab26; 0xab28, 0xab2e; 0xab30, 0xab5a; 0xab5c, 0xab69; + 0xab70, 0xabea; 0xabec, 0xabed; 0xabf0, 0xabf9; 0xac00, 0xd7a3; 0xd7b0, 0xd7c6; + 0xd7cb, 0xd7fb; 0xf900, 0xfa6d; 0xfa70, 0xfad9; 0xfb00, 0xfb06; 0xfb13, 0xfb17; + 0xfb1d, 0xfb28; 0xfb2a, 0xfb36; 0xfb38, 0xfb3c; 0xfb3e, 0xfb3e; 
0xfb40, 0xfb41; + 0xfb43, 0xfb44; 0xfb46, 0xfbb1; 0xfbd3, 0xfd3d; 0xfd50, 0xfd8f; 0xfd92, 0xfdc7; + 0xfdf0, 0xfdfb; 0xfe00, 0xfe0f; 0xfe20, 0xfe2f; 0xfe33, 0xfe34; 0xfe4d, 0xfe4f; + 0xfe70, 0xfe74; 0xfe76, 0xfefc; 0xff10, 0xff19; 0xff21, 0xff3a; 0xff3f, 0xff3f; + 0xff41, 0xff5a; 0xff65, 0xffbe; 0xffc2, 0xffc7; 0xffca, 0xffcf; 0xffd2, 0xffd7; + 0xffda, 0xffdc; 0x10000, 0x1000b; 0x1000d, 0x10026; 0x10028, 0x1003a; 0x1003c, 0x1003d; + 0x1003f, 0x1004d; 0x10050, 0x1005d; 0x10080, 0x100fa; 0x10140, 0x10174; 0x101fd, 0x101fd; + 0x10280, 0x1029c; 0x102a0, 0x102d0; 0x102e0, 0x102e0; 0x10300, 0x1031f; 0x1032d, 0x1034a; + 0x10350, 0x1037a; 0x10380, 0x1039d; 0x103a0, 0x103c3; 0x103c8, 0x103cf; 0x103d1, 0x103d5; + 0x10400, 0x1049d; 0x104a0, 0x104a9; 0x104b0, 0x104d3; 0x104d8, 0x104fb; 0x10500, 0x10527; + 0x10530, 0x10563; 0x10570, 0x1057a; 0x1057c, 0x1058a; 0x1058c, 0x10592; 0x10594, 0x10595; + 0x10597, 0x105a1; 0x105a3, 0x105b1; 0x105b3, 0x105b9; 0x105bb, 0x105bc; 0x105c0, 0x105f3; + 0x10600, 0x10736; 0x10740, 0x10755; 0x10760, 0x10767; 0x10780, 0x10785; 0x10787, 0x107b0; + 0x107b2, 0x107ba; 0x10800, 0x10805; 0x10808, 0x10808; 0x1080a, 0x10835; 0x10837, 0x10838; + 0x1083c, 0x1083c; 0x1083f, 0x10855; 0x10860, 0x10876; 0x10880, 0x1089e; 0x108e0, 0x108f2; + 0x108f4, 0x108f5; 0x10900, 0x10915; 0x10920, 0x10939; 0x10940, 0x10959; 0x10980, 0x109b7; + 0x109be, 0x109bf; 0x10a00, 0x10a03; 0x10a05, 0x10a06; 0x10a0c, 0x10a13; 0x10a15, 0x10a17; + 0x10a19, 0x10a35; 0x10a38, 0x10a3a; 0x10a3f, 0x10a3f; 0x10a60, 0x10a7c; 0x10a80, 0x10a9c; + 0x10ac0, 0x10ac7; 0x10ac9, 0x10ae6; 0x10b00, 0x10b35; 0x10b40, 0x10b55; 0x10b60, 0x10b72; + 0x10b80, 0x10b91; 0x10c00, 0x10c48; 0x10c80, 0x10cb2; 0x10cc0, 0x10cf2; 0x10d00, 0x10d27; + 0x10d30, 0x10d39; 0x10d40, 0x10d65; 0x10d69, 0x10d6d; 0x10d6f, 0x10d85; 0x10e80, 0x10ea9; + 0x10eab, 0x10eac; 0x10eb0, 0x10eb1; 0x10ec2, 0x10ec7; 0x10efa, 0x10f1c; 0x10f27, 0x10f27; + 0x10f30, 0x10f50; 0x10f70, 0x10f85; 0x10fb0, 0x10fc4; 0x10fe0, 0x10ff6; 0x11000, 0x11046; + 
0x11066, 0x11075; 0x1107f, 0x110ba; 0x110c2, 0x110c2; 0x110d0, 0x110e8; 0x110f0, 0x110f9; + 0x11100, 0x11134; 0x11136, 0x1113f; 0x11144, 0x11147; 0x11150, 0x11173; 0x11176, 0x11176; + 0x11180, 0x111c4; 0x111c9, 0x111cc; 0x111ce, 0x111da; 0x111dc, 0x111dc; 0x11200, 0x11211; + 0x11213, 0x11237; 0x1123e, 0x11241; 0x11280, 0x11286; 0x11288, 0x11288; 0x1128a, 0x1128d; + 0x1128f, 0x1129d; 0x1129f, 0x112a8; 0x112b0, 0x112ea; 0x112f0, 0x112f9; 0x11300, 0x11303; + 0x11305, 0x1130c; 0x1130f, 0x11310; 0x11313, 0x11328; 0x1132a, 0x11330; 0x11332, 0x11333; + 0x11335, 0x11339; 0x1133b, 0x11344; 0x11347, 0x11348; 0x1134b, 0x1134d; 0x11350, 0x11350; + 0x11357, 0x11357; 0x1135d, 0x11363; 0x11366, 0x1136c; 0x11370, 0x11374; 0x11380, 0x11389; + 0x1138b, 0x1138b; 0x1138e, 0x1138e; 0x11390, 0x113b5; 0x113b7, 0x113c0; 0x113c2, 0x113c2; + 0x113c5, 0x113c5; 0x113c7, 0x113ca; 0x113cc, 0x113d3; 0x113e1, 0x113e2; 0x11400, 0x1144a; + 0x11450, 0x11459; 0x1145e, 0x11461; 0x11480, 0x114c5; 0x114c7, 0x114c7; 0x114d0, 0x114d9; + 0x11580, 0x115b5; 0x115b8, 0x115c0; 0x115d8, 0x115dd; 0x11600, 0x11640; 0x11644, 0x11644; + 0x11650, 0x11659; 0x11680, 0x116b8; 0x116c0, 0x116c9; 0x116d0, 0x116e3; 0x11700, 0x1171a; + 0x1171d, 0x1172b; 0x11730, 0x11739; 0x11740, 0x11746; 0x11800, 0x1183a; 0x118a0, 0x118e9; + 0x118ff, 0x11906; 0x11909, 0x11909; 0x1190c, 0x11913; 0x11915, 0x11916; 0x11918, 0x11935; + 0x11937, 0x11938; 0x1193b, 0x11943; 0x11950, 0x11959; 0x119a0, 0x119a7; 0x119aa, 0x119d7; + 0x119da, 0x119e1; 0x119e3, 0x119e4; 0x11a00, 0x11a3e; 0x11a47, 0x11a47; 0x11a50, 0x11a99; + 0x11a9d, 0x11a9d; 0x11ab0, 0x11af8; 0x11b60, 0x11b67; 0x11bc0, 0x11be0; 0x11bf0, 0x11bf9; 0x11c00, 0x11c08; 0x11c0a, 0x11c36; 0x11c38, 0x11c40; 0x11c50, 0x11c59; 0x11c72, 0x11c8f; 0x11c92, 0x11ca7; 0x11ca9, 0x11cb6; 0x11d00, 0x11d06; 0x11d08, 0x11d09; 0x11d0b, 0x11d36; 0x11d3a, 0x11d3a; 0x11d3c, 0x11d3d; 0x11d3f, 0x11d47; 0x11d50, 0x11d59; 0x11d60, 0x11d65; 0x11d67, 0x11d68; 0x11d6a, 0x11d8e; 0x11d90, 0x11d91; 0x11d93, 0x11d98; 
0x11da0, 0x11da9; - 0x11ee0, 0x11ef6; 0x11f00, 0x11f10; 0x11f12, 0x11f3a; 0x11f3e, 0x11f42; 0x11f50, 0x11f5a; - 0x11fb0, 0x11fb0; 0x12000, 0x12399; 0x12400, 0x1246e; 0x12480, 0x12543; 0x12f90, 0x12ff0; - 0x13000, 0x1342f; 0x13440, 0x13455; 0x13460, 0x143fa; 0x14400, 0x14646; 0x16100, 0x16139; - 0x16800, 0x16a38; 0x16a40, 0x16a5e; 0x16a60, 0x16a69; 0x16a70, 0x16abe; 0x16ac0, 0x16ac9; - 0x16ad0, 0x16aed; 0x16af0, 0x16af4; 0x16b00, 0x16b36; 0x16b40, 0x16b43; 0x16b50, 0x16b59; - 0x16b63, 0x16b77; 0x16b7d, 0x16b8f; 0x16d40, 0x16d6c; 0x16d70, 0x16d79; 0x16e40, 0x16e7f; - 0x16f00, 0x16f4a; 0x16f4f, 0x16f87; 0x16f8f, 0x16f9f; 0x16fe0, 0x16fe1; 0x16fe3, 0x16fe4; - 0x16ff0, 0x16ff1; 0x17000, 0x187f7; 0x18800, 0x18cd5; 0x18cff, 0x18d08; 0x1aff0, 0x1aff3; - 0x1aff5, 0x1affb; 0x1affd, 0x1affe; 0x1b000, 0x1b122; 0x1b132, 0x1b132; 0x1b150, 0x1b152; - 0x1b155, 0x1b155; 0x1b164, 0x1b167; 0x1b170, 0x1b2fb; 0x1bc00, 0x1bc6a; 0x1bc70, 0x1bc7c; - 0x1bc80, 0x1bc88; 0x1bc90, 0x1bc99; 0x1bc9d, 0x1bc9e; 0x1ccf0, 0x1ccf9; 0x1cf00, 0x1cf2d; - 0x1cf30, 0x1cf46; 0x1d165, 0x1d169; 0x1d16d, 0x1d172; 0x1d17b, 0x1d182; 0x1d185, 0x1d18b; - 0x1d1aa, 0x1d1ad; 0x1d242, 0x1d244; 0x1d400, 0x1d454; 0x1d456, 0x1d49c; 0x1d49e, 0x1d49f; - 0x1d4a2, 0x1d4a2; 0x1d4a5, 0x1d4a6; 0x1d4a9, 0x1d4ac; 0x1d4ae, 0x1d4b9; 0x1d4bb, 0x1d4bb; - 0x1d4bd, 0x1d4c3; 0x1d4c5, 0x1d505; 0x1d507, 0x1d50a; 0x1d50d, 0x1d514; 0x1d516, 0x1d51c; - 0x1d51e, 0x1d539; 0x1d53b, 0x1d53e; 0x1d540, 0x1d544; 0x1d546, 0x1d546; 0x1d54a, 0x1d550; - 0x1d552, 0x1d6a5; 0x1d6a8, 0x1d6c0; 0x1d6c2, 0x1d6da; 0x1d6dc, 0x1d6fa; 0x1d6fc, 0x1d714; - 0x1d716, 0x1d734; 0x1d736, 0x1d74e; 0x1d750, 0x1d76e; 0x1d770, 0x1d788; 0x1d78a, 0x1d7a8; - 0x1d7aa, 0x1d7c2; 0x1d7c4, 0x1d7cb; 0x1d7ce, 0x1d7ff; 0x1da00, 0x1da36; 0x1da3b, 0x1da6c; - 0x1da75, 0x1da75; 0x1da84, 0x1da84; 0x1da9b, 0x1da9f; 0x1daa1, 0x1daaf; 0x1df00, 0x1df1e; - 0x1df25, 0x1df2a; 0x1e000, 0x1e006; 0x1e008, 0x1e018; 0x1e01b, 0x1e021; 0x1e023, 0x1e024; - 0x1e026, 0x1e02a; 0x1e030, 0x1e06d; 0x1e08f, 
0x1e08f; 0x1e100, 0x1e12c; 0x1e130, 0x1e13d; - 0x1e140, 0x1e149; 0x1e14e, 0x1e14e; 0x1e290, 0x1e2ae; 0x1e2c0, 0x1e2f9; 0x1e4d0, 0x1e4f9; - 0x1e5d0, 0x1e5fa; 0x1e7e0, 0x1e7e6; 0x1e7e8, 0x1e7eb; 0x1e7ed, 0x1e7ee; 0x1e7f0, 0x1e7fe; - 0x1e800, 0x1e8c4; 0x1e8d0, 0x1e8d6; 0x1e900, 0x1e94b; 0x1e950, 0x1e959; 0x1ee00, 0x1ee03; - 0x1ee05, 0x1ee1f; 0x1ee21, 0x1ee22; 0x1ee24, 0x1ee24; 0x1ee27, 0x1ee27; 0x1ee29, 0x1ee32; - 0x1ee34, 0x1ee37; 0x1ee39, 0x1ee39; 0x1ee3b, 0x1ee3b; 0x1ee42, 0x1ee42; 0x1ee47, 0x1ee47; - 0x1ee49, 0x1ee49; 0x1ee4b, 0x1ee4b; 0x1ee4d, 0x1ee4f; 0x1ee51, 0x1ee52; 0x1ee54, 0x1ee54; - 0x1ee57, 0x1ee57; 0x1ee59, 0x1ee59; 0x1ee5b, 0x1ee5b; 0x1ee5d, 0x1ee5d; 0x1ee5f, 0x1ee5f; - 0x1ee61, 0x1ee62; 0x1ee64, 0x1ee64; 0x1ee67, 0x1ee6a; 0x1ee6c, 0x1ee72; 0x1ee74, 0x1ee77; - 0x1ee79, 0x1ee7c; 0x1ee7e, 0x1ee7e; 0x1ee80, 0x1ee89; 0x1ee8b, 0x1ee9b; 0x1eea1, 0x1eea3; - 0x1eea5, 0x1eea9; 0x1eeab, 0x1eebb; 0x1fbf0, 0x1fbf9; 0x20000, 0x2a6df; 0x2a700, 0x2b739; - 0x2b740, 0x2b81d; 0x2b820, 0x2cea1; 0x2ceb0, 0x2ebe0; 0x2ebf0, 0x2ee5d; 0x2f800, 0x2fa1d; - 0x30000, 0x3134a; 0x31350, 0x323af; 0xe0100, 0xe01ef] + 0x11db0, 0x11ddb; 0x11de0, 0x11de9; 0x11ee0, 0x11ef6; 0x11f00, 0x11f10; 0x11f12, 0x11f3a; + 0x11f3e, 0x11f42; 0x11f50, 0x11f5a; 0x11fb0, 0x11fb0; 0x12000, 0x12399; 0x12400, 0x1246e; + 0x12480, 0x12543; 0x12f90, 0x12ff0; 0x13000, 0x1342f; 0x13440, 0x13455; 0x13460, 0x143fa; + 0x14400, 0x14646; 0x16100, 0x16139; 0x16800, 0x16a38; 0x16a40, 0x16a5e; 0x16a60, 0x16a69; + 0x16a70, 0x16abe; 0x16ac0, 0x16ac9; 0x16ad0, 0x16aed; 0x16af0, 0x16af4; 0x16b00, 0x16b36; + 0x16b40, 0x16b43; 0x16b50, 0x16b59; 0x16b63, 0x16b77; 0x16b7d, 0x16b8f; 0x16d40, 0x16d6c; + 0x16d70, 0x16d79; 0x16e40, 0x16e7f; 0x16ea0, 0x16eb8; 0x16ebb, 0x16ed3; 0x16f00, 0x16f4a; + 0x16f4f, 0x16f87; 0x16f8f, 0x16f9f; 0x16fe0, 0x16fe1; 0x16fe3, 0x16fe4; 0x16ff0, 0x16ff6; + 0x17000, 0x18cd5; 0x18cff, 0x18d1e; 0x18d80, 0x18df2; 0x1aff0, 0x1aff3; 0x1aff5, 0x1affb; + 0x1affd, 0x1affe; 0x1b000, 0x1b122; 0x1b132, 0x1b132; 
0x1b150, 0x1b152; 0x1b155, 0x1b155; + 0x1b164, 0x1b167; 0x1b170, 0x1b2fb; 0x1bc00, 0x1bc6a; 0x1bc70, 0x1bc7c; 0x1bc80, 0x1bc88; + 0x1bc90, 0x1bc99; 0x1bc9d, 0x1bc9e; 0x1ccf0, 0x1ccf9; 0x1cf00, 0x1cf2d; 0x1cf30, 0x1cf46; + 0x1d165, 0x1d169; 0x1d16d, 0x1d172; 0x1d17b, 0x1d182; 0x1d185, 0x1d18b; 0x1d1aa, 0x1d1ad; + 0x1d242, 0x1d244; 0x1d400, 0x1d454; 0x1d456, 0x1d49c; 0x1d49e, 0x1d49f; 0x1d4a2, 0x1d4a2; + 0x1d4a5, 0x1d4a6; 0x1d4a9, 0x1d4ac; 0x1d4ae, 0x1d4b9; 0x1d4bb, 0x1d4bb; 0x1d4bd, 0x1d4c3; + 0x1d4c5, 0x1d505; 0x1d507, 0x1d50a; 0x1d50d, 0x1d514; 0x1d516, 0x1d51c; 0x1d51e, 0x1d539; + 0x1d53b, 0x1d53e; 0x1d540, 0x1d544; 0x1d546, 0x1d546; 0x1d54a, 0x1d550; 0x1d552, 0x1d6a5; + 0x1d6a8, 0x1d6c0; 0x1d6c2, 0x1d6da; 0x1d6dc, 0x1d6fa; 0x1d6fc, 0x1d714; 0x1d716, 0x1d734; + 0x1d736, 0x1d74e; 0x1d750, 0x1d76e; 0x1d770, 0x1d788; 0x1d78a, 0x1d7a8; 0x1d7aa, 0x1d7c2; + 0x1d7c4, 0x1d7cb; 0x1d7ce, 0x1d7ff; 0x1da00, 0x1da36; 0x1da3b, 0x1da6c; 0x1da75, 0x1da75; + 0x1da84, 0x1da84; 0x1da9b, 0x1da9f; 0x1daa1, 0x1daaf; 0x1df00, 0x1df1e; 0x1df25, 0x1df2a; + 0x1e000, 0x1e006; 0x1e008, 0x1e018; 0x1e01b, 0x1e021; 0x1e023, 0x1e024; 0x1e026, 0x1e02a; + 0x1e030, 0x1e06d; 0x1e08f, 0x1e08f; 0x1e100, 0x1e12c; 0x1e130, 0x1e13d; 0x1e140, 0x1e149; + 0x1e14e, 0x1e14e; 0x1e290, 0x1e2ae; 0x1e2c0, 0x1e2f9; 0x1e4d0, 0x1e4f9; 0x1e5d0, 0x1e5fa; + 0x1e6c0, 0x1e6de; 0x1e6e0, 0x1e6f5; 0x1e6fe, 0x1e6ff; 0x1e7e0, 0x1e7e6; 0x1e7e8, 0x1e7eb; + 0x1e7ed, 0x1e7ee; 0x1e7f0, 0x1e7fe; 0x1e800, 0x1e8c4; 0x1e8d0, 0x1e8d6; 0x1e900, 0x1e94b; + 0x1e950, 0x1e959; 0x1ee00, 0x1ee03; 0x1ee05, 0x1ee1f; 0x1ee21, 0x1ee22; 0x1ee24, 0x1ee24; + 0x1ee27, 0x1ee27; 0x1ee29, 0x1ee32; 0x1ee34, 0x1ee37; 0x1ee39, 0x1ee39; 0x1ee3b, 0x1ee3b; + 0x1ee42, 0x1ee42; 0x1ee47, 0x1ee47; 0x1ee49, 0x1ee49; 0x1ee4b, 0x1ee4b; 0x1ee4d, 0x1ee4f; + 0x1ee51, 0x1ee52; 0x1ee54, 0x1ee54; 0x1ee57, 0x1ee57; 0x1ee59, 0x1ee59; 0x1ee5b, 0x1ee5b; + 0x1ee5d, 0x1ee5d; 0x1ee5f, 0x1ee5f; 0x1ee61, 0x1ee62; 0x1ee64, 0x1ee64; 0x1ee67, 0x1ee6a; + 0x1ee6c, 0x1ee72; 0x1ee74, 
0x1ee77; 0x1ee79, 0x1ee7c; 0x1ee7e, 0x1ee7e; 0x1ee80, 0x1ee89; + 0x1ee8b, 0x1ee9b; 0x1eea1, 0x1eea3; 0x1eea5, 0x1eea9; 0x1eeab, 0x1eebb; 0x1fbf0, 0x1fbf9; + 0x20000, 0x2a6df; 0x2a700, 0x2b81d; 0x2b820, 0x2cead; 0x2ceb0, 0x2ebe0; 0x2ebf0, 0x2ee5d; + 0x2f800, 0x2fa1d; 0x30000, 0x3134a; 0x31350, 0x33479; 0xe0100, 0xe01ef] let id_start = Sedlex_cset.of_list [0x41, 0x5a; 0x61, 0x7a; 0xaa, 0xaa; 0xb5, 0xb5; 0xba, 0xba; @@ -1274,7 +1283,7 @@ module Properties = struct 0x6ff, 0x6ff; 0x710, 0x710; 0x712, 0x72f; 0x74d, 0x7a5; 0x7b1, 0x7b1; 0x7ca, 0x7ea; 0x7f4, 0x7f5; 0x7fa, 0x7fa; 0x800, 0x815; 0x81a, 0x81a; 0x824, 0x824; 0x828, 0x828; 0x840, 0x858; 0x860, 0x86a; 0x870, 0x887; - 0x889, 0x88e; 0x8a0, 0x8c9; 0x904, 0x939; 0x93d, 0x93d; 0x950, 0x950; + 0x889, 0x88f; 0x8a0, 0x8c9; 0x904, 0x939; 0x93d, 0x93d; 0x950, 0x950; 0x958, 0x961; 0x971, 0x980; 0x985, 0x98c; 0x98f, 0x990; 0x993, 0x9a8; 0x9aa, 0x9b0; 0x9b2, 0x9b2; 0x9b6, 0x9b9; 0x9bd, 0x9bd; 0x9ce, 0x9ce; 0x9dc, 0x9dd; 0x9df, 0x9e1; 0x9f0, 0x9f1; 0x9fc, 0x9fc; 0xa05, 0xa0a; @@ -1287,9 +1296,9 @@ module Properties = struct 0xb85, 0xb8a; 0xb8e, 0xb90; 0xb92, 0xb95; 0xb99, 0xb9a; 0xb9c, 0xb9c; 0xb9e, 0xb9f; 0xba3, 0xba4; 0xba8, 0xbaa; 0xbae, 0xbb9; 0xbd0, 0xbd0; 0xc05, 0xc0c; 0xc0e, 0xc10; 0xc12, 0xc28; 0xc2a, 0xc39; 0xc3d, 0xc3d; - 0xc58, 0xc5a; 0xc5d, 0xc5d; 0xc60, 0xc61; 0xc80, 0xc80; 0xc85, 0xc8c; + 0xc58, 0xc5a; 0xc5c, 0xc5d; 0xc60, 0xc61; 0xc80, 0xc80; 0xc85, 0xc8c; 0xc8e, 0xc90; 0xc92, 0xca8; 0xcaa, 0xcb3; 0xcb5, 0xcb9; 0xcbd, 0xcbd; - 0xcdd, 0xcde; 0xce0, 0xce1; 0xcf1, 0xcf2; 0xd04, 0xd0c; 0xd0e, 0xd10; + 0xcdc, 0xcde; 0xce0, 0xce1; 0xcf1, 0xcf2; 0xd04, 0xd0c; 0xd0e, 0xd10; 0xd12, 0xd3a; 0xd3d, 0xd3d; 0xd4e, 0xd4e; 0xd54, 0xd56; 0xd5f, 0xd61; 0xd7a, 0xd7f; 0xd85, 0xd96; 0xd9a, 0xdb1; 0xdb3, 0xdbb; 0xdbd, 0xdbd; 0xdc0, 0xdc6; 0xe01, 0xe30; 0xe32, 0xe33; 0xe40, 0xe46; 0xe81, 0xe82; @@ -1325,81 +1334,82 @@ module Properties = struct 0x30a1, 0x30fa; 0x30fc, 0x30ff; 0x3105, 0x312f; 0x3131, 0x318e; 0x31a0, 0x31bf; 0x31f0, 
0x31ff; 0x3400, 0x4dbf; 0x4e00, 0xa48c; 0xa4d0, 0xa4fd; 0xa500, 0xa60c; 0xa610, 0xa61f; 0xa62a, 0xa62b; 0xa640, 0xa66e; 0xa67f, 0xa69d; 0xa6a0, 0xa6ef; - 0xa717, 0xa71f; 0xa722, 0xa788; 0xa78b, 0xa7cd; 0xa7d0, 0xa7d1; 0xa7d3, 0xa7d3; - 0xa7d5, 0xa7dc; 0xa7f2, 0xa801; 0xa803, 0xa805; 0xa807, 0xa80a; 0xa80c, 0xa822; - 0xa840, 0xa873; 0xa882, 0xa8b3; 0xa8f2, 0xa8f7; 0xa8fb, 0xa8fb; 0xa8fd, 0xa8fe; - 0xa90a, 0xa925; 0xa930, 0xa946; 0xa960, 0xa97c; 0xa984, 0xa9b2; 0xa9cf, 0xa9cf; - 0xa9e0, 0xa9e4; 0xa9e6, 0xa9ef; 0xa9fa, 0xa9fe; 0xaa00, 0xaa28; 0xaa40, 0xaa42; - 0xaa44, 0xaa4b; 0xaa60, 0xaa76; 0xaa7a, 0xaa7a; 0xaa7e, 0xaaaf; 0xaab1, 0xaab1; - 0xaab5, 0xaab6; 0xaab9, 0xaabd; 0xaac0, 0xaac0; 0xaac2, 0xaac2; 0xaadb, 0xaadd; - 0xaae0, 0xaaea; 0xaaf2, 0xaaf4; 0xab01, 0xab06; 0xab09, 0xab0e; 0xab11, 0xab16; - 0xab20, 0xab26; 0xab28, 0xab2e; 0xab30, 0xab5a; 0xab5c, 0xab69; 0xab70, 0xabe2; - 0xac00, 0xd7a3; 0xd7b0, 0xd7c6; 0xd7cb, 0xd7fb; 0xf900, 0xfa6d; 0xfa70, 0xfad9; - 0xfb00, 0xfb06; 0xfb13, 0xfb17; 0xfb1d, 0xfb1d; 0xfb1f, 0xfb28; 0xfb2a, 0xfb36; - 0xfb38, 0xfb3c; 0xfb3e, 0xfb3e; 0xfb40, 0xfb41; 0xfb43, 0xfb44; 0xfb46, 0xfbb1; - 0xfbd3, 0xfd3d; 0xfd50, 0xfd8f; 0xfd92, 0xfdc7; 0xfdf0, 0xfdfb; 0xfe70, 0xfe74; - 0xfe76, 0xfefc; 0xff21, 0xff3a; 0xff41, 0xff5a; 0xff66, 0xffbe; 0xffc2, 0xffc7; - 0xffca, 0xffcf; 0xffd2, 0xffd7; 0xffda, 0xffdc; 0x10000, 0x1000b; 0x1000d, 0x10026; - 0x10028, 0x1003a; 0x1003c, 0x1003d; 0x1003f, 0x1004d; 0x10050, 0x1005d; 0x10080, 0x100fa; - 0x10140, 0x10174; 0x10280, 0x1029c; 0x102a0, 0x102d0; 0x10300, 0x1031f; 0x1032d, 0x1034a; - 0x10350, 0x10375; 0x10380, 0x1039d; 0x103a0, 0x103c3; 0x103c8, 0x103cf; 0x103d1, 0x103d5; - 0x10400, 0x1049d; 0x104b0, 0x104d3; 0x104d8, 0x104fb; 0x10500, 0x10527; 0x10530, 0x10563; - 0x10570, 0x1057a; 0x1057c, 0x1058a; 0x1058c, 0x10592; 0x10594, 0x10595; 0x10597, 0x105a1; - 0x105a3, 0x105b1; 0x105b3, 0x105b9; 0x105bb, 0x105bc; 0x105c0, 0x105f3; 0x10600, 0x10736; - 0x10740, 0x10755; 0x10760, 0x10767; 0x10780, 0x10785; 
0x10787, 0x107b0; 0x107b2, 0x107ba; - 0x10800, 0x10805; 0x10808, 0x10808; 0x1080a, 0x10835; 0x10837, 0x10838; 0x1083c, 0x1083c; - 0x1083f, 0x10855; 0x10860, 0x10876; 0x10880, 0x1089e; 0x108e0, 0x108f2; 0x108f4, 0x108f5; - 0x10900, 0x10915; 0x10920, 0x10939; 0x10980, 0x109b7; 0x109be, 0x109bf; 0x10a00, 0x10a00; - 0x10a10, 0x10a13; 0x10a15, 0x10a17; 0x10a19, 0x10a35; 0x10a60, 0x10a7c; 0x10a80, 0x10a9c; - 0x10ac0, 0x10ac7; 0x10ac9, 0x10ae4; 0x10b00, 0x10b35; 0x10b40, 0x10b55; 0x10b60, 0x10b72; - 0x10b80, 0x10b91; 0x10c00, 0x10c48; 0x10c80, 0x10cb2; 0x10cc0, 0x10cf2; 0x10d00, 0x10d23; - 0x10d4a, 0x10d65; 0x10d6f, 0x10d85; 0x10e80, 0x10ea9; 0x10eb0, 0x10eb1; 0x10ec2, 0x10ec4; - 0x10f00, 0x10f1c; 0x10f27, 0x10f27; 0x10f30, 0x10f45; 0x10f70, 0x10f81; 0x10fb0, 0x10fc4; - 0x10fe0, 0x10ff6; 0x11003, 0x11037; 0x11071, 0x11072; 0x11075, 0x11075; 0x11083, 0x110af; - 0x110d0, 0x110e8; 0x11103, 0x11126; 0x11144, 0x11144; 0x11147, 0x11147; 0x11150, 0x11172; - 0x11176, 0x11176; 0x11183, 0x111b2; 0x111c1, 0x111c4; 0x111da, 0x111da; 0x111dc, 0x111dc; - 0x11200, 0x11211; 0x11213, 0x1122b; 0x1123f, 0x11240; 0x11280, 0x11286; 0x11288, 0x11288; - 0x1128a, 0x1128d; 0x1128f, 0x1129d; 0x1129f, 0x112a8; 0x112b0, 0x112de; 0x11305, 0x1130c; - 0x1130f, 0x11310; 0x11313, 0x11328; 0x1132a, 0x11330; 0x11332, 0x11333; 0x11335, 0x11339; - 0x1133d, 0x1133d; 0x11350, 0x11350; 0x1135d, 0x11361; 0x11380, 0x11389; 0x1138b, 0x1138b; - 0x1138e, 0x1138e; 0x11390, 0x113b5; 0x113b7, 0x113b7; 0x113d1, 0x113d1; 0x113d3, 0x113d3; - 0x11400, 0x11434; 0x11447, 0x1144a; 0x1145f, 0x11461; 0x11480, 0x114af; 0x114c4, 0x114c5; - 0x114c7, 0x114c7; 0x11580, 0x115ae; 0x115d8, 0x115db; 0x11600, 0x1162f; 0x11644, 0x11644; - 0x11680, 0x116aa; 0x116b8, 0x116b8; 0x11700, 0x1171a; 0x11740, 0x11746; 0x11800, 0x1182b; - 0x118a0, 0x118df; 0x118ff, 0x11906; 0x11909, 0x11909; 0x1190c, 0x11913; 0x11915, 0x11916; - 0x11918, 0x1192f; 0x1193f, 0x1193f; 0x11941, 0x11941; 0x119a0, 0x119a7; 0x119aa, 0x119d0; - 0x119e1, 0x119e1; 0x119e3, 
0x119e3; 0x11a00, 0x11a00; 0x11a0b, 0x11a32; 0x11a3a, 0x11a3a; - 0x11a50, 0x11a50; 0x11a5c, 0x11a89; 0x11a9d, 0x11a9d; 0x11ab0, 0x11af8; 0x11bc0, 0x11be0; - 0x11c00, 0x11c08; 0x11c0a, 0x11c2e; 0x11c40, 0x11c40; 0x11c72, 0x11c8f; 0x11d00, 0x11d06; - 0x11d08, 0x11d09; 0x11d0b, 0x11d30; 0x11d46, 0x11d46; 0x11d60, 0x11d65; 0x11d67, 0x11d68; - 0x11d6a, 0x11d89; 0x11d98, 0x11d98; 0x11ee0, 0x11ef2; 0x11f02, 0x11f02; 0x11f04, 0x11f10; - 0x11f12, 0x11f33; 0x11fb0, 0x11fb0; 0x12000, 0x12399; 0x12400, 0x1246e; 0x12480, 0x12543; - 0x12f90, 0x12ff0; 0x13000, 0x1342f; 0x13441, 0x13446; 0x13460, 0x143fa; 0x14400, 0x14646; - 0x16100, 0x1611d; 0x16800, 0x16a38; 0x16a40, 0x16a5e; 0x16a70, 0x16abe; 0x16ad0, 0x16aed; - 0x16b00, 0x16b2f; 0x16b40, 0x16b43; 0x16b63, 0x16b77; 0x16b7d, 0x16b8f; 0x16d40, 0x16d6c; - 0x16e40, 0x16e7f; 0x16f00, 0x16f4a; 0x16f50, 0x16f50; 0x16f93, 0x16f9f; 0x16fe0, 0x16fe1; - 0x16fe3, 0x16fe3; 0x17000, 0x187f7; 0x18800, 0x18cd5; 0x18cff, 0x18d08; 0x1aff0, 0x1aff3; - 0x1aff5, 0x1affb; 0x1affd, 0x1affe; 0x1b000, 0x1b122; 0x1b132, 0x1b132; 0x1b150, 0x1b152; - 0x1b155, 0x1b155; 0x1b164, 0x1b167; 0x1b170, 0x1b2fb; 0x1bc00, 0x1bc6a; 0x1bc70, 0x1bc7c; - 0x1bc80, 0x1bc88; 0x1bc90, 0x1bc99; 0x1d400, 0x1d454; 0x1d456, 0x1d49c; 0x1d49e, 0x1d49f; - 0x1d4a2, 0x1d4a2; 0x1d4a5, 0x1d4a6; 0x1d4a9, 0x1d4ac; 0x1d4ae, 0x1d4b9; 0x1d4bb, 0x1d4bb; - 0x1d4bd, 0x1d4c3; 0x1d4c5, 0x1d505; 0x1d507, 0x1d50a; 0x1d50d, 0x1d514; 0x1d516, 0x1d51c; - 0x1d51e, 0x1d539; 0x1d53b, 0x1d53e; 0x1d540, 0x1d544; 0x1d546, 0x1d546; 0x1d54a, 0x1d550; - 0x1d552, 0x1d6a5; 0x1d6a8, 0x1d6c0; 0x1d6c2, 0x1d6da; 0x1d6dc, 0x1d6fa; 0x1d6fc, 0x1d714; - 0x1d716, 0x1d734; 0x1d736, 0x1d74e; 0x1d750, 0x1d76e; 0x1d770, 0x1d788; 0x1d78a, 0x1d7a8; - 0x1d7aa, 0x1d7c2; 0x1d7c4, 0x1d7cb; 0x1df00, 0x1df1e; 0x1df25, 0x1df2a; 0x1e030, 0x1e06d; - 0x1e100, 0x1e12c; 0x1e137, 0x1e13d; 0x1e14e, 0x1e14e; 0x1e290, 0x1e2ad; 0x1e2c0, 0x1e2eb; - 0x1e4d0, 0x1e4eb; 0x1e5d0, 0x1e5ed; 0x1e5f0, 0x1e5f0; 0x1e7e0, 0x1e7e6; 0x1e7e8, 0x1e7eb; - 
0x1e7ed, 0x1e7ee; 0x1e7f0, 0x1e7fe; 0x1e800, 0x1e8c4; 0x1e900, 0x1e943; 0x1e94b, 0x1e94b; - 0x1ee00, 0x1ee03; 0x1ee05, 0x1ee1f; 0x1ee21, 0x1ee22; 0x1ee24, 0x1ee24; 0x1ee27, 0x1ee27; - 0x1ee29, 0x1ee32; 0x1ee34, 0x1ee37; 0x1ee39, 0x1ee39; 0x1ee3b, 0x1ee3b; 0x1ee42, 0x1ee42; - 0x1ee47, 0x1ee47; 0x1ee49, 0x1ee49; 0x1ee4b, 0x1ee4b; 0x1ee4d, 0x1ee4f; 0x1ee51, 0x1ee52; - 0x1ee54, 0x1ee54; 0x1ee57, 0x1ee57; 0x1ee59, 0x1ee59; 0x1ee5b, 0x1ee5b; 0x1ee5d, 0x1ee5d; - 0x1ee5f, 0x1ee5f; 0x1ee61, 0x1ee62; 0x1ee64, 0x1ee64; 0x1ee67, 0x1ee6a; 0x1ee6c, 0x1ee72; - 0x1ee74, 0x1ee77; 0x1ee79, 0x1ee7c; 0x1ee7e, 0x1ee7e; 0x1ee80, 0x1ee89; 0x1ee8b, 0x1ee9b; - 0x1eea1, 0x1eea3; 0x1eea5, 0x1eea9; 0x1eeab, 0x1eebb; 0x20000, 0x2a6df; 0x2a700, 0x2b739; - 0x2b740, 0x2b81d; 0x2b820, 0x2cea1; 0x2ceb0, 0x2ebe0; 0x2ebf0, 0x2ee5d; 0x2f800, 0x2fa1d; - 0x30000, 0x3134a; 0x31350, 0x323af] + 0xa717, 0xa71f; 0xa722, 0xa788; 0xa78b, 0xa7dc; 0xa7f1, 0xa801; 0xa803, 0xa805; + 0xa807, 0xa80a; 0xa80c, 0xa822; 0xa840, 0xa873; 0xa882, 0xa8b3; 0xa8f2, 0xa8f7; + 0xa8fb, 0xa8fb; 0xa8fd, 0xa8fe; 0xa90a, 0xa925; 0xa930, 0xa946; 0xa960, 0xa97c; + 0xa984, 0xa9b2; 0xa9cf, 0xa9cf; 0xa9e0, 0xa9e4; 0xa9e6, 0xa9ef; 0xa9fa, 0xa9fe; + 0xaa00, 0xaa28; 0xaa40, 0xaa42; 0xaa44, 0xaa4b; 0xaa60, 0xaa76; 0xaa7a, 0xaa7a; + 0xaa7e, 0xaaaf; 0xaab1, 0xaab1; 0xaab5, 0xaab6; 0xaab9, 0xaabd; 0xaac0, 0xaac0; + 0xaac2, 0xaac2; 0xaadb, 0xaadd; 0xaae0, 0xaaea; 0xaaf2, 0xaaf4; 0xab01, 0xab06; + 0xab09, 0xab0e; 0xab11, 0xab16; 0xab20, 0xab26; 0xab28, 0xab2e; 0xab30, 0xab5a; + 0xab5c, 0xab69; 0xab70, 0xabe2; 0xac00, 0xd7a3; 0xd7b0, 0xd7c6; 0xd7cb, 0xd7fb; + 0xf900, 0xfa6d; 0xfa70, 0xfad9; 0xfb00, 0xfb06; 0xfb13, 0xfb17; 0xfb1d, 0xfb1d; + 0xfb1f, 0xfb28; 0xfb2a, 0xfb36; 0xfb38, 0xfb3c; 0xfb3e, 0xfb3e; 0xfb40, 0xfb41; + 0xfb43, 0xfb44; 0xfb46, 0xfbb1; 0xfbd3, 0xfd3d; 0xfd50, 0xfd8f; 0xfd92, 0xfdc7; + 0xfdf0, 0xfdfb; 0xfe70, 0xfe74; 0xfe76, 0xfefc; 0xff21, 0xff3a; 0xff41, 0xff5a; + 0xff66, 0xffbe; 0xffc2, 0xffc7; 0xffca, 0xffcf; 0xffd2, 0xffd7; 
0xffda, 0xffdc; + 0x10000, 0x1000b; 0x1000d, 0x10026; 0x10028, 0x1003a; 0x1003c, 0x1003d; 0x1003f, 0x1004d; + 0x10050, 0x1005d; 0x10080, 0x100fa; 0x10140, 0x10174; 0x10280, 0x1029c; 0x102a0, 0x102d0; + 0x10300, 0x1031f; 0x1032d, 0x1034a; 0x10350, 0x10375; 0x10380, 0x1039d; 0x103a0, 0x103c3; + 0x103c8, 0x103cf; 0x103d1, 0x103d5; 0x10400, 0x1049d; 0x104b0, 0x104d3; 0x104d8, 0x104fb; + 0x10500, 0x10527; 0x10530, 0x10563; 0x10570, 0x1057a; 0x1057c, 0x1058a; 0x1058c, 0x10592; + 0x10594, 0x10595; 0x10597, 0x105a1; 0x105a3, 0x105b1; 0x105b3, 0x105b9; 0x105bb, 0x105bc; + 0x105c0, 0x105f3; 0x10600, 0x10736; 0x10740, 0x10755; 0x10760, 0x10767; 0x10780, 0x10785; + 0x10787, 0x107b0; 0x107b2, 0x107ba; 0x10800, 0x10805; 0x10808, 0x10808; 0x1080a, 0x10835; + 0x10837, 0x10838; 0x1083c, 0x1083c; 0x1083f, 0x10855; 0x10860, 0x10876; 0x10880, 0x1089e; + 0x108e0, 0x108f2; 0x108f4, 0x108f5; 0x10900, 0x10915; 0x10920, 0x10939; 0x10940, 0x10959; + 0x10980, 0x109b7; 0x109be, 0x109bf; 0x10a00, 0x10a00; 0x10a10, 0x10a13; 0x10a15, 0x10a17; + 0x10a19, 0x10a35; 0x10a60, 0x10a7c; 0x10a80, 0x10a9c; 0x10ac0, 0x10ac7; 0x10ac9, 0x10ae4; + 0x10b00, 0x10b35; 0x10b40, 0x10b55; 0x10b60, 0x10b72; 0x10b80, 0x10b91; 0x10c00, 0x10c48; + 0x10c80, 0x10cb2; 0x10cc0, 0x10cf2; 0x10d00, 0x10d23; 0x10d4a, 0x10d65; 0x10d6f, 0x10d85; + 0x10e80, 0x10ea9; 0x10eb0, 0x10eb1; 0x10ec2, 0x10ec7; 0x10f00, 0x10f1c; 0x10f27, 0x10f27; + 0x10f30, 0x10f45; 0x10f70, 0x10f81; 0x10fb0, 0x10fc4; 0x10fe0, 0x10ff6; 0x11003, 0x11037; + 0x11071, 0x11072; 0x11075, 0x11075; 0x11083, 0x110af; 0x110d0, 0x110e8; 0x11103, 0x11126; + 0x11144, 0x11144; 0x11147, 0x11147; 0x11150, 0x11172; 0x11176, 0x11176; 0x11183, 0x111b2; + 0x111c1, 0x111c4; 0x111da, 0x111da; 0x111dc, 0x111dc; 0x11200, 0x11211; 0x11213, 0x1122b; + 0x1123f, 0x11240; 0x11280, 0x11286; 0x11288, 0x11288; 0x1128a, 0x1128d; 0x1128f, 0x1129d; + 0x1129f, 0x112a8; 0x112b0, 0x112de; 0x11305, 0x1130c; 0x1130f, 0x11310; 0x11313, 0x11328; + 0x1132a, 0x11330; 0x11332, 0x11333; 0x11335, 
0x11339; 0x1133d, 0x1133d; 0x11350, 0x11350; + 0x1135d, 0x11361; 0x11380, 0x11389; 0x1138b, 0x1138b; 0x1138e, 0x1138e; 0x11390, 0x113b5; + 0x113b7, 0x113b7; 0x113d1, 0x113d1; 0x113d3, 0x113d3; 0x11400, 0x11434; 0x11447, 0x1144a; + 0x1145f, 0x11461; 0x11480, 0x114af; 0x114c4, 0x114c5; 0x114c7, 0x114c7; 0x11580, 0x115ae; + 0x115d8, 0x115db; 0x11600, 0x1162f; 0x11644, 0x11644; 0x11680, 0x116aa; 0x116b8, 0x116b8; + 0x11700, 0x1171a; 0x11740, 0x11746; 0x11800, 0x1182b; 0x118a0, 0x118df; 0x118ff, 0x11906; + 0x11909, 0x11909; 0x1190c, 0x11913; 0x11915, 0x11916; 0x11918, 0x1192f; 0x1193f, 0x1193f; + 0x11941, 0x11941; 0x119a0, 0x119a7; 0x119aa, 0x119d0; 0x119e1, 0x119e1; 0x119e3, 0x119e3; + 0x11a00, 0x11a00; 0x11a0b, 0x11a32; 0x11a3a, 0x11a3a; 0x11a50, 0x11a50; 0x11a5c, 0x11a89; + 0x11a9d, 0x11a9d; 0x11ab0, 0x11af8; 0x11bc0, 0x11be0; 0x11c00, 0x11c08; 0x11c0a, 0x11c2e; + 0x11c40, 0x11c40; 0x11c72, 0x11c8f; 0x11d00, 0x11d06; 0x11d08, 0x11d09; 0x11d0b, 0x11d30; + 0x11d46, 0x11d46; 0x11d60, 0x11d65; 0x11d67, 0x11d68; 0x11d6a, 0x11d89; 0x11d98, 0x11d98; + 0x11db0, 0x11ddb; 0x11ee0, 0x11ef2; 0x11f02, 0x11f02; 0x11f04, 0x11f10; 0x11f12, 0x11f33; + 0x11fb0, 0x11fb0; 0x12000, 0x12399; 0x12400, 0x1246e; 0x12480, 0x12543; 0x12f90, 0x12ff0; + 0x13000, 0x1342f; 0x13441, 0x13446; 0x13460, 0x143fa; 0x14400, 0x14646; 0x16100, 0x1611d; + 0x16800, 0x16a38; 0x16a40, 0x16a5e; 0x16a70, 0x16abe; 0x16ad0, 0x16aed; 0x16b00, 0x16b2f; + 0x16b40, 0x16b43; 0x16b63, 0x16b77; 0x16b7d, 0x16b8f; 0x16d40, 0x16d6c; 0x16e40, 0x16e7f; + 0x16ea0, 0x16eb8; 0x16ebb, 0x16ed3; 0x16f00, 0x16f4a; 0x16f50, 0x16f50; 0x16f93, 0x16f9f; + 0x16fe0, 0x16fe1; 0x16fe3, 0x16fe3; 0x16ff2, 0x16ff6; 0x17000, 0x18cd5; 0x18cff, 0x18d1e; + 0x18d80, 0x18df2; 0x1aff0, 0x1aff3; 0x1aff5, 0x1affb; 0x1affd, 0x1affe; 0x1b000, 0x1b122; + 0x1b132, 0x1b132; 0x1b150, 0x1b152; 0x1b155, 0x1b155; 0x1b164, 0x1b167; 0x1b170, 0x1b2fb; + 0x1bc00, 0x1bc6a; 0x1bc70, 0x1bc7c; 0x1bc80, 0x1bc88; 0x1bc90, 0x1bc99; 0x1d400, 0x1d454; + 0x1d456, 0x1d49c; 
0x1d49e, 0x1d49f; 0x1d4a2, 0x1d4a2; 0x1d4a5, 0x1d4a6; 0x1d4a9, 0x1d4ac; + 0x1d4ae, 0x1d4b9; 0x1d4bb, 0x1d4bb; 0x1d4bd, 0x1d4c3; 0x1d4c5, 0x1d505; 0x1d507, 0x1d50a; + 0x1d50d, 0x1d514; 0x1d516, 0x1d51c; 0x1d51e, 0x1d539; 0x1d53b, 0x1d53e; 0x1d540, 0x1d544; + 0x1d546, 0x1d546; 0x1d54a, 0x1d550; 0x1d552, 0x1d6a5; 0x1d6a8, 0x1d6c0; 0x1d6c2, 0x1d6da; + 0x1d6dc, 0x1d6fa; 0x1d6fc, 0x1d714; 0x1d716, 0x1d734; 0x1d736, 0x1d74e; 0x1d750, 0x1d76e; + 0x1d770, 0x1d788; 0x1d78a, 0x1d7a8; 0x1d7aa, 0x1d7c2; 0x1d7c4, 0x1d7cb; 0x1df00, 0x1df1e; + 0x1df25, 0x1df2a; 0x1e030, 0x1e06d; 0x1e100, 0x1e12c; 0x1e137, 0x1e13d; 0x1e14e, 0x1e14e; + 0x1e290, 0x1e2ad; 0x1e2c0, 0x1e2eb; 0x1e4d0, 0x1e4eb; 0x1e5d0, 0x1e5ed; 0x1e5f0, 0x1e5f0; + 0x1e6c0, 0x1e6de; 0x1e6e0, 0x1e6e2; 0x1e6e4, 0x1e6e5; 0x1e6e7, 0x1e6ed; 0x1e6f0, 0x1e6f4; + 0x1e6fe, 0x1e6ff; 0x1e7e0, 0x1e7e6; 0x1e7e8, 0x1e7eb; 0x1e7ed, 0x1e7ee; 0x1e7f0, 0x1e7fe; + 0x1e800, 0x1e8c4; 0x1e900, 0x1e943; 0x1e94b, 0x1e94b; 0x1ee00, 0x1ee03; 0x1ee05, 0x1ee1f; + 0x1ee21, 0x1ee22; 0x1ee24, 0x1ee24; 0x1ee27, 0x1ee27; 0x1ee29, 0x1ee32; 0x1ee34, 0x1ee37; + 0x1ee39, 0x1ee39; 0x1ee3b, 0x1ee3b; 0x1ee42, 0x1ee42; 0x1ee47, 0x1ee47; 0x1ee49, 0x1ee49; + 0x1ee4b, 0x1ee4b; 0x1ee4d, 0x1ee4f; 0x1ee51, 0x1ee52; 0x1ee54, 0x1ee54; 0x1ee57, 0x1ee57; + 0x1ee59, 0x1ee59; 0x1ee5b, 0x1ee5b; 0x1ee5d, 0x1ee5d; 0x1ee5f, 0x1ee5f; 0x1ee61, 0x1ee62; + 0x1ee64, 0x1ee64; 0x1ee67, 0x1ee6a; 0x1ee6c, 0x1ee72; 0x1ee74, 0x1ee77; 0x1ee79, 0x1ee7c; + 0x1ee7e, 0x1ee7e; 0x1ee80, 0x1ee89; 0x1ee8b, 0x1ee9b; 0x1eea1, 0x1eea3; 0x1eea5, 0x1eea9; + 0x1eeab, 0x1eebb; 0x20000, 0x2a6df; 0x2a700, 0x2b81d; 0x2b820, 0x2cead; 0x2ceb0, 0x2ebe0; + 0x2ebf0, 0x2ee5d; 0x2f800, 0x2fa1d; 0x30000, 0x3134a; 0x31350, 0x33479] let lowercase = Sedlex_cset.of_list [0x61, 0x7a; 0xaa, 0xaa; 0xb5, 0xb5; 0xba, 0xba; 0xdf, 0xf6; @@ -1431,7 +1441,7 @@ module Properties = struct 0x223, 0x223; 0x225, 0x225; 0x227, 0x227; 0x229, 0x229; 0x22b, 0x22b; 0x22d, 0x22d; 0x22f, 0x22f; 0x231, 0x231; 0x233, 0x239; 0x23c, 0x23c; 
0x23f, 0x240; 0x242, 0x242; 0x247, 0x247; 0x249, 0x249; 0x24b, 0x24b; - 0x24d, 0x24d; 0x24f, 0x293; 0x295, 0x2b8; 0x2c0, 0x2c1; 0x2e0, 0x2e4; + 0x24d, 0x24d; 0x24f, 0x293; 0x296, 0x2b8; 0x2c0, 0x2c1; 0x2e0, 0x2e4; 0x345, 0x345; 0x371, 0x371; 0x373, 0x373; 0x377, 0x377; 0x37a, 0x37d; 0x390, 0x390; 0x3ac, 0x3ce; 0x3d0, 0x3d1; 0x3d5, 0x3d7; 0x3d9, 0x3d9; 0x3db, 0x3db; 0x3dd, 0x3dd; 0x3df, 0x3df; 0x3e1, 0x3e1; 0x3e3, 0x3e3; @@ -1524,19 +1534,20 @@ module Properties = struct 0xa79f, 0xa79f; 0xa7a1, 0xa7a1; 0xa7a3, 0xa7a3; 0xa7a5, 0xa7a5; 0xa7a7, 0xa7a7; 0xa7a9, 0xa7a9; 0xa7af, 0xa7af; 0xa7b5, 0xa7b5; 0xa7b7, 0xa7b7; 0xa7b9, 0xa7b9; 0xa7bb, 0xa7bb; 0xa7bd, 0xa7bd; 0xa7bf, 0xa7bf; 0xa7c1, 0xa7c1; 0xa7c3, 0xa7c3; - 0xa7c8, 0xa7c8; 0xa7ca, 0xa7ca; 0xa7cd, 0xa7cd; 0xa7d1, 0xa7d1; 0xa7d3, 0xa7d3; - 0xa7d5, 0xa7d5; 0xa7d7, 0xa7d7; 0xa7d9, 0xa7d9; 0xa7db, 0xa7db; 0xa7f2, 0xa7f4; - 0xa7f6, 0xa7f6; 0xa7f8, 0xa7fa; 0xab30, 0xab5a; 0xab5c, 0xab69; 0xab70, 0xabbf; - 0xfb00, 0xfb06; 0xfb13, 0xfb17; 0xff41, 0xff5a; 0x10428, 0x1044f; 0x104d8, 0x104fb; - 0x10597, 0x105a1; 0x105a3, 0x105b1; 0x105b3, 0x105b9; 0x105bb, 0x105bc; 0x10780, 0x10780; - 0x10783, 0x10785; 0x10787, 0x107b0; 0x107b2, 0x107ba; 0x10cc0, 0x10cf2; 0x10d70, 0x10d85; - 0x118c0, 0x118df; 0x16e60, 0x16e7f; 0x1d41a, 0x1d433; 0x1d44e, 0x1d454; 0x1d456, 0x1d467; - 0x1d482, 0x1d49b; 0x1d4b6, 0x1d4b9; 0x1d4bb, 0x1d4bb; 0x1d4bd, 0x1d4c3; 0x1d4c5, 0x1d4cf; - 0x1d4ea, 0x1d503; 0x1d51e, 0x1d537; 0x1d552, 0x1d56b; 0x1d586, 0x1d59f; 0x1d5ba, 0x1d5d3; - 0x1d5ee, 0x1d607; 0x1d622, 0x1d63b; 0x1d656, 0x1d66f; 0x1d68a, 0x1d6a5; 0x1d6c2, 0x1d6da; - 0x1d6dc, 0x1d6e1; 0x1d6fc, 0x1d714; 0x1d716, 0x1d71b; 0x1d736, 0x1d74e; 0x1d750, 0x1d755; - 0x1d770, 0x1d788; 0x1d78a, 0x1d78f; 0x1d7aa, 0x1d7c2; 0x1d7c4, 0x1d7c9; 0x1d7cb, 0x1d7cb; - 0x1df00, 0x1df09; 0x1df0b, 0x1df1e; 0x1df25, 0x1df2a; 0x1e030, 0x1e06d; 0x1e922, 0x1e943] + 0xa7c8, 0xa7c8; 0xa7ca, 0xa7ca; 0xa7cd, 0xa7cd; 0xa7cf, 0xa7cf; 0xa7d1, 0xa7d1; + 0xa7d3, 0xa7d3; 0xa7d5, 0xa7d5; 0xa7d7, 
0xa7d7; 0xa7d9, 0xa7d9; 0xa7db, 0xa7db; + 0xa7f1, 0xa7f4; 0xa7f6, 0xa7f6; 0xa7f8, 0xa7fa; 0xab30, 0xab5a; 0xab5c, 0xab69; + 0xab70, 0xabbf; 0xfb00, 0xfb06; 0xfb13, 0xfb17; 0xff41, 0xff5a; 0x10428, 0x1044f; + 0x104d8, 0x104fb; 0x10597, 0x105a1; 0x105a3, 0x105b1; 0x105b3, 0x105b9; 0x105bb, 0x105bc; + 0x10780, 0x10780; 0x10783, 0x10785; 0x10787, 0x107b0; 0x107b2, 0x107ba; 0x10cc0, 0x10cf2; + 0x10d70, 0x10d85; 0x118c0, 0x118df; 0x16e60, 0x16e7f; 0x16ebb, 0x16ed3; 0x1d41a, 0x1d433; + 0x1d44e, 0x1d454; 0x1d456, 0x1d467; 0x1d482, 0x1d49b; 0x1d4b6, 0x1d4b9; 0x1d4bb, 0x1d4bb; + 0x1d4bd, 0x1d4c3; 0x1d4c5, 0x1d4cf; 0x1d4ea, 0x1d503; 0x1d51e, 0x1d537; 0x1d552, 0x1d56b; + 0x1d586, 0x1d59f; 0x1d5ba, 0x1d5d3; 0x1d5ee, 0x1d607; 0x1d622, 0x1d63b; 0x1d656, 0x1d66f; + 0x1d68a, 0x1d6a5; 0x1d6c2, 0x1d6da; 0x1d6dc, 0x1d6e1; 0x1d6fc, 0x1d714; 0x1d716, 0x1d71b; + 0x1d736, 0x1d74e; 0x1d750, 0x1d755; 0x1d770, 0x1d788; 0x1d78a, 0x1d78f; 0x1d7aa, 0x1d7c2; + 0x1d7c4, 0x1d7c9; 0x1d7cb, 0x1d7cb; 0x1df00, 0x1df09; 0x1df0b, 0x1df1e; 0x1df25, 0x1df2a; + 0x1e030, 0x1e06d; 0x1e922, 0x1e943] let math = Sedlex_cset.of_list [0x2b, 0x2b; 0x3c, 0x3e; 0x5e, 0x5e; 0x7c, 0x7c; 0x7e, 0x7e; @@ -1555,18 +1566,19 @@ module Properties = struct 0x2642, 0x2642; 0x2660, 0x2663; 0x266d, 0x266f; 0x27c0, 0x27ff; 0x2900, 0x2aff; 0x2b30, 0x2b44; 0x2b47, 0x2b4c; 0xfb29, 0xfb29; 0xfe61, 0xfe66; 0xfe68, 0xfe68; 0xff0b, 0xff0b; 0xff1c, 0xff1e; 0xff3c, 0xff3c; 0xff3e, 0xff3e; 0xff5c, 0xff5c; - 0xff5e, 0xff5e; 0xffe2, 0xffe2; 0xffe9, 0xffec; 0x10d8e, 0x10d8f; 0x1d400, 0x1d454; - 0x1d456, 0x1d49c; 0x1d49e, 0x1d49f; 0x1d4a2, 0x1d4a2; 0x1d4a5, 0x1d4a6; 0x1d4a9, 0x1d4ac; - 0x1d4ae, 0x1d4b9; 0x1d4bb, 0x1d4bb; 0x1d4bd, 0x1d4c3; 0x1d4c5, 0x1d505; 0x1d507, 0x1d50a; - 0x1d50d, 0x1d514; 0x1d516, 0x1d51c; 0x1d51e, 0x1d539; 0x1d53b, 0x1d53e; 0x1d540, 0x1d544; - 0x1d546, 0x1d546; 0x1d54a, 0x1d550; 0x1d552, 0x1d6a5; 0x1d6a8, 0x1d7cb; 0x1d7ce, 0x1d7ff; - 0x1ee00, 0x1ee03; 0x1ee05, 0x1ee1f; 0x1ee21, 0x1ee22; 0x1ee24, 0x1ee24; 0x1ee27, 
0x1ee27; - 0x1ee29, 0x1ee32; 0x1ee34, 0x1ee37; 0x1ee39, 0x1ee39; 0x1ee3b, 0x1ee3b; 0x1ee42, 0x1ee42; - 0x1ee47, 0x1ee47; 0x1ee49, 0x1ee49; 0x1ee4b, 0x1ee4b; 0x1ee4d, 0x1ee4f; 0x1ee51, 0x1ee52; - 0x1ee54, 0x1ee54; 0x1ee57, 0x1ee57; 0x1ee59, 0x1ee59; 0x1ee5b, 0x1ee5b; 0x1ee5d, 0x1ee5d; - 0x1ee5f, 0x1ee5f; 0x1ee61, 0x1ee62; 0x1ee64, 0x1ee64; 0x1ee67, 0x1ee6a; 0x1ee6c, 0x1ee72; - 0x1ee74, 0x1ee77; 0x1ee79, 0x1ee7c; 0x1ee7e, 0x1ee7e; 0x1ee80, 0x1ee89; 0x1ee8b, 0x1ee9b; - 0x1eea1, 0x1eea3; 0x1eea5, 0x1eea9; 0x1eeab, 0x1eebb; 0x1eef0, 0x1eef1] + 0xff5e, 0xff5e; 0xffe2, 0xffe2; 0xffe9, 0xffec; 0x10d8e, 0x10d8f; 0x1cef0, 0x1cef0; + 0x1d400, 0x1d454; 0x1d456, 0x1d49c; 0x1d49e, 0x1d49f; 0x1d4a2, 0x1d4a2; 0x1d4a5, 0x1d4a6; + 0x1d4a9, 0x1d4ac; 0x1d4ae, 0x1d4b9; 0x1d4bb, 0x1d4bb; 0x1d4bd, 0x1d4c3; 0x1d4c5, 0x1d505; + 0x1d507, 0x1d50a; 0x1d50d, 0x1d514; 0x1d516, 0x1d51c; 0x1d51e, 0x1d539; 0x1d53b, 0x1d53e; + 0x1d540, 0x1d544; 0x1d546, 0x1d546; 0x1d54a, 0x1d550; 0x1d552, 0x1d6a5; 0x1d6a8, 0x1d7cb; + 0x1d7ce, 0x1d7ff; 0x1ee00, 0x1ee03; 0x1ee05, 0x1ee1f; 0x1ee21, 0x1ee22; 0x1ee24, 0x1ee24; + 0x1ee27, 0x1ee27; 0x1ee29, 0x1ee32; 0x1ee34, 0x1ee37; 0x1ee39, 0x1ee39; 0x1ee3b, 0x1ee3b; + 0x1ee42, 0x1ee42; 0x1ee47, 0x1ee47; 0x1ee49, 0x1ee49; 0x1ee4b, 0x1ee4b; 0x1ee4d, 0x1ee4f; + 0x1ee51, 0x1ee52; 0x1ee54, 0x1ee54; 0x1ee57, 0x1ee57; 0x1ee59, 0x1ee59; 0x1ee5b, 0x1ee5b; + 0x1ee5d, 0x1ee5d; 0x1ee5f, 0x1ee5f; 0x1ee61, 0x1ee62; 0x1ee64, 0x1ee64; 0x1ee67, 0x1ee6a; + 0x1ee6c, 0x1ee72; 0x1ee74, 0x1ee77; 0x1ee79, 0x1ee7c; 0x1ee7e, 0x1ee7e; 0x1ee80, 0x1ee89; + 0x1ee8b, 0x1ee9b; 0x1eea1, 0x1eea3; 0x1eea5, 0x1eea9; 0x1eeab, 0x1eebb; 0x1eef0, 0x1eef1; + 0x1f8d0, 0x1f8d8] let other_alphabetic = Sedlex_cset.of_list [0x345, 0x345; 0x363, 0x36f; 0x5b0, 0x5bd; 0x5bf, 0x5bf; 0x5c1, 0x5c2; @@ -1601,7 +1613,7 @@ module Properties = struct 0xaa43, 0xaa43; 0xaa4c, 0xaa4d; 0xaa7b, 0xaa7d; 0xaab0, 0xaab0; 0xaab2, 0xaab4; 0xaab7, 0xaab8; 0xaabe, 0xaabe; 0xaaeb, 0xaaef; 0xaaf5, 0xaaf5; 0xabe3, 0xabea; 0xfb1e, 0xfb1e; 
0x10376, 0x1037a; 0x10a01, 0x10a03; 0x10a05, 0x10a06; 0x10a0c, 0x10a0f; - 0x10d24, 0x10d27; 0x10d69, 0x10d69; 0x10eab, 0x10eac; 0x10efc, 0x10efc; 0x11000, 0x11002; + 0x10d24, 0x10d27; 0x10d69, 0x10d69; 0x10eab, 0x10eac; 0x10efa, 0x10efc; 0x11000, 0x11002; 0x11038, 0x11045; 0x11073, 0x11074; 0x11080, 0x11082; 0x110b0, 0x110b8; 0x110c2, 0x110c2; 0x11100, 0x11102; 0x11127, 0x11132; 0x11145, 0x11146; 0x11180, 0x11182; 0x111b3, 0x111bf; 0x111ce, 0x111cf; 0x1122c, 0x11234; 0x11237, 0x11237; 0x1123e, 0x1123e; 0x11241, 0x11241; @@ -1612,19 +1624,20 @@ module Properties = struct 0x116ab, 0x116b5; 0x1171d, 0x1172a; 0x1182c, 0x11838; 0x11930, 0x11935; 0x11937, 0x11938; 0x1193b, 0x1193c; 0x11940, 0x11940; 0x11942, 0x11942; 0x119d1, 0x119d7; 0x119da, 0x119df; 0x119e4, 0x119e4; 0x11a01, 0x11a0a; 0x11a35, 0x11a39; 0x11a3b, 0x11a3e; 0x11a51, 0x11a5b; - 0x11a8a, 0x11a97; 0x11c2f, 0x11c36; 0x11c38, 0x11c3e; 0x11c92, 0x11ca7; 0x11ca9, 0x11cb6; - 0x11d31, 0x11d36; 0x11d3a, 0x11d3a; 0x11d3c, 0x11d3d; 0x11d3f, 0x11d41; 0x11d43, 0x11d43; - 0x11d47, 0x11d47; 0x11d8a, 0x11d8e; 0x11d90, 0x11d91; 0x11d93, 0x11d96; 0x11ef3, 0x11ef6; - 0x11f00, 0x11f01; 0x11f03, 0x11f03; 0x11f34, 0x11f3a; 0x11f3e, 0x11f40; 0x1611e, 0x1612e; - 0x16f4f, 0x16f4f; 0x16f51, 0x16f87; 0x16f8f, 0x16f92; 0x16ff0, 0x16ff1; 0x1bc9e, 0x1bc9e; - 0x1e000, 0x1e006; 0x1e008, 0x1e018; 0x1e01b, 0x1e021; 0x1e023, 0x1e024; 0x1e026, 0x1e02a; - 0x1e08f, 0x1e08f; 0x1e947, 0x1e947; 0x1f130, 0x1f149; 0x1f150, 0x1f169; 0x1f170, 0x1f189] + 0x11a8a, 0x11a97; 0x11b60, 0x11b67; 0x11c2f, 0x11c36; 0x11c38, 0x11c3e; 0x11c92, 0x11ca7; + 0x11ca9, 0x11cb6; 0x11d31, 0x11d36; 0x11d3a, 0x11d3a; 0x11d3c, 0x11d3d; 0x11d3f, 0x11d41; + 0x11d43, 0x11d43; 0x11d47, 0x11d47; 0x11d8a, 0x11d8e; 0x11d90, 0x11d91; 0x11d93, 0x11d96; + 0x11ef3, 0x11ef6; 0x11f00, 0x11f01; 0x11f03, 0x11f03; 0x11f34, 0x11f3a; 0x11f3e, 0x11f40; + 0x1611e, 0x1612e; 0x16f4f, 0x16f4f; 0x16f51, 0x16f87; 0x16f8f, 0x16f92; 0x16ff0, 0x16ff1; + 0x1bc9e, 0x1bc9e; 0x1e000, 0x1e006; 0x1e008, 
0x1e018; 0x1e01b, 0x1e021; 0x1e023, 0x1e024; + 0x1e026, 0x1e02a; 0x1e08f, 0x1e08f; 0x1e6e3, 0x1e6e3; 0x1e6e6, 0x1e6e6; 0x1e6ee, 0x1e6ef; + 0x1e6f5, 0x1e6f5; 0x1e947, 0x1e947; 0x1f130, 0x1f149; 0x1f150, 0x1f169; 0x1f170, 0x1f189] let other_lowercase = Sedlex_cset.of_list [0xaa, 0xaa; 0xba, 0xba; 0x2b0, 0x2b8; 0x2c0, 0x2c1; 0x2e0, 0x2e4; 0x345, 0x345; 0x37a, 0x37a; 0x10fc, 0x10fc; 0x1d2c, 0x1d6a; 0x1d78, 0x1d78; 0x1d9b, 0x1dbf; 0x2071, 0x2071; 0x207f, 0x207f; 0x2090, 0x209c; 0x2170, 0x217f; - 0x24d0, 0x24e9; 0x2c7c, 0x2c7d; 0xa69c, 0xa69d; 0xa770, 0xa770; 0xa7f2, 0xa7f4; + 0x24d0, 0x24e9; 0x2c7c, 0x2c7d; 0xa69c, 0xa69d; 0xa770, 0xa770; 0xa7f1, 0xa7f4; 0xa7f8, 0xa7f9; 0xab5c, 0xab5f; 0xab69, 0xab69; 0x10780, 0x10780; 0x10783, 0x10785; 0x10787, 0x107b0; 0x107b2, 0x107ba; 0x1e030, 0x1e06d] @@ -1781,18 +1794,18 @@ module Properties = struct 0xa79c, 0xa79c; 0xa79e, 0xa79e; 0xa7a0, 0xa7a0; 0xa7a2, 0xa7a2; 0xa7a4, 0xa7a4; 0xa7a6, 0xa7a6; 0xa7a8, 0xa7a8; 0xa7aa, 0xa7ae; 0xa7b0, 0xa7b4; 0xa7b6, 0xa7b6; 0xa7b8, 0xa7b8; 0xa7ba, 0xa7ba; 0xa7bc, 0xa7bc; 0xa7be, 0xa7be; 0xa7c0, 0xa7c0; - 0xa7c2, 0xa7c2; 0xa7c4, 0xa7c7; 0xa7c9, 0xa7c9; 0xa7cb, 0xa7cc; 0xa7d0, 0xa7d0; - 0xa7d6, 0xa7d6; 0xa7d8, 0xa7d8; 0xa7da, 0xa7da; 0xa7dc, 0xa7dc; 0xa7f5, 0xa7f5; - 0xff21, 0xff3a; 0x10400, 0x10427; 0x104b0, 0x104d3; 0x10570, 0x1057a; 0x1057c, 0x1058a; - 0x1058c, 0x10592; 0x10594, 0x10595; 0x10c80, 0x10cb2; 0x10d50, 0x10d65; 0x118a0, 0x118bf; - 0x16e40, 0x16e5f; 0x1d400, 0x1d419; 0x1d434, 0x1d44d; 0x1d468, 0x1d481; 0x1d49c, 0x1d49c; - 0x1d49e, 0x1d49f; 0x1d4a2, 0x1d4a2; 0x1d4a5, 0x1d4a6; 0x1d4a9, 0x1d4ac; 0x1d4ae, 0x1d4b5; - 0x1d4d0, 0x1d4e9; 0x1d504, 0x1d505; 0x1d507, 0x1d50a; 0x1d50d, 0x1d514; 0x1d516, 0x1d51c; - 0x1d538, 0x1d539; 0x1d53b, 0x1d53e; 0x1d540, 0x1d544; 0x1d546, 0x1d546; 0x1d54a, 0x1d550; - 0x1d56c, 0x1d585; 0x1d5a0, 0x1d5b9; 0x1d5d4, 0x1d5ed; 0x1d608, 0x1d621; 0x1d63c, 0x1d655; - 0x1d670, 0x1d689; 0x1d6a8, 0x1d6c0; 0x1d6e2, 0x1d6fa; 0x1d71c, 0x1d734; 0x1d756, 0x1d76e; - 0x1d790, 
0x1d7a8; 0x1d7ca, 0x1d7ca; 0x1e900, 0x1e921; 0x1f130, 0x1f149; 0x1f150, 0x1f169; - 0x1f170, 0x1f189] + 0xa7c2, 0xa7c2; 0xa7c4, 0xa7c7; 0xa7c9, 0xa7c9; 0xa7cb, 0xa7cc; 0xa7ce, 0xa7ce; + 0xa7d0, 0xa7d0; 0xa7d2, 0xa7d2; 0xa7d4, 0xa7d4; 0xa7d6, 0xa7d6; 0xa7d8, 0xa7d8; + 0xa7da, 0xa7da; 0xa7dc, 0xa7dc; 0xa7f5, 0xa7f5; 0xff21, 0xff3a; 0x10400, 0x10427; + 0x104b0, 0x104d3; 0x10570, 0x1057a; 0x1057c, 0x1058a; 0x1058c, 0x10592; 0x10594, 0x10595; + 0x10c80, 0x10cb2; 0x10d50, 0x10d65; 0x118a0, 0x118bf; 0x16e40, 0x16e5f; 0x16ea0, 0x16eb8; + 0x1d400, 0x1d419; 0x1d434, 0x1d44d; 0x1d468, 0x1d481; 0x1d49c, 0x1d49c; 0x1d49e, 0x1d49f; + 0x1d4a2, 0x1d4a2; 0x1d4a5, 0x1d4a6; 0x1d4a9, 0x1d4ac; 0x1d4ae, 0x1d4b5; 0x1d4d0, 0x1d4e9; + 0x1d504, 0x1d505; 0x1d507, 0x1d50a; 0x1d50d, 0x1d514; 0x1d516, 0x1d51c; 0x1d538, 0x1d539; + 0x1d53b, 0x1d53e; 0x1d540, 0x1d544; 0x1d546, 0x1d546; 0x1d54a, 0x1d550; 0x1d56c, 0x1d585; + 0x1d5a0, 0x1d5b9; 0x1d5d4, 0x1d5ed; 0x1d608, 0x1d621; 0x1d63c, 0x1d655; 0x1d670, 0x1d689; + 0x1d6a8, 0x1d6c0; 0x1d6e2, 0x1d6fa; 0x1d71c, 0x1d734; 0x1d756, 0x1d76e; 0x1d790, 0x1d7a8; + 0x1d7ca, 0x1d7ca; 0x1e900, 0x1e921; 0x1f130, 0x1f149; 0x1f150, 0x1f169; 0x1f170, 0x1f189] let white_space = Sedlex_cset.of_list [0x9, 0xd; 0x20, 0x20; 0x85, 0x85; 0xa0, 0xa0; 0x1680, 0x1680; @@ -1809,7 +1822,7 @@ module Properties = struct 0x5ef, 0x5f2; 0x610, 0x61a; 0x620, 0x669; 0x66e, 0x6d3; 0x6d5, 0x6dc; 0x6df, 0x6e8; 0x6ea, 0x6fc; 0x6ff, 0x6ff; 0x710, 0x74a; 0x74d, 0x7b1; 0x7c0, 0x7f5; 0x7fa, 0x7fa; 0x7fd, 0x7fd; 0x800, 0x82d; 0x840, 0x85b; - 0x860, 0x86a; 0x870, 0x887; 0x889, 0x88e; 0x897, 0x8e1; 0x8e3, 0x963; + 0x860, 0x86a; 0x870, 0x887; 0x889, 0x88f; 0x897, 0x8e1; 0x8e3, 0x963; 0x966, 0x96f; 0x971, 0x983; 0x985, 0x98c; 0x98f, 0x990; 0x993, 0x9a8; 0x9aa, 0x9b0; 0x9b2, 0x9b2; 0x9b6, 0x9b9; 0x9bc, 0x9c4; 0x9c7, 0x9c8; 0x9cb, 0x9ce; 0x9d7, 0x9d7; 0x9dc, 0x9dd; 0x9df, 0x9e3; 0x9e6, 0x9f1; @@ -1827,10 +1840,10 @@ module Properties = struct 0xba8, 0xbaa; 0xbae, 0xbb9; 0xbbe, 0xbc2; 0xbc6, 0xbc8; 
0xbca, 0xbcd; 0xbd0, 0xbd0; 0xbd7, 0xbd7; 0xbe6, 0xbef; 0xc00, 0xc0c; 0xc0e, 0xc10; 0xc12, 0xc28; 0xc2a, 0xc39; 0xc3c, 0xc44; 0xc46, 0xc48; 0xc4a, 0xc4d; - 0xc55, 0xc56; 0xc58, 0xc5a; 0xc5d, 0xc5d; 0xc60, 0xc63; 0xc66, 0xc6f; + 0xc55, 0xc56; 0xc58, 0xc5a; 0xc5c, 0xc5d; 0xc60, 0xc63; 0xc66, 0xc6f; 0xc80, 0xc83; 0xc85, 0xc8c; 0xc8e, 0xc90; 0xc92, 0xca8; 0xcaa, 0xcb3; 0xcb5, 0xcb9; 0xcbc, 0xcc4; 0xcc6, 0xcc8; 0xcca, 0xccd; 0xcd5, 0xcd6; - 0xcdd, 0xcde; 0xce0, 0xce3; 0xce6, 0xcef; 0xcf1, 0xcf3; 0xd00, 0xd0c; + 0xcdc, 0xcde; 0xce0, 0xce3; 0xce6, 0xcef; 0xcf1, 0xcf3; 0xd00, 0xd0c; 0xd0e, 0xd10; 0xd12, 0xd44; 0xd46, 0xd48; 0xd4a, 0xd4e; 0xd54, 0xd57; 0xd5f, 0xd63; 0xd66, 0xd6f; 0xd7a, 0xd7f; 0xd81, 0xd83; 0xd85, 0xd96; 0xd9a, 0xdb1; 0xdb3, 0xdbb; 0xdbd, 0xdbd; 0xdc0, 0xdc6; 0xdca, 0xdca; @@ -1852,113 +1865,115 @@ module Properties = struct 0x1820, 0x1878; 0x1880, 0x18aa; 0x18b0, 0x18f5; 0x1900, 0x191e; 0x1920, 0x192b; 0x1930, 0x193b; 0x1946, 0x196d; 0x1970, 0x1974; 0x1980, 0x19ab; 0x19b0, 0x19c9; 0x19d0, 0x19da; 0x1a00, 0x1a1b; 0x1a20, 0x1a5e; 0x1a60, 0x1a7c; 0x1a7f, 0x1a89; - 0x1a90, 0x1a99; 0x1aa7, 0x1aa7; 0x1ab0, 0x1abd; 0x1abf, 0x1ace; 0x1b00, 0x1b4c; - 0x1b50, 0x1b59; 0x1b6b, 0x1b73; 0x1b80, 0x1bf3; 0x1c00, 0x1c37; 0x1c40, 0x1c49; - 0x1c4d, 0x1c7d; 0x1c80, 0x1c8a; 0x1c90, 0x1cba; 0x1cbd, 0x1cbf; 0x1cd0, 0x1cd2; - 0x1cd4, 0x1cfa; 0x1d00, 0x1f15; 0x1f18, 0x1f1d; 0x1f20, 0x1f45; 0x1f48, 0x1f4d; - 0x1f50, 0x1f57; 0x1f59, 0x1f59; 0x1f5b, 0x1f5b; 0x1f5d, 0x1f5d; 0x1f5f, 0x1f7d; - 0x1f80, 0x1fb4; 0x1fb6, 0x1fbc; 0x1fbe, 0x1fbe; 0x1fc2, 0x1fc4; 0x1fc6, 0x1fcc; - 0x1fd0, 0x1fd3; 0x1fd6, 0x1fdb; 0x1fe0, 0x1fec; 0x1ff2, 0x1ff4; 0x1ff6, 0x1ffc; - 0x200c, 0x200d; 0x203f, 0x2040; 0x2054, 0x2054; 0x2071, 0x2071; 0x207f, 0x207f; - 0x2090, 0x209c; 0x20d0, 0x20dc; 0x20e1, 0x20e1; 0x20e5, 0x20f0; 0x2102, 0x2102; - 0x2107, 0x2107; 0x210a, 0x2113; 0x2115, 0x2115; 0x2118, 0x211d; 0x2124, 0x2124; - 0x2126, 0x2126; 0x2128, 0x2128; 0x212a, 0x2139; 0x213c, 0x213f; 0x2145, 0x2149; - 0x214e, 
0x214e; 0x2160, 0x2188; 0x2c00, 0x2ce4; 0x2ceb, 0x2cf3; 0x2d00, 0x2d25; - 0x2d27, 0x2d27; 0x2d2d, 0x2d2d; 0x2d30, 0x2d67; 0x2d6f, 0x2d6f; 0x2d7f, 0x2d96; - 0x2da0, 0x2da6; 0x2da8, 0x2dae; 0x2db0, 0x2db6; 0x2db8, 0x2dbe; 0x2dc0, 0x2dc6; - 0x2dc8, 0x2dce; 0x2dd0, 0x2dd6; 0x2dd8, 0x2dde; 0x2de0, 0x2dff; 0x3005, 0x3007; - 0x3021, 0x302f; 0x3031, 0x3035; 0x3038, 0x303c; 0x3041, 0x3096; 0x3099, 0x309a; - 0x309d, 0x309f; 0x30a1, 0x30ff; 0x3105, 0x312f; 0x3131, 0x318e; 0x31a0, 0x31bf; - 0x31f0, 0x31ff; 0x3400, 0x4dbf; 0x4e00, 0xa48c; 0xa4d0, 0xa4fd; 0xa500, 0xa60c; - 0xa610, 0xa62b; 0xa640, 0xa66f; 0xa674, 0xa67d; 0xa67f, 0xa6f1; 0xa717, 0xa71f; - 0xa722, 0xa788; 0xa78b, 0xa7cd; 0xa7d0, 0xa7d1; 0xa7d3, 0xa7d3; 0xa7d5, 0xa7dc; - 0xa7f2, 0xa827; 0xa82c, 0xa82c; 0xa840, 0xa873; 0xa880, 0xa8c5; 0xa8d0, 0xa8d9; - 0xa8e0, 0xa8f7; 0xa8fb, 0xa8fb; 0xa8fd, 0xa92d; 0xa930, 0xa953; 0xa960, 0xa97c; - 0xa980, 0xa9c0; 0xa9cf, 0xa9d9; 0xa9e0, 0xa9fe; 0xaa00, 0xaa36; 0xaa40, 0xaa4d; - 0xaa50, 0xaa59; 0xaa60, 0xaa76; 0xaa7a, 0xaac2; 0xaadb, 0xaadd; 0xaae0, 0xaaef; - 0xaaf2, 0xaaf6; 0xab01, 0xab06; 0xab09, 0xab0e; 0xab11, 0xab16; 0xab20, 0xab26; - 0xab28, 0xab2e; 0xab30, 0xab5a; 0xab5c, 0xab69; 0xab70, 0xabea; 0xabec, 0xabed; - 0xabf0, 0xabf9; 0xac00, 0xd7a3; 0xd7b0, 0xd7c6; 0xd7cb, 0xd7fb; 0xf900, 0xfa6d; - 0xfa70, 0xfad9; 0xfb00, 0xfb06; 0xfb13, 0xfb17; 0xfb1d, 0xfb28; 0xfb2a, 0xfb36; - 0xfb38, 0xfb3c; 0xfb3e, 0xfb3e; 0xfb40, 0xfb41; 0xfb43, 0xfb44; 0xfb46, 0xfbb1; - 0xfbd3, 0xfc5d; 0xfc64, 0xfd3d; 0xfd50, 0xfd8f; 0xfd92, 0xfdc7; 0xfdf0, 0xfdf9; - 0xfe00, 0xfe0f; 0xfe20, 0xfe2f; 0xfe33, 0xfe34; 0xfe4d, 0xfe4f; 0xfe71, 0xfe71; - 0xfe73, 0xfe73; 0xfe77, 0xfe77; 0xfe79, 0xfe79; 0xfe7b, 0xfe7b; 0xfe7d, 0xfe7d; - 0xfe7f, 0xfefc; 0xff10, 0xff19; 0xff21, 0xff3a; 0xff3f, 0xff3f; 0xff41, 0xff5a; - 0xff65, 0xffbe; 0xffc2, 0xffc7; 0xffca, 0xffcf; 0xffd2, 0xffd7; 0xffda, 0xffdc; - 0x10000, 0x1000b; 0x1000d, 0x10026; 0x10028, 0x1003a; 0x1003c, 0x1003d; 0x1003f, 0x1004d; - 0x10050, 0x1005d; 0x10080, 
0x100fa; 0x10140, 0x10174; 0x101fd, 0x101fd; 0x10280, 0x1029c; - 0x102a0, 0x102d0; 0x102e0, 0x102e0; 0x10300, 0x1031f; 0x1032d, 0x1034a; 0x10350, 0x1037a; - 0x10380, 0x1039d; 0x103a0, 0x103c3; 0x103c8, 0x103cf; 0x103d1, 0x103d5; 0x10400, 0x1049d; - 0x104a0, 0x104a9; 0x104b0, 0x104d3; 0x104d8, 0x104fb; 0x10500, 0x10527; 0x10530, 0x10563; - 0x10570, 0x1057a; 0x1057c, 0x1058a; 0x1058c, 0x10592; 0x10594, 0x10595; 0x10597, 0x105a1; - 0x105a3, 0x105b1; 0x105b3, 0x105b9; 0x105bb, 0x105bc; 0x105c0, 0x105f3; 0x10600, 0x10736; - 0x10740, 0x10755; 0x10760, 0x10767; 0x10780, 0x10785; 0x10787, 0x107b0; 0x107b2, 0x107ba; - 0x10800, 0x10805; 0x10808, 0x10808; 0x1080a, 0x10835; 0x10837, 0x10838; 0x1083c, 0x1083c; - 0x1083f, 0x10855; 0x10860, 0x10876; 0x10880, 0x1089e; 0x108e0, 0x108f2; 0x108f4, 0x108f5; - 0x10900, 0x10915; 0x10920, 0x10939; 0x10980, 0x109b7; 0x109be, 0x109bf; 0x10a00, 0x10a03; - 0x10a05, 0x10a06; 0x10a0c, 0x10a13; 0x10a15, 0x10a17; 0x10a19, 0x10a35; 0x10a38, 0x10a3a; - 0x10a3f, 0x10a3f; 0x10a60, 0x10a7c; 0x10a80, 0x10a9c; 0x10ac0, 0x10ac7; 0x10ac9, 0x10ae6; - 0x10b00, 0x10b35; 0x10b40, 0x10b55; 0x10b60, 0x10b72; 0x10b80, 0x10b91; 0x10c00, 0x10c48; - 0x10c80, 0x10cb2; 0x10cc0, 0x10cf2; 0x10d00, 0x10d27; 0x10d30, 0x10d39; 0x10d40, 0x10d65; - 0x10d69, 0x10d6d; 0x10d6f, 0x10d85; 0x10e80, 0x10ea9; 0x10eab, 0x10eac; 0x10eb0, 0x10eb1; - 0x10ec2, 0x10ec4; 0x10efc, 0x10f1c; 0x10f27, 0x10f27; 0x10f30, 0x10f50; 0x10f70, 0x10f85; - 0x10fb0, 0x10fc4; 0x10fe0, 0x10ff6; 0x11000, 0x11046; 0x11066, 0x11075; 0x1107f, 0x110ba; - 0x110c2, 0x110c2; 0x110d0, 0x110e8; 0x110f0, 0x110f9; 0x11100, 0x11134; 0x11136, 0x1113f; - 0x11144, 0x11147; 0x11150, 0x11173; 0x11176, 0x11176; 0x11180, 0x111c4; 0x111c9, 0x111cc; - 0x111ce, 0x111da; 0x111dc, 0x111dc; 0x11200, 0x11211; 0x11213, 0x11237; 0x1123e, 0x11241; - 0x11280, 0x11286; 0x11288, 0x11288; 0x1128a, 0x1128d; 0x1128f, 0x1129d; 0x1129f, 0x112a8; - 0x112b0, 0x112ea; 0x112f0, 0x112f9; 0x11300, 0x11303; 0x11305, 0x1130c; 0x1130f, 0x11310; - 
0x11313, 0x11328; 0x1132a, 0x11330; 0x11332, 0x11333; 0x11335, 0x11339; 0x1133b, 0x11344; - 0x11347, 0x11348; 0x1134b, 0x1134d; 0x11350, 0x11350; 0x11357, 0x11357; 0x1135d, 0x11363; - 0x11366, 0x1136c; 0x11370, 0x11374; 0x11380, 0x11389; 0x1138b, 0x1138b; 0x1138e, 0x1138e; - 0x11390, 0x113b5; 0x113b7, 0x113c0; 0x113c2, 0x113c2; 0x113c5, 0x113c5; 0x113c7, 0x113ca; - 0x113cc, 0x113d3; 0x113e1, 0x113e2; 0x11400, 0x1144a; 0x11450, 0x11459; 0x1145e, 0x11461; - 0x11480, 0x114c5; 0x114c7, 0x114c7; 0x114d0, 0x114d9; 0x11580, 0x115b5; 0x115b8, 0x115c0; - 0x115d8, 0x115dd; 0x11600, 0x11640; 0x11644, 0x11644; 0x11650, 0x11659; 0x11680, 0x116b8; - 0x116c0, 0x116c9; 0x116d0, 0x116e3; 0x11700, 0x1171a; 0x1171d, 0x1172b; 0x11730, 0x11739; - 0x11740, 0x11746; 0x11800, 0x1183a; 0x118a0, 0x118e9; 0x118ff, 0x11906; 0x11909, 0x11909; - 0x1190c, 0x11913; 0x11915, 0x11916; 0x11918, 0x11935; 0x11937, 0x11938; 0x1193b, 0x11943; - 0x11950, 0x11959; 0x119a0, 0x119a7; 0x119aa, 0x119d7; 0x119da, 0x119e1; 0x119e3, 0x119e4; - 0x11a00, 0x11a3e; 0x11a47, 0x11a47; 0x11a50, 0x11a99; 0x11a9d, 0x11a9d; 0x11ab0, 0x11af8; + 0x1a90, 0x1a99; 0x1aa7, 0x1aa7; 0x1ab0, 0x1abd; 0x1abf, 0x1add; 0x1ae0, 0x1aeb; + 0x1b00, 0x1b4c; 0x1b50, 0x1b59; 0x1b6b, 0x1b73; 0x1b80, 0x1bf3; 0x1c00, 0x1c37; + 0x1c40, 0x1c49; 0x1c4d, 0x1c7d; 0x1c80, 0x1c8a; 0x1c90, 0x1cba; 0x1cbd, 0x1cbf; + 0x1cd0, 0x1cd2; 0x1cd4, 0x1cfa; 0x1d00, 0x1f15; 0x1f18, 0x1f1d; 0x1f20, 0x1f45; + 0x1f48, 0x1f4d; 0x1f50, 0x1f57; 0x1f59, 0x1f59; 0x1f5b, 0x1f5b; 0x1f5d, 0x1f5d; + 0x1f5f, 0x1f7d; 0x1f80, 0x1fb4; 0x1fb6, 0x1fbc; 0x1fbe, 0x1fbe; 0x1fc2, 0x1fc4; + 0x1fc6, 0x1fcc; 0x1fd0, 0x1fd3; 0x1fd6, 0x1fdb; 0x1fe0, 0x1fec; 0x1ff2, 0x1ff4; + 0x1ff6, 0x1ffc; 0x200c, 0x200d; 0x203f, 0x2040; 0x2054, 0x2054; 0x2071, 0x2071; + 0x207f, 0x207f; 0x2090, 0x209c; 0x20d0, 0x20dc; 0x20e1, 0x20e1; 0x20e5, 0x20f0; + 0x2102, 0x2102; 0x2107, 0x2107; 0x210a, 0x2113; 0x2115, 0x2115; 0x2118, 0x211d; + 0x2124, 0x2124; 0x2126, 0x2126; 0x2128, 0x2128; 0x212a, 0x2139; 0x213c, 
0x213f; + 0x2145, 0x2149; 0x214e, 0x214e; 0x2160, 0x2188; 0x2c00, 0x2ce4; 0x2ceb, 0x2cf3; + 0x2d00, 0x2d25; 0x2d27, 0x2d27; 0x2d2d, 0x2d2d; 0x2d30, 0x2d67; 0x2d6f, 0x2d6f; + 0x2d7f, 0x2d96; 0x2da0, 0x2da6; 0x2da8, 0x2dae; 0x2db0, 0x2db6; 0x2db8, 0x2dbe; + 0x2dc0, 0x2dc6; 0x2dc8, 0x2dce; 0x2dd0, 0x2dd6; 0x2dd8, 0x2dde; 0x2de0, 0x2dff; + 0x3005, 0x3007; 0x3021, 0x302f; 0x3031, 0x3035; 0x3038, 0x303c; 0x3041, 0x3096; + 0x3099, 0x309a; 0x309d, 0x309f; 0x30a1, 0x30ff; 0x3105, 0x312f; 0x3131, 0x318e; + 0x31a0, 0x31bf; 0x31f0, 0x31ff; 0x3400, 0x4dbf; 0x4e00, 0xa48c; 0xa4d0, 0xa4fd; + 0xa500, 0xa60c; 0xa610, 0xa62b; 0xa640, 0xa66f; 0xa674, 0xa67d; 0xa67f, 0xa6f1; + 0xa717, 0xa71f; 0xa722, 0xa788; 0xa78b, 0xa7dc; 0xa7f1, 0xa827; 0xa82c, 0xa82c; + 0xa840, 0xa873; 0xa880, 0xa8c5; 0xa8d0, 0xa8d9; 0xa8e0, 0xa8f7; 0xa8fb, 0xa8fb; + 0xa8fd, 0xa92d; 0xa930, 0xa953; 0xa960, 0xa97c; 0xa980, 0xa9c0; 0xa9cf, 0xa9d9; + 0xa9e0, 0xa9fe; 0xaa00, 0xaa36; 0xaa40, 0xaa4d; 0xaa50, 0xaa59; 0xaa60, 0xaa76; + 0xaa7a, 0xaac2; 0xaadb, 0xaadd; 0xaae0, 0xaaef; 0xaaf2, 0xaaf6; 0xab01, 0xab06; + 0xab09, 0xab0e; 0xab11, 0xab16; 0xab20, 0xab26; 0xab28, 0xab2e; 0xab30, 0xab5a; + 0xab5c, 0xab69; 0xab70, 0xabea; 0xabec, 0xabed; 0xabf0, 0xabf9; 0xac00, 0xd7a3; + 0xd7b0, 0xd7c6; 0xd7cb, 0xd7fb; 0xf900, 0xfa6d; 0xfa70, 0xfad9; 0xfb00, 0xfb06; + 0xfb13, 0xfb17; 0xfb1d, 0xfb28; 0xfb2a, 0xfb36; 0xfb38, 0xfb3c; 0xfb3e, 0xfb3e; + 0xfb40, 0xfb41; 0xfb43, 0xfb44; 0xfb46, 0xfbb1; 0xfbd3, 0xfc5d; 0xfc64, 0xfd3d; + 0xfd50, 0xfd8f; 0xfd92, 0xfdc7; 0xfdf0, 0xfdf9; 0xfe00, 0xfe0f; 0xfe20, 0xfe2f; + 0xfe33, 0xfe34; 0xfe4d, 0xfe4f; 0xfe71, 0xfe71; 0xfe73, 0xfe73; 0xfe77, 0xfe77; + 0xfe79, 0xfe79; 0xfe7b, 0xfe7b; 0xfe7d, 0xfe7d; 0xfe7f, 0xfefc; 0xff10, 0xff19; + 0xff21, 0xff3a; 0xff3f, 0xff3f; 0xff41, 0xff5a; 0xff65, 0xffbe; 0xffc2, 0xffc7; + 0xffca, 0xffcf; 0xffd2, 0xffd7; 0xffda, 0xffdc; 0x10000, 0x1000b; 0x1000d, 0x10026; + 0x10028, 0x1003a; 0x1003c, 0x1003d; 0x1003f, 0x1004d; 0x10050, 0x1005d; 0x10080, 0x100fa; + 
0x10140, 0x10174; 0x101fd, 0x101fd; 0x10280, 0x1029c; 0x102a0, 0x102d0; 0x102e0, 0x102e0; + 0x10300, 0x1031f; 0x1032d, 0x1034a; 0x10350, 0x1037a; 0x10380, 0x1039d; 0x103a0, 0x103c3; + 0x103c8, 0x103cf; 0x103d1, 0x103d5; 0x10400, 0x1049d; 0x104a0, 0x104a9; 0x104b0, 0x104d3; + 0x104d8, 0x104fb; 0x10500, 0x10527; 0x10530, 0x10563; 0x10570, 0x1057a; 0x1057c, 0x1058a; + 0x1058c, 0x10592; 0x10594, 0x10595; 0x10597, 0x105a1; 0x105a3, 0x105b1; 0x105b3, 0x105b9; + 0x105bb, 0x105bc; 0x105c0, 0x105f3; 0x10600, 0x10736; 0x10740, 0x10755; 0x10760, 0x10767; + 0x10780, 0x10785; 0x10787, 0x107b0; 0x107b2, 0x107ba; 0x10800, 0x10805; 0x10808, 0x10808; + 0x1080a, 0x10835; 0x10837, 0x10838; 0x1083c, 0x1083c; 0x1083f, 0x10855; 0x10860, 0x10876; + 0x10880, 0x1089e; 0x108e0, 0x108f2; 0x108f4, 0x108f5; 0x10900, 0x10915; 0x10920, 0x10939; + 0x10940, 0x10959; 0x10980, 0x109b7; 0x109be, 0x109bf; 0x10a00, 0x10a03; 0x10a05, 0x10a06; + 0x10a0c, 0x10a13; 0x10a15, 0x10a17; 0x10a19, 0x10a35; 0x10a38, 0x10a3a; 0x10a3f, 0x10a3f; + 0x10a60, 0x10a7c; 0x10a80, 0x10a9c; 0x10ac0, 0x10ac7; 0x10ac9, 0x10ae6; 0x10b00, 0x10b35; + 0x10b40, 0x10b55; 0x10b60, 0x10b72; 0x10b80, 0x10b91; 0x10c00, 0x10c48; 0x10c80, 0x10cb2; + 0x10cc0, 0x10cf2; 0x10d00, 0x10d27; 0x10d30, 0x10d39; 0x10d40, 0x10d65; 0x10d69, 0x10d6d; + 0x10d6f, 0x10d85; 0x10e80, 0x10ea9; 0x10eab, 0x10eac; 0x10eb0, 0x10eb1; 0x10ec2, 0x10ec7; + 0x10efa, 0x10f1c; 0x10f27, 0x10f27; 0x10f30, 0x10f50; 0x10f70, 0x10f85; 0x10fb0, 0x10fc4; + 0x10fe0, 0x10ff6; 0x11000, 0x11046; 0x11066, 0x11075; 0x1107f, 0x110ba; 0x110c2, 0x110c2; + 0x110d0, 0x110e8; 0x110f0, 0x110f9; 0x11100, 0x11134; 0x11136, 0x1113f; 0x11144, 0x11147; + 0x11150, 0x11173; 0x11176, 0x11176; 0x11180, 0x111c4; 0x111c9, 0x111cc; 0x111ce, 0x111da; + 0x111dc, 0x111dc; 0x11200, 0x11211; 0x11213, 0x11237; 0x1123e, 0x11241; 0x11280, 0x11286; + 0x11288, 0x11288; 0x1128a, 0x1128d; 0x1128f, 0x1129d; 0x1129f, 0x112a8; 0x112b0, 0x112ea; + 0x112f0, 0x112f9; 0x11300, 0x11303; 0x11305, 0x1130c; 0x1130f, 
0x11310; 0x11313, 0x11328; + 0x1132a, 0x11330; 0x11332, 0x11333; 0x11335, 0x11339; 0x1133b, 0x11344; 0x11347, 0x11348; + 0x1134b, 0x1134d; 0x11350, 0x11350; 0x11357, 0x11357; 0x1135d, 0x11363; 0x11366, 0x1136c; + 0x11370, 0x11374; 0x11380, 0x11389; 0x1138b, 0x1138b; 0x1138e, 0x1138e; 0x11390, 0x113b5; + 0x113b7, 0x113c0; 0x113c2, 0x113c2; 0x113c5, 0x113c5; 0x113c7, 0x113ca; 0x113cc, 0x113d3; + 0x113e1, 0x113e2; 0x11400, 0x1144a; 0x11450, 0x11459; 0x1145e, 0x11461; 0x11480, 0x114c5; + 0x114c7, 0x114c7; 0x114d0, 0x114d9; 0x11580, 0x115b5; 0x115b8, 0x115c0; 0x115d8, 0x115dd; + 0x11600, 0x11640; 0x11644, 0x11644; 0x11650, 0x11659; 0x11680, 0x116b8; 0x116c0, 0x116c9; + 0x116d0, 0x116e3; 0x11700, 0x1171a; 0x1171d, 0x1172b; 0x11730, 0x11739; 0x11740, 0x11746; + 0x11800, 0x1183a; 0x118a0, 0x118e9; 0x118ff, 0x11906; 0x11909, 0x11909; 0x1190c, 0x11913; + 0x11915, 0x11916; 0x11918, 0x11935; 0x11937, 0x11938; 0x1193b, 0x11943; 0x11950, 0x11959; + 0x119a0, 0x119a7; 0x119aa, 0x119d7; 0x119da, 0x119e1; 0x119e3, 0x119e4; 0x11a00, 0x11a3e; + 0x11a47, 0x11a47; 0x11a50, 0x11a99; 0x11a9d, 0x11a9d; 0x11ab0, 0x11af8; 0x11b60, 0x11b67; 0x11bc0, 0x11be0; 0x11bf0, 0x11bf9; 0x11c00, 0x11c08; 0x11c0a, 0x11c36; 0x11c38, 0x11c40; 0x11c50, 0x11c59; 0x11c72, 0x11c8f; 0x11c92, 0x11ca7; 0x11ca9, 0x11cb6; 0x11d00, 0x11d06; 0x11d08, 0x11d09; 0x11d0b, 0x11d36; 0x11d3a, 0x11d3a; 0x11d3c, 0x11d3d; 0x11d3f, 0x11d47; 0x11d50, 0x11d59; 0x11d60, 0x11d65; 0x11d67, 0x11d68; 0x11d6a, 0x11d8e; 0x11d90, 0x11d91; - 0x11d93, 0x11d98; 0x11da0, 0x11da9; 0x11ee0, 0x11ef6; 0x11f00, 0x11f10; 0x11f12, 0x11f3a; - 0x11f3e, 0x11f42; 0x11f50, 0x11f5a; 0x11fb0, 0x11fb0; 0x12000, 0x12399; 0x12400, 0x1246e; - 0x12480, 0x12543; 0x12f90, 0x12ff0; 0x13000, 0x1342f; 0x13440, 0x13455; 0x13460, 0x143fa; - 0x14400, 0x14646; 0x16100, 0x16139; 0x16800, 0x16a38; 0x16a40, 0x16a5e; 0x16a60, 0x16a69; - 0x16a70, 0x16abe; 0x16ac0, 0x16ac9; 0x16ad0, 0x16aed; 0x16af0, 0x16af4; 0x16b00, 0x16b36; - 0x16b40, 0x16b43; 0x16b50, 0x16b59; 0x16b63, 
0x16b77; 0x16b7d, 0x16b8f; 0x16d40, 0x16d6c; - 0x16d70, 0x16d79; 0x16e40, 0x16e7f; 0x16f00, 0x16f4a; 0x16f4f, 0x16f87; 0x16f8f, 0x16f9f; - 0x16fe0, 0x16fe1; 0x16fe3, 0x16fe4; 0x16ff0, 0x16ff1; 0x17000, 0x187f7; 0x18800, 0x18cd5; - 0x18cff, 0x18d08; 0x1aff0, 0x1aff3; 0x1aff5, 0x1affb; 0x1affd, 0x1affe; 0x1b000, 0x1b122; - 0x1b132, 0x1b132; 0x1b150, 0x1b152; 0x1b155, 0x1b155; 0x1b164, 0x1b167; 0x1b170, 0x1b2fb; - 0x1bc00, 0x1bc6a; 0x1bc70, 0x1bc7c; 0x1bc80, 0x1bc88; 0x1bc90, 0x1bc99; 0x1bc9d, 0x1bc9e; - 0x1ccf0, 0x1ccf9; 0x1cf00, 0x1cf2d; 0x1cf30, 0x1cf46; 0x1d165, 0x1d169; 0x1d16d, 0x1d172; - 0x1d17b, 0x1d182; 0x1d185, 0x1d18b; 0x1d1aa, 0x1d1ad; 0x1d242, 0x1d244; 0x1d400, 0x1d454; - 0x1d456, 0x1d49c; 0x1d49e, 0x1d49f; 0x1d4a2, 0x1d4a2; 0x1d4a5, 0x1d4a6; 0x1d4a9, 0x1d4ac; - 0x1d4ae, 0x1d4b9; 0x1d4bb, 0x1d4bb; 0x1d4bd, 0x1d4c3; 0x1d4c5, 0x1d505; 0x1d507, 0x1d50a; - 0x1d50d, 0x1d514; 0x1d516, 0x1d51c; 0x1d51e, 0x1d539; 0x1d53b, 0x1d53e; 0x1d540, 0x1d544; - 0x1d546, 0x1d546; 0x1d54a, 0x1d550; 0x1d552, 0x1d6a5; 0x1d6a8, 0x1d6c0; 0x1d6c2, 0x1d6da; - 0x1d6dc, 0x1d6fa; 0x1d6fc, 0x1d714; 0x1d716, 0x1d734; 0x1d736, 0x1d74e; 0x1d750, 0x1d76e; - 0x1d770, 0x1d788; 0x1d78a, 0x1d7a8; 0x1d7aa, 0x1d7c2; 0x1d7c4, 0x1d7cb; 0x1d7ce, 0x1d7ff; - 0x1da00, 0x1da36; 0x1da3b, 0x1da6c; 0x1da75, 0x1da75; 0x1da84, 0x1da84; 0x1da9b, 0x1da9f; - 0x1daa1, 0x1daaf; 0x1df00, 0x1df1e; 0x1df25, 0x1df2a; 0x1e000, 0x1e006; 0x1e008, 0x1e018; - 0x1e01b, 0x1e021; 0x1e023, 0x1e024; 0x1e026, 0x1e02a; 0x1e030, 0x1e06d; 0x1e08f, 0x1e08f; - 0x1e100, 0x1e12c; 0x1e130, 0x1e13d; 0x1e140, 0x1e149; 0x1e14e, 0x1e14e; 0x1e290, 0x1e2ae; - 0x1e2c0, 0x1e2f9; 0x1e4d0, 0x1e4f9; 0x1e5d0, 0x1e5fa; 0x1e7e0, 0x1e7e6; 0x1e7e8, 0x1e7eb; - 0x1e7ed, 0x1e7ee; 0x1e7f0, 0x1e7fe; 0x1e800, 0x1e8c4; 0x1e8d0, 0x1e8d6; 0x1e900, 0x1e94b; - 0x1e950, 0x1e959; 0x1ee00, 0x1ee03; 0x1ee05, 0x1ee1f; 0x1ee21, 0x1ee22; 0x1ee24, 0x1ee24; - 0x1ee27, 0x1ee27; 0x1ee29, 0x1ee32; 0x1ee34, 0x1ee37; 0x1ee39, 0x1ee39; 0x1ee3b, 0x1ee3b; - 0x1ee42, 0x1ee42; 
0x1ee47, 0x1ee47; 0x1ee49, 0x1ee49; 0x1ee4b, 0x1ee4b; 0x1ee4d, 0x1ee4f; - 0x1ee51, 0x1ee52; 0x1ee54, 0x1ee54; 0x1ee57, 0x1ee57; 0x1ee59, 0x1ee59; 0x1ee5b, 0x1ee5b; - 0x1ee5d, 0x1ee5d; 0x1ee5f, 0x1ee5f; 0x1ee61, 0x1ee62; 0x1ee64, 0x1ee64; 0x1ee67, 0x1ee6a; - 0x1ee6c, 0x1ee72; 0x1ee74, 0x1ee77; 0x1ee79, 0x1ee7c; 0x1ee7e, 0x1ee7e; 0x1ee80, 0x1ee89; - 0x1ee8b, 0x1ee9b; 0x1eea1, 0x1eea3; 0x1eea5, 0x1eea9; 0x1eeab, 0x1eebb; 0x1fbf0, 0x1fbf9; - 0x20000, 0x2a6df; 0x2a700, 0x2b739; 0x2b740, 0x2b81d; 0x2b820, 0x2cea1; 0x2ceb0, 0x2ebe0; - 0x2ebf0, 0x2ee5d; 0x2f800, 0x2fa1d; 0x30000, 0x3134a; 0x31350, 0x323af; 0xe0100, 0xe01ef] + 0x11d93, 0x11d98; 0x11da0, 0x11da9; 0x11db0, 0x11ddb; 0x11de0, 0x11de9; 0x11ee0, 0x11ef6; + 0x11f00, 0x11f10; 0x11f12, 0x11f3a; 0x11f3e, 0x11f42; 0x11f50, 0x11f5a; 0x11fb0, 0x11fb0; + 0x12000, 0x12399; 0x12400, 0x1246e; 0x12480, 0x12543; 0x12f90, 0x12ff0; 0x13000, 0x1342f; + 0x13440, 0x13455; 0x13460, 0x143fa; 0x14400, 0x14646; 0x16100, 0x16139; 0x16800, 0x16a38; + 0x16a40, 0x16a5e; 0x16a60, 0x16a69; 0x16a70, 0x16abe; 0x16ac0, 0x16ac9; 0x16ad0, 0x16aed; + 0x16af0, 0x16af4; 0x16b00, 0x16b36; 0x16b40, 0x16b43; 0x16b50, 0x16b59; 0x16b63, 0x16b77; + 0x16b7d, 0x16b8f; 0x16d40, 0x16d6c; 0x16d70, 0x16d79; 0x16e40, 0x16e7f; 0x16ea0, 0x16eb8; + 0x16ebb, 0x16ed3; 0x16f00, 0x16f4a; 0x16f4f, 0x16f87; 0x16f8f, 0x16f9f; 0x16fe0, 0x16fe1; + 0x16fe3, 0x16fe4; 0x16ff0, 0x16ff6; 0x17000, 0x18cd5; 0x18cff, 0x18d1e; 0x18d80, 0x18df2; + 0x1aff0, 0x1aff3; 0x1aff5, 0x1affb; 0x1affd, 0x1affe; 0x1b000, 0x1b122; 0x1b132, 0x1b132; + 0x1b150, 0x1b152; 0x1b155, 0x1b155; 0x1b164, 0x1b167; 0x1b170, 0x1b2fb; 0x1bc00, 0x1bc6a; + 0x1bc70, 0x1bc7c; 0x1bc80, 0x1bc88; 0x1bc90, 0x1bc99; 0x1bc9d, 0x1bc9e; 0x1ccf0, 0x1ccf9; + 0x1cf00, 0x1cf2d; 0x1cf30, 0x1cf46; 0x1d165, 0x1d169; 0x1d16d, 0x1d172; 0x1d17b, 0x1d182; + 0x1d185, 0x1d18b; 0x1d1aa, 0x1d1ad; 0x1d242, 0x1d244; 0x1d400, 0x1d454; 0x1d456, 0x1d49c; + 0x1d49e, 0x1d49f; 0x1d4a2, 0x1d4a2; 0x1d4a5, 0x1d4a6; 0x1d4a9, 0x1d4ac; 0x1d4ae, 
0x1d4b9; + 0x1d4bb, 0x1d4bb; 0x1d4bd, 0x1d4c3; 0x1d4c5, 0x1d505; 0x1d507, 0x1d50a; 0x1d50d, 0x1d514; + 0x1d516, 0x1d51c; 0x1d51e, 0x1d539; 0x1d53b, 0x1d53e; 0x1d540, 0x1d544; 0x1d546, 0x1d546; + 0x1d54a, 0x1d550; 0x1d552, 0x1d6a5; 0x1d6a8, 0x1d6c0; 0x1d6c2, 0x1d6da; 0x1d6dc, 0x1d6fa; + 0x1d6fc, 0x1d714; 0x1d716, 0x1d734; 0x1d736, 0x1d74e; 0x1d750, 0x1d76e; 0x1d770, 0x1d788; + 0x1d78a, 0x1d7a8; 0x1d7aa, 0x1d7c2; 0x1d7c4, 0x1d7cb; 0x1d7ce, 0x1d7ff; 0x1da00, 0x1da36; + 0x1da3b, 0x1da6c; 0x1da75, 0x1da75; 0x1da84, 0x1da84; 0x1da9b, 0x1da9f; 0x1daa1, 0x1daaf; + 0x1df00, 0x1df1e; 0x1df25, 0x1df2a; 0x1e000, 0x1e006; 0x1e008, 0x1e018; 0x1e01b, 0x1e021; + 0x1e023, 0x1e024; 0x1e026, 0x1e02a; 0x1e030, 0x1e06d; 0x1e08f, 0x1e08f; 0x1e100, 0x1e12c; + 0x1e130, 0x1e13d; 0x1e140, 0x1e149; 0x1e14e, 0x1e14e; 0x1e290, 0x1e2ae; 0x1e2c0, 0x1e2f9; + 0x1e4d0, 0x1e4f9; 0x1e5d0, 0x1e5fa; 0x1e6c0, 0x1e6de; 0x1e6e0, 0x1e6f5; 0x1e6fe, 0x1e6ff; + 0x1e7e0, 0x1e7e6; 0x1e7e8, 0x1e7eb; 0x1e7ed, 0x1e7ee; 0x1e7f0, 0x1e7fe; 0x1e800, 0x1e8c4; + 0x1e8d0, 0x1e8d6; 0x1e900, 0x1e94b; 0x1e950, 0x1e959; 0x1ee00, 0x1ee03; 0x1ee05, 0x1ee1f; + 0x1ee21, 0x1ee22; 0x1ee24, 0x1ee24; 0x1ee27, 0x1ee27; 0x1ee29, 0x1ee32; 0x1ee34, 0x1ee37; + 0x1ee39, 0x1ee39; 0x1ee3b, 0x1ee3b; 0x1ee42, 0x1ee42; 0x1ee47, 0x1ee47; 0x1ee49, 0x1ee49; + 0x1ee4b, 0x1ee4b; 0x1ee4d, 0x1ee4f; 0x1ee51, 0x1ee52; 0x1ee54, 0x1ee54; 0x1ee57, 0x1ee57; + 0x1ee59, 0x1ee59; 0x1ee5b, 0x1ee5b; 0x1ee5d, 0x1ee5d; 0x1ee5f, 0x1ee5f; 0x1ee61, 0x1ee62; + 0x1ee64, 0x1ee64; 0x1ee67, 0x1ee6a; 0x1ee6c, 0x1ee72; 0x1ee74, 0x1ee77; 0x1ee79, 0x1ee7c; + 0x1ee7e, 0x1ee7e; 0x1ee80, 0x1ee89; 0x1ee8b, 0x1ee9b; 0x1eea1, 0x1eea3; 0x1eea5, 0x1eea9; + 0x1eeab, 0x1eebb; 0x1fbf0, 0x1fbf9; 0x20000, 0x2a6df; 0x2a700, 0x2b81d; 0x2b820, 0x2cead; + 0x2ceb0, 0x2ebe0; 0x2ebf0, 0x2ee5d; 0x2f800, 0x2fa1d; 0x30000, 0x3134a; 0x31350, 0x33479; + 0xe0100, 0xe01ef] let xid_start = Sedlex_cset.of_list [0x41, 0x5a; 0x61, 0x7a; 0xaa, 0xaa; 0xb5, 0xb5; 0xba, 0xba; @@ -1971,7 +1986,7 @@ module 
Properties = struct 0x6ff, 0x6ff; 0x710, 0x710; 0x712, 0x72f; 0x74d, 0x7a5; 0x7b1, 0x7b1; 0x7ca, 0x7ea; 0x7f4, 0x7f5; 0x7fa, 0x7fa; 0x800, 0x815; 0x81a, 0x81a; 0x824, 0x824; 0x828, 0x828; 0x840, 0x858; 0x860, 0x86a; 0x870, 0x887; - 0x889, 0x88e; 0x8a0, 0x8c9; 0x904, 0x939; 0x93d, 0x93d; 0x950, 0x950; + 0x889, 0x88f; 0x8a0, 0x8c9; 0x904, 0x939; 0x93d, 0x93d; 0x950, 0x950; 0x958, 0x961; 0x971, 0x980; 0x985, 0x98c; 0x98f, 0x990; 0x993, 0x9a8; 0x9aa, 0x9b0; 0x9b2, 0x9b2; 0x9b6, 0x9b9; 0x9bd, 0x9bd; 0x9ce, 0x9ce; 0x9dc, 0x9dd; 0x9df, 0x9e1; 0x9f0, 0x9f1; 0x9fc, 0x9fc; 0xa05, 0xa0a; @@ -1984,9 +1999,9 @@ module Properties = struct 0xb85, 0xb8a; 0xb8e, 0xb90; 0xb92, 0xb95; 0xb99, 0xb9a; 0xb9c, 0xb9c; 0xb9e, 0xb9f; 0xba3, 0xba4; 0xba8, 0xbaa; 0xbae, 0xbb9; 0xbd0, 0xbd0; 0xc05, 0xc0c; 0xc0e, 0xc10; 0xc12, 0xc28; 0xc2a, 0xc39; 0xc3d, 0xc3d; - 0xc58, 0xc5a; 0xc5d, 0xc5d; 0xc60, 0xc61; 0xc80, 0xc80; 0xc85, 0xc8c; + 0xc58, 0xc5a; 0xc5c, 0xc5d; 0xc60, 0xc61; 0xc80, 0xc80; 0xc85, 0xc8c; 0xc8e, 0xc90; 0xc92, 0xca8; 0xcaa, 0xcb3; 0xcb5, 0xcb9; 0xcbd, 0xcbd; - 0xcdd, 0xcde; 0xce0, 0xce1; 0xcf1, 0xcf2; 0xd04, 0xd0c; 0xd0e, 0xd10; + 0xcdc, 0xcde; 0xce0, 0xce1; 0xcf1, 0xcf2; 0xd04, 0xd0c; 0xd0e, 0xd10; 0xd12, 0xd3a; 0xd3d, 0xd3d; 0xd4e, 0xd4e; 0xd54, 0xd56; 0xd5f, 0xd61; 0xd7a, 0xd7f; 0xd85, 0xd96; 0xd9a, 0xdb1; 0xdb3, 0xdbb; 0xdbd, 0xdbd; 0xdc0, 0xdc6; 0xe01, 0xe30; 0xe32, 0xe32; 0xe40, 0xe46; 0xe81, 0xe82; @@ -2022,82 +2037,84 @@ module Properties = struct 0x30a1, 0x30fa; 0x30fc, 0x30ff; 0x3105, 0x312f; 0x3131, 0x318e; 0x31a0, 0x31bf; 0x31f0, 0x31ff; 0x3400, 0x4dbf; 0x4e00, 0xa48c; 0xa4d0, 0xa4fd; 0xa500, 0xa60c; 0xa610, 0xa61f; 0xa62a, 0xa62b; 0xa640, 0xa66e; 0xa67f, 0xa69d; 0xa6a0, 0xa6ef; - 0xa717, 0xa71f; 0xa722, 0xa788; 0xa78b, 0xa7cd; 0xa7d0, 0xa7d1; 0xa7d3, 0xa7d3; - 0xa7d5, 0xa7dc; 0xa7f2, 0xa801; 0xa803, 0xa805; 0xa807, 0xa80a; 0xa80c, 0xa822; - 0xa840, 0xa873; 0xa882, 0xa8b3; 0xa8f2, 0xa8f7; 0xa8fb, 0xa8fb; 0xa8fd, 0xa8fe; - 0xa90a, 0xa925; 0xa930, 0xa946; 0xa960, 0xa97c; 
0xa984, 0xa9b2; 0xa9cf, 0xa9cf; - 0xa9e0, 0xa9e4; 0xa9e6, 0xa9ef; 0xa9fa, 0xa9fe; 0xaa00, 0xaa28; 0xaa40, 0xaa42; - 0xaa44, 0xaa4b; 0xaa60, 0xaa76; 0xaa7a, 0xaa7a; 0xaa7e, 0xaaaf; 0xaab1, 0xaab1; - 0xaab5, 0xaab6; 0xaab9, 0xaabd; 0xaac0, 0xaac0; 0xaac2, 0xaac2; 0xaadb, 0xaadd; - 0xaae0, 0xaaea; 0xaaf2, 0xaaf4; 0xab01, 0xab06; 0xab09, 0xab0e; 0xab11, 0xab16; - 0xab20, 0xab26; 0xab28, 0xab2e; 0xab30, 0xab5a; 0xab5c, 0xab69; 0xab70, 0xabe2; - 0xac00, 0xd7a3; 0xd7b0, 0xd7c6; 0xd7cb, 0xd7fb; 0xf900, 0xfa6d; 0xfa70, 0xfad9; - 0xfb00, 0xfb06; 0xfb13, 0xfb17; 0xfb1d, 0xfb1d; 0xfb1f, 0xfb28; 0xfb2a, 0xfb36; - 0xfb38, 0xfb3c; 0xfb3e, 0xfb3e; 0xfb40, 0xfb41; 0xfb43, 0xfb44; 0xfb46, 0xfbb1; - 0xfbd3, 0xfc5d; 0xfc64, 0xfd3d; 0xfd50, 0xfd8f; 0xfd92, 0xfdc7; 0xfdf0, 0xfdf9; - 0xfe71, 0xfe71; 0xfe73, 0xfe73; 0xfe77, 0xfe77; 0xfe79, 0xfe79; 0xfe7b, 0xfe7b; - 0xfe7d, 0xfe7d; 0xfe7f, 0xfefc; 0xff21, 0xff3a; 0xff41, 0xff5a; 0xff66, 0xff9d; - 0xffa0, 0xffbe; 0xffc2, 0xffc7; 0xffca, 0xffcf; 0xffd2, 0xffd7; 0xffda, 0xffdc; - 0x10000, 0x1000b; 0x1000d, 0x10026; 0x10028, 0x1003a; 0x1003c, 0x1003d; 0x1003f, 0x1004d; - 0x10050, 0x1005d; 0x10080, 0x100fa; 0x10140, 0x10174; 0x10280, 0x1029c; 0x102a0, 0x102d0; - 0x10300, 0x1031f; 0x1032d, 0x1034a; 0x10350, 0x10375; 0x10380, 0x1039d; 0x103a0, 0x103c3; - 0x103c8, 0x103cf; 0x103d1, 0x103d5; 0x10400, 0x1049d; 0x104b0, 0x104d3; 0x104d8, 0x104fb; - 0x10500, 0x10527; 0x10530, 0x10563; 0x10570, 0x1057a; 0x1057c, 0x1058a; 0x1058c, 0x10592; - 0x10594, 0x10595; 0x10597, 0x105a1; 0x105a3, 0x105b1; 0x105b3, 0x105b9; 0x105bb, 0x105bc; - 0x105c0, 0x105f3; 0x10600, 0x10736; 0x10740, 0x10755; 0x10760, 0x10767; 0x10780, 0x10785; - 0x10787, 0x107b0; 0x107b2, 0x107ba; 0x10800, 0x10805; 0x10808, 0x10808; 0x1080a, 0x10835; - 0x10837, 0x10838; 0x1083c, 0x1083c; 0x1083f, 0x10855; 0x10860, 0x10876; 0x10880, 0x1089e; - 0x108e0, 0x108f2; 0x108f4, 0x108f5; 0x10900, 0x10915; 0x10920, 0x10939; 0x10980, 0x109b7; - 0x109be, 0x109bf; 0x10a00, 0x10a00; 0x10a10, 0x10a13; 
0x10a15, 0x10a17; 0x10a19, 0x10a35; - 0x10a60, 0x10a7c; 0x10a80, 0x10a9c; 0x10ac0, 0x10ac7; 0x10ac9, 0x10ae4; 0x10b00, 0x10b35; - 0x10b40, 0x10b55; 0x10b60, 0x10b72; 0x10b80, 0x10b91; 0x10c00, 0x10c48; 0x10c80, 0x10cb2; - 0x10cc0, 0x10cf2; 0x10d00, 0x10d23; 0x10d4a, 0x10d65; 0x10d6f, 0x10d85; 0x10e80, 0x10ea9; - 0x10eb0, 0x10eb1; 0x10ec2, 0x10ec4; 0x10f00, 0x10f1c; 0x10f27, 0x10f27; 0x10f30, 0x10f45; - 0x10f70, 0x10f81; 0x10fb0, 0x10fc4; 0x10fe0, 0x10ff6; 0x11003, 0x11037; 0x11071, 0x11072; - 0x11075, 0x11075; 0x11083, 0x110af; 0x110d0, 0x110e8; 0x11103, 0x11126; 0x11144, 0x11144; - 0x11147, 0x11147; 0x11150, 0x11172; 0x11176, 0x11176; 0x11183, 0x111b2; 0x111c1, 0x111c4; - 0x111da, 0x111da; 0x111dc, 0x111dc; 0x11200, 0x11211; 0x11213, 0x1122b; 0x1123f, 0x11240; - 0x11280, 0x11286; 0x11288, 0x11288; 0x1128a, 0x1128d; 0x1128f, 0x1129d; 0x1129f, 0x112a8; - 0x112b0, 0x112de; 0x11305, 0x1130c; 0x1130f, 0x11310; 0x11313, 0x11328; 0x1132a, 0x11330; - 0x11332, 0x11333; 0x11335, 0x11339; 0x1133d, 0x1133d; 0x11350, 0x11350; 0x1135d, 0x11361; - 0x11380, 0x11389; 0x1138b, 0x1138b; 0x1138e, 0x1138e; 0x11390, 0x113b5; 0x113b7, 0x113b7; - 0x113d1, 0x113d1; 0x113d3, 0x113d3; 0x11400, 0x11434; 0x11447, 0x1144a; 0x1145f, 0x11461; - 0x11480, 0x114af; 0x114c4, 0x114c5; 0x114c7, 0x114c7; 0x11580, 0x115ae; 0x115d8, 0x115db; - 0x11600, 0x1162f; 0x11644, 0x11644; 0x11680, 0x116aa; 0x116b8, 0x116b8; 0x11700, 0x1171a; - 0x11740, 0x11746; 0x11800, 0x1182b; 0x118a0, 0x118df; 0x118ff, 0x11906; 0x11909, 0x11909; - 0x1190c, 0x11913; 0x11915, 0x11916; 0x11918, 0x1192f; 0x1193f, 0x1193f; 0x11941, 0x11941; - 0x119a0, 0x119a7; 0x119aa, 0x119d0; 0x119e1, 0x119e1; 0x119e3, 0x119e3; 0x11a00, 0x11a00; - 0x11a0b, 0x11a32; 0x11a3a, 0x11a3a; 0x11a50, 0x11a50; 0x11a5c, 0x11a89; 0x11a9d, 0x11a9d; - 0x11ab0, 0x11af8; 0x11bc0, 0x11be0; 0x11c00, 0x11c08; 0x11c0a, 0x11c2e; 0x11c40, 0x11c40; - 0x11c72, 0x11c8f; 0x11d00, 0x11d06; 0x11d08, 0x11d09; 0x11d0b, 0x11d30; 0x11d46, 0x11d46; - 0x11d60, 0x11d65; 0x11d67, 
0x11d68; 0x11d6a, 0x11d89; 0x11d98, 0x11d98; 0x11ee0, 0x11ef2; - 0x11f02, 0x11f02; 0x11f04, 0x11f10; 0x11f12, 0x11f33; 0x11fb0, 0x11fb0; 0x12000, 0x12399; - 0x12400, 0x1246e; 0x12480, 0x12543; 0x12f90, 0x12ff0; 0x13000, 0x1342f; 0x13441, 0x13446; - 0x13460, 0x143fa; 0x14400, 0x14646; 0x16100, 0x1611d; 0x16800, 0x16a38; 0x16a40, 0x16a5e; - 0x16a70, 0x16abe; 0x16ad0, 0x16aed; 0x16b00, 0x16b2f; 0x16b40, 0x16b43; 0x16b63, 0x16b77; - 0x16b7d, 0x16b8f; 0x16d40, 0x16d6c; 0x16e40, 0x16e7f; 0x16f00, 0x16f4a; 0x16f50, 0x16f50; - 0x16f93, 0x16f9f; 0x16fe0, 0x16fe1; 0x16fe3, 0x16fe3; 0x17000, 0x187f7; 0x18800, 0x18cd5; - 0x18cff, 0x18d08; 0x1aff0, 0x1aff3; 0x1aff5, 0x1affb; 0x1affd, 0x1affe; 0x1b000, 0x1b122; - 0x1b132, 0x1b132; 0x1b150, 0x1b152; 0x1b155, 0x1b155; 0x1b164, 0x1b167; 0x1b170, 0x1b2fb; - 0x1bc00, 0x1bc6a; 0x1bc70, 0x1bc7c; 0x1bc80, 0x1bc88; 0x1bc90, 0x1bc99; 0x1d400, 0x1d454; - 0x1d456, 0x1d49c; 0x1d49e, 0x1d49f; 0x1d4a2, 0x1d4a2; 0x1d4a5, 0x1d4a6; 0x1d4a9, 0x1d4ac; - 0x1d4ae, 0x1d4b9; 0x1d4bb, 0x1d4bb; 0x1d4bd, 0x1d4c3; 0x1d4c5, 0x1d505; 0x1d507, 0x1d50a; - 0x1d50d, 0x1d514; 0x1d516, 0x1d51c; 0x1d51e, 0x1d539; 0x1d53b, 0x1d53e; 0x1d540, 0x1d544; - 0x1d546, 0x1d546; 0x1d54a, 0x1d550; 0x1d552, 0x1d6a5; 0x1d6a8, 0x1d6c0; 0x1d6c2, 0x1d6da; - 0x1d6dc, 0x1d6fa; 0x1d6fc, 0x1d714; 0x1d716, 0x1d734; 0x1d736, 0x1d74e; 0x1d750, 0x1d76e; - 0x1d770, 0x1d788; 0x1d78a, 0x1d7a8; 0x1d7aa, 0x1d7c2; 0x1d7c4, 0x1d7cb; 0x1df00, 0x1df1e; - 0x1df25, 0x1df2a; 0x1e030, 0x1e06d; 0x1e100, 0x1e12c; 0x1e137, 0x1e13d; 0x1e14e, 0x1e14e; - 0x1e290, 0x1e2ad; 0x1e2c0, 0x1e2eb; 0x1e4d0, 0x1e4eb; 0x1e5d0, 0x1e5ed; 0x1e5f0, 0x1e5f0; - 0x1e7e0, 0x1e7e6; 0x1e7e8, 0x1e7eb; 0x1e7ed, 0x1e7ee; 0x1e7f0, 0x1e7fe; 0x1e800, 0x1e8c4; - 0x1e900, 0x1e943; 0x1e94b, 0x1e94b; 0x1ee00, 0x1ee03; 0x1ee05, 0x1ee1f; 0x1ee21, 0x1ee22; - 0x1ee24, 0x1ee24; 0x1ee27, 0x1ee27; 0x1ee29, 0x1ee32; 0x1ee34, 0x1ee37; 0x1ee39, 0x1ee39; - 0x1ee3b, 0x1ee3b; 0x1ee42, 0x1ee42; 0x1ee47, 0x1ee47; 0x1ee49, 0x1ee49; 0x1ee4b, 0x1ee4b; - 
0x1ee4d, 0x1ee4f; 0x1ee51, 0x1ee52; 0x1ee54, 0x1ee54; 0x1ee57, 0x1ee57; 0x1ee59, 0x1ee59; - 0x1ee5b, 0x1ee5b; 0x1ee5d, 0x1ee5d; 0x1ee5f, 0x1ee5f; 0x1ee61, 0x1ee62; 0x1ee64, 0x1ee64; - 0x1ee67, 0x1ee6a; 0x1ee6c, 0x1ee72; 0x1ee74, 0x1ee77; 0x1ee79, 0x1ee7c; 0x1ee7e, 0x1ee7e; - 0x1ee80, 0x1ee89; 0x1ee8b, 0x1ee9b; 0x1eea1, 0x1eea3; 0x1eea5, 0x1eea9; 0x1eeab, 0x1eebb; - 0x20000, 0x2a6df; 0x2a700, 0x2b739; 0x2b740, 0x2b81d; 0x2b820, 0x2cea1; 0x2ceb0, 0x2ebe0; - 0x2ebf0, 0x2ee5d; 0x2f800, 0x2fa1d; 0x30000, 0x3134a; 0x31350, 0x323af] + 0xa717, 0xa71f; 0xa722, 0xa788; 0xa78b, 0xa7dc; 0xa7f1, 0xa801; 0xa803, 0xa805; + 0xa807, 0xa80a; 0xa80c, 0xa822; 0xa840, 0xa873; 0xa882, 0xa8b3; 0xa8f2, 0xa8f7; + 0xa8fb, 0xa8fb; 0xa8fd, 0xa8fe; 0xa90a, 0xa925; 0xa930, 0xa946; 0xa960, 0xa97c; + 0xa984, 0xa9b2; 0xa9cf, 0xa9cf; 0xa9e0, 0xa9e4; 0xa9e6, 0xa9ef; 0xa9fa, 0xa9fe; + 0xaa00, 0xaa28; 0xaa40, 0xaa42; 0xaa44, 0xaa4b; 0xaa60, 0xaa76; 0xaa7a, 0xaa7a; + 0xaa7e, 0xaaaf; 0xaab1, 0xaab1; 0xaab5, 0xaab6; 0xaab9, 0xaabd; 0xaac0, 0xaac0; + 0xaac2, 0xaac2; 0xaadb, 0xaadd; 0xaae0, 0xaaea; 0xaaf2, 0xaaf4; 0xab01, 0xab06; + 0xab09, 0xab0e; 0xab11, 0xab16; 0xab20, 0xab26; 0xab28, 0xab2e; 0xab30, 0xab5a; + 0xab5c, 0xab69; 0xab70, 0xabe2; 0xac00, 0xd7a3; 0xd7b0, 0xd7c6; 0xd7cb, 0xd7fb; + 0xf900, 0xfa6d; 0xfa70, 0xfad9; 0xfb00, 0xfb06; 0xfb13, 0xfb17; 0xfb1d, 0xfb1d; + 0xfb1f, 0xfb28; 0xfb2a, 0xfb36; 0xfb38, 0xfb3c; 0xfb3e, 0xfb3e; 0xfb40, 0xfb41; + 0xfb43, 0xfb44; 0xfb46, 0xfbb1; 0xfbd3, 0xfc5d; 0xfc64, 0xfd3d; 0xfd50, 0xfd8f; + 0xfd92, 0xfdc7; 0xfdf0, 0xfdf9; 0xfe71, 0xfe71; 0xfe73, 0xfe73; 0xfe77, 0xfe77; + 0xfe79, 0xfe79; 0xfe7b, 0xfe7b; 0xfe7d, 0xfe7d; 0xfe7f, 0xfefc; 0xff21, 0xff3a; + 0xff41, 0xff5a; 0xff66, 0xff9d; 0xffa0, 0xffbe; 0xffc2, 0xffc7; 0xffca, 0xffcf; + 0xffd2, 0xffd7; 0xffda, 0xffdc; 0x10000, 0x1000b; 0x1000d, 0x10026; 0x10028, 0x1003a; + 0x1003c, 0x1003d; 0x1003f, 0x1004d; 0x10050, 0x1005d; 0x10080, 0x100fa; 0x10140, 0x10174; + 0x10280, 0x1029c; 0x102a0, 0x102d0; 0x10300, 0x1031f; 
0x1032d, 0x1034a; 0x10350, 0x10375; + 0x10380, 0x1039d; 0x103a0, 0x103c3; 0x103c8, 0x103cf; 0x103d1, 0x103d5; 0x10400, 0x1049d; + 0x104b0, 0x104d3; 0x104d8, 0x104fb; 0x10500, 0x10527; 0x10530, 0x10563; 0x10570, 0x1057a; + 0x1057c, 0x1058a; 0x1058c, 0x10592; 0x10594, 0x10595; 0x10597, 0x105a1; 0x105a3, 0x105b1; + 0x105b3, 0x105b9; 0x105bb, 0x105bc; 0x105c0, 0x105f3; 0x10600, 0x10736; 0x10740, 0x10755; + 0x10760, 0x10767; 0x10780, 0x10785; 0x10787, 0x107b0; 0x107b2, 0x107ba; 0x10800, 0x10805; + 0x10808, 0x10808; 0x1080a, 0x10835; 0x10837, 0x10838; 0x1083c, 0x1083c; 0x1083f, 0x10855; + 0x10860, 0x10876; 0x10880, 0x1089e; 0x108e0, 0x108f2; 0x108f4, 0x108f5; 0x10900, 0x10915; + 0x10920, 0x10939; 0x10940, 0x10959; 0x10980, 0x109b7; 0x109be, 0x109bf; 0x10a00, 0x10a00; + 0x10a10, 0x10a13; 0x10a15, 0x10a17; 0x10a19, 0x10a35; 0x10a60, 0x10a7c; 0x10a80, 0x10a9c; + 0x10ac0, 0x10ac7; 0x10ac9, 0x10ae4; 0x10b00, 0x10b35; 0x10b40, 0x10b55; 0x10b60, 0x10b72; + 0x10b80, 0x10b91; 0x10c00, 0x10c48; 0x10c80, 0x10cb2; 0x10cc0, 0x10cf2; 0x10d00, 0x10d23; + 0x10d4a, 0x10d65; 0x10d6f, 0x10d85; 0x10e80, 0x10ea9; 0x10eb0, 0x10eb1; 0x10ec2, 0x10ec7; + 0x10f00, 0x10f1c; 0x10f27, 0x10f27; 0x10f30, 0x10f45; 0x10f70, 0x10f81; 0x10fb0, 0x10fc4; + 0x10fe0, 0x10ff6; 0x11003, 0x11037; 0x11071, 0x11072; 0x11075, 0x11075; 0x11083, 0x110af; + 0x110d0, 0x110e8; 0x11103, 0x11126; 0x11144, 0x11144; 0x11147, 0x11147; 0x11150, 0x11172; + 0x11176, 0x11176; 0x11183, 0x111b2; 0x111c1, 0x111c4; 0x111da, 0x111da; 0x111dc, 0x111dc; + 0x11200, 0x11211; 0x11213, 0x1122b; 0x1123f, 0x11240; 0x11280, 0x11286; 0x11288, 0x11288; + 0x1128a, 0x1128d; 0x1128f, 0x1129d; 0x1129f, 0x112a8; 0x112b0, 0x112de; 0x11305, 0x1130c; + 0x1130f, 0x11310; 0x11313, 0x11328; 0x1132a, 0x11330; 0x11332, 0x11333; 0x11335, 0x11339; + 0x1133d, 0x1133d; 0x11350, 0x11350; 0x1135d, 0x11361; 0x11380, 0x11389; 0x1138b, 0x1138b; + 0x1138e, 0x1138e; 0x11390, 0x113b5; 0x113b7, 0x113b7; 0x113d1, 0x113d1; 0x113d3, 0x113d3; + 0x11400, 0x11434; 0x11447, 
0x1144a; 0x1145f, 0x11461; 0x11480, 0x114af; 0x114c4, 0x114c5; + 0x114c7, 0x114c7; 0x11580, 0x115ae; 0x115d8, 0x115db; 0x11600, 0x1162f; 0x11644, 0x11644; + 0x11680, 0x116aa; 0x116b8, 0x116b8; 0x11700, 0x1171a; 0x11740, 0x11746; 0x11800, 0x1182b; + 0x118a0, 0x118df; 0x118ff, 0x11906; 0x11909, 0x11909; 0x1190c, 0x11913; 0x11915, 0x11916; + 0x11918, 0x1192f; 0x1193f, 0x1193f; 0x11941, 0x11941; 0x119a0, 0x119a7; 0x119aa, 0x119d0; + 0x119e1, 0x119e1; 0x119e3, 0x119e3; 0x11a00, 0x11a00; 0x11a0b, 0x11a32; 0x11a3a, 0x11a3a; + 0x11a50, 0x11a50; 0x11a5c, 0x11a89; 0x11a9d, 0x11a9d; 0x11ab0, 0x11af8; 0x11bc0, 0x11be0; + 0x11c00, 0x11c08; 0x11c0a, 0x11c2e; 0x11c40, 0x11c40; 0x11c72, 0x11c8f; 0x11d00, 0x11d06; + 0x11d08, 0x11d09; 0x11d0b, 0x11d30; 0x11d46, 0x11d46; 0x11d60, 0x11d65; 0x11d67, 0x11d68; + 0x11d6a, 0x11d89; 0x11d98, 0x11d98; 0x11db0, 0x11ddb; 0x11ee0, 0x11ef2; 0x11f02, 0x11f02; + 0x11f04, 0x11f10; 0x11f12, 0x11f33; 0x11fb0, 0x11fb0; 0x12000, 0x12399; 0x12400, 0x1246e; + 0x12480, 0x12543; 0x12f90, 0x12ff0; 0x13000, 0x1342f; 0x13441, 0x13446; 0x13460, 0x143fa; + 0x14400, 0x14646; 0x16100, 0x1611d; 0x16800, 0x16a38; 0x16a40, 0x16a5e; 0x16a70, 0x16abe; + 0x16ad0, 0x16aed; 0x16b00, 0x16b2f; 0x16b40, 0x16b43; 0x16b63, 0x16b77; 0x16b7d, 0x16b8f; + 0x16d40, 0x16d6c; 0x16e40, 0x16e7f; 0x16ea0, 0x16eb8; 0x16ebb, 0x16ed3; 0x16f00, 0x16f4a; + 0x16f50, 0x16f50; 0x16f93, 0x16f9f; 0x16fe0, 0x16fe1; 0x16fe3, 0x16fe3; 0x16ff2, 0x16ff6; + 0x17000, 0x18cd5; 0x18cff, 0x18d1e; 0x18d80, 0x18df2; 0x1aff0, 0x1aff3; 0x1aff5, 0x1affb; + 0x1affd, 0x1affe; 0x1b000, 0x1b122; 0x1b132, 0x1b132; 0x1b150, 0x1b152; 0x1b155, 0x1b155; + 0x1b164, 0x1b167; 0x1b170, 0x1b2fb; 0x1bc00, 0x1bc6a; 0x1bc70, 0x1bc7c; 0x1bc80, 0x1bc88; + 0x1bc90, 0x1bc99; 0x1d400, 0x1d454; 0x1d456, 0x1d49c; 0x1d49e, 0x1d49f; 0x1d4a2, 0x1d4a2; + 0x1d4a5, 0x1d4a6; 0x1d4a9, 0x1d4ac; 0x1d4ae, 0x1d4b9; 0x1d4bb, 0x1d4bb; 0x1d4bd, 0x1d4c3; + 0x1d4c5, 0x1d505; 0x1d507, 0x1d50a; 0x1d50d, 0x1d514; 0x1d516, 0x1d51c; 0x1d51e, 0x1d539; + 
0x1d53b, 0x1d53e; 0x1d540, 0x1d544; 0x1d546, 0x1d546; 0x1d54a, 0x1d550; 0x1d552, 0x1d6a5; + 0x1d6a8, 0x1d6c0; 0x1d6c2, 0x1d6da; 0x1d6dc, 0x1d6fa; 0x1d6fc, 0x1d714; 0x1d716, 0x1d734; + 0x1d736, 0x1d74e; 0x1d750, 0x1d76e; 0x1d770, 0x1d788; 0x1d78a, 0x1d7a8; 0x1d7aa, 0x1d7c2; + 0x1d7c4, 0x1d7cb; 0x1df00, 0x1df1e; 0x1df25, 0x1df2a; 0x1e030, 0x1e06d; 0x1e100, 0x1e12c; + 0x1e137, 0x1e13d; 0x1e14e, 0x1e14e; 0x1e290, 0x1e2ad; 0x1e2c0, 0x1e2eb; 0x1e4d0, 0x1e4eb; + 0x1e5d0, 0x1e5ed; 0x1e5f0, 0x1e5f0; 0x1e6c0, 0x1e6de; 0x1e6e0, 0x1e6e2; 0x1e6e4, 0x1e6e5; + 0x1e6e7, 0x1e6ed; 0x1e6f0, 0x1e6f4; 0x1e6fe, 0x1e6ff; 0x1e7e0, 0x1e7e6; 0x1e7e8, 0x1e7eb; + 0x1e7ed, 0x1e7ee; 0x1e7f0, 0x1e7fe; 0x1e800, 0x1e8c4; 0x1e900, 0x1e943; 0x1e94b, 0x1e94b; + 0x1ee00, 0x1ee03; 0x1ee05, 0x1ee1f; 0x1ee21, 0x1ee22; 0x1ee24, 0x1ee24; 0x1ee27, 0x1ee27; + 0x1ee29, 0x1ee32; 0x1ee34, 0x1ee37; 0x1ee39, 0x1ee39; 0x1ee3b, 0x1ee3b; 0x1ee42, 0x1ee42; + 0x1ee47, 0x1ee47; 0x1ee49, 0x1ee49; 0x1ee4b, 0x1ee4b; 0x1ee4d, 0x1ee4f; 0x1ee51, 0x1ee52; + 0x1ee54, 0x1ee54; 0x1ee57, 0x1ee57; 0x1ee59, 0x1ee59; 0x1ee5b, 0x1ee5b; 0x1ee5d, 0x1ee5d; + 0x1ee5f, 0x1ee5f; 0x1ee61, 0x1ee62; 0x1ee64, 0x1ee64; 0x1ee67, 0x1ee6a; 0x1ee6c, 0x1ee72; + 0x1ee74, 0x1ee77; 0x1ee79, 0x1ee7c; 0x1ee7e, 0x1ee7e; 0x1ee80, 0x1ee89; 0x1ee8b, 0x1ee9b; + 0x1eea1, 0x1eea3; 0x1eea5, 0x1eea9; 0x1eeab, 0x1eebb; 0x20000, 0x2a6df; 0x2a700, 0x2b81d; + 0x2b820, 0x2cead; 0x2ceb0, 0x2ebe0; 0x2ebf0, 0x2ee5d; 0x2f800, 0x2fa1d; 0x30000, 0x3134a; + 0x31350, 0x33479] let list = [ ("alphabetic", alphabetic); From 589f26b0cc6c64d22953aac61a7103b4ef36cb5f Mon Sep 17 00:00:00 2001 From: Romain Beauxis Date: Mon, 6 Oct 2025 16:25:12 -0500 Subject: [PATCH 03/24] Release v3.7! 
--- CHANGES.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGES.md b/CHANGES.md index ff42aec..a3adf36 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,4 +1,4 @@ -# 3.7 (unreleased) +# 3.7 (2025-10-06) - Update to unicode 17.0.0 # 3.6 (2025-01-05) From bb8ee4afcd3c838bc4cb58d55016ddab8eef9418 Mon Sep 17 00:00:00 2001 From: Romain Beauxis Date: Mon, 24 Nov 2025 09:02:38 +0100 Subject: [PATCH 04/24] Add x-maintenance-intent: ["(latest)"] Fixes: #173 --- sedlex.opam => opam/sedlex.opam | 0 sedlex.opam.template => opam/sedlex.opam.template | 1 + 2 files changed, 1 insertion(+) rename sedlex.opam => opam/sedlex.opam (100%) rename sedlex.opam.template => opam/sedlex.opam.template (62%) diff --git a/sedlex.opam b/opam/sedlex.opam similarity index 100% rename from sedlex.opam rename to opam/sedlex.opam diff --git a/sedlex.opam.template b/opam/sedlex.opam.template similarity index 62% rename from sedlex.opam.template rename to opam/sedlex.opam.template index 5538031..d195fd0 100644 --- a/sedlex.opam.template +++ b/opam/sedlex.opam.template @@ -1 +1,2 @@ doc: "https://ocaml-community.github.io/sedlex/index.html" +x-maintenance-intent: ["(latest)"] From c04ad99789168df0bfe3ddf6f6937ea3aac58ef5 Mon Sep 17 00:00:00 2001 From: Romain Beauxis Date: Mon, 24 Nov 2025 09:04:07 +0100 Subject: [PATCH 05/24] Also move/regenerate main opam file. 
--- opam/sedlex.opam | 1 - 1 file changed, 1 deletion(-) diff --git a/opam/sedlex.opam b/opam/sedlex.opam index 8386103..34515f1 100644 --- a/opam/sedlex.opam +++ b/opam/sedlex.opam @@ -38,4 +38,3 @@ build: [ ] ] dev-repo: "git+https://github.com/ocaml-community/sedlex.git" -doc: "https://ocaml-community.github.io/sedlex/index.html" From 97f74183e4118c1f568800ed6f2612316f43ad4a Mon Sep 17 00:00:00 2001 From: Hugo Heuzard Date: Sun, 8 Feb 2026 00:36:24 +0100 Subject: [PATCH 06/24] Bump ocamlformat --- .ocamlformat | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.ocamlformat b/.ocamlformat index 97acec8..45c6e9f 100644 --- a/.ocamlformat +++ b/.ocamlformat @@ -1,4 +1,4 @@ -version=0.27.0 +version=0.28.1 profile = conventional break-separators = after space-around-lists = false From ca3d4e18c64cb4b45f9062ed2094a5a58a058b6e Mon Sep 17 00:00:00 2001 From: Hugo Heuzard Date: Sat, 7 Feb 2026 23:43:31 +0100 Subject: [PATCH 07/24] Tests: add a %sedlex_test ppx to help write tests --- src/syntax/ppx_sedlex.ml | 68 ++++++++++++++++++-------------- test/ppx_test/dune | 6 +++ test/ppx_test/ppx_sedlex_test.ml | 32 +++++++++++++++ 3 files changed, 77 insertions(+), 29 deletions(-) create mode 100644 test/ppx_test/dune create mode 100644 test/ppx_test/ppx_sedlex_test.ml diff --git a/src/syntax/ppx_sedlex.ml b/src/syntax/ppx_sedlex.ml index 53badf1..e2f077c 100644 --- a/src/syntax/ppx_sedlex.ml +++ b/src/syntax/ppx_sedlex.ml @@ -467,6 +467,43 @@ let regexp_of_pattern env = in aux ~encoding:Ascii +let handle_sedlex_match ~env ~map_rhs match_expr = + let lexbuf = + match match_expr with + | { pexp_desc = Pexp_match (lexbuf, _) } -> ( + match lexbuf with + | { pexp_desc = Pexp_ident { txt = Lident txt } } -> (txt, lexbuf) + | _ -> + err lexbuf.pexp_loc + "the matched expression must be a single identifier") + | _ -> + err match_expr.pexp_loc + "the %%sedlex extension is only recognized on match expressions" + in + let cases = + match match_expr with + | { pexp_desc = 
Pexp_match (_, cases) } -> cases + | _ -> assert false + in + let cases = List.rev cases in + let error = + match List.hd cases with + | { pc_lhs = [%pat? _]; pc_rhs = e; pc_guard = None } -> map_rhs e + | { pc_lhs = p } -> + err p.ppat_loc "the last branch must be a catch-all error case" + in + let cases = List.rev (List.tl cases) in + let cases = + List.map + (function + | { pc_lhs = p; pc_rhs = e; pc_guard = None } -> + (regexp_of_pattern env p, map_rhs e) + | { pc_guard = Some e } -> + err e.pexp_loc "'when' guards are not supported") + cases + in + gen_definition lexbuf cases error + let previous = ref [] let regexps = ref [] let should_set_cookies = ref false @@ -481,35 +518,8 @@ let mapper = method! expression e = match e with - | [%expr [%sedlex [%e? { pexp_desc = Pexp_match (lexbuf, cases) }]]] -> - let lexbuf = - match lexbuf with - | { pexp_desc = Pexp_ident { txt = Lident txt } } -> - (txt, lexbuf) - | _ -> - err lexbuf.pexp_loc - "the matched expression must be a single identifier" - in - let cases = List.rev cases in - let error = - match List.hd cases with - | { pc_lhs = [%pat? _]; pc_rhs = e; pc_guard = None } -> - this#expression e - | { pc_lhs = p } -> - err p.ppat_loc - "the last branch must be a catch-all error case" - in - let cases = List.rev (List.tl cases) in - let cases = - List.map - (function - | { pc_lhs = p; pc_rhs = e; pc_guard = None } -> - (regexp_of_pattern env p, this#expression e) - | { pc_guard = Some e } -> - err e.pexp_loc "'when' guards are not supported") - cases - in - gen_definition lexbuf cases error + | [%expr [%sedlex [%e? { pexp_desc = Pexp_match _ } as match_expr]]] -> + handle_sedlex_match ~env ~map_rhs:this#expression match_expr | [%expr let [%p? { ppat_desc = Ppat_var { txt = name } }] = [%sedlex.regexp? [%p? 
p]] diff --git a/test/ppx_test/dune b/test/ppx_test/dune new file mode 100644 index 0000000..56eb19e --- /dev/null +++ b/test/ppx_test/dune @@ -0,0 +1,6 @@ +(library + (name ppx_sedlex_test) + (kind ppx_rewriter) + (libraries ppxlib sedlex_ppx) + (preprocess + (pps ppxlib.metaquot))) diff --git a/test/ppx_test/ppx_sedlex_test.ml b/test/ppx_test/ppx_sedlex_test.ml new file mode 100644 index 0000000..b80d1ce --- /dev/null +++ b/test/ppx_test/ppx_sedlex_test.ml @@ -0,0 +1,32 @@ +open Ppxlib +module P = Sedlex_ppx.Ppx_sedlex + +let reset_state () = + P.partition_counter := 0; + P.table_counter := 0; + Hashtbl.clear P.partitions; + Hashtbl.clear P.tables + +let clear_tables () = + Hashtbl.clear P.partitions; + Hashtbl.clear P.tables + +let expand ~ctxt:_ expr = + reset_state (); + let loc = Location.none in + let code_expr = + P.handle_sedlex_match ~env:P.builtin_regexps ~map_rhs:Fun.id expr + in + let code_str = Pprintast.string_of_expression code_expr in + clear_tables (); + [%expr + print_string "CODE:\n"; + print_string [%e Ast_builder.Default.estring ~loc code_str]; + print_newline ()] + +let ext = + Extension.V3.declare "sedlex_test" Extension.Context.expression + Ast_pattern.(single_expr_payload __) + expand + +let () = Driver.register_transformation "sedlex_test" ~extensions:[ext] From fb6c9400921074b14efdef815ad2e58c41c9e23d Mon Sep 17 00:00:00 2001 From: Hugo Heuzard Date: Sat, 7 Feb 2026 23:57:45 +0100 Subject: [PATCH 08/24] Tests: add codegen tests --- test/codegen/dune | 8 +++++ test/codegen/test_gen.ml | 69 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 77 insertions(+) create mode 100644 test/codegen/dune create mode 100644 test/codegen/test_gen.ml diff --git a/test/codegen/dune b/test/codegen/dune new file mode 100644 index 0000000..d7e0101 --- /dev/null +++ b/test/codegen/dune @@ -0,0 +1,8 @@ +(library + (name sedlex_gen_test) + (libraries sedlex) + (inline_tests) + (enabled_if + (>= %{ocaml_version} 4.14)) + (preprocess + (pps 
ppx_sedlex_test ppx_expect))) diff --git a/test/codegen/test_gen.ml b/test/codegen/test_gen.ml new file mode 100644 index 0000000..84a2dc3 --- /dev/null +++ b/test/codegen/test_gen.ml @@ -0,0 +1,69 @@ +let%expect_test "simple string match" = + (match%sedlex_test buf with "ab" | "de" -> () | _ -> ()); + [%expect + {| + CODE: + let rec __sedlex_state_0 buf = + match __sedlex_partition_1 (Sedlexing.__private__next_int buf) with + | 0 -> __sedlex_state_1 buf + | 1 -> __sedlex_state_3 buf + | _ -> Sedlexing.backtrack buf + and __sedlex_state_1 buf = + match __sedlex_partition_2 (Sedlexing.__private__next_int buf) with + | 0 -> 0 + | _ -> Sedlexing.backtrack buf + and __sedlex_state_3 buf = + match __sedlex_partition_3 (Sedlexing.__private__next_int buf) with + | 0 -> 0 + | _ -> Sedlexing.backtrack buf in + Sedlexing.start buf; (match __sedlex_state_0 buf with | 0 -> () | _ -> ()) + |}] + +let%expect_test "character class" = + (match%sedlex_test buf with Plus 'a' .. 'z' -> () | _ -> ()); + [%expect + {| + CODE: + let rec __sedlex_state_0 buf = + match __sedlex_partition_1 (Sedlexing.__private__next_int buf) with + | 0 -> __sedlex_state_1 buf + | _ -> Sedlexing.backtrack buf + and __sedlex_state_1 buf = + Sedlexing.mark buf 0; + (match __sedlex_partition_1 (Sedlexing.__private__next_int buf) with + | 0 -> __sedlex_state_1 buf + | _ -> Sedlexing.backtrack buf) in + Sedlexing.start buf; (match __sedlex_state_0 buf with | 0 -> () | _ -> ()) + |}] + +let%expect_test "multi-rule" = + (match%sedlex_test buf with + | "ab" -> () + | "de" -> () + | Plus '0' .. 
'9' -> () + | _ -> ()); + [%expect + {| + CODE: + let rec __sedlex_state_0 buf = + match __sedlex_partition_1 (Sedlexing.__private__next_int buf) with + | 0 -> __sedlex_state_1 buf + | 1 -> __sedlex_state_2 buf + | 2 -> __sedlex_state_4 buf + | _ -> Sedlexing.backtrack buf + and __sedlex_state_1 buf = + Sedlexing.mark buf 2; + (match __sedlex_partition_2 (Sedlexing.__private__next_int buf) with + | 0 -> __sedlex_state_1 buf + | _ -> Sedlexing.backtrack buf) + and __sedlex_state_2 buf = + match __sedlex_partition_3 (Sedlexing.__private__next_int buf) with + | 0 -> 0 + | _ -> Sedlexing.backtrack buf + and __sedlex_state_4 buf = + match __sedlex_partition_4 (Sedlexing.__private__next_int buf) with + | 0 -> 1 + | _ -> Sedlexing.backtrack buf in + Sedlexing.start buf; + (match __sedlex_state_0 buf with | 0 -> () | 1 -> () | 2 -> () | _ -> ()) + |}] From 8820ed8ae1d7ce7ed71d3d8663b5c9546f161bbe Mon Sep 17 00:00:00 2001 From: Hugo Heuzard Date: Sun, 8 Feb 2026 00:18:37 +0100 Subject: [PATCH 09/24] Tests: output automata using dot syntax for easier review --- src/syntax/ppx_sedlex.ml | 15 ++++----- src/syntax/sedlex.ml | 53 ++++++++++++++++++++++++++++++++ src/syntax/sedlex.mli | 1 + test/codegen/test_gen.ml | 51 ++++++++++++++++++++++++++++++ test/ppx_test/ppx_sedlex_test.ml | 6 +++- 5 files changed, 116 insertions(+), 10 deletions(-) diff --git a/src/syntax/ppx_sedlex.ml b/src/syntax/ppx_sedlex.ml index e2f077c..90e30a2 100644 --- a/src/syntax/ppx_sedlex.ml +++ b/src/syntax/ppx_sedlex.ml @@ -259,15 +259,10 @@ let gen_recflag auto = Nonrecursive with Exit -> Recursive -let gen_definition ((_, lexbuf) as lexbuf_with_name) l error = +let gen_definition ((_, lexbuf) as lexbuf_with_name) auto l error = let loc = default_loc in - let brs = Array.of_list l in - let auto = Sedlex.compile (Array.map fst brs) in let cases = - Array.to_list - (Array.mapi - (fun i (_, e) -> case ~lhs:(pint ~loc i) ~guard:None ~rhs:e) - brs) + List.mapi (fun i (_, e) -> case ~lhs:(pint ~loc i) 
~guard:None ~rhs:e) l in let states = Array.mapi (gen_state lexbuf_with_name auto) auto in let states = List.flatten (Array.to_list states) in @@ -502,7 +497,9 @@ let handle_sedlex_match ~env ~map_rhs match_expr = err e.pexp_loc "'when' guards are not supported") cases in - gen_definition lexbuf cases error + let brs = Array.of_list cases in + let auto = Sedlex.compile (Array.map fst brs) in + (gen_definition lexbuf auto cases error, auto) let previous = ref [] let regexps = ref [] @@ -519,7 +516,7 @@ let mapper = method! expression e = match e with | [%expr [%sedlex [%e? { pexp_desc = Pexp_match _ } as match_expr]]] -> - handle_sedlex_match ~env ~map_rhs:this#expression match_expr + fst (handle_sedlex_match ~env ~map_rhs:this#expression match_expr) | [%expr let [%p? { ppat_desc = Ppat_var { txt = name } }] = [%sedlex.regexp? [%p? p]] diff --git a/src/syntax/sedlex.ml b/src/syntax/sedlex.ml index 9a5f53e..383ebcc 100644 --- a/src/syntax/sedlex.ml +++ b/src/syntax/sedlex.ml @@ -139,3 +139,56 @@ let compile rs = let i = aux !init in assert (i = 0); Array.init !counter (Hashtbl.find states_def) + +let cset_to_label cset = + let escape_dot c = + match c with + | '"' -> "\\\"" + | '\\' -> "\\\\" + | '<' -> "\\<" + | '>' -> "\\>" + | _ -> String.make 1 c + in + let format_interval (lo, hi) = + if lo = -1 && hi = -1 then "EOF" + else if lo = hi then + if lo >= 32 && lo <= 126 then "'" ^ escape_dot (Char.chr lo) ^ "'" + else Printf.sprintf "U+%04X" lo + else if lo >= 32 && lo <= 126 && hi >= 32 && hi <= 126 then + "'" ^ escape_dot (Char.chr lo) ^ "'-'" ^ escape_dot (Char.chr hi) ^ "'" + else Printf.sprintf "U+%04X-U+%04X" lo hi + in + String.concat ", " + (List.map format_interval (cset : Cset.t :> (int * int) list)) + +let dfa_to_dot dfa = + let buf = Buffer.create 1024 in + let bprintf = Printf.bprintf in + bprintf buf "digraph {\n"; + bprintf buf " rankdir=LR;\n"; + bprintf buf " node [shape=circle];\n\n"; + bprintf buf " _start [shape=point];\n"; + bprintf buf " _start 
-> state0;\n\n"; + Array.iteri + (fun i (trans, finals) -> + let accepted = + let acc = ref [] in + for r = Array.length finals - 1 downto 0 do + if finals.(r) then acc := r :: !acc + done; + !acc + in + (match accepted with + | [] -> bprintf buf " state%d [label=\"%d\"];\n" i i + | rules -> + bprintf buf + " state%d [label=\"%d\\n[rule %s]\", shape=doublecircle];\n" i i + (String.concat "," (List.map string_of_int rules))); + Array.iter + (fun (cset, target) -> + let label = cset_to_label cset in + bprintf buf " state%d -> state%d [label=\"%s\"];\n" i target label) + trans) + dfa; + bprintf buf "}\n"; + Buffer.contents buf diff --git a/src/syntax/sedlex.mli b/src/syntax/sedlex.mli index d809010..04f2720 100644 --- a/src/syntax/sedlex.mli +++ b/src/syntax/sedlex.mli @@ -23,3 +23,4 @@ val intersection : regexp -> regexp -> regexp option which matches the intersection set. Otherwise returns [None]. *) val compile : regexp array -> ((Sedlex_cset.t * int) array * bool array) array +val dfa_to_dot : ((Sedlex_cset.t * int) array * bool array) array -> string diff --git a/test/codegen/test_gen.ml b/test/codegen/test_gen.ml index 84a2dc3..1166a81 100644 --- a/test/codegen/test_gen.ml +++ b/test/codegen/test_gen.ml @@ -2,6 +2,23 @@ let%expect_test "simple string match" = (match%sedlex_test buf with "ab" | "de" -> () | _ -> ()); [%expect {| + DOT: + digraph { + rankdir=LR; + node [shape=circle]; + + _start [shape=point]; + _start -> state0; + + state0 [label="0"]; + state0 -> state1 [label="'a'"]; + state0 -> state3 [label="'d'"]; + state1 [label="1"]; + state1 -> state2 [label="'b'"]; + state2 [label="2\n[rule 0]", shape=doublecircle]; + state3 [label="3"]; + state3 -> state2 [label="'e'"]; + } CODE: let rec __sedlex_state_0 buf = match __sedlex_partition_1 (Sedlexing.__private__next_int buf) with @@ -23,6 +40,19 @@ let%expect_test "character class" = (match%sedlex_test buf with Plus 'a' .. 
'z' -> () | _ -> ()); [%expect {| + DOT: + digraph { + rankdir=LR; + node [shape=circle]; + + _start [shape=point]; + _start -> state0; + + state0 [label="0"]; + state0 -> state1 [label="'a'-'z'"]; + state1 [label="1\n[rule 0]", shape=doublecircle]; + state1 -> state1 [label="'a'-'z'"]; + } CODE: let rec __sedlex_state_0 buf = match __sedlex_partition_1 (Sedlexing.__private__next_int buf) with @@ -44,6 +74,27 @@ let%expect_test "multi-rule" = | _ -> ()); [%expect {| + DOT: + digraph { + rankdir=LR; + node [shape=circle]; + + _start [shape=point]; + _start -> state0; + + state0 [label="0"]; + state0 -> state1 [label="'0'-'9'"]; + state0 -> state2 [label="'a'"]; + state0 -> state4 [label="'d'"]; + state1 [label="1\n[rule 2]", shape=doublecircle]; + state1 -> state1 [label="'0'-'9'"]; + state2 [label="2"]; + state2 -> state3 [label="'b'"]; + state3 [label="3\n[rule 0]", shape=doublecircle]; + state4 [label="4"]; + state4 -> state5 [label="'e'"]; + state5 [label="5\n[rule 1]", shape=doublecircle]; + } CODE: let rec __sedlex_state_0 buf = match __sedlex_partition_1 (Sedlexing.__private__next_int buf) with diff --git a/test/ppx_test/ppx_sedlex_test.ml b/test/ppx_test/ppx_sedlex_test.ml index b80d1ce..1e1fb2f 100644 --- a/test/ppx_test/ppx_sedlex_test.ml +++ b/test/ppx_test/ppx_sedlex_test.ml @@ -1,5 +1,6 @@ open Ppxlib module P = Sedlex_ppx.Ppx_sedlex +module S = Sedlex_ppx.Sedlex let reset_state () = P.partition_counter := 0; @@ -14,12 +15,15 @@ let clear_tables () = let expand ~ctxt:_ expr = reset_state (); let loc = Location.none in - let code_expr = + let code_expr, auto = P.handle_sedlex_match ~env:P.builtin_regexps ~map_rhs:Fun.id expr in let code_str = Pprintast.string_of_expression code_expr in + let dot_str = S.dfa_to_dot auto in clear_tables (); [%expr + print_string "DOT:\n"; + print_string [%e Ast_builder.Default.estring ~loc dot_str]; print_string "CODE:\n"; print_string [%e Ast_builder.Default.estring ~loc code_str]; print_newline ()] From 
950655a2213c01acb99ece1e42c7ec198167d37c Mon Sep 17 00:00:00 2001 From: Hugo Heuzard Date: Sun, 8 Feb 2026 00:29:31 +0100 Subject: [PATCH 10/24] Refactoring, new type for dfa_state and dfa --- src/syntax/ppx_sedlex.ml | 15 +++++++-------- src/syntax/sedlex.ml | 7 +++++-- src/syntax/sedlex.mli | 7 +++++-- 3 files changed, 17 insertions(+), 12 deletions(-) diff --git a/src/syntax/ppx_sedlex.ml b/src/syntax/ppx_sedlex.ml index 90e30a2..c7c782f 100644 --- a/src/syntax/ppx_sedlex.ml +++ b/src/syntax/ppx_sedlex.ml @@ -198,14 +198,14 @@ let best_final final = let state_fun state = Printf.sprintf "__sedlex_state_%i" state let call_state lexbuf auto state = - let trans, final = auto.(state) in + let { Sedlex.trans; finals } = auto.(state) in if Array.length trans = 0 then ( - match best_final final with + match best_final finals with | Some i -> eint ~loc:default_loc i | None -> assert false) else appfun (state_fun state) [lexbuf] -let gen_state (lexbuf_name, lexbuf) auto i (trans, final) = +let gen_state (lexbuf_name, lexbuf) auto i { Sedlex.trans; finals } = let loc = default_loc in let partition = Array.map fst trans in let cases = @@ -235,7 +235,7 @@ let gen_state (lexbuf_name, lexbuf) auto i (trans, final) = ~expr:(Exp.fun_ ~loc Nolabel None lhs body); ] in - match best_final final with + match best_final finals with | None -> ret (body ()) | Some _ when Array.length trans = 0 -> [] | Some i -> @@ -249,12 +249,11 @@ let gen_recflag auto = in states with no further transitions. 
*) try Array.iter - (fun (trans_i, _) -> + (fun { Sedlex.trans } -> Array.iter (fun (_, j) -> - let trans_j, _ = auto.(j) in - if Array.length trans_j > 0 then raise Exit) - trans_i) + if Array.length auto.(j).Sedlex.trans > 0 then raise Exit) + trans) auto; Nonrecursive with Exit -> Recursive diff --git a/src/syntax/sedlex.ml b/src/syntax/sedlex.ml index 383ebcc..4a01a1a 100644 --- a/src/syntax/sedlex.ml +++ b/src/syntax/sedlex.ml @@ -117,6 +117,9 @@ let transition (state : state) = Array.sort (fun (c1, _) (c2, _) -> compare c1 c2) t; t +type dfa_state = { trans : (Cset.t * int) array; finals : bool array } +type dfa = dfa_state array + let compile rs = let rs = Array.map compile_re rs in let counter = ref 0 in @@ -131,7 +134,7 @@ let compile rs = let trans = transition state in let trans = Array.map (fun (p, t) -> (p, aux t)) trans in let finals = Array.map (fun (_, f) -> List.memq f state) rs in - Hashtbl.add states_def i (trans, finals); + Hashtbl.add states_def i { trans; finals }; i in let init = ref [] in @@ -170,7 +173,7 @@ let dfa_to_dot dfa = bprintf buf " _start [shape=point];\n"; bprintf buf " _start -> state0;\n\n"; Array.iteri - (fun i (trans, finals) -> + (fun i { trans; finals } -> let accepted = let acc = ref [] in for r = Array.length finals - 1 downto 0 do diff --git a/src/syntax/sedlex.mli b/src/syntax/sedlex.mli index 04f2720..05a95de 100644 --- a/src/syntax/sedlex.mli +++ b/src/syntax/sedlex.mli @@ -22,5 +22,8 @@ val intersection : regexp -> regexp -> regexp option (* If each argument is a single [chars] regexp, returns a regexp which matches the intersection set. Otherwise returns [None]. 
*) -val compile : regexp array -> ((Sedlex_cset.t * int) array * bool array) array -val dfa_to_dot : ((Sedlex_cset.t * int) array * bool array) array -> string +type dfa_state = { trans : (Sedlex_cset.t * int) array; finals : bool array } +type dfa = dfa_state array + +val compile : regexp array -> dfa +val dfa_to_dot : dfa -> string From 6543e1b68bed68e4e18ddf47eb6a87e7e0764766 Mon Sep 17 00:00:00 2001 From: hhugo Date: Thu, 12 Mar 2026 04:20:34 +0100 Subject: [PATCH 11/24] Update opam file (#183) --- dune-project | 4 +++- opam/sedlex.opam.template | 2 -- opam/sedlex.opam => sedlex.opam | 4 +++- 3 files changed, 6 insertions(+), 4 deletions(-) delete mode 100644 opam/sedlex.opam.template rename opam/sedlex.opam => sedlex.opam (90%) diff --git a/dune-project b/dune-project index c678939..56d6691 100644 --- a/dune-project +++ b/dune-project @@ -1,4 +1,4 @@ -(lang dune 3.0) +(lang dune 3.18) (version 3.7) (name sedlex) (source (github ocaml-community/sedlex)) @@ -7,6 +7,8 @@ "https://github.com/ocaml-community/sedlex/graphs/contributors") (maintainers "Alain Frisch ") (homepage "https://github.com/ocaml-community/sedlex") +(maintenance_intent "(latest)") +(documentation "https://ocaml-community.github.io/sedlex/index.html") (generate_opam_files true) (executables_implicit_empty_intf true) diff --git a/opam/sedlex.opam.template b/opam/sedlex.opam.template deleted file mode 100644 index d195fd0..0000000 --- a/opam/sedlex.opam.template +++ /dev/null @@ -1,2 +0,0 @@ -doc: "https://ocaml-community.github.io/sedlex/index.html" -x-maintenance-intent: ["(latest)"] diff --git a/opam/sedlex.opam b/sedlex.opam similarity index 90% rename from opam/sedlex.opam rename to sedlex.opam index 34515f1..30f3bf7 100644 --- a/opam/sedlex.opam +++ b/sedlex.opam @@ -14,10 +14,11 @@ authors: [ ] license: "MIT" homepage: "https://github.com/ocaml-community/sedlex" +doc: "https://ocaml-community.github.io/sedlex/index.html" bug-reports: "https://github.com/ocaml-community/sedlex/issues" 
depends: [ "ocaml" {>= "4.08"} - "dune" {>= "3.0"} + "dune" {>= "3.18"} "ppxlib" {>= "0.26.0"} "gen" "ppx_expect" {with-test} @@ -38,3 +39,4 @@ build: [ ] ] dev-repo: "git+https://github.com/ocaml-community/sedlex.git" +x-maintenance-intent: ["(latest)"] From 1787c56e30e1af686ee0933d6f0b89dc2361f964 Mon Sep 17 00:00:00 2001 From: Hugo Heuzard Date: Wed, 11 Mar 2026 23:32:21 +0100 Subject: [PATCH 12/24] Add ocamlyacc and menhir integration examples and docs Add two example calculators showing how to bridge Sedlexing.lexbuf with ocamlyacc/menhir parsers, and document the pattern in the README. Co-Authored-By: Claude Opus 4.6 --- README.md | 52 ++++++++++++++++++++++++++++++ dune-project | 4 ++- examples/with_menhir/dune | 16 +++++++++ examples/with_menhir/lexer.ml | 40 +++++++++++++++++++++++ examples/with_menhir/main.ml | 17 ++++++++++ examples/with_menhir/parser.mly | 26 +++++++++++++++ examples/with_ocamlyacc/dune | 14 ++++++++ examples/with_ocamlyacc/lexer.ml | 35 ++++++++++++++++++++ examples/with_ocamlyacc/main.ml | 6 ++++ examples/with_ocamlyacc/parser.mly | 27 ++++++++++++++++ sedlex.opam | 1 + 11 files changed, 237 insertions(+), 1 deletion(-) create mode 100644 examples/with_menhir/dune create mode 100644 examples/with_menhir/lexer.ml create mode 100644 examples/with_menhir/main.ml create mode 100644 examples/with_menhir/parser.mly create mode 100644 examples/with_ocamlyacc/dune create mode 100644 examples/with_ocamlyacc/lexer.ml create mode 100644 examples/with_ocamlyacc/main.ml create mode 100644 examples/with_ocamlyacc/parser.mly diff --git a/README.md b/README.md index 595a95a..cd336a6 100644 --- a/README.md +++ b/README.md @@ -233,6 +233,58 @@ Once sedlex is installed as per above, simply type #require "sedlex.ppx";; ``` +## Integration with ocamlyacc and menhir + +sedlex uses its own `Sedlexing.lexbuf` type, while ocamlyacc and menhir +(classic API) expect a lexer function of type `Lexing.lexbuf -> token`. 
+To bridge the two, create a dummy `Lexing.lexbuf` and update its position +fields after each token: + +```ocaml +(* In lexer.ml — the sedlex lexer *) +let rec token buf = + match%sedlex buf with + | Plus ('0'..'9') -> Parser.INT (int_of_string (Sedlexing.Utf8.lexeme buf)) + | '+' -> Parser.PLUS + | Plus white_space -> token buf + | eof -> Parser.EOF + | _ -> failwith "Unexpected character" + +(* Wrap for ocamlyacc / menhir classic API *) +let tokenize buf = + let lexbuf = Lexing.from_string "" in + let tokenize lexbuf = + let tok = token buf in + let start_pos, end_pos = Sedlexing.lexing_positions buf in + lexbuf.Lexing.lex_start_p <- start_pos; + lexbuf.Lexing.lex_curr_p <- end_pos; + tok + in + (tokenize, lexbuf) + +(* In main.ml *) +let () = + let buf = Sedlexing.Utf8.from_string "1 + 2" in + let tokenize, lexbuf = Lexer.tokenize buf in + let result = Parser.main tokenize lexbuf in + ... +``` + +For menhir's **incremental API**, use `Sedlexing.with_tokenizer` which +returns a `unit -> token * position * position` supplier directly: + +```ocaml +let supplier = Sedlexing.with_tokenizer token buf in +let result = + Parser.MenhirInterpreter.loop supplier + (Parser.Incremental.main Lexing.dummy_pos) +in +... +``` + +Complete working examples are in `examples/with_ocamlyacc/` and +`examples/with_menhir/`. + ## Examples The `examples/` subdirectory contains several samples of sedlex in use. 
diff --git a/dune-project b/dune-project index 56d6691..5421cec 100644 --- a/dune-project +++ b/dune-project @@ -1,4 +1,5 @@ (lang dune 3.18) +(using menhir 2.1) (version 3.7) (name sedlex) (source (github ocaml-community/sedlex)) @@ -25,4 +26,5 @@ extension.") dune (ppxlib (>= 0.26.0)) gen - (ppx_expect :with-test))) + (ppx_expect :with-test) + (menhir :with-test))) diff --git a/examples/with_menhir/dune b/examples/with_menhir/dune new file mode 100644 index 0000000..6eab021 --- /dev/null +++ b/examples/with_menhir/dune @@ -0,0 +1,16 @@ +(executable + (name main) + (libraries sedlex menhirLib) + (preprocess + (pps sedlex.ppx))) + +(rule + (alias runtest) + (deps + (:< main.exe)) + (action + (run %{<}))) + +(menhir + (modules parser) + (flags --table)) diff --git a/examples/with_menhir/lexer.ml b/examples/with_menhir/lexer.ml new file mode 100644 index 0000000..eee8b30 --- /dev/null +++ b/examples/with_menhir/lexer.ml @@ -0,0 +1,40 @@ +(* Sedlex lexer that produces tokens for a menhir parser. + + Two approaches are shown: + + 1. [tokenize] — for menhir's classic API (same technique as ocamlyacc): + wraps the sedlex lexer into [Lexing.lexbuf -> token]. + + 2. [tokenize_incremental] — for menhir's incremental API: + uses [Sedlexing.with_tokenizer] which returns a + [unit -> token * position * position] supplier. *) + +let digit = [%sedlex.regexp? '0' .. 
'9'] + +let rec token buf = + match%sedlex buf with + | Plus digit -> Parser.INT (int_of_string (Sedlexing.Utf8.lexeme buf)) + | '+' -> Parser.PLUS + | '-' -> Parser.MINUS + | '*' -> Parser.TIMES + | '/' -> Parser.DIV + | '(' -> Parser.LPAREN + | ')' -> Parser.RPAREN + | Plus white_space -> token buf + | eof -> Parser.EOF + | _ -> failwith ("Unexpected character: " ^ Sedlexing.Utf8.lexeme buf) + +(* Approach 1: classic API — wrap into [Lexing.lexbuf -> token] *) +let tokenize buf = + let lexing_lexbuf = Lexing.from_string "" in + let tokenize _lexing_lexbuf = + let tok = token buf in + let start_pos, end_pos = Sedlexing.lexing_positions buf in + _lexing_lexbuf.Lexing.lex_start_p <- start_pos; + _lexing_lexbuf.Lexing.lex_curr_p <- end_pos; + tok + in + (tokenize, lexing_lexbuf) + +(* Approach 2: incremental API — use [Sedlexing.with_tokenizer] *) +let tokenize_incremental buf = Sedlexing.with_tokenizer token buf diff --git a/examples/with_menhir/main.ml b/examples/with_menhir/main.ml new file mode 100644 index 0000000..c926adf --- /dev/null +++ b/examples/with_menhir/main.ml @@ -0,0 +1,17 @@ +let () = + let input = "1 + 2 * (3 - 1)" in + + (* Classic API *) + let buf = Sedlexing.Utf8.from_string input in + let tokenize, lexbuf = Lexer.tokenize buf in + let result = Parser.main tokenize lexbuf in + Printf.printf "classic: %s = %d\n" input result; + + (* Incremental API *) + let buf = Sedlexing.Utf8.from_string input in + let supplier = Lexer.tokenize_incremental buf in + let result = + Parser.MenhirInterpreter.loop supplier + (Parser.Incremental.main Lexing.dummy_pos) + in + Printf.printf "incremental: %s = %d\n" input result diff --git a/examples/with_menhir/parser.mly b/examples/with_menhir/parser.mly new file mode 100644 index 0000000..f7e701a --- /dev/null +++ b/examples/with_menhir/parser.mly @@ -0,0 +1,26 @@ +/* Simple calculator parser for use with sedlex via menhir */ + +%token INT +%token PLUS MINUS TIMES DIV +%token LPAREN RPAREN +%token EOF + +%left PLUS 
MINUS +%left TIMES DIV + +%start main + +%% + +main: + | e = expr; EOF { e } +; + +expr: + | n = INT { n } + | LPAREN; e = expr; RPAREN { e } + | e1 = expr; PLUS; e2 = expr { e1 + e2 } + | e1 = expr; MINUS; e2 = expr { e1 - e2 } + | e1 = expr; TIMES; e2 = expr { e1 * e2 } + | e1 = expr; DIV; e2 = expr { e1 / e2 } +; diff --git a/examples/with_ocamlyacc/dune b/examples/with_ocamlyacc/dune new file mode 100644 index 0000000..8881ef5 --- /dev/null +++ b/examples/with_ocamlyacc/dune @@ -0,0 +1,14 @@ +(executable + (name main) + (libraries sedlex) + (preprocess + (pps sedlex.ppx))) + +(rule + (alias runtest) + (deps + (:< main.exe)) + (action + (run %{<}))) + +(ocamlyacc parser) diff --git a/examples/with_ocamlyacc/lexer.ml b/examples/with_ocamlyacc/lexer.ml new file mode 100644 index 0000000..0b31790 --- /dev/null +++ b/examples/with_ocamlyacc/lexer.ml @@ -0,0 +1,35 @@ +(* Sedlex lexer that produces tokens for an ocamlyacc parser. + + The key integration point: ocamlyacc parsers expect a function of type + [Lexing.lexbuf -> token], but sedlex uses [Sedlexing.lexbuf]. + We create a dummy [Lexing.lexbuf] and update its position fields + after each token so that error reporting works correctly. *) + +let digit = [%sedlex.regexp? '0' .. '9'] + +let rec token buf = + match%sedlex buf with + | Plus digit -> Parser.INT (int_of_string (Sedlexing.Utf8.lexeme buf)) + | '+' -> Parser.PLUS + | '-' -> Parser.MINUS + | '*' -> Parser.TIMES + | '/' -> Parser.DIV + | '(' -> Parser.LPAREN + | ')' -> Parser.RPAREN + | Plus white_space -> token buf + | eof -> Parser.EOF + | _ -> failwith ("Unexpected character: " ^ Sedlexing.Utf8.lexeme buf) + +(* Wrap a sedlex lexer for use with ocamlyacc. + Returns [(tokenize, lexing_lexbuf)] where [tokenize] has type + [Lexing.lexbuf -> token] as expected by the generated parser. 
*) +let tokenize buf = + let lexing_lexbuf = Lexing.from_string "" in + let tokenize _lexing_lexbuf = + let tok = token buf in + let start_pos, end_pos = Sedlexing.lexing_positions buf in + _lexing_lexbuf.Lexing.lex_start_p <- start_pos; + _lexing_lexbuf.Lexing.lex_curr_p <- end_pos; + tok + in + (tokenize, lexing_lexbuf) diff --git a/examples/with_ocamlyacc/main.ml b/examples/with_ocamlyacc/main.ml new file mode 100644 index 0000000..fe249ad --- /dev/null +++ b/examples/with_ocamlyacc/main.ml @@ -0,0 +1,6 @@ +let () = + let input = "1 + 2 * (3 - 1)" in + let buf = Sedlexing.Utf8.from_string input in + let tokenize, lexbuf = Lexer.tokenize buf in + let result = Parser.main tokenize lexbuf in + Printf.printf "%s = %d\n" input result diff --git a/examples/with_ocamlyacc/parser.mly b/examples/with_ocamlyacc/parser.mly new file mode 100644 index 0000000..53a8e16 --- /dev/null +++ b/examples/with_ocamlyacc/parser.mly @@ -0,0 +1,27 @@ +/* Simple calculator parser for use with sedlex via ocamlyacc */ + +%token INT +%token PLUS MINUS TIMES DIV +%token LPAREN RPAREN +%token EOF + +%left PLUS MINUS +%left TIMES DIV + +%start main +%type main + +%% + +main: + | expr EOF { $1 } +; + +expr: + | INT { $1 } + | LPAREN expr RPAREN { $2 } + | expr PLUS expr { $1 + $3 } + | expr MINUS expr { $1 - $3 } + | expr TIMES expr { $1 * $3 } + | expr DIV expr { $1 / $3 } +; diff --git a/sedlex.opam b/sedlex.opam index 30f3bf7..7b917f4 100644 --- a/sedlex.opam +++ b/sedlex.opam @@ -22,6 +22,7 @@ depends: [ "ppxlib" {>= "0.26.0"} "gen" "ppx_expect" {with-test} + "menhir" {with-test} "odoc" {with-doc} ] build: [ From 90bc11297d338ee228bc1f9c512bffb125a9951a Mon Sep 17 00:00:00 2001 From: Hugo Heuzard Date: Thu, 12 Mar 2026 10:19:55 +0100 Subject: [PATCH 13/24] Document longest match behavior --- README.md | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/README.md b/README.md index cd336a6..4949493 100644 --- a/README.md +++ b/README.md @@ -79,6 +79,19 @@ where: Unlike ocamllex, 
lexers work on stream of Unicode codepoints, not bytes. +Like ocamllex, sedlex uses **longest match** with **first rule priority**: + +- The lexer always tries to match the longest possible prefix of the + input. It does so by continuing to read characters as long as some + rule can still match a longer string, while remembering the last + position at which a rule did match. + +- When two or more rules match the same longest prefix (a tie), the + rule that appears first in the `match%sedlex` definition wins. For + example, given the rules `| "if" -> ...` and `| Plus ('a'..'z') -> ...`, + the input `"if"` is matched by the first rule because it is listed + first, even though the second rule also accepts `"if"`. + The actions can call functions from the Sedlexing module to extract (parts of) the matched lexeme, in the desired encoding. From 8b1439b90a5a4c8e4925858753c47d0c639990df Mon Sep 17 00:00:00 2001 From: Hugo Heuzard Date: Thu, 12 Mar 2026 09:48:30 +0100 Subject: [PATCH 14/24] Use default flags --- examples/dune | 3 +- src/lib/dune | 3 +- src/syntax/dune | 4 +-- src/syntax/ppx_sedlex.ml | 66 +++++++++++++++++++++------------------- src/syntax/sedlex.ml | 2 +- 5 files changed, 39 insertions(+), 39 deletions(-) diff --git a/examples/dune b/examples/dune index 06d90c9..df44ee8 100644 --- a/examples/dune +++ b/examples/dune @@ -2,8 +2,7 @@ (names tokenizer regressions complement subtraction repeat performance) (libraries sedlex sedlex_ppx) (preprocess - (pps sedlex.ppx)) - (flags :standard -w +39)) + (pps sedlex.ppx))) (rule (alias runtest) diff --git a/src/lib/dune b/src/lib/dune index d3b428b..7243e5d 100644 --- a/src/lib/dune +++ b/src/lib/dune @@ -2,5 +2,4 @@ (name sedlex) (public_name sedlex) (wrapped false) - (libraries gen) - (flags :standard -w +A-4-9 -safe-string)) + (libraries gen)) diff --git a/src/syntax/dune b/src/syntax/dune index 1a6c44a..d70a0ff 100644 --- a/src/syntax/dune +++ b/src/syntax/dune @@ -5,9 +5,7 @@ (libraries ppxlib sedlex 
sedlex.utils) (ppx_runtime_libraries sedlex) (preprocess - (pps ppxlib.metaquot)) - (flags - (:standard -w -9))) + (pps ppxlib.metaquot))) (rule (targets unicode.ml) diff --git a/src/syntax/ppx_sedlex.ml b/src/syntax/ppx_sedlex.ml index c7c782f..742e6e7 100644 --- a/src/syntax/ppx_sedlex.ml +++ b/src/syntax/ppx_sedlex.ml @@ -249,7 +249,7 @@ let gen_recflag auto = in states with no further transitions. *) try Array.iter - (fun { Sedlex.trans } -> + (fun { Sedlex.trans; _ } -> Array.iter (fun (_, j) -> if Array.length auto.(j).Sedlex.trans > 0 then raise Exit) @@ -326,21 +326,21 @@ let rec repeat r = function | n, m -> Sedlex.seq r (repeat r (n - 1, m - 1)) let regexp_of_pattern env = - let rec char_pair_op func name ~encoding p tuple = + let rec char_pair_op func name ~encoding ~loc tuple = (* Construct something like Sub(a,b) *) match tuple with - | Some { ppat_desc = Ppat_tuple [p0; p1] } -> begin + | Some { ppat_desc = Ppat_tuple [p0; p1]; _ } -> begin match func (aux ~encoding p0) (aux ~encoding p1) with | Some r -> r | None -> - err p.ppat_loc + err loc "the %s operator can only applied to single-character length \ regexps" name end | _ -> - err p.ppat_loc "the %s operator requires two arguments, like %s(a,b)" - name name + err loc "the %s operator requires two arguments, like %s(a,b)" name + name and aux ~encoding p = (* interpret one pattern node *) match p.ppat_desc with @@ -349,18 +349,18 @@ let regexp_of_pattern env = List.fold_left (fun r p -> Sedlex.seq r (aux ~encoding p)) (aux ~encoding p) pl - | Ppat_construct ({ txt = Lident "Star" }, Some (_, p)) -> + | Ppat_construct ({ txt = Lident "Star"; _ }, Some (_, p)) -> Sedlex.rep (aux ~encoding p) - | Ppat_construct ({ txt = Lident "Plus" }, Some (_, p)) -> + | Ppat_construct ({ txt = Lident "Plus"; _ }, Some (_, p)) -> Sedlex.plus (aux ~encoding p) - | Ppat_construct ({ txt = Lident "Utf8" }, Some (_, p)) -> + | Ppat_construct ({ txt = Lident "Utf8"; _ }, Some (_, p)) -> aux ~encoding:Utf8 p - | 
Ppat_construct ({ txt = Lident "Latin1" }, Some (_, p)) -> + | Ppat_construct ({ txt = Lident "Latin1"; _ }, Some (_, p)) -> aux ~encoding:Latin1 p - | Ppat_construct ({ txt = Lident "Ascii" }, Some (_, p)) -> + | Ppat_construct ({ txt = Lident "Ascii"; _ }, Some (_, p)) -> aux ~encoding:Ascii p | Ppat_construct - ( { txt = Lident "Rep" }, + ( { txt = Lident "Rep"; _ }, Some ( _, { @@ -371,8 +371,10 @@ let regexp_of_pattern env = { ppat_desc = Ppat_constant (i1 as i2) | Ppat_interval (i1, i2); + _; }; ]; + _; } ) ) -> begin match (i1, i2) with | Pconst_integer (i1, _), Pconst_integer (i2, _) -> @@ -383,11 +385,11 @@ let regexp_of_pattern env = | _ -> err p.ppat_loc "Rep must take an integer constant or interval" end - | Ppat_construct ({ txt = Lident "Rep" }, _) -> + | Ppat_construct ({ txt = Lident "Rep"; _ }, _) -> err p.ppat_loc "the Rep operator takes 2 arguments" - | Ppat_construct ({ txt = Lident "Opt" }, Some (_, p)) -> + | Ppat_construct ({ txt = Lident "Opt"; _ }, Some (_, p)) -> Sedlex.alt Sedlex.eps (aux ~encoding p) - | Ppat_construct ({ txt = Lident "Compl" }, arg) -> begin + | Ppat_construct ({ txt = Lident "Compl"; _ }, arg) -> begin match arg with | Some (_, p0) -> begin match Sedlex.compl (aux ~encoding p0) with @@ -399,16 +401,16 @@ let regexp_of_pattern env = end | _ -> err p.ppat_loc "the Compl operator requires an argument" end - | Ppat_construct ({ txt = Lident "Sub" }, arg) -> - char_pair_op ~encoding Sedlex.subtract "Sub" p + | Ppat_construct ({ txt = Lident "Sub"; _ }, arg) -> + char_pair_op ~encoding Sedlex.subtract "Sub" ~loc:p.ppat_loc (Option.map (fun (_, arg) -> arg) arg) - | Ppat_construct ({ txt = Lident "Intersect" }, arg) -> - char_pair_op ~encoding Sedlex.intersection "Intersect" p + | Ppat_construct ({ txt = Lident "Intersect"; _ }, arg) -> + char_pair_op ~encoding Sedlex.intersection "Intersect" ~loc:p.ppat_loc (Option.map (fun (_, arg) -> arg) arg) - | Ppat_construct ({ txt = Lident "Chars" }, arg) -> ( + | Ppat_construct ({ 
txt = Lident "Chars"; _ }, arg) -> ( let const = match arg with - | Some (_, { ppat_desc = Ppat_constant const }) -> Some const + | Some (_, { ppat_desc = Ppat_constant const; _ }) -> Some const | _ -> None in match const with @@ -453,7 +455,7 @@ let regexp_of_pattern env = Sedlex.chars (Cset.singleton (codepoint (int_of_string i))) | _ -> err p.ppat_loc "this pattern is not a valid regexp" end - | Ppat_var { txt = x } -> begin + | Ppat_var { txt = x; _ } -> begin try StringMap.find x env with Not_found -> err p.ppat_loc "unbound regexp %s" x end @@ -464,9 +466,10 @@ let regexp_of_pattern env = let handle_sedlex_match ~env ~map_rhs match_expr = let lexbuf = match match_expr with - | { pexp_desc = Pexp_match (lexbuf, _) } -> ( + | { pexp_desc = Pexp_match (lexbuf, _); _ } -> ( match lexbuf with - | { pexp_desc = Pexp_ident { txt = Lident txt } } -> (txt, lexbuf) + | { pexp_desc = Pexp_ident { txt = Lident txt; _ }; _ } -> + (txt, lexbuf) | _ -> err lexbuf.pexp_loc "the matched expression must be a single identifier") @@ -476,14 +479,14 @@ let handle_sedlex_match ~env ~map_rhs match_expr = in let cases = match match_expr with - | { pexp_desc = Pexp_match (_, cases) } -> cases + | { pexp_desc = Pexp_match (_, cases); _ } -> cases | _ -> assert false in let cases = List.rev cases in let error = match List.hd cases with | { pc_lhs = [%pat? _]; pc_rhs = e; pc_guard = None } -> map_rhs e - | { pc_lhs = p } -> + | { pc_lhs = p; _ } -> err p.ppat_loc "the last branch must be a catch-all error case" in let cases = List.rev (List.tl cases) in @@ -492,7 +495,7 @@ let handle_sedlex_match ~env ~map_rhs match_expr = (function | { pc_lhs = p; pc_rhs = e; pc_guard = None } -> (regexp_of_pattern env p, map_rhs e) - | { pc_guard = Some e } -> + | { pc_guard = Some e; _ } -> err e.pexp_loc "'when' guards are not supported") cases in @@ -514,10 +517,11 @@ let mapper = method! expression e = match e with - | [%expr [%sedlex [%e? 
{ pexp_desc = Pexp_match _ } as match_expr]]] -> + | [%expr [%sedlex [%e? { pexp_desc = Pexp_match _; _ } as match_expr]]] + -> fst (handle_sedlex_match ~env ~map_rhs:this#expression match_expr) | [%expr - let [%p? { ppat_desc = Ppat_var { txt = name } }] = + let [%p? { ppat_desc = Ppat_var { txt = name; _ }; _ }] = [%sedlex.regexp? [%p? p]] in [%e? body]] -> @@ -537,7 +541,7 @@ let mapper = (List.map (function | [%stri - let [%p? { ppat_desc = Ppat_var { txt = name } }] = + let [%p? { ppat_desc = Ppat_var { txt = name; _ }; _ }] = [%sedlex.regexp? [%p? p]]] as i -> regexps := i :: !regexps; mapper := !mapper#define_regexp name p; @@ -562,7 +566,7 @@ let mapper = let pre_handler cookies = previous := match Driver.Cookies.get cookies "sedlex.regexps" Ast_pattern.__ with - | Some { pexp_desc = Pexp_extension (_, PStr l) } -> l + | Some { pexp_desc = Pexp_extension (_, PStr l); _ } -> l | Some _ -> assert false | None -> [] diff --git a/src/syntax/sedlex.ml b/src/syntax/sedlex.ml index 4a01a1a..76f359c 100644 --- a/src/syntax/sedlex.ml +++ b/src/syntax/sedlex.ml @@ -25,7 +25,7 @@ let new_node () = let seq r1 r2 succ = r1 (r2 succ) let is_chars final = function - | { eps = []; trans = [(c, f)] } when f == final -> Some c + | { eps = []; trans = [(c, f)]; _ } when f == final -> Some c | _ -> None let chars c succ = From 0e53c627aecb12f7916d86ebba4ec907c8bf22c3 Mon Sep 17 00:00:00 2001 From: Hugo Heuzard Date: Thu, 12 Mar 2026 09:25:49 +0100 Subject: [PATCH 15/24] =?UTF-8?q?Fix=20UTF-8=20surrogate=20range=20check?= =?UTF-8?q?=20in=20check=5Fthree=20(0xdf00=20=E2=86=92=200xdfff)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The upper bound of the surrogate rejection range was 0xdf00 instead of 0xdfff, which would have allowed U+DF01..U+DFFF through. In practice the bug was masked by the local Uchar.of_int wrapper, but fix it for correctness. 
Add comments explaining why only check_three needs the surrogate check, and add an expect test for surrogate rejection. Co-Authored-By: Claude Opus 4.6 --- src/lib/sedlexing.ml | 5 ++++- test/basic.ml | 20 ++++++++++++++++++++ 2 files changed, 24 insertions(+), 1 deletion(-) diff --git a/src/lib/sedlexing.ml b/src/lib/sedlexing.ml index 7ecdc94..be475c7 100644 --- a/src/lib/sedlexing.ml +++ b/src/lib/sedlexing.ml @@ -437,6 +437,7 @@ module Utf8 = struct | _ -> raise MalFormed (* https://www.unicode.org/versions/corrigendum1.html *) + (* U+0080..U+07FF — no surrogate check needed, below U+D800 *) let check_two n1 n2 = if n1 < 0xc2 || 0xdf < n1 then raise MalFormed; if n2 < 0x80 || 0xbf < n2 then raise MalFormed; @@ -455,9 +456,11 @@ module Utf8 = struct let p = ((n1 land 0x0f) lsl 12) lor ((n2 land 0x3f) lsl 6) lor (n3 land 0x3f) in - if p >= 0xd800 && p <= 0xdf00 then raise MalFormed; + (* Reject UTF-16 surrogates (U+D800..U+DFFF) *) + if p >= 0xd800 && p <= 0xdfff then raise MalFormed; p + (* U+10000..U+10FFFF — no surrogate check needed, above U+DFFF *) let check_four n1 n2 n3 n4 = if n1 = 0xf0 then ( if n2 < 0x90 || 0xbf < n2 then raise MalFormed; diff --git a/test/basic.ml b/test/basic.ml index 30c6c45..320b257 100644 --- a/test/basic.ml +++ b/test/basic.ml @@ -1060,3 +1060,23 @@ let%expect_test "utf16-le" = code point pos: [line=1:bol=0:cnum=7;line=1:bol=0:cnum=7] bytes pos: [line=1:bol=0:cnum=14;line=1:bol=0:cnum=14] EOF |}] + +let%expect_test "utf8 surrogate rejection" = + (* UTF-16 surrogates (U+D800..U+DFFF) must be rejected as invalid UTF-8 *) + let test s = + try + let lb = Sedlexing.Utf8.from_string s in + ignore (Sedlexing.__private__next_int lb); + Printf.printf "accepted (BUG)\n" + with Sedlexing.MalFormed -> Printf.printf "rejected\n" + in + (* U+D800: first high surrogate *) + test "\xED\xA0\x80"; + (* U+DF01: low surrogate *) + test "\xED\xBC\x81"; + (* U+DFFF: last low surrogate *) + test "\xED\xBF\xBF"; + [%expect {| + rejected + rejected + 
rejected |}] From 3c1b3707ad50057d245c09b94a55cb6957403dc6 Mon Sep 17 00:00:00 2001 From: hhugo Date: Fri, 13 Mar 2026 14:11:09 +0100 Subject: [PATCH 16/24] Support nested let..in for [%sedlex.regexp?] definitions (#176) * Support nested let..in for [%sedlex.regexp?] definitions (#41) Allow users to define named regexps using nested let statements, e.g.: let int_lit = let digit = [%sedlex.regexp? '0'..'9'] in [%sedlex.regexp? Plus digit] Add eval_regexp_expr method that recursively evaluates let..in chains of regexp definitions, used by both the expression handler and structure_with_regexps. Co-Authored-By: Claude Opus 4.6 * Add comment to ast match * Update documentation --------- Co-authored-by: Claude Opus 4.6 --- CHANGES.md | 3 +++ README.md | 41 +++++++++++++++++++++-------- src/syntax/ppx_sedlex.ml | 56 +++++++++++++++++++++++++++++++++------- test/basic.ml | 48 ++++++++++++++++++++++++++++++++++ 4 files changed, 128 insertions(+), 20 deletions(-) diff --git a/CHANGES.md b/CHANGES.md index a3adf36..03958b3 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,3 +1,6 @@ +# unreleased +- Support nested `let..in` for `[%sedlex.regexp?]` definitions + # 3.7 (2025-10-06) - Update to unicode 17.0.0 diff --git a/README.md b/README.md index 4949493..51e1a70 100644 --- a/README.md +++ b/README.md @@ -133,25 +133,44 @@ Note: specified by the `Latin1`,`Ascii` and `Utf8` constructors in patterns. -It is possible to define named regular expressions with the following -construction, that can appear in place of a structure item: +### Named regular expressions + +You can give names to regular expressions with `[%sedlex.regexp? ...]` and +reference them by name in lexer rules. + +**Top-level definitions** are visible for the rest of the module: ```ocaml - let lid = [%sedlex.regexp? R] -``` +let digit = [%sedlex.regexp? '0' .. '9'] +let number = [%sedlex.regexp? Plus digit] -where lid is the regexp name to be defined and R its definition. 
The -scope of the "lid" regular expression is the rest of the structure, -after the definition. +let rec token buf = + match%sedlex buf with + | number -> INT (Sedlexing.Utf8.lexeme buf) + | _ -> ... +``` -The same syntax can be used for local binding: +**Local definitions** with `let ... in` are scoped to the body expression: ```ocaml - let lid = [%sedlex.regexp? R] in - body +let hex_digit = + let digit = [%sedlex.regexp? '0' .. '9'] in + let hex_letter = [%sedlex.regexp? 'a' .. 'f' | 'A' .. 'F'] in + [%sedlex.regexp? digit | hex_letter] ``` -The scope of "lid" is the body expression. +Local definitions also work inside expressions: + +```ocaml +let token buf = + let int_lit = + let digit = [%sedlex.regexp? '0' .. '9'] in + [%sedlex.regexp? Plus digit] + in + match%sedlex buf with + | int_lit -> ... + | _ -> ... +``` ## Predefined regexps diff --git a/src/syntax/ppx_sedlex.ml b/src/syntax/ppx_sedlex.ml index 742e6e7..fbaaf6a 100644 --- a/src/syntax/ppx_sedlex.ml +++ b/src/syntax/ppx_sedlex.ml @@ -344,21 +344,29 @@ let regexp_of_pattern env = and aux ~encoding p = (* interpret one pattern node *) match p.ppat_desc with + (* p1 | p2 — alternation *) | Ppat_or (p1, p2) -> Sedlex.alt (aux ~encoding p1) (aux ~encoding p2) + (* (p1, p2, ...) 
— sequence *) | Ppat_tuple (p :: pl) -> List.fold_left (fun r p -> Sedlex.seq r (aux ~encoding p)) (aux ~encoding p) pl + (* Star p — zero-or-more repetition *) | Ppat_construct ({ txt = Lident "Star"; _ }, Some (_, p)) -> Sedlex.rep (aux ~encoding p) + (* Plus p — one-or-more repetition *) | Ppat_construct ({ txt = Lident "Plus"; _ }, Some (_, p)) -> Sedlex.plus (aux ~encoding p) + (* Utf8 p — switch to UTF-8 encoding *) | Ppat_construct ({ txt = Lident "Utf8"; _ }, Some (_, p)) -> aux ~encoding:Utf8 p + (* Latin1 p — switch to Latin-1 encoding *) | Ppat_construct ({ txt = Lident "Latin1"; _ }, Some (_, p)) -> aux ~encoding:Latin1 p + (* Ascii p — switch to ASCII encoding *) | Ppat_construct ({ txt = Lident "Ascii"; _ }, Some (_, p)) -> aux ~encoding:Ascii p + (* Rep (p, n..m) — bounded repetition *) | Ppat_construct ( { txt = Lident "Rep"; _ }, Some @@ -385,10 +393,13 @@ let regexp_of_pattern env = | _ -> err p.ppat_loc "Rep must take an integer constant or interval" end + (* Rep _ — malformed Rep *) | Ppat_construct ({ txt = Lident "Rep"; _ }, _) -> err p.ppat_loc "the Rep operator takes 2 arguments" + (* Opt p — optional (zero or one) *) | Ppat_construct ({ txt = Lident "Opt"; _ }, Some (_, p)) -> Sedlex.alt Sedlex.eps (aux ~encoding p) + (* Compl p — complement of a character class *) | Ppat_construct ({ txt = Lident "Compl"; _ }, arg) -> begin match arg with | Some (_, p0) -> begin @@ -401,12 +412,15 @@ let regexp_of_pattern env = end | _ -> err p.ppat_loc "the Compl operator requires an argument" end + (* Sub (a, b) — character class subtraction *) | Ppat_construct ({ txt = Lident "Sub"; _ }, arg) -> char_pair_op ~encoding Sedlex.subtract "Sub" ~loc:p.ppat_loc (Option.map (fun (_, arg) -> arg) arg) + (* Intersect (a, b) — character class intersection *) | Ppat_construct ({ txt = Lident "Intersect"; _ }, arg) -> char_pair_op ~encoding Sedlex.intersection "Intersect" ~loc:p.ppat_loc (Option.map (fun (_, arg) -> arg) arg) + (* Chars "..." 
— character set from string literal *) | Ppat_construct ({ txt = Lident "Chars"; _ }, arg) -> ( let const = match arg with @@ -420,6 +434,7 @@ let regexp_of_pattern env = Sedlex.chars chars | _ -> err p.ppat_loc "the Chars operator requires a string argument") + (* 'a' .. 'z' or 0x41 .. 0x5a — character/codepoint range *) | Ppat_interval (i_start, i_end) -> begin match (i_start, i_end) with | Pconst_char c1, Pconst_char c2 -> @@ -443,6 +458,7 @@ let regexp_of_pattern env = (codepoint (int_of_string i2))) | _ -> err p.ppat_loc "this pattern is not a valid interval regexp" end + (* "string" or 'c' or 0x42 — literal string, char, or codepoint *) | Ppat_constant const -> begin match const with | Pconst_string (s, _, _) -> @@ -455,6 +471,7 @@ let regexp_of_pattern env = Sedlex.chars (Cset.singleton (codepoint (int_of_string i))) | _ -> err p.ppat_loc "this pattern is not a valid regexp" end + (* name — reference to a previously defined regexp *) | Ppat_var { txt = x; _ } -> begin try StringMap.find x env with Not_found -> err p.ppat_loc "unbound regexp %s" x @@ -511,21 +528,38 @@ let mapper = object (this) inherit Ast_traverse.map as super val env = builtin_regexps + method define_regexp name r = {} - method define_regexp name p = - {} + method eval_regexp_expr e = + match e with + (* [%sedlex.regexp? ] *) + | [%expr [%sedlex.regexp? [%p? p]]] -> Some (regexp_of_pattern env p) + (* let = [%sedlex.regexp? ] in *) + | [%expr + let [%p? { ppat_desc = Ppat_var { txt = name; _ }; _ }] = + [%sedlex.regexp? [%p? p]] + in + [%e? body]] -> + let r = regexp_of_pattern env p in + (this#define_regexp name r)#eval_regexp_expr body + | _ -> None method! expression e = match e with + (* match%sedlex with ... *) | [%expr [%sedlex [%e? { pexp_desc = Pexp_match _; _ } as match_expr]]] -> fst (handle_sedlex_match ~env ~map_rhs:this#expression match_expr) + (* let = in — intercept when is a regexp *) | [%expr let [%p? { ppat_desc = Ppat_var { txt = name; _ }; _ }] = - [%sedlex.regexp? [%p? 
p]] + [%e? rhs] in - [%e? body]] -> - (this#define_regexp name p)#expression body + [%e? _body]] -> ( + match this#eval_regexp_expr rhs with + | Some r -> (this#define_regexp name r)#expression _body + | None -> super#expression e) + (* [%sedlex ] — error *) | [%expr [%sedlex [%e? _]]] -> err e.pexp_loc "the %%sedlex extension is only recognized on match expressions" @@ -540,12 +574,16 @@ let mapper = List.concat (List.map (function + (* let = — intercept top-level regexp definitions *) | [%stri let [%p? { ppat_desc = Ppat_var { txt = name; _ }; _ }] = - [%sedlex.regexp? [%p? p]]] as i -> - regexps := i :: !regexps; - mapper := !mapper#define_regexp name p; - [] + [%e? e]] as i -> ( + match !mapper#eval_regexp_expr e with + | Some r -> + regexps := i :: !regexps; + mapper := !mapper#define_regexp name r; + [] + | None -> [!mapper#structure_item i]) | i -> [!mapper#structure_item i]) l) in diff --git a/test/basic.ml b/test/basic.ml index 320b257..93b44c8 100644 --- a/test/basic.ml +++ b/test/basic.ml @@ -2,6 +2,11 @@ let () = set_binary_mode_out stdout true let digit = [%sedlex.regexp? '0' .. '9'] let number = [%sedlex.regexp? Plus digit] +let hex_digit = + let digit = [%sedlex.regexp? '0' .. '9'] in + let hex_letter = [%sedlex.regexp? 'a' .. 'f' | 'A' .. 'F'] in + [%sedlex.regexp? digit | hex_letter] + let print_pos buf = let f { Lexing.pos_lnum; pos_bol; pos_cnum; _ } = Printf.sprintf "line=%d:bol=%d:cnum=%d" pos_lnum pos_bol pos_cnum @@ -1080,3 +1085,46 @@ let%expect_test "utf8 surrogate rejection" = rejected rejected rejected |}] + +let%expect_test "nested_let_regexp" = + let int_lit = + let digit = [%sedlex.regexp? '0' .. '9'] in + [%sedlex.regexp? Plus digit] + in + let buf = Sedlexing.Utf8.from_string "123abc" in + let rec loop () = + match%sedlex buf with + | int_lit -> + Printf.printf "Int: %s\n" (Sedlexing.Utf8.lexeme buf); + loop () + | Plus 'a' .. 
'z' -> + Printf.printf "Word: %s\n" (Sedlexing.Utf8.lexeme buf); + loop () + | eof -> Printf.printf "EOF\n" + | _ -> assert false + in + loop (); + [%expect {| + Int: 123 + Word: abc + EOF |}] + +let%expect_test "nested_let_regexp_toplevel" = + let buf = Sedlexing.Utf8.from_string "0xDEAD rest" in + let rec loop () = + match%sedlex buf with + | "0x", Plus hex_digit -> + Printf.printf "Hex: %s\n" (Sedlexing.Utf8.lexeme buf); + loop () + | Plus 'a' .. 'z' -> + Printf.printf "Word: %s\n" (Sedlexing.Utf8.lexeme buf); + loop () + | ' ' -> loop () + | eof -> Printf.printf "EOF\n" + | _ -> assert false + in + loop (); + [%expect {| + Hex: 0xDEAD + Word: rest + EOF |}] From 2534bf1d20029b0b27eadeeb06d4da519efd519c Mon Sep 17 00:00:00 2001 From: hhugo Date: Fri, 13 Mar 2026 14:21:13 +0100 Subject: [PATCH 17/24] Cleanup CI (#186) --- .github/workflows/build.yml | 6 ------ .github/workflows/changelog.yml | 2 +- 2 files changed, 1 insertion(+), 7 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 8f55a57..0b89b4f 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -30,12 +30,6 @@ jobs: runs-on: ${{ matrix.os }} steps: - - name: Set git to use LF - run: | - git config --global core.autocrlf false - git config --global core.eol lf - git config --global core.ignorecase false - - name: Checkout code uses: actions/checkout@v4 diff --git a/.github/workflows/changelog.yml b/.github/workflows/changelog.yml index ae4ac99..5e75635 100644 --- a/.github/workflows/changelog.yml +++ b/.github/workflows/changelog.yml @@ -17,4 +17,4 @@ jobs: runs-on: ubuntu-latest steps: - name: Check changelog - uses: tarides/changelog-check-action@v1 + uses: tarides/changelog-check-action@v3 From 0d4c515aabb7a7c626edc2928c4cc943ae639f1e Mon Sep 17 00:00:00 2001 From: Hugo Heuzard Date: Fri, 13 Mar 2026 11:58:08 +0100 Subject: [PATCH 18/24] Document that _ (catch-all) matches empty lexeme MIME-Version: 1.0 Content-Type: text/plain; 
charset=UTF-8 Content-Transfer-Encoding: 8bit The default branch in a match%sedlex is not a regexp — it fires when no rule matches, so zero characters are consumed and the lexeme is "". To catch unexpected characters, use `any` instead. Closes #51 Co-Authored-By: Claude Opus 4.6 --- README.md | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/README.md b/README.md index 51e1a70..5520c8a 100644 --- a/README.md +++ b/README.md @@ -92,6 +92,16 @@ Like ocamllex, sedlex uses **longest match** with **first rule priority**: the input `"if"` is matched by the first rule because it is listed first, even though the second rule also accepts `"if"`. +**Important:** The `_` (catch-all) case is *not* a regexp — it is the +fallback when no rule matches. Because no characters were consumed, the +matched lexeme is empty (`""`). If you need to consume and report an +unexpected character, use `any` instead: + +```ocaml + | any -> Printf.sprintf "unexpected character: %s" (Sedlexing.Utf8.lexeme buf) + | _ -> failwith "unreachable: any matches all characters" +``` + The actions can call functions from the Sedlexing module to extract (parts of) the matched lexeme, in the desired encoding. From 2ec39201c78788026e65caec3af068b8d6858080 Mon Sep 17 00:00:00 2001 From: hhugo Date: Thu, 19 Mar 2026 04:27:15 +0100 Subject: [PATCH 19/24] Document regexp operator precedence (fixes #35) (#181) * Document regexp operator precedence (fixes #35) Since sedlex regexps are OCaml patterns, they follow OCaml's pattern precedence: | (lowest) < , < constructor application (highest). 
Co-Authored-By: Claude Opus 4.6 * Doc: add new sub sections * cleanup * cleanup --------- Co-authored-by: Claude Opus 4.6 --- README.md | 104 ++++++++++++++++++++++++++++++++---------------------- 1 file changed, 61 insertions(+), 43 deletions(-) diff --git a/README.md b/README.md index 5520c8a..7b206b1 100644 --- a/README.md +++ b/README.md @@ -43,6 +43,7 @@ if you use camlp4 or camlp5, with the standard or revised syntax. ## Lexer specifications +### Match rules sedlex adds a new kind of expression to OCaml: lexer definitions. The syntax for the new construction is: @@ -58,7 +59,7 @@ The syntax for the new construction is: or: ```ocaml - [%sedlex match lexbuf with + [%sedlex match lexbuf with | R1 -> e1 ... | Rn -> en @@ -105,43 +106,60 @@ unexpected character, use `any` instead: The actions can call functions from the Sedlexing module to extract (parts of) the matched lexeme, in the desired encoding. +### Regexp syntax + Regular expressions are syntactically OCaml patterns: - `"...."` (string constant): recognize the specified string. -- `'....'` (character constant) : recognize the specified character -- `i` (integer constant) : recognize the specified codepoint -- `'...' .. '...'`: character range -- `i1 .. i2`: range between two codepoints -- `R1 | R2` : alternation -- `R, R2, ..., Rn` : concatenation -- `Star R` : Kleene star (0 or more repetition) -- `Plus R` : equivalent to `R, R*` -- `Opt R` : equivalent to `("" | R)` -- `Rep (R, n)` : equivalent to `R{n}` -- `Rep (R, n .. 
m)` : equivalent to `R{n, m}` -- `Chars "..."` : recognize any character in the string -- `Compl R` : assume that R is a single-character length regexp (see below) - and recognize the complement set -- `Sub (R1,R2)` : assume that R is a single-character length regexp (see below) - and recognize the set of items in `R1` but not in `R2` ("subtract") -- `Intersect (R1,R2)` : assume that `R` is a single-character length regexp (see - below) and recognize the set of items which are in both `R1` and `R2` -- `Utf8 R` : string literals inside R are assumed to be utf-8 encoded. -- `Latin1 R` : string literals inside R are assumed to be latin1 encoded. -- `Ascii R` : string literals inside R are assumed to be ascii encoded. -- `lid` (lowercase identifier) : reference a named regexp (see below) +- `'....'` (character constant): recognize the specified character. +- `i` (integer constant): recognize the specified codepoint. +- `'...' .. '...'`: character range. +- `i1 .. i2`: range between two codepoints. +- `R1 | R2`: alternation. +- `R1, R2, ..., Rn`: concatenation. +- `Star R`: Kleene star (0 or more repetition). +- `Plus R`: equivalent to `R, R*`. +- `Opt R`: equivalent to `("" | R)`. +- `Rep (R, n)`: equivalent to `R{n}`. +- `Rep (R, n .. m)`: equivalent to `R{n, m}`. +- `Chars "..."`: recognize any character in the string. +- `Compl R`: assume that R is a single-character length regexp (see below) + and recognize the complement set. +- `Sub (R1,R2)`: assume that `R1` and `R2` are single-character length regexps (see below) + and recognize the set of items in `R1` but not in `R2` ("subtract"). +- `Intersect (R1,R2)`: assume that `R1` and `R2` are single-character length regexps (see + below) and recognize the set of items which are in both `R1` and `R2`. +- `Utf8 R`: string literals inside R are assumed to be utf-8 encoded. +- `Latin1 R`: string literals inside R are assumed to be latin1 encoded. +- `Ascii R`: string literals inside R are assumed to be ascii encoded. 
+- `lid` (lowercase identifier): reference a named regexp (see below). A single-character length regexp is a regexp which does not contain (after expansion of references) concatenation, Star, Plus, Opt or string constants with a length different from one. +### Precedence + +Since sedlex regular expressions are encoded as OCaml patterns, they follow +OCaml's pattern precedence rules. From lowest to highest: + +| Precedence | Operator | Example | Parses as | +|---|---|---|---| +| lowest | `\|` (alternation) | `'a' \| 'b', 'c'` | `'a' \| ('b', 'c')` | +| | `,` (concatenation) | `"ab", Star 'c'` | `"ab", (Star 'c')` | +| highest | Constructor application (`Star`, `Plus`, `Opt`, ...) | `Star 'a' \| 'b'` | `(Star 'a') \| 'b'` | + +In particular, `Star r1, r2` is `(Star r1), r2` (not `Star (r1, r2)`), +and `r1 | r2, r3` is `r1 | (r2, r3)` (not `(r1 | r2), r3`). +Use parentheses to override: `Star (r1, r2)`, `(r1 | r2), r3`. +### Encoding -Note: - - The OCaml source is assumed to be encoded in UTF-8. - - Strings and chars litterals will be interpreted in ASCII unless otherwise - specified by the `Latin1`,`Ascii` and `Utf8` constructors in patterns. +- The OCaml source is assumed to be encoded in UTF-8. +- String and char literals will be interpreted in ASCII unless otherwise + specified by the `Latin1`, `Ascii` and `Utf8` constructors in patterns. 
+### Named regexps ### Named regular expressions @@ -186,32 +204,32 @@ let token buf = ## Predefined regexps sedlex provides a set of predefined regexps: -- any: any character -- eof: the virtual end-of-file character -- xml_letter, xml_digit, xml_extender, xml_base_char, xml_ideographic, - xml_combining_char, xml_blank: as defined by the XML recommandation -- tr8876_ident_char: characters names in identifiers from ISO TR8876 -- cc, cf, cn, co, cs, ll, lm, lo, lt, lu, mc, me, mn, nd, nl, no, pc, pd, - pe, pf, pi, po, ps, sc, sk, sm, so, zl, zp, zs: as defined by the - Unicode standard (categories) -- alphabetic, ascii_hex_digit, hex_digit, id_continue, id_start, - lowercase, math, other_alphabetic, other_lowercase, other_math, - other_uppercase, uppercase, white_space, xid_continue, xid_start: as - defined by the Unicode standard (properties) +- `any`: any character. +- `eof`: the virtual end-of-file character. +- `xml_letter`, `xml_digit`, `xml_extender`, `xml_base_char`, `xml_ideographic`, + `xml_combining_char`, `xml_blank`: as defined by the XML recommendation. +- `tr8876_ident_char`: character names in identifiers from ISO TR8876. +- `cc`, `cf`, `cn`, `co`, `cs`, `ll`, `lm`, `lo`, `lt`, `lu`, `mc`, `me`, `mn`, `nd`, `nl`, `no`, `pc`, `pd`, + `pe`, `pf`, `pi`, `po`, `ps`, `sc`, `sk`, `sm`, `so`, `zl`, `zp`, `zs`: as defined by the + Unicode standard (categories). +- `alphabetic`, `ascii_hex_digit`, `hex_digit`, `id_continue`, `id_start`, + `lowercase`, `math`, `other_alphabetic`, `other_lowercase`, `other_math`, + `other_uppercase`, `uppercase`, `white_space`, `xid_continue`, `xid_start`: as + defined by the Unicode standard (properties). ## Running a lexer -See the interface of the Sedlexing module for a description of how to +See the interface of the `Sedlexing` module for a description of how to create lexbuf values (from strings, stream or channels encoded in Latin1, utf8 or utf16, or from integer arrays or streams representing Unicode code points). 
It is possible to work with a custom implementation for lex buffers. -To do this, you just have to ensure that a module called Sedlexing is +To do this, you just have to ensure that a module called `Sedlexing` is in scope of your lexer specifications, and that it defines at least -the following functions: start, next, mark, backtrack. See the interface -of the Sedlexing module for more information. +the following functions: `start`, `next`, `mark`, `backtrack`. See the interface +of the `Sedlexing` module for more information. From ed85e3e5ccf97fc10ade96d1ec88b59465eec19a Mon Sep 17 00:00:00 2001 From: Hugo Heuzard Date: Tue, 24 Mar 2026 10:39:43 +0100 Subject: [PATCH 20/24] Add mli for ppx_sedlex, minimize exposed interface - Add ppx_sedlex.mli with minimal public surface - Replace table_counter/partition_counter refs with Hashtbl.length - Expose reset_state instead of raw partitions/tables hashtables - Bake builtin_regexps and Fun.id into handle_sedlex_match - Comment out unused extensions value - Remove StringMap, builtin_regexps, regexp_of_pattern from interface Co-Authored-By: Claude Opus 4.6 (1M context) --- src/syntax/ppx_sedlex.ml | 35 +++++++++++++++++++------------- src/syntax/ppx_sedlex.mli | 25 +++++++++++++++++++++++ test/ppx_test/ppx_sedlex_test.ml | 18 +++------------- 3 files changed, 49 insertions(+), 29 deletions(-) create mode 100644 src/syntax/ppx_sedlex.mli diff --git a/src/syntax/ppx_sedlex.ml b/src/syntax/ppx_sedlex.ml index fbaaf6a..90a07fb 100644 --- a/src/syntax/ppx_sedlex.ml +++ b/src/syntax/ppx_sedlex.ml @@ -128,14 +128,12 @@ let builtin_regexps = (* Tables (indexed mapping: codepoint -> next state) *) let tables = Hashtbl.create 31 -let table_counter = ref 0 let get_tables () = Hashtbl.fold (fun key x accu -> (x, key) :: accu) tables [] let table_name x = try Hashtbl.find tables x with Not_found -> - incr table_counter; - let s = Printf.sprintf "__sedlex_table_%i" !table_counter in + let s = Printf.sprintf "__sedlex_table_%i" 
(Hashtbl.length tables + 1) in Hashtbl.add tables x s; s @@ -150,7 +148,6 @@ let table (name, v) = (* Partition (function: codepoint -> next state) *) let partitions = Hashtbl.create 31 -let partition_counter = ref 0 let get_partitions () = Hashtbl.fold (fun key x accu -> (x, key) :: accu) partitions [] @@ -158,11 +155,16 @@ let get_partitions () = let partition_name x = try Hashtbl.find partitions x with Not_found -> - incr partition_counter; - let s = Printf.sprintf "__sedlex_partition_%i" !partition_counter in + let s = + Printf.sprintf "__sedlex_partition_%i" (Hashtbl.length partitions + 1) + in Hashtbl.add partitions x s; s +let reset_state () = + Hashtbl.clear tables; + Hashtbl.clear partitions + (* We duplicate the body for the EOF (-1) case rather than creating an interior utility function. *) let partition (name, p) = @@ -480,7 +482,7 @@ let regexp_of_pattern env = in aux ~encoding:Ascii -let handle_sedlex_match ~env ~map_rhs match_expr = +let handle_sedlex_match_ ~env ~map_rhs match_expr = let lexbuf = match match_expr with | { pexp_desc = Pexp_match (lexbuf, _); _ } -> ( @@ -520,6 +522,9 @@ let handle_sedlex_match ~env ~map_rhs match_expr = let auto = Sedlex.compile (Array.map fst brs) in (gen_definition lexbuf auto cases error, auto) +let handle_sedlex_match match_expr = + handle_sedlex_match_ ~env:builtin_regexps ~map_rhs:Fun.id match_expr + let previous = ref [] let regexps = ref [] let should_set_cookies = ref false @@ -549,7 +554,7 @@ let mapper = (* match%sedlex with ... *) | [%expr [%sedlex [%e? { pexp_desc = Pexp_match _; _ } as match_expr]]] -> - fst (handle_sedlex_match ~env ~map_rhs:this#expression match_expr) + fst (handle_sedlex_match_ ~env ~map_rhs:this#expression match_expr) (* let = in — intercept when is a regexp *) | [%expr let [%p? 
{ ppat_desc = Ppat_var { txt = name; _ }; _ }] = @@ -614,12 +619,14 @@ let post_handler cookies = Driver.Cookies.set cookies "sedlex.regexps" (pexp_extension ~loc ({ loc; txt = "regexps" }, PStr !regexps))) -let extensions = - [ - Extension.declare "sedlex" Extension.Context.expression - Ast_pattern.(single_expr_payload __) - (fun ~loc:_ ~path:_ expr -> mapper#expression expr); - ] +(* We register via ~impl:mapper#structure rather than ~extensions so that + partition and table definitions can be inserted at the top level. *) +(* let extensions = + [ + Extension.declare "sedlex" Extension.Context.expression + Ast_pattern.(single_expr_payload __) + (fun ~loc:_ ~path:_ expr -> mapper#expression expr); + ] *) let () = Driver.Cookies.add_handler pre_handler; diff --git a/src/syntax/ppx_sedlex.mli b/src/syntax/ppx_sedlex.mli new file mode 100644 index 0000000..5a9b6e8 --- /dev/null +++ b/src/syntax/ppx_sedlex.mli @@ -0,0 +1,25 @@ +(* The package sedlex is released under the terms of an MIT-like license. *) +(* See the attached LICENSE file. *) +(* Copyright 2005, 2013 by Alain Frisch and LexiFi. *) + +(** PPX rewriter for sedlex. + + Transforms [match%sedlex lexbuf with ...] into DFA-based lexer code. + Registration with ppxlib is done via side effects at module initialization. +*) + +(** {2 Internals exposed for the test PPX} + + The values below are not part of the public API. They are exposed so that + [ppx_sedlex_test] can invoke the code generator in isolation and inspect the + generated AST and DFA. *) + +(** Clear all internal tables (partitions, lookup tables). Must be called + between independent compilations in tests. *) +val reset_state : unit -> unit + +(** [handle_sedlex_match match_expr] compiles a [match%sedlex lexbuf with ...] + expression into generated DFA code using the built-in regexp environment. + Returns the generated expression and the DFA automaton. 
*) +val handle_sedlex_match : + Ppxlib.Parsetree.expression -> Ppxlib.Parsetree.expression * Sedlex.dfa diff --git a/test/ppx_test/ppx_sedlex_test.ml b/test/ppx_test/ppx_sedlex_test.ml index 1e1fb2f..16f4133 100644 --- a/test/ppx_test/ppx_sedlex_test.ml +++ b/test/ppx_test/ppx_sedlex_test.ml @@ -2,25 +2,13 @@ open Ppxlib module P = Sedlex_ppx.Ppx_sedlex module S = Sedlex_ppx.Sedlex -let reset_state () = - P.partition_counter := 0; - P.table_counter := 0; - Hashtbl.clear P.partitions; - Hashtbl.clear P.tables - -let clear_tables () = - Hashtbl.clear P.partitions; - Hashtbl.clear P.tables - let expand ~ctxt:_ expr = - reset_state (); + P.reset_state (); let loc = Location.none in - let code_expr, auto = - P.handle_sedlex_match ~env:P.builtin_regexps ~map_rhs:Fun.id expr - in + let code_expr, auto = P.handle_sedlex_match expr in let code_str = Pprintast.string_of_expression code_expr in let dot_str = S.dfa_to_dot auto in - clear_tables (); + P.reset_state (); [%expr print_string "DOT:\n"; print_string [%e Ast_builder.Default.estring ~loc dot_str]; From 0b952824df679ab70da9b60eb825fadb3f8f3477 Mon Sep 17 00:00:00 2001 From: Hugo Heuzard Date: Sun, 8 Feb 2026 01:54:39 +0100 Subject: [PATCH 21/24] Add basic support for named pattern --- CHANGES.md | 1 + src/lib/sedlexing.ml | 44 +- src/lib/sedlexing.mli | 27 + src/syntax/ppx_sedlex.ml | 255 ++++- src/syntax/sedlex.ml | 97 +- src/syntax/sedlex.mli | 16 +- test/basic.ml | 347 +++++++ test/codegen/test_gen.ml | 1482 +++++++++++++++++++++++++++++- test/codegen/test_realistic.ml | 227 +++++ test/ppx_test/ppx_sedlex_test.ml | 1 + 10 files changed, 2433 insertions(+), 64 deletions(-) create mode 100644 test/codegen/test_realistic.ml diff --git a/CHANGES.md b/CHANGES.md index 03958b3..d898603 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,5 +1,6 @@ # unreleased - Support nested `let..in` for `[%sedlex.regexp?]` definitions +- Add support for named captured group (#177) # 3.7 (2025-10-06) - Update to unicode 17.0.0 diff --git 
a/src/lib/sedlexing.ml b/src/lib/sedlexing.ml index be475c7..736e4ab 100644 --- a/src/lib/sedlexing.ml +++ b/src/lib/sedlexing.ml @@ -79,6 +79,8 @@ type lexbuf = { mutable marked_val : int; mutable filename : string; mutable finished : bool; + mutable __private__mem : int array; + mutable __private__mem_saved : int array; } let chunk_size = 512 @@ -109,6 +111,8 @@ let empty_lexbuf bytes_per_char = marked_val = 0; filename = ""; finished = false; + __private__mem = [||]; + __private__mem_saved = [||]; } let dummy_uchar = Uchar.of_int 0 @@ -185,7 +189,15 @@ let refill lexbuf = lexbuf.marked_pos <- lexbuf.marked_pos - s; lexbuf.marked_bytes_pos <- lexbuf.marked_bytes_pos - s_bytes; lexbuf.start_pos <- 0; - lexbuf.start_bytes_pos <- 0 + lexbuf.start_bytes_pos <- 0; + for i = 0 to Array.length lexbuf.__private__mem - 1 do + if lexbuf.__private__mem.(i) >= 0 then + lexbuf.__private__mem.(i) <- lexbuf.__private__mem.(i) - s + done; + for i = 0 to Array.length lexbuf.__private__mem_saved - 1 do + if lexbuf.__private__mem_saved.(i) >= 0 then + lexbuf.__private__mem_saved.(i) <- lexbuf.__private__mem_saved.(i) - s + done end; let n = lexbuf.refill lexbuf.buf lexbuf.pos chunk_size in if n = 0 then lexbuf.finished <- true else lexbuf.len <- lexbuf.len + n @@ -215,7 +227,10 @@ let mark lexbuf i = lexbuf.marked_bol <- lexbuf.curr_bol; lexbuf.marked_bytes_bol <- lexbuf.curr_bytes_bol; lexbuf.marked_line <- lexbuf.curr_line; - lexbuf.marked_val <- i + lexbuf.marked_val <- i; + let n = Array.length lexbuf.__private__mem in + if n > 0 then + Array.blit lexbuf.__private__mem 0 lexbuf.__private__mem_saved 0 n let start lexbuf = lexbuf.start_pos <- lexbuf.pos; @@ -231,6 +246,9 @@ let backtrack lexbuf = lexbuf.curr_bol <- lexbuf.marked_bol; lexbuf.curr_bytes_bol <- lexbuf.marked_bytes_bol; lexbuf.curr_line <- lexbuf.marked_line; + let n = Array.length lexbuf.__private__mem in + if n > 0 then + Array.blit lexbuf.__private__mem_saved 0 lexbuf.__private__mem 0 n; lexbuf.marked_val let 
rollback lexbuf = @@ -240,6 +258,21 @@ let rollback lexbuf = lexbuf.curr_bytes_bol <- lexbuf.start_bytes_bol; lexbuf.curr_line <- lexbuf.start_line +let __private__init_mem lexbuf n = + if Array.length lexbuf.__private__mem < n then begin + lexbuf.__private__mem <- Array.make n (-1); + lexbuf.__private__mem_saved <- Array.make n (-1) + end + else begin + Array.fill lexbuf.__private__mem 0 n (-1); + Array.fill lexbuf.__private__mem_saved 0 n (-1) + end + +let __private__set_mem_pos lexbuf i = lexbuf.__private__mem.(i) <- lexbuf.pos +let __private__set_mem_value lexbuf i v = lexbuf.__private__mem.(i) <- -(v + 2) +let __private__mem_pos lexbuf i = lexbuf.__private__mem.(i) - lexbuf.start_pos +let __private__mem_value lexbuf i = -(lexbuf.__private__mem.(i) + 2) +let __private__num_mem_cells lexbuf = Array.length lexbuf.__private__mem let lexeme_start lexbuf = lexbuf.start_pos + lexbuf.offset let lexeme_bytes_start lexbuf = lexbuf.start_bytes_pos + lexbuf.bytes_offset let lexeme_end lexbuf = lexbuf.pos + lexbuf.offset @@ -256,6 +289,10 @@ let lexeme_bytes_length lexbuf = lexbuf.bytes_pos - lexbuf.start_bytes_pos let sub_lexeme lexbuf pos len = Array.sub lexbuf.buf (lexbuf.start_pos + pos) len +type submatch = { lexbuf : lexbuf; pos : int; len : int } + +let lexeme_of_submatch s = sub_lexeme s.lexbuf s.pos s.len + let lexeme lexbuf = Array.sub lexbuf.buf lexbuf.start_pos (lexbuf.pos - lexbuf.start_pos) @@ -423,6 +460,7 @@ module Latin1 = struct Bytes.to_string s let lexeme lexbuf = sub_lexeme lexbuf 0 (lexbuf.pos - lexbuf.start_pos) + let of_submatch s = sub_lexeme s.lexbuf s.pos s.len end module Utf8 = struct @@ -561,6 +599,7 @@ module Utf8 = struct Buffer.contents buf let lexeme lexbuf = sub_lexeme lexbuf 0 (lexbuf.pos - lexbuf.start_pos) + let of_submatch s = sub_lexeme s.lexbuf s.pos s.len end module Utf16 = struct @@ -659,4 +698,5 @@ module Utf16 = struct Buffer.contents buf let lexeme lb bo bom = sub_lexeme lb 0 (lb.pos - lb.start_pos) bo bom + let of_submatch s bo 
bom = sub_lexeme s.lexbuf s.pos s.len bo bom end diff --git a/src/lib/sedlexing.mli b/src/lib/sedlexing.mli index a2b62e4..ae9e6e5 100644 --- a/src/lib/sedlexing.mli +++ b/src/lib/sedlexing.mli @@ -166,6 +166,16 @@ val lexeme_char : lexbuf -> int -> Uchar.t matched by the regular expression as an array of Unicode code point. *) val sub_lexeme : lexbuf -> int -> int -> Uchar.t array +(** A submatch captures a sub-pattern matched by an [as] binding. It carries the + lexbuf and the position/length of the submatch (in code points, relative to + the start of the current token). Use the extraction functions below to + obtain the matched content in the desired encoding. *) +type submatch = { lexbuf : lexbuf; pos : int; len : int } + +(** [Sedlexing.lexeme_of_submatch s] returns the submatch as an array of Unicode + code points. *) +val lexeme_of_submatch : submatch -> Uchar.t array + (** [Sedlexing.rollback lexbuf] puts [lexbuf] back in its configuration before the last lexeme was matched. It is then possible to use another lexer to parse the same characters again. The other functions above in this section @@ -201,6 +211,13 @@ val next : lexbuf -> Uchar.t option and can be removed at any time. *) val __private__next_int : lexbuf -> int +val __private__init_mem : lexbuf -> int -> unit +val __private__set_mem_pos : lexbuf -> int -> unit +val __private__set_mem_value : lexbuf -> int -> int -> unit +val __private__mem_pos : lexbuf -> int -> int +val __private__mem_value : lexbuf -> int -> int +val __private__num_mem_cells : lexbuf -> int + (** [mark lexbuf i] stores the integer [i] in the internal slot. The backtrack position is set to the current position. *) val mark : lexbuf -> int -> unit @@ -247,6 +264,9 @@ module Latin1 : sig throws an exception [InvalidCodepoint] if it is not possible to encode the result in Latin1. *) val lexeme_char : lexbuf -> int -> char + + (** [of_submatch s] extracts the submatch as a Latin1 encoded string. 
*) + val of_submatch : submatch -> string end module Utf8 : sig @@ -265,6 +285,9 @@ module Utf8 : sig (** As [Sedlexing.sub_lexeme] with a result encoded in UTF-8. *) val sub_lexeme : lexbuf -> int -> int -> string + (** [of_submatch s] extracts the submatch as a UTF-8 encoded string. *) + val of_submatch : submatch -> string + module Helper : sig val width : char -> int val check_two : int -> int -> int @@ -298,4 +321,8 @@ module Utf16 : sig encoded in UTF-16 with byte order [bo] and starting with a BOM if [bom=true] *) val sub_lexeme : lexbuf -> int -> int -> byte_order -> bool -> string + + (** [of_submatch s bo bom] extracts the submatch as a UTF-16 encoded string + with byte order [bo] and starting with a BOM if [bom=true]. *) + val of_submatch : submatch -> byte_order -> bool -> string end diff --git a/src/syntax/ppx_sedlex.ml b/src/syntax/ppx_sedlex.ml index 90a07fb..79d727d 100644 --- a/src/syntax/ppx_sedlex.ml +++ b/src/syntax/ppx_sedlex.ml @@ -199,7 +199,7 @@ let best_final final = let state_fun state = Printf.sprintf "__sedlex_state_%i" state -let call_state lexbuf auto state = +let call_state lexbuf (auto : Sedlex.dfa) state = let { Sedlex.trans; finals } = auto.(state) in if Array.length trans = 0 then ( match best_final finals with @@ -207,13 +207,31 @@ let call_state lexbuf auto state = | None -> assert false) else appfun (state_fun state) [lexbuf] -let gen_state (lexbuf_name, lexbuf) auto i { Sedlex.trans; finals } = +let gen_tag_ops lexbuf (ops : Sedlex.tag_op list) cont = let loc = default_loc in - let partition = Array.map fst trans in + List.fold_right + (fun (op : Sedlex.tag_op) acc -> + match op with + | Set_position t -> + [%expr + Sedlexing.__private__set_mem_pos [%e lexbuf] [%e eint ~loc t]; + [%e acc]] + | Set_value (cell, value) -> + [%expr + Sedlexing.__private__set_mem_value [%e lexbuf] [%e eint ~loc cell] + [%e eint ~loc value]; + [%e acc]]) + ops cont + +let gen_state (lexbuf_name, lexbuf) (auto : Sedlex.dfa) i + { Sedlex.trans; 
finals } = + let loc = default_loc in + let partition = Array.map (fun (cs, _, _) -> cs) trans in let cases = Array.mapi - (fun i (_, j) -> - case ~lhs:(pint ~loc i) ~guard:None ~rhs:(call_state lexbuf auto j)) + (fun i (_, j, tags) -> + let rhs = gen_tag_ops lexbuf tags (call_state lexbuf auto j) in + case ~lhs:(pint ~loc i) ~guard:None ~rhs) trans in let cases = Array.to_list cases in @@ -246,33 +264,50 @@ let gen_state (lexbuf_name, lexbuf) auto i { Sedlex.trans; finals } = Sedlexing.mark [%e lexbuf] [%e eint ~loc i]; [%e body ()]] -let gen_recflag auto = +let gen_recflag (auto : Sedlex.dfa) = (* The generated function is not recursive if the transitions end in states with no further transitions. *) try Array.iter (fun { Sedlex.trans; _ } -> Array.iter - (fun (_, j) -> + (fun (_, j, _) -> if Array.length auto.(j).Sedlex.trans > 0 then raise Exit) trans) auto; Nonrecursive with Exit -> Recursive -let gen_definition ((_, lexbuf) as lexbuf_with_name) auto l error = +let gen_definition ((_, lexbuf) as lexbuf_with_name) + (compiled : Sedlex.compiled) l error = let loc = default_loc in + let auto = compiled.dfa in let cases = List.mapi (fun i (_, e) -> case ~lhs:(pint ~loc i) ~guard:None ~rhs:e) l in let states = Array.mapi (gen_state lexbuf_with_name auto) auto in let states = List.flatten (Array.to_list states) in + let start_expr = + if compiled.num_tags > 0 then ( + let init_mem = + [%expr + Sedlexing.__private__init_mem [%e lexbuf] + [%e eint ~loc compiled.num_tags]] + in + let set_init_tags = + gen_tag_ops lexbuf compiled.init_tags (appfun (state_fun 0) [lexbuf]) + in + pexp_sequence ~loc + [%expr Sedlexing.start [%e lexbuf]] + (pexp_sequence ~loc init_mem set_init_tags)) + else + pexp_sequence ~loc + [%expr Sedlexing.start [%e lexbuf]] + (appfun (state_fun 0) [lexbuf]) + in pexp_let ~loc (gen_recflag auto) states - (pexp_sequence ~loc - [%expr Sedlexing.start [%e lexbuf]] - (pexp_match ~loc - (appfun (state_fun 0) [lexbuf]) - (cases @ [case ~lhs:(ppat_any ~loc) 
~guard:None ~rhs:error]))) + (pexp_match ~loc start_expr + (cases @ [case ~lhs:(ppat_any ~loc) ~guard:None ~rhs:error])) (* Lexer specification parser *) @@ -327,13 +362,92 @@ let rec repeat r = function | 0, m -> Sedlex.alt Sedlex.eps (Sedlex.seq r (repeat r (0, m - 1))) | n, m -> Sedlex.seq r (repeat r (n - 1, m - 1)) +type tag_info = { + name : string; + start_tag : int; + end_tag : int; + disc : (int * int) list; +} + +let gen_sub_lexeme lexbuf st et = + let loc = default_loc in + [%expr + let __s = Sedlexing.__private__mem_pos [%e lexbuf] [%e eint ~loc st] in + let __e = Sedlexing.__private__mem_pos [%e lexbuf] [%e eint ~loc et] in + { Sedlexing.lexbuf = [%e lexbuf]; pos = __s; len = __e - __s }] + +let gen_binding_code lexbuf (tag_info : tag_info list) action = + let loc = default_loc in + ignore loc; + if tag_info = [] then action + else ( + (* Group tag_info by variable name *) + let by_name = + let tbl = Hashtbl.create 8 in + let order = ref [] in + List.iter + (fun { name; start_tag; end_tag; disc } -> + if not (Hashtbl.mem tbl name) then order := name :: !order; + let existing = try Hashtbl.find tbl name with Not_found -> [] in + Hashtbl.replace tbl name (existing @ [(start_tag, end_tag, disc)])) + tag_info; + List.rev_map (fun name -> (name, Hashtbl.find tbl name)) !order + in + List.fold_right + (fun (name, entries) acc -> + match entries with + | [(st, et, _)] -> + [%expr + let [%p pvar ~loc name] = [%e gen_sub_lexeme lexbuf st et] in + [%e acc]] + | _ -> + let gen_disc_cond discs = + let check (cell, value) = + [%expr + Sedlexing.__private__mem_value [%e lexbuf] + [%e eint ~loc cell] + = [%e eint ~loc value]] + in + match discs with + | [] -> assert false + | [d] -> check d + | d :: ds -> + List.fold_left + (fun acc d -> [%expr [%e acc] && [%e check d]]) + (check d) ds + in + let rec gen_checks = function + | [(st, et, _)] -> gen_sub_lexeme lexbuf st et + | (st, et, discs) :: rest -> + if discs = [] then + failwith + "discriminator required for 
or-pattern with multiple \ + bindings"; + [%expr + if [%e gen_disc_cond discs] then + [%e gen_sub_lexeme lexbuf st et] + else [%e gen_checks rest]] + | [] -> assert false + in + [%expr + let [%p pvar ~loc name] = [%e gen_checks entries] in + [%e acc]]) + by_name action) + let regexp_of_pattern env = + let no_tags r = (r, ([] : tag_info list)) in + let reject_tags loc ctx (r, tags) = + if tags <> [] then err loc "'as' bindings are not supported inside %s" ctx; + r + in let rec char_pair_op func name ~encoding ~loc tuple = (* Construct something like Sub(a,b) *) match tuple with | Some { ppat_desc = Ppat_tuple [p0; p1]; _ } -> begin - match func (aux ~encoding p0) (aux ~encoding p1) with - | Some r -> r + let r0 = reject_tags p0.ppat_loc name (aux ~encoding p0) in + let r1 = reject_tags p1.ppat_loc name (aux ~encoding p1) in + match func r0 r1 with + | Some r -> no_tags r | None -> err loc "the %s operator can only applied to single-character length \ @@ -346,19 +460,49 @@ let regexp_of_pattern env = and aux ~encoding p = (* interpret one pattern node *) match p.ppat_desc with + (* name as x — named sub-match binding *) + | Ppat_alias (inner, { txt = name; _ }) -> + let r, tags = aux ~encoding inner in + let wrapped, start_tag, end_tag = Sedlex.bind r in + (wrapped, { name; start_tag; end_tag; disc = [] } :: tags) (* p1 | p2 — alternation *) - | Ppat_or (p1, p2) -> Sedlex.alt (aux ~encoding p1) (aux ~encoding p2) + | Ppat_or (p1, p2) -> + let r1, tags1 = aux ~encoding p1 in + let r2, tags2 = aux ~encoding p2 in + if tags1 <> [] || tags2 <> [] then begin + let names tags = + List.map (fun ti -> ti.name) tags |> List.sort_uniq String.compare + in + if names tags1 <> names tags2 then + err p.ppat_loc + "both sides of '|' must bind the same names with 'as'"; + let stamp disc_cell value tags = + List.map + (fun ti -> { ti with disc = (disc_cell, value) :: ti.disc }) + tags + in + let disc_cell = Sedlex.new_disc_cell () in + let r1w = Sedlex.bind_disc r1 disc_cell 0 in + 
let r2w = Sedlex.bind_disc r2 disc_cell 1 in + ( Sedlex.alt r1w r2w, + stamp disc_cell 0 tags1 @ stamp disc_cell 1 tags2 ) + end + else (Sedlex.alt r1 r2, tags1 @ tags2) (* (p1, p2, ...) — sequence *) | Ppat_tuple (p :: pl) -> List.fold_left - (fun r p -> Sedlex.seq r (aux ~encoding p)) + (fun (r, tags) p -> + let r', tags' = aux ~encoding p in + (Sedlex.seq r r', tags @ tags')) (aux ~encoding p) pl (* Star p — zero-or-more repetition *) | Ppat_construct ({ txt = Lident "Star"; _ }, Some (_, p)) -> - Sedlex.rep (aux ~encoding p) + let r = reject_tags p.ppat_loc "Star" (aux ~encoding p) in + no_tags (Sedlex.rep r) (* Plus p — one-or-more repetition *) | Ppat_construct ({ txt = Lident "Plus"; _ }, Some (_, p)) -> - Sedlex.plus (aux ~encoding p) + let r = reject_tags p.ppat_loc "Plus" (aux ~encoding p) in + no_tags (Sedlex.plus r) (* Utf8 p — switch to UTF-8 encoding *) | Ppat_construct ({ txt = Lident "Utf8"; _ }, Some (_, p)) -> aux ~encoding:Utf8 p @@ -386,11 +530,12 @@ let regexp_of_pattern env = ]; _; } ) ) -> begin + let r = reject_tags p0.ppat_loc "Rep" (aux ~encoding p0) in match (i1, i2) with | Pconst_integer (i1, _), Pconst_integer (i2, _) -> let i1 = int_of_string i1 in let i2 = int_of_string i2 in - if 0 <= i1 && i1 <= i2 then repeat (aux ~encoding p0) (i1, i2) + if 0 <= i1 && i1 <= i2 then no_tags (repeat r (i1, i2)) else err p.ppat_loc "Invalid range for Rep operator" | _ -> err p.ppat_loc "Rep must take an integer constant or interval" @@ -400,13 +545,15 @@ let regexp_of_pattern env = err p.ppat_loc "the Rep operator takes 2 arguments" (* Opt p — optional (zero or one) *) | Ppat_construct ({ txt = Lident "Opt"; _ }, Some (_, p)) -> - Sedlex.alt Sedlex.eps (aux ~encoding p) + let r = reject_tags p.ppat_loc "Opt" (aux ~encoding p) in + no_tags (Sedlex.alt Sedlex.eps r) (* Compl p — complement of a character class *) | Ppat_construct ({ txt = Lident "Compl"; _ }, arg) -> begin match arg with | Some (_, p0) -> begin - match Sedlex.compl (aux ~encoding p0) 
with - | Some r -> r + let r = reject_tags p0.ppat_loc "Compl" (aux ~encoding p0) in + match Sedlex.compl r with + | Some r -> no_tags r | None -> err p.ppat_loc "the Compl operator can only applied to a \ @@ -433,7 +580,7 @@ let regexp_of_pattern env = | Some (Pconst_string (s, _, _)) -> let l = rev_csets_of_string ~loc:p.ppat_loc ~encoding s in let chars = List.fold_left Cset.union Cset.empty l in - Sedlex.chars chars + no_tags (Sedlex.chars chars) | _ -> err p.ppat_loc "the Chars operator requires a string argument") (* 'a' .. 'z' or 0x41 .. 0x5a — character/codepoint range *) @@ -452,12 +599,14 @@ let regexp_of_pattern env = err p.ppat_loc "this pattern is not a valid %s interval regexp" (string_of_encoding encoding); - Sedlex.chars (Cset.interval (Char.code c1) (Char.code c2)) + no_tags + (Sedlex.chars (Cset.interval (Char.code c1) (Char.code c2))) | Pconst_integer (i1, _), Pconst_integer (i2, _) -> - Sedlex.chars - (Cset.interval - (codepoint (int_of_string i1)) - (codepoint (int_of_string i2))) + no_tags + (Sedlex.chars + (Cset.interval + (codepoint (int_of_string i1)) + (codepoint (int_of_string i2)))) | _ -> err p.ppat_loc "this pattern is not a valid interval regexp" end (* "string" or 'c' or 0x42 — literal string, char, or codepoint *) @@ -465,17 +614,19 @@ let regexp_of_pattern env = match const with | Pconst_string (s, _, _) -> let rev_l = rev_csets_of_string s ~loc:p.ppat_loc ~encoding in - List.fold_left - (fun acc cset -> Sedlex.seq (Sedlex.chars cset) acc) - Sedlex.eps rev_l - | Pconst_char c -> Sedlex.chars (char c) + no_tags + (List.fold_left + (fun acc cset -> Sedlex.seq (Sedlex.chars cset) acc) + Sedlex.eps rev_l) + | Pconst_char c -> no_tags (Sedlex.chars (char c)) | Pconst_integer (i, _) -> - Sedlex.chars (Cset.singleton (codepoint (int_of_string i))) + no_tags + (Sedlex.chars (Cset.singleton (codepoint (int_of_string i)))) | _ -> err p.ppat_loc "this pattern is not a valid regexp" end (* name — reference to a previously defined regexp *) | 
Ppat_var { txt = x; _ } -> begin - try StringMap.find x env + try no_tags (StringMap.find x env) with Not_found -> err p.ppat_loc "unbound regexp %s" x end | _ -> err p.ppat_loc "this pattern is not a valid regexp" @@ -509,18 +660,30 @@ let handle_sedlex_match_ ~env ~map_rhs match_expr = err p.ppat_loc "the last branch must be a catch-all error case" in let cases = List.rev (List.tl cases) in - let cases = + Sedlex.reset_tags (); + let cases_parsed = List.map (function | { pc_lhs = p; pc_rhs = e; pc_guard = None } -> - (regexp_of_pattern env p, map_rhs e) + let regexp, tag_info = regexp_of_pattern env p in + (regexp, tag_info, e) | { pc_guard = Some e; _ } -> err e.pexp_loc "'when' guards are not supported") cases in - let brs = Array.of_list cases in - let auto = Sedlex.compile (Array.map fst brs) in - (gen_definition lexbuf auto cases error, auto) + let compiled = + Sedlex.compile (Array.of_list (List.map (fun (r, _, _) -> r) cases_parsed)) + in + (* map_rhs is called after compile so that nested match%sedlex blocks + (which call reset_tags) cannot corrupt the outer tag counter. *) + let cases = + List.map + (fun (_, tag_info, e) -> + let action = gen_binding_code (snd lexbuf) tag_info (map_rhs e) in + ((), action)) + cases_parsed + in + (gen_definition lexbuf compiled cases error, compiled.dfa) let handle_sedlex_match match_expr = handle_sedlex_match_ ~env:builtin_regexps ~map_rhs:Fun.id match_expr @@ -538,14 +701,22 @@ let mapper = method eval_regexp_expr e = match e with (* [%sedlex.regexp? ] *) - | [%expr [%sedlex.regexp? [%p? p]]] -> Some (regexp_of_pattern env p) + | [%expr [%sedlex.regexp? [%p? p]]] -> + let r, tags = regexp_of_pattern env p in + if tags <> [] then + err p.ppat_loc + "'as' bindings are not allowed in regexp definitions"; + Some r (* let = [%sedlex.regexp? ] in *) | [%expr let [%p? { ppat_desc = Ppat_var { txt = name; _ }; _ }] = [%sedlex.regexp? [%p? p]] in [%e? 
body]] -> - let r = regexp_of_pattern env p in + let r, tags = regexp_of_pattern env p in + if tags <> [] then + err p.ppat_loc + "'as' bindings are not allowed in regexp definitions"; (this#define_regexp name r)#eval_regexp_expr body | _ -> None diff --git a/src/syntax/sedlex.ml b/src/syntax/sedlex.ml index 76f359c..63abf05 100644 --- a/src/syntax/sedlex.ml +++ b/src/syntax/sedlex.ml @@ -6,10 +6,13 @@ module Cset = Sedlex_cset (* NFA *) +type tag_op = Set_position of int | Set_value of int * int + type node = { id : int; mutable eps : node list; mutable trans : (Cset.t * node) list; + tag : tag_op option; } (* Compilation regexp -> NFA *) @@ -20,12 +23,16 @@ let cur_id = ref 0 let new_node () = incr cur_id; - { id = !cur_id; eps = []; trans = [] } + { id = !cur_id; eps = []; trans = []; tag = None } + +let new_tagged_node tag_op = + incr cur_id; + { id = !cur_id; eps = []; trans = []; tag = Some tag_op } let seq r1 r2 succ = r1 (r2 succ) let is_chars final = function - | { eps = []; trans = [(c, f)]; _ } when f == final -> Some c + | { eps = []; trans = [(c, f)]; tag = None; _ } when f == final -> Some c | _ -> None let chars c succ = @@ -72,6 +79,39 @@ let pair_op f r0 r1 = let subtract = pair_op Cset.difference let intersection = pair_op Cset.intersection +(* Tags for as-bindings *) + +let cur_tag = ref 0 +let reset_tags () = cur_tag := 0 + +let new_tag () = + let t = !cur_tag in + incr cur_tag; + t + +let bind r = + let start_tag = new_tag () in + let end_tag = new_tag () in + let wrapped succ = + let end_node = new_tagged_node (Set_position end_tag) in + end_node.eps <- [succ]; + let inner = r end_node in + let start_node = new_tagged_node (Set_position start_tag) in + start_node.eps <- [inner]; + start_node + in + (wrapped, start_tag, end_tag) + +let new_disc_cell () = new_tag () + +let bind_disc r cell value = + let wrapped succ = + let disc_node = new_tagged_node (Set_value (cell, value)) in + disc_node.eps <- [succ]; + r disc_node + in + wrapped + let 
compile_re re = let final = new_node () in (re final, final) @@ -81,10 +121,13 @@ let compile_re re = type state = node list (* A state of the DFA corresponds to a set of nodes in the NFA. *) -let rec add_node state node = - if List.memq node state then state else add_nodes (node :: state) node.eps +let rec add_node (state, tags) node = + if List.memq node state then (state, tags) + else ( + let tags = match node.tag with Some op -> op :: tags | None -> tags in + add_nodes (node :: state, tags) node.eps) -and add_nodes state nodes = List.fold_left add_node state nodes +and add_nodes acc nodes = List.fold_left add_node acc nodes let transition (state : state) = (* Merge transition with the same target *) @@ -109,16 +152,27 @@ let transition (state : state) = let _, t = List.fold_left split (Cset.empty, []) t in - (* Epsilon closure of targets *) - let t = List.map (fun (c, ns) -> (c, add_nodes [] ns)) t in + (* Epsilon closure of targets, collecting tags *) + let t = + List.map + (fun (c, ns) -> + let state, tags = add_nodes ([], []) ns in + (c, state, tags)) + t + in (* Canonical ordering *) let t = Array.of_list t in - Array.sort (fun (c1, _) (c2, _) -> compare c1 c2) t; + Array.sort (fun (c1, _, _) (c2, _, _) -> compare c1 c2) t; t -type dfa_state = { trans : (Cset.t * int) array; finals : bool array } +type dfa_state = { + trans : (Cset.t * int * tag_op list) array; + finals : bool array; +} + type dfa = dfa_state array +type compiled = { dfa : dfa; init_tags : tag_op list; num_tags : int } let compile rs = let rs = Array.map compile_re rs in @@ -132,16 +186,21 @@ let compile rs = incr counter; Hashtbl.add states state i; let trans = transition state in - let trans = Array.map (fun (p, t) -> (p, aux t)) trans in + let trans = Array.map (fun (p, t, tags) -> (p, aux t, tags)) trans in let finals = Array.map (fun (_, f) -> List.memq f state) rs in Hashtbl.add states_def i { trans; finals }; i in - let init = ref [] in + let init = ref ([], []) in Array.iter (fun 
(i, _) -> init := add_node !init i) rs; - let i = aux !init in + let init_state, init_tags = !init in + let i = aux init_state in assert (i = 0); - Array.init !counter (Hashtbl.find states_def) + { + dfa = Array.init !counter (Hashtbl.find states_def); + init_tags; + num_tags = !cur_tag; + } let cset_to_label cset = let escape_dot c = @@ -188,8 +247,18 @@ let dfa_to_dot dfa = " state%d [label=\"%d\\n[rule %s]\", shape=doublecircle];\n" i i (String.concat "," (List.map string_of_int rules))); Array.iter - (fun (cset, target) -> + (fun (cset, target, tags) -> let label = cset_to_label cset in + let tag_str op = + match op with + | Set_position t -> "t" ^ string_of_int t + | Set_value (c, v) -> + "d" ^ string_of_int c ^ "=" ^ string_of_int v + in + let label = + if tags = [] then label + else label ^ " {" ^ String.concat "," (List.map tag_str tags) ^ "}" + in bprintf buf " state%d -> state%d [label=\"%s\"];\n" i target label) trans) dfa; diff --git a/src/syntax/sedlex.mli b/src/syntax/sedlex.mli index 05a95de..c82484c 100644 --- a/src/syntax/sedlex.mli +++ b/src/syntax/sedlex.mli @@ -22,8 +22,20 @@ val intersection : regexp -> regexp -> regexp option (* If each argument is a single [chars] regexp, returns a regexp which matches the intersection set. Otherwise returns [None]. 
*) -type dfa_state = { trans : (Sedlex_cset.t * int) array; finals : bool array } +type tag_op = Set_position of int | Set_value of int * int + +val bind : regexp -> regexp * int * int +val new_disc_cell : unit -> int +val bind_disc : regexp -> int -> int -> regexp +val reset_tags : unit -> unit + +type dfa_state = { + trans : (Sedlex_cset.t * int * tag_op list) array; + finals : bool array; +} + type dfa = dfa_state array +type compiled = { dfa : dfa; init_tags : tag_op list; num_tags : int } -val compile : regexp array -> dfa +val compile : regexp array -> compiled val dfa_to_dot : dfa -> string diff --git a/test/basic.ml b/test/basic.ml index 93b44c8..e248c36 100644 --- a/test/basic.ml +++ b/test/basic.ml @@ -1128,3 +1128,350 @@ let%expect_test "nested_let_regexp_toplevel" = Hex: 0xDEAD Word: rest EOF |}] + +let letter = [%sedlex.regexp? 'a' .. 'z' | 'A' .. 'Z'] + +let%expect_test "as_bindings" = + (* Test 1: simple binding in middle of sequence *) + let buf = Sedlexing.Utf8.from_string "abc" in + (match%sedlex buf with + | 'a', ('b' as x), 'c' -> + Printf.printf "x=%s\n" (Sedlexing.Utf8.of_submatch x) + | _ -> assert false); + [%expect {| x=b |}]; + (* Test 2: multiple bindings *) + let buf = Sedlexing.Utf8.from_string "abc" in + (match%sedlex buf with + | ('a' as x), ('b' as y), 'c' -> + Printf.printf "x=%s y=%s\n" + (Sedlexing.Utf8.of_submatch x) + (Sedlexing.Utf8.of_submatch y) + | _ -> assert false); + [%expect {| x=a y=b |}]; + (* Test 3: binding with named regexp *) + let buf = Sedlexing.Utf8.from_string "123z" in + (match%sedlex buf with + | number, (letter as x) -> + Printf.printf "x=%s\n" (Sedlexing.Utf8.of_submatch x) + | _ -> assert false); + [%expect {| x=z |}]; + (* Test 4: whole-match binding *) + let buf = Sedlexing.Utf8.from_string "hello" in + (match%sedlex buf with + | Plus 'a' .. 
'z' as x -> + Printf.printf "x=%s\n" (Sedlexing.Utf8.of_submatch x) + | _ -> assert false); + [%expect {| x=hello |}]; + (* Test 5: multi-char UTF-8 *) + let buf = Sedlexing.Utf8.from_string "a\xC3\xA9b" in + (match%sedlex buf with + | 'a', (any as x), 'b' -> + Printf.printf "x=%s\n" (Sedlexing.Utf8.of_submatch x) + | _ -> assert false); + [%expect {| x=é |}]; + (* Test 6: variable-length named segment *) + let buf = Sedlexing.Utf8.from_string {|"hello"|} in + (match%sedlex buf with + | '"', (Star (Compl '"') as content), '"' -> + Printf.printf "content=%s\n" (Sedlexing.Utf8.of_submatch content) + | _ -> assert false); + [%expect {| content=hello |}]; + (* Test 7: as binding wrapping an alternation *) + let buf = Sedlexing.Utf8.from_string "xb" in + (match%sedlex buf with + | 'x', (('a' | 'b') as x) -> + Printf.printf "x=%s\n" (Sedlexing.Utf8.of_submatch x) + | _ -> assert false); + [%expect {| x=b |}]; + (* Test 8: as binding in both branches of or-pattern *) + let buf = Sedlexing.Utf8.from_string "123" in + (match%sedlex buf with + | (number as x) | (Plus letter as x) -> + Printf.printf "x=%s\n" (Sedlexing.Utf8.of_submatch x) + | _ -> assert false); + [%expect {| x=123 |}]; + let buf = Sedlexing.Utf8.from_string "abc" in + (match%sedlex buf with + | (number as x) | (Plus letter as x) -> + Printf.printf "x=%s\n" (Sedlexing.Utf8.of_submatch x) + | _ -> assert false); + [%expect {| x=abc |}]; + (* Test 9: as binding inside or, in a sequence *) + let buf = Sedlexing.Utf8.from_string "<42>" in + (match%sedlex buf with + | '<', ((number as x) | (Plus letter as x)), '>' -> + Printf.printf "x=%s\n" (Sedlexing.Utf8.of_submatch x) + | _ -> assert false); + [%expect {| x=42 |}]; + let buf = Sedlexing.Utf8.from_string "<hello>" in + (match%sedlex buf with + | '<', ((number as x) | (Plus letter as x)), '>' -> + Printf.printf "x=%s\n" (Sedlexing.Utf8.of_submatch x) + | _ -> assert false); + [%expect {| x=hello |}]; + (* Test 10: or-pattern with shared prefix requiring discriminator 
tags *) + let buf = Sedlexing.Utf8.from_string "abcdef" in + (match%sedlex buf with + | ("abc" as x), "def" | "a", ("bcd" as x), "ey" -> + Printf.printf "x=%s\n" (Sedlexing.Utf8.of_submatch x) + | _ -> assert false); + [%expect {| x=abc |}]; + let buf = Sedlexing.Utf8.from_string "abcdey" in + (match%sedlex buf with + | ("abc" as x), "def" | "a", ("bcd" as x), "ey" -> + Printf.printf "x=%s\n" (Sedlexing.Utf8.of_submatch x) + | _ -> assert false); + [%expect {| x=bcd |}]; + (* Test 10b: 3-way or-pattern reuses single disc cell *) + let buf = Sedlexing.Utf8.from_string "abcd" in + (match%sedlex buf with + | ("ab" as x), "cd" | ("a" as x), "bce" | ("abc" as x), "df" -> + Printf.printf "x=%s\n" (Sedlexing.Utf8.of_submatch x) + | _ -> assert false); + [%expect {| x=ab |}]; + let buf = Sedlexing.Utf8.from_string "abce" in + (match%sedlex buf with + | ("ab" as x), "cd" | ("a" as x), "bce" | ("abc" as x), "df" -> + Printf.printf "x=%s\n" (Sedlexing.Utf8.of_submatch x) + | _ -> assert false); + [%expect {| x=a |}]; + let buf = Sedlexing.Utf8.from_string "abcdf" in + (match%sedlex buf with + | ("ab" as x), "cd" | ("a" as x), "bce" | ("abc" as x), "df" -> + Printf.printf "x=%s\n" (Sedlexing.Utf8.of_submatch x) + | _ -> assert false); + [%expect {| x=abc |}]; + (* Test 10c: or-pattern with inner or + extra binding without disc *) + let buf = Sedlexing.Utf8.from_string "aef" in + (match%sedlex buf with + | (("a" as x) | ("b" as x)), ("ef" as y) | ("cd" as x), ("gh" as y) -> + Printf.printf "x=%s y=%s\n" + (Sedlexing.Utf8.of_submatch x) + (Sedlexing.Utf8.of_submatch y) + | _ -> assert false); + [%expect {| x=a y=ef |}]; + let buf = Sedlexing.Utf8.from_string "bef" in + (match%sedlex buf with + | (("a" as x) | ("b" as x)), ("ef" as y) | ("cd" as x), ("gh" as y) -> + Printf.printf "x=%s y=%s\n" + (Sedlexing.Utf8.of_submatch x) + (Sedlexing.Utf8.of_submatch y) + | _ -> assert false); + [%expect {| x=b y=ef |}]; + let buf = Sedlexing.Utf8.from_string "cdgh" in + (match%sedlex buf 
with + | (("a" as x) | ("b" as x)), ("ef" as y) | ("cd" as x), ("gh" as y) -> + Printf.printf "x=%s y=%s\n" + (Sedlexing.Utf8.of_submatch x) + (Sedlexing.Utf8.of_submatch y) + | _ -> assert false); + [%expect {| x=cd y=gh |}]; + (* Test 10d: nested or-patterns on both sides *) + let buf = Sedlexing.Utf8.from_string "aef" in + (match%sedlex buf with + | (("a" as x) | ("b" as x)), ("ef" as y) + | (("c" as x) | ("d" as x)), ("gh" as y) -> + Printf.printf "x=%s y=%s\n" + (Sedlexing.Utf8.of_submatch x) + (Sedlexing.Utf8.of_submatch y) + | _ -> assert false); + [%expect {| x=a y=ef |}]; + let buf = Sedlexing.Utf8.from_string "bef" in + (match%sedlex buf with + | (("a" as x) | ("b" as x)), ("ef" as y) + | (("c" as x) | ("d" as x)), ("gh" as y) -> + Printf.printf "x=%s y=%s\n" + (Sedlexing.Utf8.of_submatch x) + (Sedlexing.Utf8.of_submatch y) + | _ -> assert false); + [%expect {| x=b y=ef |}]; + let buf = Sedlexing.Utf8.from_string "cgh" in + (match%sedlex buf with + | (("a" as x) | ("b" as x)), ("ef" as y) + | (("c" as x) | ("d" as x)), ("gh" as y) -> + Printf.printf "x=%s y=%s\n" + (Sedlexing.Utf8.of_submatch x) + (Sedlexing.Utf8.of_submatch y) + | _ -> assert false); + [%expect {| x=c y=gh |}]; + let buf = Sedlexing.Utf8.from_string "dgh" in + (match%sedlex buf with + | (("a" as x) | ("b" as x)), ("ef" as y) + | (("c" as x) | ("d" as x)), ("gh" as y) -> + Printf.printf "x=%s y=%s\n" + (Sedlexing.Utf8.of_submatch x) + (Sedlexing.Utf8.of_submatch y) + | _ -> assert false); + [%expect {| x=d y=gh |}]; + (* Test 11: Set_prev with backtracking (Opt at end) *) + let buf = Sedlexing.Utf8.from_string "aabba" in + (match%sedlex buf with + | (Plus 'a' as x), ((Plus 'b', Opt 'a') as y) -> + Printf.printf "x=%s y=%s\n" + (Sedlexing.Utf8.of_submatch x) + (Sedlexing.Utf8.of_submatch y) + | _ -> assert false); + [%expect {| x=aa y=bba |}]; + let buf = Sedlexing.Utf8.from_string "aabb" in + (match%sedlex buf with + | (Plus 'a' as x), ((Plus 'b', Opt 'a') as y) -> + Printf.printf "x=%s 
y=%s\n" + (Sedlexing.Utf8.of_submatch x) + (Sedlexing.Utf8.of_submatch y) + | _ -> assert false); + [%expect {| x=aa y=bb |}]; + let buf = Sedlexing.Utf8.from_string "aba" in + (match%sedlex buf with + | (Plus 'a' as x), ((Plus 'b', Opt 'a') as y) -> + Printf.printf "x=%s y=%s\n" + (Sedlexing.Utf8.of_submatch x) + (Sedlexing.Utf8.of_submatch y) + | _ -> assert false); + [%expect {| x=a y=ba |}] + +let num_mem buf = Sedlexing.__private__num_mem_cells buf + +let%expect_test "as_bindings_num_mem_cells" = + (* No bindings: 0 cells *) + let buf = Sedlexing.Utf8.from_string "abc" in + (match%sedlex buf with + | "abc" -> Printf.printf "mem_cells=%d\n" (num_mem buf) + | _ -> assert false); + [%expect {| mem_cells=0 |}]; + (* Single binding in tuple: 2 cells (start + end tags) *) + let buf = Sedlexing.Utf8.from_string "abc" in + (match%sedlex buf with + | 'a', ('b' as _x), 'c' -> Printf.printf "mem_cells=%d\n" (num_mem buf) + | _ -> assert false); + [%expect {| mem_cells=2 |}]; + (* Two bindings in tuple: 4 cells (2 tags each) *) + let buf = Sedlexing.Utf8.from_string "abc" in + (match%sedlex buf with + | ('a' as _x), ('b' as _y), 'c' -> + Printf.printf "mem_cells=%d\n" (num_mem buf) + | _ -> assert false); + [%expect {| mem_cells=4 |}]; + (* Whole-match binding: 2 cells (start + end tags) *) + let buf = Sedlexing.Utf8.from_string "hello" in + (match%sedlex buf with + | Plus 'a' .. 
'z' as _x -> Printf.printf "mem_cells=%d\n" (num_mem buf) + | _ -> assert false); + [%expect {| mem_cells=2 |}]; + (* as wrapping alternation in tuple: 2 cells (start + end tags) *) + let buf = Sedlexing.Utf8.from_string "xb" in + (match%sedlex buf with + | 'x', (('a' | 'b') as _x) -> Printf.printf "mem_cells=%d\n" (num_mem buf) + | _ -> assert false); + [%expect {| mem_cells=2 |}]; + (* Or-pattern: 5 cells (2 binding tags per branch + 1 disc cell) *) + let buf = Sedlexing.Utf8.from_string "123" in + (match%sedlex buf with + | (number as _x) | (Plus letter as _x) -> + Printf.printf "mem_cells=%d\n" (num_mem buf) + | _ -> assert false); + [%expect {| mem_cells=5 |}]; + (* Shared-prefix or-pattern: 5 cells (2 binding tags per branch + 1 disc cell) *) + let buf = Sedlexing.Utf8.from_string "abcdef" in + (match%sedlex buf with + | ("abc" as _x), "def" | "a", ("bcd" as _x), "ey" -> + Printf.printf "mem_cells=%d\n" (num_mem buf) + | _ -> assert false); + [%expect {| mem_cells=5 |}] + +let%expect_test "as_bindings_multi_rule_mem_cells" = + (* All rules in a match%sedlex share one pool of memory cells. + The total is the sum of tags across ALL rules, not just the matched one. 
*) + + (* One rule with binding in tuple, one without: 2 cells *) + let buf = Sedlexing.Utf8.from_string "ab" in + (match%sedlex buf with + | 'a', ('b' as _x) -> Printf.printf "mem_cells=%d\n" (num_mem buf) + | "cd" -> Printf.printf "mem_cells=%d\n" (num_mem buf) + | _ -> assert false); + [%expect {| mem_cells=2 |}]; + (* Even when the no-binding rule matches, cells are still allocated *) + let buf = Sedlexing.Utf8.from_string "cd" in + (match%sedlex buf with + | 'a', ('b' as _x) -> Printf.printf "mem_cells=%d\n" (num_mem buf) + | "cd" -> Printf.printf "mem_cells=%d\n" (num_mem buf) + | _ -> assert false); + [%expect {| mem_cells=2 |}]; + (* Two rules, each with one binding: 4 cells (2 per rule) *) + let buf = Sedlexing.Utf8.from_string "ab" in + (match%sedlex buf with + | 'a', ('b' as _x) -> Printf.printf "mem_cells=%d\n" (num_mem buf) + | 'c', ('d' as _y) -> Printf.printf "mem_cells=%d\n" (num_mem buf) + | _ -> assert false); + [%expect {| mem_cells=4 |}]; + (* Three rules with one binding each: 6 cells (2 per rule) *) + let buf = Sedlexing.Utf8.from_string "ab" in + (match%sedlex buf with + | 'a', ('b' as _x) -> Printf.printf "mem_cells=%d\n" (num_mem buf) + | 'c', ('d' as _y) -> Printf.printf "mem_cells=%d\n" (num_mem buf) + | 'e', ('f' as _z) -> Printf.printf "mem_cells=%d\n" (num_mem buf) + | _ -> assert false); + [%expect {| mem_cells=6 |}]; + (* Whole-match + tuple binding: 4 cells (2 per rule) *) + let buf = Sedlexing.Utf8.from_string "hello" in + (match%sedlex buf with + | Plus 'a' .. 
'z' as _x -> Printf.printf "mem_cells=%d\n" (num_mem buf) + | '0', (number as _y) -> Printf.printf "mem_cells=%d\n" (num_mem buf) + | _ -> assert false); + [%expect {| mem_cells=4 |}]; + (* Or-pattern rule + tuple binding rule: 7 cells (5 + 2) *) + let buf = Sedlexing.Utf8.from_string "123" in + (match%sedlex buf with + | (number as _x) | (Plus letter as _x) -> + Printf.printf "mem_cells=%d\n" (num_mem buf) + | '{', (Star (Compl '}') as _y), '}' -> + Printf.printf "mem_cells=%d\n" (num_mem buf) + | _ -> assert false); + [%expect {| mem_cells=7 |}]; + (* Two or-pattern rules: 10 cells (5 per rule) *) + let buf = Sedlexing.Utf8.from_string "123" in + (match%sedlex buf with + | (number as _x) | (Plus letter as _x) -> + Printf.printf "mem_cells=%d\n" (num_mem buf) + | ('<' as _y) | ('>' as _y) -> Printf.printf "mem_cells=%d\n" (num_mem buf) + | _ -> assert false); + [%expect {| mem_cells=10 |}] + +let%expect_test "as_bindings_nested_sedlex" = + (* Regression: a nested match%sedlex in a case RHS must not reset the + outer match's tag counter, which would cause init_mem/set_mem to be + dropped and as-bindings to read uninitialized memory cells. *) + let buf = Sedlexing.Utf8.from_string "abc" in + (match%sedlex buf with + | 'a', ('b' as x), 'c' -> + Printf.printf "x=%s\n" (Sedlexing.Utf8.of_submatch x) + | Star any -> ( + (* Nested match%sedlex in a case RHS *) + Sedlexing.rollback buf; + match%sedlex buf with + | Plus 'a' .. 'z' -> Printf.printf "word\n" + | _ -> Printf.printf "other\n") + | _ -> assert false); + [%expect {| x=b |}]; + (* Same but the nested match comes in a case BEFORE the as-binding rule *) + let buf = Sedlexing.Utf8.from_string "abc" in + (match%sedlex buf with + | '0' .. '9' -> ( + Sedlexing.rollback buf; + match%sedlex buf with + | '0' .. '9' -> Printf.printf "digit\n" + | _ -> Printf.printf "other\n") + | Plus 'a' .. 
'z' as x -> + Printf.printf "x=%s\n" (Sedlexing.Utf8.of_submatch x) + | _ -> assert false); + [%expect {| x=abc |}]; + (* Verify the outer match still allocates memory cells *) + let buf = Sedlexing.Utf8.from_string "abc" in + (match%sedlex buf with + | '0' .. '9' -> ( + Sedlexing.rollback buf; + match%sedlex buf with + | '0' .. '9' -> Printf.printf "digit\n" + | _ -> Printf.printf "other\n") + | Plus 'a' .. 'z' as _x -> Printf.printf "mem_cells=%d\n" (num_mem buf) + | _ -> assert false); + [%expect {| mem_cells=2 |}] diff --git a/test/codegen/test_gen.ml b/test/codegen/test_gen.ml index 1166a81..46ae5bf 100644 --- a/test/codegen/test_gen.ml +++ b/test/codegen/test_gen.ml @@ -33,7 +33,7 @@ let%expect_test "simple string match" = match __sedlex_partition_3 (Sedlexing.__private__next_int buf) with | 0 -> 0 | _ -> Sedlexing.backtrack buf in - Sedlexing.start buf; (match __sedlex_state_0 buf with | 0 -> () | _ -> ()) + match Sedlexing.start buf; __sedlex_state_0 buf with | 0 -> () | _ -> () |}] let%expect_test "character class" = @@ -63,7 +63,7 @@ let%expect_test "character class" = (match __sedlex_partition_1 (Sedlexing.__private__next_int buf) with | 0 -> __sedlex_state_1 buf | _ -> Sedlexing.backtrack buf) in - Sedlexing.start buf; (match __sedlex_state_0 buf with | 0 -> () | _ -> ()) + match Sedlexing.start buf; __sedlex_state_0 buf with | 0 -> () | _ -> () |}] let%expect_test "multi-rule" = @@ -115,6 +115,1480 @@ let%expect_test "multi-rule" = match __sedlex_partition_4 (Sedlexing.__private__next_int buf) with | 0 -> 1 | _ -> Sedlexing.backtrack buf in - Sedlexing.start buf; - (match __sedlex_state_0 buf with | 0 -> () | 1 -> () | 2 -> () | _ -> ()) + match Sedlexing.start buf; __sedlex_state_0 buf with + | 0 -> () + | 1 -> () + | 2 -> () + | _ -> () + |}] + +let%expect_test "as binding: simple" = + (match%sedlex_test buf with 'a', ('b' as x), 'c' -> ignore x | _ -> ()); + [%expect + {| + DOT: + digraph { + rankdir=LR; + node [shape=circle]; + + _start 
[shape=point]; + _start -> state0; + + state0 [label="0"]; + state0 -> state1 [label="'a' {t0}"]; + state1 [label="1"]; + state1 -> state2 [label="'b' {t1}"]; + state2 [label="2"]; + state2 -> state3 [label="'c'"]; + state3 [label="3\n[rule 0]", shape=doublecircle]; + } + CODE: + let rec __sedlex_state_0 buf = + match __sedlex_partition_1 (Sedlexing.__private__next_int buf) with + | 0 -> (Sedlexing.__private__set_mem_pos buf 0; __sedlex_state_1 buf) + | _ -> Sedlexing.backtrack buf + and __sedlex_state_1 buf = + match __sedlex_partition_2 (Sedlexing.__private__next_int buf) with + | 0 -> (Sedlexing.__private__set_mem_pos buf 1; __sedlex_state_2 buf) + | _ -> Sedlexing.backtrack buf + and __sedlex_state_2 buf = + match __sedlex_partition_3 (Sedlexing.__private__next_int buf) with + | 0 -> 0 + | _ -> Sedlexing.backtrack buf in + match Sedlexing.start buf; + Sedlexing.__private__init_mem buf 2; + __sedlex_state_0 buf + with + | 0 -> + let x = + let __s = Sedlexing.__private__mem_pos buf 0 in + let __e = Sedlexing.__private__mem_pos buf 1 in + { Sedlexing.lexbuf = buf; pos = __s; len = (__e - __s) } in + ignore x + | _ -> () + |}] + +let%expect_test "as binding: whole-match shortcut" = + (match%sedlex_test buf with Plus 'a' .. 
'z' as x -> ignore x | _ -> ()); + [%expect + {| + DOT: + digraph { + rankdir=LR; + node [shape=circle]; + + _start [shape=point]; + _start -> state0; + + state0 [label="0"]; + state0 -> state1 [label="'a'-'z' {t1}"]; + state1 [label="1\n[rule 0]", shape=doublecircle]; + state1 -> state1 [label="'a'-'z' {t1}"]; + } + CODE: + let rec __sedlex_state_0 buf = + match __sedlex_partition_1 (Sedlexing.__private__next_int buf) with + | 0 -> (Sedlexing.__private__set_mem_pos buf 1; __sedlex_state_1 buf) + | _ -> Sedlexing.backtrack buf + and __sedlex_state_1 buf = + Sedlexing.mark buf 0; + (match __sedlex_partition_1 (Sedlexing.__private__next_int buf) with + | 0 -> (Sedlexing.__private__set_mem_pos buf 1; __sedlex_state_1 buf) + | _ -> Sedlexing.backtrack buf) in + match Sedlexing.start buf; + Sedlexing.__private__init_mem buf 2; + Sedlexing.__private__set_mem_pos buf 0; + __sedlex_state_0 buf + with + | 0 -> + let x = + let __s = Sedlexing.__private__mem_pos buf 0 in + let __e = Sedlexing.__private__mem_pos buf 1 in + { Sedlexing.lexbuf = buf; pos = __s; len = (__e - __s) } in + ignore x + | _ -> () + |}] + +let%expect_test "as binding: multiple bindings" = + (match%sedlex_test buf with + | ('a' as x), ('b' as y), 'c' -> ignore (x, y) + | _ -> ()); + [%expect + {| + DOT: + digraph { + rankdir=LR; + node [shape=circle]; + + _start [shape=point]; + _start -> state0; + + state0 [label="0"]; + state0 -> state1 [label="'a' {t2,t1}"]; + state1 [label="1"]; + state1 -> state2 [label="'b' {t3}"]; + state2 [label="2"]; + state2 -> state3 [label="'c'"]; + state3 [label="3\n[rule 0]", shape=doublecircle]; + } + CODE: + let rec __sedlex_state_0 buf = + match __sedlex_partition_1 (Sedlexing.__private__next_int buf) with + | 0 -> + (Sedlexing.__private__set_mem_pos buf 2; + Sedlexing.__private__set_mem_pos buf 1; + __sedlex_state_1 buf) + | _ -> Sedlexing.backtrack buf + and __sedlex_state_1 buf = + match __sedlex_partition_2 (Sedlexing.__private__next_int buf) with + | 0 -> 
(Sedlexing.__private__set_mem_pos buf 3; __sedlex_state_2 buf) + | _ -> Sedlexing.backtrack buf + and __sedlex_state_2 buf = + match __sedlex_partition_3 (Sedlexing.__private__next_int buf) with + | 0 -> 0 + | _ -> Sedlexing.backtrack buf in + match Sedlexing.start buf; + Sedlexing.__private__init_mem buf 4; + Sedlexing.__private__set_mem_pos buf 0; + __sedlex_state_0 buf + with + | 0 -> + let x = + let __s = Sedlexing.__private__mem_pos buf 0 in + let __e = Sedlexing.__private__mem_pos buf 1 in + { Sedlexing.lexbuf = buf; pos = __s; len = (__e - __s) } in + let y = + let __s = Sedlexing.__private__mem_pos buf 2 in + let __e = Sedlexing.__private__mem_pos buf 3 in + { Sedlexing.lexbuf = buf; pos = __s; len = (__e - __s) } in + ignore (x, y) + | _ -> () + |}] + +let%expect_test "as binding: or-pattern with discriminator" = + (match%sedlex_test buf with + | (Plus '0' .. '9' as x) | (Plus 'a' .. 'z' as x) -> ignore x + | _ -> ()); + [%expect + {| + DOT: + digraph { + rankdir=LR; + node [shape=circle]; + + _start [shape=point]; + _start -> state0; + + state0 [label="0"]; + state0 -> state1 [label="'0'-'9' {d4=0,t1}"]; + state0 -> state2 [label="'a'-'z' {d4=1,t3}"]; + state1 [label="1\n[rule 0]", shape=doublecircle]; + state1 -> state1 [label="'0'-'9' {d4=0,t1}"]; + state2 [label="2\n[rule 0]", shape=doublecircle]; + state2 -> state2 [label="'a'-'z' {d4=1,t3}"]; + } + CODE: + let rec __sedlex_state_0 buf = + match __sedlex_partition_1 (Sedlexing.__private__next_int buf) with + | 0 -> + (Sedlexing.__private__set_mem_value buf 4 0; + Sedlexing.__private__set_mem_pos buf 1; + __sedlex_state_1 buf) + | 1 -> + (Sedlexing.__private__set_mem_value buf 4 1; + Sedlexing.__private__set_mem_pos buf 3; + __sedlex_state_2 buf) + | _ -> Sedlexing.backtrack buf + and __sedlex_state_1 buf = + Sedlexing.mark buf 0; + (match __sedlex_partition_2 (Sedlexing.__private__next_int buf) with + | 0 -> + (Sedlexing.__private__set_mem_value buf 4 0; + Sedlexing.__private__set_mem_pos buf 1; + 
__sedlex_state_1 buf) + | _ -> Sedlexing.backtrack buf) + and __sedlex_state_2 buf = + Sedlexing.mark buf 0; + (match __sedlex_partition_3 (Sedlexing.__private__next_int buf) with + | 0 -> + (Sedlexing.__private__set_mem_value buf 4 1; + Sedlexing.__private__set_mem_pos buf 3; + __sedlex_state_2 buf) + | _ -> Sedlexing.backtrack buf) in + match Sedlexing.start buf; + Sedlexing.__private__init_mem buf 5; + Sedlexing.__private__set_mem_pos buf 2; + Sedlexing.__private__set_mem_pos buf 0; + __sedlex_state_0 buf + with + | 0 -> + let x = + if (Sedlexing.__private__mem_value buf 4) = 0 + then + let __s = Sedlexing.__private__mem_pos buf 0 in + let __e = Sedlexing.__private__mem_pos buf 1 in + { Sedlexing.lexbuf = buf; pos = __s; len = (__e - __s) } + else + (let __s = Sedlexing.__private__mem_pos buf 2 in + let __e = Sedlexing.__private__mem_pos buf 3 in + { Sedlexing.lexbuf = buf; pos = __s; len = (__e - __s) }) in + ignore x + | _ -> () + |}] + +let%expect_test "as binding: shared prefix or-pattern" = + (match%sedlex_test buf with + | ("abc" as x), "def" | "a", ("bcd" as x), "ey" -> ignore x + | _ -> ()); + [%expect + {| + DOT: + digraph { + rankdir=LR; + node [shape=circle]; + + _start [shape=point]; + _start -> state0; + + state0 [label="0"]; + state0 -> state1 [label="'a' {t2}"]; + state1 [label="1"]; + state1 -> state2 [label="'b'"]; + state2 [label="2"]; + state2 -> state3 [label="'c' {t1}"]; + state3 [label="3"]; + state3 -> state4 [label="'d' {t3}"]; + state4 [label="4"]; + state4 -> state5 [label="'e'"]; + state5 [label="5"]; + state5 -> state6 [label="'f' {d4=0}"]; + state5 -> state7 [label="'y' {d4=1}"]; + state6 [label="6\n[rule 0]", shape=doublecircle]; + state7 [label="7\n[rule 0]", shape=doublecircle]; + } + CODE: + let rec __sedlex_state_0 buf = + match __sedlex_partition_1 (Sedlexing.__private__next_int buf) with + | 0 -> (Sedlexing.__private__set_mem_pos buf 2; __sedlex_state_1 buf) + | _ -> Sedlexing.backtrack buf + and __sedlex_state_1 buf = + match 
__sedlex_partition_2 (Sedlexing.__private__next_int buf) with + | 0 -> __sedlex_state_2 buf + | _ -> Sedlexing.backtrack buf + and __sedlex_state_2 buf = + match __sedlex_partition_3 (Sedlexing.__private__next_int buf) with + | 0 -> (Sedlexing.__private__set_mem_pos buf 1; __sedlex_state_3 buf) + | _ -> Sedlexing.backtrack buf + and __sedlex_state_3 buf = + match __sedlex_partition_4 (Sedlexing.__private__next_int buf) with + | 0 -> (Sedlexing.__private__set_mem_pos buf 3; __sedlex_state_4 buf) + | _ -> Sedlexing.backtrack buf + and __sedlex_state_4 buf = + match __sedlex_partition_5 (Sedlexing.__private__next_int buf) with + | 0 -> __sedlex_state_5 buf + | _ -> Sedlexing.backtrack buf + and __sedlex_state_5 buf = + match __sedlex_partition_6 (Sedlexing.__private__next_int buf) with + | 0 -> (Sedlexing.__private__set_mem_value buf 4 0; 0) + | 1 -> (Sedlexing.__private__set_mem_value buf 4 1; 0) + | _ -> Sedlexing.backtrack buf in + match Sedlexing.start buf; + Sedlexing.__private__init_mem buf 5; + Sedlexing.__private__set_mem_pos buf 0; + __sedlex_state_0 buf + with + | 0 -> + let x = + if (Sedlexing.__private__mem_value buf 4) = 0 + then + let __s = Sedlexing.__private__mem_pos buf 0 in + let __e = Sedlexing.__private__mem_pos buf 1 in + { Sedlexing.lexbuf = buf; pos = __s; len = (__e - __s) } + else + (let __s = Sedlexing.__private__mem_pos buf 2 in + let __e = Sedlexing.__private__mem_pos buf 3 in + { Sedlexing.lexbuf = buf; pos = __s; len = (__e - __s) }) in + ignore x + | _ -> () + |}] + +let%expect_test "as binding: 3-way or reuses disc cell" = + (match%sedlex_test buf with + | ("ab" as x), "cd" | ("a" as x), "bce" | ("abc" as x), "df" -> ignore x + | _ -> ()); + [%expect + {| + DOT: + digraph { + rankdir=LR; + node [shape=circle]; + + _start [shape=point]; + _start -> state0; + + state0 [label="0"]; + state0 -> state1 [label="'a' {t3}"]; + state1 [label="1"]; + state1 -> state2 [label="'b' {t1}"]; + state2 [label="2"]; + state2 -> state3 [label="'c' {t6}"]; 
+ state3 [label="3"]; + state3 -> state4 [label="'d' {d7=0,d4=0}"]; + state3 -> state6 [label="'e' {d7=0,d4=1}"]; + state4 [label="4\n[rule 0]", shape=doublecircle]; + state4 -> state5 [label="'f' {d7=1}"]; + state5 [label="5\n[rule 0]", shape=doublecircle]; + state6 [label="6\n[rule 0]", shape=doublecircle]; + } + CODE: + let rec __sedlex_state_0 buf = + match __sedlex_partition_1 (Sedlexing.__private__next_int buf) with + | 0 -> (Sedlexing.__private__set_mem_pos buf 3; __sedlex_state_1 buf) + | _ -> Sedlexing.backtrack buf + and __sedlex_state_1 buf = + match __sedlex_partition_2 (Sedlexing.__private__next_int buf) with + | 0 -> (Sedlexing.__private__set_mem_pos buf 1; __sedlex_state_2 buf) + | _ -> Sedlexing.backtrack buf + and __sedlex_state_2 buf = + match __sedlex_partition_3 (Sedlexing.__private__next_int buf) with + | 0 -> (Sedlexing.__private__set_mem_pos buf 6; __sedlex_state_3 buf) + | _ -> Sedlexing.backtrack buf + and __sedlex_state_3 buf = + match __sedlex_partition_4 (Sedlexing.__private__next_int buf) with + | 0 -> + (Sedlexing.__private__set_mem_value buf 7 0; + Sedlexing.__private__set_mem_value buf 4 0; + __sedlex_state_4 buf) + | 1 -> + (Sedlexing.__private__set_mem_value buf 7 0; + Sedlexing.__private__set_mem_value buf 4 1; + 0) + | _ -> Sedlexing.backtrack buf + and __sedlex_state_4 buf = + Sedlexing.mark buf 0; + (match __sedlex_partition_5 (Sedlexing.__private__next_int buf) with + | 0 -> (Sedlexing.__private__set_mem_value buf 7 1; 0) + | _ -> Sedlexing.backtrack buf) in + match Sedlexing.start buf; + Sedlexing.__private__init_mem buf 8; + Sedlexing.__private__set_mem_pos buf 5; + Sedlexing.__private__set_mem_pos buf 2; + Sedlexing.__private__set_mem_pos buf 0; + __sedlex_state_0 buf + with + | 0 -> + let x = + if + ((Sedlexing.__private__mem_value buf 7) = 0) && + ((Sedlexing.__private__mem_value buf 4) = 0) + then + let __s = Sedlexing.__private__mem_pos buf 0 in + let __e = Sedlexing.__private__mem_pos buf 1 in + { Sedlexing.lexbuf = 
buf; pos = __s; len = (__e - __s) } + else + if + ((Sedlexing.__private__mem_value buf 7) = 0) && + ((Sedlexing.__private__mem_value buf 4) = 1) + then + (let __s = Sedlexing.__private__mem_pos buf 2 in + let __e = Sedlexing.__private__mem_pos buf 3 in + { Sedlexing.lexbuf = buf; pos = __s; len = (__e - __s) }) + else + (let __s = Sedlexing.__private__mem_pos buf 5 in + let __e = Sedlexing.__private__mem_pos buf 6 in + { Sedlexing.lexbuf = buf; pos = __s; len = (__e - __s) }) in + ignore x + | _ -> () + |}] + +let%expect_test "as binding: multi-rule" = + (match%sedlex_test buf with + | 'a', ('b' as x) -> ignore x + | "cd" -> () + | _ -> ()); + [%expect + {| + DOT: + digraph { + rankdir=LR; + node [shape=circle]; + + _start [shape=point]; + _start -> state0; + + state0 [label="0"]; + state0 -> state1 [label="'a' {t0}"]; + state0 -> state3 [label="'c'"]; + state1 [label="1"]; + state1 -> state2 [label="'b' {t1}"]; + state2 [label="2\n[rule 0]", shape=doublecircle]; + state3 [label="3"]; + state3 -> state4 [label="'d'"]; + state4 [label="4\n[rule 1]", shape=doublecircle]; + } + CODE: + let rec __sedlex_state_0 buf = + match __sedlex_partition_1 (Sedlexing.__private__next_int buf) with + | 0 -> (Sedlexing.__private__set_mem_pos buf 0; __sedlex_state_1 buf) + | 1 -> __sedlex_state_3 buf + | _ -> Sedlexing.backtrack buf + and __sedlex_state_1 buf = + match __sedlex_partition_2 (Sedlexing.__private__next_int buf) with + | 0 -> (Sedlexing.__private__set_mem_pos buf 1; 0) + | _ -> Sedlexing.backtrack buf + and __sedlex_state_3 buf = + match __sedlex_partition_3 (Sedlexing.__private__next_int buf) with + | 0 -> 1 + | _ -> Sedlexing.backtrack buf in + match Sedlexing.start buf; + Sedlexing.__private__init_mem buf 2; + __sedlex_state_0 buf + with + | 0 -> + let x = + let __s = Sedlexing.__private__mem_pos buf 0 in + let __e = Sedlexing.__private__mem_pos buf 1 in + { Sedlexing.lexbuf = buf; pos = __s; len = (__e - __s) } in + ignore x + | 1 -> () + | _ -> () + |}] + 
+let%expect_test "as binding: wrapping alternation" = + (match%sedlex_test buf with 'x', (('a' | 'b') as y) -> ignore y | _ -> ()); + [%expect + {| + DOT: + digraph { + rankdir=LR; + node [shape=circle]; + + _start [shape=point]; + _start -> state0; + + state0 [label="0"]; + state0 -> state1 [label="'x' {t0}"]; + state1 [label="1"]; + state1 -> state2 [label="'a'-'b' {t1}"]; + state2 [label="2\n[rule 0]", shape=doublecircle]; + } + CODE: + let rec __sedlex_state_0 buf = + match __sedlex_partition_1 (Sedlexing.__private__next_int buf) with + | 0 -> (Sedlexing.__private__set_mem_pos buf 0; __sedlex_state_1 buf) + | _ -> Sedlexing.backtrack buf + and __sedlex_state_1 buf = + match __sedlex_partition_2 (Sedlexing.__private__next_int buf) with + | 0 -> (Sedlexing.__private__set_mem_pos buf 1; 0) + | _ -> Sedlexing.backtrack buf in + match Sedlexing.start buf; + Sedlexing.__private__init_mem buf 2; + __sedlex_state_0 buf + with + | 0 -> + let y = + let __s = Sedlexing.__private__mem_pos buf 0 in + let __e = Sedlexing.__private__mem_pos buf 1 in + { Sedlexing.lexbuf = buf; pos = __s; len = (__e - __s) } in + ignore y + | _ -> () + |}] + +(* === Optimization tracking tests === + Each test below targets a specific optimization. + As optimizations are implemented, the expected output will improve + (fewer tags, fewer set_mem calls, etc.). *) + +(* Optimization 1: Element-length (Offset_from_tag) + When neither prefix nor suffix length is known but the element itself + has a fixed codepoint length, only 1 tag should be needed instead of 2. + Current: init_mem 2 (start + end tags). + Goal: init_mem 1, with end computed as start_tag + 1. 
*) +let%expect_test "optim: element-length (Offset_from_tag)" = + (match%sedlex_test buf with + | Plus 'a', ('b' as x), Plus 'c' -> ignore x + | _ -> ()); + [%expect + {| + DOT: + digraph { + rankdir=LR; + node [shape=circle]; + + _start [shape=point]; + _start -> state0; + + state0 [label="0"]; + state0 -> state1 [label="'a' {t0}"]; + state1 [label="1"]; + state1 -> state1 [label="'a' {t0}"]; + state1 -> state2 [label="'b' {t1}"]; + state2 [label="2"]; + state2 -> state3 [label="'c'"]; + state3 [label="3\n[rule 0]", shape=doublecircle]; + state3 -> state3 [label="'c'"]; + } + CODE: + let rec __sedlex_state_0 buf = + match __sedlex_partition_1 (Sedlexing.__private__next_int buf) with + | 0 -> (Sedlexing.__private__set_mem_pos buf 0; __sedlex_state_1 buf) + | _ -> Sedlexing.backtrack buf + and __sedlex_state_1 buf = + match __sedlex_partition_2 (Sedlexing.__private__next_int buf) with + | 0 -> (Sedlexing.__private__set_mem_pos buf 0; __sedlex_state_1 buf) + | 1 -> (Sedlexing.__private__set_mem_pos buf 1; __sedlex_state_2 buf) + | _ -> Sedlexing.backtrack buf + and __sedlex_state_2 buf = + match __sedlex_partition_3 (Sedlexing.__private__next_int buf) with + | 0 -> __sedlex_state_3 buf + | _ -> Sedlexing.backtrack buf + and __sedlex_state_3 buf = + Sedlexing.mark buf 0; + (match __sedlex_partition_3 (Sedlexing.__private__next_int buf) with + | 0 -> __sedlex_state_3 buf + | _ -> Sedlexing.backtrack buf) in + match Sedlexing.start buf; + Sedlexing.__private__init_mem buf 2; + __sedlex_state_0 buf + with + | 0 -> + let x = + let __s = Sedlexing.__private__mem_pos buf 0 in + let __e = Sedlexing.__private__mem_pos buf 1 in + { Sedlexing.lexbuf = buf; pos = __s; len = (__e - __s) } in + ignore x + | _ -> () + |}] + +(* Optimization 2: Or-pattern offset propagation + When an or-pattern is at the top level of a rule, as-bindings + covering the whole branch should get Start_plus 0 / End_minus 0 + without allocating any tags. 
+ Current: init_mem 5 (2 binding tags per branch + 1 disc cell). + Goal: init_mem 0 (or no init_mem at all). *) +let%expect_test "optim: or-pattern offset propagation" = + (match%sedlex_test buf with + | (Plus 'a' as x) | (Plus 'b' as x) -> ignore x + | _ -> ()); + [%expect + {| + DOT: + digraph { + rankdir=LR; + node [shape=circle]; + + _start [shape=point]; + _start -> state0; + + state0 [label="0"]; + state0 -> state1 [label="'a' {d4=0,t1}"]; + state0 -> state2 [label="'b' {d4=1,t3}"]; + state1 [label="1\n[rule 0]", shape=doublecircle]; + state1 -> state1 [label="'a' {d4=0,t1}"]; + state2 [label="2\n[rule 0]", shape=doublecircle]; + state2 -> state2 [label="'b' {d4=1,t3}"]; + } + CODE: + let rec __sedlex_state_0 buf = + match __sedlex_partition_1 (Sedlexing.__private__next_int buf) with + | 0 -> + (Sedlexing.__private__set_mem_value buf 4 0; + Sedlexing.__private__set_mem_pos buf 1; + __sedlex_state_1 buf) + | 1 -> + (Sedlexing.__private__set_mem_value buf 4 1; + Sedlexing.__private__set_mem_pos buf 3; + __sedlex_state_2 buf) + | _ -> Sedlexing.backtrack buf + and __sedlex_state_1 buf = + Sedlexing.mark buf 0; + (match __sedlex_partition_2 (Sedlexing.__private__next_int buf) with + | 0 -> + (Sedlexing.__private__set_mem_value buf 4 0; + Sedlexing.__private__set_mem_pos buf 1; + __sedlex_state_1 buf) + | _ -> Sedlexing.backtrack buf) + and __sedlex_state_2 buf = + Sedlexing.mark buf 0; + (match __sedlex_partition_3 (Sedlexing.__private__next_int buf) with + | 0 -> + (Sedlexing.__private__set_mem_value buf 4 1; + Sedlexing.__private__set_mem_pos buf 3; + __sedlex_state_2 buf) + | _ -> Sedlexing.backtrack buf) in + match Sedlexing.start buf; + Sedlexing.__private__init_mem buf 5; + Sedlexing.__private__set_mem_pos buf 2; + Sedlexing.__private__set_mem_pos buf 0; + __sedlex_state_0 buf + with + | 0 -> + let x = + if (Sedlexing.__private__mem_value buf 4) = 0 + then + let __s = Sedlexing.__private__mem_pos buf 0 in + let __e = Sedlexing.__private__mem_pos buf 1 in + 
{ Sedlexing.lexbuf = buf; pos = __s; len = (__e - __s) } + else + (let __s = Sedlexing.__private__mem_pos buf 2 in + let __e = Sedlexing.__private__mem_pos buf 3 in + { Sedlexing.lexbuf = buf; pos = __s; len = (__e - __s) }) in + ignore x + | _ -> () + |}] + +(* Optimization 3: Discriminator elision + When both branches of an or-pattern produce identical position + expressions, the discriminator tag should be skipped entirely. + Current: init_mem 5 (same as optim 2). + Goal: 0 tags (both branches yield Start_plus 0, End_minus 0). *) +let%expect_test "optim: discriminator elision" = + (match%sedlex_test buf with + | (Plus '0' .. '9' as x) | (Plus 'a' .. 'z' as x) -> ignore x + | _ -> ()); + [%expect + {| + DOT: + digraph { + rankdir=LR; + node [shape=circle]; + + _start [shape=point]; + _start -> state0; + + state0 [label="0"]; + state0 -> state1 [label="'0'-'9' {d4=0,t1}"]; + state0 -> state2 [label="'a'-'z' {d4=1,t3}"]; + state1 [label="1\n[rule 0]", shape=doublecircle]; + state1 -> state1 [label="'0'-'9' {d4=0,t1}"]; + state2 [label="2\n[rule 0]", shape=doublecircle]; + state2 -> state2 [label="'a'-'z' {d4=1,t3}"]; + } + CODE: + let rec __sedlex_state_0 buf = + match __sedlex_partition_1 (Sedlexing.__private__next_int buf) with + | 0 -> + (Sedlexing.__private__set_mem_value buf 4 0; + Sedlexing.__private__set_mem_pos buf 1; + __sedlex_state_1 buf) + | 1 -> + (Sedlexing.__private__set_mem_value buf 4 1; + Sedlexing.__private__set_mem_pos buf 3; + __sedlex_state_2 buf) + | _ -> Sedlexing.backtrack buf + and __sedlex_state_1 buf = + Sedlexing.mark buf 0; + (match __sedlex_partition_2 (Sedlexing.__private__next_int buf) with + | 0 -> + (Sedlexing.__private__set_mem_value buf 4 0; + Sedlexing.__private__set_mem_pos buf 1; + __sedlex_state_1 buf) + | _ -> Sedlexing.backtrack buf) + and __sedlex_state_2 buf = + Sedlexing.mark buf 0; + (match __sedlex_partition_3 (Sedlexing.__private__next_int buf) with + | 0 -> + (Sedlexing.__private__set_mem_value buf 4 1; + 
Sedlexing.__private__set_mem_pos buf 3; + __sedlex_state_2 buf) + | _ -> Sedlexing.backtrack buf) in + match Sedlexing.start buf; + Sedlexing.__private__init_mem buf 5; + Sedlexing.__private__set_mem_pos buf 2; + Sedlexing.__private__set_mem_pos buf 0; + __sedlex_state_0 buf + with + | 0 -> + let x = + if (Sedlexing.__private__mem_value buf 4) = 0 + then + let __s = Sedlexing.__private__mem_pos buf 0 in + let __e = Sedlexing.__private__mem_pos buf 1 in + { Sedlexing.lexbuf = buf; pos = __s; len = (__e - __s) } + else + (let __s = Sedlexing.__private__mem_pos buf 2 in + let __e = Sedlexing.__private__mem_pos buf 3 in + { Sedlexing.lexbuf = buf; pos = __s; len = (__e - __s) }) in + ignore x + | _ -> () + |}] + +(* Optimization 4: Intra-rule tag coalescing + Tags with identical occurrence signatures should share one memory cell. + Here x_end (t1) and y_start (t2) fire on the same transitions. + Current: init_mem 4 (x_start=t0, x_end=t1, y_start=t2, y_end=t3). + Goal: init_mem 3 (t1 and t2 coalesce → x_start, x_end=y_start, y_end). 
*) +let%expect_test "optim: intra-rule tag coalescing" = + (match%sedlex_test buf with + | (Plus 'a' as x), (Plus 'b' as y) -> ignore (x, y) + | _ -> ()); + [%expect + {| + DOT: + digraph { + rankdir=LR; + node [shape=circle]; + + _start [shape=point]; + _start -> state0; + + state0 [label="0"]; + state0 -> state1 [label="'a' {t2,t1}"]; + state1 [label="1"]; + state1 -> state1 [label="'a' {t2,t1}"]; + state1 -> state2 [label="'b' {t3}"]; + state2 [label="2\n[rule 0]", shape=doublecircle]; + state2 -> state2 [label="'b' {t3}"]; + } + CODE: + let rec __sedlex_state_0 buf = + match __sedlex_partition_1 (Sedlexing.__private__next_int buf) with + | 0 -> + (Sedlexing.__private__set_mem_pos buf 2; + Sedlexing.__private__set_mem_pos buf 1; + __sedlex_state_1 buf) + | _ -> Sedlexing.backtrack buf + and __sedlex_state_1 buf = + match __sedlex_partition_2 (Sedlexing.__private__next_int buf) with + | 0 -> + (Sedlexing.__private__set_mem_pos buf 2; + Sedlexing.__private__set_mem_pos buf 1; + __sedlex_state_1 buf) + | 1 -> (Sedlexing.__private__set_mem_pos buf 3; __sedlex_state_2 buf) + | _ -> Sedlexing.backtrack buf + and __sedlex_state_2 buf = + Sedlexing.mark buf 0; + (match __sedlex_partition_3 (Sedlexing.__private__next_int buf) with + | 0 -> (Sedlexing.__private__set_mem_pos buf 3; __sedlex_state_2 buf) + | _ -> Sedlexing.backtrack buf) in + match Sedlexing.start buf; + Sedlexing.__private__init_mem buf 4; + Sedlexing.__private__set_mem_pos buf 0; + __sedlex_state_0 buf + with + | 0 -> + let x = + let __s = Sedlexing.__private__mem_pos buf 0 in + let __e = Sedlexing.__private__mem_pos buf 1 in + { Sedlexing.lexbuf = buf; pos = __s; len = (__e - __s) } in + let y = + let __s = Sedlexing.__private__mem_pos buf 2 in + let __e = Sedlexing.__private__mem_pos buf 3 in + { Sedlexing.lexbuf = buf; pos = __s; len = (__e - __s) } in + ignore (x, y) + | _ -> () + |}] + +(* Coalescing: or-pattern where both branches bind x at the same DFA + position (Plus 'b' after Plus 'a'). 
Each branch allocates its own + start/end tags for x, but since x covers the same DFA transitions + in both branches, its tags have identical occurrence signatures and + could coalesce. y's tags differ (Plus 'c' vs Plus 'f' → different states). + Current: init_mem 9 (4 binding tags per branch + 1 disc cell). + Goal with coalescing: x's branch tags share cells, reducing total. *) +let%expect_test "coalescing: or-pattern with same-position bindings" = + (match%sedlex_test buf with + | Plus 'a', (Plus 'b' as x), (Plus 'c' as y) + | Plus 'a', (Plus 'b' as x), (Plus 'f' as y) -> + ignore (x, y) + | _ -> ()); + [%expect + {| + DOT: + digraph { + rankdir=LR; + node [shape=circle]; + + _start [shape=point]; + _start -> state0; + + state0 [label="0"]; + state0 -> state1 [label="'a' {t0,t4}"]; + state1 [label="1"]; + state1 -> state1 [label="'a' {t0,t4}"]; + state1 -> state2 [label="'b' {t2,t1,t6,t5}"]; + state2 [label="2"]; + state2 -> state2 [label="'b' {t2,t1,t6,t5}"]; + state2 -> state3 [label="'c' {d8=0,t3}"]; + state2 -> state4 [label="'f' {d8=1,t7}"]; + state3 [label="3\n[rule 0]", shape=doublecircle]; + state3 -> state3 [label="'c' {d8=0,t3}"]; + state4 [label="4\n[rule 0]", shape=doublecircle]; + state4 -> state4 [label="'f' {d8=1,t7}"]; + } + CODE: + let rec __sedlex_state_0 buf = + match __sedlex_partition_1 (Sedlexing.__private__next_int buf) with + | 0 -> + (Sedlexing.__private__set_mem_pos buf 0; + Sedlexing.__private__set_mem_pos buf 4; + __sedlex_state_1 buf) + | _ -> Sedlexing.backtrack buf + and __sedlex_state_1 buf = + match __sedlex_partition_2 (Sedlexing.__private__next_int buf) with + | 0 -> + (Sedlexing.__private__set_mem_pos buf 0; + Sedlexing.__private__set_mem_pos buf 4; + __sedlex_state_1 buf) + | 1 -> + (Sedlexing.__private__set_mem_pos buf 2; + Sedlexing.__private__set_mem_pos buf 1; + Sedlexing.__private__set_mem_pos buf 6; + Sedlexing.__private__set_mem_pos buf 5; + __sedlex_state_2 buf) + | _ -> Sedlexing.backtrack buf + and __sedlex_state_2 
buf = + match __sedlex_partition_3 (Sedlexing.__private__next_int buf) with + | 0 -> + (Sedlexing.__private__set_mem_pos buf 2; + Sedlexing.__private__set_mem_pos buf 1; + Sedlexing.__private__set_mem_pos buf 6; + Sedlexing.__private__set_mem_pos buf 5; + __sedlex_state_2 buf) + | 1 -> + (Sedlexing.__private__set_mem_value buf 8 0; + Sedlexing.__private__set_mem_pos buf 3; + __sedlex_state_3 buf) + | 2 -> + (Sedlexing.__private__set_mem_value buf 8 1; + Sedlexing.__private__set_mem_pos buf 7; + __sedlex_state_4 buf) + | _ -> Sedlexing.backtrack buf + and __sedlex_state_3 buf = + Sedlexing.mark buf 0; + (match __sedlex_partition_4 (Sedlexing.__private__next_int buf) with + | 0 -> + (Sedlexing.__private__set_mem_value buf 8 0; + Sedlexing.__private__set_mem_pos buf 3; + __sedlex_state_3 buf) + | _ -> Sedlexing.backtrack buf) + and __sedlex_state_4 buf = + Sedlexing.mark buf 0; + (match __sedlex_partition_5 (Sedlexing.__private__next_int buf) with + | 0 -> + (Sedlexing.__private__set_mem_value buf 8 1; + Sedlexing.__private__set_mem_pos buf 7; + __sedlex_state_4 buf) + | _ -> Sedlexing.backtrack buf) in + match Sedlexing.start buf; + Sedlexing.__private__init_mem buf 9; + __sedlex_state_0 buf + with + | 0 -> + let x = + if (Sedlexing.__private__mem_value buf 8) = 0 + then + let __s = Sedlexing.__private__mem_pos buf 0 in + let __e = Sedlexing.__private__mem_pos buf 1 in + { Sedlexing.lexbuf = buf; pos = __s; len = (__e - __s) } + else + (let __s = Sedlexing.__private__mem_pos buf 4 in + let __e = Sedlexing.__private__mem_pos buf 5 in + { Sedlexing.lexbuf = buf; pos = __s; len = (__e - __s) }) in + let y = + if (Sedlexing.__private__mem_value buf 8) = 0 + then + let __s = Sedlexing.__private__mem_pos buf 2 in + let __e = Sedlexing.__private__mem_pos buf 3 in + { Sedlexing.lexbuf = buf; pos = __s; len = (__e - __s) } + else + (let __s = Sedlexing.__private__mem_pos buf 6 in + let __e = Sedlexing.__private__mem_pos buf 7 in + { Sedlexing.lexbuf = buf; pos = __s; len = 
(__e - __s) }) in + ignore (x, y) + | _ -> () + |}] + +(* Optimization 5: Cross-rule cell sharing (graph coloring) + Non-interfering rules should reuse the same memory cells. + Rule 0 and rule 1 never co-exist in the same DFA state (beyond state 0), + so their tags can share cells. + Current: init_mem 4 (2 per rule, summed). + Goal: init_mem 2 (max of the two, cells shared). *) +let%expect_test "optim: cross-rule cell sharing" = + (match%sedlex_test buf with + | Plus 'a', (Plus 'b' as x), Plus 'c' -> ignore x + | Plus 'd', (Plus 'e' as y), Plus 'f' -> ignore y + | _ -> ()); + [%expect + {| + DOT: + digraph { + rankdir=LR; + node [shape=circle]; + + _start [shape=point]; + _start -> state0; + + state0 [label="0"]; + state0 -> state1 [label="'a' {t0}"]; + state0 -> state4 [label="'d' {t2}"]; + state1 [label="1"]; + state1 -> state1 [label="'a' {t0}"]; + state1 -> state2 [label="'b' {t1}"]; + state2 [label="2"]; + state2 -> state2 [label="'b' {t1}"]; + state2 -> state3 [label="'c'"]; + state3 [label="3\n[rule 0]", shape=doublecircle]; + state3 -> state3 [label="'c'"]; + state4 [label="4"]; + state4 -> state4 [label="'d' {t2}"]; + state4 -> state5 [label="'e' {t3}"]; + state5 [label="5"]; + state5 -> state5 [label="'e' {t3}"]; + state5 -> state6 [label="'f'"]; + state6 [label="6\n[rule 1]", shape=doublecircle]; + state6 -> state6 [label="'f'"]; + } + CODE: + let rec __sedlex_state_0 buf = + match __sedlex_partition_1 (Sedlexing.__private__next_int buf) with + | 0 -> (Sedlexing.__private__set_mem_pos buf 0; __sedlex_state_1 buf) + | 1 -> (Sedlexing.__private__set_mem_pos buf 2; __sedlex_state_4 buf) + | _ -> Sedlexing.backtrack buf + and __sedlex_state_1 buf = + match __sedlex_partition_2 (Sedlexing.__private__next_int buf) with + | 0 -> (Sedlexing.__private__set_mem_pos buf 0; __sedlex_state_1 buf) + | 1 -> (Sedlexing.__private__set_mem_pos buf 1; __sedlex_state_2 buf) + | _ -> Sedlexing.backtrack buf + and __sedlex_state_2 buf = + match __sedlex_partition_3 
(Sedlexing.__private__next_int buf) with + | 0 -> (Sedlexing.__private__set_mem_pos buf 1; __sedlex_state_2 buf) + | 1 -> __sedlex_state_3 buf + | _ -> Sedlexing.backtrack buf + and __sedlex_state_3 buf = + Sedlexing.mark buf 0; + (match __sedlex_partition_4 (Sedlexing.__private__next_int buf) with + | 0 -> __sedlex_state_3 buf + | _ -> Sedlexing.backtrack buf) + and __sedlex_state_4 buf = + match __sedlex_partition_5 (Sedlexing.__private__next_int buf) with + | 0 -> (Sedlexing.__private__set_mem_pos buf 2; __sedlex_state_4 buf) + | 1 -> (Sedlexing.__private__set_mem_pos buf 3; __sedlex_state_5 buf) + | _ -> Sedlexing.backtrack buf + and __sedlex_state_5 buf = + match __sedlex_partition_6 (Sedlexing.__private__next_int buf) with + | 0 -> (Sedlexing.__private__set_mem_pos buf 3; __sedlex_state_5 buf) + | 1 -> __sedlex_state_6 buf + | _ -> Sedlexing.backtrack buf + and __sedlex_state_6 buf = + Sedlexing.mark buf 1; + (match __sedlex_partition_7 (Sedlexing.__private__next_int buf) with + | 0 -> __sedlex_state_6 buf + | _ -> Sedlexing.backtrack buf) in + match Sedlexing.start buf; + Sedlexing.__private__init_mem buf 4; + __sedlex_state_0 buf + with + | 0 -> + let x = + let __s = Sedlexing.__private__mem_pos buf 0 in + let __e = Sedlexing.__private__mem_pos buf 1 in + { Sedlexing.lexbuf = buf; pos = __s; len = (__e - __s) } in + ignore x + | 1 -> + let y = + let __s = Sedlexing.__private__mem_pos buf 2 in + let __e = Sedlexing.__private__mem_pos buf 3 in + { Sedlexing.lexbuf = buf; pos = __s; len = (__e - __s) } in + ignore y + | _ -> () + |}] + +(* Optimization 6: Dead tag elimination + Tags on transitions where the tag's owning rule can no longer reach + a final state should be removed. + Rule 0 has a binding on Plus 'b'; rule 1 does not. + Both share the Plus 'a', Plus 'b' prefix in the DFA. + Current: init_mem 2, tags t0/t1 set on shared prefix transitions + even when only rule 1 is reachable via 'd'. + Goal: no tags on transitions leading exclusively to rule 1. 
*) +let%expect_test "optim: dead tag elimination" = + (match%sedlex_test buf with + | Plus 'a', (Plus 'b' as x), 'c' -> ignore x + | Plus 'a', Plus 'b', 'd' -> () + | _ -> ()); + [%expect + {| + DOT: + digraph { + rankdir=LR; + node [shape=circle]; + + _start [shape=point]; + _start -> state0; + + state0 [label="0"]; + state0 -> state1 [label="'a' {t0}"]; + state1 [label="1"]; + state1 -> state1 [label="'a' {t0}"]; + state1 -> state2 [label="'b' {t1}"]; + state2 [label="2"]; + state2 -> state2 [label="'b' {t1}"]; + state2 -> state3 [label="'c'"]; + state2 -> state4 [label="'d'"]; + state3 [label="3\n[rule 0]", shape=doublecircle]; + state4 [label="4\n[rule 1]", shape=doublecircle]; + } + CODE: + let rec __sedlex_state_0 buf = + match __sedlex_partition_1 (Sedlexing.__private__next_int buf) with + | 0 -> (Sedlexing.__private__set_mem_pos buf 0; __sedlex_state_1 buf) + | _ -> Sedlexing.backtrack buf + and __sedlex_state_1 buf = + match __sedlex_partition_2 (Sedlexing.__private__next_int buf) with + | 0 -> (Sedlexing.__private__set_mem_pos buf 0; __sedlex_state_1 buf) + | 1 -> (Sedlexing.__private__set_mem_pos buf 1; __sedlex_state_2 buf) + | _ -> Sedlexing.backtrack buf + and __sedlex_state_2 buf = + match __sedlex_partition_3 (Sedlexing.__private__next_int buf) with + | 0 -> (Sedlexing.__private__set_mem_pos buf 1; __sedlex_state_2 buf) + | 1 -> 0 + | 2 -> 1 + | _ -> Sedlexing.backtrack buf in + match Sedlexing.start buf; + Sedlexing.__private__init_mem buf 2; + __sedlex_state_0 buf + with + | 0 -> + let x = + let __s = Sedlexing.__private__mem_pos buf 0 in + let __e = Sedlexing.__private__mem_pos buf 1 in + { Sedlexing.lexbuf = buf; pos = __s; len = (__e - __s) } in + ignore x + | 1 -> () + | _ -> () + |}] + +(* Optimization 7: Self-loop tag delay (Set_prev) + Tags on a self-loop that also appear on all entering transitions + should be delayed to exit transitions as Set_prev. + Current: init_mem 2, set_mem t1 on every 'a' iteration (O(n)). 
+ Goal: no set_mem on the self-loop, set_mem_prev on exit (O(1)). *) +let%expect_test "optim: self-loop tag delay" = + (match%sedlex_test buf with (Plus 'a' as x), Plus 'b' -> ignore x | _ -> ()); + [%expect + {| + DOT: + digraph { + rankdir=LR; + node [shape=circle]; + + _start [shape=point]; + _start -> state0; + + state0 [label="0"]; + state0 -> state1 [label="'a' {t1}"]; + state1 [label="1"]; + state1 -> state1 [label="'a' {t1}"]; + state1 -> state2 [label="'b'"]; + state2 [label="2\n[rule 0]", shape=doublecircle]; + state2 -> state2 [label="'b'"]; + } + CODE: + let rec __sedlex_state_0 buf = + match __sedlex_partition_1 (Sedlexing.__private__next_int buf) with + | 0 -> (Sedlexing.__private__set_mem_pos buf 1; __sedlex_state_1 buf) + | _ -> Sedlexing.backtrack buf + and __sedlex_state_1 buf = + match __sedlex_partition_2 (Sedlexing.__private__next_int buf) with + | 0 -> (Sedlexing.__private__set_mem_pos buf 1; __sedlex_state_1 buf) + | 1 -> __sedlex_state_2 buf + | _ -> Sedlexing.backtrack buf + and __sedlex_state_2 buf = + Sedlexing.mark buf 0; + (match __sedlex_partition_3 (Sedlexing.__private__next_int buf) with + | 0 -> __sedlex_state_2 buf + | _ -> Sedlexing.backtrack buf) in + match Sedlexing.start buf; + Sedlexing.__private__init_mem buf 2; + Sedlexing.__private__set_mem_pos buf 0; + __sedlex_state_0 buf + with + | 0 -> + let x = + let __s = Sedlexing.__private__mem_pos buf 0 in + let __e = Sedlexing.__private__mem_pos buf 1 in + { Sedlexing.lexbuf = buf; pos = __s; len = (__e - __s) } in + ignore x + | _ -> () + |}] + +(* Optimization 8: Tag remapping + After coalescing and dead-tag elimination, the PPX should remap + Tag references through the compiler's tag_map. + Current: init_mem 6 (x: t0+t1, y: t2+t3, z: t4+t5). + Goal: tested implicitly by coalescing — if remapping is wrong, + the generated code will reference incorrect cell indices. 
*) +let%expect_test "optim: tag remapping after coalescing" = + (match%sedlex_test buf with + | ('a' as x), (Plus 'b' as y), ('c' as z) -> ignore (x, y, z) + | _ -> ()); + [%expect + {| + DOT: + digraph { + rankdir=LR; + node [shape=circle]; + + _start [shape=point]; + _start -> state0; + + state0 [label="0"]; + state0 -> state1 [label="'a' {t2,t1}"]; + state1 [label="1"]; + state1 -> state2 [label="'b' {t4,t3}"]; + state2 [label="2"]; + state2 -> state2 [label="'b' {t4,t3}"]; + state2 -> state3 [label="'c' {t5}"]; + state3 [label="3\n[rule 0]", shape=doublecircle]; + } + CODE: + let rec __sedlex_state_0 buf = + match __sedlex_partition_1 (Sedlexing.__private__next_int buf) with + | 0 -> + (Sedlexing.__private__set_mem_pos buf 2; + Sedlexing.__private__set_mem_pos buf 1; + __sedlex_state_1 buf) + | _ -> Sedlexing.backtrack buf + and __sedlex_state_1 buf = + match __sedlex_partition_2 (Sedlexing.__private__next_int buf) with + | 0 -> + (Sedlexing.__private__set_mem_pos buf 4; + Sedlexing.__private__set_mem_pos buf 3; + __sedlex_state_2 buf) + | _ -> Sedlexing.backtrack buf + and __sedlex_state_2 buf = + match __sedlex_partition_3 (Sedlexing.__private__next_int buf) with + | 0 -> + (Sedlexing.__private__set_mem_pos buf 4; + Sedlexing.__private__set_mem_pos buf 3; + __sedlex_state_2 buf) + | 1 -> (Sedlexing.__private__set_mem_pos buf 5; 0) + | _ -> Sedlexing.backtrack buf in + match Sedlexing.start buf; + Sedlexing.__private__init_mem buf 6; + Sedlexing.__private__set_mem_pos buf 0; + __sedlex_state_0 buf + with + | 0 -> + let x = + let __s = Sedlexing.__private__mem_pos buf 0 in + let __e = Sedlexing.__private__mem_pos buf 1 in + { Sedlexing.lexbuf = buf; pos = __s; len = (__e - __s) } in + let y = + let __s = Sedlexing.__private__mem_pos buf 2 in + let __e = Sedlexing.__private__mem_pos buf 3 in + { Sedlexing.lexbuf = buf; pos = __s; len = (__e - __s) } in + let z = + let __s = Sedlexing.__private__mem_pos buf 4 in + let __e = Sedlexing.__private__mem_pos buf 5 in + 
{ Sedlexing.lexbuf = buf; pos = __s; len = (__e - __s) } in + ignore (x, y, z) + | _ -> () + |}] + +(* Optimization 9: Set_prev with backtracking + Opt at the end means the DFA can accept at two states (with or without + the optional 'a'). When self-loop tag delay is implemented, the delayed + tags (Set_prev) must survive mark/backtrack correctly. + Current: init_mem 4 (x: t0+t1, y: t2+t3), set_mem on every iteration. *) +let%expect_test "optim: set_prev with backtracking" = + (match%sedlex_test buf with + | (Plus 'a' as x), ((Plus 'b', Opt 'a') as y) -> ignore (x, y) + | _ -> ()); + [%expect + {| + DOT: + digraph { + rankdir=LR; + node [shape=circle]; + + _start [shape=point]; + _start -> state0; + + state0 [label="0"]; + state0 -> state1 [label="'a' {t2,t1}"]; + state1 [label="1"]; + state1 -> state1 [label="'a' {t2,t1}"]; + state1 -> state2 [label="'b' {t3}"]; + state2 [label="2\n[rule 0]", shape=doublecircle]; + state2 -> state3 [label="'a' {t3}"]; + state2 -> state2 [label="'b' {t3}"]; + state3 [label="3\n[rule 0]", shape=doublecircle]; + } + CODE: + let rec __sedlex_state_0 buf = + match __sedlex_partition_1 (Sedlexing.__private__next_int buf) with + | 0 -> + (Sedlexing.__private__set_mem_pos buf 2; + Sedlexing.__private__set_mem_pos buf 1; + __sedlex_state_1 buf) + | _ -> Sedlexing.backtrack buf + and __sedlex_state_1 buf = + match __sedlex_partition_2 (Sedlexing.__private__next_int buf) with + | 0 -> + (Sedlexing.__private__set_mem_pos buf 2; + Sedlexing.__private__set_mem_pos buf 1; + __sedlex_state_1 buf) + | 1 -> (Sedlexing.__private__set_mem_pos buf 3; __sedlex_state_2 buf) + | _ -> Sedlexing.backtrack buf + and __sedlex_state_2 buf = + Sedlexing.mark buf 0; + (match __sedlex_partition_2 (Sedlexing.__private__next_int buf) with + | 0 -> (Sedlexing.__private__set_mem_pos buf 3; 0) + | 1 -> (Sedlexing.__private__set_mem_pos buf 3; __sedlex_state_2 buf) + | _ -> Sedlexing.backtrack buf) in + match Sedlexing.start buf; + Sedlexing.__private__init_mem buf 4; 
+ Sedlexing.__private__set_mem_pos buf 0; + __sedlex_state_0 buf + with + | 0 -> + let x = + let __s = Sedlexing.__private__mem_pos buf 0 in + let __e = Sedlexing.__private__mem_pos buf 1 in + { Sedlexing.lexbuf = buf; pos = __s; len = (__e - __s) } in + let y = + let __s = Sedlexing.__private__mem_pos buf 2 in + let __e = Sedlexing.__private__mem_pos buf 3 in + { Sedlexing.lexbuf = buf; pos = __s; len = (__e - __s) } in + ignore (x, y) + | _ -> () + |}] + +let%expect_test "Rep fixed-length prefix enables Start_plus" = + (* Rep('0'..'9', 3..3) has fixed length 3. + Current: init_mem 2 (start + end tags for x). + Goal: 0 tags (prefix=3, suffix=0 both known → Start_plus/End_minus). *) + (match%sedlex_test buf with + | Rep ('0' .. '9', 3 .. 3), (Plus 'a' .. 'z' as x) -> ignore x + | _ -> ()); + [%expect + {| + DOT: + digraph { + rankdir=LR; + node [shape=circle]; + + _start [shape=point]; + _start -> state0; + + state0 [label="0"]; + state0 -> state1 [label="'0'-'9'"]; + state1 [label="1"]; + state1 -> state2 [label="'0'-'9'"]; + state2 [label="2"]; + state2 -> state3 [label="'0'-'9' {t0}"]; + state3 [label="3"]; + state3 -> state4 [label="'a'-'z' {t1}"]; + state4 [label="4\n[rule 0]", shape=doublecircle]; + state4 -> state4 [label="'a'-'z' {t1}"]; + } + CODE: + let rec __sedlex_state_0 buf = + match __sedlex_partition_1 (Sedlexing.__private__next_int buf) with + | 0 -> __sedlex_state_1 buf + | _ -> Sedlexing.backtrack buf + and __sedlex_state_1 buf = + match __sedlex_partition_1 (Sedlexing.__private__next_int buf) with + | 0 -> __sedlex_state_2 buf + | _ -> Sedlexing.backtrack buf + and __sedlex_state_2 buf = + match __sedlex_partition_1 (Sedlexing.__private__next_int buf) with + | 0 -> (Sedlexing.__private__set_mem_pos buf 0; __sedlex_state_3 buf) + | _ -> Sedlexing.backtrack buf + and __sedlex_state_3 buf = + match __sedlex_partition_2 (Sedlexing.__private__next_int buf) with + | 0 -> (Sedlexing.__private__set_mem_pos buf 1; __sedlex_state_4 buf) + | _ -> 
Sedlexing.backtrack buf + and __sedlex_state_4 buf = + Sedlexing.mark buf 0; + (match __sedlex_partition_2 (Sedlexing.__private__next_int buf) with + | 0 -> (Sedlexing.__private__set_mem_pos buf 1; __sedlex_state_4 buf) + | _ -> Sedlexing.backtrack buf) in + match Sedlexing.start buf; + Sedlexing.__private__init_mem buf 2; + __sedlex_state_0 buf + with + | 0 -> + let x = + let __s = Sedlexing.__private__mem_pos buf 0 in + let __e = Sedlexing.__private__mem_pos buf 1 in + { Sedlexing.lexbuf = buf; pos = __s; len = (__e - __s) } in + ignore x + | _ -> () + |}] + +let%expect_test "as binding: or-chain then nested or on right" = + (match%sedlex_test buf with + | ("ab" as x), ("ef" as y) + | ("a" as x), ("bef" as y) + | (("cde" as x), "f" | ("c" as x), "ef"), ("gh" as y) -> + ignore (x, y) + | _ -> ()); + [%expect + {| + DOT: + digraph { + rankdir=LR; + node [shape=circle]; + + _start [shape=point]; + _start -> state0; + + state0 [label="0"]; + state0 -> state1 [label="'a' {t6,t5}"]; + state0 -> state5 [label="'c' {t12}"]; + state1 [label="1"]; + state1 -> state2 [label="'b' {t2,t1}"]; + state2 [label="2"]; + state2 -> state3 [label="'e'"]; + state3 [label="3"]; + state3 -> state4 [label="'f' {d8=0,t3,d16=0,d8=1,t7}"]; + state4 [label="4\n[rule 0]", shape=doublecircle]; + state5 [label="5"]; + state5 -> state6 [label="'d'"]; + state5 -> state11 [label="'e'"]; + state6 [label="6"]; + state6 -> state7 [label="'e' {t10}"]; + state7 [label="7"]; + state7 -> state8 [label="'f' {t14,d13=0}"]; + state8 [label="8"]; + state8 -> state9 [label="'g'"]; + state9 [label="9"]; + state9 -> state10 [label="'h' {d16=1,t15}"]; + state10 [label="10\n[rule 0]", shape=doublecircle]; + state11 [label="11"]; + state11 -> state12 [label="'f' {t14,d13=1}"]; + state12 [label="12"]; + state12 -> state9 [label="'g'"]; + } + CODE: + let rec __sedlex_state_0 buf = + match __sedlex_partition_1 (Sedlexing.__private__next_int buf) with + | 0 -> + (Sedlexing.__private__set_mem_pos buf 6; + 
Sedlexing.__private__set_mem_pos buf 5; + __sedlex_state_1 buf) + | 1 -> (Sedlexing.__private__set_mem_pos buf 12; __sedlex_state_5 buf) + | _ -> Sedlexing.backtrack buf + and __sedlex_state_1 buf = + match __sedlex_partition_2 (Sedlexing.__private__next_int buf) with + | 0 -> + (Sedlexing.__private__set_mem_pos buf 2; + Sedlexing.__private__set_mem_pos buf 1; + __sedlex_state_2 buf) + | _ -> Sedlexing.backtrack buf + and __sedlex_state_2 buf = + match __sedlex_partition_3 (Sedlexing.__private__next_int buf) with + | 0 -> __sedlex_state_3 buf + | _ -> Sedlexing.backtrack buf + and __sedlex_state_3 buf = + match __sedlex_partition_4 (Sedlexing.__private__next_int buf) with + | 0 -> + (Sedlexing.__private__set_mem_value buf 8 0; + Sedlexing.__private__set_mem_pos buf 3; + Sedlexing.__private__set_mem_value buf 16 0; + Sedlexing.__private__set_mem_value buf 8 1; + Sedlexing.__private__set_mem_pos buf 7; + 0) + | _ -> Sedlexing.backtrack buf + and __sedlex_state_5 buf = + match __sedlex_partition_5 (Sedlexing.__private__next_int buf) with + | 0 -> __sedlex_state_6 buf + | 1 -> __sedlex_state_11 buf + | _ -> Sedlexing.backtrack buf + and __sedlex_state_6 buf = + match __sedlex_partition_3 (Sedlexing.__private__next_int buf) with + | 0 -> (Sedlexing.__private__set_mem_pos buf 10; __sedlex_state_7 buf) + | _ -> Sedlexing.backtrack buf + and __sedlex_state_7 buf = + match __sedlex_partition_4 (Sedlexing.__private__next_int buf) with + | 0 -> + (Sedlexing.__private__set_mem_pos buf 14; + Sedlexing.__private__set_mem_value buf 13 0; + __sedlex_state_8 buf) + | _ -> Sedlexing.backtrack buf + and __sedlex_state_8 buf = + match __sedlex_partition_6 (Sedlexing.__private__next_int buf) with + | 0 -> __sedlex_state_9 buf + | _ -> Sedlexing.backtrack buf + and __sedlex_state_9 buf = + match __sedlex_partition_7 (Sedlexing.__private__next_int buf) with + | 0 -> + (Sedlexing.__private__set_mem_value buf 16 1; + Sedlexing.__private__set_mem_pos buf 15; + 0) + | _ -> 
Sedlexing.backtrack buf + and __sedlex_state_11 buf = + match __sedlex_partition_4 (Sedlexing.__private__next_int buf) with + | 0 -> + (Sedlexing.__private__set_mem_pos buf 14; + Sedlexing.__private__set_mem_value buf 13 1; + __sedlex_state_12 buf) + | _ -> Sedlexing.backtrack buf + and __sedlex_state_12 buf = + match __sedlex_partition_6 (Sedlexing.__private__next_int buf) with + | 0 -> __sedlex_state_9 buf + | _ -> Sedlexing.backtrack buf in + match Sedlexing.start buf; + Sedlexing.__private__init_mem buf 17; + Sedlexing.__private__set_mem_pos buf 11; + Sedlexing.__private__set_mem_pos buf 9; + Sedlexing.__private__set_mem_pos buf 4; + Sedlexing.__private__set_mem_pos buf 0; + __sedlex_state_0 buf + with + | 0 -> + let x = + if + ((Sedlexing.__private__mem_value buf 16) = 0) && + ((Sedlexing.__private__mem_value buf 8) = 0) + then + let __s = Sedlexing.__private__mem_pos buf 0 in + let __e = Sedlexing.__private__mem_pos buf 1 in + { Sedlexing.lexbuf = buf; pos = __s; len = (__e - __s) } + else + if + ((Sedlexing.__private__mem_value buf 16) = 0) && + ((Sedlexing.__private__mem_value buf 8) = 1) + then + (let __s = Sedlexing.__private__mem_pos buf 4 in + let __e = Sedlexing.__private__mem_pos buf 5 in + { Sedlexing.lexbuf = buf; pos = __s; len = (__e - __s) }) + else + if + ((Sedlexing.__private__mem_value buf 16) = 1) && + ((Sedlexing.__private__mem_value buf 13) = 0) + then + (let __s = Sedlexing.__private__mem_pos buf 9 in + let __e = Sedlexing.__private__mem_pos buf 10 in + { Sedlexing.lexbuf = buf; pos = __s; len = (__e - __s) }) + else + (let __s = Sedlexing.__private__mem_pos buf 11 in + let __e = Sedlexing.__private__mem_pos buf 12 in + { Sedlexing.lexbuf = buf; pos = __s; len = (__e - __s) }) in + let y = + if + ((Sedlexing.__private__mem_value buf 16) = 0) && + ((Sedlexing.__private__mem_value buf 8) = 0) + then + let __s = Sedlexing.__private__mem_pos buf 2 in + let __e = Sedlexing.__private__mem_pos buf 3 in + { Sedlexing.lexbuf = buf; pos = __s; len = 
(__e - __s) } + else + if + ((Sedlexing.__private__mem_value buf 16) = 0) && + ((Sedlexing.__private__mem_value buf 8) = 1) + then + (let __s = Sedlexing.__private__mem_pos buf 6 in + let __e = Sedlexing.__private__mem_pos buf 7 in + { Sedlexing.lexbuf = buf; pos = __s; len = (__e - __s) }) + else + (let __s = Sedlexing.__private__mem_pos buf 14 in + let __e = Sedlexing.__private__mem_pos buf 15 in + { Sedlexing.lexbuf = buf; pos = __s; len = (__e - __s) }) in + ignore (x, y) + | _ -> () |}] diff --git a/test/codegen/test_realistic.ml b/test/codegen/test_realistic.ml new file mode 100644 index 0000000..147c02d --- /dev/null +++ b/test/codegen/test_realistic.ml @@ -0,0 +1,227 @@ +(* Realistic multi-rule lexer exercising many patterns simultaneously. + Current baseline (no optimizations): init_mem 22. + - Rule 0: (Plus 'a'..'z' as ns), '.', (Plus 'a'..'z' as name) → 4 tags + - Rule 1: (Plus 'A'..'Z' as label), '=', (Plus '0'..'9' as value) → 4 tags + - Rule 2: "0x", (Plus hex as hex), ';' → 2 tags + - Rule 3: '(', ('a'..'z' as x), ',', ('a'..'z' as y), ')' → 4 tags + - Rule 4: (Plus digits as tok) | (Plus letters as tok) → 8 tags (or-pattern) + Optimization goals: + - Prefix/suffix offsets → rules 2,3 need 0 tags + - Or-pattern discriminator elision → rule 4 needs 0 tags + - Self-loop tag delay → rules 0,1 use Set_prev + - Cross-rule cell sharing → rules 0,1 share cells + - Adjacent gap elimination → rules 0,1 share end/start tags + Optimized goal: init_mem 1. *) +let%expect_test "realistic: multi-token lexer" = + (match%sedlex_test buf with + | (Plus 'a' .. 'z' as ns), '.', (Plus 'a' .. 'z' as name) -> + ignore (ns, name) + | (Plus 'A' .. 'Z' as label), '=', (Plus '0' .. '9' as value) -> + ignore (label, value) + | "0x", (Plus ('0' .. '9' | 'a' .. 'f') as hex), ';' -> ignore hex + | '(', ('a' .. 'z' as x), ',', ('a' .. 'z' as y), ')' -> ignore (x, y) + | (Plus '0' .. '9' as tok) | (Plus 'a' .. 
'z' as tok) -> ignore tok + | _ -> ()); + [%expect + {| + DOT: + digraph { + rankdir=LR; + node [shape=circle]; + + _start [shape=point]; + _start -> state0; + + state0 [label="0"]; + state0 -> state1 [label="'(' {t10}"]; + state0 -> state6 [label="'0' {d18=0,t15}"]; + state0 -> state7 [label="'1'-'9' {d18=0,t15}"]; + state0 -> state11 [label="'A'-'Z' {t5}"]; + state0 -> state14 [label="'a'-'z' {t1,d18=1,t17}"]; + state1 [label="1"]; + state1 -> state2 [label="'a'-'z' {t11}"]; + state2 [label="2"]; + state2 -> state3 [label="',' {t12}"]; + state3 [label="3"]; + state3 -> state4 [label="'a'-'z' {t13}"]; + state4 [label="4"]; + state4 -> state5 [label="')'"]; + state5 [label="5\n[rule 3]", shape=doublecircle]; + state6 [label="6\n[rule 4]", shape=doublecircle]; + state6 -> state7 [label="'0'-'9' {d18=0,t15}"]; + state6 -> state8 [label="'x' {t8}"]; + state7 [label="7\n[rule 4]", shape=doublecircle]; + state7 -> state7 [label="'0'-'9' {d18=0,t15}"]; + state8 [label="8"]; + state8 -> state9 [label="'0'-'9', 'a'-'f' {t9}"]; + state9 [label="9"]; + state9 -> state9 [label="'0'-'9', 'a'-'f' {t9}"]; + state9 -> state10 [label="';'"]; + state10 [label="10\n[rule 2]", shape=doublecircle]; + state11 [label="11"]; + state11 -> state12 [label="'=' {t6}"]; + state11 -> state11 [label="'A'-'Z' {t5}"]; + state12 [label="12"]; + state12 -> state13 [label="'0'-'9' {t7}"]; + state13 [label="13\n[rule 1]", shape=doublecircle]; + state13 -> state13 [label="'0'-'9' {t7}"]; + state14 [label="14\n[rule 4]", shape=doublecircle]; + state14 -> state15 [label="'.' 
{t2}"]; + state14 -> state14 [label="'a'-'z' {t1,d18=1,t17}"]; + state15 [label="15"]; + state15 -> state16 [label="'a'-'z' {t3}"]; + state16 [label="16\n[rule 0]", shape=doublecircle]; + state16 -> state16 [label="'a'-'z' {t3}"]; + } + CODE: + let rec __sedlex_state_0 buf = + match __sedlex_partition_1 (Sedlexing.__private__next_int buf) with + | 0 -> (Sedlexing.__private__set_mem_pos buf 10; __sedlex_state_1 buf) + | 1 -> + (Sedlexing.__private__set_mem_value buf 18 0; + Sedlexing.__private__set_mem_pos buf 15; + __sedlex_state_6 buf) + | 2 -> + (Sedlexing.__private__set_mem_value buf 18 0; + Sedlexing.__private__set_mem_pos buf 15; + __sedlex_state_7 buf) + | 3 -> (Sedlexing.__private__set_mem_pos buf 5; __sedlex_state_11 buf) + | 4 -> + (Sedlexing.__private__set_mem_pos buf 1; + Sedlexing.__private__set_mem_value buf 18 1; + Sedlexing.__private__set_mem_pos buf 17; + __sedlex_state_14 buf) + | _ -> Sedlexing.backtrack buf + and __sedlex_state_1 buf = + match __sedlex_partition_2 (Sedlexing.__private__next_int buf) with + | 0 -> (Sedlexing.__private__set_mem_pos buf 11; __sedlex_state_2 buf) + | _ -> Sedlexing.backtrack buf + and __sedlex_state_2 buf = + match __sedlex_partition_3 (Sedlexing.__private__next_int buf) with + | 0 -> (Sedlexing.__private__set_mem_pos buf 12; __sedlex_state_3 buf) + | _ -> Sedlexing.backtrack buf + and __sedlex_state_3 buf = + match __sedlex_partition_2 (Sedlexing.__private__next_int buf) with + | 0 -> (Sedlexing.__private__set_mem_pos buf 13; __sedlex_state_4 buf) + | _ -> Sedlexing.backtrack buf + and __sedlex_state_4 buf = + match __sedlex_partition_4 (Sedlexing.__private__next_int buf) with + | 0 -> 3 + | _ -> Sedlexing.backtrack buf + and __sedlex_state_6 buf = + Sedlexing.mark buf 4; + (match __sedlex_partition_5 (Sedlexing.__private__next_int buf) with + | 0 -> + (Sedlexing.__private__set_mem_value buf 18 0; + Sedlexing.__private__set_mem_pos buf 15; + __sedlex_state_7 buf) + | 1 -> (Sedlexing.__private__set_mem_pos buf 8; 
__sedlex_state_8 buf) + | _ -> Sedlexing.backtrack buf) + and __sedlex_state_7 buf = + Sedlexing.mark buf 4; + (match __sedlex_partition_6 (Sedlexing.__private__next_int buf) with + | 0 -> + (Sedlexing.__private__set_mem_value buf 18 0; + Sedlexing.__private__set_mem_pos buf 15; + __sedlex_state_7 buf) + | _ -> Sedlexing.backtrack buf) + and __sedlex_state_8 buf = + match __sedlex_partition_7 (Sedlexing.__private__next_int buf) with + | 0 -> (Sedlexing.__private__set_mem_pos buf 9; __sedlex_state_9 buf) + | _ -> Sedlexing.backtrack buf + and __sedlex_state_9 buf = + match __sedlex_partition_8 (Sedlexing.__private__next_int buf) with + | 0 -> (Sedlexing.__private__set_mem_pos buf 9; __sedlex_state_9 buf) + | 1 -> 2 + | _ -> Sedlexing.backtrack buf + and __sedlex_state_11 buf = + match __sedlex_partition_9 (Sedlexing.__private__next_int buf) with + | 0 -> (Sedlexing.__private__set_mem_pos buf 6; __sedlex_state_12 buf) + | 1 -> (Sedlexing.__private__set_mem_pos buf 5; __sedlex_state_11 buf) + | _ -> Sedlexing.backtrack buf + and __sedlex_state_12 buf = + match __sedlex_partition_6 (Sedlexing.__private__next_int buf) with + | 0 -> (Sedlexing.__private__set_mem_pos buf 7; __sedlex_state_13 buf) + | _ -> Sedlexing.backtrack buf + and __sedlex_state_13 buf = + Sedlexing.mark buf 1; + (match __sedlex_partition_6 (Sedlexing.__private__next_int buf) with + | 0 -> (Sedlexing.__private__set_mem_pos buf 7; __sedlex_state_13 buf) + | _ -> Sedlexing.backtrack buf) + and __sedlex_state_14 buf = + Sedlexing.mark buf 4; + (match __sedlex_partition_10 (Sedlexing.__private__next_int buf) with + | 0 -> (Sedlexing.__private__set_mem_pos buf 2; __sedlex_state_15 buf) + | 1 -> + (Sedlexing.__private__set_mem_pos buf 1; + Sedlexing.__private__set_mem_value buf 18 1; + Sedlexing.__private__set_mem_pos buf 17; + __sedlex_state_14 buf) + | _ -> Sedlexing.backtrack buf) + and __sedlex_state_15 buf = + match __sedlex_partition_2 (Sedlexing.__private__next_int buf) with + | 0 -> 
(Sedlexing.__private__set_mem_pos buf 3; __sedlex_state_16 buf) + | _ -> Sedlexing.backtrack buf + and __sedlex_state_16 buf = + Sedlexing.mark buf 0; + (match __sedlex_partition_2 (Sedlexing.__private__next_int buf) with + | 0 -> (Sedlexing.__private__set_mem_pos buf 3; __sedlex_state_16 buf) + | _ -> Sedlexing.backtrack buf) in + match Sedlexing.start buf; + Sedlexing.__private__init_mem buf 19; + Sedlexing.__private__set_mem_pos buf 16; + Sedlexing.__private__set_mem_pos buf 14; + Sedlexing.__private__set_mem_pos buf 4; + Sedlexing.__private__set_mem_pos buf 0; + __sedlex_state_0 buf + with + | 0 -> + let ns = + let __s = Sedlexing.__private__mem_pos buf 0 in + let __e = Sedlexing.__private__mem_pos buf 1 in + { Sedlexing.lexbuf = buf; pos = __s; len = (__e - __s) } in + let name = + let __s = Sedlexing.__private__mem_pos buf 2 in + let __e = Sedlexing.__private__mem_pos buf 3 in + { Sedlexing.lexbuf = buf; pos = __s; len = (__e - __s) } in + ignore (ns, name) + | 1 -> + let label = + let __s = Sedlexing.__private__mem_pos buf 4 in + let __e = Sedlexing.__private__mem_pos buf 5 in + { Sedlexing.lexbuf = buf; pos = __s; len = (__e - __s) } in + let value = + let __s = Sedlexing.__private__mem_pos buf 6 in + let __e = Sedlexing.__private__mem_pos buf 7 in + { Sedlexing.lexbuf = buf; pos = __s; len = (__e - __s) } in + ignore (label, value) + | 2 -> + let hex = + let __s = Sedlexing.__private__mem_pos buf 8 in + let __e = Sedlexing.__private__mem_pos buf 9 in + { Sedlexing.lexbuf = buf; pos = __s; len = (__e - __s) } in + ignore hex + | 3 -> + let x = + let __s = Sedlexing.__private__mem_pos buf 10 in + let __e = Sedlexing.__private__mem_pos buf 11 in + { Sedlexing.lexbuf = buf; pos = __s; len = (__e - __s) } in + let y = + let __s = Sedlexing.__private__mem_pos buf 12 in + let __e = Sedlexing.__private__mem_pos buf 13 in + { Sedlexing.lexbuf = buf; pos = __s; len = (__e - __s) } in + ignore (x, y) + | 4 -> + let tok = + if (Sedlexing.__private__mem_value buf 18) = 
0 + then + let __s = Sedlexing.__private__mem_pos buf 14 in + let __e = Sedlexing.__private__mem_pos buf 15 in + { Sedlexing.lexbuf = buf; pos = __s; len = (__e - __s) } + else + (let __s = Sedlexing.__private__mem_pos buf 16 in + let __e = Sedlexing.__private__mem_pos buf 17 in + { Sedlexing.lexbuf = buf; pos = __s; len = (__e - __s) }) in + ignore tok + | _ -> () + |}] diff --git a/test/ppx_test/ppx_sedlex_test.ml b/test/ppx_test/ppx_sedlex_test.ml index 16f4133..2957aa5 100644 --- a/test/ppx_test/ppx_sedlex_test.ml +++ b/test/ppx_test/ppx_sedlex_test.ml @@ -4,6 +4,7 @@ module S = Sedlex_ppx.Sedlex let expand ~ctxt:_ expr = P.reset_state (); + S.reset_tags (); let loc = Location.none in let code_expr, auto = P.handle_sedlex_match expr in let code_str = Pprintast.string_of_expression code_expr in From 43cd5829fa1869348f0bac78aa9bf53d594e9156 Mon Sep 17 00:00:00 2001 From: Hugo Heuzard Date: Tue, 24 Mar 2026 10:17:16 +0100 Subject: [PATCH 22/24] Add lots of documentation --- src/lib/sedlexing.ml | 65 ++++++++++++++++----- src/lib/sedlexing.mli | 112 ++++++++++++++++++++++++------------- src/syntax/ppx_sedlex.ml | 118 ++++++++++++++++++++++++++++++++++++++- src/syntax/sedlex.ml | 106 +++++++++++++++++++++++++++++++++-- src/syntax/sedlex.mli | 87 ++++++++++++++++++++++++++--- 5 files changed, 419 insertions(+), 69 deletions(-) diff --git a/src/lib/sedlexing.ml b/src/lib/sedlexing.ml index 736e4ab..653a955 100644 --- a/src/lib/sedlexing.ml +++ b/src/lib/sedlexing.ml @@ -44,41 +44,55 @@ type lexbuf = { refill : Uchar.t array -> int -> int -> int; bytes_per_char : Uchar.t -> int; mutable buf : Uchar.t array; + (* Number of valid uchars in [buf] (from index 0 to len-1). *) mutable len : int; - (* Number of meaningful uchar in buffer *) + (* Cumulative uchar count: number of uchars discarded before buf[0]. + Absolute uchar position of buf[i] = offset + i. 
*) mutable offset : apos; - (* Number of meaningful bytes in buffer *) + (* Cumulative byte count: number of bytes discarded before buf[0]. *) mutable bytes_offset : apos; - (* Position of the first uchar in buffer - in the input stream *) + (* Current read position in [buf] (buffer-relative index, 0-based). *) mutable pos : int; - (* Position of the first byte in buffer - in the input stream *) + (* Current read position in bytes (buffer-relative). *) mutable bytes_pos : int; - (* Position of the beginning of the line in the buffer, in uchar *) + (* Absolute position of the beginning of the current line, in uchar. *) mutable curr_bol : int; - (* Position of the beginning of the line in the buffer, in bytes *) + (* Absolute position of the beginning of the current line, in bytes. *) mutable curr_bytes_bol : int; (* Index of the current line in the input stream. *) mutable curr_line : int; - (* starting position, in uchar. *) + (* Token start position in [buf], in uchars (buffer-relative). *) mutable start_pos : int; - (* starting position, in bytes. *) + (* Token start position in bytes (buffer-relative). *) mutable start_bytes_pos : int; - (* First uchar we need to keep visible *) + (* Absolute beginning-of-line position at token start, in uchars. *) mutable start_bol : int; - (* First byte we need to keep visible *) + (* Absolute beginning-of-line position at token start, in bytes. *) mutable start_bytes_bol : int; - (* start from 1 *) + (* Line number at token start (starts from 1). *) mutable start_line : int; + (* Backtrack snapshot: saved by [mark], restored by [backtrack]. *) mutable marked_pos : int; mutable marked_bytes_pos : int; mutable marked_bol : int; mutable marked_bytes_bol : int; mutable marked_line : int; + (* The rule index stored by [mark]. *) mutable marked_val : int; mutable filename : string; + (* True when the input source is exhausted. *) mutable finished : bool; + (* Memory cells for tagged DFA transitions (as-bindings). 
+ A single int array stores both positions and discriminator values, + distinguished by range: + - positions: buffer-relative uchar indices (>= 0), adjusted by + [refill] when the buffer is compacted, and converted to + token-relative offsets on read by [__private__mem_pos]. + - discriminator values: stored as [-(v + 2)], always <= -2, + disjoint from positions and the unset sentinel (-1). + [mark] snapshots this array into [__private__mem_saved]; + [backtrack] restores it, so that sub-match positions reflect + the last accepting state rather than a later speculative state. *) mutable __private__mem : int array; mutable __private__mem_saved : int array; } @@ -190,6 +204,10 @@ let refill lexbuf = lexbuf.marked_bytes_pos <- lexbuf.marked_bytes_pos - s_bytes; lexbuf.start_pos <- 0; lexbuf.start_bytes_pos <- 0; + (* Adjust tagged DFA memory cells: position cells (>= 0) are + buffer-relative uchar indices and must be shifted by [s] after + compaction. Value cells (<= -2) and unset cells (-1) are left + unchanged. *) for i = 0 to Array.length lexbuf.__private__mem - 1 do if lexbuf.__private__mem.(i) >= 0 then lexbuf.__private__mem.(i) <- lexbuf.__private__mem.(i) - s @@ -228,6 +246,7 @@ let mark lexbuf i = lexbuf.marked_bytes_bol <- lexbuf.curr_bytes_bol; lexbuf.marked_line <- lexbuf.curr_line; lexbuf.marked_val <- i; + (* Snapshot tagged DFA memory cells so backtrack can restore them. *) let n = Array.length lexbuf.__private__mem in if n > 0 then Array.blit lexbuf.__private__mem 0 lexbuf.__private__mem_saved 0 n @@ -246,6 +265,8 @@ let backtrack lexbuf = lexbuf.curr_bol <- lexbuf.marked_bol; lexbuf.curr_bytes_bol <- lexbuf.marked_bytes_bol; lexbuf.curr_line <- lexbuf.marked_line; + (* Restore tagged DFA memory cells to the snapshot taken at the last + accepting state, so sub-match positions are correct after backtracking. 
*) let n = Array.length lexbuf.__private__mem in if n > 0 then Array.blit lexbuf.__private__mem_saved 0 lexbuf.__private__mem 0 n; @@ -258,7 +279,16 @@ let rollback lexbuf = lexbuf.curr_bytes_bol <- lexbuf.start_bytes_bol; lexbuf.curr_line <- lexbuf.start_line +(* Tagged DFA memory cells for `as` bindings. + Positions are stored as buffer-relative uchar indices (>= 0), converted + to token-relative offsets on read by [__private__mem_pos]. Discriminator + values are stored as -(v + 2), always <= -2. The sentinel -1 means + "unset". This range convention lets [refill] adjust only position + cells (>= 0) when compacting the buffer. *) + let __private__init_mem lexbuf n = + (* Reuse existing arrays if large enough; otherwise allocate fresh ones. + Both mem and mem_saved are reset to -1 (unset). *) if Array.length lexbuf.__private__mem < n then begin lexbuf.__private__mem <- Array.make n (-1); lexbuf.__private__mem_saved <- Array.make n (-1) @@ -269,8 +299,15 @@ let __private__init_mem lexbuf n = end let __private__set_mem_pos lexbuf i = lexbuf.__private__mem.(i) <- lexbuf.pos -let __private__set_mem_value lexbuf i v = lexbuf.__private__mem.(i) <- -(v + 2) + +let __private__set_mem_value lexbuf i v = + assert (v >= 0); + lexbuf.__private__mem.(i) <- -(v + 2) + +(* Returns position relative to token start, for use in sub_lexeme. *) let __private__mem_pos lexbuf i = lexbuf.__private__mem.(i) - lexbuf.start_pos + +(* Decodes the -(v + 2) encoding back to the original integer value. *) let __private__mem_value lexbuf i = -(lexbuf.__private__mem.(i) + 2) let __private__num_mem_cells lexbuf = Array.length lexbuf.__private__mem let lexeme_start lexbuf = lexbuf.start_pos + lexbuf.offset diff --git a/src/lib/sedlexing.mli b/src/lib/sedlexing.mli index ae9e6e5..dcba7a6 100644 --- a/src/lib/sedlexing.mli +++ b/src/lib/sedlexing.mli @@ -49,8 +49,8 @@ val create : (Uchar.t array -> int -> int -> int) -> lexbuf -(** set the initial tracked input position, in code point, for [lexbuf]. 
If - unspecified, byte postion is set to the same value as code point position. +(** Set the initial tracked input position, in code points, for [lexbuf]. If + unspecified, byte position is set to the same value as code point position. *) val set_position : ?bytes_position:Lexing.position -> lexbuf -> Lexing.position -> unit @@ -87,19 +87,19 @@ val from_uchar_array : stream has offset 0. *) val lexeme_start : lexbuf -> int -(** [Sedlexing.lexeme_start lexbuf] returns the offset in the input stream of - the first byte of the matched string. The first code point of the stream has +(** [Sedlexing.lexeme_bytes_start lexbuf] returns the offset in the input stream + of the first byte of the matched string. The first byte of the stream has offset 0. *) val lexeme_bytes_start : lexbuf -> int (** [Sedlexing.lexeme_end lexbuf] returns the offset in the input stream of the - character following the last code point of the matched string. The first - character of the stream has offset 0. *) + code point following the last code point of the matched string. The first + code point of the stream has offset 0. *) val lexeme_end : lexbuf -> int -(** [Sedlexing.lexeme_end lexbuf] returns the offset in the input stream of the - byte following the last code point of the matched string. The first - character of the stream has offset 0. *) +(** [Sedlexing.lexeme_bytes_end lexbuf] returns the offset in the input stream + of the byte following the last byte of the matched string. The first byte of + the stream has offset 0. *) val lexeme_bytes_end : lexbuf -> int (** [Sedlexing.loc lexbuf] returns the pair @@ -116,7 +116,7 @@ val bytes_loc : lexbuf -> int * int the length (in code points) of the matched string. 
*) val lexeme_length : lexbuf -> int -(** [Sedlexing.lexeme_length lexbuf] returns the difference +(** [Sedlexing.lexeme_bytes_length lexbuf] returns the difference [(Sedlexing.lexeme_bytes_end lexbuf) - (Sedlexing.lexeme_bytes_start lexbuf)], that is, the length (in bytes) of the matched string. *) val lexeme_bytes_length : lexbuf -> int @@ -155,7 +155,7 @@ val lexing_bytes_position_curr : lexbuf -> Lexing.position val new_line : lexbuf -> unit (** [Sedlexing.lexeme lexbuf] returns the string matched by the regular - expression as an array of Unicode code point. *) + expression as an array of Unicode code points. *) val lexeme : lexbuf -> Uchar.t array (** [Sedlexing.lexeme_char lexbuf pos] returns code point number [pos] in the @@ -163,7 +163,7 @@ val lexeme : lexbuf -> Uchar.t array val lexeme_char : lexbuf -> int -> Uchar.t (** [Sedlexing.sub_lexeme lexbuf pos len] returns a substring of the string - matched by the regular expression as an array of Unicode code point. *) + matched by the regular expression as an array of Unicode code points. *) val sub_lexeme : lexbuf -> int -> int -> Uchar.t array (** A submatch captures a sub-pattern matched by an [as] binding. It carries the @@ -183,6 +183,15 @@ val lexeme_of_submatch : submatch -> Uchar.t array [Sedlexing.rollback]. *) val rollback : lexbuf -> unit +(** [with_tokenizer tokenizer lexbuf] given a lexer and a lexbuf, returns a + generator of tokens annotated with positions. This generator can be used + with the Menhir parser generator's incremental API. *) +val with_tokenizer : + (lexbuf -> 'token) -> + lexbuf -> + unit -> + 'token * Lexing.position * Lexing.position + (** {6 Internal interface} *) (** These functions are used internally by the lexers. They could be used to @@ -190,51 +199,78 @@ val rollback : lexbuf -> unit lexer buffers have a unique internal slot that can store an integer. They also store a "backtrack" position. 
*) -(** [start t] informs the lexer buffer that any code points until the current - position can be discarded. The current position become the "start" position - as returned by [Sedlexing.lexeme_start]. Moreover, the internal slot is set - to [-1] and the backtrack position is set to the current position. *) +(** [start lexbuf] informs the lexer buffer that any code points until the + current position can be discarded. The current position becomes the "start" + position as returned by [Sedlexing.lexeme_start]. Moreover, the internal + slot is set to [-1] and the backtrack position is set to the current + position. *) val start : lexbuf -> unit (** [next lexbuf] extracts the next code point from the lexer buffer and - increments to current position. If the input stream is exhausted, the + increments the current position. If the input stream is exhausted, the function returns [None]. If a ['\n'] is encountered, the tracked line number is incremented. *) val next : lexbuf -> Uchar.t option +(** [mark lexbuf i] stores the integer [i] in the internal slot. The backtrack + position is set to the current position. If the lexbuf has tagged DFA memory + cells (from [as] bindings), the current cell values are snapshotted so they + can be restored by [backtrack]. *) +val mark : lexbuf -> int -> unit + +(** [backtrack lexbuf] returns the value stored in the internal slot of the + buffer, and performs backtracking (the current position is set to the value + of the backtrack position). If the lexbuf has tagged DFA memory cells, they + are restored to the values saved by the last [mark] call, so that sub-match + positions reflect the last accepting state. *) +val backtrack : lexbuf -> int + (** [__private__next_int lexbuf] extracts the next code point from the lexer - buffer and increments to current position. If the input stream is exhausted, - the function returns -1. If a ['\n'] is encountered, the tracked line number - is incremented. 
+ buffer and increments the current position. If the input stream is + exhausted, the function returns -1. If a ['\n'] is encountered, the tracked + line number is incremented. This is a private API, it should not be used by code using this module's API and can be removed at any time. *) val __private__next_int : lexbuf -> int +(** Tagged DFA memory cells for [as] bindings. + + The following functions manage an internal array of memory cells used to + record sub-match positions during DFA execution. Cells store either + positions (>= 0) or encoded integer values (<= -2). The sentinel -1 means + "unset". Positions are automatically adjusted when the internal buffer is + compacted, and converted to token-relative offsets on read by + {!__private__mem_pos}. + + This is a private API used by generated code and may change at any time. *) + +(** [__private__init_mem lexbuf n] ensures at least [n] memory cells are + available, resetting all cells to -1 (unset). Called once at the start of + each [match%sedlex] block that uses [as] bindings. *) val __private__init_mem : lexbuf -> int -> unit + +(** [__private__set_mem_pos lexbuf i] records the current position in cell [i], + for later retrieval by {!__private__mem_pos}. Used by [Set_position] tag + operations on DFA transitions. *) val __private__set_mem_pos : lexbuf -> int -> unit + +(** [__private__set_mem_value lexbuf i v] stores integer [v] in cell [i], + encoded as [-(v + 2)] so it is disjoint from positions and the unset + sentinel. Used by [Set_value] tag operations for or-pattern discriminators. +*) val __private__set_mem_value : lexbuf -> int -> int -> unit -val __private__mem_pos : lexbuf -> int -> int -val __private__mem_value : lexbuf -> int -> int -val __private__num_mem_cells : lexbuf -> int -(** [mark lexbuf i] stores the integer [i] in the internal slot. The backtrack - position is set to the current position. 
*) -val mark : lexbuf -> int -> unit +(** [__private__mem_pos lexbuf i] returns the position stored in cell [i], as an + offset relative to the start of the current token. *) +val __private__mem_pos : lexbuf -> int -> int -(** [backtrack lexbuf] returns the value stored in the internal slot of the - buffer, and performs backtracking (the current position is set to the value - of the backtrack position). *) -val backtrack : lexbuf -> int +(** [__private__mem_value lexbuf i] decodes and returns the integer value stored + in cell [i] (reverses the [-(v + 2)] encoding). *) +val __private__mem_value : lexbuf -> int -> int -(** [with_tokenizer tokenizer lexbuf] given a lexer and a lexbuf, returns a - generator of tokens annotated with positions. This generator can be used - with the Menir parser generator's incremental API. *) -val with_tokenizer : - (lexbuf -> 'token) -> - lexbuf -> - unit -> - 'token * Lexing.position * Lexing.position +(** Returns the current number of allocated memory cells. *) +val __private__num_mem_cells : lexbuf -> int (** {6 Support for common encodings} *) diff --git a/src/syntax/ppx_sedlex.ml b/src/syntax/ppx_sedlex.ml index 79d727d..f36bd73 100644 --- a/src/syntax/ppx_sedlex.ml +++ b/src/syntax/ppx_sedlex.ml @@ -10,7 +10,13 @@ open Ast_helper module Cset = Sedlex_cset -(* Decision tree for partitions *) +(* Decision tree for partitions. + + A partition maps Unicode code points to equivalence class indices. Rather + than generating a flat lookup table (which would be huge), we build a + binary decision tree that tests code points against split values. For + dense regions below [limit], a compact byte-string table is used instead. + [simplify] prunes unreachable branches given a known code-point range. 
*) let default_loc = Location.none @@ -30,6 +36,10 @@ let rec simplify_decision_tree (x : decision_tree) = | Return a, Return b when a = b -> l | _ -> Lte (i, l, r)) +(* [decision segments] builds a balanced binary decision tree from a sorted + list of [(lo, hi, class_index)] segments. Pairs of adjacent segments are + merged bottom-up into [Lte] nodes. Gaps between segments return -1 + (no match). *) let decision l = let l = List.map (fun (a, b, i) -> (a, b, Return i)) l in let rec merge2 = function @@ -45,8 +55,13 @@ let decision l = in aux l +(* Code points below [limit] with class index < 255 are eligible for + compact byte-string table lookup instead of a decision tree. *) let limit = 8192 +(* [decision_table segments] partitions segments into a table-eligible + prefix (dense, low code points) and a tree-handled suffix. The prefix + becomes a [Table] node for O(1) lookup; the suffix uses [decision]. *) let decision_table l = let rec aux m accu = function | ((a, b, i) as x) :: rem when b < limit && i < 255 -> @@ -75,6 +90,9 @@ let rec simplify min max = function else Lte (i, simplify min i yes, simplify (i + 1) max no) | x -> x +(* [segments_of_partition p] flattens a partition (array of char sets, one + per equivalence class) into a sorted list of [(lo, hi, class_index)] + segments suitable for [decision_table]. *) let segments_of_partition p = let seg = ref [] in Array.iteri @@ -85,6 +103,9 @@ let segments_of_partition p = p; List.sort (fun (a1, _, _) (a2, _, _) -> compare a1 a2) !seg +(* [decision_table partition] builds a complete decision tree for a + partition: extracts segments, builds the hybrid table/tree, then + simplifies by pruning branches outside the valid code-point range. 
*) let decision_table p = simplify (-1) Cset.max_code (decision_table (segments_of_partition p)) @@ -137,6 +158,9 @@ let table_name x = Hashtbl.add tables x s; s +(* [table (name, v)] generates a top-level [let __sedlex_table_N = "..."] + binding where the string encodes the byte array [v] (one byte per entry, + used for compact partition lookup). *) let table (name, v) = let n = Array.length v in let s = Bytes.create n in @@ -165,8 +189,10 @@ let reset_state () = Hashtbl.clear tables; Hashtbl.clear partitions -(* We duplicate the body for the EOF (-1) case rather than creating - an interior utility function. *) +(* [partition (name, p)] generates a top-level [let __sedlex_partition_N c = ...] + function that maps a code point [c] to its equivalence class index using + the decision tree built from partition [p]. The EOF case (-1) is handled + naturally by the decision tree since [simplify] uses -1 as the lower bound. *) let partition (name, p) = let loc = default_loc in let rec gen_tree = function @@ -190,6 +216,9 @@ let partition (name, p) = (* Code generation for the automata *) +(* [best_final finals] returns the lowest-numbered accepting rule for this + state, or [None] if the state is not accepting. Lowest-numbered = highest + priority, matching the first-match semantics of [match%sedlex]. *) let best_final final = let fin = ref None in for i = Array.length final - 1 downto 0 do @@ -199,6 +228,10 @@ let best_final final = let state_fun state = Printf.sprintf "__sedlex_state_%i" state +(* [call_state lexbuf auto state] generates the expression that transitions + into DFA [state]. If the state has no outgoing transitions (a sink), it + returns the accepting rule index directly; otherwise it emits a function + call to the generated state function. 
*) let call_state lexbuf (auto : Sedlex.dfa) state = let { Sedlex.trans; finals } = auto.(state) in if Array.length trans = 0 then ( @@ -207,6 +240,11 @@ let call_state lexbuf (auto : Sedlex.dfa) state = | None -> assert false) else appfun (state_fun state) [lexbuf] +(* [gen_tag_ops lexbuf ops cont] wraps [cont] in a sequence of tag + operation calls. Each [Set_position t] becomes a call to + [__private__set_mem_pos], and each [Set_value (cell, v)] becomes a call to + [__private__set_mem_value]. Operations are folded right so they execute + before [cont]. *) let gen_tag_ops lexbuf (ops : Sedlex.tag_op list) cont = let loc = default_loc in List.fold_right @@ -223,6 +261,15 @@ let gen_tag_ops lexbuf (ops : Sedlex.tag_op list) cont = [%e acc]]) ops cont +(* [gen_state (lexbuf_name, lexbuf) auto i {trans; finals}] generates the + function [__sedlex_state_N] for DFA state [i]. The function: + 1. If the state is accepting, calls [mark] to save the current position. + 2. Reads the next code point, maps it through the partition function to + get an equivalence class index, then pattern-matches on that index. + 3. Each transition arm executes its tag operations then calls the target + state function (or returns the rule index for sink states). + 4. The default arm calls [backtrack] to return the last accepted rule. + Returns [] for accepting states with no outgoing transitions (sinks). *) let gen_state (lexbuf_name, lexbuf) (auto : Sedlex.dfa) i { Sedlex.trans; finals } = let loc = default_loc in @@ -264,6 +311,10 @@ let gen_state (lexbuf_name, lexbuf) (auto : Sedlex.dfa) i Sedlexing.mark [%e lexbuf] [%e eint ~loc i]; [%e body ()]] +(* [gen_recflag auto] determines whether the generated state functions need + [let rec]. If every transition leads to a sink state (no further + transitions), the functions are non-recursive; otherwise they are + mutually recursive. 
*) let gen_recflag (auto : Sedlex.dfa) = (* The generated function is not recursive if the transitions end in states with no further transitions. *) @@ -278,6 +329,13 @@ let gen_recflag (auto : Sedlex.dfa) = Nonrecursive with Exit -> Recursive +(* [gen_definition lexbuf_with_name compiled cases error] generates the + complete lexer expression for one [match%sedlex] block: + - Defines all [__sedlex_state_N] functions via [let rec ... in]. + - Emits a start sequence: [start lexbuf], then (if the pattern has [as] + bindings) [init_mem] + initial tag operations, then calls state 0. + - Wraps the result in a [match] on the returned rule index, dispatching + to user-provided right-hand-side expressions, with [error] as default. *) let gen_definition ((_, lexbuf) as lexbuf_with_name) (compiled : Sedlex.compiled) l error = let loc = default_loc in @@ -332,6 +390,9 @@ let string_of_encoding = function | Latin1 -> "Latin-1" | Ascii -> "ASCII" +(* [rev_csets_of_string ~loc ~encoding s] decodes string [s] under + [encoding] and returns a list of singleton char sets in reverse order + (one per character). Used to build sequence regexps from string literals. *) let rev_csets_of_string ~loc ~encoding s = match encoding with | Utf8 -> @@ -357,18 +418,42 @@ let rev_csets_of_string ~loc ~encoding s = done; !l +(* [repeat r (n, m)] expands bounded repetition [Rep(r, n..m)] into a + sequence of [n] mandatory copies followed by [m - n] optional copies. *) let rec repeat r = function | 0, 0 -> Sedlex.eps | 0, m -> Sedlex.alt Sedlex.eps (Sedlex.seq r (repeat r (0, m - 1))) | n, m -> Sedlex.seq r (repeat r (n - 1, m - 1)) +(* Code generation for `as` bindings. + + [regexp_of_pattern] parses OCaml patterns into regexps and collects a + [tag_info list] for every [as] binding it encounters. Each [tag_info] + records the variable name and the memory cell indices for its start/end + positions. 
For or-patterns like [(p1 as x) | (p2 as x)], each branch + gets its own tag pair and a discriminator [(cell, value)] so the + generated code can determine which branch matched. + + [gen_binding_code] turns the [tag_info list] into [let] bindings that + extract sub-matches from the lexbuf's memory cells. For a simple + binding it emits: + [let x = { lexbuf; pos = mem.(start_tag); len = ... } in ...] + For or-patterns with discriminators it emits a chain of if/else + checks on the discriminator cell to select the right tag pair. *) + type tag_info = { name : string; start_tag : int; end_tag : int; disc : (int * int) list; + (* Discriminator conditions: [(cell, value)] pairs. For simple + bindings this is [[]]. For or-patterns, each branch has a + distinct value in the shared discriminator cell. *) } +(* [gen_sub_lexeme lexbuf st et] generates an expression that reads the + start and end positions from memory cells [st] and [et] and constructs + a [Sedlexing.submatch] record. *) let gen_sub_lexeme lexbuf st et = let loc = default_loc in [%expr @@ -376,6 +461,11 @@ let gen_sub_lexeme lexbuf st et = let __e = Sedlexing.__private__mem_pos [%e lexbuf] [%e eint ~loc et] in { Sedlexing.lexbuf = [%e lexbuf]; pos = __s; len = __e - __s }] +(* [gen_binding_code lexbuf tag_info action] wraps [action] with [let] + bindings that extract sub-match values from the lexbuf's memory cells. + For a single binding (no or-pattern), emits a direct sub_lexeme call. + For or-patterns with multiple tag pairs, emits a chain of + [if disc_cell = value then ...] to select the correct tags at runtime. *) let gen_binding_code lexbuf (tag_info : tag_info list) action = let loc = default_loc in ignore loc; @@ -434,6 +524,12 @@ let gen_binding_code lexbuf (tag_info : tag_info list) action = [%e acc]]) by_name action) +(* [regexp_of_pattern env pattern] parses an OCaml pattern AST into a + [Sedlex.regexp] and a [tag_info list] for any [as] bindings encountered. 
+ [env] maps names to previously defined regexps (built-in + user-defined). + Handles all sedlex pattern constructors: literals, Star, Plus, Rep, Opt, + Compl, Sub, Intersect, Chars, character intervals, tuple (sequence), + or-patterns, and [Ppat_alias] for [as] bindings. *) let regexp_of_pattern env = let no_tags r = (r, ([] : tag_info list)) in let reject_tags loc ctx (r, tags) = @@ -633,6 +729,16 @@ let regexp_of_pattern env = in aux ~encoding:Ascii +(* [handle_sedlex_match_ ~env ~map_rhs match_expr] is the main entry point + for compiling a [match%sedlex lexbuf with ...] expression. It: + 1. Extracts the lexbuf identifier and match cases. + 2. Parses each case's pattern into a regexp + tag_info via [regexp_of_pattern]. + 3. Compiles all regexps into a single DFA via [Sedlex.compile]. + 4. Applies [map_rhs] to each case's right-hand side (for recursive PPX + expansion of nested [match%sedlex] blocks). + 5. Wraps each RHS with [gen_binding_code] for [as] binding extraction. + 6. Generates the full lexer code via [gen_definition]. + Returns [(generated_expr, dfa)] for use by the main mapper and tests. *) let handle_sedlex_match_ ~env ~map_rhs match_expr = let lexbuf = match match_expr with @@ -692,6 +798,10 @@ let previous = ref [] let regexps = ref [] let should_set_cookies = ref false +(* The ppxlib AST mapper. It carries an [env] of named regexps (built-in + Unicode categories + user [let%sedlex.regexp] definitions). The [toplevel] + flag distinguishes the outermost structure (where partition/table + definitions are emitted) from nested modules. *) let mapper = object (this) inherit Ast_traverse.map as super @@ -777,6 +887,8 @@ let mapper = else fst (this#structure_with_regexps l) end +(* ppxlib cookie handlers: regexp definitions survive across compilation + units by round-tripping through a ppxlib cookie named "sedlex.regexps". 
*) let pre_handler cookies = previous := match Driver.Cookies.get cookies "sedlex.regexps" Ast_pattern.__ with diff --git a/src/syntax/sedlex.ml b/src/syntax/sedlex.ml index 63abf05..84e5f04 100644 --- a/src/syntax/sedlex.ml +++ b/src/syntax/sedlex.ml @@ -2,6 +2,74 @@ (* See the attached LICENSE file. *) (* Copyright 2005, 2013 by Alain Frisch and LexiFi. *) +(* + Implementation overview + ======================= + + Sedlex compiles regular expressions to Tagged DFAs. + + 1. NFA construction (type regexp = node -> node) + Each regexp combinator (chars, seq, alt, rep, ...) is a function that, + given a successor node, builds a fragment of NFA and returns its entry + node. This continuation-passing style makes sequencing natural (seq is + just function composition) and avoids explicit epsilon nodes for + concatenation. + + 2. Tags for `as` bindings (Laurikari-style) + NFA nodes may carry a tag operation (Set_position or Set_value). + [bind] wraps a sub-regexp with start/end tagged epsilon nodes so the + DFA can record sub-match positions at runtime. Discriminator tags + (Set_value) disambiguate or-patterns where multiple branches bind the + same name. + + 3. Determinization (compile) + Classic subset construction, extended to handle tags (Laurikari, NFAs + with Tagged Transitions, 2000). + + Each DFA state is a set of NFA nodes (represented as a list, identified + by physical identity via memq). DFA states are memoized in a hash table + keyed by node lists. + + Tags live on epsilon nodes in the NFA, so they are naturally collected + during epsilon closure. To compute a DFA transition for character + set [c]: follow all NFA [c]-transitions from nodes in the current DFA + state, then compute the epsilon closure of the targets. Every tagged + node visited during closure contributes its tag operation to the + transition's tag list. Each (target DFA state, tag list) pair becomes + one DFA transition: "on input [c], execute these tag ops, go to state N." 
+ + In general, tagged determinization must resolve conflicts when multiple + active NFA paths write different values to the same tag (Laurikari uses + per-path tag valuations and priority ordering). Sedlex avoids this: + each [as] binding gets unique tag IDs, and [as] is rejected inside + repetition operators, so no two active NFA paths ever write to the + same tag. Tags can therefore be collected as a flat list with no + conflict resolution. + + Possible future optimizations (see #175) + ----------------------------------------- + + Tag optimizations for `as` bindings: + - Known-offset elision: when a sub-match boundary is at a fixed offset + from the start or end of the overall match, replace the tag with a + computed offset and eliminate the memory cell entirely. This is the + most impactful optimization since constant offsets are the common case. + - Self-loop tag delay: tags on self-loops that also appear on all + entering transitions can be removed from those transitions and emitted + as a "set previous position" on exit. This turns O(n) tag writes in + loops (e.g. Star) into O(1) on exit. + - Intra-rule tag coalescing: tags with identical occurrence signatures + (same presence in init_tags and same set of transitions) can share a + single memory cell. + - Cross-rule cell sharing: memory cells from non-interfering rules can + share the same physical slot via liveness analysis and graph coloring. + + DFA construction: + - DFA minimization: the generated DFA is not minimized. Hopcroft's or + Moore's algorithm could reduce state count, especially for patterns with + many character classes that converge to the same accepting state. +*) + module Cset = Sedlex_cset (* NFA *) @@ -9,10 +77,10 @@ module Cset = Sedlex_cset type tag_op = Set_position of int | Set_value of int * int type node = { - id : int; - mutable eps : node list; - mutable trans : (Cset.t * node) list; - tag : tag_op option; + id : int; (** Unique identifier, used for sorting transitions by target. 
*) + mutable eps : node list; (** Epsilon successors (no input consumed). *) + mutable trans : (Cset.t * node) list; (** Char-set-labelled transitions. *) + tag : tag_op option; (** Tag operation executed when entering this node. *) } (* Compilation regexp -> NFA *) @@ -31,6 +99,10 @@ let new_tagged_node tag_op = let seq r1 r2 succ = r1 (r2 succ) +(* [is_chars final node] tests whether [node] is a simple character-set + node: no epsilon edges, a single transition to [final], and no tag. + Used by [alt] to merge adjacent character classes into a single [chars] + node instead of introducing an epsilon fork. *) let is_chars final = function | { eps = []; trans = [(c, f)]; tag = None; _ } when f == final -> Some c | _ -> None @@ -112,6 +184,8 @@ let bind_disc r cell value = in wrapped +(* [compile_re re] instantiates a regexp by creating a fresh final node + and passing it as the successor. Returns [(entry_node, final_node)]. *) let compile_re re = let final = new_node () in (re final, final) @@ -119,8 +193,14 @@ let compile_re re = (* Determinization *) type state = node list -(* A state of the DFA corresponds to a set of nodes in the NFA. *) - +(* A DFA state is a set of NFA nodes (subset construction). + Membership is checked by physical identity (List.memq) since each + node is created exactly once by new_node/new_tagged_node. *) + +(* [add_node (state, tags) node] adds [node] to the NFA-node set [state] + via epsilon closure: it follows all epsilon edges recursively, collecting + any tag operations encountered along the way. Returns the updated + (state, tags) pair. Physical identity (memq) prevents revisiting nodes. *) let rec add_node (state, tags) node = if List.memq node state then (state, tags) else ( @@ -129,6 +209,14 @@ let rec add_node (state, tags) node = and add_nodes acc nodes = List.fold_left add_node acc nodes +(* [transition state] computes all outgoing DFA transitions from a DFA state. + Three phases: + 1. 
Normalize: collect all NFA transitions from all nodes in [state], + sort by target node id, and merge char sets for identical targets. + 2. Split: make char sets pairwise disjoint so each DFA transition fires + for an unambiguous set of code points. + 3. Epsilon closure: for each disjoint char set, compute the epsilon + closure of the target NFA nodes, collecting tag operations. *) let transition (state : state) = (* Merge transition with the same target *) let rec norm = function @@ -174,6 +262,12 @@ type dfa_state = { type dfa = dfa_state array type compiled = { dfa : dfa; init_tags : tag_op list; num_tags : int } +(* [compile rs] determinizes the NFA for an array of regexp rules. + Each rule is compiled to an NFA (entry node, final node) pair. The initial + DFA state is the epsilon closure of all entry nodes. States are explored + via [transition] and memoized in a hash table keyed by NFA node lists + (physical identity). Returns a {compiled} record with the DFA, initial + tag operations, and total number of memory cells needed. *) let compile rs = let rs = Array.map compile_re rs in let counter = ref 0 in diff --git a/src/syntax/sedlex.mli b/src/syntax/sedlex.mli index c82484c..d4f0c6b 100644 --- a/src/syntax/sedlex.mli +++ b/src/syntax/sedlex.mli @@ -2,40 +2,111 @@ (* See the attached LICENSE file. *) (* Copyright 2005, 2013 by Alain Frisch and LexiFi. *) +(** {2 Regexp combinators} + + Regular expressions are built from combinators and compiled to a DFA. *) + +(** Abstract type of regular expressions. *) type regexp +(** [chars cset] matches a single code point in [cset]. *) val chars : Sedlex_cset.t -> regexp + +(** [seq r1 r2] matches [r1] followed by [r2] (concatenation). *) val seq : regexp -> regexp -> regexp + +(** [alt r1 r2] matches [r1] or [r2] (alternation). When both operands are + simple [chars] regexps, their character sets are merged into one. 
*) val alt : regexp -> regexp -> regexp + +(** [rep r] matches zero or more repetitions of [r] (Kleene star). *) val rep : regexp -> regexp + +(** [plus r] matches one or more repetitions of [r]. *) val plus : regexp -> regexp + +(** The empty regexp — matches the empty string (epsilon). *) val eps : regexp + +(** If the argument is a single [chars] regexp, returns a regexp which matches + the complement set. Otherwise returns [None]. *) val compl : regexp -> regexp option -(* If the argument is a single [chars] regexp, returns a regexp - which matches the complement set. Otherwise returns [None]. *) +(** If each argument is a single [chars] regexp, returns a regexp which matches + the set (arg1 - arg2). Otherwise returns [None]. *) val subtract : regexp -> regexp -> regexp option -(* If each argument is a single [chars] regexp, returns a regexp - which matches the set (arg1 - arg2). Otherwise returns [None]. *) +(** If each argument is a single [chars] regexp, returns a regexp which matches + the intersection set. Otherwise returns [None]. *) val intersection : regexp -> regexp -> regexp option -(* If each argument is a single [chars] regexp, returns a regexp - which matches the intersection set. Otherwise returns [None]. *) -type tag_op = Set_position of int | Set_value of int * int +(** {2 Tagged DFA for [as] bindings} + + Named sub-match bindings (e.g. [Star any as x]) are implemented using tagged + transitions in the DFA. Each [as] binding introduces a pair of tags that + record the start and end positions of the sub-match in the lexbuf's memory + cells at runtime. + Or-patterns [(p1 as x) | (p2 as x)] additionally use discriminator cells: + integer values that record which branch was taken, so the PPX can extract + the correct positions at match time. *) + +(** Tag operations emitted on DFA transitions. *) +type tag_op = + | Set_position of int + (** [Set_position i]: record the current lexbuf position in memory cell + [i]. 
*) + | Set_value of int * int + (** [Set_value (cell, v)]: record integer [v] in memory cell [cell] (used + for or-pattern discriminators). *) + +(** [bind r] wraps [r] with start/end tag epsilon nodes. Returns + [(wrapped_regexp, start_tag, end_tag)] where [start_tag] and [end_tag] are + the allocated memory cell indices. *) val bind : regexp -> regexp * int * int + +(** Allocate a fresh memory cell for an or-pattern discriminator. *) val new_disc_cell : unit -> int + +(** [bind_disc r cell value] wraps [r] with an epsilon node that sets [cell] to + [value] on entry. Used to tag each branch of an or-pattern so the PPX can + tell which branch matched. *) val bind_disc : regexp -> int -> int -> regexp + +(** Reset the tag counter. Called before compiling each [match%sedlex] block. *) val reset_tags : unit -> unit +(** {2 DFA compilation} *) + type dfa_state = { trans : (Sedlex_cset.t * int * tag_op list) array; + (** Each transition: (character set, target state, tag operations to + execute when this transition fires). *) finals : bool array; + (** [finals.(i)] is [true] if this state is accepting for rule [i]. *) } +(** DFA states, indexed by state number. State 0 is the initial state. *) type dfa = dfa_state array -type compiled = { dfa : dfa; init_tags : tag_op list; num_tags : int } +(** Result of [compile]. *) +type compiled = { + dfa : dfa; + init_tags : tag_op list; + (** Tag operations to execute before entering the DFA (from epsilon + closure of the initial NFA nodes). *) + num_tags : int; + (** Total number of memory cells needed at runtime. When [num_tags = 0], + no memory is allocated (pattern has no [as] bindings). *) +} + +(** [compile rules] determinizes the NFA for an array of regexp rules using + subset construction. Returns the DFA, initial tag operations, and the total + number of memory cells needed for [as] bindings. State 0 is always the + initial state. 
*) val compile : regexp array -> compiled + +(** [dfa_to_dot dfa] returns a Graphviz DOT representation of the DFA, including + state labels, accepting state markers, transition character sets, and tag + operations on edges. *) val dfa_to_dot : dfa -> string From d06adb3eb5991ea1ac106ca617b5d9fc4d6d88b8 Mon Sep 17 00:00:00 2001 From: Hugo Heuzard Date: Tue, 24 Mar 2026 16:39:45 +0100 Subject: [PATCH 23/24] Add compile error tests for all PPX error paths Add [%compile_error] test extension that applies the sedlex mapper to an expression, catches errors, and prints them with OCaml's caret display (line numbers stripped for stability). Expose map_expression in ppx_sedlex for this purpose. 27 expect tests in test/codegen/test_errors.ml covering every error path in ppx_sedlex.ml: as-binding restrictions, operator misuse, malformed strings, invalid patterns, match structure, and regexp definition errors. Co-Authored-By: Claude Opus 4.6 (1M context) --- src/syntax/ppx_sedlex.ml | 2 + src/syntax/ppx_sedlex.mli | 4 + test/codegen/test_errors.ml | 331 +++++++++++++++++++++++++++++++ test/ppx_test/dune | 2 +- test/ppx_test/ppx_sedlex_test.ml | 36 +++- 5 files changed, 373 insertions(+), 2 deletions(-) create mode 100644 test/codegen/test_errors.ml diff --git a/src/syntax/ppx_sedlex.ml b/src/syntax/ppx_sedlex.ml index f36bd73..cf668b7 100644 --- a/src/syntax/ppx_sedlex.ml +++ b/src/syntax/ppx_sedlex.ml @@ -887,6 +887,8 @@ let mapper = else fst (this#structure_with_regexps l) end +let map_expression expr = mapper#expression expr + (* ppxlib cookie handlers: regexp definitions survive across compilation units by round-tripping through a ppxlib cookie named "sedlex.regexps". *) let pre_handler cookies = diff --git a/src/syntax/ppx_sedlex.mli b/src/syntax/ppx_sedlex.mli index 5a9b6e8..a9a1bd8 100644 --- a/src/syntax/ppx_sedlex.mli +++ b/src/syntax/ppx_sedlex.mli @@ -23,3 +23,7 @@ val reset_state : unit -> unit Returns the generated expression and the DFA automaton. 
*) val handle_sedlex_match : Ppxlib.Parsetree.expression -> Ppxlib.Parsetree.expression * Sedlex.dfa + +(** [map_expression expr] applies the sedlex mapper to [expr], processing any + [[%sedlex]] or [[%sedlex.regexp?]] extensions it contains. *) +val map_expression : Ppxlib.Parsetree.expression -> Ppxlib.Parsetree.expression diff --git a/test/codegen/test_errors.ml b/test/codegen/test_errors.ml new file mode 100644 index 0000000..50745da --- /dev/null +++ b/test/codegen/test_errors.ml @@ -0,0 +1,331 @@ +(* Error tests for `as` bindings *) + +let%expect_test "error: as inside Star" = + [%compile_error + [%sedlex match buf with Star ('a' as x) -> ignore x | _ -> ()]]; + [%expect + {| + File "test/codegen/test_errors.ml", characters 33-43: + | [%sedlex match buf with Star ('a' as x) -> ignore x | _ -> ()]]; + ^^^^^^^^^^ + Error: Sedlex: 'as' bindings are not supported inside Star + |}] + +let%expect_test "error: as inside Plus" = + [%compile_error + [%sedlex match buf with Plus ('a' as x) -> ignore x | _ -> ()]]; + [%expect + {| + File "test/codegen/test_errors.ml", characters 33-43: + | [%sedlex match buf with Plus ('a' as x) -> ignore x | _ -> ()]]; + ^^^^^^^^^^ + Error: Sedlex: 'as' bindings are not supported inside Plus + |}] + +let%expect_test "error: as inside Opt" = + [%compile_error + [%sedlex match buf with Opt ('a' as x) -> ignore x | _ -> ()]]; + [%expect + {| + File "test/codegen/test_errors.ml", characters 32-42: + | [%sedlex match buf with Opt ('a' as x) -> ignore x | _ -> ()]]; + ^^^^^^^^^^ + Error: Sedlex: 'as' bindings are not supported inside Opt + |}] + +let%expect_test "error: as inside Rep" = + [%compile_error + [%sedlex + match buf with Rep ((('a' as x), 'b'), 2 .. 3) -> ignore x | _ -> ()]]; + [%expect + {| + File "test/codegen/test_errors.ml", characters 26-43: + | match buf with Rep ((('a' as x), 'b'), 2 .. 
3) -> ignore x | _ -> ()]]; + ^^^^^^^^^^^^^^^^^ + Error: Sedlex: 'as' bindings are not supported inside Rep + |}] + +let%expect_test "error: as inside Compl" = + [%compile_error + [%sedlex match buf with Compl ('a' as x) -> ignore x | _ -> ()]]; + [%expect + {| + File "test/codegen/test_errors.ml", characters 34-44: + | [%sedlex match buf with Compl ('a' as x) -> ignore x | _ -> ()]]; + ^^^^^^^^^^ + Error: Sedlex: 'as' bindings are not supported inside Compl + |}] + +let%expect_test "error: as inside Sub" = + [%compile_error + [%sedlex match buf with Sub (('a' as x), 'b') -> ignore x | _ -> ()]]; + [%expect + {| + File "test/codegen/test_errors.ml", characters 33-43: + | [%sedlex match buf with Sub (('a' as x), 'b') -> ignore x | _ -> ()]]; + ^^^^^^^^^^ + Error: Sedlex: 'as' bindings are not supported inside Sub + |}] + +let%expect_test "error: as inside Intersect" = + [%compile_error + [%sedlex match buf with Intersect (('a' as x), 'b') -> ignore x | _ -> ()]]; + [%expect + {| + File "test/codegen/test_errors.ml", characters 39-49: + | [%sedlex match buf with Intersect (('a' as x), 'b') -> ignore x | _ -> ()]]; + ^^^^^^^^^^ + Error: Sedlex: 'as' bindings are not supported inside Intersect + |}] + +let%expect_test "error: different names in or-pattern" = + [%compile_error + [%sedlex + match buf with ('a' as x) | ('b' as y) -> ignore (x, y) | _ -> ()]]; + [%expect + {| + File "test/codegen/test_errors.ml", characters 21-44: + | match buf with ('a' as x) | ('b' as y) -> ignore (x, y) | _ -> ()]]; + ^^^^^^^^^^^^^^^^^^^^^^^ + Error: Sedlex: both sides of '|' must bind the same names with 'as' + |}] + +(* Error tests for Sub/Intersect/Compl on multi-char regexps *) + +let%expect_test "error: Sub on multi-char regexp" = + [%compile_error [%sedlex match buf with Sub ("ab", 'a') -> () | _ -> ()]]; + [%expect + {| + File "test/codegen/test_errors.ml", characters 42-57: + | [%compile_error [%sedlex match buf with Sub ("ab", 'a') -> () | _ -> ()]]; + ^^^^^^^^^^^^^^^ + Error: 
Sedlex: the Sub operator can only applied to single-character length regexps + |}] + +let%expect_test "error: Intersect on multi-char regexp" = + [%compile_error + [%sedlex match buf with Intersect ("ab", 'a') -> () | _ -> ()]]; + [%expect + {| + File "test/codegen/test_errors.ml", characters 28-49: + | [%sedlex match buf with Intersect ("ab", 'a') -> () | _ -> ()]]; + ^^^^^^^^^^^^^^^^^^^^^ + Error: Sedlex: the Intersect operator can only applied to single-character length regexps + |}] + +let%expect_test "error: Compl on multi-char regexp" = + [%compile_error [%sedlex match buf with Compl "ab" -> () | _ -> ()]]; + [%expect + {| + File "test/codegen/test_errors.ml", characters 42-52: + | [%compile_error [%sedlex match buf with Compl "ab" -> () | _ -> ()]]; + ^^^^^^^^^^ + Error: Sedlex: the Compl operator can only applied to a single-character length regexp + |}] + +let%expect_test "error: Sub with one argument" = + [%compile_error [%sedlex match buf with Sub 'a' -> () | _ -> ()]]; + [%expect + {| + File "test/codegen/test_errors.ml", characters 42-49: + | [%compile_error [%sedlex match buf with Sub 'a' -> () | _ -> ()]]; + ^^^^^^^ + Error: Sedlex: the Sub operator requires two arguments, like Sub(a,b) + |}] + +let%expect_test "error: Intersect with one argument" = + [%compile_error [%sedlex match buf with Intersect 'a' -> () | _ -> ()]]; + [%expect + {| + File "test/codegen/test_errors.ml", characters 42-55: + | [%compile_error [%sedlex match buf with Intersect 'a' -> () | _ -> ()]]; + ^^^^^^^^^^^^^ + Error: Sedlex: the Intersect operator requires two arguments, like Intersect(a,b) + |}] + +(* Error tests for Rep *) + +let%expect_test "error: Rep invalid range" = + [%compile_error [%sedlex match buf with Rep ('a', 5 .. 2) -> () | _ -> ()]]; + [%expect + {| + File "test/codegen/test_errors.ml", characters 42-59: + | [%compile_error [%sedlex match buf with Rep ('a', 5 .. 
2) -> () | _ -> ()]]; + ^^^^^^^^^^^^^^^^^ + Error: Sedlex: Invalid range for Rep operator + |}] + +let%expect_test "error: Rep with non-integer" = + [%compile_error + [%sedlex match buf with Rep ('a', 'b' .. 'c') -> () | _ -> ()]]; + [%expect + {| + File "test/codegen/test_errors.ml", characters 28-49: + | [%sedlex match buf with Rep ('a', 'b' .. 'c') -> () | _ -> ()]]; + ^^^^^^^^^^^^^^^^^^^^^ + Error: Sedlex: Rep must take an integer constant or interval + |}] + +let%expect_test "error: Rep with one argument" = + [%compile_error [%sedlex match buf with Rep 'a' -> () | _ -> ()]]; + [%expect + {| + File "test/codegen/test_errors.ml", characters 42-49: + | [%compile_error [%sedlex match buf with Rep 'a' -> () | _ -> ()]]; + ^^^^^^^ + Error: Sedlex: the Rep operator takes 2 arguments + |}] + +(* Error tests for Compl/Chars *) + +let%expect_test "error: Compl without argument" = + [%compile_error [%sedlex match buf with Compl -> () | _ -> ()]]; + [%expect + {| + File "test/codegen/test_errors.ml", characters 42-47: + | [%compile_error [%sedlex match buf with Compl -> () | _ -> ()]]; + ^^^^^ + Error: Sedlex: the Compl operator requires an argument + |}] + +let%expect_test "error: Chars with non-string" = + [%compile_error [%sedlex match buf with Chars 42 -> () | _ -> ()]]; + [%expect + {| + File "test/codegen/test_errors.ml", characters 42-50: + | [%compile_error [%sedlex match buf with Chars 42 -> () | _ -> ()]]; + ^^^^^^^^ + Error: Sedlex: the Chars operator requires a string argument + |}] + +(* Error tests for unbound regexp and invalid patterns *) + +let%expect_test "error: unbound regexp" = + [%compile_error [%sedlex match buf with nonexistent_regexp -> () | _ -> ()]]; + [%expect + {| + File "test/codegen/test_errors.ml", characters 42-60: + | [%compile_error [%sedlex match buf with nonexistent_regexp -> () | _ -> ()]]; + ^^^^^^^^^^^^^^^^^^ + Error: Sedlex: unbound regexp nonexistent_regexp + |}] + +let%expect_test "error: invalid pattern" = + [%compile_error 
[%sedlex match buf with Some 'a' -> () | _ -> ()]]; + [%expect + {| + File "test/codegen/test_errors.ml", characters 42-50: + | [%compile_error [%sedlex match buf with Some 'a' -> () | _ -> ()]]; + ^^^^^^^^ + Error: Sedlex: this pattern is not a valid regexp + |}] + +let%expect_test "error: invalid interval type" = + [%compile_error [%sedlex match buf with 1.0 .. 2.0 -> () | _ -> ()]]; + [%expect + {| + File "test/codegen/test_errors.ml", characters 42-52: + | [%compile_error [%sedlex match buf with 1.0 .. 2.0 -> () | _ -> ()]]; + ^^^^^^^^^^ + Error: Sedlex: this pattern is not a valid interval regexp + |}] + +let%expect_test "error: invalid constant pattern" = + [%compile_error [%sedlex match buf with 1.0 -> () | _ -> ()]]; + [%expect + {| + File "test/codegen/test_errors.ml", characters 42-45: + | [%compile_error [%sedlex match buf with 1.0 -> () | _ -> ()]]; + ^^^ + Error: Sedlex: this pattern is not a valid regexp + |}] + +(* Error tests for match structure *) + +let%expect_test "error: missing catch-all" = + [%compile_error [%sedlex match buf with 'a' -> ()]]; + [%expect + {| + File "test/codegen/test_errors.ml", characters 42-45: + | [%compile_error [%sedlex match buf with 'a' -> ()]]; + ^^^ + Error: Sedlex: the last branch must be a catch-all error case + |}] + +let%expect_test "error: when guard" = + [%compile_error [%sedlex match buf with 'a' when true -> () | _ -> ()]]; + [%expect + {| + File "test/codegen/test_errors.ml", characters 51-55: + | [%compile_error [%sedlex match buf with 'a' when true -> () | _ -> ()]]; + ^^^^ + Error: Sedlex: 'when' guards are not supported + |}] + +let%expect_test "error: matched expression must be a single identifier" = + [%compile_error [%sedlex match foo bar with 'a' -> () | _ -> ()]]; + [%expect + {| + File "test/codegen/test_errors.ml", characters 33-40: + | [%compile_error [%sedlex match foo bar with 'a' -> () | _ -> ()]]; + ^^^^^^^ + Error: Sedlex: the matched expression must be a single identifier + |}] + +(* Error 
tests for malformed strings *) + +let%expect_test "error: malformed ASCII string" = + [%compile_error [%sedlex match buf with "\x80" -> () | _ -> ()]]; + [%expect + {| + File "test/codegen/test_errors.ml", characters 42-48: + | [%compile_error [%sedlex match buf with "\x80" -> () | _ -> ()]]; + ^^^^^^ + Error: Sedlex: Malformed ASCII string + |}] + +let%expect_test "error: malformed UTF-8 string" = + [%compile_error [%sedlex match buf with Utf8 "\x80" -> () | _ -> ()]]; + [%expect + {| + File "test/codegen/test_errors.ml", characters 47-53: + | [%compile_error [%sedlex match buf with Utf8 "\x80" -> () | _ -> ()]]; + ^^^^^^ + Error: Sedlex: Malformed UTF-8 string + |}] + +let%expect_test "error: non-ASCII char interval" = + [%compile_error [%sedlex match buf with '\x80' .. '\xff' -> () | _ -> ()]]; + [%expect + {| + File "test/codegen/test_errors.ml", characters 42-58: + | [%compile_error [%sedlex match buf with '\x80' .. '\xff' -> () | _ -> ()]]; + ^^^^^^^^^^^^^^^^ + Error: Sedlex: this pattern is not a valid ASCII interval regexp + |}] + +(* Error tests for sedlex extension misuse *) + +let%expect_test "error: sedlex not on match expression" = + [%compile_error [%sedlex 42]]; + [%expect + {| + File "test/codegen/test_errors.ml", characters 18-30: + | [%compile_error [%sedlex 42]]; + ^^^^^^^^^^^^ + Error: Sedlex: the %sedlex extension is only recognized on match expressions + |}] + +(* Error tests for regexp definitions *) + +let%expect_test "error: as in regexp definition" = + [%compile_error + let dummy = [%sedlex.regexp? 'a' as x] in + ignore dummy]; + [%expect + {| + File "test/codegen/test_errors.ml", characters 33-41: + | let dummy = [%sedlex.regexp? 
'a' as x] in + ^^^^^^^^ + Error: Sedlex: 'as' bindings are not allowed in regexp definitions + |}] diff --git a/test/ppx_test/dune b/test/ppx_test/dune index 56eb19e..e67d873 100644 --- a/test/ppx_test/dune +++ b/test/ppx_test/dune @@ -1,6 +1,6 @@ (library (name ppx_sedlex_test) (kind ppx_rewriter) - (libraries ppxlib sedlex_ppx) + (libraries ppxlib sedlex_ppx str) (preprocess (pps ppxlib.metaquot))) diff --git a/test/ppx_test/ppx_sedlex_test.ml b/test/ppx_test/ppx_sedlex_test.ml index 2957aa5..c87b3ac 100644 --- a/test/ppx_test/ppx_sedlex_test.ml +++ b/test/ppx_test/ppx_sedlex_test.ml @@ -17,9 +17,43 @@ let expand ~ctxt:_ expr = print_string [%e Ast_builder.Default.estring ~loc code_str]; print_newline ()] +let strip_line_numbers s = + let s = Str.global_replace (Str.regexp "line [0-9]+, ") "" s in + let blank_digits m = + let t = Str.matched_string m in + String.init (String.length t) (fun i -> if t.[i] = '|' then '|' else ' ') + in + Str.global_substitute (Str.regexp "^ *[0-9]+ |") blank_digits s + +let expand_error ~ctxt:_ expr = + P.reset_state (); + S.reset_tags (); + let loc = Location.none in + let msg = + try + let _ = P.map_expression expr in + "NO ERROR" + with exn -> + let buf = Buffer.create 256 in + let fmt = Format.formatter_of_buffer buf in + Location.report_exception fmt exn; + Format.pp_print_flush fmt (); + strip_line_numbers (Buffer.contents buf) + in + P.reset_state (); + [%expr + print_string [%e Ast_builder.Default.estring ~loc msg]; + print_newline ()] + let ext = Extension.V3.declare "sedlex_test" Extension.Context.expression Ast_pattern.(single_expr_payload __) expand -let () = Driver.register_transformation "sedlex_test" ~extensions:[ext] +let ext_error = + Extension.V3.declare "compile_error" Extension.Context.expression + Ast_pattern.(single_expr_payload __) + expand_error + +let () = + Driver.register_transformation "sedlex_test" ~extensions:[ext; ext_error] From d51b71e9046219156b7815cf741b170b5e0bcf1c Mon Sep 17 00:00:00 2001 From: 
Hugo Heuzard Date: Tue, 24 Mar 2026 16:46:34 +0100 Subject: [PATCH 24/24] Document named capture groups in README Add section covering `as` binding syntax, submatch extraction functions, or-pattern support, and operator restrictions. Co-Authored-By: Claude Opus 4.6 (1M context) --- README.md | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/README.md b/README.md index 7b206b1..7ec8328 100644 --- a/README.md +++ b/README.md @@ -153,6 +153,38 @@ In particular, `Star r1, r2` is `(Star r1), r2` (not `Star (r1, r2)`), and `r1 | r2, r3` is `r1 | (r2, r3)` (not `(r1 | r2), r3`). Use parentheses to override: `Star (r1, r2)`, `(r1 | r2), r3`. +### Named capture groups (`as` bindings) + +You can capture sub-matches using OCaml's `as` pattern syntax: + +```ocaml +match%sedlex buf with +| (Plus ('0'..'9') as num), '.', (Plus ('0'..'9') as frac) -> + let n = Sedlexing.Utf8.of_submatch num in + let f = Sedlexing.Utf8.of_submatch frac in + Printf.printf "integer=%s fractional=%s\n" n f +| _ -> () +``` + +Each `as` binding produces a value of type `Sedlexing.submatch`. Use the +extraction functions to obtain the matched content: + +- `Sedlexing.Utf8.of_submatch s` returns the sub-match as a UTF-8 string. +- `Sedlexing.Latin1.of_submatch s` returns the sub-match as a Latin-1 string. +- `Sedlexing.lexeme_of_submatch s` returns the sub-match as a `Uchar.t array`. + +Or-patterns work as expected — both sides must bind the same names: + +```ocaml +match%sedlex buf with +| ("0x", Plus hex_digit as n) | (Plus ('0'..'9') as n) -> + Sedlexing.Utf8.of_submatch n +| _ -> ... +``` + +**Restriction:** `as` bindings are not allowed inside repetition operators +(`Star`, `Plus`, `Opt`, `Rep`) or set operators (`Compl`, `Sub`, `Intersect`). + ### Encoding - The OCaml source is assumed to be encoded in UTF-8.