diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml new file mode 100644 index 0000000..ad2de38 --- /dev/null +++ b/.github/workflows/docs.yml @@ -0,0 +1,42 @@ +on: + push: + branches: + - main + pull_request: + +name: Generate docs + +# Make sure CI fails on all warnings, and pretend we're docs.rs +env: + RUSTFLAGS: "-Dwarnings -Adead-code -Aunused-imports -Aunused-macros" + # FIXME: in future, use the below to better match docs.rs. At the time of + # writing (2025/10/30), a very common documentation-related nightly + # feature attribute (doc_auto_cfg) has been renamed (to doc_cfg) which + # is resulting in the pipeline failing due to transitive dependencies + # using the removed feature (even though I specify --no-deps). Not + # fully matching docs.rs also causes some warnings to get emitted + # while building SLWL, so allow those for now. + # RUSTFLAGS: "-Dwarnings --cfg docsrs" + +jobs: + generate-docs: + name: ${{ matrix.crate }} + runs-on: ubuntu-latest + strategy: + matrix: + crate: + - fontheight + - harfshapedfa + - static-lang-word-lists + steps: + # docs.rs uses nightly rust + - name: Setup nightly Rust + uses: dtolnay/rust-toolchain@nightly + - name: Checkout ${{ github.head_ref || github.ref_name }} + uses: actions/checkout@v4 + - name: Document + shell: bash + run: cargo +nightly doc --all-features --no-deps --package ${{ matrix.crate }} + # TODO: could we deploy this to GitHub pages somehow? 
+ # https://github.com/actions/upload-pages-artifact + # https://github.com/actions/deploy-pages diff --git a/Cargo.lock b/Cargo.lock index 589ffc4..4fadaaf 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -322,6 +322,15 @@ dependencies = [ "windows-sys 0.60.2", ] +[[package]] +name = "euclid" +version = "0.22.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ad9cdb4b747e485a12abb0e6566612956c7a1bafa3bdb8d682c5b6d403589e48" +dependencies = [ + "num-traits", +] + [[package]] name = "flate2" version = "1.1.1" @@ -344,11 +353,11 @@ dependencies = [ [[package]] name = "fontheight" -version = "0.1.8" +version = "0.2.0" dependencies = [ "harfrust", + "harfshapedfa", "itertools", - "kurbo", "ordered-float", "rayon", "skrifa", @@ -358,16 +367,22 @@ dependencies = [ [[package]] name = "fontheight-cli" -version = "0.1.0" +version = "0.1.1" dependencies = [ "anyhow", "clap", "clap-verbosity-flag", "env_logger", "fontheight", + "harfrust", + "harfshapedfa", "log", + "maud", + "ordered-float", "rayon", + "skrifa", "static-lang-word-lists", + "svg", ] [[package]] @@ -394,11 +409,23 @@ dependencies = [ "smallvec", ] +[[package]] +name = "harfshapedfa" +version = "0.1.0" +dependencies = [ + "harfrust", + "indexmap", + "kurbo", + "ordered-float", + "skrifa", + "thiserror", +] + [[package]] name = "hashbrown" -version = "0.15.3" +version = "0.16.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "84b26c544d002229e640969970a2e74021aadf6e2f96372b9c58eff97de08eb3" +checksum = "5419bdc4f6a9207fbeba6d11b604d481addf78ecd10c11ad51e76c2f6482748d" [[package]] name = "heck" @@ -408,9 +435,9 @@ checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" [[package]] name = "indexmap" -version = "2.11.4" +version = "2.12.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4b0f83760fb341a774ed326568e19f5a863af4a952def8c39f9ab92fd95b88e5" +checksum = 
"6717a8d2a5a929a1a2eb43a12812498ed141a0bcfb7e8f7844fbdbe4303bba9f" dependencies = [ "equivalent", "hashbrown", @@ -431,13 +458,20 @@ dependencies = [ "either", ] +[[package]] +name = "itoa" +version = "1.0.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4a5f13b858c8d314ee3e8f639011f7ccefe71f97f96e50151fb991f267928e2c" + [[package]] name = "kurbo" -version = "0.11.1" +version = "0.12.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "89234b2cc610a7dd927ebde6b41dd1a5d4214cffaef4cf1fb2195d592f92518f" +checksum = "ce9729cc38c18d86123ab736fd2e7151763ba226ac2490ec092d1dd148825e32" dependencies = [ "arrayvec", + "euclid", "smallvec", ] @@ -480,6 +514,28 @@ version = "0.4.25" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "04cbf5b083de1c7e0222a7a51dbfdba1cbe1c6ab0b15e29fff3f6c077fd9cd9f" +[[package]] +name = "maud" +version = "0.27.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8156733e27020ea5c684db5beac5d1d611e1272ab17901a49466294b84fc217e" +dependencies = [ + "itoa", + "maud_macros", +] + +[[package]] +name = "maud_macros" +version = "0.27.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7261b00f3952f617899bc012e3dbd56e4f0110a038175929fa5d18e5a19913ca" +dependencies = [ + "proc-macro2", + "proc-macro2-diagnostics", + "quote", + "syn", +] + [[package]] name = "memchr" version = "2.7.4" @@ -557,11 +613,23 @@ dependencies = [ "unicode-ident", ] +[[package]] +name = "proc-macro2-diagnostics" +version = "0.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "af066a9c399a26e020ada66a034357a868728e72cd426f3adcd35f80d88d88c8" +dependencies = [ + "proc-macro2", + "quote", + "syn", + "version_check", +] + [[package]] name = "quote" -version = "1.0.41" +version = "1.0.38" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"ce25767e7b499d1b604768e7cde645d14cc8584231ea6b295e9c9eb22c02e1d1" +checksum = "0e4dccaaaf89514f546c693ddc140f729f958c247918a13380cccc6078391acc" dependencies = [ "proc-macro2", ] @@ -753,11 +821,17 @@ version = "0.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f" +[[package]] +name = "svg" +version = "0.18.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "94afda9cd163c04f6bee8b4bf2501c91548deae308373c436f36aeff3cf3c4a3" + [[package]] name = "syn" -version = "2.0.106" +version = "2.0.108" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ede7c438028d4436d71104916910f5bb611972c5cfd7f89b8300a8186e6fada6" +checksum = "da58917d35242480a05c2897064da0a80589a2a0476c9a3f2fdc83b53502e917" dependencies = [ "proc-macro2", "quote", @@ -843,6 +917,12 @@ version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821" +[[package]] +name = "version_check" +version = "0.9.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a" + [[package]] name = "walkdir" version = "2.5.0" diff --git a/Cargo.toml b/Cargo.toml index c1e5905..d35c204 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -2,14 +2,20 @@ resolver = "2" members = [ "cli", - "core", + "core", "harfshapedfa", "static-lang-word-lists", "xtask", ] [workspace.dependencies] +# Note: make sure harfrust's read-fonts matches skrifa's read-fonts +harfrust = "=0.3.2" +harfshapedfa = { version = "0.1.0", path = "harfshapedfa", features = ["pens"] } log = "0.4.25" +ordered-float = "5" rayon = "1.10" +# Note: make sure skrifa's read-fonts matches harfrust's read-fonts +skrifa = "=0.37.0" thiserror = "2" [profile.release] diff --git a/README.md b/README.md index 7794f81..dd0b0c8 100644 --- a/README.md 
+++ b/README.md @@ -55,6 +55,8 @@ Options: -k, --words The number of words from each list to test [default: all words] -v, --verbose... Increase logging verbosity -q, --quiet... Decrease logging verbosity + -o, --output Write the reports into the given path. Will print to stdout if not specified + --html Output all the reports into a single HTML file -h, --help Print help -V, --version Print version ``` diff --git a/cli/CHANGELOG.md b/cli/CHANGELOG.md index 12bf4f3..d7c71c4 100644 --- a/cli/CHANGELOG.md +++ b/cli/CHANGELOG.md @@ -1,10 +1,17 @@ # `fontheight` CLI -## Unreleased +## v0.1.1 + +### Added + +- LibreOffice word lists +- HTML reporter, use `--html` to receive a visual report of the exemplar words against various metric lines +- `-o/--output` flag, to have `fontheight` write into a file ### Changes - Update word lists +- Sort exemplars lexicographically if their vertical extents match ## v0.1.0 - 2025/09/12 diff --git a/cli/Cargo.toml b/cli/Cargo.toml index 96907ab..16d4ed4 100644 --- a/cli/Cargo.toml +++ b/cli/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "fontheight-cli" description = "Find out the vertical extents your font reaches on shaped words" -version = "0.1.0" +version = "0.1.1" edition = "2024" authors = [ "Dalton Maag ", @@ -19,9 +19,15 @@ path = "src/main.rs" [dependencies] anyhow = "1" clap-verbosity-flag = { version = "3", features = ["log"] } -fontheight = { version = "0.1.5", path = "../core" } -log = { workspace = true } -rayon = { workspace = true } +fontheight = { version = "0.2", path = "../core" } +harfrust.workspace = true +harfshapedfa.workspace = true +log.workspace = true +maud = "0.27.0" +ordered-float.workspace = true +rayon.workspace = true +skrifa.workspace = true +svg = "0.18" [dependencies.static-lang-word-lists] version = "0.4" diff --git a/cli/src/fmt/html.rs b/cli/src/fmt/html.rs new file mode 100644 index 0000000..3dc827d --- /dev/null +++ b/cli/src/fmt/html.rs @@ -0,0 +1,687 @@ +use std::{ + cell::RefCell, + 
collections::{BTreeMap, HashMap, hash_map::Entry}, + fmt, + fmt::Write, + ops::Neg, + rc::Rc, +}; + +use anyhow::{Context, bail}; +use fontheight::{Location, Report, VerticalExtremes}; +use harfrust::{ShaperData, ShaperInstance, UnicodeBuffer}; +use harfshapedfa::{ + HarfRustShaperExt, ShapingMeta, + convert::{iso639_to_opentype, iso15924_to_opentype}, + pens::BoundsPen, +}; +use log::{debug, error}; +use maud::{DOCTYPE, Escaper, Markup, PreEscaped, Render, html}; +use ordered_float::NotNan; +use skrifa::{ + FontRef, GlyphId, MetadataProvider, OutlineGlyph, + instance::Size, + outline::{DrawSettings, OutlinePen, pen::SvgPen}, + raw::TableProvider, +}; +use static_lang_word_lists::WordList; +use svg::node::element::{Group, Line, Path, SVG}; + +static CSS: &str = "\ +body { + margin: 1em; + + font-family: sans-serif; +} + +h1 { + text-align: center; +} + +details { + margin: 4rem 0; +} + +summary h2 { + display: inline; +} + +ul.drawn { + list-style: none; + margin-left: 0; + padding-left: 0; + + display: flex; + flex-wrap: wrap; + gap: 2rem; +} + +.drawn figure { + margin: 0; +} + +.drawn figcaption { + font-family: monospace; + text-align: center; +} + +.drawn svg { + height: 175px; + border: 1px grey dashed; +}"; + +// Percentage (0..=1) of UPM to pad SVG by +const SVG_PAD_SCALE: f32 = 0.15; + +struct RenderUsingDebug(T); + +impl Render for RenderUsingDebug { + fn render_to(&self, output: &mut String) { + let mut escaper = Escaper::new(output); + write!(escaper, "{:?}", self.0).unwrap(); + } +} + +/// What a BASE min/max record is when you *really* boil it down +#[derive(Debug, Copy, Clone)] +struct SimpleBase { + min: Option, + max: Option, +} + +impl SimpleBase { + fn line_iter(self) -> impl Iterator, &'static str)> { + self.min + .into_iter() + .chain(self.max) + .map(|val| (NotNan::from(val), "cyan")) + } +} + +// Think InstanceExtremes, but lazy instead of ahead-of-time. Also holds the +// buffer so it can be re-used between words. 
+#[derive(Debug)] +struct LocationCache { + skrifa_location: skrifa::instance::Location, + shaper_instance: ShaperInstance, + glyph_bounds: HashMap, + buffer: Option, +} + +impl LocationCache { + fn new(font: &FontRef, location: &Location) -> Self { + Self { + skrifa_location: location.to_skrifa(font), + shaper_instance: ShaperInstance::from_variations( + font, + location.to_harfrust(), + ), + glyph_bounds: Default::default(), + buffer: Some(UnicodeBuffer::new()), + } + } + + fn get_extremes(&mut self, glyph: &OutlineGlyph) -> VerticalExtremes { + *self + .glyph_bounds + .entry(glyph.glyph_id()) + .or_insert_with(|| { + let mut bounds_pen = BoundsPen::new(); + glyph + .draw( + DrawSettings::unhinted( + Size::unscaled(), + &self.skrifa_location, + ), + &mut bounds_pen, + ) + .unwrap(); + let harfshapedfa::kurbo::Rect { y0, y1, .. } = + bounds_pen.bounds(); + VerticalExtremes::new(y0, y1) + }) + } +} + +// Any information that only needs to be computed once +struct FontCache<'a> { + font: &'a FontRef<'a>, + shaper_data: ShaperData, + // (script , language ) + base_entries: HashMap<(&'a str, Option<&'a str>), Option>, + // (y , colour ) + const_metrics: Vec<(NotNan, &'static str)>, + initial_highest: NotNan, + initial_lowest: NotNan, + upm: NotNan, +} + +impl<'a> FontCache<'a> { + fn new(font: &'a FontRef<'a>) -> anyhow::Result { + let os2 = font.os2().context("failed to read OS/2")?; + let head = font.head().context("failed to read HEAD")?; + let upm = NotNan::::from(head.units_per_em()); + + let const_metrics = vec![ + // Baseline + (NotNan::default(), "grey"), + // Highs + (os2.s_typo_ascender().into(), "red"), + (os2.us_win_ascent().into(), "blue"), + (head.y_max().into(), "green"), + (NotNan::new(1900. / 2048.).unwrap() * upm, "pink"), + // Lows + (os2.s_typo_descender().into(), "red"), + (NotNan::::from(os2.us_win_descent()).neg(), "blue"), + (head.y_min().into(), "green"), + (NotNan::new(-500. 
/ 2048.).unwrap() * upm, "pink"), + ]; + + let initial_highest = const_metrics + .iter() + .copied() + .map(|(val, _)| val) + .max() + .unwrap(); + let initial_lowest = const_metrics + .iter() + .copied() + .map(|(val, _)| val) + .min() + .unwrap(); + + Ok(Self { + shaper_data: ShaperData::new(font), + base_entries: Default::default(), + font, + const_metrics, + initial_highest, + initial_lowest, + upm, + }) + } + + fn get_base_entry( + &mut self, + word_list: &'a WordList, + ) -> Option { + fn get_uncached_base_entry( + font: &FontRef, + script: &str, + language: Option<&str>, + ) -> anyhow::Result> { + let base = match font.base() { + Ok(base) => base, + Err(skrifa::raw::ReadError::TableIsMissing(_)) => { + return Ok(None); + }, + Err(why) => bail!("failed to read BASE: {why}"), + }; + + debug!( + "looking up BASE entry for script: {script}, lang: \ + {language:?}" + ); + + let ot_script = iso15924_to_opentype(script) + .context("word list's script wasn't a valid tag")?; + let ot_language = language + .map(|lang| { + iso639_to_opentype(lang).context( + "word list language couldn't be converted to an \ + OpenType language", + ) + }) + .transpose()?; + + let Some(horiz_axis) = base.horiz_axis() else { + debug!("no horizontal BASE entries"); + return Ok(None); + }; + let base_script_list = horiz_axis?.base_script_list()?; + let Some(relevant_script_record) = base_script_list + .base_script_records() + .iter() + .find(|record| record.base_script_tag == ot_script) + else { + debug!("no BASE entry with script `{ot_script}`"); + return Ok(None); + }; + + let base_script = relevant_script_record + .base_script(base_script_list.offset_data())?; + + let language_min_max = ot_language + .and_then(|lang| { + base_script + .base_lang_sys_records() + .iter() + .find(|record| record.base_lang_sys_tag == lang) + }) + .map(|lang_record| { + lang_record.min_max(base_script.offset_data()) + }) + .transpose()?; + + let min_max = match language_min_max { + None => { + let 
Some(default_min_max) = base_script.default_min_max() + else { + debug!("no default MinMax for `{ot_script}`"); + return Ok(None); + }; + debug!("found script BASE entry for `{ot_script}`"); + default_min_max? + }, + Some(min_max) => { + debug!( + "found language-specific BASE override for \ + `{ot_script}`" + ); + min_max + }, + }; + + let min = min_max + .min_coord() + .transpose()? + .map(|base_coord| base_coord.coordinate()); + let max = min_max + .max_coord() + .transpose()? + .map(|base_coord| base_coord.coordinate()); + + Ok(Some(SimpleBase { min, max })) + } + + let script = word_list.script()?; + let language = word_list.language(); + + match self.base_entries.entry((script, language)) { + Entry::Occupied(entry) => *entry.get(), + Entry::Vacant(entry) => { + let opt_base = + get_uncached_base_entry(self.font, script, language) + .unwrap_or_else(|why| { + // Store None in the case of errors as it's a + // reasonable assumption that they'll be consistent, + // and we don't need to emit the error multiple + // times every time this script/language combo is + // looked up + error!( + "failed to check for BASE entry (script: \ + {script}, lang: {language:?}: {why}", + ); + None + }); + *entry.insert(opt_base) + }, + } + } +} + +/// Everything we need to keep track of while shaping a word +#[derive(Debug)] +struct ShapingAccumulator { + /// Where to position the next glyph relative to + x_origin: f32, + /// Where to position the next glyph relative to + y_origin: f32, + /// All the glyphs in the current word + glyph_svgs: Vec, +} + +impl ShapingAccumulator { + fn new(word: &str) -> Self { + Self { + x_origin: 0f32, + y_origin: 0f32, + glyph_svgs: Vec::with_capacity(word.len()), + } + } + + // Taking self and returning a new one makes this easier to use with + // Iterator::fold (i.e. 
the whole point of this struct) + fn next(self, x_advance: i32, y_advance: i32, glyph_svg: Path) -> Self { + let ShapingAccumulator { + x_origin, + y_origin, + mut glyph_svgs, + } = self; + glyph_svgs.push(glyph_svg); + Self { + x_origin: x_origin + x_advance as f32, + y_origin: y_origin + y_advance as f32, + glyph_svgs, + } + } +} + +fn draw_svg<'a>( + font_cache: Rc>>, + location_cache: Rc>, + word: &str, + word_list: &'a WordList, +) -> SVG { + // We only ever process one SVG at a time, so we can just borrow mutably for + // the duration of this function for simplicity's sake + let mut font_cache = font_cache.borrow_mut(); + let mut location_cache = location_cache.borrow_mut(); + + let mut buffer = location_cache + .buffer + .take() + .expect("GlyphBuffer was not returned to location_cache"); + buffer.push_str(word); + + let shaper = font_cache + .shaper_data + .shaper(font_cache.font) + .instance(Some(&location_cache.shaper_instance)) + .build(); + // This word has already been shaped by main so we should encounter no + // errors here; unwrapping is fine + let shaping_meta = word_list + .script() + .map(|script| ShapingMeta::new(script, word_list.language(), &shaper)) + .transpose() + .unwrap(); + + // Default features are still included by default + let glyph_buffer = match &shaping_meta { + Some(meta) => shaper.shape_with_meta(meta, buffer, &[]), + None => { + buffer.guess_segment_properties(); + shaper.shape(buffer, &[]) + }, + }; + + // These values do not factor in padding + let mut highest = font_cache.initial_highest; + let mut lowest = font_cache.initial_lowest; + + let maybe_base = font_cache.get_base_entry(word_list); + if let Some(base) = maybe_base { + if let Some(max) = base.max { + highest = highest.max(NotNan::from(max)); + } + if let Some(min) = base.min { + lowest = lowest.min(NotNan::from(min)); + } + } + + let svg_pad = font_cache.upm * SVG_PAD_SCALE; + let outlines = font_cache.font.outline_glyphs(); + // FIXME: in theory, using the final 
x_advance is insufficient. We would + // have to use the bounds of the final glyph instead of just where + // it reports the next one should start. + // In practice, the padding will probably save us even if end_width + // should be larger. + let ShapingAccumulator { + x_origin: end_width, + glyph_svgs, + .. + } = glyph_buffer + .glyph_infos() + .iter() + .zip(glyph_buffer.glyph_positions()) + .fold( + ShapingAccumulator::new(word), + |acc, (glyph_info, position)| { + let glyph = outlines.get(glyph_info.glyph_id.into()).unwrap(); + + // Draw the glyph, flipped because SVG space has y=0 at the top, + // unlike fonts + let mut svg_pen = SvgPen::new(); + let mut flipped_svg_pen = VerticalFlipPen { + inner: &mut svg_pen, + }; + glyph + .draw( + DrawSettings::unhinted( + Size::unscaled(), + &location_cache.skrifa_location, + ), + &mut flipped_svg_pen, + ) + .unwrap(); + + // Pull the SVG path out of the pen and position it correctly + let glyph_svg = Path::new() + .set( + "transform", + format!( + "translate({x}, {y})", + x = acc.x_origin + position.x_offset as f32, + y = acc.y_origin + position.y_offset as f32 + ), + ) + .set("d", svg_pen.to_string()); + + // Look at the bounds and update highest/lowest as needed + let extrema = location_cache.get_extremes(&glyph); + highest = + highest.max(NotNan::new(extrema.highest() as f32).unwrap()); + lowest = + lowest.min(NotNan::new(extrema.lowest() as f32).unwrap()); + + acc.next(position.x_advance, position.y_advance, glyph_svg) + }, + ); + location_cache.buffer = Some(glyph_buffer.clear()); + + let x_min = -svg_pad; + let x_max = end_width + svg_pad; + let y_min = lowest - svg_pad; + let y_max = highest + svg_pad; + + // This group is positioned to factor in padding, everything within it is + // just font coordinates with y negated. 
+ let word_svg = glyph_svgs + .into_iter() + .fold(Group::new(), |group, path| group.add(path)) + .set( + "transform", + // Move the word down now to complete the move from font-land to + // SVG-land, coordinates-wise + format!("translate({x}, {y})", x = svg_pad, y = y_max), + ); + + let word_and_lines_svg = font_cache + .const_metrics + .iter() + .copied() + .chain(maybe_base.into_iter().flat_map(|base| base.line_iter())) + .fold(word_svg, |group, (line_y, colour)| { + // Here we're back to working within the group in font coordinates, + // just need to flip y + let y = line_y.into_inner(); + // Draw the lines the full width of the box + let line = Line::new() + .set("x1", x_min) + .set("y1", -y) + .set("x2", x_max) + .set("y2", -y) + .set("stroke-width", 10) + .set("stroke", colour); + group.add(line) + }); + + SVG::new() + .set( + "viewBox", + format!( + "0 0 {width} {height}", + width = x_max - x_min, + height = y_max - y_min + ), + ) + .set("preserveAspectRatio", "meet") + .add(word_and_lines_svg) +} + +fn draw_exemplar<'a>( + font_cache: Rc>>, + location_cache: Rc>, + exemplar: &str, + source: &'a WordList, + location: &Location, +) -> Markup { + let svg = + draw_svg(font_cache, location_cache, exemplar, source).to_string(); + html! { + li { + figure { + (PreEscaped(svg)) + figcaption { + "\"" (exemplar) "\" (from " (source.name()) ")" br; + // TODO: give instance name if it is a named instance? + (RenderUsingDebug(location)) + } + } + } + } +} + +fn format_script_reports<'a>( + font_cache: Rc>>, + script: &str, + reports: &[&Report<'a>], +) -> Markup { + html! 
{ + details open { + summary { h2 { (script) } } + @for report in reports { + @let location_cache = + Rc::new(RefCell::new(LocationCache::new(font_cache.borrow().font, report.location))); + ul.drawn { + @for high_exemplar in report.exemplars.highest() { + (draw_exemplar( + font_cache.clone(), + location_cache.clone(), + high_exemplar.word, + report.word_list, + report.location, + )) + } + @for low_exemplar in report.exemplars.lowest() { + (draw_exemplar( + font_cache.clone(), + location_cache.clone(), + low_exemplar.word, + report.word_list, + report.location, + )) + } + } + } + } + } +} + +pub fn format_all_reports( + reports: &[Report], + font: &FontRef, +) -> anyhow::Result { + // Group on script and then present exemplars from word lists in order by + // name + let mut script_exemplars = BTreeMap::<&str, Vec<&Report>>::new(); + reports.iter().for_each(|report| { + // ZWSP at the start of Unknown so it gets sorted last + let script = report.word_list.script().unwrap_or("\u{200B}Unknown"); + script_exemplars.entry(script).or_default().push(report); + }); + // Sort reports by name, then by location + script_exemplars.values_mut().for_each(|reports| { + reports.sort_unstable_by(|report_a, report_b| { + Ord::cmp(report_a.word_list.name(), report_b.word_list.name()) + .then_with(|| { + PartialOrd::partial_cmp( + &report_a.location, + &report_b.location, + ) + .expect("fontheight produced unsortable locations") + }) + }); + }); + + let font_cache = Rc::new(RefCell::new(FontCache::new(font)?)); + + let html = html! 
{ + (DOCTYPE) + html { + head { + title { "Font Height report" } + meta charset="utf-8"; + style { (CSS) } + } + body { + h1 { "Font Height report" } + h3 { "Lines legend" } + p { + span style="color: grey" { + "Grey: baseline" + } br; + span style="color: green" { + "Green: [head.yMax, head.yMin]" + } br; + span style="color: blue" { + "Blue: [os2.usWinAscent, -os2.usWinDescent]" + } br; + span style="color: red" { + "Red: [os2.sTypoAscender, os2.sTypoDescender] " + "= clipping limit for Android" + } br; + span style="color: pink" { + (PreEscaped("Pink: [1900⁄2048×upem, ")) + (PreEscaped("−500⁄2048×upem]")) + } br; + span style="color: cyan" { + "Cyan: BASE table entry for script/language (if present)" + } br; + } + @for (script, reports) in script_exemplars { + (format_script_reports(font_cache.clone(), script, &reports)) + } + } + } + }; + Ok(html.into_string()) +} + +struct VerticalFlipPen<'p, P> { + inner: &'p mut P, +} + +impl

OutlinePen for VerticalFlipPen<'_, P> +where + P: OutlinePen, +{ + fn move_to(&mut self, x: f32, y: f32) { + self.inner.move_to(x, -y); + } + + fn line_to(&mut self, x: f32, y: f32) { + self.inner.line_to(x, -y); + } + + fn quad_to(&mut self, cx0: f32, cy0: f32, x: f32, y: f32) { + self.inner.quad_to(cx0, -cy0, x, -y); + } + + fn curve_to( + &mut self, + cx0: f32, + cy0: f32, + cx1: f32, + cy1: f32, + x: f32, + y: f32, + ) { + self.inner.curve_to(cx0, -cy0, cx1, -cy1, x, -y); + } + + fn close(&mut self) { + self.inner.close() + } +} diff --git a/cli/src/fmt.rs b/cli/src/fmt/mod.rs similarity index 99% rename from cli/src/fmt.rs rename to cli/src/fmt/mod.rs index 1064b20..11d92bb 100644 --- a/cli/src/fmt.rs +++ b/cli/src/fmt/mod.rs @@ -2,6 +2,8 @@ use std::fmt; use fontheight::Report; +pub mod html; + #[derive(Debug, Copy, Clone)] pub struct ReportFormatter<'a> { report: &'a Report<'a>, diff --git a/cli/src/main.rs b/cli/src/main.rs index 31b9bbd..9ca6f69 100644 --- a/cli/src/main.rs +++ b/cli/src/main.rs @@ -1,18 +1,25 @@ #![allow(missing_docs)] -mod fmt; - -use std::{fs, iter, path::PathBuf, process::ExitCode, time::Instant}; - -use anyhow::Context; +use std::{ + fs, + fs::OpenOptions, + io::{Write, stdout}, + iter, + path::PathBuf, + process::ExitCode, + time::Instant, +}; + +use anyhow::{Context, bail}; use clap::Parser; use clap_verbosity_flag::Verbosity; use env_logger::Env; +use fmt::{FormatReport, OutputFormat}; use fontheight::Reporter; use log::{error, info, warn}; use rayon::prelude::*; -use crate::fmt::{FormatReport, OutputFormat}; +mod fmt; fn main() -> ExitCode { match _main() { @@ -47,17 +54,41 @@ struct Args { #[command(flatten)] verbosity: FontheightVerbosity, + + /// Write the reports into the given path. 
+ /// Will print to stdout if not specified + #[arg(short, long = "output")] + output_path: Option, + + /// Output all the reports into a single HTML file + #[arg(long)] + html: bool, } fn _main() -> anyhow::Result<()> { let args = Args::parse(); - debug_assert!(!args.font_path.is_empty()); + if args.font_path.len() > 1 && args.html { + bail!("you can't pass multiple fonts if using --html"); + } env_logger::builder() .filter_level(args.verbosity.into()) .parse_env(Env::new().filter("FONTHEIGHT_LOG")) .init(); + let mut output: Box = match &args.output_path { + None => Box::new(stdout().lock()), + Some(path) => { + let handle = OpenOptions::new() + .write(true) + .truncate(true) + .create(true) + .open(path) + .context("failed to open output file")?; + Box::new(handle) + }, + }; + args.font_path .iter() .try_for_each(|font_path| -> anyhow::Result<()> { @@ -116,11 +147,33 @@ fn _main() -> anyhow::Result<()> { .collect::, _>>()?; let took = start.elapsed(); - println!("{}:", font_path.display()); - reports.iter().for_each(|report| { - println!("{}", report.format(OutputFormat::Human)); - }); - info!("Took {took:?}"); + info!("{} took {took:?}", font_path.display()); + + if !args.html { + writeln!(&mut output, "{}:", font_path.display()) + .context("failed to write to output")?; + reports + .iter() + .try_for_each(|report| { + writeln!( + &mut output, + "{}", + report.format(OutputFormat::Human) + ) + }) + .context("failed to write to output")?; + } else { + info!("generating HTML report"); + let start = Instant::now(); + let html = fmt::html::format_all_reports( + &reports, + reporter.fontref(), + )?; + info!("took {:?}", start.elapsed()); + output + .write_all(html.as_bytes()) + .context("failed to write to output")?; + } Ok(()) }) } diff --git a/core/CHANGELOG.md b/core/CHANGELOG.md index 90781c4..7424cd8 100644 --- a/core/CHANGELOG.md +++ b/core/CHANGELOG.md @@ -1,5 +1,24 @@ # `fontheight` library crate changelog +## v0.2.0 + +**Breaking change**: the `Location` 
type and several errors now reside in [`harfshapedfa`](../harfshapedfa/README.md) instead of this crate. Error types of some functions have changed. + +### Removed + +All now live in [`harfshapedfa`](../harfshapedfa/README.md). +Other changes have been made to the `Location` API/behaviour (see [`harfshapedfa/CHANGELOG.md`](../harfshapedfa/CHANGELOG.md)). + +- `Location` +- `ShapingPlanError` +- `HarfRustUnknownLanguageError` +- `MismatchedAxesError` +- `InvalidTagError` + +### Added + +- `WordListShapingPlanError` + ## v0.1.8 - 2025/10/27 ### Changes diff --git a/core/Cargo.toml b/core/Cargo.toml index 24cdd56..a127abc 100644 --- a/core/Cargo.toml +++ b/core/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "fontheight" description = "Find out the vertical extents your font reaches on shaped words" -version = "0.1.8" +version = "0.2.0" edition = "2024" authors = ["Dalton Maag "] license = "Apache-2.0" @@ -15,15 +15,13 @@ default = ["rayon"] rayon = ["dep:rayon", "static-lang-word-lists/rayon"] [dependencies] -# Note: make sure harfrust's read-fonts matches skrifa's read-fonts -harfrust = "=0.3.2" +harfrust.workspace = true +harfshapedfa.workspace = true itertools = "0.14.0" -kurbo = "0.11.1" -ordered-float = "5" +ordered-float.workspace = true rayon = { workspace = true, optional = true } -# Note: make sure skrifa's read-fonts matches harfrust's read-fonts -skrifa = "=0.37.0" -thiserror = { workspace = true } +skrifa.workspace = true +thiserror.workspace = true [dependencies.static-lang-word-lists] version = ">=0.2,<0.5" diff --git a/core/src/errors.rs b/core/src/errors.rs index 55e0b55..f7bfb0a 100644 --- a/core/src/errors.rs +++ b/core/src/errors.rs @@ -6,7 +6,9 @@ //! may return more specific errors (all of which will up-convert to //! [`FontHeightError`] one way or another).
-use skrifa::{outline::DrawError, raw::types::InvalidTag}; +use harfshapedfa::errors::ShapingPlanError; +pub use harfshapedfa::errors::{InvalidTagError, MismatchedAxesError}; +use skrifa::outline::DrawError; use thiserror::Error; /// Font Height hit an error, sorry! @@ -29,7 +31,7 @@ pub enum FontHeightError { /// Invalid metadata for a [`WordList`](crate::WordList) meant creating a /// shaping plan for it failed. #[error(transparent)] - WordListMetadata(#[from] ShapingPlanError), + WordListMetadata(#[from] WordListShapingPlanError), } /// Creating the shaping plan for a [`WordList`](crate::WordList) failed. @@ -47,44 +49,10 @@ pub enum FontHeightError { /// metadata and it's unable to be used. [`WordList`](crate::WordList)s without /// metadata will not cause this error. #[derive(Debug, Error)] -pub enum ShapingPlanError { - /// The script metadata value on the [`WordList`](crate::WordList) was - /// invalid - #[error( - "invalid script in word list metadata for {word_list_name}: {inner}" - )] - UnknownScriptTag { - /// The name of the word list that had invalid metadata - word_list_name: String, - /// The underlying error - inner: InvalidTagError, - }, - /// The language metadata value on the [`WordList`](crate::WordList) was - /// invalid - #[error( - "invalid language in word list metadata for {word_list_name}: {inner}" - )] - UnknownLanguage { - /// The name of the word list that had invalid metadata - word_list_name: String, - /// The underlying error - inner: HarfRustUnknownLanguageError, - }, -} - -/// [`harfrust`] didn't recognise the language -#[derive(Debug, Error)] -#[error("invalid language: \"{language}\"")] -pub struct HarfRustUnknownLanguageError { - language: String, -} - -impl HarfRustUnknownLanguageError { - pub(crate) fn new(lang: impl Into) -> Self { - HarfRustUnknownLanguageError { - language: lang.into(), - } - } +#[error("couldn't make shaping plan for {word_list_name}: {inner}")] +pub struct WordListShapingPlanError { + pub(crate) 
word_list_name: String, + pub(crate) inner: ShapingPlanError, } // New-typed errors to not have 3rd party errors in public API @@ -97,18 +65,3 @@ pub struct SkrifaReadError(#[from] skrifa::raw::ReadError); #[derive(Debug, Error)] #[error("could not draw glyph {0}: {1}")] pub struct SkrifaDrawError(pub(crate) skrifa::GlyphId, pub(crate) DrawError); - -/// Returned by [`Location::validate_for`](crate::Location::validate_for), -/// indicating axes are specified in the [`Location`](crate::Location) that -/// aren't in the font being validated against. -#[derive(Debug, Error)] -#[error("mismatched axes: present in Location but not font {extras:?}")] -pub struct MismatchedAxesError { - pub(crate) extras: Vec, -} - -/// The axis/script tag was invalid (it had illegal characters or wasn't four -/// characters). -#[derive(Debug, Error)] -#[error(transparent)] -pub struct InvalidTagError(#[from] InvalidTag); diff --git a/core/src/lib.rs b/core/src/lib.rs index 9ac941f..9951820 100644 --- a/core/src/lib.rs +++ b/core/src/lib.rs @@ -32,19 +32,14 @@ use std::{ borrow::Cow, cmp, collections::{BTreeSet, HashMap}, - str::FromStr, }; pub use exemplars::{CollectToExemplars, Exemplars}; -use harfrust::{ - Direction, Language, Script, ShapePlan, Shaper, ShaperData, ShaperInstance, - Tag, UnicodeBuffer, script, -}; +use harfrust::{Shaper, ShaperData, ShaperInstance, UnicodeBuffer}; +pub use harfshapedfa::Location; +use harfshapedfa::{HarfRustShaperExt, ShapingMeta, pens::BoundsPen}; use itertools::Itertools; -use kurbo::Shape; -pub use locations::Location; use ordered_float::{NotNan, OrderedFloat}; -use pens::BezierPen; use skrifa::{ FontRef, MetadataProvider, instance::Size, outline::DrawSettings, }; @@ -52,14 +47,11 @@ pub use static_lang_word_lists::WordList; use static_lang_word_lists::WordListIter; use crate::errors::{ - FontHeightError, HarfRustUnknownLanguageError, ShapingPlanError, - SkrifaDrawError, SkrifaReadError, + FontHeightError, SkrifaDrawError, SkrifaReadError, 
WordListShapingPlanError, }; pub mod errors; mod exemplars; -mod locations; -mod pens; /// Font Height's entrypoint. Parses fonts and can check word lists at /// specified locations. @@ -123,12 +115,12 @@ impl<'a> Reporter<'a> { .iter() .multi_cartesian_product() .map(|coords| { - let inner = coords - .into_iter() - .zip(self.font.axes().iter()) - .map(|(coord, axis)| (axis.tag(), From::from(*coord))) - .collect(); - Location::from_skrifa(inner) + self.font + .axes() + .iter() + .zip(coords) + .map(|(axis, coord)| (axis.tag(), coord.into_inner())) + .collect() }) .collect() } @@ -217,13 +209,22 @@ impl<'a> InstanceReporter<'a> { pub fn to_word_extremes_iter( &self, word_list: &'a WordList, - ) -> Result, ShapingPlanError> { + ) -> Result, WordListShapingPlanError> { let shaper = self .shaper_data .shaper(self.font) .instance(Some(&self.shaper_instance)) .build(); - let shaping_meta = ShapingMeta::new_from(word_list, &shaper)?; + let shaping_meta = word_list + .script() + .map(|script| { + ShapingMeta::new(script, word_list.language(), &shaper) + }) + .transpose() + .map_err(|err| WordListShapingPlanError { + word_list_name: word_list.name().to_owned(), + inner: err, + })?; Ok(WordExtremesIterator { shaper, instance_extremes: &self.instance_extremes, @@ -242,7 +243,7 @@ impl<'a> InstanceReporter<'a> { word_list: &'a WordList, k_words: Option, n_exemplars: usize, - ) -> Result, ShapingPlanError> { + ) -> Result, WordListShapingPlanError> { use std::convert::identity; use exemplars::ExemplarCollector; @@ -262,7 +263,16 @@ impl<'a> InstanceReporter<'a> { .shaper(self.font) .instance(Some(&self.shaper_instance)) .build(); - let shaping_meta = ShapingMeta::new_from(word_list, &shaper)?; + let shaping_meta = word_list + .script() + .map(|script| { + ShapingMeta::new(script, word_list.language(), &shaper) + }) + .transpose() + .map_err(|err| WordListShapingPlanError { + word_list_name: word_list.name().to_owned(), + inner: err, + })?; let exemplars = word_list .par_iter() 
@@ -278,18 +288,7 @@ impl<'a> InstanceReporter<'a> { // Default features are still included by default let glyph_buffer = match &shaping_meta { - Some(meta) => { - buffer.set_script(meta.script); - if let Some(lang) = meta.language.clone() { - buffer.set_language(lang); - } - buffer.set_direction(meta.direction); - shaper.shape_with_plan( - &meta.shaping_plan, - buffer, - &[], - ) - }, + Some(meta) => shaper.shape_with_meta(meta, buffer, &[]), None => { buffer.guess_segment_properties(); shaper.shape(buffer, &[]) @@ -391,14 +390,7 @@ impl<'a> Iterator for WordExtremesIterator<'a> { // Default features are still included by default let glyph_buffer = match &self.shaping_meta { - Some(meta) => { - buffer.set_script(meta.script); - if let Some(lang) = meta.language.clone() { - buffer.set_language(lang); - } - buffer.set_direction(meta.direction); - self.shaper.shape_with_plan(&meta.shaping_plan, buffer, &[]) - }, + Some(meta) => self.shaper.shape_with_meta(meta, buffer, &[]), None => { buffer.guess_segment_properties(); self.shaper.shape(buffer, &[]) @@ -517,18 +509,18 @@ impl InstanceExtremes { .outline_glyphs() .iter() .map(|(id, outline)| -> Result<(u32, VerticalExtremes), SkrifaDrawError> { - let mut bez_pen = BezierPen::default(); + let mut bounds_pen = BoundsPen::new(); outline .draw( DrawSettings::unhinted( Size::unscaled(), &location.to_skrifa(font), ), - &mut bez_pen, + &mut bounds_pen, ) .map_err(|err| SkrifaDrawError(id, err))?; - let kurbo::Rect { y0, y1, .. } = bez_pen.path.bounding_box(); + let harfshapedfa::kurbo::Rect { y0, y1, .. } = bounds_pen.bounds(); Ok((u32::from(id), VerticalExtremes { lowest: NotNan::new(y0).expect("bounding box with NaN y0"), highest: NotNan::new(y1).expect("bounding box with NaN y1"), @@ -539,6 +531,7 @@ impl InstanceExtremes { } /// Get the [`VerticalExtremes`] for the given glyph ID. 
+ #[must_use] pub fn get(&self, glyph_id: u32) -> Option { self.0.get(&glyph_id).copied() } @@ -626,149 +619,3 @@ impl<'a> Report<'a> { } } } - -struct ShapingMeta { - shaping_plan: ShapePlan, - script: Script, - direction: Direction, - language: Option, -} - -impl ShapingMeta { - fn new_from( - word_list: &WordList, - shaper: &Shaper, - ) -> Result, ShapingPlanError> { - // If we didn't get a script, we're not going to make a shaping plan, - // give up - let Some(script) = word_list.script() else { - return Ok(None); - }; - let script_tag = script.parse::().map_err(|inner| { - ShapingPlanError::UnknownScriptTag { - word_list_name: word_list.name().to_owned(), - inner: inner.into(), - } - })?; - // Unwrap is safe here as script_tag is never null as [0, 0, 0, 0] isn't - // a valid Rust string - let script = Script::from_iso15924_tag(script_tag).unwrap(); - - let language = word_list - .language() - .map(|lang| { - // harfrust's own error here is just "invalid language" - // (v0.1.1), so discard it for our own - Language::from_str(lang).map_err(|_| { - ShapingPlanError::UnknownLanguage { - word_list_name: word_list.name().to_owned(), - inner: HarfRustUnknownLanguageError::new(lang), - } - }) - }) - .transpose()?; - let direction = - direction_from_script(script).unwrap_or(Direction::LeftToRight); - - let shaping_plan = ShapePlan::new( - shaper, - direction, - Some(script), - language.as_ref(), - // Default features are still included by default - &[], - ); - - Ok(Some(Self { - shaping_plan, - script, - direction, - language, - })) - } -} - -const fn direction_from_script(script: Script) -> Option { - // Copied from harfrust (internal API) - // https://github.com/harfbuzz/harfrust/blob/bf4b7ca20cf95e7183c5f9e1c13a56e9ca6c1174/src/hb/common.rs#L75-L161 - - match script { - // Unicode-1.1 additions - script::ARABIC | - script::HEBREW | - - // Unicode-3.0 additions - script::SYRIAC | - script::THAANA | - - // Unicode-4.0 additions - script::CYPRIOT | - - // Unicode-4.1 
additions - script::KHAROSHTHI | - - // Unicode-5.0 additions - script::PHOENICIAN | - script::NKO | - - // Unicode-5.1 additions - script::LYDIAN | - - // Unicode-5.2 additions - script::AVESTAN | - script::IMPERIAL_ARAMAIC | - script::INSCRIPTIONAL_PAHLAVI | - script::INSCRIPTIONAL_PARTHIAN | - script::OLD_SOUTH_ARABIAN | - script::OLD_TURKIC | - script::SAMARITAN | - - // Unicode-6.0 additions - script::MANDAIC | - - // Unicode-6.1 additions - script::MEROITIC_CURSIVE | - script::MEROITIC_HIEROGLYPHS | - - // Unicode-7.0 additions - script::MANICHAEAN | - script::MENDE_KIKAKUI | - script::NABATAEAN | - script::OLD_NORTH_ARABIAN | - script::PALMYRENE | - script::PSALTER_PAHLAVI | - - // Unicode-8.0 additions - script::HATRAN | - - // Unicode-9.0 additions - script::ADLAM | - - // Unicode-11.0 additions - script::HANIFI_ROHINGYA | - script::OLD_SOGDIAN | - script::SOGDIAN | - - // Unicode-12.0 additions - script::ELYMAIC | - - // Unicode-13.0 additions - script::CHORASMIAN | - script::YEZIDI | - - // Unicode-14.0 additions - script::OLD_UYGHUR => { - Some(Direction::RightToLeft) - } - - // https://github.com/harfbuzz/harfbuzz/issues/1000 - script::OLD_HUNGARIAN | - script::OLD_ITALIC | - script::RUNIC | - script::TIFINAGH => { - None - } - - _ => Some(Direction::LeftToRight), - } -} diff --git a/core/src/locations.rs b/core/src/locations.rs deleted file mode 100644 index dc3b9f2..0000000 --- a/core/src/locations.rs +++ /dev/null @@ -1,160 +0,0 @@ -use std::{ - collections::{HashMap, HashSet}, - fmt, -}; - -use skrifa::MetadataProvider; - -use crate::errors::{InvalidTagError, MismatchedAxesError}; - -/// A mapping of axis tags to values. -/// -/// ``` -/// # use fontheight::Location; -/// # use fontheight::errors::InvalidTagError; -/// # fn main() -> Result<(), InvalidTagError> { -/// let mut loc = Location::new(); -/// loc.axis("wght", 400.0)? -/// .axis("ital", 1.0)? 
-/// .axis("wdth", 1000.0)?; -/// # Ok(()) -/// # } -/// ``` -#[derive(Clone, Default)] -pub struct Location { - user_coords: HashMap, -} - -impl Location { - /// Create a new location. - #[must_use] - pub fn new() -> Self { - // HashMap::new isn't const so even if we desugared this we couldn't - // make Location::new const - Default::default() - } - - pub(crate) const fn from_skrifa( - user_coords: HashMap, - ) -> Self { - Self { user_coords } - } - - /// Set the value of an axis. - /// - /// Fails if `tag` isn't a valid axis tag. - /// - /// Designed to support method chaining: - /// - /// ``` - /// # use fontheight::Location; - /// # use fontheight::errors::InvalidTagError; - /// # fn main() -> Result<(), InvalidTagError> { - /// let mut loc = Location::new(); - /// loc.axis("wght", 400.0)? - /// .axis("ital", 1.0)? - /// .axis("wdth", 1000.0)?; - /// # Ok(()) - /// # } - /// ``` - pub fn axis( - &mut self, - tag: impl AsRef<[u8]>, - value: f32, - ) -> Result<&mut Self, InvalidTagError> { - let tag = skrifa::Tag::new_checked(tag.as_ref())?; - self.user_coords.insert(tag, value); - Ok(self) - } - - /// Converts a [`HashMap`] to a Font Height [`Location`]. - /// - /// Fails if any keys aren't valid axis tags. - /// - /// Note: this is just an alias to the [`TryFrom`] implementation. - pub fn try_from_simple( - location: HashMap, - ) -> Result { - Self::try_from(location) - } - - /// Creates a [`HashMap`](HashMap) from `&self`. - #[must_use] - pub fn to_simple(&self) -> HashMap { - self.user_coords - .iter() - .map(|(tag, &val)| (tag.to_string(), val)) - .collect() - } - - /// Creates a [`skrifa::instance::Location`] from `&self`. - pub(crate) fn to_skrifa( - &self, - font: &skrifa::FontRef, - ) -> skrifa::instance::Location { - font.axes().location( - self.user_coords.iter().map(|(tag, coord)| (*tag, *coord)), - ) - } - - /// Creates a [`harfrust::Variation`] from `&self`. 
- pub(crate) fn to_harfrust(&self) -> Vec { - self.user_coords - .iter() - .map(|(&tag, &value)| harfrust::Variation { tag, value }) - .collect() - } - - /// Checks that `&self` doesn't specify any axes that aren't present in - /// `font`. - /// - /// Omitting axes is allowed as most libraries will just use the default - /// value if one isn't provided for an axis. - /// - /// Note: if you're just using Font Height, it will perform this validation - /// for you as necessary. - pub fn validate_for( - &self, - font: &skrifa::FontRef, - ) -> Result<(), MismatchedAxesError> { - let mut provided = - self.user_coords.keys().copied().collect::>(); - font.axes().iter().map(|axis| axis.tag()).for_each(|tag| { - provided.remove(&tag); - }); - let extras = provided; - if extras.is_empty() { - Ok(()) - } else { - Err(MismatchedAxesError { - extras: Vec::from_iter(extras), - }) - } - } -} - -impl fmt::Debug for Location { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - f.debug_map() - .entries( - self.user_coords - .iter() - .map(|(tag, &val)| (tag.to_string(), val)), - ) - .finish() - } -} - -impl TryFrom> for Location { - type Error = InvalidTagError; - - fn try_from(location: HashMap) -> Result { - let user_coords = location - .into_iter() - .map(|(tag, val)| { - skrifa::Tag::new_checked(tag.as_bytes()).map(|t| (t, val)) - }) - .collect::>()?; - Ok(Self { user_coords }) - } -} diff --git a/core/src/pens.rs b/core/src/pens.rs deleted file mode 100644 index b59b704..0000000 --- a/core/src/pens.rs +++ /dev/null @@ -1,45 +0,0 @@ -use skrifa::outline::OutlinePen; - -// Taken from https://github.com/googlefonts/fontations/blob/57715f39/skrifa/src/outline/mod.rs#L1159-L1184 (same license) -#[derive(Debug, Default)] -pub(crate) struct BezierPen { - pub(crate) path: kurbo::BezPath, -} - -fn kurbo_point(x: f32, y: f32) -> kurbo::Point { - (x as f64, y as f64).into() -} - -impl OutlinePen for BezierPen { - fn move_to(&mut self, x: f32, y: f32) { - 
self.path.move_to(kurbo_point(x, y)); - } - - fn line_to(&mut self, x: f32, y: f32) { - self.path.line_to(kurbo_point(x, y)); - } - - fn quad_to(&mut self, cx0: f32, cy0: f32, x: f32, y: f32) { - self.path.quad_to(kurbo_point(cx0, cy0), kurbo_point(x, y)); - } - - fn curve_to( - &mut self, - cx0: f32, - cy0: f32, - cx1: f32, - cy1: f32, - x: f32, - y: f32, - ) { - self.path.curve_to( - kurbo_point(cx0, cy0), - kurbo_point(cx1, cy1), - kurbo_point(x, y), - ); - } - - fn close(&mut self) { - self.path.close_path(); - } -} diff --git a/harfshapedfa/CHANGELOG.md b/harfshapedfa/CHANGELOG.md new file mode 100644 index 0000000..3628065 --- /dev/null +++ b/harfshapedfa/CHANGELOG.md @@ -0,0 +1,18 @@ +# `harfshapedfa` library crate changelog + +## v0.1.0 + +Changes documented here are for items that were migrated from `fontheight`, not for any completely new features (e.g. `ShapingMeta`, conversion utilities, and pens). + +### Changes + +- `Location` is now insertion-order preserving +- `Location` now panics upon ingesting any `NaN` values + +### Added + +- `Location::from_skrifa` +- `Location::to_skrifa` +- `Location::to_harfrust` +- `Location::sort_axes` +- `Location` now implements `PartialEq`, `Eq`, and `PartialOrd` diff --git a/harfshapedfa/Cargo.toml b/harfshapedfa/Cargo.toml new file mode 100644 index 0000000..6ad8912 --- /dev/null +++ b/harfshapedfa/Cargo.toml @@ -0,0 +1,29 @@ +[package] +name = "harfshapedfa" +description = "Glue functionality between skrifa & harfrust" +version = "0.1.0" +edition = "2024" +authors = ["Dalton Maag "] +license = "Apache-2.0" +readme = "README.md" +repository = "https://github.com/googlefonts/fontheight" +categories = ["text-processing"] + +[features] +default = [] +pens = ["dep:kurbo"] + +[dependencies] +indexmap = "2.12" +harfrust.workspace = true +kurbo = { version = "0.12.0", optional = true } +ordered-float.workspace = true +skrifa.workspace = true +thiserror.workspace = true + +[package.metadata.docs.rs] +all-features = true
+rustdoc-args = ["--cfg", "docsrs"] + +[lints] +workspace = true diff --git a/harfshapedfa/README.md b/harfshapedfa/README.md new file mode 100644 index 0000000..0d7ca89 --- /dev/null +++ b/harfshapedfa/README.md @@ -0,0 +1,42 @@ +# `harfshapedfa` + +Some glue and utilities to ease working with [`harfrust`](https://docs.rs/harfrust) and [`skrifa`](https://docs.rs/skrifa), continuing the tradition of confusing font-related crate names. + +> *This crate is not affiliated with `harfrust` or `skrifa`* + +## What exciting features can you offer? + +- [`ShapingMeta`](https://docs.rs/harfshapedfa/latest/harfshapedfa/struct.ShapingMeta.html), to make creating & re-using [shaping plans](https://harfbuzz.github.io/shaping-plans-and-caching.html) easier +- [`Location`](https://docs.rs/harfshapedfa/latest/harfshapedfa/struct.Location.html), a library-agnostic variable font location specifier, mapping axis names to values. Can be validated against a font +- Conversion functions between script, language, and direction (to `harfrust` or OpenType types) + +The hope would be to see this crate eventually deprecated as the functionality/utilities provided here move into sensible locations in the major font libraries, like `harfrust`/`skrifa`/fontations. + +## Usage + +This crate basically expects you to already be using [`harfrust`](https://docs.rs/harfrust) and [`skrifa`](https://docs.rs/skrifa) - if you're not, this probably isn't the crate for you. + +Your Cargo.toml will probably look something like this: + +```toml +[dependencies] +harfrust = "=0.3.2" +skrifa = "=0.37.0" +harfshapedfa = "0.1" +``` + +Note: `harfshapedfa` pins against very specific versions of `skrifa` and `harfrust`, as both projects have seen breaking changes on minor releases, usually by bumping `read-fonts`. +By keeping everything in lockstep, we avoid duplicate dependencies and incompatible types due to different versions of the same types being used.
+ +### Pens + +`harfshapedfa` also exports pens optionally if you enable the crate's `pens` feature: + +```toml +[dependencies] +harfrust = "=0.3.2" +skrifa = "=0.37.0" +harfshapedfa = { version = "0.1", features = ["pens"] } +``` + +This provides some pens and re-exports some [`kurbo`](https://docs.rs/kurbo/latest/kurbo/) types that our API exposes, so if you have a very simple use case you probably won't need to explicitly pull in `kurbo`. diff --git a/harfshapedfa/src/convert.rs b/harfshapedfa/src/convert.rs new file mode 100644 index 0000000..8726689 --- /dev/null +++ b/harfshapedfa/src/convert.rs @@ -0,0 +1,1858 @@ +use harfrust::{Direction, Script, Tag, script}; + +use crate::errors::InvalidTagError; + +/// Returns the text direction for the given [`harfrust::Script`], if known +#[must_use] +pub const fn direction_from_script(script: Script) -> Option { + // Copied from harfrust (internal API) + // https://github.com/harfbuzz/harfrust/blob/bf4b7ca20cf95e7183c5f9e1c13a56e9ca6c1174/src/hb/common.rs#L75-L161 + + match script { + // Unicode-1.1 additions + script::ARABIC | + script::HEBREW | + + // Unicode-3.0 additions + script::SYRIAC | + script::THAANA | + + // Unicode-4.0 additions + script::CYPRIOT | + + // Unicode-4.1 additions + script::KHAROSHTHI | + + // Unicode-5.0 additions + script::PHOENICIAN | + script::NKO | + + // Unicode-5.1 additions + script::LYDIAN | + + // Unicode-5.2 additions + script::AVESTAN | + script::IMPERIAL_ARAMAIC | + script::INSCRIPTIONAL_PAHLAVI | + script::INSCRIPTIONAL_PARTHIAN | + script::OLD_SOUTH_ARABIAN | + script::OLD_TURKIC | + script::SAMARITAN | + + // Unicode-6.0 additions + script::MANDAIC | + + // Unicode-6.1 additions + script::MEROITIC_CURSIVE | + script::MEROITIC_HIEROGLYPHS | + + // Unicode-7.0 additions + script::MANICHAEAN | + script::MENDE_KIKAKUI | + script::NABATAEAN | + script::OLD_NORTH_ARABIAN | + script::PALMYRENE | + script::PSALTER_PAHLAVI | + + // Unicode-8.0 additions + script::HATRAN | + + // 
Unicode-9.0 additions + script::ADLAM | + + // Unicode-11.0 additions + script::HANIFI_ROHINGYA | + script::OLD_SOGDIAN | + script::SOGDIAN | + + // Unicode-12.0 additions + script::ELYMAIC | + + // Unicode-13.0 additions + script::CHORASMIAN | + script::YEZIDI | + + // Unicode-14.0 additions + script::OLD_UYGHUR => { + Some(Direction::RightToLeft) + } + + // https://github.com/harfbuzz/harfbuzz/issues/1000 + script::OLD_HUNGARIAN | + script::OLD_ITALIC | + script::RUNIC | + script::TIFINAGH => { + None + } + + _ => Some(Direction::LeftToRight), + } +} + +/// Converts an [ISO 15924](https://en.wikipedia.org/wiki/ISO_15924) +/// capitalised script to an [OpenType script tag](https://learn.microsoft.com/en-us/typography/opentype/spec/scripttags). +// https://github.com/simoncozens/autobase/blob/9887854fd7436d034c15bf5875686b7583536e76/autobase/src/utils.rs#L223-L248 +pub fn iso15924_to_opentype(script: &str) -> Result { + match script { + // Special cases: https://github.com/fonttools/fonttools/blob/3c1822544d608f87c41fc8fb9ba41ea129257aa8/Lib/fontTools/unicodedata/OTTags.py + // SCRIPT_EXCEPTIONS + "Hira" => Ok(Tag::new(b"kana")), + "Hrkt" => Ok(Tag::new(b"kana")), + "Laoo" => Ok(Tag::new(b"lao ")), + "Yiii" => Ok(Tag::new(b"yi ")), + "Nkoo" => Ok(Tag::new(b"nko ")), + "Vaii" => Ok(Tag::new(b"vai ")), + // NEW_SCRIPT_TAGS + "Beng" => Ok(Tag::new(b"bng2")), + "Deva" => Ok(Tag::new(b"dev2")), + "Gujr" => Ok(Tag::new(b"gjr2")), + "Guru" => Ok(Tag::new(b"gur2")), + "Knda" => Ok(Tag::new(b"knd2")), + "Mlym" => Ok(Tag::new(b"mlm2")), + "Orya" => Ok(Tag::new(b"ory2")), + "Taml" => Ok(Tag::new(b"tml2")), + "Telu" => Ok(Tag::new(b"tel2")), + "Mymr" => Ok(Tag::new(b"mym2")), + // General case + _ => Tag::new_checked(script.to_lowercase().as_bytes()) + .map_err(InvalidTagError), + } +} + +/// Converts an [ISO 639](https://en.wikipedia.org/wiki/ISO_639) two-or-three +/// letter language code to an [OpenType language
tag](https://learn.microsoft.com/en-us/typography/opentype/spec/languagetags). +// Adapted from https://github.com/simoncozens/autobase/blob/a9523ad4e4763339af47fdfb9846dda9edde7a05/autobase/src/utils.rs#L264-L1924 +#[must_use] +#[rustfmt::skip] +pub fn iso639_to_opentype(language: &str) -> Option { + let tag = match language { + "aa" => Tag::new(b"AFR "), // Afar + "aae" => Tag::new(b"SQI "), // Arbëreshë Albanian -> Albanian + "aao" => Tag::new(b"ARA "), // Algerian Saharan Arabic -> Arabic + // "aaq" => Tag::new(b"AAQ "), // Eastern Abnaki -> Eastern + // Abenaki + "aat" => Tag::new(b"SQI "), // Arvanitika Albanian -> Albanian + "ab" => Tag::new(b"ABK "), // Abkhazian + // "aba" => Tag::new(&[0; 4]), // Abé != Abaza + "abh" => Tag::new(b"ARA "), // Tajiki Arabic -> Arabic + "abq" => Tag::new(b"ABA "), // Abaza + "abs" => Tag::new(b"CPP "), // Ambonese Malay -> Creoles + "abv" => Tag::new(b"ARA "), // Baharna Arabic -> Arabic + "acf" => Tag::new(b"FAN "), /* Saint Lucian Creole French -> French + * Antillean */ + // "acf" => Tag::new(b"CPP "), // Saint Lucian Creole French -> Creoles + // "ach" => Tag::new(b"ACH "), // Acoli -> Acholi + "acm" => Tag::new(b"ARA "), // Mesopotamian Arabic -> Arabic + "acq" => Tag::new(b"ARA "), // Ta'izzi-Adeni Arabic -> Arabic + "acr" => Tag::new(b"ACR "), // Achi + // "acr" => Tag::new(b"MYN "), // Achi -> Mayan + "acw" => Tag::new(b"ARA "), // Hijazi Arabic -> Arabic + "acx" => Tag::new(b"ARA "), // Omani Arabic -> Arabic + "acy" => Tag::new(b"ACY "), // Cypriot Arabic + // "acy" => Tag::new(b"ARA "), // Cypriot Arabic -> Arabic + "ada" => Tag::new(b"DNG "), // Adangme -> Dangme + "adf" => Tag::new(b"ARA "), // Dhofari Arabic -> Arabic + "adp" => Tag::new(b"DZN "), // Adap(retired code) -> Dzongkha + // "ady" => Tag::new(b"ADY "), // Adyghe + "aeb" => Tag::new(b"ARA "), // Tunisian Arabic -> Arabic + "aec" => Tag::new(b"ARA "), // Saidi Arabic -> Arabic + "af" => Tag::new(b"AFK "), // Afrikaans + "afb" => Tag::new(b"ARA "), // 
Gulf Arabic -> Arabic + // "afk" => Tag::new(&[0; 4]), // Nanubae != Afrikaans + "afs" => Tag::new(b"CPP "), // Afro-Seminole Creole -> Creoles + "agu" => Tag::new(b"MYN "), // Aguacateco -> Mayan + // "agw" => Tag::new(&[0; 4]), // Kahua != Agaw + "ahg" => Tag::new(b"AGW "), // Qimant -> Agaw + "aht" => Tag::new(b"ATH "), // Ahtena -> Athapaskan + "aig" => Tag::new(b"CPP "), /* Antigua and Barbuda Creole English -> + * Creoles */ + "aii" => Tag::new(b"SWA "), // Assyrian Neo-Aramaic -> Swadaya Aramaic + // "aii" => Tag::new(b"SYR "), // Assyrian Neo-Aramaic -> Syriac + // "aio" => Tag::new(b"AIO "), // Aiton + "aiw" => Tag::new(b"ARI "), // Aari + "ajp" => Tag::new(b"ARA "), /* South Levantine Arabic(retired code) + * -> Arabic */ + "ajt" => Tag::new(b"ARA "), /* Judeo-Tunisian Arabic(retired code) + * -> Arabic */ + "ak" => Tag::new(b"AKA "), // Akan [macrolanguage] + "akb" => Tag::new(b"AKB "), // Batak Angkola + // "akb" => Tag::new(b"BTK "), // Batak Angkola -> Batak + "aln" => Tag::new(b"SQI "), // Gheg Albanian -> Albanian + "als" => Tag::new(b"SQI "), // Tosk Albanian -> Albanian + // "alt" => Tag::new(b"ALT "), // Southern Altai -> Altai + "am" => Tag::new(b"AMH "), // Amharic + "amf" => Tag::new(b"HBN "), // Hamer-Banna -> Hammer-Banna + "amw" => Tag::new(b"SYR "), // Western Neo-Aramaic -> Syriac + "an" => Tag::new(b"ARG "), // Aragonese + // "ang" => Tag::new(b"ANG "), // Old English (ca. 
450-1100) -> + // Anglo-Saxon + "aoa" => Tag::new(b"CPP "), // Angolar -> Creoles + "apa" => Tag::new(b"ATH "), // Apache [collection] -> Athapaskan + "apc" => Tag::new(b"ARA "), // Levantine Arabic -> Arabic + "apd" => Tag::new(b"ARA "), // Sudanese Arabic -> Arabic + "apj" => Tag::new(b"ATH "), // Jicarilla Apache -> Athapaskan + "apk" => Tag::new(b"ATH "), // Kiowa Apache -> Athapaskan + "apl" => Tag::new(b"ATH "), // Lipan Apache -> Athapaskan + "apm" => Tag::new(b"ATH "), /* Mescalero-Chiricahua Apache -> + * Athapaskan */ + "apw" => Tag::new(b"ATH "), // Western Apache -> Athapaskan + "ar" => Tag::new(b"ARA "), // Arabic [macrolanguage] + "arb" => Tag::new(b"ARA "), // Standard Arabic -> Arabic + // "ari" => Tag::new(&[0; 4]), // Arikara != Aari + // "ark" => Tag::new(&[0; 4]), // Arikapú != Rakhine + "arn" => Tag::new(b"MAP "), // Mapudungun + "arq" => Tag::new(b"ARA "), // Algerian Arabic -> Arabic + "ars" => Tag::new(b"ARA "), // Najdi Arabic -> Arabic + "ary" => Tag::new(b"MOR "), // Moroccan Arabic -> Moroccan + // "ary" => Tag::new(b"ARA "), // Moroccan Arabic -> Arabic + "arz" => Tag::new(b"ARA "), // Egyptian Arabic -> Arabic + "as" => Tag::new(b"ASM "), // Assamese + // "ast" => Tag::new(b"AST "), // Asturian + // "ath" => Tag::new(b"ATH "), // Athapascan [collection] -> + // Athapaskan + "atj" => Tag::new(b"RCR "), // Atikamekw -> R-Cree + // "ats" => Tag::new(b"ATS "), // Gros Ventre (Atsina) + "atv" => Tag::new(b"ALT "), // Northern Altai -> Altai + "auj" => Tag::new(b"BBR "), // Awjilah -> Berber + "auz" => Tag::new(b"ARA "), // Uzbeki Arabic -> Arabic + "av" => Tag::new(b"AVR "), // Avaric -> Avar + "avl" => Tag::new(b"ARA "), // Eastern Egyptian Bedawi Arabic -> Arabic + // "avn" => Tag::new(b"AVN "), // Avatime + // "awa" => Tag::new(b"AWA "), // Awadhi + "ay" => Tag::new(b"AYM "), // Aymara [macrolanguage] + "ayc" => Tag::new(b"AYM "), // Southern Aymara -> Aymara + "ayh" => Tag::new(b"ARA "), // Hadrami Arabic -> Arabic + "ayl" => 
Tag::new(b"ARA "), // Libyan Arabic -> Arabic + "ayn" => Tag::new(b"ARA "), // Sanaani Arabic -> Arabic + "ayp" => Tag::new(b"ARA "), // North Mesopotamian Arabic -> Arabic + "ayr" => Tag::new(b"AYM "), // Central Aymara -> Aymara + "az" => Tag::new(b"AZE "), // Azerbaijani [macrolanguage] + "azb" => Tag::new(b"AZB "), // South Azerbaijani -> Torki + // "azb" => Tag::new(b"AZE "), // South Azerbaijani -> Azerbaijani + "azd" => Tag::new(b"NAH "), // Eastern Durango Nahuatl -> Nahuatl + "azj" => Tag::new(b"AZE "), // North Azerbaijani -> Azerbaijani + "azn" => Tag::new(b"NAH "), // Western Durango Nahuatl -> Nahuatl + "azz" => Tag::new(b"NAH "), // Highland Puebla Nahuatl -> Nahuatl + "ba" => Tag::new(b"BSH "), // Bashkir + "bad" => Tag::new(b"BAD0"), // Banda [collection] + // "bag" => Tag::new(&[0; 4]), // Tuki != Baghelkhandi + "bah" => Tag::new(b"CPP "), // Bahamas Creole English -> Creoles + "bai" => Tag::new(b"BML "), // Bamileke [collection] + "bal" => Tag::new(b"BLI "), // Baluchi [macrolanguage] + // "ban" => Tag::new(b"BAN "), // Balinese + // "bar" => Tag::new(b"BAR "), // Bavarian + // "bau" => Tag::new(&[0; 4]), // Bada (Nigeria) != Baulé + "bbc" => Tag::new(b"BBC "), // Batak Toba + // "bbc" => Tag::new(b"BTK "), // Batak Toba -> Batak + "bbj" => Tag::new(b"BML "), // Ghomálá' -> Bamileke + "bbp" => Tag::new(b"BAD0"), // West Central Banda -> Banda + // "bbr" => Tag::new(&[0; 4]), // Girawa != Berber + "bbz" => Tag::new(b"ARA "), /* Babalia Creole Arabic(retired code) + * -> Arabic */ + "bcc" => Tag::new(b"BLI "), // Southern Balochi -> Baluchi + // "bch" => Tag::new(&[0; 4]), // Bariai != Bench + "bci" => Tag::new(b"BAU "), // Baoulé -> Baulé + "bcl" => Tag::new(b"BIK "), // Central Bikol -> Bikol + "bcq" => Tag::new(b"BCH "), // Bench + "bcr" => Tag::new(b"ATH "), // Babine -> Athapaskan + // "bdc" => Tag::new(b"BDC "), // Emberá-Baudó + // "bdy" => Tag::new(b"BDY "), // Bandjalang + "be" => Tag::new(b"BEL "), // Belarusian + "bea" => Tag::new(b"ATH 
"), // Beaver -> Athapaskan + "beb" => Tag::new(b"BTI "), // Bebele -> Beti + // "bem" => Tag::new(b"BEM "), // Bemba (Zambia) + "ber" => Tag::new(b"BBR "), // Berber [collection] + "bew" => Tag::new(b"CPP "), // Betawi -> Creoles + "bfl" => Tag::new(b"BAD0"), // Banda-Ndélé -> Banda + "bfq" => Tag::new(b"BAD "), // Badaga + "bft" => Tag::new(b"BLT "), // Balti + "bfu" => Tag::new(b"LAH "), // Gahri -> Lahuli + "bfy" => Tag::new(b"BAG "), // Bagheli -> Baghelkhandi + "bg" => Tag::new(b"BGR "), // Bulgarian + // "bgc" => Tag::new(b"BGC "), // Haryanvi + "bgn" => Tag::new(b"BLI "), // Western Balochi -> Baluchi + "bgp" => Tag::new(b"BLI "), // Eastern Balochi -> Baluchi + "bgq" => Tag::new(b"BGQ "), // Bagri + // "bgq" => Tag::new(b"RAJ "), // Bagri -> Rajasthani + "bgr" => Tag::new(b"QIN "), // Bawm Chin -> Chin + "bhb" => Tag::new(b"BHI "), // Bhili + // "bhi" => Tag::new(b"BHI "), // Bhilali -> Bhili + "bhk" => Tag::new(b"BIK "), // Albay Bicolano(retired code) -> Bikol + // "bho" => Tag::new(b"BHO "), // Bhojpuri + "bhr" => Tag::new(b"MLG "), // Bara Malagasy -> Malagasy + "bi" => Tag::new(b"BIS "), // Bislama + // "bi" => Tag::new(b"CPP "), // Bislama -> Creoles + // "bik" => Tag::new(b"BIK "), // Bikol [macrolanguage] + // "bil" => Tag::new(&[0; 4]), // Bile != Bilen + "bin" => Tag::new(b"EDO "), // Edo + "biu" => Tag::new(b"QIN "), // Biete -> Chin + // "bjj" => Tag::new(b"BJJ "), // Kanauji + "bjn" => Tag::new(b"MLY "), // Banjar -> Malay + "bjo" => Tag::new(b"BAD0"), // Mid-Southern Banda -> Banda + "bjq" => Tag::new(b"MLG "), /* Southern Betsimisaraka + * Malagasy(retired code) -> Malagasy */ + "bjs" => Tag::new(b"CPP "), // Bajan -> Creoles + "bjt" => Tag::new(b"BLN "), // Balanta-Ganja -> Balante + // "bkf" => Tag::new(&[0; 4]), // Beeke != Blackfoot + "bko" => Tag::new(b"BML "), // Kwa' -> Bamileke + "bla" => Tag::new(b"BKF "), // Siksika -> Blackfoot + "ble" => Tag::new(b"BLN "), // Balanta-Kentohe -> Balante + "blg" => Tag::new(b"IBA "), // 
Balau(retired code) -> Iban + // "bli" => Tag::new(&[0; 4]), // Bolia != Baluchi + "blk" => Tag::new(b"BLK "), // Pa’o Karen + // "blk" => Tag::new(b"KRN "), // Pa'o Karen -> Karen + "bln" => Tag::new(b"BIK "), // Southern Catanduanes Bikol -> Bikol + // "blt" => Tag::new(&[0; 4]), // Tai Dam != Balti + "bm" => Tag::new(b"BMB "), // Bambara (Bamanankan) + // "bmb" => Tag::new(&[0; 4]), // Bembe != Bambara (Bamanankan) + // "bml" => Tag::new(&[0; 4]), // Bomboli != Bamileke + "bmm" => Tag::new(b"MLG "), /* Northern Betsimisaraka Malagasy -> + * Malagasy */ + "bn" => Tag::new(b"BEN "), // Bangla + "bo" => Tag::new(b"TIB "), // Tibetan + "bpd" => Tag::new(b"BAD0"), // Banda-Banda -> Banda + "bpl" => Tag::new(b"CPP "), // Broome Pearling Lugger Pidgin -> Creoles + "bpq" => Tag::new(b"CPP "), // Banda Malay -> Creoles + // "bpy" => Tag::new(b"BPY "), // Bishnupriya -> Bishnupriya + // Manipuri + "bqi" => Tag::new(b"LRC "), // Bakhtiari -> Luri + "bqk" => Tag::new(b"BAD0"), // Banda-Mbrès -> Banda + "br" => Tag::new(b"BRE "), // Breton + "bra" => Tag::new(b"BRI "), // Braj -> Braj Bhasha + "brc" => Tag::new(b"CPP "), // Berbice Creole Dutch -> Creoles + // "brh" => Tag::new(b"BRH "), // Brahui + // "bri" => Tag::new(&[0; 4]), // Mokpwe != Braj Bhasha + // "brm" => Tag::new(&[0; 4]), // Barambu != Burmese + // "brx" => Tag::new(b"BRX "), // Bodo (India) + "bs" => Tag::new(b"BOS "), // Bosnian + // "bsh" => Tag::new(&[0; 4]), // Kati != Bashkir + // "bsk" => Tag::new(b"BSK "), // Burushaski + "btb" => Tag::new(b"BTI "), // Beti (Cameroon)(retired code) + "btd" => Tag::new(b"BTD "), // Batak Dairi (Pakpak) + // "btd" => Tag::new(b"BTK "), // Batak Dairi -> Batak + // "bti" => Tag::new(&[0; 4]), // Burate != Beti + "btj" => Tag::new(b"MLY "), // Bacanese Malay -> Malay + // "btk" => Tag::new(b"BTK "), // Batak [collection] + "btm" => Tag::new(b"BTM "), // Batak Mandailing + // "btm" => Tag::new(b"BTK "), // Batak Mandailing -> Batak + "bto" => Tag::new(b"BIK "), // Rinconada 
Bikol -> Bikol + "bts" => Tag::new(b"BTS "), // Batak Simalungun + // "bts" => Tag::new(b"BTK "), // Batak Simalungun -> Batak + "btx" => Tag::new(b"BTX "), // Batak Karo + // "btx" => Tag::new(b"BTK "), // Batak Karo -> Batak + "btz" => Tag::new(b"BTZ "), // Batak Alas-Kluet + // "btz" => Tag::new(b"BTK "), // Batak Alas-Kluet -> Batak + // "bug" => Tag::new(b"BUG "), // Buginese -> Bugis + "bum" => Tag::new(b"BTI "), // Bulu (Cameroon) -> Beti + "bve" => Tag::new(b"MLY "), // Berau Malay -> Malay + "bvu" => Tag::new(b"MLY "), // Bukit Malay -> Malay + "bwe" => Tag::new(b"KRN "), // Bwe Karen -> Karen + "bxk" => Tag::new(b"LUH "), // Bukusu -> Luyia + "bxo" => Tag::new(b"CPP "), // Barikanchi -> Creoles + "bxp" => Tag::new(b"BTI "), // Bebil -> Beti + "bxr" => Tag::new(b"RBU "), // Russia Buriat -> Russian Buriat + "byn" => Tag::new(b"BIL "), // Bilin -> Bilen + "byv" => Tag::new(b"BYV "), // Medumba + // "byv" => Tag::new(b"BML "), // Medumba -> Bamileke + "bzc" => Tag::new(b"MLG "), /* Southern Betsimisaraka Malagasy -> + * Malagasy */ + "bzj" => Tag::new(b"CPP "), // Belize Kriol English -> Creoles + "bzk" => Tag::new(b"CPP "), // Nicaragua Creole English -> Creoles + "ca" => Tag::new(b"CAT "), // Catalan + "caa" => Tag::new(b"MYN "), // Chortí -> Mayan + "cac" => Tag::new(b"MYN "), // Chuj -> Mayan + "caf" => Tag::new(b"CRR "), // Southern Carrier -> Carrier + // "caf" => Tag::new(b"ATH "), // Southern Carrier -> Athapaskan + "cak" => Tag::new(b"CAK "), // Kaqchikel + // "cak" => Tag::new(b"MYN "), // Kaqchikel -> Mayan + // "cay" => Tag::new(b"CAY "), // Cayuga + // "cbg" => Tag::new(b"CBG "), // Chimila + "cbk" => Tag::new(b"CBK "), // Chavacano -> Zamboanga Chavacano + // "cbk" => Tag::new(b"CPP "), // Chavacano -> Creoles + "cbl" => Tag::new(b"QIN "), // Bualkhaw Chin -> Chin + "ccl" => Tag::new(b"CPP "), // Cutchi-Swahili -> Creoles + "ccm" => Tag::new(b"CPP "), // Malaccan Creole Malay -> Creoles + "cco" => Tag::new(b"CCHN"), // Comaltepec Chinantec -> 
Chinantec + "ccq" => Tag::new(b"ARK "), // Chaungtha(retired code) -> Rakhine + "cdo" => Tag::new(b"ZHS "), // Min Dong Chinese -> Chinese, Simplified + "ce" => Tag::new(b"CHE "), // Chechen + // "ceb" => Tag::new(b"CEB "), // Cebuano + "cek" => Tag::new(b"QIN "), // Eastern Khumi Chin -> Chin + "cey" => Tag::new(b"QIN "), // Ekai Chin -> Chin + "cfm" => Tag::new(b"HAL "), // Halam (Falam Chin) + // "cfm" => Tag::new(b"QIN "), // Falam Chin -> Chin + // "cgg" => Tag::new(b"CGG "), // Chiga + "ch" => Tag::new(b"CHA "), // Chamorro + "chf" => Tag::new(b"MYN "), // Tabasco Chontal -> Mayan + // "chg" => Tag::new(&[0; 4]), // Chagatai != Chaha Gurage + // "chh" => Tag::new(&[0; 4]), // Chinook != Chattisgarhi + "chj" => Tag::new(b"CCHN"), // Ojitlán Chinantec -> Chinantec + "chk" => Tag::new(b"CHK0"), // Chuukese + "chm" => Tag::new(b"HMA "), /* Mari (Russia) [macrolanguage] -> High + * Mari */ + // "chm" => Tag::new(b"LMA "), // Mari (Russia) [macrolanguage] -> Low + // Mari + "chn" => Tag::new(b"CPP "), // Chinook jargon -> Creoles + // "cho" => Tag::new(b"CHO "), // Choctaw + "chp" => Tag::new(b"CHP "), // Chipewyan + // "chp" => Tag::new(b"SAY "), // Chipewyan -> Sayisi + // "chp" => Tag::new(b"ATH "), // Chipewyan -> Athapaskan + "chq" => Tag::new(b"CCHN"), // Quiotepec Chinantec -> Chinantec + // "chr" => Tag::new(b"CHR "), // Cherokee + // "chy" => Tag::new(b"CHY "), // Cheyenne + "chz" => Tag::new(b"CCHN"), // Ozumacín Chinantec -> Chinantec + "ciw" => Tag::new(b"OJB "), // Chippewa -> Ojibway + // "cja" => Tag::new(b"CJA "), // Western Cham + // "cjm" => Tag::new(b"CJM "), // Eastern Cham + "cjy" => Tag::new(b"ZHS "), // Jinyu Chinese -> Chinese, Simplified + "cka" => Tag::new(b"QIN "), // Khumi Awa Chin(retired code) -> Chin + "ckb" => Tag::new(b"KUR "), // Central Kurdish -> Kurdish + "ckn" => Tag::new(b"QIN "), // Kaang Chin -> Chin + "cks" => Tag::new(b"CPP "), // Tayo -> Creoles + "ckt" => Tag::new(b"CHK "), // Chukot -> Chukchi + "ckz" => Tag::new(b"MYN 
"), /* Cakchiquel-Quiché Mixed Language -> + * Mayan */ + "clc" => Tag::new(b"ATH "), // Chilcotin -> Athapaskan + "cld" => Tag::new(b"SYR "), // Chaldean Neo-Aramaic -> Syriac + "cle" => Tag::new(b"CCHN"), // Lealao Chinantec -> Chinantec + "clj" => Tag::new(b"QIN "), // Laitu Chin -> Chin + "cls" => Tag::new(b"SAN "), // Classical Sanskrit -> Sanskrit + "clt" => Tag::new(b"QIN "), // Lautu Chin -> Chin + // "cmi" => Tag::new(b"CMI "), // Emberá-Chamí + "cmn" => Tag::new(b"ZHS "), // Mandarin Chinese -> Chinese, Simplified + "cmr" => Tag::new(b"QIN "), // Mro-Khimi Chin -> Chin + "cnb" => Tag::new(b"QIN "), // Chinbon Chin -> Chin + "cnh" => Tag::new(b"QIN "), // Hakha Chin -> Chin + "cnk" => Tag::new(b"QIN "), // Khumi Chin -> Chin + "cnl" => Tag::new(b"CCHN"), // Lalana Chinantec -> Chinantec + "cnp" => Tag::new(b"ZHS "), /* Northern Ping Chinese -> Chinese, + * Simplified */ + "cnr" => Tag::new(b"SRB "), // Montenegrin -> Serbian + "cnt" => Tag::new(b"CCHN"), // Tepetotutla Chinantec -> Chinantec + "cnu" => Tag::new(b"BBR "), // Chenoua -> Berber + "cnw" => Tag::new(b"QIN "), // Ngawn Chin -> Chin + "co" => Tag::new(b"COS "), // Corsican + "coa" => Tag::new(b"MLY "), // Cocos Islands Malay -> Malay + "cob" => Tag::new(b"MYN "), // Chicomuceltec -> Mayan + // "coo" => Tag::new(b"COO "), // Comox + // "cop" => Tag::new(b"COP "), // Coptic + "coq" => Tag::new(b"ATH "), // Coquille -> Athapaskan + "cpa" => Tag::new(b"CCHN"), // Palantla Chinantec -> Chinantec + "cpe" => Tag::new(b"CPP "), /* English-based creoles and pidgins + * [collection] -> Creoles */ + "cpf" => Tag::new(b"CPP "), /* French-based creoles and pidgins + * [collection] -> Creoles */ + "cpi" => Tag::new(b"CPP "), // Chinese Pidgin English -> Creoles + // "cpp" => Tag::new(b"CPP "), // Portuguese-based creoles and + // pidgins [collection] -> Creoles + "cpx" => Tag::new(b"ZHS "), // Pu-Xian Chinese -> Chinese, Simplified + "cqd" => Tag::new(b"HMN "), // Chuanqiandian Cluster Miao -> Hmong + "cqu" => 
Tag::new(b"QUH "), /* Chilean Quechua(retired code) -> + * Quechua (Bolivia) */ + // "cqu" => Tag::new(b"QUZ "), // Chilean Quechua(retired code) -> + // Quechua + "cr" => Tag::new(b"CRE "), // Cree [macrolanguage] + "crh" => Tag::new(b"CRT "), // Crimean Tatar + "cri" => Tag::new(b"CPP "), // Sãotomense -> Creoles + "crj" => Tag::new(b"ECR "), // Southern East Cree -> Eastern Cree + // "crj" => Tag::new(b"YCR "), // Southern East Cree -> Y-Cree + // "crj" => Tag::new(b"CRE "), // Southern East Cree -> Cree + "crk" => Tag::new(b"WCR "), // Plains Cree -> West-Cree + // "crk" => Tag::new(b"YCR "), // Plains Cree -> Y-Cree + // "crk" => Tag::new(b"CRE "), // Plains Cree -> Cree + "crl" => Tag::new(b"ECR "), // Northern East Cree -> Eastern Cree + // "crl" => Tag::new(b"YCR "), // Northern East Cree -> Y-Cree + // "crl" => Tag::new(b"CRE "), // Northern East Cree -> Cree + "crm" => Tag::new(b"MCR "), // Moose Cree + // "crm" => Tag::new(b"LCR "), // Moose Cree -> L-Cree + // "crm" => Tag::new(b"CRE "), // Moose Cree -> Cree + "crp" => Tag::new(b"CPP "), /* Creoles and pidgins [collection] -> + * Creoles */ + // "crr" => Tag::new(&[0; 4]), // Carolina Algonquian != Carrier + "crs" => Tag::new(b"CPP "), // Seselwa Creole French -> Creoles + // "crt" => Tag::new(&[0; 4]), // Iyojwa'ja Chorote != Crimean Tatar + "crx" => Tag::new(b"CRR "), // Carrier + // "crx" => Tag::new(b"ATH "), // Carrier -> Athapaskan + "cs" => Tag::new(b"CSY "), // Czech + "csa" => Tag::new(b"CCHN"), // Chiltepec Chinantec -> Chinantec + // "csb" => Tag::new(b"CSB "), // Kashubian + "csh" => Tag::new(b"QIN "), // Asho Chin -> Chin + "csj" => Tag::new(b"QIN "), // Songlai Chin -> Chin + // "csl" => Tag::new(&[0; 4]), // Chinese Sign Language != Church Slavonic + "cso" => Tag::new(b"CCHN"), // Sochiapam Chinantec -> Chinantec + "csp" => Tag::new(b"ZHS "), /* Southern Ping Chinese -> Chinese, + * Simplified */ + "csv" => Tag::new(b"QIN "), // Sumtu Chin -> Chin + "csw" => Tag::new(b"NCR "), // Swampy 
Cree -> N-Cree + // "csw" => Tag::new(b"NHC "), // Swampy Cree -> Norway House Cree + // "csw" => Tag::new(b"CRE "), // Swampy Cree -> Cree + "csy" => Tag::new(b"QIN "), // Siyin Chin -> Chin + "ctc" => Tag::new(b"ATH "), // Chetco -> Athapaskan + "ctd" => Tag::new(b"QIN "), // Tedim Chin -> Chin + "cte" => Tag::new(b"CCHN"), // Tepinapa Chinantec -> Chinantec + // "ctg" => Tag::new(b"CTG "), // Chittagonian + "cth" => Tag::new(b"QIN "), // Thaiphum Chin -> Chin + "ctl" => Tag::new(b"CCHN"), // Tlacoatzintepec Chinantec -> Chinantec + // "cto" => Tag::new(b"CTO "), // Emberá-Catío + "cts" => Tag::new(b"BIK "), // Northern Catanduanes Bikol -> Bikol + // "ctt" => Tag::new(b"CTT "), // Wayanad Chetti + "ctu" => Tag::new(b"MYN "), // Chol -> Mayan + "cu" => Tag::new(b"CSL "), // Church Slavonic + "cuc" => Tag::new(b"CCHN"), // Usila Chinantec -> Chinantec + // "cuk" => Tag::new(b"CUK "), // San Blas Kuna + "cv" => Tag::new(b"CHU "), // Chuvash + "cvn" => Tag::new(b"CCHN"), // Valle Nacional Chinantec -> Chinantec + "cwd" => Tag::new(b"DCR "), // Woods Cree + // "cwd" => Tag::new(b"TCR "), // Woods Cree -> TH-Cree + // "cwd" => Tag::new(b"CRE "), // Woods Cree -> Cree + "cy" => Tag::new(b"WEL "), // Welsh + "czh" => Tag::new(b"ZHS "), // Huizhou Chinese -> Chinese, Simplified + "czo" => Tag::new(b"ZHS "), // Min Zhong Chinese -> Chinese, Simplified + "czt" => Tag::new(b"QIN "), // Zotung Chin -> Chin + "da" => Tag::new(b"DAN "), // Danish + // "dag" => Tag::new(b"DAG "), // Dagbani + "dao" => Tag::new(b"QIN "), // Daai Chin -> Chin + "dap" => Tag::new(b"NIS "), // Nisi (India)(retired code) + // "dar" => Tag::new(b"DAR "), // Dargwa + // "dax" => Tag::new(b"DAX "), // Dayi + "dcr" => Tag::new(b"CPP "), // Negerhollands -> Creoles + "de" => Tag::new(b"DEU "), // German + "den" => Tag::new(b"SLA "), /* Slave (Athapascan) [macrolanguage] -> + * Slavey */ + // "den" => Tag::new(b"ATH "), // Slave (Athapascan) [macrolanguage] -> + // Athapaskan + "dep" => Tag::new(b"CPP "), 
// Pidgin Delaware -> Creoles + "dgo" => Tag::new(b"DGO "), // Dogri (individual language) + // "dgo" => Tag::new(b"DGR "), // Dogri (macrolanguage) + "dgr" => Tag::new(b"ATH "), // Tlicho -> Athapaskan + "dhd" => Tag::new(b"MAW "), // Dhundari -> Marwari + // "dhg" => Tag::new(b"DHG "), // Dhangu + // "dhv" => Tag::new(&[0; 4]), // Dehu != Divehi (Dhivehi, Maldivian) (deprecated) + "dib" => Tag::new(b"DNK "), // South Central Dinka -> Dinka + "dik" => Tag::new(b"DNK "), // Southwestern Dinka -> Dinka + "din" => Tag::new(b"DNK "), // Dinka [macrolanguage] + "dip" => Tag::new(b"DNK "), // Northeastern Dinka -> Dinka + "diq" => Tag::new(b"DIQ "), // Dimli + // "diq" => Tag::new(b"ZZA "), // Dimli -> Zazaki + "diw" => Tag::new(b"DNK "), // Northwestern Dinka -> Dinka + "dje" => Tag::new(b"DJR "), // Zarma + "djk" => Tag::new(b"CPP "), // Eastern Maroon Creole -> Creoles + "djr" => Tag::new(b"DJR0"), // Djambarrpuyngu + "dks" => Tag::new(b"DNK "), // Southeastern Dinka -> Dinka + "dng" => Tag::new(b"DUN "), // Dungan + // "dnj" => Tag::new(b"DNJ "), // Dan + // "dnk" => Tag::new(&[0; 4]), // Dengka != Dinka + "doi" => Tag::new(b"DGR "), // Dogri (macrolanguage) [macrolanguage] + "drh" => Tag::new(b"MNG "), // Darkhat(retired code) -> Mongolian + // "dri" => Tag::new(&[0; 4]), // C'Lela != Dari + "drw" => Tag::new(b"DRI "), // Darwazi(retired code) -> Dari + // "drw" => Tag::new(b"FAR "), // Darwazi(retired code) -> Persian + "dsb" => Tag::new(b"LSB "), // Lower Sorbian + "dty" => Tag::new(b"NEP "), // Dotyali -> Nepali + // "duj" => Tag::new(b"DUJ "), // Dhuwal(retired code) + // "dun" => Tag::new(&[0; 4]), // Dusun Deyah != Dungan + "dup" => Tag::new(b"MLY "), // Duano -> Malay + "dv" => Tag::new(b"DIV "), // Divehi (Dhivehi, Maldivian) + // "dv" => Tag::new(b"DHV "), // Divehi (Dhivehi, Maldivian) + // (deprecated) + "dwk" => Tag::new(b"KUI "), // Dawik Kui -> Kui + "dwu" => Tag::new(b"DUJ "), // Dhuwal + "dwy" => Tag::new(b"DUJ "), // Dhuwaya -> Dhuwal + "dyu" => 
Tag::new(b"JUL "), // Dyula -> Jula + "dz" => Tag::new(b"DZN "), // Dzongkha + // "dzn" => Tag::new(&[0; 4]), // Dzando != Dzongkha + // "ecr" => Tag::new(&[0; 4]), // Eteocretan != Eastern Cree + "ee" => Tag::new(b"EWE "), // Ewe + // "efi" => Tag::new(b"EFI "), // Efik + "ekk" => Tag::new(b"ETI "), // Standard Estonian -> Estonian + "eky" => Tag::new(b"KRN "), // Eastern Kayah -> Karen + "el" => Tag::new(b"ELL "), // Modern Greek (1453-) -> Greek + "emk" => Tag::new(b"EMK "), // Eastern Maninkakan + // "emk" => Tag::new(b"MNK "), // Eastern Maninkakan -> Maninka + // "emp" => Tag::new(b"EMP "), // Northern Emberá + "emy" => Tag::new(b"MYN "), // Epigraphic Mayan -> Mayan + "en" => Tag::new(b"ENG "), // English + "enb" => Tag::new(b"KAL "), // Markweeta -> Kalenjin + "enf" => Tag::new(b"FNE "), // Forest Enets + "enh" => Tag::new(b"TNE "), // Tundra Enets + "eo" => Tag::new(b"NTO "), // Esperanto + "es" => Tag::new(b"ESP "), // Spanish + "esg" => Tag::new(b"GON "), // Aheri Gondi -> Gondi + "esi" => Tag::new(b"IPK "), // North Alaskan Inupiatun -> Inupiat + "esk" => Tag::new(b"IPK "), // Northwest Alaska Inupiatun -> Inupiat + // "esu" => Tag::new(b"ESU "), // Central Yupik + "et" => Tag::new(b"ETI "), // Estonian [macrolanguage] + "eto" => Tag::new(b"BTI "), // Eton (Cameroon) -> Beti + "eu" => Tag::new(b"EUQ "), // Basque + // "euq" => Tag::new(&[0; 4]), // Basque [collection] != Basque + "eve" => Tag::new(b"EVN "), // Even + "evn" => Tag::new(b"EVK "), // Evenki + "ewo" => Tag::new(b"BTI "), // Ewondo -> Beti + "eyo" => Tag::new(b"KAL "), // Keiyo -> Kalenjin + "fa" => Tag::new(b"FAR "), // Persian [macrolanguage] + "fab" => Tag::new(b"CPP "), // Fa d'Ambu -> Creoles + "fan" => Tag::new(b"FAN0"), // Fang (Equatorial Guinea) + // "fan" => Tag::new(b"BTI "), // Fang (Equatorial Guinea) -> Beti + // "far" => Tag::new(&[0; 4]), // Fataleka != Persian + "fat" => Tag::new(b"FAT "), // Fanti + // "fat" => Tag::new(b"AKA "), // Fanti -> Akan + "fbl" => Tag::new(b"BIK 
"), // West Albay Bikol -> Bikol + "ff" => Tag::new(b"FUL "), // Fulah [macrolanguage] + "ffm" => Tag::new(b"FUL "), // Maasina Fulfulde -> Fulah + "fi" => Tag::new(b"FIN "), // Finnish + "fil" => Tag::new(b"PIL "), // Filipino + "fj" => Tag::new(b"FJI "), // Fijian + "flm" => Tag::new(b"HAL "), // Halam (Falam Chin)(retired code) + // "flm" => Tag::new(b"QIN "), // Falam Chin(retired code) -> Chin + "fmp" => Tag::new(b"FMP "), // Fe’fe’ + // "fmp" => Tag::new(b"BML "), // Fe'fe' -> Bamileke + "fng" => Tag::new(b"CPP "), // Fanagalo -> Creoles + "fo" => Tag::new(b"FOS "), // Faroese + // "fon" => Tag::new(b"FON "), // Fon + // "fos" => Tag::new(&[0; 4]), // Siraya != Faroese + "fpe" => Tag::new(b"CPP "), // Fernando Po Creole English -> Creoles + "fr" => Tag::new(b"FRA "), // French + // "frc" => Tag::new(b"FRC "), // Cajun French + // "frp" => Tag::new(b"FRP "), // Arpitan + "fub" => Tag::new(b"FUL "), // Adamawa Fulfulde -> Fulah + "fuc" => Tag::new(b"FUL "), // Pulaar -> Fulah + "fue" => Tag::new(b"FUL "), // Borgu Fulfulde -> Fulah + "fuf" => Tag::new(b"FTA "), // Pular -> Futa + // "fuf" => Tag::new(b"FUL "), // Pular -> Fulah + "fuh" => Tag::new(b"FUL "), // Western Niger Fulfulde -> Fulah + "fui" => Tag::new(b"FUL "), // Bagirmi Fulfulde -> Fulah + "fuq" => Tag::new(b"FUL "), // Central-Eastern Niger Fulfulde -> Fulah + "fur" => Tag::new(b"FRL "), // Friulian + "fuv" => Tag::new(b"FUV "), // Nigerian Fulfulde + // "fuv" => Tag::new(b"FUL "), // Nigerian Fulfulde -> Fulah + "fy" => Tag::new(b"FRI "), // Western Frisian -> Frisian + "ga" => Tag::new(b"IRI "), // Irish + // "ga" => Tag::new(b"IRT "), // Irish -> Irish Traditional + "gaa" => Tag::new(b"GAD "), // Ga + "gac" => Tag::new(b"CPP "), // Mixed Great Andamanese -> Creoles + // "gad" => Tag::new(&[0; 4]), // Gaddang != Ga + // "gae" => Tag::new(&[0; 4]), // Guarequena != Scottish Gaelic + // "gag" => Tag::new(b"GAG "), // Gagauz + // "gal" => Tag::new(&[0; 4]), // Galolen != Galician + "gan" => 
Tag::new(b"ZHS "), // Gan Chinese -> Chinese, Simplified + // "gar" => Tag::new(&[0; 4]), // Galeya != Garshuni + // "gaw" => Tag::new(&[0; 4]), // Nobonob != Garhwali + "gax" => Tag::new(b"ORO "), // Borana-Arsi-Guji Oromo -> Oromo + "gaz" => Tag::new(b"ORO "), // West Central Oromo -> Oromo + "gbm" => Tag::new(b"GAW "), // Garhwali + "gce" => Tag::new(b"ATH "), // Galice -> Athapaskan + "gcf" => Tag::new(b"CPP "), // Guadeloupean Creole French -> Creoles + "gcl" => Tag::new(b"CPP "), // Grenadian Creole English -> Creoles + "gcr" => Tag::new(b"CPP "), // Guianese Creole French -> Creoles + "gd" => Tag::new(b"GAE "), // Scottish Gaelic + "gda" => Tag::new(b"RAJ "), // Gade Lohar -> Rajasthani + // "gez" => Tag::new(b"GEZ "), // Geez + "ggo" => Tag::new(b"GON "), // Southern Gondi(retired code) -> Gondi + "gha" => Tag::new(b"BBR "), // Ghadamès -> Berber + "ghc" => Tag::new(b"IRT "), /* Hiberno-Scottish Gaelic -> Irish + * Traditional */ + "ghk" => Tag::new(b"KRN "), // Geko Karen -> Karen + "gho" => Tag::new(b"BBR "), // Ghomara -> Berber + "gib" => Tag::new(b"CPP "), // Gibanawa -> Creoles + // "gih" => Tag::new(b"GIH "), // Githabul + "gil" => Tag::new(b"GIL0"), // Kiribati (Gilbertese) + "gju" => Tag::new(b"RAJ "), // Gujari -> Rajasthani + "gkp" => Tag::new(b"GKP "), // Guinea Kpelle -> Kpelle (Guinea) + // "gkp" => Tag::new(b"KPL "), // Guinea Kpelle -> Kpelle + "gl" => Tag::new(b"GAL "), // Galician + "gld" => Tag::new(b"NAN "), // Nanai + // "glk" => Tag::new(b"GLK "), // Gilaki + // "gmz" => Tag::new(&[0; 4]), // Mgbolizhia != Gumuz + "gn" => Tag::new(b"GUA "), // Guarani [macrolanguage] + "gnb" => Tag::new(b"QIN "), // Gangte -> Chin + // "gnn" => Tag::new(b"GNN "), // Gumatj + "gno" => Tag::new(b"GON "), // Northern Gondi -> Gondi + "gnw" => Tag::new(b"GUA "), // Western Bolivian Guaraní -> Guarani + // "gog" => Tag::new(b"GOG "), // Gogo + "gom" => Tag::new(b"KOK "), // Goan Konkani -> Konkani + // "gon" => Tag::new(b"GON "), // Gondi [macrolanguage] + 
"goq" => Tag::new(b"CPP "), // Gorap -> Creoles + "gox" => Tag::new(b"BAD0"), // Gobu -> Banda + "gpe" => Tag::new(b"CPP "), // Ghanaian Pidgin English -> Creoles + // "gro" => Tag::new(&[0; 4]), // Groma != Garo + "grr" => Tag::new(b"BBR "), // Taznatit -> Berber + "grt" => Tag::new(b"GRO "), // Garo + "gru" => Tag::new(b"SOG "), // Kistane -> Sodo Gurage + "gsw" => Tag::new(b"ALS "), // Alsatian + "gu" => Tag::new(b"GUJ "), // Gujarati + // "gua" => Tag::new(&[0; 4]), // Shiki != Guarani + // "guc" => Tag::new(b"GUC "), // Wayuu + // "guf" => Tag::new(b"GUF "), // Gupapuyngu + "gug" => Tag::new(b"GUA "), // Paraguayan Guaraní -> Guarani + "gui" => Tag::new(b"GUA "), // Eastern Bolivian Guaraní -> Guarani + "guk" => Tag::new(b"GMZ "), // Gumuz + "gul" => Tag::new(b"CPP "), // Sea Island Creole English -> Creoles + "gun" => Tag::new(b"GUA "), // Mbyá Guaraní -> Guarani + // "guz" => Tag::new(b"GUZ "), // Gusii + "gv" => Tag::new(b"MNX "), // Manx + "gwi" => Tag::new(b"ATH "), // Gwichʼin -> Athapaskan + "gyn" => Tag::new(b"CPP "), // Guyanese Creole English -> Creoles + "ha" => Tag::new(b"HAU "), // Hausa + "haa" => Tag::new(b"ATH "), // Hän -> Athapaskan + "hae" => Tag::new(b"ORO "), // Eastern Oromo -> Oromo + "hai" => Tag::new(b"HAI0"), // Haida [macrolanguage] + "hak" => Tag::new(b"ZHS "), // Hakka Chinese -> Chinese, Simplified + // "hal" => Tag::new(&[0; 4]), // Halang != Halam (Falam Chin) + "har" => Tag::new(b"HRI "), // Harari + // "haw" => Tag::new(b"HAW "), // Hawaiian + "hax" => Tag::new(b"HAI0"), // Southern Haida -> Haida + // "hay" => Tag::new(b"HAY "), // Haya + // "haz" => Tag::new(b"HAZ "), // Hazaragi + // "hbn" => Tag::new(&[0; 4]), // Heiban != Hammer-Banna + "hca" => Tag::new(b"CPP "), // Andaman Creole Hindi -> Creoles + "hdn" => Tag::new(b"HAI0"), // Northern Haida -> Haida + "he" => Tag::new(b"IWR "), // Hebrew + "hea" => Tag::new(b"HMN "), // Northern Qiandong Miao -> Hmong + // "hei" => Tag::new(b"HEI "), // Heiltsuk + "hi" => 
Tag::new(b"HIN "), // Hindi + // "hil" => Tag::new(b"HIL "), // Hiligaynon + "hji" => Tag::new(b"MLY "), // Haji -> Malay + "hlt" => Tag::new(b"QIN "), // Matu Chin -> Chin + "hma" => Tag::new(b"HMN "), // Southern Mashan Hmong -> Hmong + "hmc" => Tag::new(b"HMN "), // Central Huishui Hmong -> Hmong + "hmd" => Tag::new(b"HMD "), // Large Flowery Miao -> A-Hmao + // "hmd" => Tag::new(b"HMN "), // Large Flowery Miao -> Hmong + "hme" => Tag::new(b"HMN "), // Eastern Huishui Hmong -> Hmong + "hmg" => Tag::new(b"HMN "), // Southwestern Guiyang Hmong -> Hmong + "hmh" => Tag::new(b"HMN "), // Southwestern Huishui Hmong -> Hmong + "hmi" => Tag::new(b"HMN "), // Northern Huishui Hmong -> Hmong + "hmj" => Tag::new(b"HMN "), // Ge -> Hmong + "hml" => Tag::new(b"HMN "), // Luopohe Hmong -> Hmong + "hmm" => Tag::new(b"HMN "), // Central Mashan Hmong -> Hmong + // "hmn" => Tag::new(b"HMN "), // Hmong [macrolanguage] + "hmp" => Tag::new(b"HMN "), // Northern Mashan Hmong -> Hmong + "hmq" => Tag::new(b"HMN "), // Eastern Qiandong Miao -> Hmong + "hmr" => Tag::new(b"QIN "), // Hmar -> Chin + "hms" => Tag::new(b"HMN "), // Southern Qiandong Miao -> Hmong + "hmw" => Tag::new(b"HMN "), // Western Mashan Hmong -> Hmong + "hmy" => Tag::new(b"HMN "), // Southern Guiyang Hmong -> Hmong + "hmz" => Tag::new(b"HMZ "), // Hmong Shua -> Hmong Shuat + // "hmz" => Tag::new(b"HMN "), // Hmong Shua -> Hmong + // "hnd" => Tag::new(b"HND "), // Southern Hindko -> Hindko + "hne" => Tag::new(b"CHH "), // Chhattisgarhi -> Chattisgarhi + "hnj" => Tag::new(b"HMN "), // Hmong Njua -> Hmong + "hnm" => Tag::new(b"ZHS "), // Hainanese -> Chinese, Simplified + "hno" => Tag::new(b"HND "), // Northern Hindko -> Hindko + "ho" => Tag::new(b"HMO "), // Hiri Motu + // "ho" => Tag::new(b"CPP "), // Hiri Motu -> Creoles + "hoc" => Tag::new(b"HO "), // Ho + "hoi" => Tag::new(b"ATH "), // Holikachuk -> Athapaskan + "hoj" => Tag::new(b"HAR "), // Hadothi -> Harauti + // "hoj" => Tag::new(b"RAJ "), // Hadothi -> 
Rajasthani + "hr" => Tag::new(b"HRV "), // Croatian + "hra" => Tag::new(b"QIN "), // Hrangkhol -> Chin + "hrm" => Tag::new(b"HMN "), // Horned Miao -> Hmong + "hsb" => Tag::new(b"USB "), // Upper Sorbian + "hsn" => Tag::new(b"ZHS "), // Xiang Chinese -> Chinese, Simplified + "ht" => Tag::new(b"HAI "), // Haitian (Haitian Creole) + // "ht" => Tag::new(b"CPP "), // Haitian -> Creoles + "hu" => Tag::new(b"HUN "), // Hungarian + "huj" => Tag::new(b"HMN "), // Northern Guiyang Hmong -> Hmong + "hup" => Tag::new(b"ATH "), // Hupa -> Athapaskan + // "hur" => Tag::new(b"HUR "), // Halkomelem + "hus" => Tag::new(b"MYN "), // Huastec -> Mayan + "hwc" => Tag::new(b"CPP "), // Hawai'i Creole English -> Creoles + "hy" => Tag::new(b"HYE0"), // Armenian -> Armenian East + // "hy" => Tag::new(b"HYE "), // Armenian + "hyw" => Tag::new(b"HYE "), // Western Armenian -> Armenian + "hz" => Tag::new(b"HER "), // Herero + "ia" => Tag::new(b"INA "), /* Interlingua (International Auxiliary + * Language Association) */ + // "iba" => Tag::new(b"IBA "), // Iban + // "ibb" => Tag::new(b"IBB "), // Ibibio + "iby" => Tag::new(b"IJO "), // Ibani -> Ijo + "icr" => Tag::new(b"CPP "), // Islander Creole English -> Creoles + "id" => Tag::new(b"IND "), // Indonesian + // "id" => Tag::new(b"MLY "), // Indonesian -> Malay + "ida" => Tag::new(b"LUH "), // Idakho-Isukha-Tiriki -> Luyia + "idb" => Tag::new(b"CPP "), // Indo-Portuguese -> Creoles + "ie" => Tag::new(b"ILE "), // Interlingue + "ig" => Tag::new(b"IBO "), // Igbo + "igb" => Tag::new(b"EBI "), // Ebira + "ihb" => Tag::new(b"CPP "), // Iha Based Pidgin -> Creoles + "ii" => Tag::new(b"YIM "), // Sichuan Yi -> Yi Modern + "ijc" => Tag::new(b"IJO "), // Izon -> Ijo + "ije" => Tag::new(b"IJO "), // Biseni -> Ijo + "ijn" => Tag::new(b"IJO "), // Kalabari -> Ijo + // "ijo" => Tag::new(b"IJO "), // Ijo [collection] + "ijs" => Tag::new(b"IJO "), // Southeast Ijo -> Ijo + "ik" => Tag::new(b"IPK "), // Inupiaq [macrolanguage] -> Inupiat + "ike" => 
Tag::new(b"INU "), // Eastern Canadian Inuktitut -> Inuktitut + // "ike" => Tag::new(b"INUK"), // Eastern Canadian Inuktitut -> Nunavik + // Inuktitut + "ikt" => Tag::new(b"INU "), // Inuinnaqtun -> Inuktitut + // "ilo" => Tag::new(b"ILO "), // Iloko -> Ilokano + "in" => Tag::new(b"IND "), // Indonesian(retired code) + // "in" => Tag::new(b"MLY "), // Indonesian(retired code) -> Malay + "ing" => Tag::new(b"ATH "), // Degexit'an -> Athapaskan + "inh" => Tag::new(b"ING "), // Ingush + "io" => Tag::new(b"IDO "), // Ido + // "iri" => Tag::new(&[0; 4]), // Rigwe != Irish + // "iru" => Tag::new(b"IRU "), // Irula + "is" => Tag::new(b"ISL "), // Icelandic + // "ism" => Tag::new(&[0; 4]), // Masimasi != Inari Sami + "it" => Tag::new(b"ITA "), // Italian + "itz" => Tag::new(b"MYN "), // Itzá -> Mayan + "iu" => Tag::new(b"INU "), // Inuktitut [macrolanguage] + // "iu" => Tag::new(b"INUK"), // Inuktitut [macrolanguage] -> Nunavik + // Inuktitut + "iw" => Tag::new(b"IWR "), // Hebrew(retired code) + "ixl" => Tag::new(b"MYN "), // Ixil -> Mayan + "ja" => Tag::new(b"JAN "), // Japanese + "jac" => Tag::new(b"MYN "), // Popti' -> Mayan + "jak" => Tag::new(b"MLY "), // Jakun -> Malay + "jam" => Tag::new(b"JAM "), /* Jamaican Creole English -> Jamaican + * Creole */ + // "jam" => Tag::new(b"CPP "), // Jamaican Creole English -> Creoles + // "jan" => Tag::new(&[0; 4]), // Jandai != Japanese + "jax" => Tag::new(b"MLY "), // Jambi Malay -> Malay + "jbe" => Tag::new(b"BBR "), // Judeo-Berber -> Berber + "jbn" => Tag::new(b"BBR "), // Nafusi -> Berber + // "jbo" => Tag::new(b"JBO "), // Lojban + // "jct" => Tag::new(b"JCT "), // Krymchak + // "jdt" => Tag::new(b"JDT "), // Judeo-Tat + "jgo" => Tag::new(b"BML "), // Ngomba -> Bamileke + "ji" => Tag::new(b"JII "), // Yiddish(retired code) + // "jii" => Tag::new(&[0; 4]), // Jiiddu != Yiddish + "jkm" => Tag::new(b"KRN "), // Mobwa Karen -> Karen + "jkp" => Tag::new(b"KRN "), // Paku Karen -> Karen + // "jud" => Tag::new(&[0; 4]), // 
Worodougou != Ladino + // "jul" => Tag::new(&[0; 4]), // Jirel != Jula + "jv" => Tag::new(b"JAV "), // Javanese + "jvd" => Tag::new(b"CPP "), // Javindo -> Creoles + "jw" => Tag::new(b"JAV "), // Javanese(retired code) + "ka" => Tag::new(b"KAT "), // Georgian + "kaa" => Tag::new(b"KRK "), // Karakalpak + "kab" => Tag::new(b"KAB0"), // Kabyle + // "kab" => Tag::new(b"BBR "), // Kabyle -> Berber + // "kac" => Tag::new(&[0; 4]), // Kachin != Kachchi + "kam" => Tag::new(b"KMB "), // Kamba (Kenya) + "kar" => Tag::new(b"KRN "), // Karen [collection] + // "kaw" => Tag::new(b"KAW "), // Kawi (Old Javanese) + // "kbc" => Tag::new(b"KBC "), // Kadiwéu + "kbd" => Tag::new(b"KAB "), // Kabardian + "kby" => Tag::new(b"KNR "), // Manga Kanuri -> Kanuri + "kca" => Tag::new(b"KHK "), // Khanty -> Khanty-Kazim + // "kca" => Tag::new(b"KHS "), // Khanty -> Khanty-Shurishkar + // "kca" => Tag::new(b"KHV "), // Khanty -> Khanty-Vakhi + "kcn" => Tag::new(b"CPP "), // Nubi -> Creoles + // "kde" => Tag::new(b"KDE "), // Makonde + "kdr" => Tag::new(b"KRM "), // Karaim + "kdt" => Tag::new(b"KUY "), // Kuy + "kea" => Tag::new(b"KEA "), // Kabuverdianu (Crioulo) + // "kea" => Tag::new(b"CPP "), // Kabuverdianu -> Creoles + // "keb" => Tag::new(&[0; 4]), // Kélé != Kebena + "kek" => Tag::new(b"KEK "), // Kekchi + // "kek" => Tag::new(b"MYN "), // Kekchí -> Mayan + "kex" => Tag::new(b"KKN "), // Kukna -> Kokni + "kfa" => Tag::new(b"KOD "), // Kodava -> Kodagu + "kfr" => Tag::new(b"KAC "), // Kachhi -> Kachchi + "kfx" => Tag::new(b"KUL "), // Kullu Pahari -> Kulvi + "kfy" => Tag::new(b"KMN "), // Kumaoni + "kg" => Tag::new(b"KON0"), // Kongo [macrolanguage] + // "kge" => Tag::new(&[0; 4]), // Komering != Khutsuri Georgian + // "kgf" => Tag::new(b"KGF "), // Kube + "kha" => Tag::new(b"KSI "), // Khasi + "khb" => Tag::new(b"XBD "), // Lü + "khk" => Tag::new(b"MNG "), // Halh Mongolian -> Mongolian + // "khn" => Tag::new(&[0; 4]), // Khandesi != Khamti Shan(Microsoft fonts) + // "khs" => 
Tag::new(&[0; 4]), // Kasua != Khanty-Shurishkar + "kht" => Tag::new(b"KHT "), // Khamti -> Khamti Shan + // "kht" => Tag::new(b"KHN "), // Khamti -> Khamti Shan(Microsoft fonts) + // "khv" => Tag::new(&[0; 4]), // Khvarshi != Khanty-Vakhi + // "khw" => Tag::new(b"KHW "), // Khowar + "ki" => Tag::new(b"KIK "), // Kikuyu (Gikuyu) + // "kis" => Tag::new(&[0; 4]), // Kis != Kisii + "kiu" => Tag::new(b"KIU "), // Kirmanjki + // "kiu" => Tag::new(b"ZZA "), // Kirmanjki -> Zazaki + "kj" => Tag::new(b"KUA "), // Kuanyama + "kjb" => Tag::new(b"MYN "), // Q'anjob'al -> Mayan + // "kjd" => Tag::new(b"KJD "), // Southern Kiwai + "kjh" => Tag::new(b"KHA "), // Khakas -> Khakass + // "kjj" => Tag::new(b"KJJ "), // Khinalugh -> Khinalug + "kjp" => Tag::new(b"KJP "), // Pwo Eastern Karen -> Eastern Pwo Karen + // "kjp" => Tag::new(b"KRN "), // Pwo Eastern Karen -> Karen + "kjt" => Tag::new(b"KRN "), // Phrae Pwo Karen -> Karen + // "kjz" => Tag::new(b"KJZ "), // Bumthangkha + "kk" => Tag::new(b"KAZ "), // Kazakh + // "kkn" => Tag::new(&[0; 4]), // Kon Keu != Kokni + "kkz" => Tag::new(b"ATH "), // Kaska -> Athapaskan + "kl" => Tag::new(b"GRN "), // Greenlandic + // "klm" => Tag::new(&[0; 4]), // Migum != Kalmyk + "kln" => Tag::new(b"KAL "), // Kalenjin [macrolanguage] + "km" => Tag::new(b"KHM "), // Khmer + "kmb" => Tag::new(b"MBN "), // Kimbundu -> Mbundu + // "kmg" => Tag::new(b"KMG "), // Kâte + // "kmn" => Tag::new(&[0; 4]), // Awtuw != Kumaoni + // "kmo" => Tag::new(&[0; 4]), // Kwoma != Komo + "kmr" => Tag::new(b"KUR "), // Northern Kurdish -> Kurdish + // "kms" => Tag::new(&[0; 4]), // Kamasau != Komso + "kmv" => Tag::new(b"CPP "), // Karipúna Creole French -> Creoles + "kmw" => Tag::new(b"KMO "), // Komo (Democratic Republic of Congo) + // "kmz" => Tag::new(b"KMZ "), // Khorasani Turkish -> Khorasani + // Turkic + "kn" => Tag::new(b"KAN "), // Kannada + "knc" => Tag::new(b"KNR "), // Central Kanuri -> Kanuri + "kng" => Tag::new(b"KON0"), // Koongo -> Kongo + "knj" => 
Tag::new(b"MYN "), // Western Kanjobal -> Mayan + "knn" => Tag::new(b"KOK "), // Konkani + // "knr" => Tag::new(&[0; 4]), // Kaningra != Kanuri + "ko" => Tag::new(b"KOR "), // Korean + // "kod" => Tag::new(&[0; 4]), // Kodi != Kodagu + // "koh" => Tag::new(&[0; 4]), // Koyo != Korean Old Hangul + "koi" => Tag::new(b"KOP "), // Komi-Permyak + // "kok" => Tag::new(b"KOK "), // Konkani [macrolanguage] + // "kop" => Tag::new(&[0; 4]), // Waube != Komi-Permyak + // "kos" => Tag::new(b"KOS "), // Kosraean + "koy" => Tag::new(b"ATH "), // Koyukon -> Athapaskan + // "koz" => Tag::new(&[0; 4]), // Korak != Komi-Zyrian + "kpe" => Tag::new(b"KPL "), // Kpelle [macrolanguage] + // "kpl" => Tag::new(&[0; 4]), // Kpala != Kpelle + "kpp" => Tag::new(b"KRN "), // Paku Karen(retired code) -> Karen + "kpv" => Tag::new(b"KOZ "), // Komi-Zyrian + "kpy" => Tag::new(b"KYK "), // Koryak + "kqs" => Tag::new(b"KIS "), // Northern Kissi -> Kisii + "kqy" => Tag::new(b"KRT "), // Koorete + "kr" => Tag::new(b"KNR "), // Kanuri [macrolanguage] + "krc" => Tag::new(b"KAR "), // Karachay-Balkar -> Karachay + "kri" => Tag::new(b"KRI "), // Krio + // "krk" => Tag::new(&[0; 4]), // Kerek != Karakalpak + // "krl" => Tag::new(b"KRL "), // Karelian + // "krm" => Tag::new(&[0; 4]), // Krim(retired code) != Karaim + // "krn" => Tag::new(&[0; 4]), // Sapo != Karen + "krt" => Tag::new(b"KNR "), // Tumari Kanuri -> Kanuri + "kru" => Tag::new(b"KUU "), // Kurukh + "ks" => Tag::new(b"KSH "), // Kashmiri + "ksh" => Tag::new(b"KSH0"), // Kölsch -> Ripuarian + // "ksi" => Tag::new(&[0; 4]), // Krisa != Khasi + // "ksm" => Tag::new(&[0; 4]), // Kumba != Kildin Sami + "kss" => Tag::new(b"KIS "), // Southern Kisi -> Kisii + // "ksu" => Tag::new(b"KSU "), // Khamyang + "ksw" => Tag::new(b"KSW "), // S’gaw Karen + // "ksw" => Tag::new(b"KRN "), // S'gaw Karen -> Karen + "ktb" => Tag::new(b"KEB "), // Kambaata -> Kebena + "ktu" => Tag::new(b"KON "), /* Kituba (Democratic Republic of Congo) + * -> Kikongo */ + "ktw" => 
Tag::new(b"ATH "), // Kato -> Athapaskan + "ku" => Tag::new(b"KUR "), // Kurdish [macrolanguage] + // "kui" => Tag::new(&[0; 4]), // Kuikúro-Kalapálo != Kui + // "kul" => Tag::new(&[0; 4]), // Kulere != Kulvi + // "kum" => Tag::new(b"KUM "), // Kumyk + "kuu" => Tag::new(b"ATH "), // Upper Kuskokwim -> Athapaskan + "kuw" => Tag::new(b"BAD0"), // Kpagua -> Banda + // "kuy" => Tag::new(&[0; 4]), // Kuuku-Ya'u != Kuy + "kv" => Tag::new(b"KOM "), // Komi [macrolanguage] + "kvb" => Tag::new(b"MLY "), // Kubu -> Malay + "kvl" => Tag::new(b"KRN "), // Kayaw -> Karen + "kvq" => Tag::new(b"KVQ "), // Geba Karen + // "kvq" => Tag::new(b"KRN "), // Geba Karen -> Karen + "kvr" => Tag::new(b"MLY "), // Kerinci -> Malay + "kvt" => Tag::new(b"KRN "), // Lahta Karen -> Karen + "kvu" => Tag::new(b"KRN "), // Yinbaw Karen -> Karen + "kvy" => Tag::new(b"KRN "), // Yintale Karen -> Karen + "kw" => Tag::new(b"COR "), // Cornish + // "kwk" => Tag::new(b"KWK "), // Kwak'wala -> Kwakʼwala + "kww" => Tag::new(b"CPP "), // Kwinti -> Creoles + "kwy" => Tag::new(b"KON0"), // San Salvador Kongo -> Kongo + "kxc" => Tag::new(b"KMS "), // Konso -> Komso + "kxd" => Tag::new(b"MLY "), // Brunei -> Malay + "kxf" => Tag::new(b"KRN "), // Manumanaw Karen -> Karen + "kxk" => Tag::new(b"KRN "), // Zayein Karen -> Karen + "kxl" => Tag::new(b"KUU "), // Nepali Kurux(retired code) -> Kurukh + "kxu" => Tag::new(b"KUI "), // Kui (India)(retired code) + "ky" => Tag::new(b"KIR "), // Kirghiz (Kyrgyz) + // "kyk" => Tag::new(&[0; 4]), // Kamayo != Koryak + "kyu" => Tag::new(b"KYU "), // Western Kayah + // "kyu" => Tag::new(b"KRN "), // Western Kayah -> Karen + "la" => Tag::new(b"LAT "), // Latin + "lac" => Tag::new(b"MYN "), // Lacandon -> Mayan + "lad" => Tag::new(b"JUD "), // Ladino + // "lah" => Tag::new(&[0; 4]), // Lahnda [macrolanguage] != Lahuli + // "lak" => Tag::new(&[0; 4]), // Laka (Nigeria)(retired code) != Lak + // "lam" => Tag::new(&[0; 4]), // Lamba != Lambani + // "laz" => Tag::new(&[0; 4]), // 
Aribwatsa != Laz + "lb" => Tag::new(b"LTZ "), // Luxembourgish + "lbe" => Tag::new(b"LAK "), // Lak + "lbj" => Tag::new(b"LDK "), // Ladakhi + "lbl" => Tag::new(b"BIK "), // Libon Bikol -> Bikol + "lce" => Tag::new(b"MLY "), // Loncong -> Malay + "lcf" => Tag::new(b"MLY "), // Lubu -> Malay + "ldi" => Tag::new(b"KON0"), // Laari -> Kongo + // "ldk" => Tag::new(&[0; 4]), // Leelau != Ladakhi + // "lef" => Tag::new(b"LEF "), // Lelemi + // "lez" => Tag::new(b"LEZ "), // Lezghian -> Lezgi + "lg" => Tag::new(b"LUG "), // Ganda + "li" => Tag::new(b"LIM "), // Limburgish + "lif" => Tag::new(b"LMB "), // Limbu + // "lij" => Tag::new(b"LIJ "), // Ligurian + "lir" => Tag::new(b"CPP "), // Liberian English -> Creoles + // "lis" => Tag::new(b"LIS "), // Lisu + // "liv" => Tag::new(b"LIV "), // Liv + "liw" => Tag::new(b"MLY "), // Col -> Malay + "liy" => Tag::new(b"BAD0"), // Banda-Bambari -> Banda + // "ljp" => Tag::new(b"LJP "), // Lampung Api -> Lampung + "lkb" => Tag::new(b"LUH "), // Kabras -> Luyia + // "lki" => Tag::new(b"LKI "), // Laki + "lko" => Tag::new(b"LUH "), // Khayo -> Luyia + "lks" => Tag::new(b"LUH "), // Kisa -> Luyia + "lld" => Tag::new(b"LAD "), // Ladin + // "lma" => Tag::new(&[0; 4]), // East Limba != Low Mari + // "lmb" => Tag::new(&[0; 4]), // Merei != Limbu + "lmn" => Tag::new(b"LAM "), // Lambadi -> Lambani + // "lmo" => Tag::new(b"LMO "), // Lombard + // "lmw" => Tag::new(&[0; 4]), // Lake Miwok != Lomwe + "ln" => Tag::new(b"LIN "), // Lingala + "lna" => Tag::new(b"BAD0"), // Langbashe -> Banda + "lnl" => Tag::new(b"BAD0"), // South Central Banda -> Banda + "lo" => Tag::new(b"LAO "), // Lao + // "lom" => Tag::new(b"LOM "), // Loma (Liberia) + "lou" => Tag::new(b"CPP "), // Louisiana Creole -> Creoles + // "lpo" => Tag::new(b"LPO "), // Lipo + // "lrc" => Tag::new(b"LRC "), // Northern Luri -> Luri + "lri" => Tag::new(b"LUH "), // Marachi -> Luyia + "lrm" => Tag::new(b"LUH "), // Marama -> Luyia + "lrt" => Tag::new(b"CPP "), // Larantuka Malay -> 
Creoles + // "lsb" => Tag::new(&[0; 4]), // Burundian Sign Language != Lower Sorbian + "lsm" => Tag::new(b"LUH "), // Saamia -> Luyia + "lt" => Tag::new(b"LTH "), // Lithuanian + "ltg" => Tag::new(b"LVI "), // Latgalian -> Latvian + // "lth" => Tag::new(&[0; 4]), // Thur != Lithuanian + "lto" => Tag::new(b"LUH "), // Tsotso -> Luyia + "lts" => Tag::new(b"LUH "), // Tachoni -> Luyia + "lu" => Tag::new(b"LUB "), // Luba-Katanga + // "lua" => Tag::new(b"LUA "), // Luba-Lulua + "luh" => Tag::new(b"ZHS "), // Leizhou Chinese -> Chinese, Simplified + // "luo" => Tag::new(b"LUO "), // Luo (Kenya and Tanzania) + "lus" => Tag::new(b"MIZ "), // Lushai -> Mizo + // "lus" => Tag::new(b"QIN "), // Lushai -> Chin + // "lut" => Tag::new(b"LUT "), // Lushootseed + "luy" => Tag::new(b"LUH "), // Luyia [macrolanguage] + "luz" => Tag::new(b"LRC "), // Southern Luri -> Luri + "lv" => Tag::new(b"LVI "), // Latvian [macrolanguage] + // "lvi" => Tag::new(&[0; 4]), // Lavi != Latvian + "lvs" => Tag::new(b"LVI "), // Standard Latvian -> Latvian + "lwg" => Tag::new(b"LUH "), // Wanga -> Luyia + "lzh" => Tag::new(b"ZHT "), // Literary Chinese -> Chinese, Traditional + "lzz" => Tag::new(b"LAZ "), // Laz + // "mad" => Tag::new(b"MAD "), // Madurese -> Madura + // "mag" => Tag::new(b"MAG "), // Magahi + "mai" => Tag::new(b"MTH "), // Maithili + // "maj" => Tag::new(&[0; 4]), // Jalapa De Díaz Mazatec != Majang + "mak" => Tag::new(b"MKR "), // Makasar + "mam" => Tag::new(b"MAM "), // Mam + // "mam" => Tag::new(b"MYN "), // Mam -> Mayan + "man" => Tag::new(b"MNK "), // Mandingo [macrolanguage] -> Maninka + // "map" => Tag::new(&[0; 4]), // Austronesian [collection] != Mapudungun + // "maw" => Tag::new(&[0; 4]), // Mampruli != Marwari + "max" => Tag::new(b"MLY "), // North Moluccan Malay -> Malay + // "max" => Tag::new(b"CPP "), // North Moluccan Malay -> Creoles + "mbf" => Tag::new(b"CPP "), // Baba Malay -> Creoles + // "mbn" => Tag::new(&[0; 4]), // Macaguán != Mbundu + // "mbo" => 
Tag::new(b"MBO "), // Mbo (Cameroon) + // "mch" => Tag::new(&[0; 4]), // Maquiritari != Manchu + "mcm" => Tag::new(b"CPP "), // Malaccan Creole Portuguese -> Creoles + // "mcr" => Tag::new(&[0; 4]), // Menya != Moose Cree + "mct" => Tag::new(b"BTI "), // Mengisa -> Beti + // "mde" => Tag::new(&[0; 4]), // Maba (Chad) != Mende + "mdf" => Tag::new(b"MOK "), // Moksha + // "mdr" => Tag::new(b"MDR "), // Mandar + "mdy" => Tag::new(b"MLE "), // Male (Ethiopia) + "men" => Tag::new(b"MDE "), // Mende (Sierra Leone) + "meo" => Tag::new(b"MLY "), // Kedah Malay -> Malay + // "mer" => Tag::new(b"MER "), // Meru + // "mev" => Tag::new(b"MEV "), // Mano + "mfa" => Tag::new(b"MFA "), // Pattani Malay + // "mfa" => Tag::new(b"MLY "), // Pattani Malay -> Malay + "mfb" => Tag::new(b"MLY "), // Bangka -> Malay + "mfe" => Tag::new(b"MFE "), // Morisyen + // "mfe" => Tag::new(b"CPP "), // Morisyen -> Creoles + "mfp" => Tag::new(b"CPP "), // Makassar Malay -> Creoles + "mg" => Tag::new(b"MLG "), // Malagasy [macrolanguage] + "mga" => Tag::new(b"SGA "), // Middle Irish (900-1200) -> Old Irish + "mh" => Tag::new(b"MAH "), // Marshallese + "mhc" => Tag::new(b"MYN "), // Mocho -> Mayan + "mhr" => Tag::new(b"LMA "), // Eastern Mari -> Low Mari + "mhv" => Tag::new(b"ARK "), // Arakanese(retired code) -> Rakhine + "mi" => Tag::new(b"MRI "), // Maori + "min" => Tag::new(b"MIN "), // Minangkabau + // "min" => Tag::new(b"MLY "), // Minangkabau -> Malay + // "miz" => Tag::new(&[0; 4]), // Coatzospan Mixtec != Mizo + "mk" => Tag::new(b"MKD "), // Macedonian + "mkn" => Tag::new(b"CPP "), // Kupang Malay -> Creoles + // "mkr" => Tag::new(&[0; 4]), // Malas != Makasar + "mku" => Tag::new(b"MNK "), // Konyanka Maninka -> Maninka + // "mkw" => Tag::new(b"MKW "), // Kituba (Congo) + "ml" => Tag::new(b"MAL "), // Malayalam -> Malayalam Traditional + // "ml" => Tag::new(b"MLR "), // Malayalam -> Malayalam Reformed + // "mle" => Tag::new(&[0; 4]), // Manambu != Male + // "mln" => Tag::new(&[0; 4]), // 
Malango != Malinke + "mlq" => Tag::new(b"MLN "), // Western Maninkakan -> Malinke + // "mlq" => Tag::new(b"MNK "), // Western Maninkakan -> Maninka + // "mlr" => Tag::new(&[0; 4]), // Vame != Malayalam Reformed + "mmr" => Tag::new(b"HMN "), // Western Xiangxi Miao -> Hmong + "mn" => Tag::new(b"MNG "), // Mongolian [macrolanguage] + "mnc" => Tag::new(b"MCH "), // Manchu + // "mnd" => Tag::new(&[0; 4]), // Mondé != Mandinka + // "mng" => Tag::new(&[0; 4]), // Eastern Mnong != Mongolian + "mnh" => Tag::new(b"BAD0"), /* Mono (Democratic Republic of Congo) + * -> Banda */ + // "mni" => Tag::new(b"MNI "), // Manipuri + "mnk" => Tag::new(b"MND "), // Mandinka + // "mnk" => Tag::new(b"MNK "), // Mandinka -> Maninka + "mnp" => Tag::new(b"ZHS "), // Min Bei Chinese -> Chinese, Simplified + "mns" => Tag::new(b"MAN "), // Mansi + "mnw" => Tag::new(b"MON "), // Mon + // "mnw" => Tag::new(b"MONT"), // Mon -> Thailand Mon + // "mnx" => Tag::new(&[0; 4]), // Manikion != Manx + "mo" => Tag::new(b"MOL "), // Moldavian(retired code) + // "mo" => Tag::new(b"ROM "), // Moldavian(retired code) -> Romanian + "mod" => Tag::new(b"CPP "), // Mobilian -> Creoles + // "moh" => Tag::new(b"MOH "), // Mohawk + // "mok" => Tag::new(&[0; 4]), // Morori != Moksha + "mop" => Tag::new(b"MYN "), // Mopán Maya -> Mayan + // "mor" => Tag::new(&[0; 4]), // Moro != Moroccan + // "mos" => Tag::new(b"MOS "), // Mossi + "mpe" => Tag::new(b"MAJ "), // Majang + "mqg" => Tag::new(b"MLY "), // Kota Bangun Kutai Malay -> Malay + "mr" => Tag::new(b"MAR "), // Marathi + "mrh" => Tag::new(b"QIN "), // Mara Chin -> Chin + "mrj" => Tag::new(b"HMA "), // Western Mari -> High Mari + "ms" => Tag::new(b"MLY "), // Malay [macrolanguage] + "msc" => Tag::new(b"MNK "), // Sankaran Maninka -> Maninka + "msh" => Tag::new(b"MLG "), // Masikoro Malagasy -> Malagasy + "msi" => Tag::new(b"MLY "), // Sabah Malay -> Malay + // "msi" => Tag::new(b"CPP "), // Sabah Malay -> Creoles + "mt" => Tag::new(b"MTS "), // Maltese + // "mth" => 
Tag::new(&[0; 4]), // Munggui != Maithili + "mtr" => Tag::new(b"MAW "), // Mewari -> Marwari + // "mts" => Tag::new(&[0; 4]), // Yora != Maltese + "mud" => Tag::new(b"CPP "), // Mednyj Aleut -> Creoles + "mui" => Tag::new(b"MLY "), // Musi -> Malay + // "mun" => Tag::new(&[0; 4]), // Munda [collection] != Mundari + "mup" => Tag::new(b"RAJ "), // Malvi -> Rajasthani + "muq" => Tag::new(b"HMN "), // Eastern Xiangxi Miao -> Hmong + // "mus" => Tag::new(b"MUS "), // Creek -> Muscogee + "mvb" => Tag::new(b"ATH "), // Mattole -> Athapaskan + "mve" => Tag::new(b"MAW "), // Marwari (Pakistan) + "mvf" => Tag::new(b"MNG "), // Peripheral Mongolian -> Mongolian + "mwk" => Tag::new(b"MNK "), // Kita Maninkakan -> Maninka + // "mwl" => Tag::new(b"MWL "), // Mirandese + "mwq" => Tag::new(b"QIN "), // Mün Chin -> Chin + "mwr" => Tag::new(b"MAW "), // Marwari [macrolanguage] + "mww" => Tag::new(b"MWW "), // Hmong Daw + // "mww" => Tag::new(b"HMN "), // Hmong Daw -> Hmong + "my" => Tag::new(b"BRM "), // Burmese + "mym" => Tag::new(b"MEN "), // Me’en + // "myn" => Tag::new(b"MYN "), // Mayan [collection] + "myq" => Tag::new(b"MNK "), // Forest Maninka(retired code) -> Maninka + "myv" => Tag::new(b"ERZ "), // Erzya + "mzb" => Tag::new(b"BBR "), // Tumzabt -> Berber + // "mzn" => Tag::new(b"MZN "), // Mazanderani + "mzs" => Tag::new(b"CPP "), // Macanese -> Creoles + "na" => Tag::new(b"NAU "), // Nauru -> Nauruan + "nag" => Tag::new(b"NAG "), // Naga Pidgin -> Naga-Assamese + // "nag" => Tag::new(b"CPP "), // Naga Pidgin -> Creoles + // "nah" => Tag::new(b"NAH "), // Nahuatl [collection] + "nan" => Tag::new(b"ZHS "), // Min Nan Chinese -> Chinese, Simplified + // "nap" => Tag::new(b"NAP "), // Neapolitan + // "nas" => Tag::new(&[0; 4]), // Naasioi != Naskapi + "naz" => Tag::new(b"NAH "), // Coatepec Nahuatl -> Nahuatl + "nb" => Tag::new(b"NOR "), // Norwegian Bokmål -> Norwegian + "nch" => Tag::new(b"NAH "), // Central Huasteca Nahuatl -> Nahuatl + "nci" => Tag::new(b"NAH "), // 
Classical Nahuatl -> Nahuatl + "ncj" => Tag::new(b"NAH "), // Northern Puebla Nahuatl -> Nahuatl + "ncl" => Tag::new(b"NAH "), // Michoacán Nahuatl -> Nahuatl + // "ncr" => Tag::new(&[0; 4]), // Ncane != N-Cree + "ncx" => Tag::new(b"NAH "), // Central Puebla Nahuatl -> Nahuatl + "nd" => Tag::new(b"NDB "), // North Ndebele -> Ndebele + // "ndb" => Tag::new(&[0; 4]), // Kenswei Nsei != Ndebele + // "ndc" => Tag::new(b"NDC "), // Ndau + // "ndg" => Tag::new(&[0; 4]), // Ndengereko != Ndonga + // "nds" => Tag::new(b"NDS "), // Low Saxon + "ne" => Tag::new(b"NEP "), // Nepali [macrolanguage] + "nef" => Tag::new(b"CPP "), // Nefamese -> Creoles + // "new" => Tag::new(b"NEW "), // Newari + "ng" => Tag::new(b"NDG "), // Ndonga + // "nga" => Tag::new(b"NGA "), // Ngbaka + "ngl" => Tag::new(b"LMW "), // Lomwe + "ngm" => Tag::new(b"CPP "), // Ngatik Men's Creole -> Creoles + "ngo" => Tag::new(b"SXT "), // Ngoni(retired code) -> Sutu + // "ngr" => Tag::new(&[0; 4]), // Engdewu != Nagari + "ngu" => Tag::new(b"NAH "), // Guerrero Nahuatl -> Nahuatl + "nhc" => Tag::new(b"NAH "), // Tabasco Nahuatl -> Nahuatl + "nhd" => Tag::new(b"GUA "), // Chiripá -> Guarani + "nhe" => Tag::new(b"NAH "), // Eastern Huasteca Nahuatl -> Nahuatl + "nhg" => Tag::new(b"NAH "), // Tetelcingo Nahuatl -> Nahuatl + "nhi" => Tag::new(b"NAH "), /* Zacatlán-Ahuacatlán-Tepetzintla + * Nahuatl -> Nahuatl */ + "nhk" => Tag::new(b"NAH "), // Isthmus-Cosoleacaque Nahuatl -> Nahuatl + "nhm" => Tag::new(b"NAH "), // Morelos Nahuatl -> Nahuatl + "nhn" => Tag::new(b"NAH "), // Central Nahuatl -> Nahuatl + "nhp" => Tag::new(b"NAH "), // Isthmus-Pajapan Nahuatl -> Nahuatl + "nhq" => Tag::new(b"NAH "), // Huaxcaleca Nahuatl -> Nahuatl + "nht" => Tag::new(b"NAH "), // Ometepec Nahuatl -> Nahuatl + "nhv" => Tag::new(b"NAH "), // Temascaltepec Nahuatl -> Nahuatl + "nhw" => Tag::new(b"NAH "), // Western Huasteca Nahuatl -> Nahuatl + "nhx" => Tag::new(b"NAH "), // Isthmus-Mecayapan Nahuatl -> Nahuatl + "nhy" => 
Tag::new(b"NAH "), // Northern Oaxaca Nahuatl -> Nahuatl + "nhz" => Tag::new(b"NAH "), // Santa María La Alta Nahuatl -> Nahuatl + "niq" => Tag::new(b"KAL "), // Nandi -> Kalenjin + // "nis" => Tag::new(&[0; 4]), // Nimi != Nisi + // "niu" => Tag::new(b"NIU "), // Niuean + "niv" => Tag::new(b"GIL "), // Gilyak + "njt" => Tag::new(b"CPP "), // Ndyuka-Trio Pidgin -> Creoles + "njz" => Tag::new(b"NIS "), // Nyishi -> Nisi + // "nko" => Tag::new(&[0; 4]), // Nkonya != N’Ko + "nkx" => Tag::new(b"IJO "), // Nkoroo -> Ijo + "nl" => Tag::new(b"NLD "), // Dutch + "nla" => Tag::new(b"BML "), // Ngombale -> Bamileke + "nle" => Tag::new(b"LUH "), // East Nyala -> Luyia + "nln" => Tag::new(b"NAH "), // Durango Nahuatl(retired code) -> Nahuatl + "nlv" => Tag::new(b"NAH "), // Orizaba Nahuatl -> Nahuatl + "nn" => Tag::new(b"NYN "), // Norwegian Nynorsk (Nynorsk, Norwegian) + "nnh" => Tag::new(b"BML "), // Ngiemboon -> Bamileke + "nnz" => Tag::new(b"BML "), // Nda'nda' -> Bamileke + "no" => Tag::new(b"NOR "), // Norwegian [macrolanguage] + "nod" => Tag::new(b"NTA "), // Northern Thai -> Northern Tai + // "noe" => Tag::new(b"NOE "), // Nimadi + // "nog" => Tag::new(b"NOG "), // Nogai + // "nop" => Tag::new(b"NOP "), // Numanggang + // "nov" => Tag::new(b"NOV "), // Novial + "npi" => Tag::new(b"NEP "), // Nepali + "npl" => Tag::new(b"NAH "), // Southeastern Puebla Nahuatl -> Nahuatl + "nqo" => Tag::new(b"NKO "), // N’Ko + "nr" => Tag::new(b"NDB "), // South Ndebele -> Ndebele + "nsk" => Tag::new(b"NAS "), // Naskapi + // "nsm" => Tag::new(&[0; 4]), // Sumi Naga != Northern Sami + // "nso" => Tag::new(b"NSO "), // Northern Sotho + "nsu" => Tag::new(b"NAH "), // Sierra Negra Nahuatl -> Nahuatl + // "nto" => Tag::new(&[0; 4]), // Ntomba != Esperanto + "nue" => Tag::new(b"BAD0"), // Ngundu -> Banda + // "nuk" => Tag::new(b"NUK "), // Nuu-chah-nulth + "nuu" => Tag::new(b"BAD0"), // Ngbundu -> Banda + "nuz" => Tag::new(b"NAH "), // Tlamacazapa Nahuatl -> Nahuatl + "nv" => Tag::new(b"NAV 
"), // Navajo + // "nv" => Tag::new(b"ATH "), // Navajo -> Athapaskan + "nwe" => Tag::new(b"BML "), // Ngwe -> Bamileke + "ny" => Tag::new(b"CHI "), // Chichewa (Chewa, Nyanja) + "nyd" => Tag::new(b"LUH "), // Nyore -> Luyia + // "nym" => Tag::new(b"NYM "), // Nyamwezi + "nyn" => Tag::new(b"NKL "), // Nyankole + // "nza" => Tag::new(b"NZA "), // Tigon Mbembe -> Mbembe Tigon + "oc" => Tag::new(b"OCI "), // Occitan (post 1500) + "oj" => Tag::new(b"OJB "), // Ojibwa [macrolanguage] -> Ojibway + // "ojb" => Tag::new(b"OJB "), // Northwestern Ojibwa -> Ojibway + "ojc" => Tag::new(b"OJB "), // Central Ojibwa -> Ojibway + "ojg" => Tag::new(b"OJB "), // Eastern Ojibwa -> Ojibway + "ojs" => Tag::new(b"OCR "), // Severn Ojibwa -> Oji-Cree + // "ojs" => Tag::new(b"OJB "), // Severn Ojibwa -> Ojibway + "ojw" => Tag::new(b"OJB "), // Western Ojibwa -> Ojibway + "okd" => Tag::new(b"IJO "), // Okodia -> Ijo + "oki" => Tag::new(b"KAL "), // Okiek -> Kalenjin + "okm" => Tag::new(b"KOH "), /* Middle Korean (10th-16th cent.) 
-> + * Korean Old Hangul */ + "okr" => Tag::new(b"IJO "), // Kirike -> Ijo + "om" => Tag::new(b"ORO "), // Oromo [macrolanguage] + // "one" => Tag::new(b"ONE "), // Oneida + // "ono" => Tag::new(b"ONO "), // Onondaga + "onx" => Tag::new(b"CPP "), // Onin Based Pidgin -> Creoles + "oor" => Tag::new(b"CPP "), // Oorlams -> Creoles + "or" => Tag::new(b"ORI "), // Odia [macrolanguage] + "orc" => Tag::new(b"ORO "), // Orma -> Oromo + "orn" => Tag::new(b"MLY "), // Orang Kanaq -> Malay + // "oro" => Tag::new(&[0; 4]), // Orokolo != Oromo + "orr" => Tag::new(b"IJO "), // Oruma -> Ijo + "ors" => Tag::new(b"MLY "), // Orang Seletar -> Malay + "ory" => Tag::new(b"ORI "), // Odia + "os" => Tag::new(b"OSS "), // Ossetian + "otw" => Tag::new(b"OJB "), // Ottawa -> Ojibway + "oua" => Tag::new(b"BBR "), // Tagargrent -> Berber + "pa" => Tag::new(b"PAN "), // Punjabi + // "paa" => Tag::new(&[0; 4]), // Papuan [collection] != Palestinian Aramaic + // "pag" => Tag::new(b"PAG "), // Pangasinan + // "pal" => Tag::new(&[0; 4]), // Pahlavi != Pali + // "pam" => Tag::new(b"PAM "), // Pampanga -> Pampangan + "pap" => Tag::new(b"PAP0"), // Papiamento -> Papiamentu + // "pap" => Tag::new(b"CPP "), // Papiamento -> Creoles + // "pas" => Tag::new(&[0; 4]), // Papasena != Pashto + // "pau" => Tag::new(b"PAU "), // Palauan + "pbt" => Tag::new(b"PAS "), // Southern Pashto -> Pashto + "pbu" => Tag::new(b"PAS "), // Northern Pashto -> Pashto + // "pcc" => Tag::new(b"PCC "), // Bouyei + // "pcd" => Tag::new(b"PCD "), // Picard + "pce" => Tag::new(b"PLG "), // Ruching Palaung -> Palaung + "pck" => Tag::new(b"QIN "), // Paite Chin -> Chin + "pcm" => Tag::new(b"CPP "), // Nigerian Pidgin -> Creoles + // "pdc" => Tag::new(b"PDC "), // Pennsylvania German + "pdu" => Tag::new(b"KRN "), // Kayan -> Karen + "pea" => Tag::new(b"CPP "), // Peranakan Indonesian -> Creoles + "pel" => Tag::new(b"MLY "), // Pekal -> Malay + "pes" => Tag::new(b"FAR "), // Iranian Persian -> Persian + "pey" => Tag::new(b"CPP "), 
// Petjo -> Creoles + "pga" => Tag::new(b"ARA "), // Sudanese Creole Arabic -> Arabic + // "pga" => Tag::new(b"CPP "), // Sudanese Creole Arabic -> Creoles + // "phk" => Tag::new(b"PHK "), // Phake + "pi" => Tag::new(b"PAL "), // Pali + "pih" => Tag::new(b"PIH "), // Pitcairn-Norfolk -> Norfolk + // "pih" => Tag::new(b"CPP "), // Pitcairn-Norfolk -> Creoles + // "pil" => Tag::new(&[0; 4]), // Yom != Filipino + "pis" => Tag::new(b"CPP "), // Pijin -> Creoles + "pkh" => Tag::new(b"QIN "), // Pankhu -> Chin + "pko" => Tag::new(b"KAL "), // Pökoot -> Kalenjin + "pl" => Tag::new(b"PLK "), // Polish + "plg" => Tag::new(b"PLG0"), // Pilagá + // "plk" => Tag::new(&[0; 4]), // Kohistani Shina != Polish + "pll" => Tag::new(b"PLG "), // Shwe Palaung -> Palaung + "pln" => Tag::new(b"CPP "), // Palenquero -> Creoles + "plp" => Tag::new(b"PAP "), // Palpa(retired code) + "plt" => Tag::new(b"MLG "), // Plateau Malagasy -> Malagasy + "pml" => Tag::new(b"CPP "), // Lingua Franca -> Creoles + // "pms" => Tag::new(b"PMS "), // Piemontese + "pmy" => Tag::new(b"CPP "), // Papuan Malay -> Creoles + // "pnb" => Tag::new(b"PNB "), // Western Panjabi + "poc" => Tag::new(b"MYN "), // Poqomam -> Mayan + "poh" => Tag::new(b"POH "), // Poqomchi' -> Pocomchi + // "poh" => Tag::new(b"MYN "), // Poqomchi' -> Mayan + // "pon" => Tag::new(b"PON "), // Pohnpeian + "pov" => Tag::new(b"CPP "), // Upper Guinea Crioulo -> Creoles + "ppa" => Tag::new(b"BAG "), // Pao(retired code) -> Baghelkhandi + "pre" => Tag::new(b"CPP "), // Principense -> Creoles + // "pro" => Tag::new(b"PRO "), // Old Provençal (to 1500) -> + // Provençal / Old Provençal + "prp" => Tag::new(b"GUJ "), // Parsi(retired code) -> Gujarati + "prs" => Tag::new(b"DRI "), // Dari + // "prs" => Tag::new(b"FAR "), // Dari -> Persian + "ps" => Tag::new(b"PAS "), // Pashto [macrolanguage] + "pse" => Tag::new(b"MLY "), // Central Malay -> Malay + "pst" => Tag::new(b"PAS "), // Central Pashto -> Pashto + "pt" => Tag::new(b"PTG "), // Portuguese 
+ "pub" => Tag::new(b"QIN "), // Purum -> Chin + "puz" => Tag::new(b"QIN "), // Purum Naga(retired code) -> Chin + "pwo" => Tag::new(b"PWO "), // Pwo Western Karen -> Western Pwo Karen + // "pwo" => Tag::new(b"KRN "), // Pwo Western Karen -> Karen + "pww" => Tag::new(b"KRN "), // Pwo Northern Karen -> Karen + "qu" => Tag::new(b"QUZ "), // Quechua [macrolanguage] + "qub" => Tag::new(b"QWH "), /* Huallaga Huánuco Quechua -> Quechua + * (Peru) */ + // "qub" => Tag::new(b"QUZ "), // Huallaga Huánuco Quechua -> Quechua + "quc" => Tag::new(b"QUC "), // K’iche’ + // "quc" => Tag::new(b"MYN "), // K'iche' -> Mayan + "qud" => Tag::new(b"QVI "), /* Calderón Highland Quichua -> Quechua + * (Ecuador) */ + // "qud" => Tag::new(b"QUZ "), // Calderón Highland Quichua -> Quechua + "quf" => Tag::new(b"QUZ "), // Lambayeque Quechua -> Quechua + "qug" => Tag::new(b"QVI "), /* Chimborazo Highland Quichua -> + * Quechua (Ecuador) */ + // "qug" => Tag::new(b"QUZ "), // Chimborazo Highland Quichua -> Quechua + "quh" => Tag::new(b"QUH "), /* South Bolivian Quechua -> Quechua + * (Bolivia) */ + // "quh" => Tag::new(b"QUZ "), // South Bolivian Quechua -> Quechua + "quk" => Tag::new(b"QUZ "), // Chachapoyas Quechua -> Quechua + "qul" => Tag::new(b"QUH "), /* North Bolivian Quechua -> Quechua + * (Bolivia) */ + // "qul" => Tag::new(b"QUZ "), // North Bolivian Quechua -> Quechua + "qum" => Tag::new(b"MYN "), // Sipacapense -> Mayan + "qup" => Tag::new(b"QVI "), /* Southern Pastaza Quechua -> Quechua + * (Ecuador) */ + // "qup" => Tag::new(b"QUZ "), // Southern Pastaza Quechua -> Quechua + "qur" => Tag::new(b"QWH "), /* Yanahuanca Pasco Quechua -> Quechua + * (Peru) */ + // "qur" => Tag::new(b"QUZ "), // Yanahuanca Pasco Quechua -> Quechua + "qus" => Tag::new(b"QUH "), /* Santiago del Estero Quichua -> + * Quechua (Bolivia) */ + // "qus" => Tag::new(b"QUZ "), // Santiago del Estero Quichua -> Quechua + "quv" => Tag::new(b"MYN "), // Sacapulteco -> Mayan + "quw" => Tag::new(b"QVI "), /* Tena 
Lowland Quichua -> Quechua + * (Ecuador) */ + // "quw" => Tag::new(b"QUZ "), // Tena Lowland Quichua -> Quechua + "qux" => Tag::new(b"QWH "), // Yauyos Quechua -> Quechua (Peru) + // "qux" => Tag::new(b"QUZ "), // Yauyos Quechua -> Quechua + "quy" => Tag::new(b"QUZ "), // Ayacucho Quechua -> Quechua + // "quz" => Tag::new(b"QUZ "), // Cusco Quechua -> Quechua + "qva" => Tag::new(b"QWH "), // Ambo-Pasco Quechua -> Quechua (Peru) + // "qva" => Tag::new(b"QUZ "), // Ambo-Pasco Quechua -> Quechua + "qvc" => Tag::new(b"QUZ "), // Cajamarca Quechua -> Quechua + "qve" => Tag::new(b"QUZ "), // Eastern Apurímac Quechua -> Quechua + "qvh" => Tag::new(b"QWH "), /* Huamalíes-Dos de Mayo Huánuco Quechua + * -> Quechua (Peru) */ + // "qvh" => Tag::new(b"QUZ "), // Huamalíes-Dos de Mayo Huánuco Quechua + // -> Quechua + "qvi" => Tag::new(b"QVI "), /* Imbabura Highland Quichua -> Quechua + * (Ecuador) */ + // "qvi" => Tag::new(b"QUZ "), // Imbabura Highland Quichua -> Quechua + "qvj" => Tag::new(b"QVI "), /* Loja Highland Quichua -> Quechua + * (Ecuador) */ + // "qvj" => Tag::new(b"QUZ "), // Loja Highland Quichua -> Quechua + "qvl" => Tag::new(b"QWH "), /* Cajatambo North Lima Quechua -> + * Quechua (Peru) */ + // "qvl" => Tag::new(b"QUZ "), // Cajatambo North Lima Quechua -> + // Quechua + "qvm" => Tag::new(b"QWH "), /* Margos-Yarowilca-Lauricocha Quechua + * -> Quechua (Peru) */ + // "qvm" => Tag::new(b"QUZ "), // Margos-Yarowilca-Lauricocha Quechua -> + // Quechua + "qvn" => Tag::new(b"QWH "), // North Junín Quechua -> Quechua (Peru) + // "qvn" => Tag::new(b"QUZ "), // North Junín Quechua -> Quechua + "qvo" => Tag::new(b"QVI "), /* Napo Lowland Quechua -> Quechua + * (Ecuador) */ + // "qvo" => Tag::new(b"QUZ "), // Napo Lowland Quechua -> Quechua + "qvp" => Tag::new(b"QWH "), // Pacaraos Quechua -> Quechua (Peru) + // "qvp" => Tag::new(b"QUZ "), // Pacaraos Quechua -> Quechua + "qvs" => Tag::new(b"QUZ "), // San Martín Quechua -> Quechua + "qvw" => Tag::new(b"QWH "), // 
Huaylla Wanca Quechua -> Quechua (Peru) + // "qvw" => Tag::new(b"QUZ "), // Huaylla Wanca Quechua -> Quechua + "qvz" => Tag::new(b"QVI "), /* Northern Pastaza Quichua -> Quechua + * (Ecuador) */ + // "qvz" => Tag::new(b"QUZ "), // Northern Pastaza Quichua -> Quechua + "qwa" => Tag::new(b"QWH "), // Corongo Ancash Quechua -> Quechua (Peru) + // "qwa" => Tag::new(b"QUZ "), // Corongo Ancash Quechua -> Quechua + "qwc" => Tag::new(b"QUZ "), // Classical Quechua -> Quechua + "qwh" => Tag::new(b"QWH "), // Huaylas Ancash Quechua -> Quechua (Peru) + // "qwh" => Tag::new(b"QUZ "), // Huaylas Ancash Quechua -> Quechua + "qws" => Tag::new(b"QWH "), // Sihuas Ancash Quechua -> Quechua (Peru) + // "qws" => Tag::new(b"QUZ "), // Sihuas Ancash Quechua -> Quechua + "qwt" => Tag::new(b"ATH "), // Kwalhioqua-Tlatskanai -> Athapaskan + "qxa" => Tag::new(b"QWH "), /* Chiquián Ancash Quechua -> Quechua + * (Peru) */ + // "qxa" => Tag::new(b"QUZ "), // Chiquián Ancash Quechua -> Quechua + "qxc" => Tag::new(b"QWH "), // Chincha Quechua -> Quechua (Peru) + // "qxc" => Tag::new(b"QUZ "), // Chincha Quechua -> Quechua + "qxh" => Tag::new(b"QWH "), // Panao Huánuco Quechua -> Quechua (Peru) + // "qxh" => Tag::new(b"QUZ "), // Panao Huánuco Quechua -> Quechua + "qxl" => Tag::new(b"QVI "), /* Salasaca Highland Quichua -> Quechua + * (Ecuador) */ + // "qxl" => Tag::new(b"QUZ "), // Salasaca Highland Quichua -> Quechua + "qxn" => Tag::new(b"QWH "), /* Northern Conchucos Ancash Quechua -> + * Quechua (Peru) */ + // "qxn" => Tag::new(b"QUZ "), // Northern Conchucos Ancash Quechua -> + // Quechua + "qxo" => Tag::new(b"QWH "), /* Southern Conchucos Ancash Quechua -> + * Quechua (Peru) */ + // "qxo" => Tag::new(b"QUZ "), // Southern Conchucos Ancash Quechua -> + // Quechua + "qxp" => Tag::new(b"QUZ "), // Puno Quechua -> Quechua + "qxr" => Tag::new(b"QVI "), /* Cañar Highland Quichua -> Quechua + * (Ecuador) */ + // "qxr" => Tag::new(b"QUZ "), // Cañar Highland Quichua -> Quechua + "qxt" => 
Tag::new(b"QWH "), /* Santa Ana de Tusi Pasco Quechua -> + * Quechua (Peru) */ + // "qxt" => Tag::new(b"QUZ "), // Santa Ana de Tusi Pasco Quechua -> + // Quechua + "qxu" => Tag::new(b"QUZ "), // Arequipa-La Unión Quechua -> Quechua + "qxw" => Tag::new(b"QWH "), // Jauja Wanca Quechua -> Quechua (Peru) + // "qxw" => Tag::new(b"QUZ "), // Jauja Wanca Quechua -> Quechua + "rag" => Tag::new(b"LUH "), // Logooli -> Luyia + // "raj" => Tag::new(b"RAJ "), // Rajasthani [macrolanguage] + "ral" => Tag::new(b"QIN "), // Ralte -> Chin + // "rar" => Tag::new(b"RAR "), // Rarotongan + "rbb" => Tag::new(b"PLG "), // Rumai Palaung -> Palaung + "rbl" => Tag::new(b"BIK "), // Miraya Bikol -> Bikol + "rcf" => Tag::new(b"CPP "), // Réunion Creole French -> Creoles + // "rej" => Tag::new(b"REJ "), // Rejang + // "rhg" => Tag::new(b"RHG "), // Rohingya + // "ria" => Tag::new(b"RIA "), // Riang (India) + "rif" => Tag::new(b"RIF "), // Tarifit + // "rif" => Tag::new(b"BBR "), // Tarifit -> Berber + // "rit" => Tag::new(b"RIT "), // Ritharrngu -> Ritarungo + "rki" => Tag::new(b"ARK "), // Rakhine + // "rkw" => Tag::new(b"RKW "), // Arakwal + "rm" => Tag::new(b"RMS "), // Romansh + "rmc" => Tag::new(b"ROY "), // Carpathian Romani -> Romany + "rmf" => Tag::new(b"ROY "), // Kalo Finnish Romani -> Romany + "rml" => Tag::new(b"ROY "), // Baltic Romani -> Romany + "rmn" => Tag::new(b"ROY "), // Balkan Romani -> Romany + "rmo" => Tag::new(b"ROY "), // Sinte Romani -> Romany + // "rms" => Tag::new(&[0; 4]), // Romanian Sign Language != Romansh + "rmw" => Tag::new(b"ROY "), // Welsh Romani -> Romany + "rmy" => Tag::new(b"RMY "), // Vlax Romani + // "rmy" => Tag::new(b"ROY "), // Vlax Romani -> Romany + "rmz" => Tag::new(b"ARK "), // Marma -> Rakhine + "rn" => Tag::new(b"RUN "), // Rundi + "ro" => Tag::new(b"ROM "), // Romanian + "rom" => Tag::new(b"ROY "), // Romany [macrolanguage] + "rop" => Tag::new(b"CPP "), // Kriol -> Creoles + "rtc" => Tag::new(b"QIN "), // Rungtu Chin -> Chin + // "rtm" => 
Tag::new(b"RTM "), // Rotuman + "ru" => Tag::new(b"RUS "), // Russian + "rue" => Tag::new(b"RSY "), // Rusyn + // "rup" => Tag::new(b"RUP "), // Aromanian + "rw" => Tag::new(b"RUA "), // Kinyarwanda + "rwr" => Tag::new(b"MAW "), // Marwari (India) + "sa" => Tag::new(b"SAN "), // Sanskrit [macrolanguage] + // "sad" => Tag::new(&[0; 4]), // Sandawe != Sadri + "sah" => Tag::new(b"YAK "), // Yakut -> Sakha + "sam" => Tag::new(b"PAA "), // Samaritan Aramaic -> Palestinian Aramaic + // "sas" => Tag::new(b"SAS "), // Sasak + // "sat" => Tag::new(b"SAT "), // Santali + // "say" => Tag::new(&[0; 4]), // Saya != Sayisi + "sc" => Tag::new(b"SRD "), // Sardinian [macrolanguage] + "scf" => Tag::new(b"CPP "), // San Miguel Creole French -> Creoles + "sch" => Tag::new(b"QIN "), // Sakachep -> Chin + "sci" => Tag::new(b"CPP "), // Sri Lankan Creole Malay -> Creoles + "sck" => Tag::new(b"SAD "), // Sadri + // "scn" => Tag::new(b"SCN "), // Sicilian + // "sco" => Tag::new(b"SCO "), // Scots + "scs" => Tag::new(b"SCS "), // North Slavey + // "scs" => Tag::new(b"SLA "), // North Slavey -> Slavey + // "scs" => Tag::new(b"ATH "), // North Slavey -> Athapaskan + "sd" => Tag::new(b"SND "), // Sindhi + "sdc" => Tag::new(b"SRD "), // Sassarese Sardinian -> Sardinian + "sdh" => Tag::new(b"KUR "), // Southern Kurdish -> Kurdish + "sdn" => Tag::new(b"SRD "), // Gallurese Sardinian -> Sardinian + "sds" => Tag::new(b"BBR "), // Sened -> Berber + "se" => Tag::new(b"NSM "), // Northern Sami + // "see" => Tag::new(b"SEE "), // Seneca + "seh" => Tag::new(b"SNA "), // Sena + "sek" => Tag::new(b"ATH "), // Sekani -> Athapaskan + // "sel" => Tag::new(b"SEL "), // Selkup + "sez" => Tag::new(b"QIN "), // Senthang Chin -> Chin + "sfm" => Tag::new(b"SFM "), // Small Flowery Miao + // "sfm" => Tag::new(b"HMN "), // Small Flowery Miao -> Hmong + "sg" => Tag::new(b"SGO "), // Sango + // "sga" => Tag::new(b"SGA "), // Old Irish (to 900) + "sgc" => Tag::new(b"KAL "), // Kipsigis -> Kalenjin + // "sgo" => 
Tag::new(&[0; 4]), // Songa(retired code) != Sango + // "sgs" => Tag::new(b"SGS "), // Samogitian + "sgw" => Tag::new(b"CHG "), // Sebat Bet Gurage -> Chaha Gurage + "sh" => Tag::new(b"BOS "), /* Serbo-Croatian [macrolanguage] -> + * Bosnian */ + // "sh" => Tag::new(b"HRV "), // Serbo-Croatian [macrolanguage] -> + // Croatian "sh" => Tag::new(b"SRB "), // Serbo-Croatian + // [macrolanguage] -> Serbian + "shi" => Tag::new(b"SHI "), // Tachelhit + // "shi" => Tag::new(b"BBR "), // Tachelhit -> Berber + "shl" => Tag::new(b"QIN "), // Shendu -> Chin + // "shn" => Tag::new(b"SHN "), // Shan + "shu" => Tag::new(b"ARA "), // Chadian Arabic -> Arabic + "shy" => Tag::new(b"BBR "), // Tachawit -> Berber + "si" => Tag::new(b"SNH "), // Sinhala (Sinhalese) + // "sib" => Tag::new(&[0; 4]), // Sebop != Sibe + // "sid" => Tag::new(b"SID "), // Sidamo + // "sig" => Tag::new(&[0; 4]), // Paasaal != Silte Gurage + "siz" => Tag::new(b"BBR "), // Siwi -> Berber + // "sja" => Tag::new(b"SJA "), // Epena + "sjc" => Tag::new(b"ZHS "), // Shaojiang Chinese -> Chinese, Simplified + "sjd" => Tag::new(b"KSM "), // Kildin Sami + // "sje" => Tag::new(b"SJE "), // Pite Sami + "sjo" => Tag::new(b"SIB "), // Xibe -> Sibe + "sjs" => Tag::new(b"BBR "), // Senhaja De Srair -> Berber + // "sju" => Tag::new(b"SJU "), // Ume Sami + "sk" => Tag::new(b"SKY "), // Slovak + "skg" => Tag::new(b"MLG "), // Sakalava Malagasy -> Malagasy + "skr" => Tag::new(b"SRK "), // Saraiki + // "sks" => Tag::new(&[0; 4]), // Maia != Skolt Sami + "skw" => Tag::new(b"CPP "), // Skepi Creole Dutch -> Creoles + // "sky" => Tag::new(&[0; 4]), // Sikaiana != Slovak + "sl" => Tag::new(b"SLV "), // Slovenian + // "sla" => Tag::new(&[0; 4]), // Slavic [collection] != Slavey + "sm" => Tag::new(b"SMO "), // Samoan + "sma" => Tag::new(b"SSM "), // Southern Sami + "smd" => Tag::new(b"MBN "), // Sama(retired code) -> Mbundu + "smj" => Tag::new(b"LSM "), // Lule Sami + // "sml" => Tag::new(&[0; 4]), // Central Sama != Somali + "smn" => 
Tag::new(b"ISM "), // Inari Sami + "sms" => Tag::new(b"SKS "), // Skolt Sami + "smt" => Tag::new(b"QIN "), // Simte -> Chin + "sn" => Tag::new(b"SNA0"), // Shona + "snb" => Tag::new(b"IBA "), // Sebuyau(retired code) -> Iban + // "snh" => Tag::new(&[0; 4]), // Shinabo(retired code) != Sinhala (Sinhalese) + // "snk" => Tag::new(b"SNK "), // Soninke + "so" => Tag::new(b"SML "), // Somali + // "sog" => Tag::new(&[0; 4]), // Sogdian != Sodo Gurage + // "sop" => Tag::new(b"SOP "), // Songe + "spv" => Tag::new(b"ORI "), // Sambalpuri -> Odia + "spy" => Tag::new(b"KAL "), // Sabaot -> Kalenjin + "sq" => Tag::new(b"SQI "), // Albanian [macrolanguage] + "sr" => Tag::new(b"SRB "), // Serbian + // "srb" => Tag::new(&[0; 4]), // Sora != Serbian + "src" => Tag::new(b"SRD "), // Logudorese Sardinian -> Sardinian + // "srk" => Tag::new(&[0; 4]), // Serudung Murut != Saraiki + "srm" => Tag::new(b"CPP "), // Saramaccan -> Creoles + "srn" => Tag::new(b"CPP "), // Sranan Tongo -> Creoles + "sro" => Tag::new(b"SRD "), // Campidanese Sardinian -> Sardinian + // "srr" => Tag::new(b"SRR "), // Serer + "srs" => Tag::new(b"ATH "), // Sarsi -> Athapaskan + "ss" => Tag::new(b"SWZ "), // Swati + "ssh" => Tag::new(b"ARA "), // Shihhi Arabic -> Arabic + // "ssl" => Tag::new(&[0; 4]), // Western Sisaala != South Slavey + // "ssm" => Tag::new(&[0; 4]), // Semnam != Southern Sami + "st" => Tag::new(b"SOT "), // Southern Sotho + "sta" => Tag::new(b"CPP "), // Settla -> Creoles + // "stq" => Tag::new(b"STQ "), // Saterfriesisch -> Saterland + // Frisian "str" => Tag::new(b"STR "), // Straits Salish + "stv" => Tag::new(b"SIG "), // Silt'e -> Silte Gurage + "su" => Tag::new(b"SUN "), // Sundanese + // "suk" => Tag::new(b"SUK "), // Sukuma + "suq" => Tag::new(b"SUR "), // Suri + // "sur" => Tag::new(&[0; 4]), // Mwaghavul != Suri + "sv" => Tag::new(b"SVE "), // Swedish + // "sva" => Tag::new(b"SVA "), // Svan + "svc" => Tag::new(b"CPP "), // Vincentian Creole English -> Creoles + // "sve" => 
Tag::new(&[0; 4]), // Serili != Swedish + "sw" => Tag::new(b"SWK "), // Swahili [macrolanguage] + "swb" => Tag::new(b"CMR "), // Maore Comorian -> Comorian + "swc" => Tag::new(b"SWK "), // Congo Swahili -> Swahili + "swh" => Tag::new(b"SWK "), // Swahili + // "swk" => Tag::new(&[0; 4]), // Malawi Sena != Swahili + "swn" => Tag::new(b"BBR "), // Sawknah -> Berber + "swv" => Tag::new(b"MAW "), // Shekhawati -> Marwari + // "sxu" => Tag::new(b"SXU "), // Upper Saxon + "syc" => Tag::new(b"SYR "), // Classical Syriac -> Syriac + // "syl" => Tag::new(b"SYL "), // Sylheti + // "syr" => Tag::new(b"SYR "), // Syriac [macrolanguage] + // "szl" => Tag::new(b"SZL "), // Silesian + "ta" => Tag::new(b"TAM "), // Tamil + "taa" => Tag::new(b"ATH "), // Lower Tanana -> Athapaskan + // "tab" => Tag::new(b"TAB "), // Tabassaran -> Tabasaran + // "taj" => Tag::new(&[0; 4]), // Eastern Tamang != Tajiki + "taq" => Tag::new(b"TAQ "), // Tamasheq + // "taq" => Tag::new(b"TMH "), // Tamasheq -> Tamashek + // "taq" => Tag::new(b"BBR "), // Tamasheq -> Berber + "tas" => Tag::new(b"CPP "), // Tay Boi -> Creoles + "tau" => Tag::new(b"ATH "), // Upper Tanana -> Athapaskan + // "tbv" => Tag::new(b"TBV "), // Tobo + "tcb" => Tag::new(b"ATH "), // Tanacross -> Athapaskan + "tce" => Tag::new(b"ATH "), // Southern Tutchone -> Athapaskan + "tch" => Tag::new(b"CPP "), /* Turks And Caicos Creole English -> + * Creoles */ + "tcp" => Tag::new(b"QIN "), // Tawr Chin -> Chin + "tcs" => Tag::new(b"CPP "), // Torres Strait Creole -> Creoles + "tcy" => Tag::new(b"TUL "), // Tulu + "tcz" => Tag::new(b"QIN "), // Thado Chin -> Chin + // "tdc" => Tag::new(b"TDC "), // Emberá-Tadó + // "tdd" => Tag::new(b"TDD "), // Tai Nüa -> Dehong Dai + "tdx" => Tag::new(b"MLG "), // Tandroy-Mahafaly Malagasy -> Malagasy + "te" => Tag::new(b"TEL "), // Telugu + "tec" => Tag::new(b"KAL "), // Terik -> Kalenjin + "tem" => Tag::new(b"TMN "), // Timne -> Temne + // "tet" => Tag::new(b"TET "), // Tetum + "tez" => Tag::new(b"BBR "), 
// Tetserret -> Berber + "tfn" => Tag::new(b"ATH "), // Tanaina -> Athapaskan + "tg" => Tag::new(b"TAJ "), // Tajik -> Tajiki + "tgh" => Tag::new(b"CPP "), // Tobagonian Creole English -> Creoles + "tgj" => Tag::new(b"NIS "), // Tagin -> Nisi + // "tgn" => Tag::new(&[0; 4]), // Tandaganon != Tongan + // "tgr" => Tag::new(&[0; 4]), // Tareng != Tigre + "tgx" => Tag::new(b"ATH "), // Tagish -> Athapaskan + // "tgy" => Tag::new(&[0; 4]), // Togoyo != Tigrinya + "th" => Tag::new(b"THA "), // Thai + // "thp" => Tag::new(b"THP "), // Thompson + "tht" => Tag::new(b"ATH "), // Tahltan -> Athapaskan + "thv" => Tag::new(b"THV "), // Tahaggart Tamahaq + // "thv" => Tag::new(b"TMH "), // Tahaggart Tamahaq -> Tamashek + // "thv" => Tag::new(b"BBR "), // Tahaggart Tamahaq -> Berber + "thz" => Tag::new(b"THZ "), // Tayart Tamajeq + // "thz" => Tag::new(b"TMH "), // Tayart Tamajeq -> Tamashek + // "thz" => Tag::new(b"BBR "), // Tayart Tamajeq -> Berber + "ti" => Tag::new(b"TGY "), // Tigrinya + "tia" => Tag::new(b"BBR "), // Tidikelt Tamazight -> Berber + "tig" => Tag::new(b"TGR "), // Tigre + // "tiv" => Tag::new(b"TIV "), // Tiv + // "tjl" => Tag::new(b"TJL "), // Tai Laing + "tjo" => Tag::new(b"BBR "), // Temacine Tamazight -> Berber + "tk" => Tag::new(b"TKM "), // Turkmen + "tkg" => Tag::new(b"MLG "), // Tesaka Malagasy -> Malagasy + // "tkm" => Tag::new(&[0; 4]), // Takelma != Turkmen + "tl" => Tag::new(b"TGL "), // Tagalog + // "tli" => Tag::new(b"TLI "), // Tlingit + // "tly" => Tag::new(b"TLY "), // Talysh + "tmg" => Tag::new(b"CPP "), // Ternateño -> Creoles + "tmh" => Tag::new(b"TMH "), // Tamashek [macrolanguage] + // "tmh" => Tag::new(b"BBR "), // Tamashek [macrolanguage] -> Berber + // "tmn" => Tag::new(&[0; 4]), // Taman (Indonesia) != Temne + "tmw" => Tag::new(b"MLY "), // Temuan -> Malay + "tn" => Tag::new(b"TNA "), // Tswana + // "tna" => Tag::new(&[0; 4]), // Tacana != Tswana + // "tne" => Tag::new(&[0; 4]), Tinoc Kallahan(retired code) != Tundra Enets + "tnf" => 
Tag::new(b"DRI "), // Tangshewi(retired code) -> Dari + // "tnf" => Tag::new(b"FAR "), // Tangshewi(retired code) -> Persian + // "tng" => Tag::new(&[0; 4]), // Tobanga != Tonga + "to" => Tag::new(b"TGN "), // Tonga (Tonga Islands) -> Tongan + "tod" => Tag::new(b"TOD0"), // Toma + "toi" => Tag::new(b"TNG "), // Tonga (Zambia) + "toj" => Tag::new(b"MYN "), // Tojolabal -> Mayan + "tol" => Tag::new(b"ATH "), // Tolowa -> Athapaskan + "tor" => Tag::new(b"BAD0"), // Togbo-Vara Banda -> Banda + "tpi" => Tag::new(b"TPI "), // Tok Pisin + // "tpi" => Tag::new(b"CPP "), // Tok Pisin -> Creoles + "tr" => Tag::new(b"TRK "), // Turkish + "trf" => Tag::new(b"CPP "), // Trinidadian Creole English -> Creoles + // "trk" => Tag::new(&[0; 4]), // Turkic [collection] != Turkish + "tru" => Tag::new(b"TUA "), // Turoyo -> Turoyo Aramaic + // "tru" => Tag::new(b"SYR "), // Turoyo -> Syriac + "ts" => Tag::new(b"TSG "), // Tsonga + // "tsg" => Tag::new(&[0; 4]), // Tausug != Tsonga + // "tsj" => Tag::new(b"TSJ "), // Tshangla + "tt" => Tag::new(b"TAT "), // Tatar + "ttc" => Tag::new(b"MYN "), // Tektiteko -> Mayan + "ttm" => Tag::new(b"ATH "), // Northern Tutchone -> Athapaskan + "ttq" => Tag::new(b"TTQ "), // Tawallammat Tamajaq + // "ttq" => Tag::new(b"TMH "), // Tawallammat Tamajaq -> Tamashek + // "ttq" => Tag::new(b"BBR "), // Tawallammat Tamajaq -> Berber + // "tua" => Tag::new(&[0; 4]), // Wiarumus != Turoyo Aramaic + // "tul" => Tag::new(&[0; 4]), // Tula != Tulu + // "tum" => Tag::new(b"TUM "), // Tumbuka + // "tus" => Tag::new(b"TUS "), // Tuscarora + "tuu" => Tag::new(b"ATH "), // Tututni -> Athapaskan + // "tuv" => Tag::new(&[0; 4]), // Turkana != Tuvin + "tuy" => Tag::new(b"KAL "), // Tugen -> Kalenjin + // "tvl" => Tag::new(b"TVL "), // Tuvalu + "tvy" => Tag::new(b"CPP "), // Timor Pidgin -> Creoles + "tw" => Tag::new(b"TWI "), // Twi + // "tw" => Tag::new(b"AKA "), // Twi -> Akan + "txc" => Tag::new(b"ATH "), // Tsetsaut -> Athapaskan + "txy" => Tag::new(b"MLG "), // 
Tanosy Malagasy -> Malagasy + "ty" => Tag::new(b"THT "), // Tahitian + "tyv" => Tag::new(b"TUV "), // Tuvinian -> Tuvin + // "tyz" => Tag::new(b"TYZ "), // Tày + "tzh" => Tag::new(b"MYN "), // Tzeltal -> Mayan + "tzj" => Tag::new(b"MYN "), // Tz'utujil -> Mayan + "tzm" => Tag::new(b"TZM "), // Central Atlas Tamazight -> Tamazight + // "tzm" => Tag::new(b"BBR "), // Central Atlas Tamazight -> Berber + "tzo" => Tag::new(b"TZO "), // Tzotzil + // "tzo" => Tag::new(b"MYN "), // Tzotzil -> Mayan + "ubl" => Tag::new(b"BIK "), // Buhi'non Bikol -> Bikol + // "udi" => Tag::new(b"UDI "), // Udi + // "udm" => Tag::new(b"UDM "), // Udmurt + "ug" => Tag::new(b"UYG "), // Uyghur + "uk" => Tag::new(b"UKR "), // Ukrainian + "uki" => Tag::new(b"KUI "), // Kui (India) + "uln" => Tag::new(b"CPP "), // Unserdeutsch -> Creoles + // "umb" => Tag::new(b"UMB "), // Umbundu + "unr" => Tag::new(b"MUN "), // Mundari + "ur" => Tag::new(b"URD "), // Urdu + "urk" => Tag::new(b"MLY "), // Urak Lawoi' -> Malay + "usp" => Tag::new(b"MYN "), // Uspanteco -> Mayan + "uz" => Tag::new(b"UZB "), // Uzbek [macrolanguage] + "uzn" => Tag::new(b"UZB "), // Northern Uzbek -> Uzbek + "uzs" => Tag::new(b"UZB "), // Southern Uzbek -> Uzbek + "vap" => Tag::new(b"QIN "), // Vaiphei -> Chin + "ve" => Tag::new(b"VEN "), // Venda + // "vec" => Tag::new(b"VEC "), // Venetian + "vi" => Tag::new(b"VIT "), // Vietnamese + "vic" => Tag::new(b"CPP "), // Virgin Islands Creole English -> Creoles + // "vit" => Tag::new(&[0; 4]), // Viti != Vietnamese + "vkk" => Tag::new(b"MLY "), // Kaur -> Malay + "vkp" => Tag::new(b"CPP "), // Korlai Creole Portuguese -> Creoles + "vkt" => Tag::new(b"MLY "), // Tenggarong Kutai Malay -> Malay + "vls" => Tag::new(b"FLE "), // Vlaams -> Dutch (Flemish) + "vmw" => Tag::new(b"MAK "), // Makhuwa + "vo" => Tag::new(b"VOL "), // Volapük + "vro" => Tag::new(b"VRO "), // Võro + // "vro" => Tag::new(b"ETI "), // Võro -> Estonian + "vsn" => Tag::new(b"SAN "), // Vedic Sanskrit -> Sanskrit + "wa" 
=> Tag::new(b"WLN "), // Walloon + // "wag" => Tag::new(&[0; 4]), // Wa'ema != Wagdi + // "war" => Tag::new(b"WAR "), // Waray (Philippines) -> + // Waray-Waray "wbl" => Tag::new(b"WBL "), // Wakhi + "wbm" => Tag::new(b"WA "), // Wa + "wbr" => Tag::new(b"WAG "), // Wagdi + // "wbr" => Tag::new(b"RAJ "), // Wagdi -> Rajasthani + // "wci" => Tag::new(b"WCI "), // Waci Gbe + // "wdt" => Tag::new(b"WDT "), // Wendat + "wea" => Tag::new(b"KRN "), // Wewaw -> Karen + "wes" => Tag::new(b"CPP "), // Cameroon Pidgin -> Creoles + "weu" => Tag::new(b"QIN "), // Rawngtu Chin -> Chin + "wlc" => Tag::new(b"CMR "), // Mwali Comorian -> Comorian + "wle" => Tag::new(b"SIG "), // Wolane -> Silte Gurage + "wlk" => Tag::new(b"ATH "), // Wailaki -> Athapaskan + "wni" => Tag::new(b"CMR "), // Ndzwani Comorian -> Comorian + "wo" => Tag::new(b"WLF "), // Wolof + "wry" => Tag::new(b"MAW "), // Merwari -> Marwari + "wsg" => Tag::new(b"GON "), // Adilabad Gondi -> Gondi + // "wtm" => Tag::new(b"WTM "), // Mewati + "wuu" => Tag::new(b"ZHS "), // Wu Chinese -> Chinese, Simplified + "wya" => Tag::new(b"WDT "), // Wyandot(retired code) -> Wendat + // "wya" => Tag::new(b"WYN "), // Wyandot(retired code) + // "wyn" => Tag::new(b"WYN "), // Wyandot + "xal" => Tag::new(b"KLM "), // Kalmyk + // "xal" => Tag::new(b"TOD "), // Kalmyk -> Todo + "xan" => Tag::new(b"SEK "), // Xamtanga -> Sekota + // "xbd" => Tag::new(&[0; 4]), // Bindal != Lü + "xh" => Tag::new(b"XHS "), // Xhosa + // "xjb" => Tag::new(b"XJB "), // Minjungbal -> Minjangbal + // "xkf" => Tag::new(b"XKF "), // Khengkha + "xmg" => Tag::new(b"BML "), // Mengaka -> Bamileke + "xmm" => Tag::new(b"MLY "), // Manado Malay -> Malay + // "xmm" => Tag::new(b"CPP "), // Manado Malay -> Creoles + "xmv" => Tag::new(b"MLG "), // Antankarana Malagasy -> Malagasy + "xmw" => Tag::new(b"MLG "), // Tsimihety Malagasy -> Malagasy + "xnj" => Tag::new(b"SXT "), // Ngoni (Tanzania) -> Sutu + "xnq" => Tag::new(b"SXT "), // Ngoni (Mozambique) -> Sutu + "xnr" => 
Tag::new(b"DGR "), // Kangri -> Dogri (macrolanguage) + // "xog" => Tag::new(b"XOG "), // Soga + "xpe" => Tag::new(b"XPE "), // Liberia Kpelle -> Kpelle (Liberia) + // "xpe" => Tag::new(b"KPL "), // Liberia Kpelle -> Kpelle + "xsl" => Tag::new(b"SSL "), // South Slavey + // "xsl" => Tag::new(b"SLA "), // South Slavey -> Slavey + // "xsl" => Tag::new(b"ATH "), // South Slavey -> Athapaskan + "xst" => Tag::new(b"SIG "), // Silt'e(retired code) -> Silte Gurage + // "xub" => Tag::new(b"XUB "), // Betta Kurumba -> Bette Kuruma + // "xuj" => Tag::new(b"XUJ "), // Jennu Kurumba -> Jennu Kuruma + "xup" => Tag::new(b"ATH "), // Upper Umpqua -> Athapaskan + "xwo" => Tag::new(b"TOD "), // Written Oirat -> Todo + "yaj" => Tag::new(b"BAD0"), // Banda-Yangere -> Banda + // "yak" => Tag::new(&[0; 4]), // Yakama != Sakha + // "yao" => Tag::new(b"YAO "), // Yao + // "yap" => Tag::new(b"YAP "), // Yapese + // "yba" => Tag::new(&[0; 4]), // Yala != Yoruba + "ybb" => Tag::new(b"BML "), // Yemba -> Bamileke + "ybd" => Tag::new(b"ARK "), // Yangbye(retired code) -> Rakhine + "ycr" => Tag::new(b"CPP "), // Yilan Creole -> Creoles + "ydd" => Tag::new(b"JII "), // Eastern Yiddish -> Yiddish + // "ygp" => Tag::new(b"YGP "), // Gepo + "yi" => Tag::new(b"JII "), // Yiddish [macrolanguage] + "yih" => Tag::new(b"JII "), // Western Yiddish -> Yiddish + // "yim" => Tag::new(&[0; 4]), // Yimchungru Naga != Yi Modern + // "yna" => Tag::new(b"YNA "), // Aluo + "yo" => Tag::new(b"YBA "), // Yoruba + "yos" => Tag::new(b"QIN "), // Yos(retired code) -> Chin + "yua" => Tag::new(b"MYN "), // Yucateco -> Mayan + "yue" => Tag::new(b"ZHH "), /* Yue Chinese -> Chinese, Traditional, + * Hong Kong SAR */ + // "yuf" => Tag::new(b"YUF "), // Havasupai-Walapai-Yavapai + // "ywq" => Tag::new(b"YWQ "), // Wuding-Luquan Yi + "za" => Tag::new(b"ZHA "), // Zhuang [macrolanguage] + "zch" => Tag::new(b"ZHA "), // Central Hongshuihe Zhuang -> Zhuang + "zdj" => Tag::new(b"CMR "), // Ngazidja Comorian -> Comorian + // 
"zea" => Tag::new(b"ZEA "), // Zeeuws -> Zealandic + "zeh" => Tag::new(b"ZHA "), // Eastern Hongshuihe Zhuang -> Zhuang + "zen" => Tag::new(b"BBR "), // Zenaga -> Berber + "zgb" => Tag::new(b"ZHA "), // Guibei Zhuang -> Zhuang + "zgh" => Tag::new(b"ZGH "), // Standard Moroccan Tamazight + // "zgh" => Tag::new(b"BBR "), // Standard Moroccan Tamazight -> Berber + "zgm" => Tag::new(b"ZHA "), // Minz Zhuang -> Zhuang + "zgn" => Tag::new(b"ZHA "), // Guibian Zhuang -> Zhuang + "zh" => Tag::new(b"ZHS "), // Chinese, Simplified [macrolanguage] + "zhd" => Tag::new(b"ZHA "), // Dai Zhuang -> Zhuang + "zhn" => Tag::new(b"ZHA "), // Nong Zhuang -> Zhuang + "zkb" => Tag::new(b"KHA "), // Koibal(retired code) -> Khakass + "zlj" => Tag::new(b"ZHA "), // Liujiang Zhuang -> Zhuang + "zlm" => Tag::new(b"MLY "), // Malay + "zln" => Tag::new(b"ZHA "), // Lianshan Zhuang -> Zhuang + "zlq" => Tag::new(b"ZHA "), // Liuqian Zhuang -> Zhuang + "zmi" => Tag::new(b"MLY "), // Negeri Sembilan Malay -> Malay + "zmz" => Tag::new(b"BAD0"), // Mbandja -> Banda + // "znd" => Tag::new(&[0; 4]), // Zande [collection] != Zande + "zne" => Tag::new(b"ZND "), // Zande + "zom" => Tag::new(b"QIN "), // Zou -> Chin + "zqe" => Tag::new(b"ZHA "), // Qiubei Zhuang -> Zhuang + "zsm" => Tag::new(b"MLY "), // Standard Malay -> Malay + "zu" => Tag::new(b"ZUL "), // Zulu + "zum" => Tag::new(b"LRC "), // Kumzari -> Luri + "zyb" => Tag::new(b"ZHA "), // Yongbei Zhuang -> Zhuang + "zyg" => Tag::new(b"ZHA "), // Yang Zhuang -> Zhuang + "zyj" => Tag::new(b"ZHA "), // Youjiang Zhuang -> Zhuang + "zyn" => Tag::new(b"ZHA "), // Yongnan Zhuang -> Zhuang + "zyp" => Tag::new(b"QIN "), // Zyphe Chin -> Chin + // "zza" => Tag::new(b"ZZA "), // Zazaki [macrolanguage] + "zzj" => Tag::new(b"ZHA "), // Zuojiang Zhuang -> Zhuang + _ => return None, // Unknown + }; + Some(tag) +} diff --git a/harfshapedfa/src/errors.rs b/harfshapedfa/src/errors.rs new file mode 100644 index 0000000..f0cd0c3 --- /dev/null +++ 
b/harfshapedfa/src/errors.rs @@ -0,0 +1,52 @@ +use skrifa::raw::types::InvalidTag; +use thiserror::Error; + +/// Creating the shaping plan failed. +/// +/// # What is a shaping plan? +/// +/// A shaping plan is a HarfBuzz/[`harfrust`] optimisation where you inform it +/// ahead-of-time about the text you're going to give it, telling it things like +/// the direction, script, and language of the text. You can read more about +/// this [here](https://harfbuzz.github.io/shaping-plans-and-caching.html). +#[derive(Debug, Error)] +pub enum ShapingPlanError { + /// The script metadata value was + /// invalid + #[error("invalid script: {0}")] + UnknownScriptTag(#[from] InvalidTagError), + /// The language metadata value was + /// invalid + #[error("invalid language: {0}")] + UnknownLanguage(#[from] HarfRustUnknownLanguageError), +} + +/// [`harfrust`] didn't recognise the language. +#[derive(Debug, Error)] +#[error("invalid language: \"{language}\"")] +pub struct HarfRustUnknownLanguageError { + language: String, +} + +impl HarfRustUnknownLanguageError { + pub(crate) fn new(lang: impl Into) -> Self { + HarfRustUnknownLanguageError { + language: lang.into(), + } + } +} + +/// Returned by [`Location::validate_for`](crate::Location::validate_for), +/// indicating axes are specified in the [`Location`](crate::Location) that +/// aren't in the font being validated against. +#[derive(Debug, Error)] +#[error("mismatched axes: present in Location but not font {extras:?}")] +pub struct MismatchedAxesError { + pub(crate) extras: Vec, +} + +/// The axis/script tag was invalid (it had illegal characters or wasn't four +/// characters). 
+#[derive(Debug, Error)] +#[error(transparent)] +pub struct InvalidTagError(#[from] pub(crate) InvalidTag); diff --git a/harfshapedfa/src/lib.rs b/harfshapedfa/src/lib.rs new file mode 100644 index 0000000..d9d27e3 --- /dev/null +++ b/harfshapedfa/src/lib.rs @@ -0,0 +1,157 @@ +#![cfg_attr(docsrs, feature(doc_cfg))] +#![doc = include_str!("../README.md")] + +use std::str::FromStr; + +use harfrust::{ + Direction, Feature, GlyphBuffer, Language, Script, ShapePlan, Shaper, Tag, + UnicodeBuffer, +}; +pub use location::*; + +use crate::{ + convert::direction_from_script, + errors::{HarfRustUnknownLanguageError, InvalidTagError, ShapingPlanError}, +}; + +/// Helper functions for converting between differing standards. +pub mod convert; +/// Something went wrong! +pub mod errors; +mod location; +/// Pens, used to transform or calculate information about glyph outlines. +/// +/// A pen is a kind of object that standardizes the way how to "draw" outlines: +/// it is a middle man between an outline and a drawing. In other words: it is +/// an abstraction for drawing outlines, making sure that outline objects don’t +/// need to know the details about how and where they’re being drawn, and that +/// drawings don’t need to know the details of how outlines are stored. +// ^ re-used from: https://fonttools.readthedocs.io/en/latest/pens/basePen.html +#[cfg(feature = "pens")] +pub mod pens; + +/// Re-exports from [`kurbo`](::kurbo) +/// +/// This should cover the API surface that [`pens`] exposes. +#[cfg(feature = "pens")] +pub mod kurbo { + pub use kurbo::{BezPath, PathEl, Point, Rect}; +} + +/// Metadata related to shaping. +/// +/// Stores information on script, language, direction, and the resultant +/// [`harfrust::ShapePlan`] that this produces. +/// +/// See [`Shaper::shape_with_meta`](HarfRustShaperExt::shape_with_meta) & +/// [`UnicodeBuffer::configure_with_meta`](HarfRustBufferExt::configure_with_meta) +/// for usage. 
+pub struct ShapingMeta { + shaping_plan: ShapePlan, + script: Script, + direction: Direction, + language: Option, +} + +impl ShapingMeta { + /// Create a new `ShapingMeta`. + /// + /// Errors if `script` or `language` are invalid/unrecognised. + pub fn new( + script: &str, + language: Option<&str>, + shaper: &Shaper, + ) -> Result { + let script_tag = script.parse::().map_err(InvalidTagError)?; + // Unwrap relies on script_tag never being the null tag [0, 0, 0, 0]; + // NOTE(review): NUL is legal in a &str, confirm tag parsing rejects it + let script = Script::from_iso15924_tag(script_tag).unwrap(); + + let language = language + .map(|lang| { + // harfrust's own error here is just "invalid language" + // (v0.3.1), so discard it for our own + Language::from_str(lang) + .map_err(|_| HarfRustUnknownLanguageError::new(lang)) + }) + .transpose()?; + let direction = + direction_from_script(script).unwrap_or(Direction::LeftToRight); + + let shaping_plan = ShapePlan::new( + shaper, + direction, + Some(script), + language.as_ref(), + // Default features are still included by default + &[], + ); + + Ok(Self { + shaping_plan, + script, + direction, + language, + }) + } + + /// Get access to the inner [`ShapePlan`]. + #[must_use] + pub const fn shaping_plan(&self) -> &ShapePlan { + &self.shaping_plan + } +} + +/// Extension trait for [`harfrust::UnicodeBuffer`]. +pub trait HarfRustBufferExt: private::Sealed { + /// Configures the buffer with script/language/direction information from + /// [`ShapingMeta`]. + fn configure_with_meta(&mut self, meta: &ShapingMeta); +} + +impl HarfRustBufferExt for UnicodeBuffer { + fn configure_with_meta(&mut self, meta: &ShapingMeta) { + self.set_script(meta.script); + if let Some(lang) = meta.language.clone() { + self.set_language(lang); + } + self.set_direction(meta.direction); + } +} + +/// Extension trait for [`harfrust::Shaper`]. +pub trait HarfRustShaperExt: private::Sealed { + /// A convenience method that configures the buffer and then shapes it.
+ /// + /// Equivalent to: + // TODO: make this code sample compile & run + /// ```ignore + /// buffer.configure_with_meta(meta); + /// shaper.shape_with_plan(meta.shaping_plan(), buffer, features) + /// ``` + fn shape_with_meta( + &self, + meta: &ShapingMeta, + buffer: UnicodeBuffer, + features: &[Feature], + ) -> GlyphBuffer; +} + +impl HarfRustShaperExt for Shaper<'_> { + fn shape_with_meta( + &self, + meta: &ShapingMeta, + mut buffer: UnicodeBuffer, + features: &[Feature], + ) -> GlyphBuffer { + buffer.configure_with_meta(meta); + self.shape_with_plan(meta.shaping_plan(), buffer, features) + } +} + +mod private { + use harfrust::{Shaper, UnicodeBuffer}; + pub trait Sealed {} + impl Sealed for UnicodeBuffer {} + impl Sealed for Shaper<'_> {} +} diff --git a/harfshapedfa/src/location.rs b/harfshapedfa/src/location.rs new file mode 100644 index 0000000..9092664 --- /dev/null +++ b/harfshapedfa/src/location.rs @@ -0,0 +1,258 @@ +use std::{ + cmp::Ordering, + collections::{HashMap, HashSet}, + fmt, +}; + +use indexmap::IndexMap; +use ordered_float::NotNan; +use skrifa::MetadataProvider; + +use crate::errors::{InvalidTagError, MismatchedAxesError}; + +/// A mapping of axis tags to values. +/// +/// Retains insertion order of axes. +/// +/// ``` +/// # use harfshapedfa::Location; +/// # use harfshapedfa::errors::InvalidTagError; +/// # fn main() -> Result<(), InvalidTagError> { +/// let mut loc = Location::new(); +/// loc.axis("wght", 400.0)? +/// .axis("ital", 1.0)? +/// .axis("wdth", 1000.0)?; +/// # Ok(()) +/// # } +/// ``` +#[derive(Clone, Default, Eq, PartialEq)] +pub struct Location(IndexMap>); + +impl Location { + /// Create a new location. + #[must_use] + pub fn new() -> Self { + // IndexMap::new isn't const so even if we desugared this we couldn't + // make Location::new const + Default::default() + } + + /// Convert from a [`HashMap`] using [`skrifa::Tag`]s as keys. + /// + /// # Panics + /// + /// If any axis values are `NaN`. 
+ #[must_use] + pub fn from_skrifa(user_coords: HashMap) -> Self { + Self( + user_coords + .into_iter() + .map(|(tag, value)| { + let value = NotNan::new(value).unwrap_or_else(|_| { + panic!("{tag} coordinate was NaN"); + }); + (tag, value) + }) + .collect(), + ) + } + + /// Set the value of an axis. + /// + /// Fails if `tag` isn't a valid axis tag. + /// + /// Designed to support method chaining: + /// + /// ``` + /// # use harfshapedfa::Location; + /// # use harfshapedfa::errors::InvalidTagError; + /// # fn main() -> Result<(), InvalidTagError> { + /// let mut loc = Location::new(); + /// loc.axis("wght", 400.0)? + /// .axis("ital", 1.0)? + /// .axis("wdth", 1000.0)?; + /// # Ok(()) + /// # } + /// ``` + /// + /// # Panics + /// + /// If any axis value is `NaN`. + pub fn axis( + &mut self, + tag: impl AsRef<[u8]>, + value: f32, + ) -> Result<&mut Self, InvalidTagError> { + let tag = skrifa::Tag::new_checked(tag.as_ref())?; + let value = NotNan::new(value).unwrap_or_else(|_| { + panic!("{tag} coordinate was NaN"); + }); + self.0.insert(tag, value); + Ok(self) + } + + /// Converts a [`HashMap`] to a Font Height [`Location`]. + /// + /// Fails if any keys aren't valid axis tags. + /// + /// Note: this is just an alias to the [`TryFrom`] implementation. + /// + /// # Panics + /// + /// If any axis values are `NaN`. + // TODO: I think this one should error, not panic, on NaNs + pub fn try_from_std( + location: HashMap, + ) -> Result { + Self::try_from(location) + } + + /// Creates a [`HashMap`](HashMap) from `&self`. + #[must_use] + pub fn to_std(&self) -> HashMap { + self.0 + .iter() + .map(|(tag, val)| (tag.to_string(), val.into_inner())) + .collect() + } + + /// Creates a [`skrifa::instance::Location`] from `&self`. 
+ #[must_use] + pub fn to_skrifa( + &self, + font: &skrifa::FontRef, + ) -> skrifa::instance::Location { + font.axes().location( + self.0.iter().map(|(tag, coord)| (*tag, coord.into_inner())), + ) + } + + /// Creates a [`harfrust::Variation`] iterator from `&self`. + pub fn to_harfrust(&self) -> impl Iterator { + self.0.iter().map(|(&tag, value)| harfrust::Variation { + tag, + value: value.into_inner(), + }) + } + + /// Checks that `&self` doesn't specify any axes that aren't present in + /// `font`. + /// + /// Omitting axes is allowed as most libraries will just use the default + /// value if one isn't provided for an axis. + /// + /// ⚠️ Does not currently check that axis values are valid / in range. + /// + /// Note: if you're just using Font Height, it will perform this validation + /// for you as necessary. + pub fn validate_for( + &self, + font: &skrifa::FontRef, + ) -> Result<(), MismatchedAxesError> { + let mut provided = self.0.keys().copied().collect::>(); + // TODO: check values are legal too + font.axes().iter().map(|axis| axis.tag()).for_each(|tag| { + provided.remove(&tag); + }); + let extras = provided; + if extras.is_empty() { + Ok(()) + } else { + Err(MismatchedAxesError { + extras: Vec::from_iter(extras), + }) + } + } + + /// Sort axes lexicographically. + /// + /// Axes being ordered allows for [sorting](Location::partial_cmp). + pub fn sort_axes(&mut self) { + self.0.sort_keys(); + } + + // TODO + // pub fn sort_axes_by(&mut self, func) + // pub fn sort_axes_with_fvar(&mut self, font) + // pub fn sort_axes_with_stat(&mut self, font) +} + +impl PartialOrd for Location { + /// Compares two `Location`s iff they have the same axes in the same order. + /// Will return `None` if this is not the case. + // FIXME: will return None for some Locations that are considered equal + // (when axis order differs). std's PartialOrd docs require a == b iff + // partial_cmp(a, b) == Some(Equal), so this breaks that invariant.
+ fn partial_cmp(&self, other: &Self) -> Option { + if self.0.len() != other.0.len() { + // Difference in axes + return None; + } + + for ((left_tag, left_val), (right_tag, right_val)) in + self.0.iter().zip(other.0.iter()) + { + if left_tag != right_tag { + // Difference in axes (order) + return None; + } + match NotNan::cmp(left_val, right_val) { + Ordering::Equal => { /* check next axis */ }, + not_equal => return Some(not_equal), + } + } + Some(Ordering::Equal) + } +} + +impl fmt::Debug for Location { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_map() + .entries(self.0.iter().map(|(tag, &val)| (tag.to_string(), val))) + .finish() + } +} + +impl FromIterator<(T, f32)> for Location +where + T: AsRef<[u8]>, +{ + /// Support collecting into a `Location`. + // TODO: code example + /// + /// # Panics + /// + /// If any tag is invalid or any axis value is `NaN`. + fn from_iter>(iter: I) -> Self { + iter.into_iter() + .fold(Location::new(), |mut loc, (tag, value)| { + loc.axis(tag, value) + .expect("invalid tag when building Location"); + loc + }) + } +} + +impl TryFrom> for Location { + type Error = InvalidTagError; + + /// Convert standard library types into a `Location`. + /// + /// # Panics + /// + /// If any value of `location` is `NaN`. + // TODO: make NaNs an error + fn try_from(location: HashMap) -> Result { + let user_coords = location + .into_iter() + .map(|(tag, value)| -> Result<_, InvalidTagError> { + let tag = skrifa::Tag::new_checked(tag.as_bytes())?; + let value = NotNan::new(value).unwrap_or_else(|_| { + panic!("{tag} coordinate was NaN"); + }); + Ok((tag, value)) + }) + .collect::>()?; + Ok(Self(user_coords)) + } +} diff --git a/harfshapedfa/src/pens.rs b/harfshapedfa/src/pens.rs new file mode 100644 index 0000000..1605659 --- /dev/null +++ b/harfshapedfa/src/pens.rs @@ -0,0 +1,92 @@ +use ::kurbo::Shape; +use skrifa::outline::OutlinePen; + +use crate::kurbo; + +/// Pen to calculate the bounds of a shape.
+/// +/// Has the functionality of both fontTools' [`BoundsPen`](https://fonttools.readthedocs.io/en/latest/pens/boundsPen.html#fontTools.pens.boundsPen.BoundsPen) +/// and [`ControlBoundsPen`](https://fonttools.readthedocs.io/en/latest/pens/boundsPen.html#fontTools.pens.boundsPen.ControlBoundsPen), +/// but is powered by [`kurbo`]. +// Adapted from https://github.com/googlefonts/fontations/blob/57715f39/skrifa/src/outline/mod.rs#L1159-L1184 (same license) +#[derive(Debug, Default)] +pub struct BoundsPen { + path: kurbo::BezPath, +} + +impl BoundsPen { + /// Create a new `BoundsPen`. + #[must_use] + pub fn new() -> Self { + Default::default() + } + + /// Get the drawn [`kurbo::BezPath`]. + #[must_use] + pub const fn path(&self) -> &kurbo::BezPath { + &self.path + } + + /// Calculate the bounds of a shape. + /// + /// It calculates the correct bounds even when the shape contains curves + /// that don’t have points on their extremes. + /// + /// This is somewhat slower to compute than + /// [`BoundsPen::control_bounds`]. + #[must_use] + pub fn bounds(&self) -> kurbo::Rect { + self.path.bounding_box() + } + + /// Calculate the "control bounds" of a shape. + /// + /// This is the bounding box of all control points, so may be larger than + /// the actual bounding box if there are curves that don’t have points on + /// their extremes. + /// + /// Faster to compute than [`BoundsPen::bounds`], but not always what you + /// want.
+ #[must_use] + pub fn control_bounds(&self) -> kurbo::Rect { + self.path.control_box() + } +} + +impl OutlinePen for BoundsPen { + fn move_to(&mut self, x: f32, y: f32) { + self.path.move_to(kurbo_point(x, y)); + } + + fn line_to(&mut self, x: f32, y: f32) { + self.path.line_to(kurbo_point(x, y)); + } + + fn quad_to(&mut self, cx0: f32, cy0: f32, x: f32, y: f32) { + self.path.quad_to(kurbo_point(cx0, cy0), kurbo_point(x, y)); + } + + fn curve_to( + &mut self, + cx0: f32, + cy0: f32, + cx1: f32, + cy1: f32, + x: f32, + y: f32, + ) { + self.path.curve_to( + kurbo_point(cx0, cy0), + kurbo_point(cx1, cy1), + kurbo_point(x, y), + ); + } + + fn close(&mut self) { + self.path.close_path(); + } +} + +fn kurbo_point(x: f32, y: f32) -> kurbo::Point { + (x as f64, y as f64).into() +} diff --git a/static-lang-word-lists/Cargo.toml b/static-lang-word-lists/Cargo.toml index 5d267d0..acaaf23 100644 --- a/static-lang-word-lists/Cargo.toml +++ b/static-lang-word-lists/Cargo.toml @@ -21,9 +21,9 @@ include = [ [dependencies] brotli-decompressor = "5" -log = { workspace = true } +log.workspace = true rayon = { workspace = true, optional = true } -thiserror = { workspace = true } +thiserror.workspace = true serde = { version = "1.0", features = ["derive"] } [dependencies.toml]