make Combining column human-readable

This patch introduces a `CombiningClass` type to represent the canonical
combining class of a codepoint. The `fmt::Display` implementation of
this type writes the human-readable name of the combining class, if
there is one. This replaces the previous behaviour, which was to just
print the raw byte value of the combining class.
main
pantonshire 3 years ago
parent f1355b5fe3
commit 26ee43af2e

@ -4,7 +4,7 @@ use clap::Parser;
use libshire::strings::CappedString;
use tabled::{Tabled, Table, Style};
use utfdump_core::{chardata::Category, encoded::Data};
use utfdump_core::{chardata::{Category, CombiningClass}, encoded::Data};
const UNICODE_DATA: &[u8] = include_bytes!(concat!(env!("OUT_DIR"), "/unicode_data_encoded"));
@ -50,7 +50,7 @@ fn main() {
let ccc = char_data.ccc();
char_combining_class = Optional::Some(ccc);
combining = ccc != 0;
combining = ccc.is_combining();
}
let display_char = {
@ -91,7 +91,7 @@ struct OutRow {
#[tabled(rename = "Category")]
category: Optional<DisplayCategory>,
#[tabled(rename = "Combining")]
char_combining_class: Optional<u8>,
char_combining_class: Optional<CombiningClass>,
}
#[derive(Debug)]

@ -4,7 +4,7 @@ use std::fmt;
pub struct CharData<'a> {
name: &'a str,
category: Category,
ccc: u8,
ccc: CombiningClass,
}
impl<'a> CharData<'a> {
@ -17,12 +17,12 @@ impl<'a> CharData<'a> {
let codepoint = u32::from_str_radix(fields[0], 16).ok()?;
let name = fields[1];
let category = Category::from_abbr(fields[2])?;
let ccc = u8::from_str_radix(fields[3], 10).ok()?;
let ccc = CombiningClass(u8::from_str_radix(fields[3], 10).ok()?);
Some((codepoint, Self::from_parts(name, category, ccc)))
}
pub fn from_parts(name: &'a str, category: Category, ccc: u8) -> Self {
pub fn from_parts(name: &'a str, category: Category, ccc: CombiningClass) -> Self {
Self { name, category, ccc }
}
@ -41,7 +41,7 @@ impl<'a> CharData<'a> {
self.category
}
pub fn ccc(&self) -> u8 {
pub fn ccc(&self) -> CombiningClass {
self.ccc
}
}
@ -235,3 +235,48 @@ impl fmt::Display for Category {
}
}
/// The canonical combining class of a codepoint, wrapping the raw byte value.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct CombiningClass(pub u8);

impl CombiningClass {
    /// Raw class values that have a human-readable name, paired with that name
    /// and listed in ascending order of value.
    const NAMED_CLASSES: &'static [(u8, &'static str)] = &[
        (0, "Not_Reordered"),
        (1, "Overlay"),
        (6, "Han_Reading"),
        (7, "Nukta"),
        (8, "Kana_Voicing"),
        (9, "Virama"),
        (200, "Attached_Below_Left"),
        (202, "Attached_Below"),
        (214, "Attached_Above"),
        (216, "Attached_Above_Right"),
        (218, "Below_Left"),
        (220, "Below"),
        (222, "Below_Right"),
        (224, "Left"),
        (226, "Right"),
        (228, "Above_Left"),
        (230, "Above"),
        (232, "Above_Right"),
        (233, "Double_Below"),
        (234, "Double_Above"),
        (240, "Iota_Subscript"),
    ];

    /// Returns the human-readable name of this combining class, or `None`
    /// when the raw value has no entry in the name table.
    pub fn name(self) -> Option<&'static str> {
        Self::NAMED_CLASSES
            .iter()
            .find(|&&(value, _)| value == self.0)
            .map(|&(_, label)| label)
    }

    /// Returns `true` for every class other than class 0.
    pub fn is_combining(self) -> bool {
        self.0 > 0
    }
}
impl fmt::Display for CombiningClass {
    /// Writes the human-readable class name when one exists; otherwise
    /// writes `Ccc` followed by the raw numeric value.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        if let Some(label) = self.name() {
            return f.write_str(label);
        }
        // No name for this value: fall back to the raw number.
        write!(f, "Ccc{}", self.0)
    }
}

@ -1,13 +1,13 @@
use std::{collections::{HashMap, hash_map}, error, fmt, str, ops::Range};
use crate::chardata::{CharData, Category};
use crate::chardata::{CharData, Category, CombiningClass};
const DATA_ENTRY_SIZE: usize = 8;
const DATA_INIT_FLAG: u8 = 1;
const DATA_REPEATED_FLAG: u8 = 2;
fn encode_char_data(name_index: u32, category: Category, ccc: u8, repeated: bool) -> [u8; DATA_ENTRY_SIZE] {
fn encode_char_data(name_index: u32, category: Category, ccc: CombiningClass, repeated: bool) -> [u8; DATA_ENTRY_SIZE] {
let mut buf = [0u8; DATA_ENTRY_SIZE];
buf[0] |= DATA_INIT_FLAG;
@ -18,12 +18,12 @@ fn encode_char_data(name_index: u32, category: Category, ccc: u8, repeated: bool
buf[1..5].copy_from_slice(&name_index.to_le_bytes());
buf[5] = category.byte_repr();
buf[6] = ccc;
buf[6] = ccc.0;
buf
}
fn decode_char_data(bytes: [u8; DATA_ENTRY_SIZE]) -> Option<(u32, Category, u8, bool)> {
fn decode_char_data(bytes: [u8; DATA_ENTRY_SIZE]) -> Option<(u32, Category, CombiningClass, bool)> {
let flags = bytes[0];
if flags & DATA_INIT_FLAG == 0 {
@ -32,7 +32,7 @@ fn decode_char_data(bytes: [u8; DATA_ENTRY_SIZE]) -> Option<(u32, Category, u8,
let name_index = u32::from_le_bytes(bytes[1..5].try_into().unwrap());
let category = Category::from_byte(bytes[5])?;
let ccc = bytes[6];
let ccc = CombiningClass(bytes[6]);
let repeated = flags & DATA_REPEATED_FLAG != 0;
Some((name_index, category, ccc, repeated))

Loading…
Cancel
Save