update binary to use new lib

main
pantonshire 3 years ago
parent cc47c90f50
commit f9430db2f9

@ -3,7 +3,7 @@ use std::{fmt, io::{self, Read}};
use clap::Parser;
use libshire::strings::CappedString;
use tabled::{Tabled, Table, Style};
use utfdump::{char_data, CombiningClass, Category, utf8::{Utf8Decode, Utf8Error}};
use utfdump::{CombiningClass, Category, utf8::{Utf8Decode, Utf8Error}, StaticUnicodeData};
#[derive(Parser)]
#[clap(author, version, about, long_about = None)]
@ -14,6 +14,8 @@ struct Args {
}
fn main() {
let unicode_data = StaticUnicodeData::new().unwrap();
let args = Args::parse();
let input = {
@ -27,7 +29,7 @@ fn main() {
let rows = input
.decode_utf8()
.map(|c| OutRow::from_char_result(c, args.full_category_names));
.map(|c| OutRow::from_char_result(&unicode_data, c, args.full_category_names));
let table = Table::new(rows)
.with(Style::modern());
@ -48,25 +50,35 @@ struct OutRow {
#[tabled(rename = "Category")]
category: Optional<DisplayCategory>,
#[tabled(rename = "Combining")]
char_combining_class: Optional<CombiningClass>,
char_combining_class: Optional<DisplayCombiningClass>,
}
impl OutRow {
fn from_char_result(c: Result<char, Utf8Error>, full_category_names: bool) -> Self {
fn from_char_result(
unicode_data: &StaticUnicodeData,
c: Result<char, Utf8Error>,
full_category_names: bool
) -> Self
{
match c {
Ok(c) => Self::from_good_char(c, full_category_names),
Ok(c) => Self::from_good_char(unicode_data, c, full_category_names),
Err(err) => Self::from_bad_char(err),
}
}
fn from_good_char(c: char, full_category_names: bool) -> Self {
fn from_good_char(
unicode_data: &StaticUnicodeData,
c: char,
full_category_names: bool
) -> Self
{
let mut name = Optional::None;
let mut category = Optional::None;
let mut char_combining_class = Optional::None;
let mut combining = false;
if let Some(char_data) = char_data(c) {
if let Some(char_data) = unicode_data.get(u32::from(c)) {
name = Optional::Some(char_data.name());
category = Optional::Some(DisplayCategory {
category: char_data.category(),
@ -74,7 +86,7 @@ impl OutRow {
});
let ccc = char_data.combining_class();
char_combining_class = Optional::Some(ccc);
char_combining_class = Optional::Some(DisplayCombiningClass { ccc });
combining = ccc.is_combining();
}
@ -184,8 +196,20 @@ impl fmt::Display for DisplayCategory {
if self.full_name {
write!(f, "{}", self.category.full_name())
} else {
write!(f, "{}", self.category.abbr())
write!(f, "{}", self.category.abbreviation())
}
}
}
/// Display adapter around a [`CombiningClass`], used for the "Combining" column of the
/// output table.
struct DisplayCombiningClass {
    /// The combining class being rendered.
    ccc: CombiningClass,
}

impl fmt::Display for DisplayCombiningClass {
    /// Writes the symbolic name of the combining class when it has one, and the raw
    /// numeric value otherwise.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        if let Some(label) = self.ccc.name() {
            f.write_str(label)
        } else {
            // No symbolic name is defined for this value, so show the number itself.
            write!(f, "{}", self.ccc.0)
        }
    }
}

@ -112,6 +112,39 @@ impl<'a> CharData<'a> {
/// Combining class of a character, stored as its raw numeric value.
#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, Debug)]
pub struct CombiningClass(pub u8);

impl CombiningClass {
    /// Every combining class value that has a symbolic name, in ascending numeric order.
    const NAMED_CLASSES: &'static [(u8, &'static str)] = &[
        (0, "Not_Reordered"),
        (1, "Overlay"),
        (6, "Han_Reading"),
        (7, "Nukta"),
        (8, "Kana_Voicing"),
        (9, "Virama"),
        (200, "Attached_Below_Left"),
        (202, "Attached_Below"),
        (214, "Attached_Above"),
        (216, "Attached_Above_Right"),
        (218, "Below_Left"),
        (220, "Below"),
        (222, "Below_Right"),
        (224, "Left"),
        (226, "Right"),
        (228, "Above_Left"),
        (230, "Above"),
        (232, "Above_Right"),
        (233, "Double_Below"),
        (234, "Double_Above"),
        (240, "Iota_Subscript"),
    ];

    /// Returns the symbolic name of this combining class, or `None` when the numeric
    /// value has no name in the table.
    pub fn name(self) -> Option<&'static str> {
        Self::NAMED_CLASSES
            .iter()
            .find(|&&(value, _)| value == self.0)
            .map(|&(_, label)| label)
    }

    /// Returns `true` for every class other than `0`.
    pub fn is_combining(self) -> bool {
        self.0 > 0
    }
}
#[derive(Clone, Copy, PartialEq, Eq, Hash, Debug)]
pub enum Category {
Lu,
@ -182,6 +215,76 @@ impl Category {
_ => None,
}
}
/// Returns the two-letter abbreviation of this category, e.g. `"Lu"`.
pub fn abbreviation(self) -> &'static str {
    match self {
        // Letters
        Self::Lu => "Lu",
        Self::Ll => "Ll",
        Self::Lt => "Lt",
        Self::Lm => "Lm",
        Self::Lo => "Lo",
        // Marks
        Self::Mn => "Mn",
        Self::Mc => "Mc",
        Self::Me => "Me",
        // Numbers
        Self::Nd => "Nd",
        Self::Nl => "Nl",
        Self::No => "No",
        // Punctuation
        Self::Pc => "Pc",
        Self::Pd => "Pd",
        Self::Ps => "Ps",
        Self::Pe => "Pe",
        Self::Pi => "Pi",
        Self::Pf => "Pf",
        Self::Po => "Po",
        // Symbols
        Self::Sm => "Sm",
        Self::Sc => "Sc",
        Self::Sk => "Sk",
        Self::So => "So",
        // Separators
        Self::Zs => "Zs",
        Self::Zl => "Zl",
        Self::Zp => "Zp",
        // Other
        Self::Cc => "Cc",
        Self::Cf => "Cf",
        Self::Cs => "Cs",
        Self::Co => "Co",
        Self::Cn => "Cn",
    }
}
/// Returns the human-readable name of this category in the form `"Major, Minor"`,
/// e.g. `"Letter, Uppercase"`.
pub fn full_name(self) -> &'static str {
    match self {
        Self::Lu => "Letter, Uppercase",
        Self::Ll => "Letter, Lowercase",
        Self::Lt => "Letter, Titlecase",
        Self::Mn => "Mark, Non-Spacing",
        Self::Mc => "Mark, Spacing Combining",
        Self::Me => "Mark, Enclosing",
        Self::Nd => "Number, Decimal Digit",
        Self::Nl => "Number, Letter",
        Self::No => "Number, Other",
        Self::Zs => "Separator, Space",
        Self::Zl => "Separator, Line",
        // Uses the same ", " separator as every other entry (was "Separator: Paragraph").
        Self::Zp => "Separator, Paragraph",
        Self::Cc => "Other, Control",
        Self::Cf => "Other, Format",
        Self::Cs => "Other, Surrogate",
        Self::Co => "Other, Private Use",
        Self::Cn => "Other, Not Assigned",
        Self::Lm => "Letter, Modifier",
        Self::Lo => "Letter, Other",
        Self::Pc => "Punctuation, Connector",
        Self::Pd => "Punctuation, Dash",
        Self::Ps => "Punctuation, Open",
        Self::Pe => "Punctuation, Close",
        Self::Pi => "Punctuation, Initial Quote",
        Self::Pf => "Punctuation, Final Quote",
        Self::Po => "Punctuation, Other",
        Self::Sm => "Symbol, Math",
        Self::Sc => "Symbol, Currency",
        Self::Sk => "Symbol, Modifier",
        Self::So => "Symbol, Other",
    }
}
}
#[derive(Clone, Copy, PartialEq, Eq, Hash, Debug)]
@ -240,6 +343,62 @@ impl BidiCategory {
_ => None,
}
}
pub fn abbreviation(self) -> &'static str {
match self {
Self::L => "L",
Self::R => "R",
Self::Al => "AL",
Self::En => "EN",
Self::Es => "ES",
Self::Et => "ET",
Self::An => "AN",
Self::Cs => "CS",
Self::Nsm => "NSM",
Self::Bn => "BN",
Self::B => "B",
Self::S => "S",
Self::Ws => "WS",
Self::On => "ON",
Self::Lre => "LRE",
Self::Lro => "LRO",
Self::Rle => "RLE",
Self::Rlo => "RLO",
Self::Pdf => "PDF",
Self::Lri => "LRI",
Self::Rli => "RLI",
Self::Fsi => "FSI",
Self::Pdi => "PDI",
}
}
pub fn full_name(self) -> &'static str {
match self {
Self::L => "Left_To_Right",
Self::R => "Right_To_Left",
Self::Al => "Arabic_Letter",
Self::En => "European_Number",
Self::Es => "European_Separator",
Self::Et => "European_Terminator",
Self::An => "Arabic_Number",
Self::Cs => "Common_Separator",
Self::Nsm => "Nonspacing_Mark",
Self::Bn => "Boundary_Neutral",
Self::B => "Paragraph_Separator",
Self::S => "Segment_Separator",
Self::Ws => "White_Space",
Self::On => "Other_Neutral",
Self::Lre => "Left_To_Right_Embedding",
Self::Lro => "Left_To_Right_Override",
Self::Rle => "Right_To_Left_Embedding",
Self::Rlo => "Right_To_Left_Override",
Self::Pdf => "Pop_Directional_Format",
Self::Lri => "Left_To_Right_Isolate",
Self::Rli => "Right_To_Left_Isolate",
Self::Fsi => "First_Strong_Isolate",
Self::Pdi => "Pop_Directional_Isolate",
}
}
}
#[derive(Clone, Copy, PartialEq, Eq, Hash, Debug)]
@ -286,6 +445,29 @@ pub enum DecompKind {
Narrow,
}
impl DecompKind {
pub fn name(self) -> &'static str {
match self {
Self::Nobreak => "noBreak",
Self::Compat => "compat",
Self::Super => "super",
Self::Fraction => "fraction",
Self::Sub => "sub",
Self::Font => "font",
Self::Circle => "circle",
Self::Wide => "wide",
Self::Vertical => "vertical",
Self::Square => "square",
Self::Isolated => "isolated",
Self::Final => "final",
Self::Initial => "initial",
Self::Medial => "medial",
Self::Small => "small",
Self::Narrow => "narrow",
}
}
}
pub(crate) enum OptionalDecompKind {
None,
Anon,

@ -1,3 +1,14 @@
pub mod character;
pub mod unicode_data;
pub mod utf8;
pub use character::{
BidiCategory,
Category,
CharData,
CombiningClass,
DecompKind,
DecompMapping,
};
pub use unicode_data::{StaticUnicodeData, UnicodeData};

@ -13,6 +13,8 @@ use crate::character::{
const MAGIC_NUMBER: [u8; 8] = *b"UTFDUMP!";
pub type StaticUnicodeData = UnicodeData<'static>;
#[derive(Clone, Copy)]
pub struct UnicodeData<'a> {
group_table: GroupTable<'a>,
@ -24,11 +26,13 @@ const UNICODE_DATA_BYTES: &[u8] = include_bytes!(
concat!(env!("OUT_DIR"), "/unicode_data_encoded")
);
impl<'a> UnicodeData<'a> {
impl UnicodeData<'static> {
    /// Builds a [`UnicodeData`] over `UNICODE_DATA_BYTES`, the encoded table embedded
    /// into the crate at compile time.
    ///
    /// # Errors
    ///
    /// Returns whatever [`UnicodeDataError`] `from_bytes` reports for the embedded bytes.
    pub fn new() -> Result<Self, UnicodeDataError> {
        let embedded: &'static [u8] = UNICODE_DATA_BYTES;
        Self::from_bytes(embedded)
    }
}
impl<'a> UnicodeData<'a> {
pub(crate) fn from_bytes(bs: &'a [u8]) -> Result<Self, UnicodeDataError> {
let mut bs = ByteStream(bs);
@ -225,7 +229,6 @@ impl<'a> GroupTable<'a> {
}
}
const GROUP_KIND_NO_VALUE: u8 = 0;
const GROUP_KIND_USE_PREV_VALUE: u8 = 1;
#[derive(Debug)]

Loading…
Cancel
Save