diff --git a/lib/src/character.rs b/lib/src/character.rs index 4b512f5..9537143 100644 --- a/lib/src/character.rs +++ b/lib/src/character.rs @@ -38,33 +38,37 @@ impl<'a> CharData<'a> { self.category } + pub fn combining_class(&self) -> CombiningClass { + self.combining + } + #[inline] #[must_use] - pub fn bidi(&self) -> BidiCategory { + pub fn bidi_category(&self) -> BidiCategory { self.bidi } #[inline] #[must_use] - pub fn decomp(&self) -> Option> { + pub fn decomp_mapping(&self) -> Option> { self.decomp } #[inline] #[must_use] - pub fn decimal_digit(&self) -> Option { + pub fn decimal_digit_value(&self) -> Option { self.decimal_digit } #[inline] #[must_use] - pub fn digit(&self) -> Option { + pub fn digit_value(&self) -> Option { self.digit } #[inline] #[must_use] - pub fn numeric(&self) -> Option<&'a str> { + pub fn numeric_value(&self) -> Option<&'a str> { self.numeric } @@ -76,7 +80,7 @@ impl<'a> CharData<'a> { #[inline] #[must_use] - pub fn old_name(&self) -> Option<&'a str> { + pub fn unicode_1_name(&self) -> Option<&'a str> { self.old_name } diff --git a/lib/src/lib.rs b/lib/src/lib.rs index ce11caf..2765303 100644 --- a/lib/src/lib.rs +++ b/lib/src/lib.rs @@ -1,57 +1,3 @@ pub mod character; pub mod unicode_data; pub mod utf8; - -// pub use utfdump_core::{CharData, Category, CombiningClass}; - -// use once_cell::sync::Lazy; -// use utfdump_core::data_store::DataStore; - -// const UNICODE_DATA_BYTES: &[u8] = include_bytes!( -// concat!(env!("OUT_DIR"), "/unicode_data_encoded") -// ); - -// static UNICODE_DATA: Lazy = Lazy::new(|| { -// DataStore::from_bytes(UNICODE_DATA_BYTES).unwrap() -// }); - -// pub fn char_data(c: char) -> Option> { -// UNICODE_DATA.get(c) -// } - -const UNICODE_DATA_BYTES: &[u8] = include_bytes!( - concat!(env!("OUT_DIR"), "/unicode_data_encoded") -); - -#[cfg(test)] -mod tests { - use crate::{UNICODE_DATA_BYTES, unicode_data}; - - #[test] - fn test_data_decode() { - let data = unicode_data::UnicodeData::from_bytes(UNICODE_DATA_BYTES) - .unwrap(); - - assert_eq!(data.get(0x0).unwrap().name(), ""); - assert_eq!(data.get(0x0).unwrap().old_name(), Some("NULL")); - assert_eq!(data.get(0x1).unwrap().name(), ""); - assert_eq!(data.get(0x1).unwrap().old_name(), Some("START OF HEADING")); - assert_eq!(data.get(0x2).unwrap().name(), ""); - assert_eq!(data.get(0x2).unwrap().old_name(), Some("START OF TEXT")); - - assert_eq!(data.get(0x377).unwrap().name(), "GREEK SMALL LETTER PAMPHYLIAN DIGAMMA"); - assert!(data.get(0x378).is_none()); - assert!(data.get(0x379).is_none()); - assert_eq!(data.get(0x37a).unwrap().name(), "GREEK YPOGEGRAMMENI"); - - assert_eq!(data.get(0x33ff).unwrap().name(), "SQUARE GAL"); - assert_eq!(data.get(0x3400).unwrap().name(), "CJK Ideograph Extension A"); - assert_eq!(data.get(0x3401).unwrap().name(), "CJK Ideograph Extension A"); - assert_eq!(data.get(0x3402).unwrap().name(), "CJK Ideograph Extension A"); - assert_eq!(data.get(0x4dbe).unwrap().name(), "CJK Ideograph Extension A"); - assert_eq!(data.get(0x4dbf).unwrap().name(), "CJK Ideograph Extension A"); - assert_eq!(data.get(0x4dc0).unwrap().name(), "HEXAGRAM FOR THE CREATIVE HEAVEN"); - - assert_eq!(data.get(0x1039f).unwrap().name(), "UGARITIC WORD DIVIDER"); - } -} diff --git a/lib/src/unicode_data.rs b/lib/src/unicode_data.rs index 432f4cd..6d869eb 100644 --- a/lib/src/unicode_data.rs +++ b/lib/src/unicode_data.rs @@ -20,7 +20,15 @@ pub struct UnicodeData<'a> { string_table: StringTable<'a>, } +const UNICODE_DATA_BYTES: &[u8] = include_bytes!( + concat!(env!("OUT_DIR"), "/unicode_data_encoded") +); + impl<'a> UnicodeData<'a> { + pub fn new() -> Result { + Self::from_bytes(UNICODE_DATA_BYTES) + } + pub(crate) fn from_bytes(bs: &'a [u8]) -> Result { let mut bs = ByteStream(bs); @@ -436,3 +444,35 @@ impl fmt::Display for UnicodeDataError { } } } + +#[cfg(test)] +mod tests { + use super::UnicodeData; + + #[test] + fn test_data_decode() { + let data = UnicodeData::new().unwrap(); + + assert_eq!(data.get(0x0).unwrap().name(), ""); + assert_eq!(data.get(0x0).unwrap().unicode_1_name(), Some("NULL")); + assert_eq!(data.get(0x1).unwrap().name(), ""); + assert_eq!(data.get(0x1).unwrap().unicode_1_name(), Some("START OF HEADING")); + assert_eq!(data.get(0x2).unwrap().name(), ""); + assert_eq!(data.get(0x2).unwrap().unicode_1_name(), Some("START OF TEXT")); + + assert_eq!(data.get(0x377).unwrap().name(), "GREEK SMALL LETTER PAMPHYLIAN DIGAMMA"); + assert!(data.get(0x378).is_none()); + assert!(data.get(0x379).is_none()); + assert_eq!(data.get(0x37a).unwrap().name(), "GREEK YPOGEGRAMMENI"); + + assert_eq!(data.get(0x33ff).unwrap().name(), "SQUARE GAL"); + assert_eq!(data.get(0x3400).unwrap().name(), "CJK Ideograph Extension A"); + assert_eq!(data.get(0x3401).unwrap().name(), "CJK Ideograph Extension A"); + assert_eq!(data.get(0x3402).unwrap().name(), "CJK Ideograph Extension A"); + assert_eq!(data.get(0x4dbe).unwrap().name(), "CJK Ideograph Extension A"); + assert_eq!(data.get(0x4dbf).unwrap().name(), "CJK Ideograph Extension A"); + assert_eq!(data.get(0x4dc0).unwrap().name(), "HEXAGRAM FOR THE CREATIVE HEAVEN"); + + assert_eq!(data.get(0x1039f).unwrap().name(), "UGARITIC WORD DIVIDER"); + } +}