refactoring

main
pantonshire 3 years ago
parent a3750b5732
commit 25dae48064

@ -38,33 +38,37 @@ impl<'a> CharData<'a> {
self.category
}
/// Returns the value of the `combining` field as a `CombiningClass`.
///
/// Carries the same `#[inline]` / `#[must_use]` pair as the neighbouring field getters.
#[inline]
#[must_use]
pub fn combining_class(&self) -> CombiningClass {
    self.combining
}
/// Returns the value of the `bidi` field as a `BidiCategory`.
// NOTE(review): two signature lines follow — this looks like a rendered diff showing the old
// name (`bidi`) next to its replacement (`bidi_category`); confirm which one is live.
#[inline]
#[must_use]
pub fn bidi(&self) -> BidiCategory {
pub fn bidi_category(&self) -> BidiCategory {
self.bidi
}
/// Returns the value of the `decomp` field: a `DecompMapping` when one is stored, else `None`.
// NOTE(review): two signature lines follow — this looks like a rendered diff showing the old
// name (`decomp`) next to its replacement (`decomp_mapping`); confirm which one is live.
#[inline]
#[must_use]
pub fn decomp(&self) -> Option<DecompMapping<'a>> {
pub fn decomp_mapping(&self) -> Option<DecompMapping<'a>> {
self.decomp
}
/// Returns the value of the `decimal_digit` field: a `u8` when one is stored, else `None`.
// NOTE(review): two signature lines follow — this looks like a rendered diff showing the old
// name (`decimal_digit`) next to its replacement (`decimal_digit_value`); confirm which is live.
#[inline]
#[must_use]
pub fn decimal_digit(&self) -> Option<u8> {
pub fn decimal_digit_value(&self) -> Option<u8> {
self.decimal_digit
}
/// Returns the value of the `digit` field: a `u8` when one is stored, else `None`.
// NOTE(review): two signature lines follow — this looks like a rendered diff showing the old
// name (`digit`) next to its replacement (`digit_value`); confirm which one is live.
#[inline]
#[must_use]
pub fn digit(&self) -> Option<u8> {
pub fn digit_value(&self) -> Option<u8> {
self.digit
}
/// Returns the value of the `numeric` field: a string slice when one is stored, else `None`.
// NOTE(review): two signature lines follow — this looks like a rendered diff showing the old
// name (`numeric`) next to its replacement (`numeric_value`); confirm which one is live.
#[inline]
#[must_use]
pub fn numeric(&self) -> Option<&'a str> {
pub fn numeric_value(&self) -> Option<&'a str> {
self.numeric
}
@ -76,7 +80,7 @@ impl<'a> CharData<'a> {
/// Returns the value of the `old_name` field: a string slice when one is stored, else `None`.
// NOTE(review): two signature lines follow — this looks like a rendered diff showing the old
// name (`old_name`) next to its replacement (`unicode_1_name`); confirm which one is live.
#[inline]
#[must_use]
pub fn old_name(&self) -> Option<&'a str> {
pub fn unicode_1_name(&self) -> Option<&'a str> {
self.old_name
}

@ -1,57 +1,3 @@
pub mod character;
pub mod unicode_data;
pub mod utf8;
// pub use utfdump_core::{CharData, Category, CombiningClass};
// use once_cell::sync::Lazy;
// use utfdump_core::data_store::DataStore;
// const UNICODE_DATA_BYTES: &[u8] = include_bytes!(
// concat!(env!("OUT_DIR"), "/unicode_data_encoded")
// );
// static UNICODE_DATA: Lazy<DataStore> = Lazy::new(|| {
// DataStore::from_bytes(UNICODE_DATA_BYTES).unwrap()
// });
// pub fn char_data(c: char) -> Option<CharData<'static>> {
// UNICODE_DATA.get(c)
// }
/// Raw bytes of the `unicode_data_encoded` file found in `OUT_DIR`, embedded at compile time.
// NOTE(review): presumably the file is produced by the crate's build script — confirm.
const UNICODE_DATA_BYTES: &[u8] =
    include_bytes!(concat!(env!("OUT_DIR"), "/unicode_data_encoded"));
#[cfg(test)]
mod tests {
    use crate::unicode_data::UnicodeData;
    use crate::UNICODE_DATA_BYTES;

    /// Decodes the embedded bytes and spot-checks names, old names and unassigned gaps.
    #[test]
    fn test_data_decode() {
        let data = UnicodeData::from_bytes(UNICODE_DATA_BYTES).unwrap();

        // Entries checked for both their name and their old name.
        let controls = [
            (0x0, "NULL"),
            (0x1, "START OF HEADING"),
            (0x2, "START OF TEXT"),
        ];
        for &(code_point, old_name) in &controls {
            assert_eq!(data.get(code_point).unwrap().name(), "<control>");
            assert_eq!(data.get(code_point).unwrap().old_name(), Some(old_name));
        }

        // `Some(name)`: the lookup must succeed with that name.
        // `None`: the lookup itself must return `None`.
        let lookups = [
            (0x377, Some("GREEK SMALL LETTER PAMPHYLIAN DIGAMMA")),
            (0x378, None),
            (0x379, None),
            (0x37a, Some("GREEK YPOGEGRAMMENI")),
            (0x33ff, Some("SQUARE GAL")),
            (0x3400, Some("CJK Ideograph Extension A")),
            (0x3401, Some("CJK Ideograph Extension A")),
            (0x3402, Some("CJK Ideograph Extension A")),
            (0x4dbe, Some("CJK Ideograph Extension A")),
            (0x4dbf, Some("CJK Ideograph Extension A")),
            (0x4dc0, Some("HEXAGRAM FOR THE CREATIVE HEAVEN")),
            (0x1039f, Some("UGARITIC WORD DIVIDER")),
        ];
        for &(code_point, expected) in &lookups {
            match expected {
                Some(name) => assert_eq!(data.get(code_point).unwrap().name(), name),
                None => assert!(data.get(code_point).is_none()),
            }
        }
    }
}

@ -20,7 +20,15 @@ pub struct UnicodeData<'a> {
string_table: StringTable<'a>,
}
/// Raw bytes of the `unicode_data_encoded` file found in `OUT_DIR`, embedded at compile time.
// NOTE(review): presumably the file is produced by the crate's build script — confirm.
const UNICODE_DATA_BYTES: &[u8] =
    include_bytes!(concat!(env!("OUT_DIR"), "/unicode_data_encoded"));
impl<'a> UnicodeData<'a> {
pub fn new() -> Result<Self, UnicodeDataError> {
Self::from_bytes(UNICODE_DATA_BYTES)
}
pub(crate) fn from_bytes(bs: &'a [u8]) -> Result<Self, UnicodeDataError> {
let mut bs = ByteStream(bs);
@ -436,3 +444,35 @@ impl fmt::Display for UnicodeDataError {
}
}
}
#[cfg(test)]
mod tests {
    use super::UnicodeData;

    /// Decodes the embedded data and spot-checks names, Unicode 1 names and unassigned gaps.
    #[test]
    fn test_data_decode() {
        let data = UnicodeData::new().unwrap();

        // Entries checked for both their name and their Unicode 1 name.
        let controls = [
            (0x0, "NULL"),
            (0x1, "START OF HEADING"),
            (0x2, "START OF TEXT"),
        ];
        for &(code_point, unicode_1_name) in &controls {
            assert_eq!(data.get(code_point).unwrap().name(), "<control>");
            assert_eq!(
                data.get(code_point).unwrap().unicode_1_name(),
                Some(unicode_1_name)
            );
        }

        // `Some(name)`: the lookup must succeed with that name.
        // `None`: the lookup itself must return `None`.
        let lookups = [
            (0x377, Some("GREEK SMALL LETTER PAMPHYLIAN DIGAMMA")),
            (0x378, None),
            (0x379, None),
            (0x37a, Some("GREEK YPOGEGRAMMENI")),
            (0x33ff, Some("SQUARE GAL")),
            (0x3400, Some("CJK Ideograph Extension A")),
            (0x3401, Some("CJK Ideograph Extension A")),
            (0x3402, Some("CJK Ideograph Extension A")),
            (0x4dbe, Some("CJK Ideograph Extension A")),
            (0x4dbf, Some("CJK Ideograph Extension A")),
            (0x4dc0, Some("HEXAGRAM FOR THE CREATIVE HEAVEN")),
            (0x1039f, Some("UGARITIC WORD DIVIDER")),
        ];
        for &(code_point, expected) in &lookups {
            match expected {
                Some(name) => assert_eq!(data.get(code_point).unwrap().name(), name),
                None => assert!(data.get(code_point).is_none()),
            }
        }
    }
}

Loading…
Cancel
Save