refactoring

3 years ago · 25dae48064
parent a3750b5732
commit 25dae48064
3 changed files with 50 additions and 60 deletions
--- a/lib/src/character.rs
+++ b/lib/src/character.rs
@@ -38,33 +38,37 @@ impl<'a> CharData<'a> {
        self.category
    }

+    pub fn combining_class(&self) -> CombiningClass {
+        self.combining
+    }
+
    #[inline]
    #[must_use]
-    pub fn bidi(&self) -> BidiCategory {
+    pub fn bidi_category(&self) -> BidiCategory {
        self.bidi
    }

    #[inline]
    #[must_use]
-    pub fn decomp(&self) -> Option<DecompMapping<'a>> {
+    pub fn decomp_mapping(&self) -> Option<DecompMapping<'a>> {
        self.decomp
    }

    #[inline]
    #[must_use]
-    pub fn decimal_digit(&self) -> Option<u8> {
+    pub fn decimal_digit_value(&self) -> Option<u8> {
        self.decimal_digit
    }

    #[inline]
    #[must_use]
-    pub fn digit(&self) -> Option<u8> {
+    pub fn digit_value(&self) -> Option<u8> {
        self.digit
    }

    #[inline]
    #[must_use]
-    pub fn numeric(&self) -> Option<&'a str> {
+    pub fn numeric_value(&self) -> Option<&'a str> {
        self.numeric
    }

@@ -76,7 +80,7 @@ impl<'a> CharData<'a> {

    #[inline]
    #[must_use]
-    pub fn old_name(&self) -> Option<&'a str> {
+    pub fn unicode_1_name(&self) -> Option<&'a str> {
        self.old_name
    }

--- a/lib/src/lib.rs
+++ b/lib/src/lib.rs
@@ -1,57 +1,3 @@
 pub mod character;
 pub mod unicode_data;
 pub mod utf8;
-
-// pub use utfdump_core::{CharData, Category, CombiningClass};
-
-// use once_cell::sync::Lazy;
-// use utfdump_core::data_store::DataStore;
-
-// const UNICODE_DATA_BYTES: &[u8] = include_bytes!(
-//     concat!(env!("OUT_DIR"), "/unicode_data_encoded")
-// );
-
-// static UNICODE_DATA: Lazy<DataStore> = Lazy::new(|| {
-//     DataStore::from_bytes(UNICODE_DATA_BYTES).unwrap()
-// });
-
-// pub fn char_data(c: char) -> Option<CharData<'static>> {
-//     UNICODE_DATA.get(c)
-// }
-
-const UNICODE_DATA_BYTES: &[u8] = include_bytes!(
-    concat!(env!("OUT_DIR"), "/unicode_data_encoded")
-);
-
-#[cfg(test)]
-mod tests {
-    use crate::{UNICODE_DATA_BYTES, unicode_data};
-
-    #[test]
-    fn test_data_decode() {
-        let data = unicode_data::UnicodeData::from_bytes(UNICODE_DATA_BYTES)
-            .unwrap();
-
-        assert_eq!(data.get(0x0).unwrap().name(), "<control>");
-        assert_eq!(data.get(0x0).unwrap().old_name(), Some("NULL"));
-        assert_eq!(data.get(0x1).unwrap().name(), "<control>");
-        assert_eq!(data.get(0x1).unwrap().old_name(), Some("START OF HEADING"));
-        assert_eq!(data.get(0x2).unwrap().name(), "<control>");
-        assert_eq!(data.get(0x2).unwrap().old_name(), Some("START OF TEXT"));
-
-        assert_eq!(data.get(0x377).unwrap().name(), "GREEK SMALL LETTER PAMPHYLIAN DIGAMMA");
-        assert!(data.get(0x378).is_none());
-        assert!(data.get(0x379).is_none());
-        assert_eq!(data.get(0x37a).unwrap().name(), "GREEK YPOGEGRAMMENI");
-
-        assert_eq!(data.get(0x33ff).unwrap().name(), "SQUARE GAL");
-        assert_eq!(data.get(0x3400).unwrap().name(), "CJK Ideograph Extension A");
-        assert_eq!(data.get(0x3401).unwrap().name(), "CJK Ideograph Extension A");
-        assert_eq!(data.get(0x3402).unwrap().name(), "CJK Ideograph Extension A");
-        assert_eq!(data.get(0x4dbe).unwrap().name(), "CJK Ideograph Extension A");
-        assert_eq!(data.get(0x4dbf).unwrap().name(), "CJK Ideograph Extension A");
-        assert_eq!(data.get(0x4dc0).unwrap().name(), "HEXAGRAM FOR THE CREATIVE HEAVEN");
-
-        assert_eq!(data.get(0x1039f).unwrap().name(), "UGARITIC WORD DIVIDER");
-    }
-}
--- a/lib/src/unicode_data.rs
+++ b/lib/src/unicode_data.rs
@@ -20,7 +20,15 @@ pub struct UnicodeData<'a> {
    string_table: StringTable<'a>,
 }

+const UNICODE_DATA_BYTES: &[u8] = include_bytes!(
+    concat!(env!("OUT_DIR"), "/unicode_data_encoded")
+);
+
 impl<'a> UnicodeData<'a> {
+    pub fn new() -> Result<Self, UnicodeDataError> {
+        Self::from_bytes(UNICODE_DATA_BYTES)
+    }
+
    pub(crate) fn from_bytes(bs: &'a [u8]) -> Result<Self, UnicodeDataError> {
        let mut bs = ByteStream(bs);

@@ -436,3 +444,35 @@ impl fmt::Display for UnicodeDataError {
        }
    }
 }
+
+#[cfg(test)]
+mod tests {
+    use super::UnicodeData;
+
+    #[test]
+    fn test_data_decode() {
+        let data = UnicodeData::new().unwrap();
+
+        assert_eq!(data.get(0x0).unwrap().name(), "<control>");
+        assert_eq!(data.get(0x0).unwrap().unicode_1_name(), Some("NULL"));
+        assert_eq!(data.get(0x1).unwrap().name(), "<control>");
+        assert_eq!(data.get(0x1).unwrap().unicode_1_name(), Some("START OF HEADING"));
+        assert_eq!(data.get(0x2).unwrap().name(), "<control>");
+        assert_eq!(data.get(0x2).unwrap().unicode_1_name(), Some("START OF TEXT"));
+
+        assert_eq!(data.get(0x377).unwrap().name(), "GREEK SMALL LETTER PAMPHYLIAN DIGAMMA");
+        assert!(data.get(0x378).is_none());
+        assert!(data.get(0x379).is_none());
+        assert_eq!(data.get(0x37a).unwrap().name(), "GREEK YPOGEGRAMMENI");
+
+        assert_eq!(data.get(0x33ff).unwrap().name(), "SQUARE GAL");
+        assert_eq!(data.get(0x3400).unwrap().name(), "CJK Ideograph Extension A");
+        assert_eq!(data.get(0x3401).unwrap().name(), "CJK Ideograph Extension A");
+        assert_eq!(data.get(0x3402).unwrap().name(), "CJK Ideograph Extension A");
+        assert_eq!(data.get(0x4dbe).unwrap().name(), "CJK Ideograph Extension A");
+        assert_eq!(data.get(0x4dbf).unwrap().name(), "CJK Ideograph Extension A");
+        assert_eq!(data.get(0x4dc0).unwrap().name(), "HEXAGRAM FOR THE CREATIVE HEAVEN");
+
+        assert_eq!(data.get(0x1039f).unwrap().name(), "UGARITIC WORD DIVIDER");
+    }
+}