diff --git a/.gitignore b/.gitignore
index 2b214e2..2d1240f 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,5 +1,4 @@
 /target
-/build
-/build.sh
+pkg/
 __pycache__
 unicode_data_latest.txt
diff --git a/Cargo.lock b/Cargo.lock
index 28a2631..40682ca 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -31,6 +31,12 @@ version = "1.3.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a"
 
+[[package]]
+name = "bumpalo"
+version = "3.13.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a3e2c3daef883ecc1b5d58c15adae93470a91d425f3532ba1695849656af3fc1"
+
 [[package]]
 name = "bytecount"
 version = "0.6.3"
@@ -70,7 +76,7 @@ dependencies = [
  "proc-macro-error",
  "proc-macro2",
  "quote",
- "syn",
+ "syn 1.0.109",
 ]
 
 [[package]]
@@ -152,6 +158,12 @@ dependencies = [
  "serde",
 ]
 
+[[package]]
+name = "log"
+version = "0.4.18"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "518ef76f2f87365916b142844c16d8fefd85039bc5699050210a7778ee1cd1de"
+
 [[package]]
 name = "miniz_oxide"
 version = "0.7.1"
@@ -193,7 +205,7 @@ dependencies = [
  "proc-macro-error-attr",
  "proc-macro2",
  "quote",
- "syn",
+ "syn 1.0.109",
  "version_check",
 ]
 
@@ -249,6 +261,17 @@ dependencies = [
  "unicode-ident",
 ]
 
+[[package]]
+name = "syn"
+version = "2.0.18"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "32d41677bcbe24c20c52e7c70b0d8db04134c5d1066bf98662e2871ad200ea3e"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "unicode-ident",
+]
+
 [[package]]
 name = "tabled"
 version = "0.8.0"
@@ -270,7 +293,7 @@ dependencies = [
  "proc-macro-error",
  "proc-macro2",
  "quote",
- "syn",
+ "syn 1.0.109",
 ]
 
 [[package]]
@@ -324,12 +347,74 @@ dependencies = [
  "utfdump",
 ]
 
+[[package]]
+name = "utfdump_wasm"
+version = "0.1.0"
+dependencies = [
+ "utfdump",
+ "wasm-bindgen",
+]
+
 [[package]]
 name = "version_check"
 version = "0.9.4"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f"
 
+[[package]]
+name = "wasm-bindgen"
+version = "0.2.86"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5bba0e8cb82ba49ff4e229459ff22a191bbe9a1cb3a341610c9c33efc27ddf73"
+dependencies = [
+ "cfg-if",
+ "wasm-bindgen-macro",
+]
+
+[[package]]
+name = "wasm-bindgen-backend"
+version = "0.2.86"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "19b04bc93f9d6bdee709f6bd2118f57dd6679cf1176a1af464fca3ab0d66d8fb"
+dependencies = [
+ "bumpalo",
+ "log",
+ "once_cell",
+ "proc-macro2",
+ "quote",
+ "syn 2.0.18",
+ "wasm-bindgen-shared",
+]
+
+[[package]]
+name = "wasm-bindgen-macro"
+version = "0.2.86"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "14d6b024f1a526bb0234f52840389927257beb670610081360e5a03c5df9c258"
+dependencies = [
+ "quote",
+ "wasm-bindgen-macro-support",
+]
+
+[[package]]
+name = "wasm-bindgen-macro-support"
+version = "0.2.86"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e128beba882dd1eb6200e1dc92ae6c5dbaa4311aa7bb211ca035779e5efc39f8"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn 2.0.18",
+ "wasm-bindgen-backend",
+ "wasm-bindgen-shared",
+]
+
+[[package]]
+name = "wasm-bindgen-shared"
+version = "0.2.86"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ed9d5b4305409d1fc9482fee2d7f9bcbf24b3972bf59817ef757e23982242a93"
+
 [[package]]
 name = "winapi"
 version = "0.3.9"
diff --git a/Cargo.toml b/Cargo.toml
index a3a8149..6b4bdfa 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -2,4 +2,13 @@
 members = [
     "lib",
     "bin",
+    "wasm",
 ]
+
+[profile.release]
+panic = "abort"
+lto = "fat"
+
+# Per-package overrides are keyed by package name (`utfdump_wasm`), not by member directory.
+[profile.release.package.utfdump_wasm]
+opt-level = "z"
diff --git a/wasm/Cargo.toml b/wasm/Cargo.toml
new file mode 100644
index 0000000..f49ca5d
--- /dev/null
+++ b/wasm/Cargo.toml
@@ -0,0 +1,11 @@
+[package]
+name = "utfdump_wasm"
+version = "0.1.0"
+edition = "2021"
+
+[lib]
+crate-type = ["cdylib", "rlib"]
+
+[dependencies]
+utfdump = { path = "../lib" }
+wasm-bindgen = "0.2.86"
diff --git a/wasm/src/lib.rs b/wasm/src/lib.rs
new file mode 100644
index 0000000..246e161
--- /dev/null
+++ b/wasm/src/lib.rs
@@ -0,0 +1,166 @@
+//! WebAssembly bindings exposing `utfdump`'s Unicode character data to JavaScript.
+
+use std::sync::OnceLock;
+
+use utfdump::{UnicodeData, CombiningClass, CharData};
+use wasm_bindgen::prelude::wasm_bindgen;
+
+/// JavaScript-visible wrapper around a [`CharData`] entry.
+#[wasm_bindgen]
+pub struct WbgCharData(CharData<'static>);
+
+#[wasm_bindgen]
+impl WbgCharData {
+    /// Returns an owned copy of the character's name.
+    #[wasm_bindgen]
+    pub fn name(&self) -> String {
+        self.0.name().to_owned()
+    }
+
+    /// Encodes the code point as UTF-8.
+    ///
+    /// Returns `None` when the code point is not a valid `char`.
+    #[wasm_bindgen]
+    pub fn encoded_utf8(&self) -> Option<EncodedCodepoint> {
+        let c = char::try_from(self.0.codepoint()).ok()?;
+        let mut buf = [0u8; 4];
+        // A `char` encodes to at most 4 UTF-8 bytes, so the cast cannot truncate.
+        let len = c.encode_utf8(&mut buf).len() as u8;
+        Some(EncodedCodepoint::new(buf, len))
+    }
+
+    /// Encodes the code point as UTF-16 with each code unit in little-endian byte order.
+    ///
+    /// Returns `None` when the code point is not a valid `char`.
+    #[wasm_bindgen]
+    pub fn encoded_utf16_le(&self) -> Option<EncodedCodepoint> {
+        let (word_buf, num_words) = self.encoded_utf16()?;
+        let mut byte_buf = [0u8; 4];
+        let words = word_buf.iter().take(usize::from(num_words));
+        for (chunk, word) in byte_buf.chunks_exact_mut(2).zip(words) {
+            chunk.copy_from_slice(&word.to_le_bytes());
+        }
+        Some(EncodedCodepoint::new(byte_buf, num_words * 2))
+    }
+
+    /// Encodes the code point as UTF-16, returning the code-unit buffer and the number of
+    /// units used, or `None` when the code point is not a valid `char`.
+    fn encoded_utf16(&self) -> Option<([u16; 2], u8)> {
+        let c = char::try_from(self.0.codepoint()).ok()?;
+        let mut word_buf = [0u16; 2];
+        // A `char` encodes to at most 2 UTF-16 code units, so the cast cannot truncate.
+        let num_words = c.encode_utf16(&mut word_buf).len() as u8;
+        Some((word_buf, num_words))
+    }
+
+    /// Returns the abbreviation of the character's category.
+    #[wasm_bindgen]
+    pub fn category(&self) -> String {
+        self.0.category().abbreviation().to_owned()
+    }
+
+    /// Returns the full name of the character's category.
+    #[wasm_bindgen]
+    pub fn category_full(&self) -> String {
+        self.0.category().full_name().to_owned()
+    }
+
+    /// Returns the numeric value of the character's combining class.
+    #[wasm_bindgen]
+    pub fn combining_class(&self) -> u8 {
+        self.0.combining_class().0
+    }
+
+    /// Returns the abbreviation of the character's bidirectional category.
+    #[wasm_bindgen]
+    pub fn bidi(&self) -> String {
+        self.0.bidi_category().abbreviation().to_owned()
+    }
+
+    /// Returns the full name of the character's bidirectional category.
+    #[wasm_bindgen]
+    pub fn bidi_full(&self) -> String {
+        self.0.bidi_category().full_name().to_owned()
+    }
+
+    /// Returns an owned copy of the character's numeric value, if it has one.
+    #[wasm_bindgen]
+    pub fn numeric_value(&self) -> Option<String> {
+        self.0.numeric_value().map(ToOwned::to_owned)
+    }
+
+    /// Returns whether the character is flagged as mirrored.
+    #[wasm_bindgen]
+    pub fn mirrored(&self) -> bool {
+        self.0.mirrored()
+    }
+
+    /// Returns the value of the character's decomposition mapping, if it has one.
+    #[wasm_bindgen]
+    pub fn decomp_string(&self) -> Option<String> {
+        self.0.decomp_mapping().map(|d| d.value().to_owned())
+    }
+
+    /// Returns an owned copy of the character's uppercase mapping, if it has one.
+    #[wasm_bindgen]
+    pub fn uppercase_string(&self) -> Option<String> {
+        self.0.uppercase().map(ToOwned::to_owned)
+    }
+
+    /// Returns an owned copy of the character's lowercase mapping, if it has one.
+    #[wasm_bindgen]
+    pub fn lowercase_string(&self) -> Option<String> {
+        self.0.lowercase().map(ToOwned::to_owned)
+    }
+
+    /// Returns an owned copy of the character's titlecase mapping, if it has one.
+    #[wasm_bindgen]
+    pub fn titlecase_string(&self) -> Option<String> {
+        self.0.titlecase().map(ToOwned::to_owned)
+    }
+}
+
+/// Returns the name of the combining class with the given numeric value, if it has one.
+#[wasm_bindgen]
+pub fn combining_class_name(combining_class: u8) -> Option<String> {
+    CombiningClass(combining_class)
+        .name()
+        .map(ToOwned::to_owned)
+}
+
+/// Unicode data table, built on first use and shared by every later lookup.
+static UNICODE_DATA: OnceLock<UnicodeData> = OnceLock::new();
+
+/// Looks up character data for `codepoint`, or `None` if the table has no entry for it.
+///
+/// # Panics
+///
+/// Panics if the Unicode data fails to initialise on first use.
+#[wasm_bindgen]
+pub fn codepoint_char_data(codepoint: u32) -> Option<WbgCharData> {
+    let unicode_data = UNICODE_DATA.get_or_init(|| {
+        UnicodeData::new().expect("failed to initialise Unicode data")
+    });
+
+    unicode_data
+        .get(codepoint)
+        .map(WbgCharData)
+}
+
+/// Up to four encoded bytes of a code point plus how many of them are in use.
+#[wasm_bindgen]
+pub struct EncodedCodepoint {
+    // `wasm-bindgen` unfortunately does not support arrays :(
+    pub b0: u8,
+    pub b1: u8,
+    pub b2: u8,
+    pub b3: u8,
+    pub len: u8,
+}
+
+impl EncodedCodepoint {
+    /// Splits `bytes` into the individual fields and records the used length.
+    fn new([b0, b1, b2, b3]: [u8; 4], len: u8) -> Self {
+        Self { b0, b1, b2, b3, len }
+    }
+}