wasm library

main
pantonshire 3 years ago
parent 9d6eae0bd0
commit 0d1872b902

3
.gitignore vendored

@ -1,5 +1,4 @@
/target
/build
/build.sh
pkg/
__pycache__
unicode_data_latest.txt

91
Cargo.lock generated

@ -31,6 +31,12 @@ version = "1.3.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a"
[[package]]
name = "bumpalo"
version = "3.13.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a3e2c3daef883ecc1b5d58c15adae93470a91d425f3532ba1695849656af3fc1"
[[package]]
name = "bytecount"
version = "0.6.3"
@ -70,7 +76,7 @@ dependencies = [
"proc-macro-error",
"proc-macro2",
"quote",
"syn",
"syn 1.0.109",
]
[[package]]
@ -152,6 +158,12 @@ dependencies = [
"serde",
]
[[package]]
name = "log"
version = "0.4.18"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "518ef76f2f87365916b142844c16d8fefd85039bc5699050210a7778ee1cd1de"
[[package]]
name = "miniz_oxide"
version = "0.7.1"
@ -193,7 +205,7 @@ dependencies = [
"proc-macro-error-attr",
"proc-macro2",
"quote",
"syn",
"syn 1.0.109",
"version_check",
]
@ -249,6 +261,17 @@ dependencies = [
"unicode-ident",
]
[[package]]
name = "syn"
version = "2.0.18"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "32d41677bcbe24c20c52e7c70b0d8db04134c5d1066bf98662e2871ad200ea3e"
dependencies = [
"proc-macro2",
"quote",
"unicode-ident",
]
[[package]]
name = "tabled"
version = "0.8.0"
@ -270,7 +293,7 @@ dependencies = [
"proc-macro-error",
"proc-macro2",
"quote",
"syn",
"syn 1.0.109",
]
[[package]]
@ -324,12 +347,74 @@ dependencies = [
"utfdump",
]
[[package]]
name = "utfdump_wasm"
version = "0.1.0"
dependencies = [
"utfdump",
"wasm-bindgen",
]
[[package]]
name = "version_check"
version = "0.9.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f"
[[package]]
name = "wasm-bindgen"
version = "0.2.86"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5bba0e8cb82ba49ff4e229459ff22a191bbe9a1cb3a341610c9c33efc27ddf73"
dependencies = [
"cfg-if",
"wasm-bindgen-macro",
]
[[package]]
name = "wasm-bindgen-backend"
version = "0.2.86"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "19b04bc93f9d6bdee709f6bd2118f57dd6679cf1176a1af464fca3ab0d66d8fb"
dependencies = [
"bumpalo",
"log",
"once_cell",
"proc-macro2",
"quote",
"syn 2.0.18",
"wasm-bindgen-shared",
]
[[package]]
name = "wasm-bindgen-macro"
version = "0.2.86"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "14d6b024f1a526bb0234f52840389927257beb670610081360e5a03c5df9c258"
dependencies = [
"quote",
"wasm-bindgen-macro-support",
]
[[package]]
name = "wasm-bindgen-macro-support"
version = "0.2.86"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e128beba882dd1eb6200e1dc92ae6c5dbaa4311aa7bb211ca035779e5efc39f8"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.18",
"wasm-bindgen-backend",
"wasm-bindgen-shared",
]
[[package]]
name = "wasm-bindgen-shared"
version = "0.2.86"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ed9d5b4305409d1fc9482fee2d7f9bcbf24b3972bf59817ef757e23982242a93"
[[package]]
name = "winapi"
version = "0.3.9"

@ -2,4 +2,12 @@
members = [
"lib",
"bin",
"wasm",
]
[profile.release]
panic = "abort"
lto = "fat"
# Profile overrides are keyed by package *name*, not by the workspace member
# directory; the crate living in `wasm/` is named `utfdump_wasm`.
[profile.release.package.utfdump_wasm]
opt-level = "z"

@ -0,0 +1,11 @@
[package]
name = "utfdump_wasm"
version = "0.1.0"
edition = "2021"
[lib]
crate-type = ["cdylib", "rlib"]
[dependencies]
utfdump = { path = "../lib" }
wasm-bindgen = "0.2.86"

@ -0,0 +1,133 @@
use std::sync::OnceLock;
use utfdump::{UnicodeData, CombiningClass, CharData};
use wasm_bindgen::prelude::wasm_bindgen;
/// Newtype around [`CharData`] that is exported through `wasm-bindgen`.
///
/// The inner value is private; its data is read through the accessor methods
/// defined on this type.
#[wasm_bindgen]
pub struct WbgCharData(CharData<'static>);
#[wasm_bindgen]
impl WbgCharData {
    /// Returns the character's name as an owned `String`.
    #[wasm_bindgen]
    pub fn name(&self) -> String {
        String::from(self.0.name())
    }

    /// Encodes the codepoint as UTF-8.
    ///
    /// Returns `None` when the codepoint cannot be converted to a `char`.
    /// Bytes past `len` in the result are left as zero.
    #[wasm_bindgen]
    pub fn encoded_utf8(&self) -> Option<EncodedCodepoint> {
        char::try_from(self.0.codepoint()).ok().map(|ch| {
            let mut bytes = [0u8; 4];
            // A `char` occupies at most 4 UTF-8 bytes, so the cast cannot truncate.
            let used = ch.encode_utf8(&mut bytes).len() as u8;
            EncodedCodepoint::new(bytes, used)
        })
    }

    /// Encodes the codepoint as UTF-16 and serialises each code unit in
    /// little-endian byte order.
    ///
    /// Returns `None` when the codepoint cannot be converted to a `char`.
    /// Bytes past `len` in the result are left as zero.
    #[wasm_bindgen]
    pub fn encoded_utf16_le(&self) -> Option<EncodedCodepoint> {
        let (units, unit_count) = self.encoded_utf16()?;
        let mut bytes = [0u8; 4];
        // Pair every 2-byte slot of the output with the code unit that fills it;
        // only the first `unit_count` units are meaningful.
        let used_units = units.iter().take(usize::from(unit_count));
        for (slot, unit) in bytes.chunks_exact_mut(2).zip(used_units) {
            slot.copy_from_slice(&unit.to_le_bytes());
        }
        Some(EncodedCodepoint::new(bytes, unit_count * 2))
    }

    /// Encodes the codepoint as UTF-16, returning the code-unit buffer and the
    /// number of units actually written (1 or 2).
    ///
    /// Returns `None` when the codepoint cannot be converted to a `char`.
    fn encoded_utf16(&self) -> Option<([u16; 2], u8)> {
        char::try_from(self.0.codepoint()).ok().map(|ch| {
            let mut units = [0u16; 2];
            // A `char` occupies at most 2 UTF-16 code units, so the cast cannot truncate.
            let count = ch.encode_utf16(&mut units).len() as u8;
            (units, count)
        })
    }

    /// Returns the abbreviation of the character's category.
    #[wasm_bindgen]
    pub fn category(&self) -> String {
        let category = self.0.category();
        String::from(category.abbreviation())
    }

    /// Returns the full name of the character's category.
    #[wasm_bindgen]
    pub fn category_full(&self) -> String {
        let category = self.0.category();
        String::from(category.full_name())
    }

    /// Returns the raw numeric value of the character's combining class.
    #[wasm_bindgen]
    pub fn combining_class(&self) -> u8 {
        let class = self.0.combining_class();
        class.0
    }

    /// Returns the abbreviation of the character's bidirectional category.
    #[wasm_bindgen]
    pub fn bidi(&self) -> String {
        let bidi = self.0.bidi_category();
        String::from(bidi.abbreviation())
    }

    /// Returns the full name of the character's bidirectional category.
    #[wasm_bindgen]
    pub fn bidi_full(&self) -> String {
        let bidi = self.0.bidi_category();
        String::from(bidi.full_name())
    }

    /// Returns the character's numeric value as text, if it has one.
    #[wasm_bindgen]
    pub fn numeric_value(&self) -> Option<String> {
        self.0.numeric_value().map(String::from)
    }

    /// Returns the character's `mirrored` flag.
    #[wasm_bindgen]
    pub fn mirrored(&self) -> bool {
        self.0.mirrored()
    }

    /// Returns the value of the character's decomposition mapping, if it has one.
    #[wasm_bindgen]
    pub fn decomp_string(&self) -> Option<String> {
        let mapping = self.0.decomp_mapping()?;
        Some(mapping.value().to_owned())
    }

    /// Returns the character's uppercase mapping, if it has one.
    #[wasm_bindgen]
    pub fn uppercase_string(&self) -> Option<String> {
        self.0.uppercase().map(String::from)
    }

    /// Returns the character's lowercase mapping, if it has one.
    #[wasm_bindgen]
    pub fn lowercase_string(&self) -> Option<String> {
        self.0.lowercase().map(String::from)
    }

    /// Returns the character's titlecase mapping, if it has one.
    #[wasm_bindgen]
    pub fn titlecase_string(&self) -> Option<String> {
        self.0.titlecase().map(String::from)
    }
}
/// Looks up the name of the combining class with the given numeric value.
///
/// Returns `None` when `CombiningClass::name` has no name for that value.
#[wasm_bindgen]
pub fn combining_class_name(combining_class: u8) -> Option<String> {
    let class = CombiningClass(combining_class);
    let name = class.name()?;
    Some(String::from(name))
}
static UNICODE_DATA: OnceLock<UnicodeData> = OnceLock::new();
#[wasm_bindgen]
pub fn codepoint_char_data(codepoint: u32) -> Option<WbgCharData> {
let unicode_data = UNICODE_DATA.get_or_init(|| {
UnicodeData::new()
.unwrap()
});
unicode_data
.get(codepoint)
.map(WbgCharData)
}
/// A fixed four-byte buffer holding one encoded codepoint, together with the
/// number of bytes that are actually in use.
#[wasm_bindgen]
pub struct EncodedCodepoint {
// `wasm-bindgen` unfortunately does not support arrays :(
// The four bytes are therefore exposed as individual fields instead.
/// Byte at index 0 of the encoded buffer.
pub b0: u8,
/// Byte at index 1 of the encoded buffer.
pub b1: u8,
/// Byte at index 2 of the encoded buffer.
pub b2: u8,
/// Byte at index 3 of the encoded buffer.
pub b3: u8,
/// Number of bytes, counted from `b0`, that belong to the encoding.
pub len: u8,
}
impl EncodedCodepoint {
    /// Builds an `EncodedCodepoint` from a four-byte buffer and the number of
    /// bytes in it that are in use.
    fn new(bytes: [u8; 4], len: u8) -> Self {
        // Spread the array over the individual byte fields.
        let [b0, b1, b2, b3] = bytes;
        Self { b0, b1, b2, b3, len }
    }
}
Loading…
Cancel
Save