You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
134 lines
3.4 KiB
Rust
134 lines
3.4 KiB
Rust
use std::sync::OnceLock;
|
|
|
|
use utfdump::{UnicodeData, CombiningClass, CharData};
|
|
use wasm_bindgen::prelude::wasm_bindgen;
|
|
|
|
#[wasm_bindgen]
|
|
pub struct WbgCharData(CharData<'static>);
|
|
|
|
#[wasm_bindgen]
|
|
impl WbgCharData {
|
|
#[wasm_bindgen]
|
|
pub fn name(&self) -> String {
|
|
self.0.name().to_owned()
|
|
}
|
|
|
|
#[wasm_bindgen]
|
|
pub fn encoded_utf8(&self) -> Option<EncodedCodepoint> {
|
|
let c = char::try_from(self.0.codepoint()).ok()?;
|
|
let mut buf = [0u8; 4];
|
|
let len = c.encode_utf8(&mut buf).len() as u8;
|
|
Some(EncodedCodepoint::new(buf, len))
|
|
}
|
|
|
|
#[wasm_bindgen]
|
|
pub fn encoded_utf16_le(&self) -> Option<EncodedCodepoint> {
|
|
let (word_buf, num_words) = self.encoded_utf16()?;
|
|
let mut byte_buf = [0u8; 4];
|
|
for (i, word) in word_buf.iter().take(usize::from(num_words)).enumerate() {
|
|
let le_bytes = word.to_le_bytes();
|
|
byte_buf[(i * 2)..(i * 2 + 2)].copy_from_slice(&le_bytes);
|
|
}
|
|
Some(EncodedCodepoint::new(byte_buf, num_words * 2))
|
|
}
|
|
|
|
fn encoded_utf16(&self) -> Option<([u16; 2], u8)> {
|
|
let c = char::try_from(self.0.codepoint()).ok()?;
|
|
let mut word_buf = [0u16; 2];
|
|
let num_words = c.encode_utf16(&mut word_buf).len() as u8;
|
|
Some((word_buf, num_words))
|
|
}
|
|
|
|
#[wasm_bindgen]
|
|
pub fn category(&self) -> String {
|
|
self.0.category().abbreviation().to_owned()
|
|
}
|
|
|
|
#[wasm_bindgen]
|
|
pub fn category_full(&self) -> String {
|
|
self.0.category().full_name().to_owned()
|
|
}
|
|
|
|
#[wasm_bindgen]
|
|
pub fn combining_class(&self) -> u8 {
|
|
self.0.combining_class().0
|
|
}
|
|
|
|
#[wasm_bindgen]
|
|
pub fn bidi(&self) -> String {
|
|
self.0.bidi_category().abbreviation().to_owned()
|
|
}
|
|
|
|
#[wasm_bindgen]
|
|
pub fn bidi_full(&self) -> String {
|
|
self.0.bidi_category().full_name().to_owned()
|
|
}
|
|
|
|
#[wasm_bindgen]
|
|
pub fn numeric_value(&self) -> Option<String> {
|
|
self.0.numeric_value().map(ToOwned::to_owned)
|
|
}
|
|
|
|
#[wasm_bindgen]
|
|
pub fn mirrored(&self) -> bool {
|
|
self.0.mirrored()
|
|
}
|
|
|
|
#[wasm_bindgen]
|
|
pub fn decomp_string(&self) -> Option<String> {
|
|
self.0.decomp_mapping().map(|d| d.value().to_owned())
|
|
}
|
|
|
|
#[wasm_bindgen]
|
|
pub fn uppercase_string(&self) -> Option<String> {
|
|
self.0.uppercase().map(ToOwned::to_owned)
|
|
}
|
|
|
|
#[wasm_bindgen]
|
|
pub fn lowercase_string(&self) -> Option<String> {
|
|
self.0.lowercase().map(ToOwned::to_owned)
|
|
}
|
|
|
|
#[wasm_bindgen]
|
|
pub fn titlecase_string(&self) -> Option<String> {
|
|
self.0.titlecase().map(ToOwned::to_owned)
|
|
}
|
|
}
|
|
|
|
#[wasm_bindgen]
|
|
pub fn combining_class_name(combining_class: u8) -> Option<String> {
|
|
CombiningClass(combining_class)
|
|
.name()
|
|
.map(ToOwned::to_owned)
|
|
}
|
|
|
|
/// Shared Unicode data table. It starts empty and is filled on first use by
/// `codepoint_char_data` through `get_or_init`, so later lookups reuse it.
static UNICODE_DATA: OnceLock<UnicodeData> = OnceLock::new();
|
|
|
|
#[wasm_bindgen]
|
|
pub fn codepoint_char_data(codepoint: u32) -> Option<WbgCharData> {
|
|
let unicode_data = UNICODE_DATA.get_or_init(|| {
|
|
UnicodeData::new()
|
|
.unwrap()
|
|
});
|
|
|
|
unicode_data
|
|
.get(codepoint)
|
|
.map(WbgCharData)
|
|
}
|
|
|
|
#[wasm_bindgen]
|
|
pub struct EncodedCodepoint {
|
|
// `wasm-bindgen` unfortunately does not support arrays :(
|
|
pub b0: u8,
|
|
pub b1: u8,
|
|
pub b2: u8,
|
|
pub b3: u8,
|
|
pub len: u8,
|
|
}
|
|
|
|
impl EncodedCodepoint {
|
|
fn new([b0, b1, b2, b3]: [u8; 4], len: u8) -> Self {
|
|
Self { b0, b1, b2, b3, len }
|
|
}
|
|
}
|