You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

134 lines
3.4 KiB
Rust

use std::sync::OnceLock;
use utfdump::{UnicodeData, CombiningClass, CharData};
use wasm_bindgen::prelude::wasm_bindgen;
#[wasm_bindgen]
pub struct WbgCharData(CharData<'static>);
#[wasm_bindgen]
impl WbgCharData {
#[wasm_bindgen]
pub fn name(&self) -> String {
self.0.name().to_owned()
}
#[wasm_bindgen]
pub fn encoded_utf8(&self) -> Option<EncodedCodepoint> {
let c = char::try_from(self.0.codepoint()).ok()?;
let mut buf = [0u8; 4];
let len = c.encode_utf8(&mut buf).len() as u8;
Some(EncodedCodepoint::new(buf, len))
}
#[wasm_bindgen]
pub fn encoded_utf16_le(&self) -> Option<EncodedCodepoint> {
let (word_buf, num_words) = self.encoded_utf16()?;
let mut byte_buf = [0u8; 4];
for (i, word) in word_buf.iter().take(usize::from(num_words)).enumerate() {
let le_bytes = word.to_le_bytes();
byte_buf[(i * 2)..(i * 2 + 2)].copy_from_slice(&le_bytes);
}
Some(EncodedCodepoint::new(byte_buf, num_words * 2))
}
fn encoded_utf16(&self) -> Option<([u16; 2], u8)> {
let c = char::try_from(self.0.codepoint()).ok()?;
let mut word_buf = [0u16; 2];
let num_words = c.encode_utf16(&mut word_buf).len() as u8;
Some((word_buf, num_words))
}
#[wasm_bindgen]
pub fn category(&self) -> String {
self.0.category().abbreviation().to_owned()
}
#[wasm_bindgen]
pub fn category_full(&self) -> String {
self.0.category().full_name().to_owned()
}
#[wasm_bindgen]
pub fn combining_class(&self) -> u8 {
self.0.combining_class().0
}
#[wasm_bindgen]
pub fn bidi(&self) -> String {
self.0.bidi_category().abbreviation().to_owned()
}
#[wasm_bindgen]
pub fn bidi_full(&self) -> String {
self.0.bidi_category().full_name().to_owned()
}
#[wasm_bindgen]
pub fn numeric_value(&self) -> Option<String> {
self.0.numeric_value().map(ToOwned::to_owned)
}
#[wasm_bindgen]
pub fn mirrored(&self) -> bool {
self.0.mirrored()
}
#[wasm_bindgen]
pub fn decomp_string(&self) -> Option<String> {
self.0.decomp_mapping().map(|d| d.value().to_owned())
}
#[wasm_bindgen]
pub fn uppercase_string(&self) -> Option<String> {
self.0.uppercase().map(ToOwned::to_owned)
}
#[wasm_bindgen]
pub fn lowercase_string(&self) -> Option<String> {
self.0.lowercase().map(ToOwned::to_owned)
}
#[wasm_bindgen]
pub fn titlecase_string(&self) -> Option<String> {
self.0.titlecase().map(ToOwned::to_owned)
}
}
#[wasm_bindgen]
pub fn combining_class_name(combining_class: u8) -> Option<String> {
CombiningClass(combining_class)
.name()
.map(ToOwned::to_owned)
}
static UNICODE_DATA: OnceLock<UnicodeData> = OnceLock::new();
#[wasm_bindgen]
pub fn codepoint_char_data(codepoint: u32) -> Option<WbgCharData> {
let unicode_data = UNICODE_DATA.get_or_init(|| {
UnicodeData::new()
.unwrap()
});
unicode_data
.get(codepoint)
.map(WbgCharData)
}
#[wasm_bindgen]
pub struct EncodedCodepoint {
// `wasm-bindgen` unfortunately does not support arrays :(
pub b0: u8,
pub b1: u8,
pub b2: u8,
pub b3: u8,
pub len: u8,
}
impl EncodedCodepoint {
fn new([b0, b1, b2, b3]: [u8; 4], len: u8) -> Self {
Self { b0, b1, b2, b3, len }
}
}