You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
190 lines
5.5 KiB
Rust
190 lines
5.5 KiB
Rust
use std::{collections::{HashMap, hash_map}, error, fmt, ops::Range};
|
|
|
|
use crate::{
|
|
char_data::{CharData, Category, CombiningClass},
|
|
string_table::{StringTableBufError, StringTableBuf, StringTable},
|
|
};
|
|
|
|
const DATA_ENTRY_SIZE: usize = 8;
|
|
|
|
const DATA_INIT_FLAG: u8 = 1;
|
|
const DATA_REPEATED_FLAG: u8 = 2;
|
|
|
|
fn encode_char_data(
|
|
name_index: u32,
|
|
category: Category,
|
|
combining_class: CombiningClass,
|
|
repeated: bool
|
|
) -> [u8; DATA_ENTRY_SIZE]
|
|
{
|
|
let mut buf = [0u8; DATA_ENTRY_SIZE];
|
|
|
|
buf[0] |= DATA_INIT_FLAG;
|
|
|
|
if repeated {
|
|
buf[0] |= DATA_REPEATED_FLAG;
|
|
}
|
|
|
|
buf[1..5].copy_from_slice(&name_index.to_le_bytes());
|
|
buf[5] = category.byte_repr();
|
|
buf[6] = combining_class.0;
|
|
|
|
buf
|
|
}
|
|
|
|
fn decode_char_data(bytes: [u8; DATA_ENTRY_SIZE])
|
|
-> Option<(u32, Category, CombiningClass, bool)>
|
|
{
|
|
let flags = bytes[0];
|
|
|
|
if flags & DATA_INIT_FLAG == 0 {
|
|
return None;
|
|
}
|
|
|
|
let name_index = u32::from_le_bytes(bytes[1..5].try_into().unwrap());
|
|
let category = Category::from_byte(bytes[5])?;
|
|
let combining_class = CombiningClass(bytes[6]);
|
|
let repeated = flags & DATA_REPEATED_FLAG != 0;
|
|
|
|
Some((name_index, category, combining_class, repeated))
|
|
}
|
|
|
|
pub struct DataStoreBuf {
|
|
data: Vec<u8>,
|
|
strings: StringTableBuf,
|
|
strings_map: HashMap<String, u32>,
|
|
}
|
|
|
|
impl DataStoreBuf {
|
|
pub fn new() -> Self {
|
|
Self {
|
|
data: Vec::new(),
|
|
strings: StringTableBuf::new(),
|
|
strings_map: HashMap::new(),
|
|
}
|
|
}
|
|
|
|
pub fn as_ref_type(&self) -> DataStore {
|
|
DataStore { data: &self.data, strings: &*self.strings }
|
|
}
|
|
|
|
pub fn insert(&mut self, char_data: CharData, range: Range<u32>) -> Result<(), DataBufError> {
|
|
if range.is_empty() {
|
|
return Ok(());
|
|
}
|
|
|
|
let repeated = range.end
|
|
.checked_sub(range.start)
|
|
.map(|len| len > 1)
|
|
.unwrap_or(false);
|
|
|
|
let range = {
|
|
let start = usize::try_from(range.start)
|
|
.map_err(|_| DataBufError::DataOutOfCapacity)?
|
|
.checked_mul(DATA_ENTRY_SIZE)
|
|
.ok_or(DataBufError::DataOutOfCapacity)?;
|
|
let end = usize::try_from(range.end)
|
|
.map_err(|_| DataBufError::DataOutOfCapacity)?
|
|
.checked_mul(DATA_ENTRY_SIZE)
|
|
.ok_or(DataBufError::DataOutOfCapacity)?;
|
|
start..end
|
|
};
|
|
|
|
if let Some(extra_capacity_needed) = range.end.checked_sub(self.data.len()) {
|
|
self.data.try_reserve(extra_capacity_needed)
|
|
.map_err(|_| DataBufError::DataOutOfCapacity)?;
|
|
}
|
|
|
|
let name_index = self.add_string(char_data.name().to_owned())?;
|
|
|
|
let encoded_char_data = encode_char_data(
|
|
name_index,
|
|
char_data.category(),
|
|
char_data.combining_class(),
|
|
repeated
|
|
);
|
|
|
|
if self.data.len() < range.end {
|
|
// Using 0 means that the DATA_INIT_FLAG won't be set, so these won't be valid entries.
|
|
self.data.resize(range.end, 0);
|
|
}
|
|
|
|
for i in range.step_by(DATA_ENTRY_SIZE) {
|
|
self.data[i..(i + DATA_ENTRY_SIZE)].copy_from_slice(&encoded_char_data);
|
|
}
|
|
|
|
Ok(())
|
|
}
|
|
|
|
fn add_string(&mut self, name: String) -> Result<u32, DataBufError> {
|
|
match self.strings_map.entry(name) {
|
|
hash_map::Entry::Occupied(entry) => Ok(*entry.get()),
|
|
hash_map::Entry::Vacant(entry) => {
|
|
let index = self.strings.push(entry.key())?;
|
|
entry.insert(index);
|
|
Ok(index)
|
|
},
|
|
}
|
|
}
|
|
}
|
|
|
|
#[derive(Clone, Copy)]
|
|
pub struct DataStore<'a> {
|
|
data: &'a [u8],
|
|
strings: &'a StringTable,
|
|
}
|
|
|
|
impl<'a> DataStore<'a> {
|
|
pub fn get(self, codepoint: char) -> Option<CharData<'a>> {
|
|
let index = usize::try_from(u32::from(codepoint)).ok()?;
|
|
let start = index.checked_mul(DATA_ENTRY_SIZE)?;
|
|
let end = start.checked_add(DATA_ENTRY_SIZE)?;
|
|
let encoded = self.data.get(start..end)?;
|
|
let (name_index, category, ccc, _repeated) = decode_char_data(encoded.try_into().unwrap())?;
|
|
let name = self.strings.get(name_index)?;
|
|
Some(CharData::from_parts(name, category, ccc))
|
|
}
|
|
|
|
pub fn to_bytes(self) -> Option<([u8; 4], [&'a [u8]; 2])> {
|
|
let strings = self.strings.to_bytes();
|
|
let strings_len = u32::try_from(strings.len())
|
|
.ok()?
|
|
.to_le_bytes();
|
|
Some((strings_len, [strings, self.data]))
|
|
}
|
|
|
|
pub fn from_bytes(bytes: &'a [u8]) -> Option<Self> {
|
|
let strings_len = usize::try_from(
|
|
u32::from_le_bytes(bytes.get(..4)?.try_into().unwrap())
|
|
).ok()?;
|
|
let strings = StringTable::from_bytes(bytes.get(4..(4 + strings_len))?);
|
|
let data = bytes.get((4 + strings_len)..)?;
|
|
Some(Self { data, strings })
|
|
}
|
|
}
|
|
|
|
#[derive(Debug)]
|
|
pub enum DataBufError {
|
|
DataOutOfCapacity,
|
|
StringsMapOutOfCapacity,
|
|
StringTable(StringTableBufError),
|
|
}
|
|
|
|
impl fmt::Display for DataBufError {
|
|
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
|
match self {
|
|
Self::DataOutOfCapacity => write!(f, "data buf out of capacity"),
|
|
Self::StringsMapOutOfCapacity => write!(f, "strings map out of capacity"),
|
|
Self::StringTable(err) => write!(f, "string table error: {}", err),
|
|
}
|
|
}
|
|
}
|
|
|
|
impl error::Error for DataBufError {}
|
|
|
|
impl From<StringTableBufError> for DataBufError {
|
|
fn from(err: StringTableBufError) -> Self {
|
|
Self::StringTable(err)
|
|
}
|
|
}
|