remove core
parent
f9430db2f9
commit
6e8d197ae4
@ -1,6 +1,5 @@
|
|||||||
[workspace]
|
[workspace]
|
||||||
members = [
|
members = [
|
||||||
"core",
|
|
||||||
"lib",
|
"lib",
|
||||||
"bin",
|
"bin",
|
||||||
]
|
]
|
||||||
|
|||||||
@ -1,10 +0,0 @@
|
|||||||
[package]
|
|
||||||
name = "utfdump_core"
|
|
||||||
version = "0.1.0"
|
|
||||||
edition = "2021"
|
|
||||||
authors = ["Tom Panton <pantonshire@gmail.com>"]
|
|
||||||
license = "MIT"
|
|
||||||
repository = "https://github.com/pantonshire/utfdump"
|
|
||||||
description = "Core library for the utfdump command-line tool"
|
|
||||||
|
|
||||||
[dependencies]
|
|
||||||
@ -1,281 +0,0 @@
|
|||||||
use std::fmt;
|
|
||||||
|
|
||||||
#[derive(Clone, Debug)]
|
|
||||||
pub struct CharData<'a> {
|
|
||||||
name: &'a str,
|
|
||||||
category: Category,
|
|
||||||
combining_class: CombiningClass,
|
|
||||||
}
|
|
||||||
|
|
||||||
impl<'a> CharData<'a> {
|
|
||||||
pub fn from_row(row: &'a str) -> Option<(u32, Self)> {
|
|
||||||
let mut fields = [""; 15];
|
|
||||||
for (i, field) in row.splitn(15, ';').enumerate() {
|
|
||||||
fields[i] = field;
|
|
||||||
}
|
|
||||||
|
|
||||||
let codepoint = u32::from_str_radix(fields[0], 16).ok()?;
|
|
||||||
let name = fields[1];
|
|
||||||
let category = Category::from_abbr(fields[2])?;
|
|
||||||
let ccc = CombiningClass(u8::from_str_radix(fields[3], 10).ok()?);
|
|
||||||
|
|
||||||
Some((codepoint, Self::from_parts(name, category, ccc)))
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn from_parts(name: &'a str, category: Category, combining_class: CombiningClass) -> Self {
|
|
||||||
Self { name, category, combining_class }
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn with_name<'b>(self, name: &'a str) -> CharData<'b>
|
|
||||||
where
|
|
||||||
'a: 'b,
|
|
||||||
{
|
|
||||||
Self { name, ..self }
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn name(&self) -> &'a str {
|
|
||||||
self.name
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn category(&self) -> Category {
|
|
||||||
self.category
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn combining_class(&self) -> CombiningClass {
|
|
||||||
self.combining_class
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// A Unicode general category, identified by its two-letter abbreviation.
///
/// The declaration order is significant: [`Category::byte_repr`] is the variant's position in
/// this list, and [`Category::from_byte`] maps the same numbers (0 through 29) back to variants.
/// Reordering the variants therefore changes the byte encoding.
#[derive(Clone, Copy, PartialEq, Eq, Debug)]
pub enum Category {
    Lu,
    Ll,
    Lt,
    Mn,
    Mc,
    Me,
    Nd,
    Nl,
    No,
    Zs,
    Zl,
    Zp,
    Cc,
    Cf,
    Cs,
    Co,
    Cn,
    Lm,
    Lo,
    Pc,
    Pd,
    Ps,
    Pe,
    Pi,
    Pf,
    Po,
    Sm,
    Sc,
    Sk,
    So,
}

impl Category {
    /// Decodes a category from its single-byte representation.
    ///
    /// Returns `None` for any byte outside `0..=29`. This is the inverse of
    /// [`Category::byte_repr`].
    pub fn from_byte(b: u8) -> Option<Self> {
        match b {
            0 => Some(Self::Lu),
            1 => Some(Self::Ll),
            2 => Some(Self::Lt),
            3 => Some(Self::Mn),
            4 => Some(Self::Mc),
            5 => Some(Self::Me),
            6 => Some(Self::Nd),
            7 => Some(Self::Nl),
            8 => Some(Self::No),
            9 => Some(Self::Zs),
            10 => Some(Self::Zl),
            11 => Some(Self::Zp),
            12 => Some(Self::Cc),
            13 => Some(Self::Cf),
            14 => Some(Self::Cs),
            15 => Some(Self::Co),
            16 => Some(Self::Cn),
            17 => Some(Self::Lm),
            18 => Some(Self::Lo),
            19 => Some(Self::Pc),
            20 => Some(Self::Pd),
            21 => Some(Self::Ps),
            22 => Some(Self::Pe),
            23 => Some(Self::Pi),
            24 => Some(Self::Pf),
            25 => Some(Self::Po),
            26 => Some(Self::Sm),
            27 => Some(Self::Sc),
            28 => Some(Self::Sk),
            29 => Some(Self::So),
            _ => None,
        }
    }

    /// Encodes the category as a single byte (the variant's declaration index).
    pub fn byte_repr(self) -> u8 {
        self as u8
    }

    /// Parses a two-letter abbreviation such as `"Lu"`.
    ///
    /// The match is exact and case-sensitive; anything else yields `None`.
    pub fn from_abbr(s: &str) -> Option<Self> {
        match s {
            "Lu" => Some(Self::Lu),
            "Ll" => Some(Self::Ll),
            "Lt" => Some(Self::Lt),
            "Mn" => Some(Self::Mn),
            "Mc" => Some(Self::Mc),
            "Me" => Some(Self::Me),
            "Nd" => Some(Self::Nd),
            "Nl" => Some(Self::Nl),
            "No" => Some(Self::No),
            "Zs" => Some(Self::Zs),
            "Zl" => Some(Self::Zl),
            "Zp" => Some(Self::Zp),
            "Cc" => Some(Self::Cc),
            "Cf" => Some(Self::Cf),
            "Cs" => Some(Self::Cs),
            "Co" => Some(Self::Co),
            "Cn" => Some(Self::Cn),
            "Lm" => Some(Self::Lm),
            "Lo" => Some(Self::Lo),
            "Pc" => Some(Self::Pc),
            "Pd" => Some(Self::Pd),
            "Ps" => Some(Self::Ps),
            "Pe" => Some(Self::Pe),
            "Pi" => Some(Self::Pi),
            "Pf" => Some(Self::Pf),
            "Po" => Some(Self::Po),
            "Sm" => Some(Self::Sm),
            "Sc" => Some(Self::Sc),
            "Sk" => Some(Self::Sk),
            "So" => Some(Self::So),
            _ => None,
        }
    }

    /// The two-letter abbreviation of this category; the inverse of [`Category::from_abbr`].
    pub fn abbr(self) -> &'static str {
        match self {
            Self::Lu => "Lu",
            Self::Ll => "Ll",
            Self::Lt => "Lt",
            Self::Mn => "Mn",
            Self::Mc => "Mc",
            Self::Me => "Me",
            Self::Nd => "Nd",
            Self::Nl => "Nl",
            Self::No => "No",
            Self::Zs => "Zs",
            Self::Zl => "Zl",
            Self::Zp => "Zp",
            Self::Cc => "Cc",
            Self::Cf => "Cf",
            Self::Cs => "Cs",
            Self::Co => "Co",
            Self::Cn => "Cn",
            Self::Lm => "Lm",
            Self::Lo => "Lo",
            Self::Pc => "Pc",
            Self::Pd => "Pd",
            Self::Ps => "Ps",
            Self::Pe => "Pe",
            Self::Pi => "Pi",
            Self::Pf => "Pf",
            Self::Po => "Po",
            Self::Sm => "Sm",
            Self::Sc => "Sc",
            Self::Sk => "Sk",
            Self::So => "So",
        }
    }

    /// A human-readable description of the category in the form `"Group, Kind"`.
    pub fn full_name(self) -> &'static str {
        match self {
            Self::Lu => "Letter, Uppercase",
            Self::Ll => "Letter, Lowercase",
            Self::Lt => "Letter, Titlecase",
            Self::Mn => "Mark, Non-Spacing",
            Self::Mc => "Mark, Spacing Combining",
            Self::Me => "Mark, Enclosing",
            Self::Nd => "Number, Decimal Digit",
            Self::Nl => "Number, Letter",
            Self::No => "Number, Other",
            Self::Zs => "Separator, Space",
            Self::Zl => "Separator, Line",
            // Previously "Separator: Paragraph"; every other entry uses a comma separator.
            Self::Zp => "Separator, Paragraph",
            Self::Cc => "Other, Control",
            Self::Cf => "Other, Format",
            Self::Cs => "Other, Surrogate",
            Self::Co => "Other, Private Use",
            Self::Cn => "Other, Not Assigned",
            Self::Lm => "Letter, Modifier",
            Self::Lo => "Letter, Other",
            Self::Pc => "Punctuation, Connector",
            Self::Pd => "Punctuation, Dash",
            Self::Ps => "Punctuation, Open",
            Self::Pe => "Punctuation, Close",
            Self::Pi => "Punctuation, Initial Quote",
            Self::Pf => "Punctuation, Final Quote",
            Self::Po => "Punctuation, Other",
            Self::Sm => "Symbol, Math",
            Self::Sc => "Symbol, Currency",
            Self::Sk => "Symbol, Modifier",
            Self::So => "Symbol, Other",
        }
    }
}
|
|
||||||
|
|
||||||
impl fmt::Display for Category {
|
|
||||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
|
||||||
write!(f, "{}", self.abbr())
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// A canonical combining class, stored as its raw numeric value.
#[derive(Clone, Copy, PartialEq, Eq, Debug)]
pub struct CombiningClass(pub u8);

impl CombiningClass {
    /// Returns the symbolic name of this combining class, if it has one.
    ///
    /// Only the values listed in the table below are named; every other value yields `None`.
    pub fn name(self) -> Option<&'static str> {
        // (numeric value, symbolic name) pairs; each value appears once.
        const NAMED: &[(u8, &str)] = &[
            (0, "Not_Reordered"),
            (1, "Overlay"),
            (6, "Han_Reading"),
            (7, "Nukta"),
            (8, "Kana_Voicing"),
            (9, "Virama"),
            (200, "Attached_Below_Left"),
            (202, "Attached_Below"),
            (214, "Attached_Above"),
            (216, "Attached_Above_Right"),
            (218, "Below_Left"),
            (220, "Below"),
            (222, "Below_Right"),
            (224, "Left"),
            (226, "Right"),
            (228, "Above_Left"),
            (230, "Above"),
            (232, "Above_Right"),
            (233, "Double_Below"),
            (234, "Double_Above"),
            (240, "Iota_Subscript"),
        ];

        NAMED
            .iter()
            .find(|&&(value, _)| value == self.0)
            .map(|&(_, label)| label)
    }

    /// Whether the class is anything other than zero.
    pub fn is_combining(self) -> bool {
        self.0 > 0
    }
}
|
|
||||||
|
|
||||||
impl fmt::Display for CombiningClass {
|
|
||||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
|
||||||
match self.name() {
|
|
||||||
Some(name) => write!(f, "{}", name),
|
|
||||||
None => write!(f, "Ccc{}", self.0),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
@ -1,189 +0,0 @@
|
|||||||
use std::{collections::{HashMap, hash_map}, error, fmt, ops::Range};
|
|
||||||
|
|
||||||
use crate::{
|
|
||||||
char_data::{CharData, Category, CombiningClass},
|
|
||||||
string_table::{StringTableBufError, StringTableBuf, StringTable},
|
|
||||||
};
|
|
||||||
|
|
||||||
/// Number of bytes occupied by one encoded character-data entry.
const DATA_ENTRY_SIZE: usize = 8;

/// Bit in an entry's first byte marking the entry as written. Entries without this bit are
/// rejected when decoding.
const DATA_INIT_FLAG: u8 = 0b01;

/// Bit in an entry's first byte marking the entry as inserted for a range of more than one
/// codepoint.
const DATA_REPEATED_FLAG: u8 = 0b10;
|
|
||||||
|
|
||||||
fn encode_char_data(
|
|
||||||
name_index: u32,
|
|
||||||
category: Category,
|
|
||||||
combining_class: CombiningClass,
|
|
||||||
repeated: bool
|
|
||||||
) -> [u8; DATA_ENTRY_SIZE]
|
|
||||||
{
|
|
||||||
let mut buf = [0u8; DATA_ENTRY_SIZE];
|
|
||||||
|
|
||||||
buf[0] |= DATA_INIT_FLAG;
|
|
||||||
|
|
||||||
if repeated {
|
|
||||||
buf[0] |= DATA_REPEATED_FLAG;
|
|
||||||
}
|
|
||||||
|
|
||||||
buf[1..5].copy_from_slice(&name_index.to_le_bytes());
|
|
||||||
buf[5] = category.byte_repr();
|
|
||||||
buf[6] = combining_class.0;
|
|
||||||
|
|
||||||
buf
|
|
||||||
}
|
|
||||||
|
|
||||||
fn decode_char_data(bytes: [u8; DATA_ENTRY_SIZE])
|
|
||||||
-> Option<(u32, Category, CombiningClass, bool)>
|
|
||||||
{
|
|
||||||
let flags = bytes[0];
|
|
||||||
|
|
||||||
if flags & DATA_INIT_FLAG == 0 {
|
|
||||||
return None;
|
|
||||||
}
|
|
||||||
|
|
||||||
let name_index = u32::from_le_bytes(bytes[1..5].try_into().unwrap());
|
|
||||||
let category = Category::from_byte(bytes[5])?;
|
|
||||||
let combining_class = CombiningClass(bytes[6]);
|
|
||||||
let repeated = flags & DATA_REPEATED_FLAG != 0;
|
|
||||||
|
|
||||||
Some((name_index, category, combining_class, repeated))
|
|
||||||
}
|
|
||||||
|
|
||||||
pub struct DataStoreBuf {
|
|
||||||
data: Vec<u8>,
|
|
||||||
strings: StringTableBuf,
|
|
||||||
strings_map: HashMap<String, u32>,
|
|
||||||
}
|
|
||||||
|
|
||||||
impl DataStoreBuf {
|
|
||||||
pub fn new() -> Self {
|
|
||||||
Self {
|
|
||||||
data: Vec::new(),
|
|
||||||
strings: StringTableBuf::new(),
|
|
||||||
strings_map: HashMap::new(),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn as_ref_type(&self) -> DataStore {
|
|
||||||
DataStore { data: &self.data, strings: &*self.strings }
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn insert(&mut self, char_data: CharData, range: Range<u32>) -> Result<(), DataBufError> {
|
|
||||||
if range.is_empty() {
|
|
||||||
return Ok(());
|
|
||||||
}
|
|
||||||
|
|
||||||
let repeated = range.end
|
|
||||||
.checked_sub(range.start)
|
|
||||||
.map(|len| len > 1)
|
|
||||||
.unwrap_or(false);
|
|
||||||
|
|
||||||
let range = {
|
|
||||||
let start = usize::try_from(range.start)
|
|
||||||
.map_err(|_| DataBufError::DataOutOfCapacity)?
|
|
||||||
.checked_mul(DATA_ENTRY_SIZE)
|
|
||||||
.ok_or(DataBufError::DataOutOfCapacity)?;
|
|
||||||
let end = usize::try_from(range.end)
|
|
||||||
.map_err(|_| DataBufError::DataOutOfCapacity)?
|
|
||||||
.checked_mul(DATA_ENTRY_SIZE)
|
|
||||||
.ok_or(DataBufError::DataOutOfCapacity)?;
|
|
||||||
start..end
|
|
||||||
};
|
|
||||||
|
|
||||||
if let Some(extra_capacity_needed) = range.end.checked_sub(self.data.len()) {
|
|
||||||
self.data.try_reserve(extra_capacity_needed)
|
|
||||||
.map_err(|_| DataBufError::DataOutOfCapacity)?;
|
|
||||||
}
|
|
||||||
|
|
||||||
let name_index = self.add_string(char_data.name().to_owned())?;
|
|
||||||
|
|
||||||
let encoded_char_data = encode_char_data(
|
|
||||||
name_index,
|
|
||||||
char_data.category(),
|
|
||||||
char_data.combining_class(),
|
|
||||||
repeated
|
|
||||||
);
|
|
||||||
|
|
||||||
if self.data.len() < range.end {
|
|
||||||
// Using 0 means that the DATA_INIT_FLAG won't be set, so these won't be valid entries.
|
|
||||||
self.data.resize(range.end, 0);
|
|
||||||
}
|
|
||||||
|
|
||||||
for i in range.step_by(DATA_ENTRY_SIZE) {
|
|
||||||
self.data[i..(i + DATA_ENTRY_SIZE)].copy_from_slice(&encoded_char_data);
|
|
||||||
}
|
|
||||||
|
|
||||||
Ok(())
|
|
||||||
}
|
|
||||||
|
|
||||||
fn add_string(&mut self, name: String) -> Result<u32, DataBufError> {
|
|
||||||
match self.strings_map.entry(name) {
|
|
||||||
hash_map::Entry::Occupied(entry) => Ok(*entry.get()),
|
|
||||||
hash_map::Entry::Vacant(entry) => {
|
|
||||||
let index = self.strings.push(entry.key())?;
|
|
||||||
entry.insert(index);
|
|
||||||
Ok(index)
|
|
||||||
},
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
#[derive(Clone, Copy)]
|
|
||||||
pub struct DataStore<'a> {
|
|
||||||
data: &'a [u8],
|
|
||||||
strings: &'a StringTable,
|
|
||||||
}
|
|
||||||
|
|
||||||
impl<'a> DataStore<'a> {
|
|
||||||
pub fn get(self, codepoint: char) -> Option<CharData<'a>> {
|
|
||||||
let index = usize::try_from(u32::from(codepoint)).ok()?;
|
|
||||||
let start = index.checked_mul(DATA_ENTRY_SIZE)?;
|
|
||||||
let end = start.checked_add(DATA_ENTRY_SIZE)?;
|
|
||||||
let encoded = self.data.get(start..end)?;
|
|
||||||
let (name_index, category, ccc, _repeated) = decode_char_data(encoded.try_into().unwrap())?;
|
|
||||||
let name = self.strings.get(name_index)?;
|
|
||||||
Some(CharData::from_parts(name, category, ccc))
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn to_bytes(self) -> Option<([u8; 4], [&'a [u8]; 2])> {
|
|
||||||
let strings = self.strings.to_bytes();
|
|
||||||
let strings_len = u32::try_from(strings.len())
|
|
||||||
.ok()?
|
|
||||||
.to_le_bytes();
|
|
||||||
Some((strings_len, [strings, self.data]))
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn from_bytes(bytes: &'a [u8]) -> Option<Self> {
|
|
||||||
let strings_len = usize::try_from(
|
|
||||||
u32::from_le_bytes(bytes.get(..4)?.try_into().unwrap())
|
|
||||||
).ok()?;
|
|
||||||
let strings = StringTable::from_bytes(bytes.get(4..(4 + strings_len))?);
|
|
||||||
let data = bytes.get((4 + strings_len)..)?;
|
|
||||||
Some(Self { data, strings })
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
#[derive(Debug)]
|
|
||||||
pub enum DataBufError {
|
|
||||||
DataOutOfCapacity,
|
|
||||||
StringsMapOutOfCapacity,
|
|
||||||
StringTable(StringTableBufError),
|
|
||||||
}
|
|
||||||
|
|
||||||
impl fmt::Display for DataBufError {
|
|
||||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
|
||||||
match self {
|
|
||||||
Self::DataOutOfCapacity => write!(f, "data buf out of capacity"),
|
|
||||||
Self::StringsMapOutOfCapacity => write!(f, "strings map out of capacity"),
|
|
||||||
Self::StringTable(err) => write!(f, "string table error: {}", err),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl error::Error for DataBufError {}
|
|
||||||
|
|
||||||
impl From<StringTableBufError> for DataBufError {
|
|
||||||
fn from(err: StringTableBufError) -> Self {
|
|
||||||
Self::StringTable(err)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
@ -1,6 +0,0 @@
|
|||||||
pub mod char_data;
|
|
||||||
pub mod data_store;
|
|
||||||
mod string_table;
|
|
||||||
|
|
||||||
pub use char_data::{CharData, Category, CombiningClass};
|
|
||||||
pub use data_store::{DataStore, DataStoreBuf, DataBufError};
|
|
||||||
@ -1,98 +0,0 @@
|
|||||||
use std::{fmt, error, str, ops::Deref};
|
|
||||||
|
|
||||||
/// A borrowed view of a [`StringTableBuf`](StringTableBuf): strings laid out back to back, each
/// one preceded by a single byte giving its length in bytes.
#[repr(transparent)]
pub struct StringTable {
    bytes: [u8],
}

impl StringTable {
    /// Reinterprets a byte slice as a string table without copying or validating it.
    pub fn from_bytes(bytes: &[u8]) -> &Self {
        let raw = bytes as *const [u8] as *const Self;
        // SAFETY:
        // `StringTable` is `repr(transparent)` over `[u8]`, so both types share one memory
        // layout, and the returned reference carries the lifetime of the input borrow.
        unsafe { &*raw }
    }

    /// The raw bytes backing this table.
    pub fn to_bytes(&self) -> &[u8] {
        &self.bytes
    }

    /// Reads the string whose entry begins at byte offset `index`.
    ///
    /// `index` is expected to be the start of an entry. The table cannot tell whether that is
    /// the case, so an offset into the middle of an entry may yield `None` or an unintended
    /// string. Neither outcome is unsound: the length is bounds-checked and the bytes are
    /// validated as UTF-8 before being returned, which is why this function is safe.
    ///
    /// Returns `None` if `index` is out of bounds, if the declared length runs past the end of
    /// the table, or if the bytes are not valid UTF-8.
    pub fn get(&self, index: u32) -> Option<&str> {
        let start = usize::try_from(index).ok()?;
        let (&len, rest) = self.bytes.get(start..)?.split_first()?;
        let payload = rest.get(..usize::from(len))?;
        str::from_utf8(payload).ok()
    }
}
|
|
||||||
|
|
||||||
/// An owned [`StringTable`](StringTable). Stores a collection of strings contiguously, with each
|
|
||||||
/// string being prefixed by its length in bytes.
|
|
||||||
pub struct StringTableBuf {
|
|
||||||
buf: Vec<u8>,
|
|
||||||
}
|
|
||||||
|
|
||||||
impl StringTableBuf {
|
|
||||||
pub fn new() -> Self {
|
|
||||||
Self { buf: Vec::new() }
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Append the given string to the table, returning the byte offset in the table at which it
|
|
||||||
/// was stored. This byte offset can then be used to retrieve the string from the table later,
|
|
||||||
/// via `StringTable::get`.
|
|
||||||
pub fn push(&mut self, s: &str) -> Result<u32, StringTableBufError> {
|
|
||||||
let len = u8::try_from(s.len())
|
|
||||||
.map_err(|_| StringTableBufError::StringTooLong)?;
|
|
||||||
|
|
||||||
let index = u32::try_from(self.buf.len())
|
|
||||||
.map_err(|_| StringTableBufError::OutOfCapacity)?;
|
|
||||||
|
|
||||||
self.buf.try_reserve(s.len() + 1)
|
|
||||||
.map_err(|_| StringTableBufError::OutOfCapacity)?;
|
|
||||||
|
|
||||||
self.buf.push(len);
|
|
||||||
self.buf.extend(s.bytes());
|
|
||||||
|
|
||||||
Ok(index)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl AsRef<StringTable> for StringTableBuf {
|
|
||||||
fn as_ref(&self) -> &StringTable {
|
|
||||||
self
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl Deref for StringTableBuf {
|
|
||||||
type Target = StringTable;
|
|
||||||
|
|
||||||
fn deref(&self) -> &Self::Target {
|
|
||||||
StringTable::from_bytes(&self.buf)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Errors reported when appending to a `StringTableBuf`.
#[derive(Debug)]
pub enum StringTableBufError {
    /// The string's byte length does not fit in the one-byte length prefix.
    StringTooLong,
    /// The table cannot grow any further.
    OutOfCapacity,
}

impl fmt::Display for StringTableBufError {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let message = match self {
            StringTableBufError::StringTooLong => "string too long to add to table",
            StringTableBufError::OutOfCapacity => "string table out of capacity",
        };
        f.write_str(message)
    }
}

impl error::Error for StringTableBufError {}
|
|
||||||
Loading…
Reference in New Issue