Proof of concept improvement for ShString
parent
4dd61aeaa2
commit
c077cdf610
@ -0,0 +1,223 @@
|
|||||||
|
use std::{
|
||||||
|
borrow,
|
||||||
|
fmt,
|
||||||
|
mem::ManuallyDrop,
|
||||||
|
ops,
|
||||||
|
ptr::{addr_of, addr_of_mut},
|
||||||
|
slice,
|
||||||
|
str,
|
||||||
|
};
|
||||||
|
|
||||||
|
use crate::either::Either::{self, Inl, Inr};
|
||||||
|
|
||||||
|
/// An experimental alternative to `libshire::strings::ShString`, which is able to store one extra
|
||||||
|
/// byte of string data on the stack in the same amount of space.
|
||||||
|
// `repr(C)` is necessary to ensure that `Repr` starts at offset zero, so that it's properly
|
||||||
|
// aligned within the struct.
|
||||||
|
#[repr(C)]
|
||||||
|
pub struct ShString<const N: usize> {
|
||||||
|
repr: Repr<N>,
|
||||||
|
len: u8,
|
||||||
|
_align: [Box<str>; 0],
|
||||||
|
}
|
||||||
|
|
||||||
|
#[repr(C, packed)]
|
||||||
|
union Repr<const N: usize> {
|
||||||
|
stack: [u8; N],
|
||||||
|
heap: ManuallyDrop<Box<str>>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<const N: usize> ShString<N> {
|
||||||
|
const MAX_LEN: u8 = {
|
||||||
|
#[allow(clippy::cast_possible_truncation, clippy::checked_conversions)]
|
||||||
|
if N < u8::MAX as usize {
|
||||||
|
N as u8
|
||||||
|
} else {
|
||||||
|
panic!("`N` must be less than `u8::MAX`")
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
#[must_use]
|
||||||
|
pub fn new<S>(s: S) -> Self
|
||||||
|
where
|
||||||
|
S: AsRef<str>,
|
||||||
|
Box<str>: From<S>,
|
||||||
|
{
|
||||||
|
let bytes = s.as_ref().as_bytes();
|
||||||
|
match u8::try_from(bytes.len()) {
|
||||||
|
Ok(len) if len <= Self::MAX_LEN => {
|
||||||
|
let mut buf = [0u8; N];
|
||||||
|
buf[..usize::from(len)].copy_from_slice(bytes);
|
||||||
|
// SAFETY:
|
||||||
|
// The first `len` bytes of `buf` are copied from a `&str`, so the first `len`
|
||||||
|
// bytes are valid UTF-8. We have already checked that `len` is thess than or equal
|
||||||
|
// to `Self::MAX_LEN`.
|
||||||
|
unsafe { Self::stack_from_raw_parts(buf, len) }
|
||||||
|
},
|
||||||
|
_ => Self::new_heap(s),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// # Safety
|
||||||
|
/// The first `len` bytes of `buf` must be valid UTF-8. `len` must be less than or equal to
|
||||||
|
/// `Self::MAX_LEN` (which is equal to `N`).
|
||||||
|
unsafe fn stack_from_raw_parts(buf: [u8; N], len: u8) -> Self {
|
||||||
|
Self {
|
||||||
|
repr: Repr { stack: buf },
|
||||||
|
len,
|
||||||
|
_align: []
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn new_heap<S>(s: S) -> Self
|
||||||
|
where
|
||||||
|
Box<str>: From<S>,
|
||||||
|
{
|
||||||
|
Self {
|
||||||
|
repr: Repr { heap: ManuallyDrop::new(Box::<str>::from(s)) },
|
||||||
|
len: u8::MAX,
|
||||||
|
_align: [],
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[inline]
|
||||||
|
#[must_use]
|
||||||
|
pub fn as_str(&self) -> &str {
|
||||||
|
match self.variant() {
|
||||||
|
// SAFETY:
|
||||||
|
// `stack` being valid UTF-8 when active is an invariant of `ShString`.
|
||||||
|
Inl(stack) => unsafe { str::from_utf8_unchecked(stack) },
|
||||||
|
Inr(heap) => heap,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[inline]
|
||||||
|
#[must_use]
|
||||||
|
pub fn as_str_mut(&mut self) -> &mut str {
|
||||||
|
match self.variant_mut() {
|
||||||
|
// SAFETY:
|
||||||
|
// `stack` being valid UTF-8 when active is an invariant of `ShString`.
|
||||||
|
Inl(stack) => unsafe { str::from_utf8_unchecked_mut(stack) },
|
||||||
|
Inr(heap) => heap,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[inline(always)]
|
||||||
|
#[must_use]
|
||||||
|
fn variant(&self) -> Either<&[u8], &ManuallyDrop<Box<str>>> {
|
||||||
|
if self.len <= Self::MAX_LEN {
|
||||||
|
let slice = unsafe {
|
||||||
|
// The preferred way to read the fields of a packed struct is with `addr_of`.
|
||||||
|
let ptr = addr_of!(self.repr.stack) as *const u8;
|
||||||
|
let len = usize::from(self.len);
|
||||||
|
slice::from_raw_parts(ptr, len)
|
||||||
|
};
|
||||||
|
Inl(slice)
|
||||||
|
} else {
|
||||||
|
// SAFETY:
|
||||||
|
// `len` is greater than `Self::MAX_LEN`, which means that the `heap` field is active.
|
||||||
|
// `heap` is properly aligned because it is stored at offset 0 of `ShString` (since
|
||||||
|
// both `ShString` and `Repr` use `repr(C)`), and the alignment of `ShString` is equal
|
||||||
|
// to the alignment of `Box<str>`.
|
||||||
|
let heap = unsafe { &*addr_of!(self.repr.heap) };
|
||||||
|
Inr(heap)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[inline(always)]
|
||||||
|
#[must_use]
|
||||||
|
fn variant_mut(&mut self) -> Either<&mut [u8], &mut ManuallyDrop<Box<str>>> {
|
||||||
|
if self.len <= Self::MAX_LEN {
|
||||||
|
let slice = unsafe {
|
||||||
|
let ptr = addr_of_mut!(self.repr.stack) as *mut u8;
|
||||||
|
let len = usize::from(self.len);
|
||||||
|
slice::from_raw_parts_mut(ptr, len)
|
||||||
|
};
|
||||||
|
Inl(slice)
|
||||||
|
} else {
|
||||||
|
let heap = unsafe { &mut *addr_of_mut!(self.repr.heap) };
|
||||||
|
Inr(heap)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<const N: usize> Drop for ShString<N> {
|
||||||
|
fn drop(&mut self) {
|
||||||
|
if let Inr(heap) = self.variant_mut() {
|
||||||
|
// SAFETY:
|
||||||
|
// Since this is a drop implementation, `heap` will not be used again after this.
|
||||||
|
unsafe {
|
||||||
|
let _ = ManuallyDrop::take(heap);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<const N: usize> ops::Deref for ShString<N> {
|
||||||
|
type Target = str;
|
||||||
|
|
||||||
|
#[inline]
|
||||||
|
fn deref(&self) -> &Self::Target {
|
||||||
|
self.as_str()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<const N: usize> ops::DerefMut for ShString<N> {
|
||||||
|
#[inline]
|
||||||
|
fn deref_mut(&mut self) -> &mut Self::Target {
|
||||||
|
self.as_str_mut()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<const N: usize> AsRef<str> for ShString<N> {
|
||||||
|
#[inline]
|
||||||
|
fn as_ref(&self) -> &str {
|
||||||
|
self
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<const N: usize> AsMut<str> for ShString<N> {
|
||||||
|
#[inline]
|
||||||
|
fn as_mut(&mut self) -> &mut str {
|
||||||
|
self
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<const N: usize> borrow::Borrow<str> for ShString<N> {
|
||||||
|
#[inline]
|
||||||
|
fn borrow(&self) -> &str {
|
||||||
|
self
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<const N: usize> borrow::BorrowMut<str> for ShString<N> {
|
||||||
|
#[inline]
|
||||||
|
fn borrow_mut(&mut self) -> &mut str {
|
||||||
|
self
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<const N: usize> fmt::Debug for ShString<N> {
|
||||||
|
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||||
|
fmt::Debug::fmt(&**self, f)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<const N: usize> fmt::Display for ShString<N> {
|
||||||
|
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||||
|
fmt::Display::fmt(&**self, f)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TODO: ** lots of MIRI tests! **
|
||||||
|
|
||||||
|
#[cfg(test)]
|
||||||
|
mod tests {
|
||||||
|
use super::*;
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_shstring_align() {
|
||||||
|
use std::mem::align_of;
|
||||||
|
assert_eq!(align_of::<ShString<23>>(), align_of::<Box<str>>());
|
||||||
|
}
|
||||||
|
}
|
||||||
Loading…
Reference in New Issue