Proof of concept improvement for ShString

main
Pantonshire 4 years ago
parent 4dd61aeaa2
commit c077cdf610

@ -2,6 +2,3 @@
name = "libshire"
version = "0.1.0"
edition = "2021"
[dependencies]
serde = { version = "1", optional = true }

@ -0,0 +1,223 @@
use std::{
borrow,
fmt,
mem::ManuallyDrop,
ops,
ptr::{addr_of, addr_of_mut},
slice,
str,
};
use crate::either::Either::{self, Inl, Inr};
/// An experimental alternative to `libshire::strings::ShString`, which is able to store one extra
/// byte of string data on the stack in the same amount of space.
// `repr(C)` is necessary to ensure that `Repr` starts at offset zero, so that it's properly
// aligned within the struct.
#[repr(C)]
pub struct ShString<const N: usize> {
repr: Repr<N>,
len: u8,
_align: [Box<str>; 0],
}
#[repr(C, packed)]
union Repr<const N: usize> {
stack: [u8; N],
heap: ManuallyDrop<Box<str>>,
}
impl<const N: usize> ShString<N> {
const MAX_LEN: u8 = {
#[allow(clippy::cast_possible_truncation, clippy::checked_conversions)]
if N < u8::MAX as usize {
N as u8
} else {
panic!("`N` must be less than `u8::MAX`")
}
};
#[must_use]
pub fn new<S>(s: S) -> Self
where
S: AsRef<str>,
Box<str>: From<S>,
{
let bytes = s.as_ref().as_bytes();
match u8::try_from(bytes.len()) {
Ok(len) if len <= Self::MAX_LEN => {
let mut buf = [0u8; N];
buf[..usize::from(len)].copy_from_slice(bytes);
// SAFETY:
// The first `len` bytes of `buf` are copied from a `&str`, so the first `len`
// bytes are valid UTF-8. We have already checked that `len` is thess than or equal
// to `Self::MAX_LEN`.
unsafe { Self::stack_from_raw_parts(buf, len) }
},
_ => Self::new_heap(s),
}
}
/// # Safety
/// The first `len` bytes of `buf` must be valid UTF-8. `len` must be less than or equal to
/// `Self::MAX_LEN` (which is equal to `N`).
unsafe fn stack_from_raw_parts(buf: [u8; N], len: u8) -> Self {
Self {
repr: Repr { stack: buf },
len,
_align: []
}
}
fn new_heap<S>(s: S) -> Self
where
Box<str>: From<S>,
{
Self {
repr: Repr { heap: ManuallyDrop::new(Box::<str>::from(s)) },
len: u8::MAX,
_align: [],
}
}
#[inline]
#[must_use]
pub fn as_str(&self) -> &str {
match self.variant() {
// SAFETY:
// `stack` being valid UTF-8 when active is an invariant of `ShString`.
Inl(stack) => unsafe { str::from_utf8_unchecked(stack) },
Inr(heap) => heap,
}
}
#[inline]
#[must_use]
pub fn as_str_mut(&mut self) -> &mut str {
match self.variant_mut() {
// SAFETY:
// `stack` being valid UTF-8 when active is an invariant of `ShString`.
Inl(stack) => unsafe { str::from_utf8_unchecked_mut(stack) },
Inr(heap) => heap,
}
}
#[inline(always)]
#[must_use]
fn variant(&self) -> Either<&[u8], &ManuallyDrop<Box<str>>> {
if self.len <= Self::MAX_LEN {
let slice = unsafe {
// The preferred way to read the fields of a packed struct is with `addr_of`.
let ptr = addr_of!(self.repr.stack) as *const u8;
let len = usize::from(self.len);
slice::from_raw_parts(ptr, len)
};
Inl(slice)
} else {
// SAFETY:
// `len` is greater than `Self::MAX_LEN`, which means that the `heap` field is active.
// `heap` is properly aligned because it is stored at offset 0 of `ShString` (since
// both `ShString` and `Repr` use `repr(C)`), and the alignment of `ShString` is equal
// to the alignment of `Box<str>`.
let heap = unsafe { &*addr_of!(self.repr.heap) };
Inr(heap)
}
}
#[inline(always)]
#[must_use]
fn variant_mut(&mut self) -> Either<&mut [u8], &mut ManuallyDrop<Box<str>>> {
if self.len <= Self::MAX_LEN {
let slice = unsafe {
let ptr = addr_of_mut!(self.repr.stack) as *mut u8;
let len = usize::from(self.len);
slice::from_raw_parts_mut(ptr, len)
};
Inl(slice)
} else {
let heap = unsafe { &mut *addr_of_mut!(self.repr.heap) };
Inr(heap)
}
}
}
impl<const N: usize> Drop for ShString<N> {
fn drop(&mut self) {
if let Inr(heap) = self.variant_mut() {
// SAFETY:
// Since this is a drop implementation, `heap` will not be used again after this.
unsafe {
let _ = ManuallyDrop::take(heap);
}
}
}
}
impl<const N: usize> ops::Deref for ShString<N> {
type Target = str;
#[inline]
fn deref(&self) -> &Self::Target {
self.as_str()
}
}
impl<const N: usize> ops::DerefMut for ShString<N> {
#[inline]
fn deref_mut(&mut self) -> &mut Self::Target {
self.as_str_mut()
}
}
impl<const N: usize> AsRef<str> for ShString<N> {
#[inline]
fn as_ref(&self) -> &str {
self
}
}
impl<const N: usize> AsMut<str> for ShString<N> {
#[inline]
fn as_mut(&mut self) -> &mut str {
self
}
}
impl<const N: usize> borrow::Borrow<str> for ShString<N> {
#[inline]
fn borrow(&self) -> &str {
self
}
}
impl<const N: usize> borrow::BorrowMut<str> for ShString<N> {
#[inline]
fn borrow_mut(&mut self) -> &mut str {
self
}
}
impl<const N: usize> fmt::Debug for ShString<N> {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
fmt::Debug::fmt(&**self, f)
}
}
impl<const N: usize> fmt::Display for ShString<N> {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
fmt::Display::fmt(&**self, f)
}
}
// TODO: ** lots of MIRI tests! **
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_shstring_align() {
use std::mem::align_of;
assert_eq!(align_of::<ShString<23>>(), align_of::<Box<str>>());
}
}

@ -1,3 +1,4 @@
pub mod experimental;
pub mod fixed_string;
pub mod shstring;

@ -158,28 +158,6 @@ impl fmt::Display for Uuid {
}
}
#[cfg(feature = "serde")]
impl serde::Serialize for Uuid {
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
where
S: serde::Serializer
{
serializer.serialize_str(&self.as_string())
}
}
#[cfg(feature = "serde")]
impl<'de> serde::Deserialize<'de> for Uuid {
fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
where
D: serde::Deserializer<'de>
{
let s: &str = serde::Deserialize::deserialize(deserializer)?;
s.parse()
.map_err(serde::de::Error::custom)
}
}
#[derive(Debug)]
pub enum ParseError {
NotEnoughGroups(usize),

Loading…
Cancel
Save