Experimental ShString now supports efficient conversion to String

main
Pantonshire 3 years ago
parent a9c431627b
commit 78ee3cfb4e

@ -1,13 +1,14 @@
use std::{ use std::{
borrow, fmt, borrow::{self, Cow},
mem::{ManuallyDrop, MaybeUninit}, convert::Infallible,
fmt,
mem::{self, ManuallyDrop, MaybeUninit},
ops, ops,
ptr::{self, addr_of, addr_of_mut}, ptr::{self, addr_of, addr_of_mut},
slice, str, slice,
str,
}; };
use crate::either::Either::{self, Inl, Inr};
pub type ShString23 = ShString<23>; pub type ShString23 = ShString<23>;
/// An experimental alternative to `libshire::strings::ShString`, which is able to store one extra /// An experimental alternative to `libshire::strings::ShString`, which is able to store one extra
@ -32,7 +33,7 @@ pub struct ShString<const N: usize> {
#[repr(C, packed)] #[repr(C, packed)]
union Repr<const N: usize> { union Repr<const N: usize> {
stack: [MaybeUninit<u8>; N], stack: [MaybeUninit<u8>; N],
heap: ManuallyDrop<Box<str>>, heap: ManuallyDrop<MaybeUninit<Box<str>>>,
} }
impl<const N: usize> ShString<N> { impl<const N: usize> ShString<N> {
@ -94,7 +95,7 @@ impl<const N: usize> ShString<N> {
{ {
Self { Self {
repr: Repr { repr: Repr {
heap: ManuallyDrop::new(Box::<str>::from(s)), heap: ManuallyDrop::new(MaybeUninit::new(Box::from(s))),
}, },
len: u8::MAX, len: u8::MAX,
_align: [], _align: [],
@ -104,120 +105,148 @@ impl<const N: usize> ShString<N> {
#[inline] #[inline]
#[must_use] #[must_use]
pub fn as_str(&self) -> &str { pub fn as_str(&self) -> &str {
match self.variant() { if self.heap_allocated() {
Inl(stack) => stack, // SAFETY:
Inr(heap) => heap, // `len` is greater than `Self::MAX_LEN`, which means that the `heap` field is
// active. `heap` is properly aligned because it is stored at offset 0 of
// `ShString` (since both `ShString` and `Repr` use `repr(C)`), and the alignment
// of `ShString` is equal to the alignment of `Box<str>`.
let box_str = unsafe { &*addr_of!(self.repr.heap) };
unsafe { box_str.assume_init_ref() }
} else {
// Get a pointer to the `stack` field of the union.
// SAFETY:
// Since `len` is no greater than `MAX_LEN`, the `stack` field must be
// active.
let ptr = unsafe { addr_of!(self.repr.stack) }
as *const MaybeUninit<u8>
as *const u8;
// Construct a byte slice from the pointer to the string data and the length.
// SAFETY:
// The first `len` bytes of `stack` are always initialised, as this is an
// invariant of `ShString`.
let bytes = unsafe { slice::from_raw_parts(ptr, usize::from(self.len)) };
// Perform an unchecked conversion from the byte slice to a string slice.
// SAFETY:
// The first `len` bytes of `stack` are always valid UTF-8, as this is an
// invariant of `ShString`.
unsafe { str::from_utf8_unchecked(bytes) }
} }
} }
#[inline] #[inline]
#[must_use] #[must_use]
pub fn as_str_mut(&mut self) -> &mut str { pub fn as_str_mut(&mut self) -> &mut str {
match self.variant_mut() { if self.heap_allocated() {
Inl(stack) => stack, // SAFETY:
Inr(heap) => heap, // `len` is greater than `Self::MAX_LEN`, which means that the `heap` field is
} // active. `heap` is properly aligned because it is stored at offset 0 of
} // `ShString` (since both `ShString` and `Repr` use `repr(C)`), and the alignment
// of `ShString` is equal to the alignment of `Box<str>`.
let box_str = unsafe { &mut *addr_of_mut!(self.repr.heap) };
// #[inline] unsafe { box_str.assume_init_mut() }
// #[must_use] } else {
// pub fn into_string(self) -> String { // Get a pointer to the `stack` field of the union.
// match self.variant() { // SAFETY:
// Inl(stack) => stack.to_owned(), // Since `len` is no greater than `MAX_LEN`, the `stack` field must be
// Inr(heap) => heap.into_string(), // active.
// } let ptr = unsafe { addr_of_mut!(self.repr.stack) }
// } as *mut MaybeUninit<u8>
as *mut u8;
#[inline] // Construct a byte slice from the pointer to the string data and the length.
#[must_use] // SAFETY:
pub fn heap_allocated(&self) -> bool { // The first `len` bytes of `stack` are always initialised, as this is an
match self.variant() { // invariant of `ShString`.
Inl(_) => false, let bytes = unsafe { slice::from_raw_parts_mut(ptr, usize::from(self.len)) };
Inr(_) => true,
// Perform an unchecked conversion from the byte slice to a string slice.
// SAFETY:
// The first `len` bytes of `stack` are always valid UTF-8, as this is an
// invariant of `ShString`.
unsafe { str::from_utf8_unchecked_mut(bytes) }
} }
} }
#[inline] #[inline]
#[must_use] #[must_use]
pub fn len(&self) -> usize { pub fn into_string(self) -> String {
match self.variant() { if self.heap_allocated() {
Inl(stack) => stack.len(), // Disable the destructor for `self`; we are transferring ownership of the allocated
Inr(heap) => heap.len(), // memory to the caller, so we don't want to run the drop implementation which would
// free the memory.
let mut this = ManuallyDrop::new(self);
// SAFETY:
// `len` is greater than `Self::MAX_LEN`, which means that the `heap` field is
// active. `heap` is properly aligned because it is stored at offset 0 of
// `ShString` (since both `ShString` and `Repr` use `repr(C)`), and the alignment
// of `ShString` is equal to the alignment of `Box<str>`.
let field_ref = unsafe { &mut *addr_of_mut!(this.repr.heap) };
let manual_box_str = mem::replace(field_ref, ManuallyDrop::new(MaybeUninit::uninit()));
let maybe_box_str = ManuallyDrop::into_inner(manual_box_str);
let box_str = unsafe { maybe_box_str.assume_init() };
box_str.into_string()
} else {
// Get a pointer to the `stack` field of the union.
// SAFETY:
// Since `len` is no greater than `MAX_LEN`, the `stack` field must be
// active.
let ptr = unsafe { addr_of!(self.repr.stack) }
as *const MaybeUninit<u8>
as *const u8;
// Construct a byte slice from the pointer to the string data and the length.
// SAFETY:
// The first `len` bytes of `stack` are always initialised, as this is an
// invariant of `ShString`.
let bytes = unsafe { slice::from_raw_parts(ptr, usize::from(self.len)) };
// Perform an unchecked conversion from the byte slice to a string slice.
// SAFETY:
// The first `len` bytes of `stack` are always valid UTF-8, as this is an
// invariant of `ShString`.
let str_slice = unsafe { str::from_utf8_unchecked(bytes) };
str_slice.to_owned()
} }
} }
#[inline] #[inline]
#[must_use] #[must_use]
pub fn is_empty(&self) -> bool { pub fn heap_allocated(&self) -> bool {
match self.variant() { self.len > Self::MAX_LEN
Inl(stack) => stack.is_empty(),
Inr(heap) => heap.is_empty(),
}
} }
#[inline(always)] #[inline]
#[must_use] #[must_use]
fn variant(&self) -> Either<&str, &ManuallyDrop<Box<str>>> { pub fn len(&self) -> usize {
if self.len <= Self::MAX_LEN { self.as_str().len()
let slice = unsafe {
// Get a pointer to the `stack` field of the union.
// SAFETY:
// Since `len` is less no greater than `MAX_LEN`, the `stack` field must be active.
let ptr = addr_of!(self.repr.stack) as *const MaybeUninit<u8> as *const u8;
// SAFETY:
// The first `len` bytes of `stack` are always initialised, as this is an invariant
// of `ShString`.
let bytes = slice::from_raw_parts(ptr, usize::from(self.len));
// Perform an unchecked conversion from the byte slice to a string slice.
// SAFETY:
// The first `len` bytes of `stack` is always valid UTF-8, as this is an invariant
// of `ShString`.
str::from_utf8_unchecked(bytes)
};
Inl(slice)
} else {
// SAFETY:
// `len` is greater than `Self::MAX_LEN`, which means that the `heap` field is active.
// `heap` is properly aligned because it is stored at offset 0 of `ShString` (since
// both `ShString` and `Repr` use `repr(C)`), and the alignment of `ShString` is equal
// to the alignment of `Box<str>`.
let heap = unsafe { &*addr_of!(self.repr.heap) };
Inr(heap)
}
} }
#[inline(always)] #[inline]
#[must_use] #[must_use]
fn variant_mut(&mut self) -> Either<&mut str, &mut ManuallyDrop<Box<str>>> { pub fn is_empty(&self) -> bool {
if self.len <= Self::MAX_LEN { self.as_str().is_empty()
let slice = unsafe {
let ptr = addr_of_mut!(self.repr.stack) as *mut MaybeUninit<u8> as *mut u8;
let bytes = slice::from_raw_parts_mut(ptr, usize::from(self.len));
// Perform an unchecked conversion from the byte slice to a string slice. This is
// sound because the first `len` bytes of `stack` is always valid UTF-8 when it is
// active, as this is an invariant of `ShString`.
str::from_utf8_unchecked_mut(bytes)
};
Inl(slice)
} else {
let heap = unsafe { &mut *addr_of_mut!(self.repr.heap) };
Inr(heap)
}
} }
} }
impl<const N: usize> Drop for ShString<N> { impl<const N: usize> Drop for ShString<N> {
fn drop(&mut self) { fn drop(&mut self) {
if let Inr(heap) = self.variant_mut() { if self.heap_allocated() {
let heap = unsafe { &mut *addr_of_mut!(self.repr.heap) };
// SAFETY: // SAFETY:
// Since this is a drop implementation, `heap` will not be used again after this. // Since this is a drop implementation, `heap` will not be used again after this.
unsafe { let _ = unsafe { ManuallyDrop::take(heap).assume_init() };
let _ = ManuallyDrop::take(heap);
}
} }
} }
} }
@ -266,6 +295,36 @@ impl<const N: usize> borrow::BorrowMut<str> for ShString<N> {
} }
} }
impl<'a, const N: usize> From<&'a str> for ShString<N> {
#[inline]
fn from(s: &'a str) -> Self {
Self::new(s)
}
}
impl<const N: usize> From<String> for ShString<N> {
#[inline]
fn from(s: String) -> Self {
Self::new(s)
}
}
impl<'a, const N: usize> From<Cow<'a, str>> for ShString<N> {
#[inline]
fn from(s: Cow<'a, str>) -> Self {
Self::new(s)
}
}
impl<const N: usize> str::FromStr for ShString<N> {
type Err = Infallible;
#[inline]
fn from_str(s: &str) -> Result<Self, Self::Err> {
Ok(Self::new(s))
}
}
impl<const N: usize> fmt::Debug for ShString<N> { impl<const N: usize> fmt::Debug for ShString<N> {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
fmt::Debug::fmt(&**self, f) fmt::Debug::fmt(&**self, f)
@ -278,8 +337,6 @@ impl<const N: usize> fmt::Display for ShString<N> {
} }
} }
// TODO: ** lots of MIRI tests! **
#[cfg(test)] #[cfg(test)]
mod tests { mod tests {
use std::borrow::Cow; use std::borrow::Cow;
@ -327,6 +384,23 @@ mod tests {
assert_eq!(s2.as_str(), "THE QUICK BROWN FOX JUMPS OVER THE LAZY DOG"); assert_eq!(s2.as_str(), "THE QUICK BROWN FOX JUMPS OVER THE LAZY DOG");
} }
#[test]
fn test_into_string() {
    // Round-trips each sample through `ShString23::new(..).into_string()` and checks
    // that the text comes back unchanged. The ASCII samples include lengths of 22, 23
    // and 24 bytes, bracketing the `23` in `ShString23` (presumably so that both storage
    // modes are exercised), and the last two samples are multi-byte UTF-8.
    let samples = [
        "",
        "Hello",
        "Somethingfortheweekend",
        "Dichlorodifluoromethane",
        "Electrocardiographically",
        "こんにちは",
        "❤️🧡💛💚💙💜",
    ];

    for expected in samples {
        let round_tripped = ShString23::new(expected).into_string();
        assert_eq!(round_tripped, expected);
    }
}
#[test] #[test]
fn test_len() { fn test_len() {
assert_eq!(ShString23::new("").len(), 0); assert_eq!(ShString23::new("").len(), 0);

Loading…
Cancel
Save