Experimental ShString no longer needlessly zeroes memory, added experimental ShString23

main
Pantonshire 3 years ago
parent 93a8cefd06
commit b6f60fcfc3

@ -1,35 +1,45 @@
use std::{ use std::{
borrow, borrow, fmt,
fmt, mem::{ManuallyDrop, MaybeUninit},
mem::ManuallyDrop,
ops, ops,
ptr::{addr_of, addr_of_mut}, ptr::{self, addr_of, addr_of_mut},
slice, slice, str,
str,
}; };
use crate::either::Either::{self, Inl, Inr}; use crate::either::Either::{self, Inl, Inr};
pub type ShString23 = ShString<23>;
/// An experimental alternative to `libshire::strings::ShString`, which is able to store one extra /// An experimental alternative to `libshire::strings::ShString`, which is able to store one extra
/// byte of string data on the stack in the same amount of space. /// byte of string data on the stack in the same amount of space.
// `repr(C)` is necessary to ensure that `Repr` starts at offset zero, so that it's properly
// aligned within the struct. // `repr(C)` is necessary to ensure that `Repr` starts at offset 0, so that it's properly aligned
// within the struct.
#[repr(C)] #[repr(C)]
pub struct ShString<const N: usize> { pub struct ShString<const N: usize> {
repr: Repr<N>, repr: Repr<N>,
// When `len` is less than or equal to `MAX_LEN`, `repr.stack` is active and the first `len`
// bytes of `repr.stack` contain initialised, valid UTF-8 data. When `len` is greater than
// `MAX_LEN`, `repr.heap` is active.
len: u8, len: u8,
// A zero-sized field to ensure that `ShString` has an alignment equal to the alignment of
// `Box<str>`, to ensure that `repr.heap` is properly aligned when it is active.
_align: [Box<str>; 0], _align: [Box<str>; 0],
} }
// `repr(C)` is necessary to ensure that both of the fields start at offset 0. `repr(packed)`
// reduces the alignment to 1, which allows `ShString` to be more compact.
#[repr(C, packed)] #[repr(C, packed)]
union Repr<const N: usize> { union Repr<const N: usize> {
stack: [u8; N], stack: [MaybeUninit<u8>; N],
heap: ManuallyDrop<Box<str>>, heap: ManuallyDrop<Box<str>>,
} }
impl<const N: usize> ShString<N> { impl<const N: usize> ShString<N> {
const MAX_LEN: u8 = { const MAX_LEN: u8 = {
#[allow(clippy::cast_possible_truncation, clippy::checked_conversions)] #[allow(clippy::cast_possible_truncation, clippy::checked_conversions)]
// `MAX_LEN` may be no larger than `u8::MAX - 1` to leave at least one bit pattern to
// represent the "stored on the heap" case.
if N < u8::MAX as usize { if N < u8::MAX as usize {
N as u8 N as u8
} else { } else {
@ -43,17 +53,26 @@ impl<const N: usize> ShString<N> {
S: AsRef<str>, S: AsRef<str>,
Box<str>: From<S>, Box<str>: From<S>,
{ {
let bytes = s.as_ref().as_bytes(); let src = s.as_ref().as_bytes();
match u8::try_from(bytes.len()) {
match u8::try_from(src.len()) {
Ok(len) if len <= Self::MAX_LEN => { Ok(len) if len <= Self::MAX_LEN => {
let mut buf = [0u8; N]; unsafe {
buf[..usize::from(len)].copy_from_slice(bytes); let mut buf = MaybeUninit::<[MaybeUninit<u8>; N]>::uninit()
.assume_init();
let src_ptr = src.as_ptr() as *const MaybeUninit<u8>;
ptr::copy_nonoverlapping(src_ptr, buf.as_mut_ptr(), usize::from(len));
// SAFETY: // SAFETY:
// The first `len` bytes of `buf` are copied from a `&str`, so the first `len` // The first `len` bytes of `buf` are copied from a `&str`, so the first `len`
// bytes are valid UTF-8. We have already checked that `len` is less than or equal // bytes are valid UTF-8. We have already checked that `len` is less than or equal
// to `Self::MAX_LEN`. // to `Self::MAX_LEN`.
unsafe { Self::stack_from_raw_parts(buf, len) } Self::stack_from_raw_parts(buf, len)
}
}, },
_ => Self::new_heap(s), _ => Self::new_heap(s),
} }
} }
@ -61,11 +80,11 @@ impl<const N: usize> ShString<N> {
/// # Safety /// # Safety
/// The first `len` bytes of `buf` must be valid UTF-8. `len` must be less than or equal to /// The first `len` bytes of `buf` must be valid UTF-8. `len` must be less than or equal to
/// `Self::MAX_LEN` (which is equal to `N` when `N` is less than `u8::MAX`). /// `Self::MAX_LEN` (which is equal to `N` when `N` is less than `u8::MAX`).
unsafe fn stack_from_raw_parts(buf: [u8; N], len: u8) -> Self { unsafe fn stack_from_raw_parts(buf: [MaybeUninit<u8>; N], len: u8) -> Self {
Self { Self {
repr: Repr { stack: buf }, repr: Repr { stack: buf },
len, len,
_align: [] _align: [],
} }
} }
@ -74,7 +93,9 @@ impl<const N: usize> ShString<N> {
Box<str>: From<S>, Box<str>: From<S>,
{ {
Self { Self {
repr: Repr { heap: ManuallyDrop::new(Box::<str>::from(s)) }, repr: Repr {
heap: ManuallyDrop::new(Box::<str>::from(s)),
},
len: u8::MAX, len: u8::MAX,
_align: [], _align: [],
} }
@ -84,9 +105,7 @@ impl<const N: usize> ShString<N> {
#[must_use] #[must_use]
pub fn as_str(&self) -> &str { pub fn as_str(&self) -> &str {
match self.variant() { match self.variant() {
// SAFETY: Inl(stack) => stack,
// `stack` being valid UTF-8 when active is an invariant of `ShString`.
Inl(stack) => unsafe { str::from_utf8_unchecked(stack) },
Inr(heap) => heap, Inr(heap) => heap,
} }
} }
@ -95,22 +114,67 @@ impl<const N: usize> ShString<N> {
#[must_use] #[must_use]
pub fn as_str_mut(&mut self) -> &mut str { pub fn as_str_mut(&mut self) -> &mut str {
match self.variant_mut() { match self.variant_mut() {
// SAFETY: Inl(stack) => stack,
// `stack` being valid UTF-8 when active is an invariant of `ShString`.
Inl(stack) => unsafe { str::from_utf8_unchecked_mut(stack) },
Inr(heap) => heap, Inr(heap) => heap,
} }
} }
// #[inline]
// #[must_use]
// pub fn into_string(self) -> String {
// match self.variant() {
// Inl(stack) => stack.to_owned(),
// Inr(heap) => heap.into_string(),
// }
// }
#[inline]
#[must_use]
pub fn heap_allocated(&self) -> bool {
match self.variant() {
Inl(_) => false,
Inr(_) => true,
}
}
#[inline]
#[must_use]
pub fn len(&self) -> usize {
match self.variant() {
Inl(stack) => stack.len(),
Inr(heap) => heap.len(),
}
}
#[inline]
#[must_use]
pub fn is_empty(&self) -> bool {
match self.variant() {
Inl(stack) => stack.is_empty(),
Inr(heap) => heap.is_empty(),
}
}
#[inline(always)] #[inline(always)]
#[must_use] #[must_use]
fn variant(&self) -> Either<&[u8], &ManuallyDrop<Box<str>>> { fn variant(&self) -> Either<&str, &ManuallyDrop<Box<str>>> {
if self.len <= Self::MAX_LEN { if self.len <= Self::MAX_LEN {
let slice = unsafe { let slice = unsafe {
// The preferred way to read the fields of a packed struct is with `addr_of`. // Get a pointer to the `stack` field of the union.
let ptr = addr_of!(self.repr.stack) as *const u8; // SAFETY:
let len = usize::from(self.len); // Since `len` is no greater than `MAX_LEN`, the `stack` field must be active.
slice::from_raw_parts(ptr, len) let ptr = addr_of!(self.repr.stack) as *const MaybeUninit<u8> as *const u8;
// SAFETY:
// The first `len` bytes of `stack` are always initialised, as this is an invariant
// of `ShString`.
let bytes = slice::from_raw_parts(ptr, usize::from(self.len));
// Perform an unchecked conversion from the byte slice to a string slice.
// SAFETY:
// The first `len` bytes of `stack` are always valid UTF-8, as this is an invariant
// of `ShString`.
str::from_utf8_unchecked(bytes)
}; };
Inl(slice) Inl(slice)
} else { } else {
@ -126,12 +190,17 @@ impl<const N: usize> ShString<N> {
#[inline(always)] #[inline(always)]
#[must_use] #[must_use]
fn variant_mut(&mut self) -> Either<&mut [u8], &mut ManuallyDrop<Box<str>>> { fn variant_mut(&mut self) -> Either<&mut str, &mut ManuallyDrop<Box<str>>> {
if self.len <= Self::MAX_LEN { if self.len <= Self::MAX_LEN {
let slice = unsafe { let slice = unsafe {
let ptr = addr_of_mut!(self.repr.stack) as *mut u8; let ptr = addr_of_mut!(self.repr.stack) as *mut MaybeUninit<u8> as *mut u8;
let len = usize::from(self.len);
slice::from_raw_parts_mut(ptr, len) let bytes = slice::from_raw_parts_mut(ptr, usize::from(self.len));
// Perform an unchecked conversion from the byte slice to a string slice. This is
// sound because the first `len` bytes of `stack` are always valid UTF-8 when it is
// active, as this is an invariant of `ShString`.
str::from_utf8_unchecked_mut(bytes)
}; };
Inl(slice) Inl(slice)
} else { } else {
@ -213,11 +282,82 @@ impl<const N: usize> fmt::Display for ShString<N> {
#[cfg(test)] #[cfg(test)]
mod tests { mod tests {
use std::borrow::Cow;
use super::*; use super::*;
#[test] #[test]
fn test_shstring_align() { fn test_align() {
use std::mem::align_of; use std::mem::align_of;
assert_eq!(align_of::<ShString<23>>(), align_of::<Box<str>>()); assert_eq!(align_of::<ShString<23>>(), align_of::<Box<str>>());
} }
#[test]
fn test_new() {
let test_strings = [
"",
"Hello",
"Somethingfortheweekend",
"Dichlorodifluoromethane",
"Electrocardiographically",
"こんにちは",
"❤️🧡💛💚💙💜",
];
for s in test_strings {
let buf = s.to_owned();
let borrowed = Cow::Borrowed(s);
let owned = Cow::<'static, str>::Owned(buf.clone());
assert_eq!(ShString23::new(s).as_str(), s);
assert_eq!(ShString23::new(buf).as_str(), s);
assert_eq!(ShString23::new(borrowed).as_str(), s);
assert_eq!(ShString23::new(owned).as_str(), s);
}
}
#[test]
fn test_as_str_mut() {
let mut s1 = ShString23::new("hello");
s1.as_str_mut().make_ascii_uppercase();
assert_eq!(s1.as_str(), "HELLO");
let mut s2 = ShString23::new("the quick brown fox jumps over the lazy dog");
s2.as_str_mut().make_ascii_uppercase();
assert_eq!(s2.as_str(), "THE QUICK BROWN FOX JUMPS OVER THE LAZY DOG");
}
#[test]
fn test_len() {
assert_eq!(ShString23::new("").len(), 0);
assert_eq!(ShString23::new("Hello").len(), 5);
assert_eq!(ShString23::new("Somethingfortheweekend").len(), 22);
assert_eq!(ShString23::new("Dichlorodifluoromethane").len(), 23);
assert_eq!(ShString23::new("Electrocardiographically").len(), 24);
assert_eq!(ShString23::new("こんにちは").len(), 15);
assert_eq!(ShString23::new("❤️🧡💛💚💙💜").len(), 26);
}
#[test]
fn test_heap_allocated() {
assert!(!ShString23::new("").heap_allocated());
assert!(!ShString23::new("Hello").heap_allocated());
assert!(!ShString23::new("Somethingfortheweekend").heap_allocated());
assert!(!ShString23::new("Dichlorodifluoromethane").heap_allocated());
assert!(!ShString23::new("こんにちは").heap_allocated());
assert!(ShString23::new("Electrocardiographically").heap_allocated());
assert!(ShString23::new("Squishedbuginsidethescreen").heap_allocated());
assert!(ShString23::new("❤️🧡💛💚💙💜").heap_allocated());
}
#[test]
fn test_zero_capacity() {
assert_eq!(ShString::<0>::new("").as_str(), "");
assert!(!ShString::<0>::new("").heap_allocated());
assert_eq!(ShString::<0>::new("a").as_str(), "a");
assert!(ShString::<0>::new("a").heap_allocated());
assert_eq!(ShString::<0>::new("Hello").as_str(), "Hello");
assert!(ShString::<0>::new("Hello").heap_allocated());
}
} }

Loading…
Cancel
Save