diff --git a/src/strings/experimental.rs b/src/strings/experimental.rs index 3036cb7..9fa8eb1 100644 --- a/src/strings/experimental.rs +++ b/src/strings/experimental.rs @@ -14,21 +14,21 @@ use std::{ pub type InliningString23 = InliningString<23>; -/// An experimental alternative to `libshire::strings::ShString`, which is able to store one extra -/// byte of string data on the stack in the same amount of space. +/// An experimental alternative to `libshire::strings::InliningString`, which is able to store one +/// extra byte of inline string data in the same amount of space. // `repr(C)` is necessary to ensure that `Repr` starts at offset 0, so that it's properly aligned // within the struct. #[repr(C)] pub struct InliningString { repr: Repr, - // When `len` is less than or equal to `MAX_LEN`, `repr.stack` is active and the first `len` - // bytes of `repr.stack` contains initialised, valid UTF-8 data. When it is greater than - // `MAX_LEN`, `repr.heap` is active. + // When `len` is less than or equal to `MAX_LEN`, `repr.inline` is active and the first `len` + // bytes of `repr.inline` contains initialised, valid UTF-8 data. When it is greater than + // `MAX_LEN`, `repr.boxed` is active. // len: u8, len: NonZeroU8, - // A zero-sized field to ensure that `ShString` has an alignment equal to the alignment of - // `Box`, to ensure that `repr.heap` is properly aligned when it is active. + // A zero-sized field to ensure that `InliningString` has an alignment equal to the alignment + // of `Box`, to ensure that `repr.boxed` is properly aligned when it is active. _align: [Box; 0], } @@ -89,11 +89,11 @@ impl InliningString { // The first `len` bytes of `buf` are copied from a `&str`, so the first `len` // bytes are valid UTF-8. We have already checked that `len` is thess than or // equal to `Self::MAX_LEN`. - Self::stack_from_raw_parts(buf, len) + Self::inline_from_raw_parts(buf, len) } }, - _ => Self::new_heap(s), + _ => Self::new_boxed(s), } } @@ -110,7 +110,7 @@ impl InliningString { // SAFETY: // `len` is 0, so the contract that the first `len` bytes of `buf` are initialised and // valid UTF-8 is trivially upheld. - Self::stack_from_raw_parts(buf, 0) + Self::inline_from_raw_parts(buf, 0) } } @@ -118,7 +118,7 @@ impl InliningString { /// The first `len` bytes of `buf` must be initialised and valid UTF-8. `len` must be less than /// or equal to `Self::MAX_LEN` (which is equal to `N`). #[inline] - unsafe fn stack_from_raw_parts(buf: [MaybeUninit; N], len: u8) -> Self { + unsafe fn inline_from_raw_parts(buf: [MaybeUninit; N], len: u8) -> Self { // SAFETY: // The caller is responsible for ensuring that `len` is less than or equal to // `Self::MAX_LEN`, which is no greater than `u8::MAX - 2`. If this contract is upheld, @@ -133,7 +133,7 @@ impl InliningString { } #[inline] - fn new_heap(s: S) -> Self + fn new_boxed(s: S) -> Self where Box: From, { @@ -180,8 +180,8 @@ impl InliningString { // Perform an unchecked conversion from the byte slice to a string slice. // SAFETY: - // The first `len` bytes of `inline` is always valid UTF-8, as this is an - // invariant of `InliningString`. + // The first `len` bytes of `inline` is always valid UTF-8, as this is an invariant + // of `InliningString`. unsafe { str::from_utf8_unchecked(bytes) } }, @@ -193,6 +193,10 @@ impl InliningString { // alignment of `InliningString` is equal to the alignment of `Box`. let box_str = unsafe { &*addr_of!(self.repr.boxed) }; + // SAFETY: + // `repr.boxed` is initialised, as the only time it's uninitialised is when it is + // briefly replaced with a temporary value before the `InliningString` is dropped + // in the `into_string` function. unsafe { box_str.assume_init_ref() } }, } @@ -212,14 +216,14 @@ impl InliningString { // Construct a byte slice from the pointer to the string data and the length. // SAFETY: - // The first `len` bytes of `stack` are always initialised, as this is an - // invariant of `ShString`. + // The first `len` bytes of `inline` are always initialised, as this is an + // invariant of `InliningString`. let bytes = unsafe { slice::from_raw_parts_mut(ptr, usize::from(len)) }; // Perform an unchecked conversion from the byte slice to a string slice. // SAFETY: - // The first `len` bytes of `inline` is always valid UTF-8, as this is an - // invariant of `InliningString`. + // The first `len` bytes of `inline` is always valid UTF-8, as this is an invariant + // of `InliningString`. unsafe { str::from_utf8_unchecked_mut(bytes) } }, @@ -231,6 +235,10 @@ impl InliningString { // alignment of `InliningString` is equal to the alignment of `Box`. let box_str = unsafe { &mut *addr_of_mut!(self.repr.boxed) }; + // SAFETY: + // `repr.boxed` is initialised, as the only time it's uninitialised is when it is + // briefly replaced with a temporary value before the `InliningString` is dropped + // in the `into_string` function. unsafe { box_str.assume_init_mut() } }, } @@ -241,46 +249,57 @@ impl InliningString { pub fn into_string(self) -> String { match self.inline_string_len() { Some(len) => { - // Get a pointer to the `stack` field of the union. + // Get a pointer to the `inline` field of the union. // SAFETY: - // Since `len` is less no greater than `MAX_LEN`, the `stack` field must be - // active. + // Since `inline_string_len` returned `Some`, the `inline` field must be active. let ptr = unsafe { addr_of!(self.repr.inline) } as *const MaybeUninit as *const u8; // Construct a byte slice from the pointer to the string data and the length. // SAFETY: - // The first `len` bytes of `stack` are always initialised, as this is an - // invariant of `ShString`. + // The first `len` bytes of `inline` are always initialised, as this is an + // invariant of `InliningString`. let bytes = unsafe { slice::from_raw_parts(ptr, usize::from(len)) }; // Perform an unchecked conversion from the byte slice to a string slice. // SAFETY: - // The first `len` bytes of `stack` is always valid UTF-8, as this is an - // invariant of `ShString`. + // The first `len` bytes of `inline` is always valid UTF-8, as this is an invariant + // of `InliningString`. let str_slice = unsafe { str::from_utf8_unchecked(bytes) }; str_slice.to_owned() }, None => { - // Disable the destructor for `self`; we are transferring ownership of the allocated - // memory to the caller, so we don't want to run the drop implementation which would - // free the memory. - let mut this = ManuallyDrop::new(self); - - // SAFETY: - // `len` is greater than `Self::MAX_LEN`, which means that the `heap` field is - // active. `heap` is properly aligned because it is stored at offset 0 of - // `ShString` (since both `ShString` and `Repr` use `repr(C)`), and the alignment - // of `ShString` is equal to the alignment of `Box`. - let field_ref = unsafe { &mut *addr_of_mut!(this.repr.boxed) }; - - let manual_box_str = mem::replace(field_ref, ManuallyDrop::new(MaybeUninit::uninit())); + let manual_box_str = { + // Disable the destructor for `self`; we are transferring ownership of the + // allocated memory to the caller, so we don't want to run the destructor which + // would free the memory. + let mut this = ManuallyDrop::new(self); + // SAFETY: + // `inline_string_len` returned `None`, which means that the `boxed` field is + // active. `boxed` is properly aligned because it is stored at offset 0 of + // `InliningString` (since both `InliningString` and `Repr` use `repr(C)`), and + // the alignment of `InliningString` is equal to the alignment of `Box`. + let field_ref = unsafe { &mut *addr_of_mut!(this.repr.boxed) }; + + // Move `repr.boxed` out of the `InliningString`, replacing it with + // uninitialised memory. This is sound because we have ownership of the + // `InliningString` and we will not be doing anything else with it after this + // which calls `assume_init` on `repr.boxed`; at the end of this block, the + // `InliningString` is dropped without calling its destructor. + mem::replace(field_ref, ManuallyDrop::new(MaybeUninit::uninit())) + }; + + // Re-enable the destructor for the boxed string. let maybe_box_str = ManuallyDrop::into_inner(manual_box_str); + // SAFETY: + // The boxed string is initialised, as we obtained it by moving `repr.boxed`, and + // the only time `repr.boxed` is uninitialised is when it is briefly replaced with + // a temporary value in the block above. let box_str = unsafe { maybe_box_str.assume_init() }; box_str.into_string() @@ -310,11 +329,11 @@ impl InliningString { impl Drop for InliningString { fn drop(&mut self) { if self.heap_allocated() { - let heap = unsafe { &mut *addr_of_mut!(self.repr.boxed) }; + let boxed = unsafe { &mut *addr_of_mut!(self.repr.boxed) }; // SAFETY: - // Since this is a drop implementation, `heap` will not be used again after this. - let _ = unsafe { ManuallyDrop::take(heap).assume_init() }; + // Since this is a drop implementation, `boxed` will not be used again after this. + let _ = unsafe { ManuallyDrop::take(boxed).assume_init() }; } } }