From 5ca54944e82aab463d55b5e627d626e029e40d82 Mon Sep 17 00:00:00 2001 From: Pantonshire Date: Sat, 23 Jul 2022 15:18:54 +0100 Subject: [PATCH] Move the experimental InliningString to the inlining module --- src/strings/experimental.rs | 746 ------------------------------------ src/strings/inlining.rs | 528 +++++++++++++++++++------ src/strings/mod.rs | 4 +- test.sh | 9 +- 4 files changed, 418 insertions(+), 869 deletions(-) delete mode 100644 src/strings/experimental.rs diff --git a/src/strings/experimental.rs b/src/strings/experimental.rs deleted file mode 100644 index a00727a..0000000 --- a/src/strings/experimental.rs +++ /dev/null @@ -1,746 +0,0 @@ -use core::{ - borrow, - cmp::Ordering, - convert::Infallible, - fmt, - hash::{Hash, Hasher}, - mem::{self, ManuallyDrop, MaybeUninit}, - num::NonZeroU8, - ops, - ptr::{self, addr_of, addr_of_mut}, - slice, - str, -}; - -#[cfg(not(feature = "std"))] -use alloc::{ - borrow::Cow, - boxed::Box, - rc::Rc, - string::String, - sync::Arc, -}; - -#[cfg(feature = "std")] -use std::{ - borrow::Cow, - rc::Rc, - sync::Arc, -}; - -/// A non-growable string where strings 23 bytes or shorter are stored inline and longer strings -/// use a separate heap allocation. If maximum inline lengths other than 23 are desired, see the -/// more general [InliningString]. -/// -/// 23 bytes is chosen because it is optimal for 64-bit architectures; the minimum possible size -/// of the data structure on 64-bit architectures which always keeps the data properly aligned is -/// 24 bytes (because, when heap-allocated, the data structure contains a 16-byte `Box<[u8]>` with -/// 8-byte alignment and a 1-byte discriminant, and the greatest multiple of 8 which is ≥17 is 24), -/// so there is space for 23 bytes of string data plus the 1-byte discriminant. 
-pub type InliningString23 = InliningString<23>; - -/// A non-growable string which stores small strings inline; strings of length less than or equal -/// to `N` are stored inside the data structure itself, whereas strings of length greater than `N` -/// use a separate heap allocation. -/// -/// This type is intended to be used when lots of small strings need to be stored, and these -/// strings do not need to grow. -/// -/// For 64-bit targets, `N = 23` allows the greatest amount of inline string data to be stored -/// without exceeding the size of a regular [String]. Therefore, [InliningString23] is provided as -/// a type alias for `InliningString<23>`. -/// -/// Although `N` is a `usize`, it may be no greater than `u8::MAX`; larger values will result in a -/// compile-time error. -/// -/// ``` -/// # use libshire::strings::InliningString; -/// let s1 = InliningString::<23>::new("This string is 23 bytes"); -/// assert_eq!(&*s1, "This string is 23 bytes"); -/// assert!(!s1.heap_allocated()); -/// -/// let s2 = InliningString::<23>::new("and this one is 24 bytes"); -/// assert_eq!(&*s2, "and this one is 24 bytes"); -/// assert!(s2.heap_allocated()); -/// ``` -#[repr(C)] -pub struct InliningString { - repr: Repr, - // When `len - 1` is less than or equal to `MAX_LEN`, `repr.inline` is active and the first - // `len - 1` bytes of `repr.inline` contains initialised, valid UTF-8 data. When `len - 1` is - // greater than `MAX_LEN`, `repr.boxed` is active. `NonZeroU8` is used to allow for the niche - // optimisation (https://rust-lang.github.io/unsafe-code-guidelines/glossary.html#niche). - len: NonZeroU8, - // A zero-sized field to ensure that `InliningString` has an alignment equal to the alignment - // of `Box`, to ensure that `repr.boxed` is properly aligned when it is active. - _align: [Box; 0], -} - -// `repr(C)` is necessary to ensure that both of the fields start at offset 0. 
`repr(packed)` -// reduces the alignment to 1, which allows `InliningString` to be more compact. -#[repr(C, packed)] -union Repr { - inline: [MaybeUninit; N], - boxed: ManuallyDrop>>, -} - -impl InliningString { - const MAX_LEN: u8 = { - #[allow(clippy::cast_possible_truncation, clippy::checked_conversions)] - // `MAX_LEN` may be no larger than `u8::MAX - 2` to leave at least one bit pattern to - // represent the "boxed" case and at least one bit pattern for the niche optimisation. - if N <= (u8::MAX - 2) as usize { - N as u8 - } else { - panic!("`N` must be no greater than `u8::MAX - 2`") - } - }; - - #[must_use] - pub fn new(s: S) -> Self - where - S: AsRef, - Box: From, - { - let src = s.as_ref().as_bytes(); - - match u8::try_from(src.len()) { - Ok(len) if len <= Self::MAX_LEN => { - unsafe { - // SAFETY: - // `MaybeUninit::uninit()` is a valid value for `[MaybeUninit; N]`, since - // each element of the array is allowed to be uninitialised. - let mut buf = MaybeUninit::<[MaybeUninit; N]>::uninit() - .assume_init(); - - // Cast the byte slice to a `MaybeUninit` pointer. This is valid because - // `u8` has the same memory layout as `MaybeUninit`. - let src_ptr = src.as_ptr() as *const MaybeUninit; - - // Copy the string data provided by the caller into the buffer. - // SAFETY: - // The source is valid because the source and length are both taken from a - // valid `&[u8]`. We have already checked in the match statement that there is - // enough space in the buffer to fit the string data (i.e. `len` is less than - // or equal to `MAX_LEN`, which is equal to `N`), so the destination is valid. - // The source and destination are trivially properly aligned because the - // alignment of `MaybeUninit` is 1. The source and destination do not - // overlap; the destination buffer is a new variable completely separate from - // the source data. 
- ptr::copy_nonoverlapping(src_ptr, buf.as_mut_ptr(), usize::from(len)); - - // SAFETY: - // The first `len` bytes of `buf` are copied from a `&str`, so the first `len` - // bytes are valid UTF-8. We have already checked that `len` is thess than or - // equal to `Self::MAX_LEN`. - Self::inline_from_raw_parts(buf, len) - } - }, - - _ => Self::new_boxed(s), - } - } - - #[inline] - #[must_use] - pub fn empty() -> Self { - unsafe { - // SAFETY: - // `MaybeUninit::uninit()` is a valid value for `[MaybeUninit; N]`, since each - // element of the array is allowed to be uninitialised. - let buf = MaybeUninit::<[MaybeUninit; N]>::uninit() - .assume_init(); - - // SAFETY: - // `len` is 0, so the contract that the first `len` bytes of `buf` are initialised and - // valid UTF-8 is trivially upheld. - Self::inline_from_raw_parts(buf, 0) - } - } - - /// # Safety - /// The first `len` bytes of `buf` must be initialised and valid UTF-8. `len` must be less than - /// or equal to `Self::MAX_LEN` (which is equal to `N`). - #[inline] - unsafe fn inline_from_raw_parts(buf: [MaybeUninit; N], len: u8) -> Self { - // SAFETY: - // The caller is responsible for ensuring that `len` is less than or equal to - // `Self::MAX_LEN`, which is no greater than `u8::MAX - 2`. If this contract is upheld, - // `len + 1` can never overflow, so `len + 1` can never be zero. - let len = NonZeroU8::new_unchecked(len + 1); - - Self { - repr: Repr { inline: buf }, - len, - _align: [], - } - } - - #[inline] - fn new_boxed(s: S) -> Self - where - Box: From, - { - const U8_NONZERO_MAX: NonZeroU8 = unsafe { NonZeroU8::new_unchecked(u8::MAX) }; - - Self { - repr: Repr { - boxed: ManuallyDrop::new(MaybeUninit::new(Box::from(s))), - }, - len: U8_NONZERO_MAX, - _align: [], - } - } - - /// If the `inline` field is active, returns the length of the inline string data. If the - /// `boxed` field is active, returns `None`. 
- #[inline(always)] - fn inline_string_len(&self) -> Option { - let len = self.len.get() - 1; - if len <= Self::MAX_LEN { - Some(len) - } else { - None - } - } - - #[inline] - #[must_use] - pub fn as_str(&self) -> &str { - match self.inline_string_len() { - Some(len) => { - // Get a pointer to the `inline` field of the union. - // SAFETY: - // Since `inline_string_len` returned `Some`, the `inline` field must be active. - let ptr = unsafe { addr_of!(self.repr.inline) } - as *const MaybeUninit - as *const u8; - - // Construct a byte slice from the pointer to the string data and the length. - // SAFETY: - // The first `len` bytes of `inline` are always initialised, as this is an - // invariant of `InliningString`. - let bytes = unsafe { slice::from_raw_parts(ptr, usize::from(len)) }; - - // Perform an unchecked conversion from the byte slice to a string slice. - // SAFETY: - // The first `len` bytes of `inline` is always valid UTF-8, as this is an invariant - // of `InliningString`. - unsafe { str::from_utf8_unchecked(bytes) } - }, - - None => { - // SAFETY: - // `inline_string_len` returned `None`, which means that the `boxed` field is - // active. `boxed` is properly aligned because it is stored at offset 0 of - // `InliningString` (since both `InliningString` and `Repr` use `repr(C)`), and the - // alignment of `InliningString` is equal to the alignment of `Box`. - let box_str = unsafe { &*addr_of!(self.repr.boxed) }; - - // SAFETY: - // `repr.boxed` is initialised, as the only time it's uninitialised is when it is - // briefly replaced with a temporary value before the `InliningString` is dropped - // in the `into_string` function. - unsafe { box_str.assume_init_ref() } - }, - } - } - - #[inline] - #[must_use] - pub fn as_str_mut(&mut self) -> &mut str { - match self.inline_string_len() { - Some(len) => { - // Get a pointer to the `inline` field of the union. - // SAFETY: - // Since `inline_string_len` returned `Some`, the `inline` field must be active. 
- let ptr = unsafe { addr_of_mut!(self.repr.inline) } - as *mut MaybeUninit - as *mut u8; - - // Construct a byte slice from the pointer to the string data and the length. - // SAFETY: - // The first `len` bytes of `inline` are always initialised, as this is an - // invariant of `InliningString`. - let bytes = unsafe { slice::from_raw_parts_mut(ptr, usize::from(len)) }; - - // Perform an unchecked conversion from the byte slice to a string slice. - // SAFETY: - // The first `len` bytes of `inline` is always valid UTF-8, as this is an invariant - // of `InliningString`. - unsafe { str::from_utf8_unchecked_mut(bytes) } - }, - - None => { - // SAFETY: - // `inline_string_len` returned `None`, which means that the `boxed` field is - // active. `boxed` is properly aligned because it is stored at offset 0 of - // `InliningString` (since both `InliningString` and `Repr` use `repr(C)`), and the - // alignment of `InliningString` is equal to the alignment of `Box`. - let box_str = unsafe { &mut *addr_of_mut!(self.repr.boxed) }; - - // SAFETY: - // `repr.boxed` is initialised, as the only time it's uninitialised is when it is - // briefly replaced with a temporary value before the `InliningString` is dropped - // in the `into_string` function. - unsafe { box_str.assume_init_mut() } - }, - } - } - - #[inline] - #[must_use] - pub fn into_boxed_str(self) -> Box { - match self.inline_string_len() { - Some(len) => { - // Get a pointer to the `inline` field of the union. - // SAFETY: - // Since `inline_string_len` returned `Some`, the `inline` field must be active. - let ptr = unsafe { addr_of!(self.repr.inline) } - as *const MaybeUninit - as *const u8; - - // Construct a byte slice from the pointer to the string data and the length. - // SAFETY: - // The first `len` bytes of `inline` are always initialised, as this is an - // invariant of `InliningString`. 
- let bytes = unsafe { slice::from_raw_parts(ptr, usize::from(len)) }; - - // Perform an unchecked conversion from the byte slice to a string slice. - // SAFETY: - // The first `len` bytes of `inline` is always valid UTF-8, as this is an invariant - // of `InliningString`. - let str_slice = unsafe { str::from_utf8_unchecked(bytes) }; - - Box::from(str_slice) - }, - - None => { - let manual_box_str = { - // Disable the destructor for `self`; we are transferring ownership of the - // allocated memory to the caller, so we don't want to run the destructor which - // would free the memory. - let mut this = ManuallyDrop::new(self); - - // SAFETY: - // `inline_string_len` returned `None`, which means that the `boxed` field is - // active. `boxed` is properly aligned because it is stored at offset 0 of - // `InliningString` (since both `InliningString` and `Repr` use `repr(C)`), and - // the alignment of `InliningString` is equal to the alignment of `Box`. - let field_ref = unsafe { &mut *addr_of_mut!(this.repr.boxed) }; - - // Move `repr.boxed` out of the `InliningString`, replacing it with - // uninitialised memory. This is sound because we have ownership of the - // `InliningString` and we will not be doing anything else with it after this - // which calls `assume_init` on `repr.boxed`; at the end of this block, the - // `InliningString` is dropped without calling its destructor. - mem::replace(field_ref, ManuallyDrop::new(MaybeUninit::uninit())) - }; - - // Re-enable the destructor for the boxed string. - let maybe_box_str = ManuallyDrop::into_inner(manual_box_str); - - // SAFETY: - // The boxed string is initialised, as we obtained it by moving `repr.boxed`, and - // the only time `repr.boxed` is uninitialised is when it is briefly replaced with - // a temporary value in the block above. 
- unsafe { maybe_box_str.assume_init() } - }, - } - } - - #[inline] - #[must_use] - pub fn into_string(self) -> String { - self.into_boxed_str() - .into_string() - } - - #[inline] - #[must_use] - pub fn heap_allocated(&self) -> bool { - self.inline_string_len().is_none() - } - - #[inline] - #[must_use] - pub fn len(&self) -> usize { - self.as_str().len() - } - - #[inline] - #[must_use] - pub fn is_empty(&self) -> bool { - self.as_str().is_empty() - } -} - -impl Drop for InliningString { - fn drop(&mut self) { - if self.heap_allocated() { - let boxed = unsafe { &mut *addr_of_mut!(self.repr.boxed) }; - - // SAFETY: - // Since this is a drop implementation, `boxed` will not be used again after this. - let _ = unsafe { ManuallyDrop::take(boxed).assume_init() }; - } - } -} - -impl Default for InliningString { - #[inline] - fn default() -> Self { - Self::empty() - } -} - -impl ops::Deref for InliningString { - type Target = str; - - #[inline] - fn deref(&self) -> &Self::Target { - self.as_str() - } -} - -impl ops::DerefMut for InliningString { - #[inline] - fn deref_mut(&mut self) -> &mut Self::Target { - self.as_str_mut() - } -} - -impl AsRef for InliningString { - #[inline] - fn as_ref(&self) -> &str { - self - } -} - -impl AsMut for InliningString { - #[inline] - fn as_mut(&mut self) -> &mut str { - self - } -} - -impl borrow::Borrow for InliningString { - #[inline] - fn borrow(&self) -> &str { - self - } -} - -impl borrow::BorrowMut for InliningString { - #[inline] - fn borrow_mut(&mut self) -> &mut str { - self - } -} - -impl str::FromStr for InliningString { - type Err = Infallible; - - #[inline] - fn from_str(s: &str) -> Result { - Ok(Self::new(s)) - } -} - -impl<'a, const N: usize> From<&'a str> for InliningString { - #[inline] - fn from(s: &'a str) -> Self { - Self::new(s) - } -} - -impl From for InliningString { - #[inline] - fn from(s: String) -> Self { - Self::new(s) - } -} - -impl From> for InliningString { - #[inline] - fn from(s: Box) -> Self { - 
Self::new(s) - } -} - -impl<'a, const N: usize> From> for InliningString { - #[inline] - fn from(s: Cow<'a, str>) -> Self { - Self::new(s) - } -} - -impl From> for String { - #[inline] - fn from(s: InliningString) -> Self { - s.into_string() - } -} - -impl From> for Box { - #[inline] - fn from(s: InliningString) -> Self { - s.into_boxed_str() - } -} - -impl From> for Rc { - #[inline] - fn from(s: InliningString) -> Self { - Rc::from(s.into_boxed_str()) - } -} - -impl From> for Arc { - #[inline] - fn from(s: InliningString) -> Self { - Arc::from(s.into_boxed_str()) - } -} - -impl PartialEq> for InliningString { - #[inline] - fn eq(&self, other: &InliningString) -> bool { - **self == **other - } -} - -impl Eq for InliningString {} - -impl PartialOrd> for InliningString { - #[inline] - fn partial_cmp(&self, other: &InliningString) -> Option { - (**self).partial_cmp(&**other) - } -} - -impl Ord for InliningString { - #[inline] - fn cmp(&self, other: &Self) -> Ordering { - (**self).cmp(&**other) - } -} - -impl Hash for InliningString { - #[inline] - fn hash(&self, state: &mut H) { - (**self).hash(state); - } -} - -impl fmt::Debug for InliningString { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - fmt::Debug::fmt(&**self, f) - } -} - -impl fmt::Display for InliningString { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - fmt::Display::fmt(&**self, f) - } -} - -#[cfg(feature = "serde")] -impl serde::Serialize for InliningString { - fn serialize(&self, serializer: S) -> Result - where - S: serde::Serializer - { - serde::Serialize::serialize(&**self, serializer) - } -} - -#[cfg(feature = "serde")] -impl<'de, const N: usize> serde::Deserialize<'de> for InliningString { - fn deserialize(deserializer: D) -> Result - where - D: serde::Deserializer<'de> - { - #[cfg(not(feature = "std"))] - use alloc::vec::Vec; - - use serde::de::{Error, Unexpected, Visitor}; - - struct InliningStringVisitor; - - impl<'de, const N: usize> Visitor<'de> for 
InliningStringVisitor { - type Value = InliningString; - - fn expecting(&self, f: &mut fmt::Formatter) -> fmt::Result { - f.write_str("a string") - } - - fn visit_str(self, v: &str) -> Result { - Ok(Self::Value::new(v)) - } - - fn visit_string(self, v: String) -> Result { - Ok(Self::Value::new(v)) - } - - fn visit_bytes(self, v: &[u8]) -> Result { - str::from_utf8(v) - .map(Self::Value::new) - .map_err(|_| Error::invalid_value(Unexpected::Bytes(v), &self)) - } - - fn visit_byte_buf(self, v: Vec) -> Result { - String::from_utf8(v) - .map(Self::Value::new) - .map_err(|err| { - Error::invalid_value(Unexpected::Bytes(&err.into_bytes()), &self) - }) - } - } - - deserializer.deserialize_string(InliningStringVisitor) - } -} - -#[cfg(test)] -mod tests { - #[cfg(not(feature = "std"))] - use alloc::{ - borrow::{Cow, ToOwned}, - vec::Vec, - }; - - #[cfg(feature = "std")] - use std::borrow::Cow; - - use super::*; - - #[test] - fn test_align() { - use core::mem::align_of; - assert_eq!(align_of::(), align_of::>()); - } - - #[test] - fn test_niche() { - use core::mem::size_of; - assert_eq!(size_of::(), size_of::>()); - } - - #[test] - fn test_empty() { - assert_eq!(InliningString23::empty().as_str(), ""); - assert_eq!(InliningString23::empty().len(), 0); - assert!(!InliningString23::empty().heap_allocated()); - } - - #[test] - fn test_new() { - let test_strings = [ - "", - "Hello", - "Somethingfortheweekend", - "Dichlorodifluoromethane", - "Electrocardiographically", - "こんにちは", - "❤️🧡💛💚💙💜", - ]; - - for s in test_strings { - let buf = s.to_owned(); - let borrowed = Cow::Borrowed(s); - let owned = Cow::<'static, str>::Owned(buf.clone()); - - assert_eq!(InliningString23::new(s).as_str(), s); - assert_eq!(InliningString23::new(buf).as_str(), s); - assert_eq!(InliningString23::new(borrowed).as_str(), s); - assert_eq!(InliningString23::new(owned).as_str(), s); - } - } - - #[test] - fn test_contiguous() { - let test_strings = [ - "", - "Hello", - "Somethingfortheweekend", - 
"Dichlorodifluoromethane", - "Electrocardiographically", - "こんにちは", - "❤️🧡💛💚💙💜", - ]; - - #[allow(clippy::needless_collect)] - let vec = test_strings - .iter() - .copied() - .map(InliningString23::new) - .collect::>(); - - for (i, s) in vec.into_iter().enumerate() { - assert_eq!(s.as_str(), test_strings[i]); - } - } - - #[test] - fn test_as_str_mut() { - let mut s1 = InliningString23::new("hello"); - s1.as_str_mut().make_ascii_uppercase(); - assert_eq!(s1.as_str(), "HELLO"); - - let mut s2 = InliningString23::new("the quick brown fox jumps over the lazy dog"); - s2.as_str_mut().make_ascii_uppercase(); - assert_eq!(s2.as_str(), "THE QUICK BROWN FOX JUMPS OVER THE LAZY DOG"); - } - - #[test] - fn test_into_string() { - let test_strings = [ - "".to_owned(), - "Hello".to_owned(), - "Somethingfortheweekend".to_owned(), - "Dichlorodifluoromethane".to_owned(), - "Electrocardiographically".to_owned(), - "こんにちは".to_owned(), - "❤️🧡💛💚💙💜".to_owned(), - ]; - - for s in test_strings { - assert_eq!(InliningString23::new(&*s).into_string(), s); - } - } - - #[test] - fn test_len() { - assert_eq!(InliningString23::new("").len(), 0); - assert_eq!(InliningString23::new("Hello").len(), 5); - assert_eq!(InliningString23::new("Somethingfortheweekend").len(), 22); - assert_eq!(InliningString23::new("Dichlorodifluoromethane").len(), 23); - assert_eq!(InliningString23::new("Electrocardiographically").len(), 24); - assert_eq!(InliningString23::new("こんにちは").len(), 15); - assert_eq!(InliningString23::new("❤️🧡💛💚💙💜").len(), 26); - } - - #[test] - fn test_heap_allocated() { - assert!(!InliningString23::new("").heap_allocated()); - assert!(!InliningString23::new("Hello").heap_allocated()); - assert!(!InliningString23::new("Somethingfortheweekend").heap_allocated()); - assert!(!InliningString23::new("Dichlorodifluoromethane").heap_allocated()); - assert!(!InliningString23::new("こんにちは").heap_allocated()); - - assert!(InliningString23::new("Electrocardiographically").heap_allocated()); - 
assert!(InliningString23::new("Squishedbuginsidethescreen").heap_allocated()); - assert!(InliningString23::new("❤️🧡💛💚💙💜").heap_allocated()); - } - - #[test] - fn test_zero_capacity() { - assert_eq!(InliningString::<0>::new("").as_str(), ""); - assert!(!InliningString::<0>::new("").heap_allocated()); - assert_eq!(InliningString::<0>::new("a").as_str(), "a"); - assert!(InliningString::<0>::new("a").heap_allocated()); - assert_eq!(InliningString::<0>::new("Hello").as_str(), "Hello"); - assert!(InliningString::<0>::new("Hello").heap_allocated()); - } -} diff --git a/src/strings/inlining.rs b/src/strings/inlining.rs index c854d0b..cede043 100644 --- a/src/strings/inlining.rs +++ b/src/strings/inlining.rs @@ -4,7 +4,11 @@ use core::{ convert::Infallible, fmt, hash::{Hash, Hasher}, + mem::{self, ManuallyDrop, MaybeUninit}, + num::NonZeroU8, ops, + ptr::{self, addr_of, addr_of_mut}, + slice, str, }; @@ -12,8 +16,8 @@ use core::{ use alloc::{ borrow::Cow, boxed::Box, - string::String, rc::Rc, + string::String, sync::Arc, }; @@ -24,65 +28,124 @@ use std::{ sync::Arc, }; -use super::CappedString; - -/// A non-growable string where strings 22 bytes or shorter are stored inline and longer strings -/// use a separate heap allocation. If maximum inline lengths other than 22 are desired, see the +/// A non-growable string where strings 23 bytes or shorter are stored inline and longer strings +/// use a separate heap allocation. If maximum inline lengths other than 23 are desired, see the /// more general [InliningString]. 
-/// -/// 22 bytes is chosen because it is optimal for 64-bit architectures; the minimum possible size +/// +/// 23 bytes is chosen because it is optimal for 64-bit architectures; the minimum possible size /// of the data structure on 64-bit architectures which always keeps the data properly aligned is /// 24 bytes (because, when heap-allocated, the data structure contains a 16-byte `Box<[u8]>` with /// 8-byte alignment and a 1-byte discriminant, and the greatest multiple of 8 which is ≥17 is 24), -/// and the inline variant needs to use 2 bytes for the length and disciminant. -pub type InliningString22 = InliningString<22>; +/// so there is space for 23 bytes of string data plus the 1-byte discriminant. +pub type InliningString23 = InliningString<23>; /// A non-growable string which stores small strings inline; strings of length less than or equal /// to `N` are stored inside the data structure itself, whereas strings of length greater than `N` /// use a separate heap allocation. -/// +/// /// This type is intended to be used when lots of small strings need to be stored, and these /// strings do not need to grow. -/// -/// For 64-bit targets, `N = 22` allows the greatest amount of inline string data to be stored -/// without exceeding the size of a regular [String]. Therefore, [InliningString22] is provided as -/// a type alias for `InliningString<22>`. -/// +/// +/// For 64-bit targets, `N = 23` allows the greatest amount of inline string data to be stored +/// without exceeding the size of a regular [String]. Therefore, [InliningString23] is provided as +/// a type alias for `InliningString<23>`. +/// /// Although `N` is a `usize`, it may be no greater than `u8::MAX`; larger values will result in a /// compile-time error. 
-/// +/// /// ``` /// # use libshire::strings::InliningString; -/// let s1 = InliningString::<22>::new("Hello, InliningString!"); -/// assert_eq!(&*s1, "Hello, InliningString!"); +/// let s1 = InliningString::<23>::new("This string is 23 bytes"); +/// assert_eq!(&*s1, "This string is 23 bytes"); /// assert!(!s1.heap_allocated()); -/// -/// let s2 = InliningString::<22>::new("This string is 23 bytes"); -/// assert_eq!(&*s2, "This string is 23 bytes"); +/// +/// let s2 = InliningString::<23>::new("and this one is 24 bytes"); +/// assert_eq!(&*s2, "and this one is 24 bytes"); /// assert!(s2.heap_allocated()); /// ``` -#[derive(Clone)] -pub struct InliningString(Repr); +#[repr(C)] +pub struct InliningString { + repr: Repr, + // When `len - 1` is less than or equal to `MAX_LEN`, `repr.inline` is active and the first + // `len - 1` bytes of `repr.inline` contains initialised, valid UTF-8 data. When `len - 1` is + // greater than `MAX_LEN`, `repr.boxed` is active. `NonZeroU8` is used to allow for the niche + // optimisation (https://rust-lang.github.io/unsafe-code-guidelines/glossary.html#niche). + len: NonZeroU8, + // A zero-sized field to ensure that `InliningString` has an alignment equal to the alignment + // of `Box`, to ensure that `repr.boxed` is properly aligned when it is active. + _align: [Box; 0], +} + +// `repr(C)` is necessary to ensure that both of the fields start at offset 0. `repr(packed)` +// reduces the alignment to 1, which allows `InliningString` to be more compact. +#[repr(C, packed)] +union Repr { + inline: [MaybeUninit; N], + boxed: ManuallyDrop>>, +} impl InliningString { + const MAX_LEN: u8 = { + #[allow(clippy::cast_possible_truncation, clippy::checked_conversions)] + // `MAX_LEN` may be no larger than `u8::MAX - 2` to leave at least one bit pattern to + // represent the "boxed" case and at least one bit pattern for the niche optimisation. 
+ if N <= (u8::MAX - 2) as usize { + N as u8 + } else { + panic!("`N` must be no greater than `u8::MAX - 2`") + } + }; + /// Creates a new `InliningString` from the given string, storing the string data inline if /// possible or creating a new heap allocation otherwise. /// /// ``` /// # use libshire::strings::InliningString; - /// let s = InliningString::<22>::new("Hello, InliningString!"); + /// let s = InliningString::<23>::new("Hello, InliningString!"); /// assert_eq!(&*s, "Hello, InliningString!"); /// ``` - #[inline] #[must_use] pub fn new(s: S) -> Self where S: AsRef, Box: From, { - match CappedString::new(&s) { - Ok(buf) => Self(Repr::Inline(buf)), - Err(_) => Self(Repr::Boxed(Box::::from(s))), + let src = s.as_ref().as_bytes(); + + match u8::try_from(src.len()) { + Ok(len) if len <= Self::MAX_LEN => { + unsafe { + // SAFETY: + // `MaybeUninit::uninit()` is a valid value for `[MaybeUninit; N]`, since + // each element of the array is allowed to be uninitialised. + let mut buf = MaybeUninit::<[MaybeUninit; N]>::uninit() + .assume_init(); + + // Cast the byte slice to a `MaybeUninit` pointer. This is valid because + // `u8` has the same memory layout as `MaybeUninit`. + let src_ptr = src.as_ptr() as *const MaybeUninit; + + // Copy the string data provided by the caller into the buffer. + // SAFETY: + // The source is valid because the source and length are both taken from a + // valid `&[u8]`. We have already checked in the match statement that there is + // enough space in the buffer to fit the string data (i.e. `len` is less than + // or equal to `MAX_LEN`, which is equal to `N`), so the destination is valid. + // The source and destination are trivially properly aligned because the + // alignment of `MaybeUninit` is 1. The source and destination do not + // overlap; the destination buffer is a new variable completely separate from + // the source data. 
+ ptr::copy_nonoverlapping(src_ptr, buf.as_mut_ptr(), usize::from(len)); + + // SAFETY: + // The first `len` bytes of `buf` are copied from a `&str`, so the first `len` + // bytes are valid UTF-8. We have already checked that `len` is less than or + // equal to `Self::MAX_LEN`. + Self::inline_from_raw_parts(buf, len) + } + }, + + _ => Self::new_boxed(s), } } @@ -90,105 +153,278 @@ impl InliningString { /// /// ``` /// # use libshire::strings::InliningString; - /// let s = InliningString::<22>::empty(); + /// let s = InliningString::<23>::empty(); /// assert_eq!(&*s, ""); /// ``` #[inline] #[must_use] - pub const fn empty() -> Self { - Self(Repr::Inline(CappedString::empty())) + pub fn empty() -> Self { + unsafe { + // SAFETY: + // `MaybeUninit::uninit()` is a valid value for `[MaybeUninit; N]`, since each + // element of the array is allowed to be uninitialised. + let buf = MaybeUninit::<[MaybeUninit; N]>::uninit() + .assume_init(); + + // SAFETY: + // `len` is 0, so the contract that the first `len` bytes of `buf` are initialised and + // valid UTF-8 is trivially upheld. + Self::inline_from_raw_parts(buf, 0) + } + } + + /// # Safety + /// The first `len` bytes of `buf` must be initialised and valid UTF-8. `len` must be less than + /// or equal to `Self::MAX_LEN` (which is equal to `N`). + #[inline] + unsafe fn inline_from_raw_parts(buf: [MaybeUninit; N], len: u8) -> Self { + // SAFETY: + // The caller is responsible for ensuring that `len` is less than or equal to + // `Self::MAX_LEN`, which is no greater than `u8::MAX - 2`. If this contract is upheld, + // `len + 1` can never overflow, so `len + 1` can never be zero. 
+ let len = NonZeroU8::new_unchecked(len + 1); + + Self { + repr: Repr { inline: buf }, + len, + _align: [], + } + } + + #[inline] + fn new_boxed(s: S) -> Self + where + Box: From, + { + const U8_NONZERO_MAX: NonZeroU8 = unsafe { NonZeroU8::new_unchecked(u8::MAX) }; + + Self { + repr: Repr { + boxed: ManuallyDrop::new(MaybeUninit::new(Box::from(s))), + }, + len: U8_NONZERO_MAX, + _align: [], + } + } + + /// If the `inline` field is active, returns the length of the inline string data. If the + /// `boxed` field is active, returns `None`. + #[inline(always)] + fn inline_string_len(&self) -> Option { + let len = self.len.get() - 1; + if len <= Self::MAX_LEN { + Some(len) + } else { + None + } } - /// Returns a string slice for the underlying string data. #[inline] #[must_use] pub fn as_str(&self) -> &str { - match self { - Self(Repr::Inline(buf)) => buf, - Self(Repr::Boxed(buf)) => buf, + match self.inline_string_len() { + Some(len) => { + // Get a pointer to the `inline` field of the union. + // SAFETY: + // Since `inline_string_len` returned `Some`, the `inline` field must be active. + let ptr = unsafe { addr_of!(self.repr.inline) } + as *const MaybeUninit + as *const u8; + + // Construct a byte slice from the pointer to the string data and the length. + // SAFETY: + // The first `len` bytes of `inline` are always initialised, as this is an + // invariant of `InliningString`. + let bytes = unsafe { slice::from_raw_parts(ptr, usize::from(len)) }; + + // Perform an unchecked conversion from the byte slice to a string slice. + // SAFETY: + // The first `len` bytes of `inline` is always valid UTF-8, as this is an invariant + // of `InliningString`. + unsafe { str::from_utf8_unchecked(bytes) } + }, + + None => { + // SAFETY: + // `inline_string_len` returned `None`, which means that the `boxed` field is + // active. 
`boxed` is properly aligned because it is stored at offset 0 of + // `InliningString` (since both `InliningString` and `Repr` use `repr(C)`), and the + // alignment of `InliningString` is equal to the alignment of `Box`. + let box_str = unsafe { &*addr_of!(self.repr.boxed) }; + + // SAFETY: + // `repr.boxed` is initialised, as the only time it's uninitialised is when it is + // briefly replaced with a temporary value before the `InliningString` is dropped + // in the `into_string` function. + unsafe { box_str.assume_init_ref() } + }, } } - /// Returns a mutable string slice for the underlying string data. #[inline] #[must_use] pub fn as_str_mut(&mut self) -> &mut str { - match self { - Self(Repr::Inline(buf)) => buf, - Self(Repr::Boxed(buf)) => buf, + match self.inline_string_len() { + Some(len) => { + // Get a pointer to the `inline` field of the union. + // SAFETY: + // Since `inline_string_len` returned `Some`, the `inline` field must be active. + let ptr = unsafe { addr_of_mut!(self.repr.inline) } + as *mut MaybeUninit + as *mut u8; + + // Construct a byte slice from the pointer to the string data and the length. + // SAFETY: + // The first `len` bytes of `inline` are always initialised, as this is an + // invariant of `InliningString`. + let bytes = unsafe { slice::from_raw_parts_mut(ptr, usize::from(len)) }; + + // Perform an unchecked conversion from the byte slice to a string slice. + // SAFETY: + // The first `len` bytes of `inline` is always valid UTF-8, as this is an invariant + // of `InliningString`. + unsafe { str::from_utf8_unchecked_mut(bytes) } + }, + + None => { + // SAFETY: + // `inline_string_len` returned `None`, which means that the `boxed` field is + // active. `boxed` is properly aligned because it is stored at offset 0 of + // `InliningString` (since both `InliningString` and `Repr` use `repr(C)`), and the + // alignment of `InliningString` is equal to the alignment of `Box`. 
+ let box_str = unsafe { &mut *addr_of_mut!(self.repr.boxed) }; + + // SAFETY: + // `repr.boxed` is initialised, as the only time it's uninitialised is when it is + // briefly replaced with a temporary value before the `InliningString` is dropped + // in the `into_string` function. + unsafe { box_str.assume_init_mut() } + }, } } #[inline] #[must_use] pub fn into_boxed_str(self) -> Box { - match self { - Self(Repr::Inline(buf)) => buf.into_boxed_str(), - Self(Repr::Boxed(buf)) => buf, + match self.inline_string_len() { + Some(len) => { + // Get a pointer to the `inline` field of the union. + // SAFETY: + // Since `inline_string_len` returned `Some`, the `inline` field must be active. + let ptr = unsafe { addr_of!(self.repr.inline) } + as *const MaybeUninit + as *const u8; + + // Construct a byte slice from the pointer to the string data and the length. + // SAFETY: + // The first `len` bytes of `inline` are always initialised, as this is an + // invariant of `InliningString`. + let bytes = unsafe { slice::from_raw_parts(ptr, usize::from(len)) }; + + // Perform an unchecked conversion from the byte slice to a string slice. + // SAFETY: + // The first `len` bytes of `inline` is always valid UTF-8, as this is an invariant + // of `InliningString`. + let str_slice = unsafe { str::from_utf8_unchecked(bytes) }; + + Box::from(str_slice) + }, + + None => { + let manual_box_str = { + // Disable the destructor for `self`; we are transferring ownership of the + // allocated memory to the caller, so we don't want to run the destructor which + // would free the memory. + let mut this = ManuallyDrop::new(self); + + // SAFETY: + // `inline_string_len` returned `None`, which means that the `boxed` field is + // active. `boxed` is properly aligned because it is stored at offset 0 of + // `InliningString` (since both `InliningString` and `Repr` use `repr(C)`), and + // the alignment of `InliningString` is equal to the alignment of `Box`. 
+ let field_ref = unsafe { &mut *addr_of_mut!(this.repr.boxed) }; + + // Move `repr.boxed` out of the `InliningString`, replacing it with + // uninitialised memory. This is sound because we have ownership of the + // `InliningString` and we will not be doing anything else with it after this + // which calls `assume_init` on `repr.boxed`; at the end of this block, the + // `InliningString` is dropped without calling its destructor. + mem::replace(field_ref, ManuallyDrop::new(MaybeUninit::uninit())) + }; + + // Re-enable the destructor for the boxed string. + let maybe_box_str = ManuallyDrop::into_inner(manual_box_str); + + // SAFETY: + // The boxed string is initialised, as we obtained it by moving `repr.boxed`, and + // the only time `repr.boxed` is uninitialised is when it is briefly replaced with + // a temporary value in the block above. + unsafe { maybe_box_str.assume_init() } + }, } } - /// Consumes the `InliningString` and converts it to a heap-allocated `String`. #[inline] #[must_use] pub fn into_string(self) -> String { - match self { - Self(Repr::Inline(buf)) => buf.into_string(), - Self(Repr::Boxed(buf)) => buf.into_string(), - } + self.into_boxed_str() + .into_string() + } + + /// Returns `true` if and only if the string data uses a separate heap allocation. + /// + /// ``` + /// # use libshire::strings::InliningString; + /// let s1 = InliningString::<23>::new("This string is 23 bytes"); + /// assert!(!s1.heap_allocated()); + /// + /// let s2 = InliningString::<23>::new("and this one is 24 bytes"); + /// assert!(s2.heap_allocated()); + /// ``` + #[inline] + #[must_use] + pub fn heap_allocated(&self) -> bool { + self.inline_string_len().is_none() } /// Returns the length of the string in bytes. 
/// /// ``` /// # use libshire::strings::InliningString; - /// let s = InliningString::<22>::new("こんにちは"); + /// let s = InliningString::<23>::new("こんにちは"); /// assert_eq!(s.len(), 15); /// ``` #[inline] #[must_use] pub fn len(&self) -> usize { - match self { - Self(Repr::Inline(buf)) => buf.len(), - Self(Repr::Boxed(buf)) => buf.len(), - } + self.as_str().len() } /// Returns `true` if the string has length 0. /// /// ``` /// # use libshire::strings::InliningString; - /// let s1 = InliningString::<22>::new(""); + /// let s1 = InliningString::<23>::new(""); /// assert!(s1.is_empty()); /// - /// let s2 = InliningString::<22>::new("Hello"); + /// let s2 = InliningString::<23>::new("Hello"); /// assert!(!s2.is_empty()); /// ``` #[inline] #[must_use] pub fn is_empty(&self) -> bool { - match self { - Self(Repr::Inline(buf)) => buf.is_empty(), - Self(Repr::Boxed(buf)) => buf.is_empty(), - } + self.as_str().is_empty() } +} - /// Returns `true` if the string data is stored on the heap, and `false` otherwise. - /// - /// ``` - /// # use libshire::strings::InliningString; - /// let s1 = InliningString::<22>::new("This string's 22 bytes"); - /// assert!(!s1.heap_allocated()); - /// - /// let s2 = InliningString::<22>::new("This string is 23 bytes"); - /// assert!(s2.heap_allocated()); - /// ``` - #[inline] - #[must_use] - pub fn heap_allocated(&self) -> bool { - match self { - Self(Repr::Inline(_)) => false, - Self(Repr::Boxed(_)) => true, +impl Drop for InliningString { + fn drop(&mut self) { + if self.heap_allocated() { + let boxed = unsafe { &mut *addr_of_mut!(self.repr.boxed) }; + + // SAFETY: + // Since this is a drop implementation, `boxed` will not be used again after this. 
+ let _ = unsafe { ManuallyDrop::take(boxed).assume_init() }; } } } @@ -244,6 +480,15 @@ impl borrow::BorrowMut for InliningString { } } +impl str::FromStr for InliningString { + type Err = Infallible; + + #[inline] + fn from_str(s: &str) -> Result { + Ok(Self::new(s)) + } +} + impl<'a, const N: usize> From<&'a str> for InliningString { #[inline] fn from(s: &'a str) -> Self { @@ -330,25 +575,14 @@ impl Hash for InliningString { } } -impl str::FromStr for InliningString { - type Err = Infallible; - - #[inline] - fn from_str(s: &str) -> Result { - Ok(Self::new(s)) - } -} - impl fmt::Debug for InliningString { - #[inline] - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { fmt::Debug::fmt(&**self, f) } } impl fmt::Display for InliningString { - #[inline] - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { fmt::Display::fmt(&**self, f) } } @@ -357,7 +591,7 @@ impl fmt::Display for InliningString { impl serde::Serialize for InliningString { fn serialize(&self, serializer: S) -> Result where - S: serde::Serializer, + S: serde::Serializer { serde::Serialize::serialize(&**self, serializer) } @@ -367,7 +601,7 @@ impl serde::Serialize for InliningString { impl<'de, const N: usize> serde::Deserialize<'de> for InliningString { fn deserialize(deserializer: D) -> Result where - D: serde::Deserializer<'de>, + D: serde::Deserializer<'de> { #[cfg(not(feature = "std"))] use alloc::vec::Vec; @@ -410,21 +644,37 @@ impl<'de, const N: usize> serde::Deserialize<'de> for InliningString { } } -#[derive(Clone)] -enum Repr { - Inline(CappedString), - Boxed(Box), -} - #[cfg(test)] mod tests { #[cfg(not(feature = "std"))] - use alloc::borrow::{Cow, ToOwned}; + use alloc::{ + borrow::{Cow, ToOwned}, + vec::Vec, + }; #[cfg(feature = "std")] use std::borrow::Cow; - use super::{InliningString, InliningString22}; + use super::*; + + #[test] + fn test_align() { + use 
core::mem::align_of; + assert_eq!(align_of::(), align_of::>()); + } + + #[test] + fn test_niche() { + use core::mem::size_of; + assert_eq!(size_of::(), size_of::>()); + } + + #[test] + fn test_empty() { + assert_eq!(InliningString23::empty().as_str(), ""); + assert_eq!(InliningString23::empty().len(), 0); + assert!(!InliningString23::empty().heap_allocated()); + } #[test] fn test_new() { @@ -433,6 +683,7 @@ mod tests { "Hello", "Somethingfortheweekend", "Dichlorodifluoromethane", + "Electrocardiographically", "こんにちは", "❤️🧡💛💚💙💜", ]; @@ -442,44 +693,87 @@ mod tests { let borrowed = Cow::Borrowed(s); let owned = Cow::<'static, str>::Owned(buf.clone()); - assert_eq!(InliningString22::new(s).as_str(), s); - assert_eq!(InliningString22::new(buf).as_str(), s); - assert_eq!(InliningString22::new(borrowed).as_str(), s); - assert_eq!(InliningString22::new(owned).as_str(), s); + assert_eq!(InliningString23::new(s).as_str(), s); + assert_eq!(InliningString23::new(buf).as_str(), s); + assert_eq!(InliningString23::new(borrowed).as_str(), s); + assert_eq!(InliningString23::new(owned).as_str(), s); + } + } + + #[test] + fn test_contiguous() { + let test_strings = [ + "", + "Hello", + "Somethingfortheweekend", + "Dichlorodifluoromethane", + "Electrocardiographically", + "こんにちは", + "❤️🧡💛💚💙💜", + ]; + + #[allow(clippy::needless_collect)] + let vec = test_strings + .iter() + .copied() + .map(InliningString23::new) + .collect::>(); + + for (i, s) in vec.into_iter().enumerate() { + assert_eq!(s.as_str(), test_strings[i]); } } #[test] fn test_as_str_mut() { - let mut s1 = InliningString22::new("hello"); + let mut s1 = InliningString23::new("hello"); s1.as_str_mut().make_ascii_uppercase(); assert_eq!(s1.as_str(), "HELLO"); - let mut s2 = InliningString22::new("the quick brown fox jumps over the lazy dog"); + let mut s2 = InliningString23::new("the quick brown fox jumps over the lazy dog"); s2.as_str_mut().make_ascii_uppercase(); assert_eq!(s2.as_str(), "THE QUICK BROWN FOX JUMPS OVER THE 
LAZY DOG"); } + #[test] + fn test_into_string() { + let test_strings = [ + "".to_owned(), + "Hello".to_owned(), + "Somethingfortheweekend".to_owned(), + "Dichlorodifluoromethane".to_owned(), + "Electrocardiographically".to_owned(), + "こんにちは".to_owned(), + "❤️🧡💛💚💙💜".to_owned(), + ]; + + for s in test_strings { + assert_eq!(InliningString23::new(&*s).into_string(), s); + } + } + #[test] fn test_len() { - assert_eq!(InliningString22::new("").len(), 0); - assert_eq!(InliningString22::new("Hello").len(), 5); - assert_eq!(InliningString22::new("Somethingfortheweekend").len(), 22); - assert_eq!(InliningString22::new("Dichlorodifluoromethane").len(), 23); - assert_eq!(InliningString22::new("こんにちは").len(), 15); - assert_eq!(InliningString22::new("❤️🧡💛💚💙💜").len(), 26); + assert_eq!(InliningString23::new("").len(), 0); + assert_eq!(InliningString23::new("Hello").len(), 5); + assert_eq!(InliningString23::new("Somethingfortheweekend").len(), 22); + assert_eq!(InliningString23::new("Dichlorodifluoromethane").len(), 23); + assert_eq!(InliningString23::new("Electrocardiographically").len(), 24); + assert_eq!(InliningString23::new("こんにちは").len(), 15); + assert_eq!(InliningString23::new("❤️🧡💛💚💙💜").len(), 26); } #[test] fn test_heap_allocated() { - assert!(!InliningString22::new("").heap_allocated()); - assert!(!InliningString22::new("Hello").heap_allocated()); - assert!(!InliningString22::new("Somethingfortheweekend").heap_allocated()); - assert!(!InliningString22::new("こんにちは").heap_allocated()); - - assert!(InliningString22::new("Dichlorodifluoromethane").heap_allocated()); - assert!(InliningString22::new("Squishedbuginsidethescreen").heap_allocated()); - assert!(InliningString22::new("❤️🧡💛💚💙💜").heap_allocated()); + assert!(!InliningString23::new("").heap_allocated()); + assert!(!InliningString23::new("Hello").heap_allocated()); + assert!(!InliningString23::new("Somethingfortheweekend").heap_allocated()); + 
assert!(!InliningString23::new("Dichlorodifluoromethane").heap_allocated()); + assert!(!InliningString23::new("こんにちは").heap_allocated()); + + assert!(InliningString23::new("Electrocardiographically").heap_allocated()); + assert!(InliningString23::new("Squishedbuginsidethescreen").heap_allocated()); + assert!(InliningString23::new("❤️🧡💛💚💙💜").heap_allocated()); } #[test] diff --git a/src/strings/mod.rs b/src/strings/mod.rs index 92aed64..30aacbc 100644 --- a/src/strings/mod.rs +++ b/src/strings/mod.rs @@ -1,5 +1,3 @@ -#[cfg(any(feature = "alloc", feature = "std"))] -pub mod experimental; pub mod fixed; pub mod capped; #[cfg(any(feature = "alloc", feature = "std"))] @@ -8,4 +6,4 @@ pub mod inlining; pub use fixed::{FixedString, Error as FixedStringError}; pub use capped::{CappedString, Error as CappedStringError}; #[cfg(any(feature = "alloc", feature = "std"))] -pub use inlining::{InliningString, InliningString22}; +pub use inlining::{InliningString, InliningString23}; diff --git a/test.sh b/test.sh index b4e0d3a..3158430 100755 --- a/test.sh +++ b/test.sh @@ -2,14 +2,17 @@ set -e set -o pipefail +# no_std and no alloc cargo +nightly miri test --no-default-features --features serde +# no_std with alloc cargo +nightly miri test --no-default-features --features alloc,serde -cargo +nightly miri test +# std +cargo +nightly miri test --features serde # 32-bit target -cargo +nightly miri test --target sparc-unknown-linux-gnu +cargo +nightly miri test --target sparc-unknown-linux-gnu --features serde # Big-endian target -cargo +nightly miri test --target mips64-unknown-linux-gnuabi64 +cargo +nightly miri test --target mips64-unknown-linux-gnuabi64 --features serde