use core::{
    borrow,
    cmp::Ordering,
    fmt,
    hash::{Hash, Hasher},
    mem::MaybeUninit,
    ops, ptr, str,
};

#[cfg(not(feature = "std"))]
use core::convert::TryFrom;

#[cfg(all(feature = "alloc", not(feature = "std")))]
use alloc::{
    borrow::{Cow, ToOwned},
    boxed::Box,
    string::String,
};

#[cfg(feature = "std")]
use std::borrow::Cow;

/// Error returned when an operation would require a [`CappedString`] to hold more bytes than its
/// capacity allows.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct CapacityError;

impl fmt::Display for CapacityError {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        write!(f, "`CappedString` capacity exceeded")
    }
}

#[cfg(feature = "std")]
impl std::error::Error for CapacityError {}

/// A string type which stores at most `N` bytes of string data. The string data is stored inline
/// rather than using a heap allocation.
///
/// The length is tracked in a `u8`, so `N` must not exceed `u8::MAX`; this is checked at compile
/// time by the constant used in the constructors and `push*` methods.
///
/// # Invariants
/// * `len <= N`.
/// * The first `len` bytes of `buf` are initialised and form valid UTF-8.
///
/// ```
/// # use libshire::strings::CappedString;
/// # fn main() -> Result<(), libshire::strings::capped::CapacityError> {
/// let s = CappedString::<16>::new("hello world")?;
/// assert_eq!(&*s, "hello world");
/// # Ok(())
/// # }
/// ```
#[derive(Clone)]
pub struct CappedString<const N: usize> {
    buf: [MaybeUninit<u8>; N],
    len: u8,
}

impl<const N: usize> CappedString<N> {
    /// `N` as a `u8`. Evaluating this constant fails compilation if `N` does not fit in a `u8`.
    // The cast cannot truncate because it is guarded by the comparison against `u8::MAX`;
    // `u8::try_from` cannot be used here because it is not available in const context.
    #[allow(clippy::cast_possible_truncation, clippy::checked_conversions)]
    const MAX_LEN: u8 = {
        if N <= u8::MAX as usize {
            N as u8
        } else {
            panic!("`N` must be within the bounds of `u8`")
        }
    };

    /// Creates a new `CappedString` from a given byte buffer and length.
    ///
    /// # Safety
    /// The first `len` bytes of `buf` (i.e. `buf[..len]`) must be initialised and valid UTF-8.
    /// `len` must be less than or equal to `N`.
    #[inline]
    #[must_use]
    pub const unsafe fn from_raw_parts(buf: [MaybeUninit<u8>; N], len: u8) -> Self {
        Self { buf, len }
    }

    /// Decomposes the `CappedString` into its raw buffer and length.
    ///
    /// Only the first `len` bytes of the returned buffer are initialised. The pair can be passed
    /// back to [`Self::from_raw_parts`] to rebuild the string.
    #[inline]
    #[must_use]
    pub const fn into_raw_parts(self) -> ([MaybeUninit<u8>; N], u8) {
        (self.buf, self.len)
    }

    /// Builds a `CappedString` by copying `len` bytes from `src`.
    ///
    /// # Safety
    /// `src` must point to `len` bytes of valid, UTF-8 string data. `len` must be less than or
    /// equal to `N`.
    #[inline]
    unsafe fn from_raw_ptr(src: *const u8, len: u8) -> Self {
        // `u8` has the same memory layout as `MaybeUninit<u8>`, so this cast is valid.
        let src = src.cast::<MaybeUninit<u8>>();

        // `MaybeUninit<u8>` is `Copy`, so an array of uninitialised bytes can be built safely.
        let mut buf = [MaybeUninit::<u8>::uninit(); N];

        // SAFETY:
        // The caller is responsible for ensuring that `src` points to a valid string, which means
        // that it must not overlap with the new local variable `buf`. The caller is responsible
        // for ensuring that `src` is valid for reads of `len` bytes. The caller is responsible for
        // ensuring that `len <= N`, so `buf` is valid for writes of `len` bytes. `src` and `buf`
        // are both trivially properly aligned, since they both have an alignment of 1.
        unsafe {
            ptr::copy_nonoverlapping(src, buf.as_mut_ptr(), usize::from(len));
        }

        // SAFETY:
        // The caller is responsible for ensuring that `src` points to `len` bytes of valid UTF-8
        // data. `src` is copied into the start of `buf`, so the first `len` bytes of `buf` are
        // valid UTF-8. The caller is responsible for ensuring that `len <= N`.
        unsafe { Self::from_raw_parts(buf, len) }
    }

    /// Appends a single byte to the end of the string data.
    ///
    /// # Safety
    /// `self.len` must be less than `N`, so that there is space in the buffer to append the byte.
    /// `byte` must be a complete single-byte UTF-8 sequence (i.e. ASCII), so that the contents
    /// remain valid UTF-8.
    #[inline]
    unsafe fn append_byte(&mut self, byte: u8) {
        // SAFETY:
        // The caller is responsible for ensuring that `self.len < N`.
        let dst = unsafe { self.buf.get_unchecked_mut(usize::from(self.len)) };
        *dst = MaybeUninit::new(byte);
        // Cannot overflow: `self.len < N` and `self.len` is a `u8`.
        self.len += 1;
    }

    /// Appends `len` bytes read from `src` to the end of the string data.
    ///
    /// # Safety
    /// `src` must point to `len` bytes of valid UTF-8 string data which do not overlap this
    /// `CappedString`'s buffer. `len` must be less than or equal to `N - self.len`.
    #[inline]
    unsafe fn append_bytes(&mut self, src: *const u8, len: u8) {
        // `u8` has the same memory layout as `MaybeUninit<u8>`, so this cast is valid.
        let src = src.cast::<MaybeUninit<u8>>();

        // SAFETY:
        // `self.len <= N` is an invariant of `CappedString`, so `self.len..` is a valid range over
        // `self.buf`.
        let dst = unsafe { self.buf.get_unchecked_mut(usize::from(self.len)..) };

        // SAFETY:
        // The caller is responsible for ensuring that `src` does not overlap `self.buf` (which we
        // hold exclusively through `&mut self`) and that `src` is valid for reads of `len` bytes.
        // The caller is responsible for ensuring that `len <= N - self.len`, so the destination
        // `dst = self.buf[self.len..]` is valid for writes of `len` bytes. `src` and `dst` are
        // both trivially properly aligned, since they both have an alignment of 1.
        unsafe {
            ptr::copy_nonoverlapping(src, dst.as_mut_ptr(), usize::from(len));
        }

        // Cannot overflow: `len <= N - self.len` and `N <= u8::MAX` wherever this is called.
        self.len += len;
    }

    /// Returns a new empty `CappedString`.
    ///
    /// ```
    /// # use libshire::strings::CappedString;
    /// let s = CappedString::<8>::empty();
    /// assert!(s.is_empty());
    /// assert_eq!(s.len(), 0);
    /// assert_eq!(&*s, "");
    /// ```
    #[inline]
    #[must_use]
    pub const fn empty() -> Self {
        // `MaybeUninit<u8>` is `Copy`, so an array of uninitialised bytes can be built safely.
        let buf = [MaybeUninit::<u8>::uninit(); N];

        // SAFETY:
        // It is vacuously true that the first 0 bytes of the buffer are initialised and valid
        // UTF-8.
        unsafe { Self::from_raw_parts(buf, 0) }
    }

    /// Returns a new `CappedString` containing the given string data. The string data will be
    /// stored inline; no heap allocation is used. An error will be returned if the length of the
    /// provided string exceeds the `CappedString`'s maximum length, `N`.
    ///
    /// # Errors
    /// Returns [`CapacityError`] if `src` is longer than `N` bytes.
    ///
    /// ```
    /// # use libshire::strings::CappedString;
    /// # fn main() -> Result<(), libshire::strings::capped::CapacityError> {
    /// let s = CappedString::<16>::new("hello world")?;
    /// assert_eq!(&*s, "hello world");
    /// # Ok(())
    /// # }
    /// ```
    #[inline]
    pub fn new<S>(src: &S) -> Result<Self, CapacityError>
    where
        S: AsRef<str> + ?Sized,
    {
        // Convert the input to a string slice, which is guaranteed to be valid UTF-8 since this is
        // an invariant of `str`.
        let src = <S as AsRef<str>>::as_ref(src);

        // If the length of the `src` string does not fit into a `u8` or is greater than
        // `Self::MAX_LEN`, we can't fit it into the new `CappedString` so return an error.
        let len = match u8::try_from(src.len()) {
            Ok(len) if len <= Self::MAX_LEN => len,
            _ => return Err(CapacityError),
        };

        // SAFETY:
        // `src.as_ptr()` points to `len` bytes of valid UTF-8 string data since `src` is a `&str`
        // and `len` is its length. `len` is less than or equal to `Self::MAX_LEN`, which is equal
        // to `N`.
        unsafe { Ok(Self::from_raw_ptr(src.as_ptr(), len)) }
    }

    /// Returns a new `CappedString` containing the longest prefix of `src` which fits into `N`
    /// bytes without splitting a character. This constructor cannot fail.
    #[inline]
    #[must_use]
    pub fn new_truncating<S>(src: &S) -> Self
    where
        S: AsRef<str> + ?Sized,
    {
        let src = <S as AsRef<str>>::as_ref(src);
        let (src, len) = truncate_str(src, Self::MAX_LEN);

        // SAFETY:
        // It is part of the contract of `truncate_str` that it returns a pointer to a valid UTF-8
        // string of length `len`, and that `len` is less than or equal to the provided maximum
        // length, which is `Self::MAX_LEN` (which is equal to `N`) in this case.
        unsafe { Self::from_raw_ptr(src, len) }
    }

    /// Appends the given character to the end of this `CappedString`, returning an error if there
    /// is insufficient capacity remaining to do so.
    ///
    /// If you do not care whether or not the append succeeds, see [`Self::push_truncating`].
    ///
    /// # Errors
    /// Returns [`CapacityError`] if the encoded character does not fit in the remaining capacity;
    /// the string is left unchanged in that case.
    #[inline]
    pub fn push(&mut self, c: char) -> Result<(), CapacityError> {
        let mut char_buf = [0u8; 4];
        let encoded = c.encode_utf8(&mut char_buf);

        match encoded.len() {
            // Fast path for ASCII: a single byte store rather than a `memcpy`.
            1 => {
                if self.len == Self::MAX_LEN {
                    return Err(CapacityError);
                }
                // SAFETY:
                // `self.len <= N` is an invariant of `CappedString` and we have just checked that
                // `self.len != Self::MAX_LEN` (note that `Self::MAX_LEN == N`), so `self.len < N`
                // holds. The character encodes to exactly one byte, so that byte is ASCII and is
                // valid UTF-8 on its own.
                unsafe { self.append_byte(encoded.as_bytes()[0]) }
                Ok(())
            },
            _ => self.push_str(encoded),
        }
    }

    /// Appends the given character to the end of this `CappedString`, failing silently if there is
    /// insufficient capacity remaining to do so.
    ///
    /// If you would like to know whether or not the append succeeds, see [`Self::push`].
    #[inline]
    pub fn push_truncating(&mut self, c: char) {
        // Unlike `Self::push_str_truncating`, we can just use `Self::push` and swallow the error
        // because a single character will never be partially pushed; it is either pushed or it
        // isn't.
        self.push(c).ok();
    }

    /// Appends the given string slice to the end of this `CappedString`, returning an error if
    /// there is insufficient capacity remaining to do so.
    ///
    /// If you would like a version which cannot fail, see [`Self::push_str_truncating`].
    ///
    /// # Errors
    /// Returns [`CapacityError`] if `src` does not fit in the remaining capacity; the string is
    /// left unchanged in that case.
    #[inline]
    pub fn push_str<S>(&mut self, src: &S) -> Result<(), CapacityError>
    where
        S: AsRef<str> + ?Sized,
    {
        let src = <S as AsRef<str>>::as_ref(src);

        // `Self::MAX_LEN - self.len` cannot underflow because `self.len <= N == Self::MAX_LEN`.
        let len = match u8::try_from(src.len()) {
            Ok(len) if len <= Self::MAX_LEN - self.len => len,
            _ => return Err(CapacityError),
        };

        // SAFETY:
        // `src` is a valid string slice with length `len`. We have checked that
        // `len <= N - self.len` holds above (note that `Self::MAX_LEN == N`).
        unsafe {
            self.append_bytes(src.as_ptr(), len);
        }

        Ok(())
    }

    /// Appends as many of the characters of the given string slice to the end of this
    /// `CappedString` as can fit. Any remaining characters will not be added.
    ///
    /// If you would like a version which returns an error if there is not enough capacity remaining
    /// to append the entire string slice, see [`Self::push_str`].
    #[inline]
    pub fn push_str_truncating<S>(&mut self, src: &S)
    where
        S: AsRef<str> + ?Sized,
    {
        let remaining_cap = Self::MAX_LEN - self.len;

        // Short-circuit if we have no space left to copy into.
        if remaining_cap == 0 {
            return;
        }

        let src = <S as AsRef<str>>::as_ref(src);

        // Find the longest valid UTF-8 prefix which fits into the remaining space.
        let (src, len) = truncate_str(src, remaining_cap);

        // SAFETY:
        // `truncate_str` returns a pointer to `len` bytes of valid UTF-8 string data. The returned
        // `len` will always be less than or equal to `remaining_cap`, which is equal to
        // `N - self.len` (note that `Self::MAX_LEN == N`).
        unsafe {
            self.append_bytes(src, len);
        }
    }

    /// Returns a string slice pointing to the underlying string data.
    #[inline]
    #[must_use]
    pub fn as_str(&self) -> &str {
        // SAFETY:
        // The first `self.len` bytes of `self.buf` (which is returned by `Self::as_bytes`) being
        // valid UTF-8 is an invariant of `CappedString`.
        unsafe { str::from_utf8_unchecked(self.as_bytes()) }
    }

    /// Returns a mutable string slice pointing to the underlying string data.
    #[inline]
    #[must_use]
    pub fn as_str_mut(&mut self) -> &mut str {
        // SAFETY:
        // The first `self.len` bytes of `self.buf` (which is returned by `Self::as_bytes_mut`)
        // being valid UTF-8 is an invariant of `CappedString`. Handing out a `&mut str` upholds
        // the contract of `Self::as_bytes_mut`, since safe code cannot make a `str` invalid.
        unsafe { str::from_utf8_unchecked_mut(self.as_bytes_mut()) }
    }

    /// Returns the initialised portion of the buffer (the string data) as a byte slice.
    #[inline]
    #[must_use]
    pub fn as_bytes(&self) -> &[u8] {
        // Get the slice of the buffer containing initialised string data.
        // SAFETY:
        // It is an invariant of `CappedString` that `self.len <= N`, so `..self.len` is a valid
        // range over `self.buf`.
        let data_slice = unsafe { self.buf.get_unchecked(..usize::from(self.len)) };

        // Convert the `&[MaybeUninit<u8>]` to a `&[u8]`.
        // SAFETY:
        // `MaybeUninit<u8>` has the same memory layout as `u8`, and the first `self.len` bytes of
        // the buffer are initialised, so this conversion is valid.
        unsafe { &*(data_slice as *const [MaybeUninit<u8>] as *const [u8]) }
    }

    /// Returns the initialised portion of the buffer (the string data) as a mutable byte slice.
    ///
    /// # Safety
    /// The caller is responsible for ensuring that the slice is valid UTF-8 when the mutable
    /// borrow ends.
    #[inline]
    #[must_use]
    pub unsafe fn as_bytes_mut(&mut self) -> &mut [u8] {
        // Get the slice of the buffer containing initialised string data.
        // SAFETY:
        // It is an invariant of `CappedString` that `self.len <= N`, so `..self.len` is a valid
        // range over `self.buf`.
        let data_slice = unsafe { self.buf.get_unchecked_mut(..usize::from(self.len)) };

        // Convert the `&mut [MaybeUninit<u8>]` to a `&mut [u8]`.
        // SAFETY:
        // `MaybeUninit<u8>` has the same memory layout as `u8`, and the first `self.len` bytes of
        // the buffer are initialised, so this conversion is valid.
        unsafe { &mut *(data_slice as *mut [MaybeUninit<u8>] as *mut [u8]) }
    }

    /// Returns the length of the string data in bytes.
    #[inline]
    #[must_use]
    pub fn len(&self) -> usize {
        usize::from(self.len)
    }

    /// Returns `true` if the string contains no data.
    #[inline]
    #[must_use]
    pub fn is_empty(&self) -> bool {
        self.len == 0
    }
}

#[cfg(feature = "alloc")]
impl<const N: usize> CappedString<N> {
    /// Copies the string data into a newly allocated `Box<str>`.
    #[inline]
    #[must_use]
    pub fn into_boxed_str(self) -> Box<str> {
        self.as_str().into()
    }

    /// Copies the string data into a newly allocated `String`.
    #[inline]
    #[must_use]
    pub fn into_string(self) -> String {
        self.as_str().to_owned()
    }
}

impl<const N: usize> Default for CappedString<N> {
    #[inline]
    fn default() -> Self {
        Self::empty()
    }
}

impl<const N: usize> ops::Deref for CappedString<N> {
    type Target = str;

    #[inline]
    fn deref(&self) -> &Self::Target {
        self.as_str()
    }
}

impl<const N: usize> ops::DerefMut for CappedString<N> {
    #[inline]
    fn deref_mut(&mut self) -> &mut Self::Target {
        self.as_str_mut()
    }
}

impl<const N: usize> AsRef<str> for CappedString<N> {
    #[inline]
    fn as_ref(&self) -> &str {
        self
    }
}

impl<const N: usize> AsMut<str> for CappedString<N> {
    #[inline]
    fn as_mut(&mut self) -> &mut str {
        self
    }
}

impl<const N: usize> borrow::Borrow<str> for CappedString<N> {
    #[inline]
    fn borrow(&self) -> &str {
        self
    }
}

impl<const N: usize> borrow::BorrowMut<str> for CappedString<N> {
    #[inline]
    fn borrow_mut(&mut self) -> &mut str {
        self
    }
}

impl<'a, const N: usize> TryFrom<&'a str> for CappedString<N> {
    type Error = CapacityError;

    #[inline]
    fn try_from(s: &'a str) -> Result<Self, Self::Error> {
        Self::new(s)
    }
}

#[cfg(feature = "alloc")]
impl<const N: usize> TryFrom<String> for CappedString<N> {
    type Error = CapacityError;

    #[inline]
    fn try_from(s: String) -> Result<Self, Self::Error> {
        Self::new(&s)
    }
}

#[cfg(feature = "alloc")]
impl<const N: usize> TryFrom<Box<str>> for CappedString<N> {
    type Error = CapacityError;

    #[inline]
    fn try_from(s: Box<str>) -> Result<Self, Self::Error> {
        Self::new(&s)
    }
}

#[cfg(feature = "alloc")]
impl<'a, const N: usize> TryFrom<Cow<'a, str>> for CappedString<N> {
    type Error = CapacityError;

    #[inline]
    fn try_from(s: Cow<'a, str>) -> Result<Self, Self::Error> {
        Self::new(&s)
    }
}

#[cfg(feature = "alloc")]
impl<const N: usize> From<CappedString<N>> for String {
    #[inline]
    fn from(s: CappedString<N>) -> Self {
        s.into_string()
    }
}

#[cfg(feature = "alloc")]
impl<const N: usize> From<CappedString<N>> for Box<str> {
    #[inline]
    fn from(s: CappedString<N>) -> Self {
        s.into_boxed_str()
    }
}

// Comparisons are defined on the string contents only, so strings with different capacities can
// be compared with one another.
impl<const N: usize, const M: usize> PartialEq<CappedString<M>> for CappedString<N> {
    #[inline]
    fn eq(&self, other: &CappedString<M>) -> bool {
        **self == **other
    }
}

impl<const N: usize> Eq for CappedString<N> {}

impl<const N: usize, const M: usize> PartialOrd<CappedString<M>> for CappedString<N> {
    #[inline]
    fn partial_cmp(&self, other: &CappedString<M>) -> Option<Ordering> {
        (**self).partial_cmp(&**other)
    }
}

impl<const N: usize> Ord for CappedString<N> {
    #[inline]
    fn cmp(&self, other: &Self) -> Ordering {
        (**self).cmp(&**other)
    }
}

// Hashes exactly like the underlying `str`, which keeps the `Borrow<str>` impl coherent.
impl<const N: usize> Hash for CappedString<N> {
    #[inline]
    fn hash<H: Hasher>(&self, state: &mut H) {
        (**self).hash(state);
    }
}

impl<const N: usize> str::FromStr for CappedString<N> {
    type Err = CapacityError;

    #[inline]
    fn from_str(s: &str) -> Result<Self, Self::Err> {
        Self::new(s)
    }
}

impl<const N: usize> fmt::Debug for CappedString<N> {
    #[inline]
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        fmt::Debug::fmt(&**self, f)
    }
}

impl<const N: usize> fmt::Display for CappedString<N> {
    #[inline]
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        fmt::Display::fmt(&**self, f)
    }
}

/// Returns a pointer to the longest prefix of `src` which is valid UTF-8 and whose length is
/// less than or equal to `max_len`, and returns the length of this prefix.
#[inline]
fn truncate_str(src: &str, max_len: u8) -> (*const u8, u8) {
    match u8::try_from(src.len()) {
        // If the length of the `src` string is less than or equal to `max_len`, there is no need to
        // truncate it.
        Ok(src_len) if src_len <= max_len => (src.as_ptr(), src_len),

        // If the length of `src` is greater than `max_len`, we need to truncate it. Note that
        // `u8::try_from` returning an error means that `src.len() > max_len`, since `max_len` is a
        // `u8` and `src.len()` is a `usize`.
        _ => {
            // Walk backwards from `max_len` to the nearest character boundary. Index 0 is always
            // a boundary, so the loop terminates before `end` can underflow; because codepoints
            // are at most 4 bytes long, it runs at most 3 times.
            let mut end = max_len;
            while !src.is_char_boundary(usize::from(end)) {
                end -= 1;
            }

            // `end <= max_len < src.len()`, so `src.as_ptr()` is valid for reads of `end` bytes,
            // and since `end` is a character boundary those bytes are valid UTF-8.
            (src.as_ptr(), end)
        },
    }
}

#[cfg(test)]
mod tests {}