Compare commits

...

12 Commits

Author SHA1 Message Date
pantonshire 44e27e9d23 🩹 convert: remove `#[must_use]` from `result_elim` 3 years ago
pantonshire 266fa13165 sink: add `SinkString::empty` and `SinkString::with_capacity` 3 years ago
pantonshire e1e26c7fd5 🩹 Re-export `sink_fmt!` from `libshire::sink` module 3 years ago
pantonshire ba515d4cfc Add `FmtSink` trait and `sink_fmt!` macro 3 years ago
pantonshire ac26dc3422 ♻️ Rename `StringSink` to `SinkString` 3 years ago
pantonshire 8309e19a68 strings: remove wide pointer cast in CappedString impl
`CappedString::as_bytes` previously used a wide pointer cast to obtain a
`&[u8]` from the `[MaybeUninit<u8>; N]` buffer; it cast a `*const
[MaybeUninit<u8>]` to a `*const [u8]` as an intermediate step. Although
this seems to be valid and did not cause any UB detected by MIRI, it
seems to be generally accepted that `slice::from_raw_parts` is the
preferred way to transmute slices since it makes explicit the metadata
(length in this case) of the new wide pointer. This is in contrast to
casting with `as`, which implicitly copies the metadata from the old
wide pointer into the new one.

Therefore, this patch replaces the `as *const [u8]` conversion with a
call to `slice::from_raw_parts`.
3 years ago
pantonshire 7253d95010 strings: CappedString to FixedString conversion methods
This patch adds `CappedString::into_fixed` and
`CappedString::into_fixed_max_capacity` to allow for checked conversions
from `CappedString` to `FixedString`.
3 years ago
pantonshire 96daa5ca00 strings: refactor error types
This patch replaces the error type for `FixedString` and removes the
re-exports of the various string error types in the `strings` module.
3 years ago
pantonshire 352c01f613
Merge pull request #1 from pantonshire/capped-string
CappedString improvements
3 years ago
pantonshire cad45f5bce strings: unit tests and documentation for CappedString
This patch adds several unit tests for `CappedString`, which are
intended to be run under miri since `CappedString` uses lots of unsafe
code. It also adds documentation and documentation tests for a number of
previously undocumented `CappedString` methods.
3 years ago
pantonshire 82034e14d0 strings: pointer-to-reference functions for InliningString
Previously, several functions in the implementation of `InliningString`
converted raw pointers to references as part of large blocks of code,
either via deref coercion or via `slice::from_raw_parts`. This created a
risk of Rust inferring reference lifetimes that were too long; this is
bad because it could result in a use-after-free or mutable aliasing.
This patch moves pointer-to-reference conversions in `InliningString` to
dedicated helper functions with explicit or easily-elided lifetimes to
avoid this issue.

This patch also introduces a
`InliningString::take_boxed_buf_invalidating` method, which provides a
way to move the boxed buffer out of an `InliningString` without aliasing
the box's heap allocation (which is not allowed). The `Drop`
implementation is reworked to use this method, as well as
`InliningString::into_boxed_str`.
3 years ago
pantonshire f411c30220 encoding: remove redundant pointer cast in url::percent_decode_utf8
A debug assertion in `encoding::url::percent_decode_utf8` previously
cast the result of `[u8]::as_ptr` to `*const u8`, which was redundant
because the return type of the `as_ptr` call was already `*const u8`.
This patch removes the redundant cast.
3 years ago

@ -43,7 +43,6 @@ impl Empty for Infallible {
}
#[inline]
#[must_use]
pub fn result_elim<T, E>(res: Result<T, E>) -> T
where
E: Empty,

@ -175,9 +175,9 @@ pub fn percent_encode_to_buf<B>(buf: &mut String, bytes: &B)
where
B: AsRef<[u8]> + ?Sized,
{
use crate::{convert::result_elim, sink::StringSink};
use crate::{convert::result_elim, sink::SinkString};
let sink = StringSink::from_string_mut(buf);
let sink = SinkString::from_string_mut(buf);
result_elim(percent_encode_to(sink, bytes))
}
@ -396,7 +396,7 @@ where
Cow::Borrowed(decoded_str) => {
debug_assert_eq!(decoded_str.len(), decoded.len());
debug_assert_eq!(
decoded_str.as_bytes().as_ptr() as *const u8,
decoded_str.as_bytes().as_ptr(),
decoded.as_ptr()
);

@ -1,4 +1,4 @@
use core::fmt;
use core::fmt::{self, Arguments};
pub trait StrSink {
type Error;
@ -29,23 +29,57 @@ where
}
}
pub trait FmtSink: StrSink {
fn sink_fmt<'a>(&mut self, args: Arguments<'a>) -> Result<(), <Self as StrSink>::Error>;
}
impl<W> FmtSink for W
where
W: fmt::Write,
{
fn sink_fmt<'a>(&mut self, args: Arguments<'a>) -> Result<(), <Self as StrSink>::Error> {
self.write_fmt(args)
}
}
/// Formats the given arguments and writes the result into a `FmtSink`, in the same way that
/// `write!` does for `fmt::Write` implementors.
///
/// The first argument is the destination (an expression evaluating to `&mut impl FmtSink`); the
/// remaining tokens are passed verbatim to `format_args!`. The macro evaluates to the `Result`
/// returned by `FmtSink::sink_fmt`.
///
/// The trait is named through `$crate` so that the expansion does not depend on the caller
/// having imported `FmtSink`.
#[macro_export]
macro_rules! sink_fmt {
    ($dst:expr, $($arg:tt)*) => {
        $crate::sink::FmtSink::sink_fmt($dst, core::format_args!($($arg)*))
    };
}
pub use sink_fmt;
#[cfg(feature = "alloc")]
pub use string_sink::StringSink;
pub use string_sink::SinkString;
#[cfg(feature = "alloc")]
mod string_sink {
use core::convert::Infallible;
use core::{convert::Infallible, fmt::{self, Arguments}};
#[cfg(not(feature = "std"))]
use alloc::string::String;
use super::StrSink;
use super::{StrSink, FmtSink};
#[repr(transparent)]
#[derive(Clone, PartialEq, Eq, PartialOrd, Ord, Hash, Debug)]
pub struct StringSink(pub String);
pub struct SinkString(pub String);
impl SinkString {
#[inline]
#[must_use]
pub fn empty() -> Self {
Self(String::new())
}
#[inline]
#[must_use]
pub fn with_capacity(capacity: usize) -> Self {
Self(String::with_capacity(capacity))
}
impl StringSink {
#[inline]
#[must_use]
pub fn from_string_ref(s: &String) -> &Self {
@ -65,21 +99,21 @@ mod string_sink {
}
}
impl AsRef<StringSink> for String {
impl AsRef<SinkString> for String {
#[inline]
fn as_ref(&self) -> &StringSink {
StringSink::from_string_ref(self)
fn as_ref(&self) -> &SinkString {
SinkString::from_string_ref(self)
}
}
impl AsMut<StringSink> for String {
impl AsMut<SinkString> for String {
#[inline]
fn as_mut(&mut self) -> &mut StringSink {
StringSink::from_string_mut(self)
fn as_mut(&mut self) -> &mut SinkString {
SinkString::from_string_mut(self)
}
}
impl StrSink for StringSink {
impl StrSink for SinkString {
type Error = Infallible;
#[inline]
@ -94,4 +128,13 @@ mod string_sink {
Ok(())
}
}
impl FmtSink for SinkString {
fn sink_fmt<'a>(&mut self, args: Arguments<'a>) -> Result<(), <Self as StrSink>::Error> {
// We discard any error, since writing to a `String` should be infallible for correct
// implementations of `Display` etc.
let _ = <String as fmt::Write>::write_fmt(&mut self.0, args);
Ok(())
}
}
}

@ -4,7 +4,7 @@ use core::{
fmt,
hash::{Hash, Hasher},
mem::MaybeUninit,
ops, ptr, str,
ops, ptr, slice, str,
};
#[cfg(not(feature = "std"))]
@ -20,6 +20,8 @@ use alloc::{
#[cfg(feature = "std")]
use std::borrow::Cow;
use super::fixed::{FixedString, LengthError};
#[derive(Debug)]
pub struct CapacityError;
@ -37,7 +39,7 @@ impl std::error::Error for CapacityError {}
///
/// ```
/// # use libshire::strings::CappedString;
/// # fn main() -> Result<(), libshire::strings::capped::Error> {
/// # fn main() -> Result<(), libshire::strings::capped::CapacityError> {
/// let s = CappedString::<16>::new("hello world")?;
/// assert_eq!(&*s, "hello world");
/// # Ok(())
@ -45,7 +47,11 @@ impl std::error::Error for CapacityError {}
/// ```
#[derive(Clone)]
pub struct CappedString<const N: usize> {
/// The buffer storing the string data. It is an invariant of this type that the first `len`
/// elements of this buffer are initialised, valid UTF-8 string data.
buf: [MaybeUninit<u8>; N],
/// The length, in bytes, of the string stored in `buf`.
len: u8,
}
@ -70,6 +76,10 @@ impl<const N: usize> CappedString<N> {
Self { buf, len }
}
/// Returns the raw buffer and length backing this `CappedString`; the first element of the
/// tuple is the buffer `buf` and the second is the length `len`. The first `len` elements of
/// `buf` (i.e. `&buf[..usize::from(len)]`) are guaranteed to be initialised, valid UTF-8 string
/// data.
#[inline]
#[must_use]
pub const fn into_raw_parts(self) -> ([MaybeUninit<u8>; N], u8) {
@ -106,6 +116,7 @@ impl<const N: usize> CappedString<N> {
/// # Safety
/// `self.len` must be less than `N`, so that there is space in the buffer to append the byte.
/// The byte must be a valid UTF-8 codepoint; it must be in the range `0..=127`.
#[inline]
unsafe fn append_byte(&mut self, byte: u8) {
// SAFETY:
@ -167,10 +178,12 @@ impl<const N: usize> CappedString<N> {
/// Returns a new `CappedString` containing the given string data. The string data will be
/// stored inline; no heap allocation is used. An error will be returned if the length of the
/// provided string exceeds the `CappedString`'s maximum length, `N`.
///
/// If you would like a version which never returns an error, see [`Self::new_truncating`].
///
/// ```
/// # use libshire::strings::CappedString;
/// # fn main() -> Result<(), libshire::strings::capped::Error> {
/// # fn main() -> Result<(), libshire::strings::capped::CapacityError> {
/// let s = CappedString::<16>::new("hello world")?;
/// assert_eq!(&*s, "hello world");
/// # Ok(())
@ -199,6 +212,21 @@ impl<const N: usize> CappedString<N> {
unsafe { Ok(Self::from_raw_ptr(src.as_ptr(), len)) }
}
/// Returns a new `CappedString` containing the given string data. The string data will be
/// stored inline; no heap allocation is used. If the length of the provided string exceeds the
/// `CappedString`'s maximum length, `N`, it will be truncated to fit.
///
/// If you would like a version which returns an error rather than truncating the string, see
/// [`Self::new`].
///
/// ```
/// # use libshire::strings::CappedString;
/// let s1 = CappedString::<15>::new_truncating("こんにちは");
/// assert_eq!(&*s1, "こんにちは");
///
/// let s2 = CappedString::<10>::new_truncating("こんにちは");
/// assert_eq!(&*s2, "こんに");
/// ```
#[inline]
#[must_use]
pub fn new_truncating<S>(src: &S) -> Self
@ -232,7 +260,11 @@ impl<const N: usize> CappedString<N> {
}
// SAFETY:
//
// We have checked that `self.len != N` (`Self::MAX_LEN == N`). Since it is an
// invariant of `CappedString` that `self.len <= N`, it must hold that
// `self.len < N`. The first byte of a `str` of length 1 must be a valid UTF-8
// codepoint; it must be in the range `0..=127`, since anything outside this range
// implies the presence of further bytes.
unsafe { self.append_byte(encoded.as_bytes()[0]) }
Ok(())
@ -258,6 +290,20 @@ impl<const N: usize> CappedString<N> {
/// there is insufficient capacity remaining to do so.
///
/// If you would like a version which cannot fail, see [`Self::push_str_truncating`].
///
/// ```
/// # use libshire::strings::CappedString;
/// let mut s = CappedString::<8>::empty();
///
/// assert!(s.push_str("hello").is_ok());
/// assert_eq!(&*s, "hello");
///
/// assert!(s.push_str(" world").is_err());
/// assert_eq!(&*s, "hello");
///
/// assert!(s.push_str("!!!").is_ok());
/// assert_eq!(&*s, "hello!!!");
/// ```
#[inline]
pub fn push_str<S>(&mut self, src: &S) -> Result<(), CapacityError>
where
@ -283,6 +329,20 @@ impl<const N: usize> CappedString<N> {
///
/// If you would like a version which returns an error if there is not enough capacity remaining
/// to append the entire string slice, see [`Self::push_str`].
///
/// ```
/// # use libshire::strings::CappedString;
/// let mut s = CappedString::<10>::empty();
///
/// s.push_str_truncating("hello");
/// assert_eq!(&*s, "hello");
///
/// s.push_str_truncating(" 世界");
/// assert_eq!(&*s, "hello 世");
///
/// s.push_str_truncating("!!!");
/// assert_eq!(&*s, "hello 世!");
/// ```
#[inline]
pub fn push_str_truncating<S>(&mut self, src: &S)
where
@ -326,6 +386,15 @@ impl<const N: usize> CappedString<N> {
/// ```
#[inline]
pub fn clear(&mut self) {
// Setting the length to 0 is enough to clear the `CappedString`; we don't need to replace
// any of the old bytes in the buffer, as setting the length to 0 makes all of the old bytes
// inaccessible via safe methods, and means that any future calls to `Self::push` and
// friends will write over the old bytes.
//
// It may be desirable for security-critical code to zero the old buffer to prevent cleared
// data from being exposed via buffer-overflow exploits or similar. However, this should be
// implemented in a separate function so that regular users don't have to pay the cost of
// zeroing the buffer.
self.len = 0;
}
@ -340,34 +409,55 @@ impl<const N: usize> CappedString<N> {
}
/// Returns a mutable string slice pointing to the underlying string data.
///
/// ```
/// # use libshire::strings::CappedString;
/// # fn main() -> Result<(), libshire::strings::capped::CapacityError> {
/// let mut s = CappedString::<16>::new("hello!")?;
/// s.as_str_mut().make_ascii_uppercase();
/// assert_eq!(&*s, "HELLO!");
/// # Ok(())
/// # }
/// ```
#[inline]
#[must_use]
pub fn as_str_mut(&mut self) -> &mut str {
// SAFETY:
// The first `self.len` bytes of `self.buf` (which is returned by `Self::as_bytes_mut`)
// being valid UTF-8 is an invariant of `CappedString`.
// being valid UTF-8 is an invariant of `CappedString`. Since we are returning a `&mut str`
// to the caller, the caller cannot safely use it to mutate this `CappedString`'s buffer in
// a way that violates the UTF-8 property.
unsafe { str::from_utf8_unchecked_mut(self.as_bytes_mut()) }
}
/// Returns a byte slice containing the UTF-8 bytes representing the string.
///
/// ```
/// # use libshire::strings::CappedString;
/// # fn main() -> Result<(), libshire::strings::capped::CapacityError> {
/// let s = CappedString::<16>::new("hello!")?;
/// assert_eq!(s.as_bytes(), &[0x68, 0x65, 0x6c, 0x6c, 0x6f, 0x21]);
/// # Ok(())
/// # }
/// ```
#[inline]
#[must_use]
pub fn as_bytes(&self) -> &[u8] {
// Get the slice of the buffer containing initialised string data.
// SAFETY:
// It is an invariant of `CappedString` that `self.len <= N`, so `..self.len` is a valid
// range over `self.buf`.
let data_slice = unsafe { self.buf.get_unchecked(..usize::from(self.len)) };
// Get a pointer to the start of the buffer and convert it from a `*const MaybeUninit<u8>`
// to a `*const u8`. This conversion is valid because `MaybeUninit<u8>` has the same memory
// layout as `u8`.
let data_ptr = self.buf.as_ptr() as *const u8;
// Convert the `&[MaybeUninit<u8>]` to a `&[u8]`.
// SAFETY:
// `MaybeUninit<u8>` has the same memory layout as `u8`, and the first `self.len` bytes of
// the buffer are initialised, so this conversion is valid.
unsafe { &*(data_slice as *const [MaybeUninit<u8>] as *const [u8]) }
// It is an invariant of `CappedString` that the first `self.len` bytes of the buffer are
// initialised, so `data_ptr` is valid for reads of `self.len` bytes. `data_ptr` is
// trivially properly aligned, since `u8` has an alignment of 1.
unsafe { slice::from_raw_parts(data_ptr, usize::from(self.len)) }
}
/// # Safety
/// The caller is responsible for ensuring that the slice is valid UTF-8 when the mutable
/// borrow ends.
/// The slice must be valid UTF-8 when the mutable borrow ends and this `CappedString` is used
/// again.
#[inline]
#[must_use]
pub unsafe fn as_bytes_mut(&mut self) -> &mut [u8] {
@ -395,6 +485,24 @@ impl<const N: usize> CappedString<N> {
pub fn is_empty(&self) -> bool {
self.len == 0
}
/// Converts this `CappedString` into a [`FixedString`] of exactly `M` bytes.
///
/// # Errors
/// Returns [`LengthError`] if the length of this string, in bytes, is not exactly `M`.
#[inline]
pub fn into_fixed<const M: usize>(self) -> Result<FixedString<M>, LengthError> {
    // Converting a slice to an array only succeeds when the lengths match exactly.
    let bytes: [u8; M] = match self.as_bytes().try_into() {
        Ok(array) => array,
        Err(_) => return Err(LengthError),
    };
    // SAFETY:
    // It is an invariant of `CappedString` that the first `self.len` bytes of `self.buf` are
    // valid UTF-8, so the bytes returned by `Self::as_bytes` are valid UTF-8.
    Ok(unsafe { FixedString::from_raw_array(bytes) })
}
/// Converts this `CappedString` into a [`FixedString`] with the same const parameter `N`.
///
/// This is [`Self::into_fixed`] with `M = N`.
///
/// # Errors
/// Returns [`LengthError`] if the length of this string, in bytes, is not exactly `N`.
#[inline]
pub fn into_fixed_max_capacity(self) -> Result<FixedString<N>, LengthError> {
    self.into_fixed::<N>()
}
}
#[cfg(feature = "alloc")]
@ -623,4 +731,191 @@ fn truncate_str(src: &str, max_len: u8) -> (*const u8, u8) {
}
#[cfg(test)]
mod tests {}
mod tests {
use super::CappedString;
#[test]
fn test_truncate_str() {
use super::truncate_str;
let s1 = "hello";
assert_eq!(truncate_str(s1, 0), (s1.as_ptr(), 0));
assert_eq!(truncate_str(s1, 1), (s1.as_ptr(), 1));
assert_eq!(truncate_str(s1, 5), (s1.as_ptr(), 5));
assert_eq!(truncate_str(s1, 6), (s1.as_ptr(), 5));
let s2 = "こんにちは";
assert_eq!(truncate_str(s2, 0), (s2.as_ptr(), 0));
assert_eq!(truncate_str(s2, 1), (s2.as_ptr(), 0));
assert_eq!(truncate_str(s2, 2), (s2.as_ptr(), 0));
assert_eq!(truncate_str(s2, 3), (s2.as_ptr(), 3));
assert_eq!(truncate_str(s2, 4), (s2.as_ptr(), 3));
assert_eq!(truncate_str(s2, 5), (s2.as_ptr(), 3));
assert_eq!(truncate_str(s2, 6), (s2.as_ptr(), 6));
assert_eq!(truncate_str(s2, 14), (s2.as_ptr(), 12));
assert_eq!(truncate_str(s2, 15), (s2.as_ptr(), 15));
assert_eq!(truncate_str(s2, 16), (s2.as_ptr(), 15));
assert_eq!(truncate_str(s2, 18), (s2.as_ptr(), 15));
let s3 = "🤖 こんにちは, world 🤖";
assert_eq!(truncate_str(s3, 0), (s3.as_ptr(), 0));
assert_eq!(truncate_str(s3, 1), (s3.as_ptr(), 0));
assert_eq!(truncate_str(s3, 2), (s3.as_ptr(), 0));
assert_eq!(truncate_str(s3, 3), (s3.as_ptr(), 0));
assert_eq!(truncate_str(s3, 4), (s3.as_ptr(), 4));
assert_eq!(truncate_str(s3, 5), (s3.as_ptr(), 5));
assert_eq!(truncate_str(s3, 6), (s3.as_ptr(), 5));
assert_eq!(truncate_str(s3, 7), (s3.as_ptr(), 5));
assert_eq!(truncate_str(s3, 8), (s3.as_ptr(), 8));
assert_eq!(truncate_str(s3, 28), (s3.as_ptr(), 28));
assert_eq!(truncate_str(s3, 29), (s3.as_ptr(), 28));
assert_eq!(truncate_str(s3, 30), (s3.as_ptr(), 28));
assert_eq!(truncate_str(s3, 31), (s3.as_ptr(), 28));
assert_eq!(truncate_str(s3, 32), (s3.as_ptr(), 32));
assert_eq!(truncate_str(s3, 33), (s3.as_ptr(), 32));
assert_eq!(truncate_str(s3, 36), (s3.as_ptr(), 32));
let s4 = "a";
assert_eq!(truncate_str(s4, 0), (s4.as_ptr(), 0));
assert_eq!(truncate_str(s4, 1), (s4.as_ptr(), 1));
assert_eq!(truncate_str(s4, 2), (s4.as_ptr(), 1));
assert_eq!(truncate_str(s4, 3), (s4.as_ptr(), 1));
assert_eq!(truncate_str(s4, 4), (s4.as_ptr(), 1));
let s5 = "";
assert_eq!(truncate_str(s5, 0), (s5.as_ptr(), 0));
assert_eq!(truncate_str(s5, 1), (s5.as_ptr(), 0));
assert_eq!(truncate_str(s5, 2), (s5.as_ptr(), 0));
assert_eq!(truncate_str(s5, 3), (s5.as_ptr(), 0));
assert_eq!(truncate_str(s5, 4), (s5.as_ptr(), 0));
let s6 = "На берегу пустынных волн\n\
Стоял он, дум великих полн,\n\
И вдаль глядел. Пред ним широко\n\
Река неслася; бедный чёлн\n\
По ней стремился одиноко.\n\
По мшистым, топким берегам\n\
Чернели избы здесь и там,\n\
Приют убогого чухонца;\n\
И лес, неведомый лучам\n\
В тумане спрятанного солнца,\n\
Кругом шумел.";
assert_eq!(truncate_str(s6, 0), (s6.as_ptr(), 0));
assert_eq!(truncate_str(s6, 1), (s6.as_ptr(), 0));
assert_eq!(truncate_str(s6, 2), (s6.as_ptr(), 2));
assert_eq!(truncate_str(s6, 3), (s6.as_ptr(), 2));
assert_eq!(truncate_str(s6, 4), (s6.as_ptr(), 4));
assert_eq!(truncate_str(s6, 254), (s6.as_ptr(), 253));
assert_eq!(truncate_str(s6, 255), (s6.as_ptr(), 255));
}
#[test]
fn test_new() {
assert_eq!(&*CappedString::<5>::new("").unwrap(), "");
assert_eq!(&*CappedString::<5>::new("a").unwrap(), "a");
assert_eq!(&*CappedString::<5>::new("hello").unwrap(), "hello");
assert_eq!(&*CappedString::<6>::new("hello").unwrap(), "hello");
assert!(CappedString::<5>::new("hello!").is_err());
assert_eq!(&*CappedString::<6>::new("hello!").unwrap(), "hello!");
assert_eq!(&*CappedString::<5>::new("こ").unwrap(), "こ");
assert!(CappedString::<5>::new("こん").is_err());
assert_eq!(&*CappedString::<6>::new("こん").unwrap(), "こん");
assert!(CappedString::<6>::new("こんにちは").is_err());
assert_eq!(&*CappedString::<0>::new("").unwrap(), "");
assert!(CappedString::<0>::new("a").is_err());
}
#[test]
fn test_new_truncating() {
assert_eq!(&*CappedString::<5>::new_truncating(""), "");
assert_eq!(&*CappedString::<5>::new_truncating("a"), "a");
assert_eq!(&*CappedString::<5>::new_truncating("hello"), "hello");
assert_eq!(&*CappedString::<6>::new_truncating("hello"), "hello");
assert_eq!(&*CappedString::<5>::new_truncating("hello!"), "hello");
assert_eq!(&*CappedString::<6>::new_truncating("hello!"), "hello!");
assert_eq!(&*CappedString::<5>::new_truncating("こ"), "こ");
assert_eq!(&*CappedString::<5>::new_truncating("こん"), "こ");
assert_eq!(&*CappedString::<6>::new_truncating("こん"), "こん");
assert_eq!(&*CappedString::<6>::new_truncating("こんにちは"), "こん");
assert_eq!(&*CappedString::<7>::new_truncating("こんにちは"), "こん");
assert_eq!(&*CappedString::<8>::new_truncating("こんにちは"), "こん");
assert_eq!(&*CappedString::<9>::new_truncating("こんにちは"), "こんに");
assert_eq!(&*CappedString::<3>::new_truncating("🤖 hello 🤖"), "");
assert_eq!(&*CappedString::<4>::new_truncating("🤖 hello 🤖"), "🤖");
assert_eq!(&*CappedString::<14>::new_truncating("🤖 hello 🤖"), "🤖 hello ");
assert_eq!(&*CappedString::<15>::new_truncating("🤖 hello 🤖"), "🤖 hello 🤖");
assert_eq!(&*CappedString::<20>::new_truncating("🤖 hello 🤖"), "🤖 hello 🤖");
assert_eq!(&*CappedString::<0>::new_truncating(""), "");
assert_eq!(&*CappedString::<0>::new_truncating("a"), "");
}
#[test]
fn test_push() {
let mut s = CappedString::<6>::empty();
s.push_str("").unwrap();
assert_eq!(&*s, "");
s.push('h').unwrap();
assert_eq!(&*s, "h");
s.push_str("ello").unwrap();
assert_eq!(&*s, "hello");
assert!(s.push_str(", world").is_err());
assert_eq!(&*s, "hello");
}
#[test]
fn test_push_truncating() {
let mut s = CappedString::<6>::empty();
s.push_str_truncating("");
assert_eq!(&*s, "");
s.push_truncating('h');
assert_eq!(&*s, "h");
s.push_str_truncating("ello");
assert_eq!(&*s, "hello");
s.push_str_truncating(", world");
assert_eq!(&*s, "hello,");
s.clear();
s.push_truncating('こ');
assert_eq!(&*s, "こ");
s.push_truncating('ん');
assert_eq!(&*s, "こん");
s.push_truncating('に');
assert_eq!(&*s, "こん");
s.clear();
s.push_truncating('🤖');
assert_eq!(&*s, "🤖");
s.push_truncating('🤖');
assert_eq!(&*s, "🤖");
s.push_str_truncating("!!!");
assert_eq!(&*s, "🤖!!");
s.clear();
s.push_str_truncating("🤖 ");
assert_eq!(&*s, "🤖 ");
s.push_truncating('🤖');
assert_eq!(&*s, "🤖 ");
s.clear();
s.push_str_truncating(" ");
assert_eq!(&*s, " ");
s.push_str_truncating("🤖🤖🤖");
assert_eq!(&*s, " 🤖");
s.push_truncating('!');
assert_eq!(&*s, " 🤖");
s.clear();
s.push_str_truncating(" ");
assert_eq!(&*s, " ");
s.push_truncating('🤖');
assert_eq!(&*s, " ");
s.push_str_truncating("こんにちは");
assert_eq!(&*s, " こ");
}
}

@ -7,13 +7,25 @@ use core::{
str,
};
/// Error returned when string data does not have the length required by a `FixedString`.
#[derive(Debug)]
pub struct LengthError;

impl fmt::Display for LengthError {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.write_str("invalid string length for `FixedString`")
    }
}

#[cfg(feature = "std")]
impl std::error::Error for LengthError {}
pub struct FixedString<const N: usize> {
buf: [u8; N],
}
impl<const N: usize> FixedString<N> {
#[inline]
pub fn new(s: &str) -> Result<Self, Error> {
pub fn new(s: &str) -> Result<Self, LengthError> {
// SAFETY:
// A `&str` is always valid UTF-8.
unsafe { Self::from_raw_slice(s.as_bytes()) }
@ -22,15 +34,12 @@ impl<const N: usize> FixedString<N> {
/// # Safety
/// The provided byte slice must be valid UTF-8.
#[inline]
pub unsafe fn from_raw_slice(bytes: &[u8]) -> Result<Self, Error> {
pub unsafe fn from_raw_slice(bytes: &[u8]) -> Result<Self, LengthError> {
match bytes.try_into() {
// SAFETY:
// The caller is responsible for ensuring that the provided bytes are valid UTF-8.
Ok(bytes) => unsafe { Ok(Self::from_raw_array(bytes)) },
Err(_) => Err(Error {
expected_len: N,
actual_len: bytes.len(),
}),
Err(_) => Err(LengthError),
}
}
@ -116,7 +125,7 @@ impl<const N: usize> borrow::BorrowMut<str> for FixedString<N> {
}
impl<const N: usize> str::FromStr for FixedString<N> {
type Err = Error;
type Err = LengthError;
#[inline]
fn from_str(s: &str) -> Result<Self, Self::Err> {
@ -125,7 +134,7 @@ impl<const N: usize> str::FromStr for FixedString<N> {
}
impl<'a, const N: usize> TryFrom<&'a str> for FixedString<N> {
type Error = Error;
type Error = LengthError;
#[inline]
fn try_from(value: &'a str) -> Result<Self, Self::Error> {
@ -175,26 +184,6 @@ impl<const N: usize> fmt::Display for FixedString<N> {
}
}
#[derive(Debug)]
pub struct Error {
expected_len: usize,
actual_len: usize,
}
impl fmt::Display for Error {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(
f,
"expected {} bytes of string data, found {} bytes",
self.expected_len,
self.actual_len
)
}
}
#[cfg(feature = "std")]
impl std::error::Error for Error {}
#[cfg(test)]
mod tests {
use super::FixedString;

@ -65,16 +65,38 @@ pub type InliningString23 = InliningString<23>;
/// ```
#[repr(C)]
pub struct InliningString<const N: usize> {
/// The union which stores the string data itself. The active variant of this union is encoded
/// by `discrim`.
///
/// When the `InliningString` is properly aligned, `repr.boxed` will also be properly aligned:
/// - `boxed` is stored at offset 0 of `Repr` because it is `repr(C)`, and the fields of a C
/// union all begin at offset 0, as per section 6.7.2.1 constraint 16 of the C17 specification.
/// - `repr` is stored at offset 0 of `InliningString` because it is `repr(C)`, and the first
/// field of a C struct begins at offset 0, as per section 6.7.2.1 constraint 15 of the C17
/// specification.
/// - Therefore, `repr.boxed` is stored at offset 0 of `InliningString`.
/// - `InliningString` has the same alignment as `ManuallyDrop<MaybeUninit<Box<str>>>` because
/// it includes a `[ManuallyDrop<MaybeUninit<Box<str>>>; 0]` field.
/// - Therefore, when the `InliningString` is properly aligned, its `repr.boxed` must also be
/// properly aligned since they have the same address and alignment.
///
/// `repr.boxed` is always initialised, except for after
/// `InliningString::take_boxed_buf_invalidating` has returned; the function is unsafe and
/// requires that the `InliningString` is never used again once it has returned.
repr: Repr<N>,
// When `discrim - 1` is less than or equal to `MAX_LEN`, `repr.inline` is active and the first
// `discrim - 1` bytes of `repr.inline` contains initialised, valid UTF-8 data. When
// `discrim - 1` is greater than `MAX_LEN`, `repr.boxed` is active. `NonZeroU8` is used to
// allow for the niche optimisation
// (https://rust-lang.github.io/unsafe-code-guidelines/glossary.html#niche).
/// A value which encodes which field of `repr` is active and, possibly, some additional
/// information about that field. When `discrim - 1` is less than or equal to `MAX_LEN`,
/// `repr.inline` is active and the first `discrim - 1` bytes of `repr.inline` are initialised,
/// valid UTF-8 data. When `discrim - 1` is greater than `MAX_LEN`, `repr.boxed` is active.
///
/// `NonZeroU8` is used to allow for the niche optimisation, which allows
/// `Option<InliningString<N>>` and similar types to be efficiently represented.
discrim: NonZeroU8,
// A zero-sized field to ensure that `InliningString` has an alignment equal to the alignment
// of `Box<str>`, to ensure that `repr.boxed` is properly aligned when it is active.
_align: [Box<str>; 0],
/// A zero-sized field to ensure that `InliningString` has an alignment equal to the alignment
/// of `ManuallyDrop<MaybeUninit<Box<str>>>`, to ensure that `repr.boxed` is properly aligned.
_align: [ManuallyDrop<MaybeUninit<Box<str>>>; 0],
}
// `repr(C)` is necessary to ensure that both of the fields start at offset 0. `repr(packed)`
@ -220,44 +242,149 @@ impl<const N: usize> InliningString<N> {
}
}
/// Returns the first `len` bytes of the inline buffer as a shared byte slice whose lifetime is
/// tied to the borrow of `self`.
///
/// # Safety
/// The active field of `self.repr` must be `inline`. `len` must be less than or equal to
/// `self.discrim - 1`.
#[inline(always)]
unsafe fn inline_buf<'s>(&'s self, len: u8) -> &'s [u8] {
// SAFETY:
// The caller is responsible for ensuring that `inline` is the active field of `self.repr`.
let ptr = unsafe { addr_of!(self.repr.inline) };
// Cast the `MaybeUninit<u8>` pointer to a `u8` pointer; the two types have the same memory
// layout.
let ptr = ptr
as *const MaybeUninit<u8>
as *const u8;
// SAFETY:
// The caller is responsible for ensuring that `len <= self.discrim - 1`. It is an invariant
// of `InliningString` that, when `self.repr.inline` is active, the first `self.discrim - 1`
// bytes of `self.repr.inline` are initialised.
unsafe { slice::from_raw_parts::<'s, u8>(ptr, usize::from(len)) }
}
/// Returns the first `len` bytes of the inline buffer as a mutable byte slice whose lifetime is
/// tied to the mutable borrow of `self`.
///
/// # Safety
/// The active field of `self.repr` must be `inline`. `len` must be less than or equal to
/// `self.discrim - 1`.
#[inline(always)]
unsafe fn inline_buf_mut<'s>(&'s mut self, len: u8) -> &'s mut [u8] {
// SAFETY:
// The caller is responsible for ensuring that `inline` is the active field of `self.repr`.
let ptr = unsafe { addr_of_mut!(self.repr.inline) };
// Cast the `MaybeUninit<u8>` pointer to a `u8` pointer; the two types have the same memory
// layout.
let ptr = ptr
as *mut MaybeUninit<u8>
as *mut u8;
// SAFETY:
// The caller is responsible for ensuring that `len <= self.discrim - 1`. It is an invariant
// of `InliningString` that, when `self.repr.inline` is active, the first `self.discrim - 1`
// bytes of `self.repr.inline` are initialised.
unsafe { slice::from_raw_parts_mut::<'s, u8>(ptr, usize::from(len)) }
}
/// Returns a shared reference to the boxed buffer stored in `self.repr.boxed`, with a lifetime
/// tied to the borrow of `self`.
///
/// # Safety
/// The active field of `self.repr` must be `boxed`.
#[allow(clippy::borrowed_box)]
#[inline(always)]
unsafe fn boxed_buf<'s>(&'s self) -> &'s Box<str> {
// SAFETY:
// The caller is responsible for ensuring that `boxed` is the active field of `self.repr`.
// `self.repr.boxed` is properly aligned, as explained in the documentation for `self.repr`.
let maybe_boxed_buf: &'s _ = unsafe { &*addr_of!(self.repr.boxed) };
// SAFETY:
// It is sound to assume that the buffer is initialised; the only time it isn't initialised
// is after `Self::take_boxed_buf_invalidating` returns, and that function stipulates that
// the `InliningString` must never be used again after it returns.
unsafe { maybe_boxed_buf.assume_init_ref() }
}
/// Returns a mutable reference to the boxed buffer stored in `self.repr.boxed`, with a lifetime
/// tied to the mutable borrow of `self`.
///
/// # Safety
/// The active field of `self.repr` must be `boxed`.
#[allow(clippy::borrowed_box)]
#[inline(always)]
unsafe fn boxed_buf_mut<'s>(&'s mut self) -> &'s mut Box<str> {
// SAFETY:
// The caller is responsible for ensuring that `boxed` is the active field of `self.repr`.
// `self.repr.boxed` is properly aligned, as explained in the documentation for `self.repr`.
let maybe_boxed_buf: &'s mut _ = unsafe { &mut *addr_of_mut!(self.repr.boxed) };
// SAFETY:
// It is sound to assume that the buffer is initialised; the only time it isn't initialised
// is after `Self::take_boxed_buf_invalidating` returns, and that function stipulates that
// the `InliningString` must never be used again after it returns.
unsafe { maybe_boxed_buf.assume_init_mut() }
}
/// Returns a mutable reference to the raw `self.repr.boxed` field, without assuming that the
/// `MaybeUninit` it contains is initialised. This is what allows
/// `Self::take_boxed_buf_invalidating` to swap the boxed buffer out for uninitialised memory.
///
/// # Safety
/// The active field of `self.repr` must be `boxed`.
unsafe fn boxed_buf_raw_mut(&mut self) -> &mut ManuallyDrop<MaybeUninit<Box<str>>> {
// SAFETY:
// The caller is responsible for ensuring that `boxed` is the active field of `self.repr`.
// `self.repr.boxed` is properly aligned, as explained in the documentation for `self.repr`.
unsafe { &mut *addr_of_mut!(self.repr.boxed) }
}
/// Moves the boxed buffer out of this `InliningString`, leaving uninitialised memory in its
/// place, and returns it as an owned `Box<str>`.
///
/// Moving (rather than copying) the buffer means the underlying heap allocation is never
/// aliased, which is required because `Box` is backed by a `core::ptr::Unique` which forbids
/// aliasing.
///
/// After this function returns, the `InliningString` is "invalidated" and must never be used
/// again.
///
/// # Safety
/// The active field of `self.repr` must be `boxed`. Once this function returns, this
/// `InliningString` must never be used again; this includes dropping it.
unsafe fn take_boxed_buf_invalidating(&mut self) -> Box<str> {
    // SAFETY:
    // The caller guarantees that `boxed` is the active field of `self.repr`.
    let slot = unsafe { self.boxed_buf_raw_mut() };

    // Swap the buffer out, leaving uninitialised memory behind. Other functions assume that
    // `self.repr.boxed` is initialised, which no longer holds from this point on; that is why
    // the `InliningString` must never be used again once this function has returned.
    let taken = mem::replace(slot, ManuallyDrop::new(MaybeUninit::uninit()));

    // Unwrap the `ManuallyDrop` so that the buffer's destructor is enabled again.
    let maybe_init = ManuallyDrop::into_inner(taken);

    // SAFETY:
    // `maybe_init` was moved out of `self.repr.boxed`, which is only ever uninitialised after
    // the `mem::replace` above has run. Because the `InliningString` may never be used again
    // after this function returns, that `mem::replace` cannot have run on this
    // `InliningString` before, so the value is initialised.
    unsafe { maybe_init.assume_init() }
}
#[inline]
#[must_use]
pub fn as_str(&self) -> &str {
match self.inline_string_len() {
Some(len) => {
// Get a pointer to the `inline` field of the union.
// SAFETY:
// Since `inline_string_len` returned `Some`, the `inline` field must be active.
let inline_buf_ptr = unsafe { addr_of!(self.repr.inline) }
as *const MaybeUninit<u8>
as *const u8;
// Construct a byte slice from the pointer to the string data and the length.
// SAFETY:
// The first `len` bytes of `inline` are always initialised, as this is an
// invariant of `InliningString`.
let inline_buf_slice = unsafe { slice::from_raw_parts(inline_buf_ptr, usize::from(len)) };
// Perform an unchecked conversion from the byte slice to a string slice.
// SAFETY:
// The first `len` bytes of `inline` is always valid UTF-8, as this is an invariant
// of `InliningString`.
unsafe { str::from_utf8_unchecked(inline_buf_slice) }
// `Self::inline_string_len` returned `Some`, which means that the active field of
// `self.repr` is `inline`. `len = self.discrim - 1`, since this is the value
// returned by `Self::inline_string_len`. It is an invariant of `InliningString`
// that, when `self.repr.inline` is active, the first `self.discrim - 1` bytes are
// valid UTF-8.
unsafe { str::from_utf8_unchecked(self.inline_buf(len)) }
},
None => {
// SAFETY:
// `inline_string_len` returned `None`, which means that the `boxed` field is
// active. `boxed` is properly aligned because it is stored at offset 0 of
// `InliningString` (since both `InliningString` and `Repr` use `repr(C)`), and the
// alignment of `InliningString` is equal to the alignment of `Box<str>`.
let maybe_boxed_buf = unsafe { &*addr_of!(self.repr.boxed) };
// SAFETY:
// `repr.boxed` is initialised, as the only time it's uninitialised is when it is
// briefly replaced with a temporary value before the `InliningString` is dropped
// in the `into_string` function.
unsafe { maybe_boxed_buf.assume_init_ref() }
// `Self::inline_string_len` returned `None`, which means that the active field of
// `self.repr` is `boxed.`
unsafe { self.boxed_buf() }
},
}
}
@ -267,39 +394,20 @@ impl<const N: usize> InliningString<N> {
pub fn as_str_mut(&mut self) -> &mut str {
match self.inline_string_len() {
Some(len) => {
// Get a pointer to the `inline` field of the union.
// SAFETY:
// Since `inline_string_len` returned `Some`, the `inline` field must be active.
let inline_buf_ptr = unsafe { addr_of_mut!(self.repr.inline) }
as *mut MaybeUninit<u8>
as *mut u8;
// Construct a byte slice from the pointer to the string data and the length.
// SAFETY:
// The first `len` bytes of `inline` are always initialised, as this is an
// invariant of `InliningString`.
let inline_buf_slice = unsafe { slice::from_raw_parts_mut(inline_buf_ptr, usize::from(len)) };
// Perform an unchecked conversion from the byte slice to a string slice.
// SAFETY:
// The first `len` bytes of `inline` is always valid UTF-8, as this is an invariant
// of `InliningString`.
unsafe { str::from_utf8_unchecked_mut(inline_buf_slice) }
// `Self::inline_string_len` returned `Some`, which means that the active field of
// `self.repr` is `inline`. `len = self.discrim - 1`, since this is the value
// returned by `Self::inline_string_len`. It is an invariant of `InliningString`
// that, when `self.repr.inline` is active, the first `self.discrim - 1` bytes are
// valid UTF-8.
unsafe { str::from_utf8_unchecked_mut(self.inline_buf_mut(len)) }
},
None => {
// SAFETY:
// `inline_string_len` returned `None`, which means that the `boxed` field is
// active. `boxed` is properly aligned because it is stored at offset 0 of
// `InliningString` (since both `InliningString` and `Repr` use `repr(C)`), and the
// alignment of `InliningString` is equal to the alignment of `Box<str>`.
let maybe_boxed_buf = unsafe { &mut *addr_of_mut!(self.repr.boxed) };
// SAFETY:
// `repr.boxed` is initialised, as the only time it's uninitialised is when it is
// briefly replaced with a temporary value before the `InliningString` is dropped
// in the `into_string` function.
unsafe { maybe_boxed_buf.assume_init_mut() }
// `Self::inline_string_len` returned `None`, which means that the active field of
// `self.repr` is `boxed.`
unsafe { self.boxed_buf_mut() }
},
}
}
@ -309,58 +417,31 @@ impl<const N: usize> InliningString<N> {
pub fn into_boxed_str(self) -> Box<str> {
match self.inline_string_len() {
Some(len) => {
// Get a pointer to the `inline` field of the union.
// SAFETY:
// Since `inline_string_len` returned `Some`, the `inline` field must be active.
let inline_buf_ptr = unsafe { addr_of!(self.repr.inline) }
as *const MaybeUninit<u8>
as *const u8;
// Construct a byte slice from the pointer to the string data and the length.
// SAFETY:
// The first `len` bytes of `inline` are always initialised, as this is an
// invariant of `InliningString`.
let inline_buf_slice = unsafe { slice::from_raw_parts(inline_buf_ptr, usize::from(len)) };
// Perform an unchecked conversion from the byte slice to a string slice.
// SAFETY:
// The first `len` bytes of `inline` is always valid UTF-8, as this is an invariant
// of `InliningString`.
let str_slice = unsafe { str::from_utf8_unchecked(inline_buf_slice) };
Box::from(str_slice)
// `Self::inline_string_len` returned `Some`, which means that the active field of
// `self.repr` is `inline`. `len = self.discrim - 1`, since this is the value
// returned by `Self::inline_string_len`. It is an invariant of `InliningString`
// that, when `self.repr.inline` is active, the first `self.discrim - 1` bytes are
// valid UTF-8.
let inline_str_slice = unsafe { str::from_utf8_unchecked(self.inline_buf(len)) };
Box::from(inline_str_slice)
},
None => {
let manual_boxed_buf = {
// Disable the destructor for `self`; we are transferring ownership of the
// allocated memory to the caller, so we don't want to run the destructor which
// would free the memory.
let mut this = ManuallyDrop::new(self);
// SAFETY:
// `inline_string_len` returned `None`, which means that the `boxed` field is
// active. `boxed` is properly aligned because it is stored at offset 0 of
// `InliningString` (since both `InliningString` and `Repr` use `repr(C)`), and
// the alignment of `InliningString` is equal to the alignment of `Box<str>`.
let field_ref = unsafe { &mut *addr_of_mut!(this.repr.boxed) };
// Move `repr.boxed` out of the `InliningString`, replacing it with
// uninitialised memory. This is sound because we have ownership of the
// `InliningString` and we will not be doing anything else with it after this
// which calls `assume_init` on `repr.boxed`; at the end of this block, the
// `InliningString` is dropped without calling its destructor.
mem::replace(field_ref, ManuallyDrop::new(MaybeUninit::uninit()))
};
// Re-enable the destructor for the boxed string.
let maybe_boxed_buf = ManuallyDrop::into_inner(manual_boxed_buf);
// Use a `ManuallyDrop` to stop the destructor from running. This is important
// because the `Drop` implementation assumes that `self.repr.boxed` is initialised,
// but we are about to replace it with uninitialised memory by calling
// `take_boxed_buf_invalidating`.
let mut this = ManuallyDrop::new(self);
// SAFETY:
// The boxed string is initialised, as we obtained it by moving `repr.boxed`, and
// the only time `repr.boxed` is uninitialised is when it is briefly replaced with
// a temporary value in the block above.
unsafe { maybe_boxed_buf.assume_init() }
// `Self::inlining_string_len` returned `None`, which means that the active field of
// `self.repr` is `boxed`. After the call to `take_boxed_buf_invalidating` returns,
// the `InliningString` is never used again; this function takes ownership of the
// `InliningString`, and we disabled its destructor by wrapping it in
// `ManuallyDrop`.
unsafe { this.take_boxed_buf_invalidating() }
},
}
}
@ -421,11 +502,17 @@ impl<const N: usize> InliningString<N> {
impl<const N: usize> Drop for InliningString<N> {
fn drop(&mut self) {
if self.heap_allocated() {
let boxed_buf = unsafe { &mut *addr_of_mut!(self.repr.boxed) };
// Move the boxed buffer out of the `InliningString`, replacing it with uninitialised
// memory, then immediately drop the boxed buffer.
//
// SAFETY:
// Since this is a drop implementation, `boxed` will not be used again after this.
let _ = unsafe { ManuallyDrop::take(boxed_buf).assume_init() };
// `Self::heap_allocated` returned true, so `self.repr.boxed` must be active. Once the
// function returns, the `InliningString` is never used again; the only thing which
// happens next is dropping each of `InliningString`'s fields, but none of the fields
// are `Drop` so this is a no-op.
//
// See https://doc.rust-lang.org/reference/destructors.html.
let _ = unsafe { self.take_boxed_buf_invalidating() };
}
}
}
@ -447,17 +534,8 @@ impl<const N: usize> Clone for InliningString<N> {
None => {
// SAFETY:
// `inline_string_len` returned `None`, which means that the `boxed` field is
// active. `boxed` is properly aligned because it is stored at offset 0 of
// `InliningString` (since both `InliningString` and `Repr` use `repr(C)`), and the
// alignment of `InliningString` is equal to the alignment of `Box<str>`.
let maybe_boxed_buf = unsafe { &*addr_of!(self.repr.boxed) };
// SAFETY:
// `repr.boxed` is initialised, as the only time it's uninitialised is when it is
// briefly replaced with a temporary value before the `InliningString` is dropped
// in the `into_string` function.
let boxed_buf = unsafe { maybe_boxed_buf.assume_init_ref() };
// Since `inline_string_len` returned `None`, the `boxed` field must be active.
let boxed_buf = unsafe { self.boxed_buf() };
Self::new_boxed(boxed_buf.clone())
},

@ -3,7 +3,7 @@ pub mod capped;
#[cfg(feature = "alloc")]
pub mod inlining;
pub use fixed::{FixedString, Error as FixedStringError};
pub use capped::{CappedString, CapacityError as CappedStringError};
pub use fixed::FixedString;
pub use capped::CappedString;
#[cfg(feature = "alloc")]
pub use inlining::{InliningString, InliningString23};

Loading…
Cancel
Save