@ -20,6 +20,18 @@ use alloc::{
#[ cfg(feature = " std " ) ]
use std ::borrow ::Cow ;
#[ derive(Debug) ]
pub struct CapacityError ;
impl fmt ::Display for CapacityError {
fn fmt ( & self , f : & mut fmt ::Formatter < ' _ > ) -> fmt ::Result {
write! ( f , "`CappedString` capacity exceeded" )
}
}
#[ cfg(feature = " std " ) ]
impl std ::error ::Error for CapacityError { }
/// A string type which stores at most `N` bytes of string data. The string data is stored inline
/// rather than using a heap allocation.
///
@ -50,8 +62,8 @@ impl<const N: usize> CappedString<N> {
/// Creates a new `CappedString` from a given byte buffer and length.
///
/// # Safety
///
/// The first `len` bytes of `buf` (i.e. `buf[..len]`) must be initialised and valid UTF-8 .
/// The first `len` bytes of `buf` (i.e. `buf[..len]`) must be initialised and valid UTF-8.
/// `len` must be less than or equal to `N` .
#[ inline ]
#[ must_use ]
pub const unsafe fn from_raw_parts ( buf : [ MaybeUninit < u8 > ; N ] , len : u8 ) -> Self {
@ -60,10 +72,38 @@ impl<const N: usize> CappedString<N> {
#[ inline ]
#[ must_use ]
pub fn into_raw_parts ( self ) -> ( [ MaybeUninit < u8 > ; N ] , u8 ) {
pub const fn into_raw_parts ( self ) -> ( [ MaybeUninit < u8 > ; N ] , u8 ) {
( self . buf , self . len )
}
/// # Safety
/// `src` must point to `len` bytes of valid, initialised, UTF-8 string data. `len` must be less
/// than or equal to `N`.
#[ inline ]
unsafe fn from_raw_ptr ( src : * const u8 , len : u8 ) -> Self {
// `u8` has the same memory layout as `MaybeUninit<u8>`, so this cast is valid.
let src = src as * const MaybeUninit < u8 > ;
// SAFETY:
// `MaybeUninit::uninit()` is a valid value for `[MaybeUninit<u8>; N]`, since each element
// of the array is allowed to be uninitialised.
let mut buf = unsafe { MaybeUninit ::< [ MaybeUninit < u8 > ; N ] > ::uninit ( ) . assume_init ( ) } ;
// SAFETY:
// The caller is responsible for ensuring that `src` points to a valid string, which means
// that it must not overlap with the new local variable `buf`. The caller is responsible
// for ensuring that `src` is valid for reads of `len` bytes. The caller is responsible for
// ensuring that `len <= N`, so `buf` is valid for writes of `len` bytes. `src` and `buf`
// are both trivially properly aligned, since they both have an alignment of 1.
unsafe { ptr ::copy_nonoverlapping ( src , buf . as_mut_ptr ( ) , usize ::from ( len ) ) ; }
// SAFETY:
// The caller is responsible for ensuring that `src` points to `len` bytes of valid UTF-8
// data. `src` is copied into the start of `buf`, so the first `len` bytes of `buf` are
// valid UTF-8. The caller is responsible for ensuring that `len <= N`.
unsafe { Self ::from_raw_parts ( buf , len ) }
}
/// Returns a new empty `CappedString`.
///
/// ```
@ -100,23 +140,18 @@ impl<const N: usize> CappedString<N> {
/// # }
/// ```
#[ inline ]
pub fn new < S > ( s : & S ) -> Result < Self , Error>
pub fn new < S > ( s rc : & S ) -> Result < Self , Capacity Error>
where
S : AsRef < str > + ? Sized ,
{
// Convert the string to a byte slice, which is guaranteed to be valid UTF-8 since this is
// an invariant of `str`.
let src = < S as AsRef < str > > ::as_ref ( s ) . as_bytes ( ) ;
let src = < S as AsRef < str > > ::as_ref ( s rc ) . as_bytes ( ) ;
// If the length of the string is greater than `Self::MAX_LEN`, it will not fit in the
// buffer so return `None`.
let len = u8 ::try_from ( src . len ( ) )
. ok ( )
. and_then ( | len | ( len < = Self ::MAX_LEN ) . then_some ( len ) )
. ok_or ( Error {
max_len : N ,
actual_len : src . len ( ) ,
} ) ? ;
// buffer so return an error.
let len = Self ::bound_to_max_len ( src . len ( ) )
. ok_or ( CapacityError ) ? ;
// SAFETY:
// `MaybeUninit::uninit()` is a valid value for `[MaybeUninit<u8>; N]`, since each element
@ -139,6 +174,52 @@ impl<const N: usize> CappedString<N> {
unsafe { Ok ( Self ::from_raw_parts ( buf , len ) ) }
}
#[ inline ]
#[ must_use ]
pub fn new_truncating < S > ( src : & S ) -> Self
where
S : AsRef < str > + ? Sized ,
{
let ( src , len ) = truncate_str ( < S as AsRef < str > > ::as_ref ( src ) , Self ::MAX_LEN ) ;
// SAFETY:
// It is part of the contract of `truncate_str` that it returns a pointer to a valid UTF-8
// string of length `len`, and that `len` is less than or equal to the provided maximum
// length, which is `Self::MAX_LEN` (which is equal to `N`) in this case.
unsafe { Self ::from_raw_ptr ( src , len ) }
}
/// Returns the length as a `u8` if it is less than or equal to `Self::MAX_LEN` (which is the
/// `u8` representation of `N`). Otherwise, returns `None`.
#[ inline ]
fn bound_to_max_len ( len : usize ) -> Option < u8 > {
u8 ::try_from ( len )
. ok ( )
. and_then ( | len | ( len < = Self ::MAX_LEN ) . then_some ( len ) )
}
pub fn push ( & mut self , c : char ) -> Result < ( ) , CapacityError > {
todo! ( )
}
pub fn push_truncating ( & mut self , c : char ) {
todo! ( )
}
pub fn push_str < S > ( & mut self , s : & S ) -> Result < ( ) , CapacityError >
where
S : AsRef < str > + ? Sized ,
{
todo! ( )
}
pub fn push_str_truncating < S > ( & mut self , s : & S )
where
S : AsRef < str > + ? Sized ,
{
todo! ( )
}
/// Returns a string slice pointing to the underlying string data.
#[ inline ]
#[ must_use ]
@ -194,10 +275,14 @@ impl<const N: usize> CappedString<N> {
unsafe { & mut * ( data_slice as * mut [ MaybeUninit < u8 > ] as * mut [ u8 ] ) }
}
#[ inline ]
#[ must_use ]
pub fn len ( & self ) -> usize {
usize ::from ( self . len )
}
#[ inline ]
#[ must_use ]
pub fn is_empty ( & self ) -> bool {
self . len = = 0
}
@ -205,10 +290,14 @@ impl<const N: usize> CappedString<N> {
#[ cfg(feature = " alloc " ) ]
impl < const N : usize > CappedString < N > {
#[ inline ]
#[ must_use ]
pub fn into_boxed_str ( self ) -> Box < str > {
self . as_str ( ) . into ( )
}
#[ inline ]
#[ must_use ]
pub fn into_string ( self ) -> String {
self . as_str ( ) . to_owned ( )
}
@ -266,7 +355,7 @@ impl<const N: usize> borrow::BorrowMut<str> for CappedString<N> {
}
impl < ' a , const N : usize > TryFrom < & ' a str > for CappedString < N > {
type Error = Error;
type Error = Capacity Error;
#[ inline ]
fn try_from ( s : & ' a str ) -> Result < Self , Self ::Error > {
@ -276,7 +365,7 @@ impl<'a, const N: usize> TryFrom<&'a str> for CappedString<N> {
#[ cfg(feature = " alloc " ) ]
impl < const N : usize > TryFrom < String > for CappedString < N > {
type Error = Error;
type Error = Capacity Error;
#[ inline ]
fn try_from ( s : String ) -> Result < Self , Self ::Error > {
@ -286,7 +375,7 @@ impl<const N: usize> TryFrom<String> for CappedString<N> {
#[ cfg(feature = " alloc " ) ]
impl < const N : usize > TryFrom < Box < str > > for CappedString < N > {
type Error = Error;
type Error = Capacity Error;
#[ inline ]
fn try_from ( s : Box < str > ) -> Result < Self , Self ::Error > {
@ -296,7 +385,7 @@ impl<const N: usize> TryFrom<Box<str>> for CappedString<N> {
#[ cfg(feature = " alloc " ) ]
impl < ' a , const N : usize > TryFrom < Cow < ' a , str > > for CappedString < N > {
type Error = Error;
type Error = Capacity Error;
#[ inline ]
fn try_from ( s : Cow < ' a , str > ) -> Result < Self , Self ::Error > {
@ -351,7 +440,7 @@ impl<const N: usize> Hash for CappedString<N> {
}
impl < const N : usize > str ::FromStr for CappedString < N > {
type Err = Error;
type Err = Capacity Error;
#[ inline ]
fn from_str ( s : & str ) -> Result < Self , Self ::Err > {
@ -373,32 +462,68 @@ impl<const N: usize> fmt::Display for CappedString<N> {
}
}
#[ derive(Debug) ]
pub struct Error {
max_len : usize ,
actual_len : usize ,
}
impl Error {
pub fn max_len ( & self ) -> usize {
self . max_len
}
pub fn actual_len ( & self ) -> usize {
self . actual_len
}
}
impl fmt ::Display for Error {
fn fmt ( & self , f : & mut fmt ::Formatter < ' _ > ) -> fmt ::Result {
write! (
f ,
"string of length {} exceeds limit for `CappedString<{}>`" ,
self . actual_len ,
self . max_len
)
/// Returns a pointer to the longest prefix of `src` which is valid UTF-8 and whose length is
/// shorter than `max_len`, and returns the length of this prefix.
#[ inline ]
fn truncate_str ( src : & str , max_len : u8 ) -> ( * const u8 , u8 ) {
match u8 ::try_from ( src . len ( ) ) {
// If the length of the `src` string is less than or equal to `max_len`, there is no need to
// truncate it.
Ok ( src_len ) if src_len < = max_len = > ( src . as_ptr ( ) , src_len ) ,
// If the length of `src` is greater than `max_len`, we need to truncate it. Note that
// `u8::try_from` returning an error means that `src.len() > max_len`, since `max_len` is a
// `u8` and `src.len()` is a `usize`.
_ = > {
let src = src . as_bytes ( ) ;
let mut i = max_len ;
// Find the rightmost codepoint which starts at an index less than or equal to
// `max_len`. Everything to the left of this will be valid UTF-8 with a length less
// than or equal to `max_len`. We only need to do 3 iterations because codepoints have
// a maximum length of 4 bytes.
for _ in 0 .. 3 {
// The first byte in the string must always be the start of a codepoint.
if i = = 0 {
break ;
}
// SAFETY:
// `i <= max_len`, since it is never incremented. If this branch is run, then either
// `src.len(): usize` does not fit into a `u8`, in which case it must be greater
// than `max_len: u8`, or it does fit into a `u8` but it is greater than `max_len`.
// Therefore, `src.len() > max_len` must hold. Substitution gives `i < src.len()`,
// so `i` is a valid index into `src`.
let byte = unsafe { * src . get_unchecked ( usize ::from ( i ) ) } ;
// If the byte is not of the form 0b10xxxxxx, then it is the start of a codepoint.
if byte & 0xc0 ! = 0x80 {
break ;
}
i - = 1 ;
}
// // SAFETY:
// // As discussed above, `i < src.len()` always holds, so `..i` is a valid range over
// // `src`.
// let src_truncated = unsafe { src.get_unchecked(..usize::from(i)) };
// // SAFETY:
// // `i` is the index of a start of a codepoint, and codepoints are contiguous, so the
// // substring `src[..i]` must be valid UTF-8.
// let src_truncated = unsafe { str::from_utf8_unchecked(src_truncated) };
// (src_truncated, i)
// `i < src.len()` always holds as discussed above, so the pointer `src.as_ptr()` is
// valid for reads of `i` bytes. `i` is the index of the start of a codepoint, and
// codepoints are contiguous, so the `i` bytes being pointed to must be valid UTF-8.
( src . as_ptr ( ) , i )
} ,
}
}
#[ cfg(feature = " std " ) ]
impl std ::error ::Error for Error { }
#[ cfg( test)]
mod tests { }