capped string uppercase conversion

rename
pantonshire 2 years ago
parent 5f169d1ac5
commit d083d30940

@ -1,42 +1,100 @@
//! Module for the [`CappedString`](CappedString) type, which is a string type which always stores //! Module for the [`CappedString`](CappedString) type, which is a string type which always stores
//! its data inline. //! its data inline.
use core::{str, convert::TryFrom, ops::Deref, borrow::Borrow, fmt}; use core::{str, ops::Deref, borrow::Borrow, fmt};
/// TODO: documentation /// TODO: documentation
pub struct CappedString<const N: usize> { pub struct CappedString<const N: usize> {
/// The string data. It is an invariant that this must always be valid UTF-8. /// The string data. It is an invariant that the first `len` bytes must be valid UTF-8.
buf: [u8; N], buf: [u8; N],
/// The length of the string data in the buffer. It is an invariant that `len <= N`.
len: usize,
} }
impl<const N: usize> CappedString<N> { impl<const N: usize> CappedString<N> {
/// TODO: documentation /// TODO: documentation
#[inline] #[inline]
#[must_use] #[must_use]
pub fn new(s: &str) -> Option<Self> { pub fn from_str(s: &str) -> Option<Self> {
unsafe { Self::from_utf8_unchecked(s.as_bytes()) } unsafe { Self::from_utf8_unchecked(s.as_bytes()) }
} }
/// TODO: documentation /// TODO: documentation
///
/// # Safety
/// - `bs` must be valid UTF-8.
#[inline] #[inline]
#[must_use] #[must_use]
pub unsafe fn from_utf8_unchecked(bs: &[u8]) -> Option<Self> { pub unsafe fn from_utf8_unchecked(bs: &[u8]) -> Option<Self> {
let buf = <[u8; N]>::try_from(bs).ok()?; let mut buf = [0u8; N];
Some(Self { buf }) buf.get_mut(..bs.len())?.copy_from_slice(bs);
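// SAFETY:
// - `bs.len() <= N` has already been checked by the `get_mut` call, which will return
// `None` and cause us to return early if the condition does not hold.
// - The caller guarantees that `bs` is valid UTF-8, and exactly `bs.len()` bytes of it have
// been copied to the start of `buf`, so the first `bs.len()` bytes of `buf` are valid UTF-8.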
unsafe { Some(Self::from_raw_parts(buf, bs.len())) }
}
/// Constructs a `CappedString<N>` directly from a buffer and the length of the string data
/// stored at the start of that buffer, without performing any checks.
///
/// Only the first `len` bytes of `buf` are treated as string data. `as_str` relies on both of
/// the conditions below without re-checking them.
///
/// # Safety
/// - `len <= N` must hold.
/// - The first `len` bytes of `buf` must be valid UTF-8.
#[inline]
#[must_use]
pub unsafe fn from_raw_parts(buf: [u8; N], len: usize) -> Self {
Self { buf, len }
}
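/// Consumes the string and returns its raw parts: the underlying buffer and the length of the
/// string data, which occupies the first `len` bytes of the buffer.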
#[inline]
#[must_use]
pub fn into_raw_parts(self) -> ([u8; N], usize) {
(self.buf, self.len)
} }
/// TODO: documentation /// TODO: documentation
#[inline] #[inline]
#[must_use] #[must_use]
pub fn as_str(&self) -> &str { pub fn as_str(&self) -> &str {
unsafe { str::from_utf8_unchecked(&self.buf) } // SAFETY:
// - It is an invariant of `CappedString<N>` that `len <= N`.
// - It is an invariant of `CappedString<N>` that the first `len` bytes of `buf` are valid
// UTF-8.
unsafe {
let buf_occupied_prefix = self.buf.get_unchecked(..self.len);
str::from_utf8_unchecked(buf_occupied_prefix)
}
} }
/// TODO: documentation /// TODO: documentation
#[inline] #[inline]
#[must_use] #[must_use]
pub fn to_uppercase<const M: usize>(&self) -> Option<CappedString<M>> { pub fn to_uppercase<const M: usize>(&self) -> Option<CappedString<M>> {
todo!() let mut buf = [0u8; M];
let mut cursor = 0usize;
for c_orig in self.as_str().chars() {
for c_upper in c_orig.to_uppercase() {
let encode_buf = cursor
.checked_add(c_upper.len_utf8())
.and_then(|encode_buf_end| buf.get_mut(cursor..encode_buf_end))?;
// FIXME: avoid the panic asm that gets generated for this encode (can never panic,
// as we always have at least `c_upper.len_utf8()` buffer space).
let encoded = c_upper.encode_utf8(encode_buf);
cursor = cursor.checked_add(encoded.len())?;
}
}
let filled_buf = buf.get(..cursor)?;
// SAFETY:
// `filled_buf` has been filled with a sequence of bytes obtained from `char::encode_utf8`,
// so it is valid UTF-8.
unsafe { CappedString::from_utf8_unchecked(filled_buf) }
} }
} }
@ -86,7 +144,7 @@ impl<'de, const N: usize> serde::de::Visitor<'de> for CappedStringVisitor<N> {
where where
E: serde::de::Error, E: serde::de::Error,
{ {
CappedString::new(v) CappedString::from_str(v)
.ok_or_else(|| E::invalid_length(v.len(), &self)) .ok_or_else(|| E::invalid_length(v.len(), &self))
} }
@ -96,7 +154,50 @@ impl<'de, const N: usize> serde::de::Visitor<'de> for CappedStringVisitor<N> {
{ {
str::from_utf8(v) str::from_utf8(v)
.map_err(|_| E::invalid_value(serde::de::Unexpected::Bytes(v), &self)) .map_err(|_| E::invalid_value(serde::de::Unexpected::Bytes(v), &self))
.and_then(|v| CappedString::new(v) .and_then(|v| CappedString::from_str(v)
.ok_or_else(|| E::invalid_length(v.len(), &self))) .ok_or_else(|| E::invalid_length(v.len(), &self)))
} }
} }
#[cfg(test)]
mod tests {
    use super::CappedString;

    /// Checks `CappedString::to_uppercase` across exact-fit, oversized, mixed-case,
    /// too-small-output, expanding (`ß` -> `SS`) and empty-string cases.
    #[test]
    fn test_capped_string_uppercase() {
        /// Builds a `CappedString<N>` from `input`, uppercases it into a `CappedString<M>` and
        /// compares the outcome with `expected` (`None` meaning the result must not fit).
        fn check<const N: usize, const M: usize>(input: &str, expected: Option<&str>) {
            let original = CappedString::<N>::from_str(input).unwrap();
            let uppercased = original.to_uppercase::<M>();
            assert_eq!(uppercased.as_ref().map(|s| s.as_str()), expected);
        }

        // Output capacity exactly matches the uppercased length.
        check::<5, 5>("hello", Some("HELLO"));
        // Spare capacity on both the input and the output side.
        check::<20, 20>("hello", Some("HELLO"));
        // Already-uppercase characters pass through unchanged.
        check::<5, 5>("hElLo", Some("HELLO"));
        // Output capacity one byte too small: conversion must be rejected.
        check::<5, 4>("hello", None);
        // `ß` is two bytes and uppercases to the two one-byte characters `SS`.
        check::<5, 5>("groß", Some("GROSS"));
        // Empty string with non-zero capacity.
        check::<1, 1>("", Some(""));
        // Empty string with zero capacity.
        check::<0, 0>("", Some(""));
    }
}

Loading…
Cancel
Save