From fdb0e16fa3382914dc8153a9f860793dfce24e5b Mon Sep 17 00:00:00 2001 From: Pantonshire Date: Sun, 15 May 2022 17:14:39 +0100 Subject: [PATCH] Initial commit --- .gitignore | 2 + Cargo.toml | 15 ++ src/lib.rs | 1 + src/strings.rs | 377 +++++++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 395 insertions(+) create mode 100644 .gitignore create mode 100644 Cargo.toml create mode 100644 src/lib.rs create mode 100644 src/strings.rs diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..96ef6c0 --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ +/target +Cargo.lock diff --git a/Cargo.toml b/Cargo.toml new file mode 100644 index 0000000..a3fd908 --- /dev/null +++ b/Cargo.toml @@ -0,0 +1,15 @@ +[package] +name = "libshire" +version = "0.1.0" +edition = "2021" + +[features] +sqlx-actix-native-tls = ["sqlx/runtime-actix-native-tls"] +sqlx-async-std-native-tls = ["sqlx/runtime-async-std-native-tls"] +sqlx-actix-rustls = ["sqlx/runtime-actix-rustls"] +sqlx-async-std-rustls = ["sqlx/runtime-async-std-rustls"] +sqlx-tokio-rustls = ["sqlx/runtime-tokio-rustls"] +sqlx-tokio-native-tls = ["sqlx/runtime-tokio-native-tls"] + +[dependencies] +sqlx = { version = "0.5", optional = true } diff --git a/src/lib.rs b/src/lib.rs new file mode 100644 index 0000000..e8dfd78 --- /dev/null +++ b/src/lib.rs @@ -0,0 +1 @@ +pub mod strings; diff --git a/src/strings.rs b/src/strings.rs new file mode 100644 index 0000000..f448800 --- /dev/null +++ b/src/strings.rs @@ -0,0 +1,377 @@ +use std::{ + borrow::{self, Cow}, + cmp::Ordering, + convert::Infallible, + fmt, + hash::{Hash, Hasher}, + ops, + str::FromStr, +}; + +#[cfg(feature = "sqlx")] +use sqlx::{ + Database, + database::{HasArguments, HasValueRef}, + Decode, + Encode, + Type, + encode::IsNull, + error::BoxDynError, +}; + +use buf::{StackString, HeapString}; + +/// A non-growable string where strings 22 bytes or shorter are stored on the stack and longer +/// strings are stored on the heap. +/// +/// 22 bytes is chosen because it is optimal for 64-bit architectures; the minimum possible size +/// of the data structure on 64-bit architectures which always keeps the data properly aligned is +/// 24 bytes (because, when heap-allocated, the data structure contains a 16-byte `Box<[u8]>` with +/// 8-byte alignment and a 1-byte discriminant, and the greatest multiple of 8 which is ≥17 is 24), +/// and the stack-allocated variant needs to store 2 extra bytes for the length and disciminant. +pub type ShString22 = ShString<22>; + +/// A non-growable string which may be allocated either on the stack or on the heap; strings `N` +/// bytes or shorter will be allocated on the stack, while strings longer than `N` bytes will be +/// allocated on the heap. +#[derive(Clone)] +pub struct ShString(Repr); + +impl ShString { + /// Creates a new `ShString` from the given string slice, putting it on the stack if possible + /// or creating a new heap allocation otherwise. + pub fn new_from_str(s: &str) -> Self { + match StackString::from_str(s) { + Some(stack_buf) => Self(Repr::Stack(stack_buf)), + None => Self(Repr::Heap(HeapString::from_str(s))), + } + } + + /// Creates a new `ShString` from the given owned `String`, moving the string data onto the + /// stack if possible or reusing the `String`'s heap allocation otherwise. + pub fn new_from_string(s: String) -> Self { + match StackString::from_str(&s) { + Some(stack_buf) => Self(Repr::Stack(stack_buf)), + None => Self(Repr::Heap(HeapString::from_string(s))), + } + } + + /// Creates a new `ShString` from the given `Cow`. + pub fn new_from_cow_str(s: Cow) -> Self { + match s { + Cow::Borrowed(s) => Self::new_from_str(s), + Cow::Owned(s) => Self::new_from_string(s), + } + } + + /// Returns a string slice for the underlying string data. + pub fn as_str(&self) -> &str { + match self { + Self(Repr::Stack(buf)) => buf.as_str(), + Self(Repr::Heap(buf)) => buf.as_str(), + } + } + + /// Returns a mutable string slice for the underlying string data. + pub fn as_str_mut(&mut self) -> &mut str { + match self { + Self(Repr::Stack(buf)) => buf.as_str_mut(), + Self(Repr::Heap(buf)) => buf.as_str_mut(), + } + } + + /// Consumes the `ShString` and converts it to a heap-allocated `String`. + pub fn into_string(self) -> String { + match self { + Self(Repr::Stack(buf)) => buf.into_string(), + Self(Repr::Heap(buf)) => buf.into_string(), + } + } + + /// Returns the length of the string in bytes. + pub fn len(&self) -> usize { + match self { + Self(Repr::Stack(buf)) => buf.len(), + Self(Repr::Heap(buf)) => buf.len(), + } + } + + /// Returns `true` if the string has length 0. + pub fn is_empty(&self) -> bool { + match self { + Self(Repr::Stack(buf)) => buf.is_empty(), + Self(Repr::Heap(buf)) => buf.is_empty(), + } + } + + /// Returns `true` if the string data is stored on the heap, and `false` otherwise. + pub fn heap_allocated(&self) -> bool { + match self { + Self(Repr::Stack(_)) => false, + Self(Repr::Heap(_)) => true, + } + } +} + +impl ops::Deref for ShString { + type Target = str; + + fn deref(&self) -> &Self::Target { + self.as_str() + } +} + +impl ops::DerefMut for ShString { + fn deref_mut(&mut self) -> &mut Self::Target { + self.as_str_mut() + } +} + +impl AsRef for ShString { + fn as_ref(&self) -> &str { + self + } +} + +impl borrow::Borrow for ShString { + fn borrow(&self) -> &str { + self + } +} + +impl borrow::BorrowMut for ShString { + fn borrow_mut(&mut self) -> &mut str { + self + } +} + +impl<'a, const N: usize> From<&'a str> for ShString { + fn from(s: &'a str) -> Self { + Self::new_from_str(s) + } +} + +impl From for ShString { + fn from(s: String) -> Self { + Self::new_from_string(s) + } +} + +impl<'a, const N: usize> From> for ShString { + fn from(s: Cow<'a, str>) -> Self { + Self::new_from_cow_str(s) + } +} + +impl From> for String { + fn from(s: ShString) -> Self { + s.into_string() + } +} + +impl PartialEq> for ShString { + fn eq(&self, other: &ShString) -> bool { + **self == **other + } +} + +impl Eq for ShString {} + +impl PartialOrd> for ShString { + fn partial_cmp(&self, other: &ShString) -> Option { + (**self).partial_cmp(&**other) + } +} + +impl Ord for ShString { + fn cmp(&self, other: &Self) -> Ordering { + (**self).cmp(&**other) + } +} + +impl Hash for ShString { + fn hash(&self, state: &mut H) { + (**self).hash(state); + } +} + +impl FromStr for ShString { + type Err = Infallible; + + fn from_str(s: &str) -> Result { + Ok(Self::new_from_str(s)) + } +} + +impl fmt::Debug for ShString { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + fmt::Debug::fmt(&**self, f) + } +} + +impl fmt::Display for ShString { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + fmt::Display::fmt(&**self, f) + } +} + +#[cfg(feature = "sqlx")] +impl<'r, DB, const N: usize> Decode<'r, DB> for ShString +where + DB: Database, + &'r str: Decode<'r, DB>, +{ + fn decode(value: >::ValueRef) -> Result { + <&'r str as Decode<'r, DB>>::decode(value).map(Self::new_from_str) + } +} + +#[cfg(feature = "sqlx")] +impl<'q, DB, const N: usize> Encode<'q, DB> for ShString +where + DB: Database, + for<'a> &'a str: Encode<'q, DB>, +{ + fn encode_by_ref(&self, buf: &mut >::ArgumentBuffer) -> IsNull { + <&str as Encode<'q, DB>>::encode(self.as_str(), buf) + } +} + +#[cfg(feature = "sqlx")] +impl Type for ShString +where + DB: Database, + for<'a> &'a str: Type, +{ + fn type_info() -> ::TypeInfo { + <&str as Type>::type_info() + } + + fn compatible(ty: &::TypeInfo) -> bool { + <&str as Type>::compatible(ty) + } +} + +#[derive(Clone)] +enum Repr { + Stack(StackString), + Heap(HeapString), +} + +mod buf { + use std::str; + + /// A stack-allocated string with a capacity of `N` bytes. `len` must be less than or equal to + /// `N`, and the first `len` bytes of `buf` must be valid UTF-8. + #[derive(Clone)] + pub(super) struct StackString { + buf: [u8; N], + len: u8, + } + + impl StackString { + const MAX_LEN: u8 = { + #[allow(clippy::cast_possible_truncation, clippy::checked_conversions)] + if N <= u8::MAX as usize { + N as u8 + } else { + panic!("`N` must be within the bounds of `u8`") + } + }; + + pub(super) fn from_str(s: &str) -> Option { + let s = s.as_bytes(); + + // If the length of the string is greater than `Self::MAX_LEN`, it will not fit in the + // stack buffer so return `None`. + let len = u8::try_from(s.len()).ok()?; + if len > Self::MAX_LEN { + return None; + } + + let mut buf = [0; N]; + buf[..usize::from(len)].copy_from_slice(s); + Some(Self { buf, len }) + } + + pub(super) fn as_str(&self) -> &str { + // SAFETY: + // `len` being less than or equal to `N` is an invariant of `StackString`, so it is + // always within the bounds of `buf`. + let slice = unsafe { self.buf.get_unchecked(..usize::from(self.len)) }; + + // SAFETY: + // The first `len` bytes of `buf` being valid UTF-8 is an invariant of `StackString`. + unsafe { str::from_utf8_unchecked(slice) } + } + + pub(super) fn as_str_mut(&mut self) -> &mut str { + // SAFETY: + // `len` being less than or equal to `N` is an invariant of `StackString`, so it is + // always within the bounds of `buf`. + let slice = unsafe { self.buf.get_unchecked_mut(..usize::from(self.len)) }; + + // SAFETY: + // The first `len` bytes of `buf` being valid UTF-8 is an invariant of `StackString`. + unsafe { str::from_utf8_unchecked_mut(slice) } + } + + pub(super) fn into_string(self) -> String { + self.as_str().to_owned() + } + + pub(super) fn len(&self) -> usize { + usize::from(self.len) + } + + pub(super) fn is_empty(&self) -> bool { + self.len == 0 + } + } + + /// A heap-allocated non-growable string. `buf` must be valid UTF-8. + #[derive(Clone)] + pub(super) struct HeapString { + buf: Box<[u8]>, + } + + impl HeapString { + pub(super) fn from_str(s: &str) -> Self { + Self { + buf: s.as_bytes().into(), + } + } + + pub(super) fn from_string(s: String) -> Self { + Self { + buf: s.into_boxed_str().into_boxed_bytes(), + } + } + + pub(super) fn as_str(&self) -> &str { + // SAFETY: + // `buf` being valid UTF-8 is an invariant of `HeapString`. + unsafe { str::from_utf8_unchecked(&self.buf) } + } + + pub(super) fn as_str_mut(&mut self) -> &mut str { + // SAFETY: + // `buf` being valid UTF-8 is an invariant of `HeapString`. + unsafe { str::from_utf8_unchecked_mut(&mut self.buf) } + } + + pub(super) fn into_string(self) -> String { + // SAFETY: + // `buf` being valid UTF-8 is an invariant of `HeapString`. + unsafe { String::from_utf8_unchecked(self.buf.into_vec()) } + } + + pub(super) fn len(&self) -> usize { + self.buf.len() + } + + pub(super) fn is_empty(&self) -> bool { + self.buf.is_empty() + } + } +}