From 03d5baced87952a7091572ff09329b970eb9aed3 Mon Sep 17 00:00:00 2001 From: pantonshire Date: Sun, 17 Sep 2023 09:52:44 +0100 Subject: [PATCH 01/15] capped string type with Deserialize impl --- enumscribe/Cargo.toml | 7 +- enumscribe/src/internal/capped_string.rs | 102 +++++++++++++++++++++++ enumscribe/src/internal/mod.rs | 3 + enumscribe/src/lib.rs | 3 +- 4 files changed, 112 insertions(+), 3 deletions(-) create mode 100644 enumscribe/src/internal/capped_string.rs create mode 100644 enumscribe/src/internal/mod.rs diff --git a/enumscribe/Cargo.toml b/enumscribe/Cargo.toml index 9c2378f..faddb11 100644 --- a/enumscribe/Cargo.toml +++ b/enumscribe/Cargo.toml @@ -12,13 +12,16 @@ keywords = ["enum", "derive", "serde"] [dependencies] enumscribe_derive = { version = "0.3.0", path = "../enumscribe_derive", default-features = false, optional = true } +serde = { version = "1.0", default-features = false, optional = true } [dev-dependencies] serde = { version = "1.0", features = ["derive"] } serde_json = "1.0" [features] -default = ["std", "derive", "derive_serde"] +# default = ["std", "derive", "derive_serde"] +default = ["derive", "derive_serde"] std = ["enumscribe_derive/std"] derive = ["enumscribe_derive"] -derive_serde = ["derive", "enumscribe_derive/serde"] +derive_serde = ["derive", "serde", "enumscribe_derive/serde"] +serde = ["derive_serde", "dep:serde"] diff --git a/enumscribe/src/internal/capped_string.rs b/enumscribe/src/internal/capped_string.rs new file mode 100644 index 0000000..0f4a165 --- /dev/null +++ b/enumscribe/src/internal/capped_string.rs @@ -0,0 +1,102 @@ +//! Module for the [`CappedString`](CappedString) type, which is a string type which always stores +//! its data inline. + +use core::{str, convert::TryFrom, ops::Deref, borrow::Borrow, fmt}; + +/// TODO: documentation +pub struct CappedString { + /// The string data. It is an invariant that this must always be valid UTF-8. + buf: [u8; N], +} + +impl CappedString { + /// TODO: documentation + #[inline] + #[must_use] + pub fn new(s: &str) -> Option { + unsafe { Self::from_utf8_unchecked(s.as_bytes()) } + } + + /// TODO: documentation + #[inline] + #[must_use] + pub unsafe fn from_utf8_unchecked(bs: &[u8]) -> Option { + let buf = <[u8; N]>::try_from(bs).ok()?; + Some(Self { buf }) + } + + /// TODO: documentation + #[inline] + #[must_use] + pub fn as_str(&self) -> &str { + unsafe { str::from_utf8_unchecked(&self.buf) } + } + + /// TODO: documentation + #[inline] + #[must_use] + pub fn to_uppercase(&self) -> Option> { + todo!() + } +} + +impl Deref for CappedString { + type Target = str; + + #[inline] + fn deref(&self) -> &Self::Target { + self.as_str() + } +} + +impl AsRef for CappedString { + #[inline] + fn as_ref(&self) -> &str { + self + } +} + +impl Borrow for CappedString { + #[inline] + fn borrow(&self) -> &str { + self + } +} + +#[cfg(feature = "serde")] +impl<'de, const N: usize> serde::Deserialize<'de> for CappedString { + fn deserialize(deserializer: D) -> Result + where + D: serde::Deserializer<'de> + { + deserializer.deserialize_str(CappedStringVisitor::) + } +} + +struct CappedStringVisitor; + +impl<'de, const N: usize> serde::de::Visitor<'de> for CappedStringVisitor { + type Value = CappedString; + + fn expecting(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "a string up to {} bytes long", N) + } + + fn visit_str(self, v: &str) -> Result + where + E: serde::de::Error, + { + CappedString::new(v) + .ok_or_else(|| E::invalid_length(v.len(), &self)) + } + + fn visit_bytes(self, v: &[u8]) -> Result + where + E: serde::de::Error, + { + str::from_utf8(v) + .map_err(|_| E::invalid_value(serde::de::Unexpected::Bytes(v), &self)) + .and_then(|v| CappedString::new(v) + .ok_or_else(|| E::invalid_length(v.len(), &self))) + } +} diff --git a/enumscribe/src/internal/mod.rs b/enumscribe/src/internal/mod.rs new file mode 100644 index 0000000..6a46570 --- /dev/null +++ b/enumscribe/src/internal/mod.rs @@ -0,0 +1,3 @@ +//! Utilities for use by code generated by `enumscribe_derive`. + +pub mod capped_string; diff --git a/enumscribe/src/lib.rs b/enumscribe/src/lib.rs index 438b39b..cfc4bf7 100644 --- a/enumscribe/src/lib.rs +++ b/enumscribe/src/lib.rs @@ -183,7 +183,8 @@ #![deny(missing_docs)] #![cfg_attr(not(feature = "std"), no_std)] -#[macro_use] +pub mod internal; + extern crate enumscribe_derive; pub use enumscribe_derive::*; From 5f169d1ac5484903da563b41f489d5e8a1aaf640 Mon Sep 17 00:00:00 2001 From: pantonshire Date: Sun, 17 Sep 2023 09:54:44 +0100 Subject: [PATCH 02/15] re-enable std by default --- enumscribe/Cargo.toml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/enumscribe/Cargo.toml b/enumscribe/Cargo.toml index faddb11..fcccae0 100644 --- a/enumscribe/Cargo.toml +++ b/enumscribe/Cargo.toml @@ -19,8 +19,7 @@ serde = { version = "1.0", features = ["derive"] } serde_json = "1.0" [features] -# default = ["std", "derive", "derive_serde"] -default = ["derive", "derive_serde"] +default = ["std", "derive", "derive_serde"] std = ["enumscribe_derive/std"] derive = ["enumscribe_derive"] derive_serde = ["derive", "serde", "enumscribe_derive/serde"] From d083d3094012f360dffcf6fab087ab6ec0b42d13 Mon Sep 17 00:00:00 2001 From: pantonshire Date: Sat, 23 Sep 2023 15:09:39 +0100 Subject: [PATCH 03/15] capped string uppercase conversion --- enumscribe/src/internal/capped_string.rs | 119 +++++++++++++++++++++-- 1 file changed, 110 insertions(+), 9 deletions(-) diff --git a/enumscribe/src/internal/capped_string.rs b/enumscribe/src/internal/capped_string.rs index 0f4a165..02deac8 100644 --- a/enumscribe/src/internal/capped_string.rs +++ b/enumscribe/src/internal/capped_string.rs @@ -1,42 +1,100 @@ //! Module for the [`CappedString`](CappedString) type, which is a string type which always stores //! its data inline. -use core::{str, convert::TryFrom, ops::Deref, borrow::Borrow, fmt}; +use core::{str, ops::Deref, borrow::Borrow, fmt}; /// TODO: documentation pub struct CappedString { - /// The string data. It is an invariant that this must always be valid UTF-8. + /// The string data. It is an invariant that the first `len` bytes must be valid UTF-8. buf: [u8; N], + // The length of the string data in the buffer. It is an invariant that `len <= N`. + len: usize, } impl CappedString { /// TODO: documentation #[inline] #[must_use] - pub fn new(s: &str) -> Option { + pub fn from_str(s: &str) -> Option { unsafe { Self::from_utf8_unchecked(s.as_bytes()) } } /// TODO: documentation + /// + /// # Safety + /// - `bs` must be valid UTF-8. #[inline] #[must_use] pub unsafe fn from_utf8_unchecked(bs: &[u8]) -> Option { - let buf = <[u8; N]>::try_from(bs).ok()?; - Some(Self { buf }) + let mut buf = [0u8; N]; + buf.get_mut(..bs.len())?.copy_from_slice(bs); + + // SAFETY: + // - `bs.len() <= N` has already been checked by the `get_mut` call, which will return + // `None` and cause us to return early if the condition does not hold. + // + unsafe { Some(Self::from_raw_parts(buf, bs.len())) } + } + + /// TODO: documentation + /// + /// # Safety + /// - `len <= N` must hold. + /// - The first `len` bytes of `buf` must be valid UTF-8. + #[inline] + #[must_use] + pub unsafe fn from_raw_parts(buf: [u8; N], len: usize) -> Self { + Self { buf, len } + } + + + /// TODO: documentation + #[inline] + #[must_use] + pub fn into_raw_parts(self) -> ([u8; N], usize) { + (self.buf, self.len) } /// TODO: documentation #[inline] #[must_use] pub fn as_str(&self) -> &str { - unsafe { str::from_utf8_unchecked(&self.buf) } + // SAFETY: + // - It is an invariant of `CappedString` that `len <= N`. + // - It is an invariant of `CappedString` that the first `len` bytes of `buf` are valid + // UTF-8. + unsafe { + let buf_occupied_prefix = self.buf.get_unchecked(..self.len); + str::from_utf8_unchecked(buf_occupied_prefix) + } } /// TODO: documentation #[inline] #[must_use] pub fn to_uppercase(&self) -> Option> { - todo!() + let mut buf = [0u8; M]; + let mut cursor = 0usize; + + for c_orig in self.as_str().chars() { + for c_upper in c_orig.to_uppercase() { + let encode_buf = cursor + .checked_add(c_upper.len_utf8()) + .and_then(|encode_buf_end| buf.get_mut(cursor..encode_buf_end))?; + + // FIXME: avoid the panic asm that gets generated for this encode (can never panic, + // as we always have at least `c_upper.len_utf8()` buffer space). + let encoded = c_upper.encode_utf8(encode_buf); + cursor = cursor.checked_add(encoded.len())?; + } + } + + let filled_buf = buf.get(..cursor)?; + + // SAFETY: + // `filled_buf` has been filled with a sequence of bytes obtained from `char::encode_utf8`, + // so it is valid UTF-8. + unsafe { CappedString::from_utf8_unchecked(filled_buf) } } } @@ -86,7 +144,7 @@ impl<'de, const N: usize> serde::de::Visitor<'de> for CappedStringVisitor { where E: serde::de::Error, { - CappedString::new(v) + CappedString::from_str(v) .ok_or_else(|| E::invalid_length(v.len(), &self)) } @@ -96,7 +154,50 @@ impl<'de, const N: usize> serde::de::Visitor<'de> for CappedStringVisitor { { str::from_utf8(v) .map_err(|_| E::invalid_value(serde::de::Unexpected::Bytes(v), &self)) - .and_then(|v| CappedString::new(v) + .and_then(|v| CappedString::from_str(v) .ok_or_else(|| E::invalid_length(v.len(), &self))) } } + +#[cfg(test)] +mod tests { + use super::CappedString; + + #[test] + fn test_capped_string_uppercase() { + { + let s1 = CappedString::<5>::from_str("hello").unwrap(); + let s2 = s1.to_uppercase::<5>().unwrap(); + assert_eq!(s2.as_str(), "HELLO"); + } + { + let s1 = CappedString::<20>::from_str("hello").unwrap(); + let s2 = s1.to_uppercase::<20>().unwrap(); + assert_eq!(s2.as_str(), "HELLO"); + } + { + let s1 = CappedString::<5>::from_str("hElLo").unwrap(); + let s2 = s1.to_uppercase::<5>().unwrap(); + assert_eq!(s2.as_str(), "HELLO"); + } + { + let s1 = CappedString::<5>::from_str("hello").unwrap(); + assert!(s1.to_uppercase::<4>().is_none()); + } + { + let s1 = CappedString::<5>::from_str("groß").unwrap(); + let s2 = s1.to_uppercase::<5>().unwrap(); + assert_eq!(s2.as_str(), "GROSS"); + } + { + let s1 = CappedString::<1>::from_str("").unwrap(); + let s2 = s1.to_uppercase::<1>().unwrap(); + assert_eq!(s2.as_str(), ""); + } + { + let s1 = CappedString::<0>::from_str("").unwrap(); + let s2 = s1.to_uppercase::<0>().unwrap(); + assert_eq!(s2.as_str(), ""); + } + } +} From 42eb2dd272b5d2032a0d0b5a52f5a4e03a13e9e1 Mon Sep 17 00:00:00 2001 From: pantonshire Date: Sat, 23 Sep 2023 15:11:07 +0100 Subject: [PATCH 04/15] deny unsafe_op_in_unsafe_fn --- enumscribe/src/lib.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/enumscribe/src/lib.rs b/enumscribe/src/lib.rs index cfc4bf7..04e41de 100644 --- a/enumscribe/src/lib.rs +++ b/enumscribe/src/lib.rs @@ -181,6 +181,7 @@ //! you *really* don't want to use a `Cow` for whatever reason. #![deny(missing_docs)] +#![deny(unsafe_op_in_unsafe_fn)] #![cfg_attr(not(feature = "std"), no_std)] pub mod internal; From 096b63509609915b4a9d6bef9d3ee9c2b861eecd Mon Sep 17 00:00:00 2001 From: pantonshire Date: Sat, 23 Sep 2023 15:11:47 +0100 Subject: [PATCH 05/15] specify features required by examples --- enumscribe_examples/Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/enumscribe_examples/Cargo.toml b/enumscribe_examples/Cargo.toml index cc708a3..94112bb 100644 --- a/enumscribe_examples/Cargo.toml +++ b/enumscribe_examples/Cargo.toml @@ -6,6 +6,6 @@ edition = "2018" license = "MIT" [dev-dependencies] -enumscribe = { path = "../enumscribe" } +enumscribe = { path = "../enumscribe", features = ["std", "derive", "serde"] } serde = { version = "1.0", features = ["derive"] } serde_json = "1.0" From 876d92c3e89ead75b703c429e18d6fd0a9b4b776 Mon Sep 17 00:00:00 2001 From: pantonshire Date: Sun, 24 Sep 2023 19:35:40 +0100 Subject: [PATCH 06/15] capped string documentation --- enumscribe/src/internal/capped_string.rs | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/enumscribe/src/internal/capped_string.rs b/enumscribe/src/internal/capped_string.rs index 02deac8..8ee4aca 100644 --- a/enumscribe/src/internal/capped_string.rs +++ b/enumscribe/src/internal/capped_string.rs @@ -3,7 +3,7 @@ use core::{str, ops::Deref, borrow::Borrow, fmt}; -/// TODO: documentation +/// A string type which stores up to `N` bytes of string data inline. pub struct CappedString { /// The string data. It is an invariant that the first `len` bytes must be valid UTF-8. buf: [u8; N], @@ -12,14 +12,16 @@ pub struct CappedString { } impl CappedString { - /// TODO: documentation + /// Returns a new `CappedString` containing a copy of the given string data. Returns an error + /// if the string data is larger than `N` bytes. #[inline] #[must_use] pub fn from_str(s: &str) -> Option { unsafe { Self::from_utf8_unchecked(s.as_bytes()) } } - /// TODO: documentation + /// Returns a new `CappedString` containing a copy of the given UTF-8 encoded string data. + /// Returns an error if more than `N` bytes of data are given. /// /// # Safety /// - `bs` must be valid UTF-8. @@ -36,7 +38,7 @@ impl CappedString { unsafe { Some(Self::from_raw_parts(buf, bs.len())) } } - /// TODO: documentation + /// Returns a new `CappedString` from a given buffer and length. /// /// # Safety /// - `len <= N` must hold. @@ -48,14 +50,14 @@ impl CappedString { } - /// TODO: documentation + /// Consumes the `CappedString` and returns its buffer and length. #[inline] #[must_use] pub fn into_raw_parts(self) -> ([u8; N], usize) { (self.buf, self.len) } - /// TODO: documentation + /// Returns the string data contained by this `CappedString`. #[inline] #[must_use] pub fn as_str(&self) -> &str { @@ -69,7 +71,8 @@ impl CappedString { } } - /// TODO: documentation + /// Returns a new `CappedString` with capacity `M` containing the string converted to + /// uppercase. Returns an error if the uppercase-converted string is longer than `M` bytes. #[inline] #[must_use] pub fn to_uppercase(&self) -> Option> { From afa8e15acef9dcb9149a80537ea3e81777730bc9 Mon Sep 17 00:00:00 2001 From: pantonshire Date: Sun, 24 Sep 2023 21:07:22 +0100 Subject: [PATCH 07/15] cow capped string --- enumscribe/src/internal/capped_string.rs | 67 +++++++++++++++++++++++- 1 file changed, 65 insertions(+), 2 deletions(-) diff --git a/enumscribe/src/internal/capped_string.rs b/enumscribe/src/internal/capped_string.rs index 8ee4aca..43ab7b3 100644 --- a/enumscribe/src/internal/capped_string.rs +++ b/enumscribe/src/internal/capped_string.rs @@ -3,6 +3,68 @@ use core::{str, ops::Deref, borrow::Borrow, fmt}; +/// TODO: documentation +pub enum CowCappedString<'a, const N: usize> { + /// TODO: documentation + Borrowed(&'a str), + /// TODO: documentation + Owned(CappedString), +} + +#[cfg(feature = "serde")] +impl<'de, const N: usize> serde::Deserialize<'de> for CowCappedString<'de, N> { + fn deserialize(deserializer: D) -> Result + where + D: serde::Deserializer<'de> + { + deserializer.deserialize_str(CowCappedStringVisitor::) + } +} + +#[cfg(feature = "serde")] +struct CowCappedStringVisitor; + +#[cfg(feature = "serde")] +impl<'de, const N: usize> serde::de::Visitor<'de> for CowCappedStringVisitor { + type Value = CowCappedString<'de, N>; + + fn expecting(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "a borrowed string or a string up to {} bytes long", N) + } + + fn visit_str(self, v: &str) -> Result + where + E: serde::de::Error, + { + CappedStringVisitor::.visit_str(v) + .map(CowCappedString::Owned) + } + + fn visit_bytes(self, v: &[u8]) -> Result + where + E: serde::de::Error, + { + CappedStringVisitor::.visit_bytes(v) + .map(CowCappedString::Owned) + } + + fn visit_borrowed_str(self, v: &'de str) -> Result + where + E: serde::de::Error, + { + Ok(CowCappedString::Borrowed(v)) + } + + fn visit_borrowed_bytes(self, v: &'de [u8]) -> Result + where + E: serde::de::Error, + { + str::from_utf8(v) + .map_err(|_| E::invalid_value(serde::de::Unexpected::Bytes(v), &self)) + .and_then(|v| self.visit_borrowed_str(v)) + } +} + /// A string type which stores up to `N` bytes of string data inline. pub struct CappedString { /// The string data. It is an invariant that the first `len` bytes must be valid UTF-8. @@ -134,8 +196,10 @@ impl<'de, const N: usize> serde::Deserialize<'de> for CappedString { } } +#[cfg(feature = "serde")] struct CappedStringVisitor; +#[cfg(feature = "serde")] impl<'de, const N: usize> serde::de::Visitor<'de> for CappedStringVisitor { type Value = CappedString; @@ -157,8 +221,7 @@ impl<'de, const N: usize> serde::de::Visitor<'de> for CappedStringVisitor { { str::from_utf8(v) .map_err(|_| E::invalid_value(serde::de::Unexpected::Bytes(v), &self)) - .and_then(|v| CappedString::from_str(v) - .ok_or_else(|| E::invalid_length(v.len(), &self))) + .and_then(|v| self.visit_str(v)) } } From a799521e02c7e266aaf364530f428078ae8c3dc1 Mon Sep 17 00:00:00 2001 From: pantonshire Date: Fri, 29 Sep 2023 09:17:26 +0100 Subject: [PATCH 08/15] unit tests for (cow) capped string deserialisation --- enumscribe/src/internal/capped_string.rs | 140 ++++++++++++++++++++++- 1 file changed, 137 insertions(+), 3 deletions(-) diff --git a/enumscribe/src/internal/capped_string.rs b/enumscribe/src/internal/capped_string.rs index 43ab7b3..ba4a145 100644 --- a/enumscribe/src/internal/capped_string.rs +++ b/enumscribe/src/internal/capped_string.rs @@ -11,6 +11,18 @@ pub enum CowCappedString<'a, const N: usize> { Owned(CappedString), } +impl<'a, const N: usize> CowCappedString<'a, N> { + /// Returns the string data contained by this `CowCappedString`. + #[inline] + #[must_use] + pub fn as_str(&self) -> &str { + match self { + CowCappedString::Borrowed(s) => s, + CowCappedString::Owned(s) => s, + } + } +} + #[cfg(feature = "serde")] impl<'de, const N: usize> serde::Deserialize<'de> for CowCappedString<'de, N> { fn deserialize(deserializer: D) -> Result @@ -186,6 +198,20 @@ impl Borrow for CappedString { } } +impl PartialEq for CappedString { + fn eq(&self, other: &Self) -> bool { + self.as_str() == other.as_str() + } +} + +impl Eq for CappedString {} + +impl PartialEq for CappedString { + fn eq(&self, other: &str) -> bool { + self.as_str() == other + } +} + #[cfg(feature = "serde")] impl<'de, const N: usize> serde::Deserialize<'de> for CappedString { fn deserialize(deserializer: D) -> Result @@ -227,7 +253,115 @@ impl<'de, const N: usize> serde::de::Visitor<'de> for CappedStringVisitor { #[cfg(test)] mod tests { - use super::CappedString; + use super::{CappedString, CowCappedString}; + + #[cfg(feature = "serde")] + #[test] + fn test_cow_capped_string_deserialize() { + struct DeBorrowedOnly(String); + + impl<'de, const N: usize> serde::Deserialize<'de> for DeBorrowedOnly { + fn deserialize(deserializer: D) -> Result + where + D: serde::Deserializer<'de> + { + match CowCappedString::<'de, N>::deserialize(deserializer)? { + CowCappedString::Borrowed(s) => Ok(Self(s.to_owned())), + CowCappedString::Owned(_) => { + Err(serde::de::Error::custom("expected borrowed CowCappedString")) + }, + } + } + } + + struct DeOwnedOnly(String); + + impl<'de, const N: usize> serde::Deserialize<'de> for DeOwnedOnly { + fn deserialize(deserializer: D) -> Result + where + D: serde::Deserializer<'de> + { + match CowCappedString::<'de, N>::deserialize(deserializer)? { + CowCappedString::Borrowed(_) => { + Err(serde::de::Error::custom("expected owned CowCappedString")) + }, + CowCappedString::Owned(s) => Ok(Self(s.to_owned())), + } + } + } + + { + let DeBorrowedOnly(s) = serde_json::from_str::>( + r#""hello""# + ).unwrap(); + assert_eq!(s, "hello"); + } + { + let DeBorrowedOnly(s) = serde_json::from_str::>( + r#""hello""# + ).unwrap(); + assert_eq!(s, "hello"); + } + { + let s = serde_json::from_str::>( + r#""hello""# + ); + assert!(s.is_err()); + } + { + let DeOwnedOnly(s) = serde_json::from_str::>( + r#""\u87f9""# + ).unwrap(); + assert_eq!(s, "蟹"); + } + { + let s = serde_json::from_str::>( + r#""\u87f9""# + ); + assert!(s.is_err()); + } + } + + #[cfg(feature = "serde")] + #[test] + fn test_capped_string_deserialize() { + { + let s = serde_json::from_str::>( + r#""hello""# + ).unwrap(); + assert_eq!(s.as_str(), "hello"); + } + { + let s = serde_json::from_str::>( + r#""hello""# + ); + assert!(s.is_err()); + } + { + let s = serde_json::from_str::>( + r#""hello""# + ).unwrap(); + assert_eq!(s.as_str(), "hello"); + } + { + let s = serde_json::from_str::>( + r#""hello\tworld\n""# + ).unwrap(); + assert_eq!(s.as_str(), "hello\tworld\n"); + } + { + let s = serde_json::from_str::>( + r#""\u87f9""# + ).unwrap(); + assert_eq!(s.as_str(), "蟹"); + } + { + let s = serde_json::from_str::>( + r#""\u87f9""# + ); + assert!(s.is_err()); + } + } #[test] fn test_capped_string_uppercase() { @@ -247,8 +381,8 @@ mod tests { assert_eq!(s2.as_str(), "HELLO"); } { - let s1 = CappedString::<5>::from_str("hello").unwrap(); - assert!(s1.to_uppercase::<4>().is_none()); + let s = CappedString::<5>::from_str("hello").unwrap(); + assert!(s.to_uppercase::<4>().is_none()); } { let s1 = CappedString::<5>::from_str("groß").unwrap(); From 9487a7f5330e11f3dc6bb584ead49875960c7030 Mon Sep 17 00:00:00 2001 From: pantonshire Date: Fri, 29 Sep 2023 09:18:37 +0100 Subject: [PATCH 09/15] add miri test script --- test.sh | 3 +++ 1 file changed, 3 insertions(+) create mode 100755 test.sh diff --git a/test.sh b/test.sh new file mode 100755 index 0000000..2e854b7 --- /dev/null +++ b/test.sh @@ -0,0 +1,3 @@ +#!/bin/bash +RUSTFLAGS='-Z randomize-layout' cargo +nightly miri test + From 14effd44c5d4b09142c731f30eeba2499b86e87e Mon Sep 17 00:00:00 2001 From: pantonshire Date: Fri, 29 Sep 2023 09:40:31 +0100 Subject: [PATCH 10/15] replace test shell script with justfile --- justfile | 6 ++++++ test.sh | 3 --- 2 files changed, 6 insertions(+), 3 deletions(-) create mode 100644 justfile delete mode 100755 test.sh diff --git a/justfile b/justfile new file mode 100644 index 0000000..5948224 --- /dev/null +++ b/justfile @@ -0,0 +1,6 @@ +nightly := 'cargo +nightly' +rustc_nightly_flags := '-Z randomize-layout' + +test: + RUST_FLAGS='{{rustc_nightly_flags}}' {{nightly}} miri test + diff --git a/test.sh b/test.sh deleted file mode 100755 index 2e854b7..0000000 --- a/test.sh +++ /dev/null @@ -1,3 +0,0 @@ -#!/bin/bash -RUSTFLAGS='-Z randomize-layout' cargo +nightly miri test - From c8ca0b7e607b7a132f6a6f1b8d16cfaf8814188b Mon Sep 17 00:00:00 2001 From: pantonshire Date: Sun, 1 Oct 2023 11:50:44 +0100 Subject: [PATCH 11/15] justfile fix --- justfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/justfile b/justfile index 5948224..041a171 100644 --- a/justfile +++ b/justfile @@ -2,5 +2,5 @@ nightly := 'cargo +nightly' rustc_nightly_flags := '-Z randomize-layout' test: - RUST_FLAGS='{{rustc_nightly_flags}}' {{nightly}} miri test + RUSTFLAGS='{{rustc_nightly_flags}}' {{nightly}} miri test From 0483c756f066c4bfd4f622de0a61ae55708c2edd Mon Sep 17 00:00:00 2001 From: pantonshire Date: Sun, 1 Oct 2023 13:00:22 +0100 Subject: [PATCH 12/15] add macro backtrace to test flags --- justfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/justfile b/justfile index 041a171..62a7b45 100644 --- a/justfile +++ b/justfile @@ -1,5 +1,5 @@ nightly := 'cargo +nightly' -rustc_nightly_flags := '-Z randomize-layout' +rustc_nightly_flags := '-Z randomize-layout -Z macro-backtrace' test: RUSTFLAGS='{{rustc_nightly_flags}}' {{nightly}} miri test From 406b42f0ca21d152b3722226a04a8c136fa71b11 Mon Sep 17 00:00:00 2001 From: pantonshire Date: Sun, 1 Oct 2023 13:12:30 +0100 Subject: [PATCH 13/15] use CappedString and CowCappedString in generated unscribe fn --- enumscribe/src/internal/capped_string.rs | 91 ++++++++++---- enumscribe_derive/src/enums.rs | 148 +++++++++++++++++++---- enumscribe_derive/src/lib.rs | 83 ++++++++----- 3 files changed, 240 insertions(+), 82 deletions(-) diff --git a/enumscribe/src/internal/capped_string.rs b/enumscribe/src/internal/capped_string.rs index ba4a145..d6d5b7f 100644 --- a/enumscribe/src/internal/capped_string.rs +++ b/enumscribe/src/internal/capped_string.rs @@ -21,6 +21,37 @@ impl<'a, const N: usize> CowCappedString<'a, N> { CowCappedString::Owned(s) => s, } } + + /// Returns a new `CappedString` with capacity `M` containing the string converted to + /// uppercase. Returns `None` if the uppercase-converted string is longer than `M` bytes. + #[inline] + #[must_use] + pub fn to_uppercase(&self) -> Option> { + CappedString::::uppercase_from_str(self) + } +} + +impl<'a, const N: usize> Deref for CowCappedString<'a, N> { + type Target = str; + + #[inline] + fn deref(&self) -> &Self::Target { + self.as_str() + } +} + +impl<'a, const N: usize> AsRef for CowCappedString<'a, N> { + #[inline] + fn as_ref(&self) -> &str { + self + } +} + +impl<'a, const N: usize> Borrow for CowCappedString<'a, N> { + #[inline] + fn borrow(&self) -> &str { + self + } } #[cfg(feature = "serde")] @@ -86,16 +117,45 @@ pub struct CappedString { } impl CappedString { - /// Returns a new `CappedString` containing a copy of the given string data. Returns an error - /// if the string data is larger than `N` bytes. + /// Returns a new `CappedString` containing a copy of the given string data. Returns `None` if + /// the string data is larger than `N` bytes. #[inline] #[must_use] pub fn from_str(s: &str) -> Option { unsafe { Self::from_utf8_unchecked(s.as_bytes()) } } + /// Returns a new `CappedString` containing an uppercase conversion of the given string data. + /// Returns `None` if the converted string is larger than `N` bytes. + #[inline] + #[must_use] + pub fn uppercase_from_str(s: &str) -> Option { + let mut buf = [0u8; N]; + let mut cursor = 0usize; + + for c_orig in s.chars() { + for c_upper in c_orig.to_uppercase() { + let encode_buf = cursor + .checked_add(c_upper.len_utf8()) + .and_then(|encode_buf_end| buf.get_mut(cursor..encode_buf_end))?; + + // FIXME: avoid the panic asm that gets generated for this encode (can never panic, + // as we always have at least `c_upper.len_utf8()` buffer space). + let encoded = c_upper.encode_utf8(encode_buf); + cursor = cursor.checked_add(encoded.len())?; + } + } + + let filled_buf = buf.get(..cursor)?; + + // SAFETY: + // `filled_buf` has been filled with a sequence of bytes obtained from `char::encode_utf8`, + // so it is valid UTF-8. + unsafe { Self::from_utf8_unchecked(filled_buf) } + } + /// Returns a new `CappedString` containing a copy of the given UTF-8 encoded string data. - /// Returns an error if more than `N` bytes of data are given. + /// Returns `None` if more than `N` bytes of data are given. /// /// # Safety /// - `bs` must be valid UTF-8. @@ -146,32 +206,11 @@ impl CappedString { } /// Returns a new `CappedString` with capacity `M` containing the string converted to - /// uppercase. Returns an error if the uppercase-converted string is longer than `M` bytes. + /// uppercase. Returns `None` if the uppercase-converted string is longer than `M` bytes. #[inline] #[must_use] pub fn to_uppercase(&self) -> Option> { - let mut buf = [0u8; M]; - let mut cursor = 0usize; - - for c_orig in self.as_str().chars() { - for c_upper in c_orig.to_uppercase() { - let encode_buf = cursor - .checked_add(c_upper.len_utf8()) - .and_then(|encode_buf_end| buf.get_mut(cursor..encode_buf_end))?; - - // FIXME: avoid the panic asm that gets generated for this encode (can never panic, - // as we always have at least `c_upper.len_utf8()` buffer space). - let encoded = c_upper.encode_utf8(encode_buf); - cursor = cursor.checked_add(encoded.len())?; - } - } - - let filled_buf = buf.get(..cursor)?; - - // SAFETY: - // `filled_buf` has been filled with a sequence of bytes obtained from `char::encode_utf8`, - // so it is valid UTF-8. - unsafe { CappedString::from_utf8_unchecked(filled_buf) } + CappedString::::uppercase_from_str(self) } } diff --git a/enumscribe_derive/src/enums.rs b/enumscribe_derive/src/enums.rs index 4363b98..7e9da6e 100644 --- a/enumscribe_derive/src/enums.rs +++ b/enumscribe_derive/src/enums.rs @@ -12,7 +12,45 @@ use crate::{CASE_INSENSITIVE, CRATE_ATTR, IGNORE, NAME, OTHER}; #[derive(Clone)] pub(crate) struct Enum<'a> { - pub(crate) variants: Vec>, + variants: Box<[Variant<'a>]>, + name_capacity: usize, + name_upper_capacity: usize, +} + +impl<'a> Enum<'a> { + pub(crate) fn new(variants: Box<[Variant<'a>]>) -> Self { + let name_capacity = variants + .iter() + .filter_map(|v| v.v_type.as_named()) + .map(|named| named.name().len()) + .max() + .unwrap_or(0); + + let name_upper_capacity = variants + .iter() + .filter_map(|v| v.v_type.as_named()) + .map(|named| named.name_upper().len()) + .max() + .unwrap_or(0); + + Self { + variants, + name_capacity, + name_upper_capacity, + } + } + + pub(crate) fn variants(&self) -> &[Variant<'a>] { + &self.variants + } + + pub(crate) fn name_capacity(&self) -> usize { + self.name_capacity + } + + pub(crate) fn name_upper_capacity(&self) -> usize { + self.name_upper_capacity + } } #[derive(Clone)] @@ -25,14 +63,69 @@ pub(crate) struct Variant<'a> { #[derive(Clone)] pub(crate) enum VariantType<'a> { Ignore, - Named { - name: String, + Named(NamedVariant), + Other(OtherVariant<'a>), +} + +impl<'a> VariantType<'a> { + pub(crate) fn as_named(&self) -> Option<&NamedVariant> { + match self { + Self::Named(named) => Some(named), + _ => None, + } + } +} + +#[derive(Clone)] +pub(crate) struct NamedVariant { + name: Box, + name_upper: Box, + constructor: VariantConstructor, + case_insensitive: bool, +} + +impl NamedVariant { + pub(crate) fn new( + name: Box, constructor: VariantConstructor, - case_insensitive: bool, - }, - Other { - field_name: Option<&'a Ident>, - }, + case_insensitive: bool + ) -> Self + { + let name_upper = char_wise_uppercase(&name); + Self { + name, + name_upper, + constructor, + case_insensitive, + } + } + + pub(crate) fn name(&self) -> &str { + &self.name + } + + pub(crate) fn name_upper(&self) -> &str { + &self.name_upper + } + + pub(crate) fn constructor(&self) -> VariantConstructor { + self.constructor + } + + pub(crate) fn case_insensitive(&self) -> bool { + self.case_insensitive + } +} + +#[derive(Clone)] +pub(crate) struct OtherVariant<'a> { + field_name: Option<&'a Ident>, +} + +impl<'a> OtherVariant<'a> { + pub(crate) fn field_name(&self) -> Option<&'a Ident> { + self.field_name + } } #[derive(Clone, Copy, Debug)] @@ -58,20 +151,18 @@ impl<'a> Variant<'a> { match &self.v_type { VariantType::Ignore => Ok(None), - VariantType::Named { - name, constructor, .. - } => { - let constructor_tokens = constructor.empty(); + VariantType::Named(named) => { + let constructor_tokens = named.constructor().empty_toks(); let pattern = quote! { #enum_ident::#variant_ident #constructor_tokens }; - Ok(Some((pattern, named_fn(self, enum_ident, name)?))) + Ok(Some((pattern, named_fn(self, enum_ident, named.name())?))) } - VariantType::Other { field_name } => { - let field_name_tokens = match field_name { + VariantType::Other(other) => { + let field_name_tokens = match other.field_name() { Some(field_name) => field_name.to_token_stream(), None => quote! { __enumscribe_other_inner }, }; - let pattern = match field_name { + let pattern = match other.field_name() { Some(_) => quote! { #enum_ident::#variant_ident{#field_name_tokens} }, None => quote! { #enum_ident::#variant_ident(#field_name_tokens) }, }; @@ -85,7 +176,7 @@ impl<'a> Variant<'a> { } impl VariantConstructor { - pub(crate) fn empty(&self) -> TokenStream2 { + pub(crate) fn empty_toks(&self) -> TokenStream2 { match self { VariantConstructor::None => quote! {}, VariantConstructor::Paren => quote! { () }, @@ -220,7 +311,7 @@ pub(crate) fn parse_enum<'a>(data: &'a DataEnum, attrs: &'a [Attribute]) -> Macr Variant { data: variant, - v_type: VariantType::Other { field_name }, + v_type: VariantType::Other(OtherVariant { field_name }), span: variant_span, } } else { @@ -279,13 +370,12 @@ pub(crate) fn parse_enum<'a>(data: &'a DataEnum, attrs: &'a [Attribute]) -> Macr Fields::Unit => VariantConstructor::None, }; + let named = NamedVariant::new(name.into_boxed_str(), constructor, case_insensitive); + let v_type = VariantType::Named(named); + Variant { data: variant, - v_type: VariantType::Named { - name, - constructor, - case_insensitive, - }, + v_type, span: variant_span, } }; @@ -293,5 +383,13 @@ pub(crate) fn parse_enum<'a>(data: &'a DataEnum, attrs: &'a [Attribute]) -> Macr variants.push(scribe_variant); } - Ok(Enum { variants }) -} \ No newline at end of file + Ok(Enum::new(variants.into_boxed_slice())) +} + +fn char_wise_uppercase(s: &str) -> Box { + // Use the same uppercase algorithm as `enumscribe::internal::capped_string`. + s.chars() + .flat_map(char::to_uppercase) + .collect::() + .into_boxed_str() +} diff --git a/enumscribe_derive/src/lib.rs b/enumscribe_derive/src/lib.rs index 2c9b78c..c21228c 100644 --- a/enumscribe_derive/src/lib.rs +++ b/enumscribe_derive/src/lib.rs @@ -59,9 +59,9 @@ where let enum_ident = &input.ident; - let mut match_arms = Vec::with_capacity(parsed_enum.variants.len()); + let mut match_arms = Vec::with_capacity(parsed_enum.variants().len()); - for variant in parsed_enum.variants.iter() { + for variant in parsed_enum.variants().iter() { match variant.match_variant(enum_ident, &named_fn, &other_fn) { Ok(Some((pattern, result))) => match_arms.push(quote! { #pattern => #result }), Ok(None) => return ignore_err_fn(variant, enum_ident).into(), @@ -102,9 +102,9 @@ where let enum_ident = &input.ident; let mut ignore_variant = false; - let mut match_arms = Vec::with_capacity(parsed_enum.variants.len()); + let mut match_arms = Vec::with_capacity(parsed_enum.variants().len()); - for variant in parsed_enum.variants.iter() { + for variant in parsed_enum.variants().iter() { match variant.match_variant(enum_ident, &named_fn, &other_fn) { Ok(Some((pattern, result))) => match_arms.push(quote! { #pattern => #result }), Ok(None) => ignore_variant = true, @@ -192,30 +192,28 @@ where let mut case_sensitive_arms = Vec::new(); let mut case_insensitive_arms = Vec::new(); - for variant in parsed_enum.variants.iter() { + for variant in parsed_enum.variants().iter() { let variant_ident = &variant.data.ident; match &variant.v_type { VariantType::Ignore => (), - VariantType::Named { - name, - constructor, - case_insensitive, - } => { - let match_pattern = if *case_insensitive { - let lowercase_name = name.to_lowercase(); - quote! { #lowercase_name } + VariantType::Named(named) => { + let match_pattern = if named.case_insensitive() { + let uppercase_name = named.name_upper(); + quote! { #uppercase_name } } else { + let name = named.name(); quote! { #name } }; - let constructor_tokens = constructor.empty(); - let constructed_variant = - quote! { #enum_ident::#variant_ident #constructor_tokens }; + let constructor_tokens = named.constructor().empty_toks(); + let constructed_variant = quote! { + #enum_ident::#variant_ident #constructor_tokens + }; let match_result = named_fn(constructed_variant); - if *case_insensitive { + if named.case_insensitive() { &mut case_insensitive_arms } else { &mut case_sensitive_arms @@ -223,11 +221,11 @@ where .push(quote! { #match_pattern => #match_result }); } - VariantType::Other { field_name } => { + VariantType::Other(other) => { let unscribe_value = quote! { <_ as ::std::convert::Into<_>>::into(#match_against) }; - let constructed_variant = match field_name { + let constructed_variant = match other.field_name() { None => quote! { #enum_ident::#variant_ident(#unscribe_value) }, @@ -251,10 +249,23 @@ where let case_insensitive_match = if case_insensitive_arms.is_empty() { None } else { + let match_against_upper_ident = quote! { __enumscribe_unscribe_uppercase }; + let name_upper_cap = parsed_enum.name_upper_capacity(); + Some(quote! { - let __enumscribe_unscribe_lowercase = #match_against.to_lowercase(); - match __enumscribe_unscribe_lowercase.as_str() { - #(#case_insensitive_arms,)* + match ::enumscribe + ::internal + ::capped_string + ::CappedString + ::<#name_upper_cap> + ::uppercase_from_str(#match_against) + { + Some(#match_against_upper_ident) => { + match &*#match_against_upper_ident { + #(#case_insensitive_arms,)* + #other_arm, + } + }, #other_arm, } }) @@ -659,23 +670,22 @@ pub fn derive_enum_serialize(input: TokenStream) -> TokenStream { let mut match_arms = Vec::new(); let mut ignore_variant = false; - for variant in parsed_enum.variants.iter() { + for variant in parsed_enum.variants().iter() { let variant_ident = &variant.data.ident; match &variant.v_type { VariantType::Ignore => ignore_variant = true, - VariantType::Named { - name, constructor, .. - } => { - let constructor_tokens = constructor.empty(); + VariantType::Named(named) => { + let constructor_tokens = named.constructor().empty_toks(); + let name = named.name(); match_arms.push(quote! { #enum_ident::#variant_ident #constructor_tokens => #serializer_ident.serialize_str(#name) }) } - VariantType::Other { field_name } => match field_name { + VariantType::Other(other) => match other.field_name() { Some(field_name) => match_arms.push(quote! { #enum_ident::#variant_ident { #field_name } => #serializer_ident.serialize_str(&#field_name) @@ -748,13 +758,14 @@ pub fn derive_enum_deserialize(input: TokenStream) -> TokenStream { let enum_ident = &input.ident; let deserializer_ident = quote! { __enumscribe_deserializer }; + let deserialized_cow_capped_str_ident = quote! { __enumscribe_deserialized_cow_capped_str }; let deserialized_str_ident = quote! { __enumscribe_deserialized_str }; let variant_strings = parsed_enum - .variants + .variants() .iter() .map(|variant| match &variant.v_type { - VariantType::Named { name, .. } => Some(name.as_str()), + VariantType::Named(named) => Some(named.name()), _ => None, }) .filter_map(|name| name) @@ -780,13 +791,23 @@ pub fn derive_enum_deserialize(input: TokenStream) -> TokenStream { }), )); + let name_cap = parsed_enum.name_capacity(); + (quote! { #[automatically_derived] impl<'de> ::serde::Deserialize<'de> for #enum_ident { fn deserialize(#deserializer_ident: D) -> ::core::result::Result where D: ::serde::Deserializer<'de> { - let #deserialized_str_ident = <&str as ::serde::Deserialize<'_>>::deserialize(#deserializer_ident)?; + let #deserialized_cow_capped_str_ident = < + ::enumscribe + ::internal + ::capped_string + ::CowCappedString<'de, #name_cap> + as ::serde::Deserialize<'_> + >::deserialize(#deserializer_ident)?; + + let #deserialized_str_ident = &*#deserialized_cow_capped_str_ident; #main_match } } From dca10eb38d3ebe0cdfb0aff9df12dfff9ad1938b Mon Sep 17 00:00:00 2001 From: pantonshire Date: Sun, 1 Oct 2023 13:31:40 +0100 Subject: [PATCH 14/15] fix for generated serde deserialize impl --- enumscribe_derive/src/lib.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/enumscribe_derive/src/lib.rs b/enumscribe_derive/src/lib.rs index c21228c..74f96f3 100644 --- a/enumscribe_derive/src/lib.rs +++ b/enumscribe_derive/src/lib.rs @@ -782,9 +782,9 @@ pub fn derive_enum_deserialize(input: TokenStream) -> TokenStream { ::core::result::Result::Ok(#constructed_other_variant) }, |_| Ok(quote! { - __enumscribe_deserialize_base_case => ::core::result::Result::Err( + _ => ::core::result::Result::Err( ::serde::de::Error::unknown_variant( - __enumscribe_deserialize_base_case, + #deserialized_str_ident, &[#(#variant_strings),*] ) ) From 615db6be6d91c2ff348956d1c4049861c3c6a959 Mon Sep 17 00:00:00 2001 From: pantonshire Date: Sun, 1 Oct 2023 13:45:42 +0100 Subject: [PATCH 15/15] tests for serde deserialize --- enumscribe_tests/tests/test_serde.rs | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) create mode 100644 enumscribe_tests/tests/test_serde.rs diff --git a/enumscribe_tests/tests/test_serde.rs b/enumscribe_tests/tests/test_serde.rs new file mode 100644 index 0000000..a3aed39 --- /dev/null +++ b/enumscribe_tests/tests/test_serde.rs @@ -0,0 +1,25 @@ +use enumscribe::EnumDeserialize; + +#[test] +fn test_deserialize() { + #[derive(EnumDeserialize, Eq, PartialEq, Debug)] + enum E0 { + V0, + #[enumscribe(str = "baa", case_insensitive)] + V1, + #[enumscribe(str = "bAz\n", case_insensitive)] + V2, + #[enumscribe(str = "蟹")] + V3, + } + + assert_eq!(serde_json::from_str::(r#""V0""#).unwrap(), E0::V0); + assert!(serde_json::from_str::(r#""v0""#).is_err()); + assert_eq!(serde_json::from_str::(r#""baa""#).unwrap(), E0::V1); + assert_eq!(serde_json::from_str::(r#""BAA""#).unwrap(), E0::V1); + assert_eq!(serde_json::from_str::(r#""BaA""#).unwrap(), E0::V1); + assert_eq!(serde_json::from_str::(r#""baz\n""#).unwrap(), E0::V2); + assert_eq!(serde_json::from_str::(r#""BAZ\n""#).unwrap(), E0::V2); + assert_eq!(serde_json::from_str::(r#""BaZ\n""#).unwrap(), E0::V2); + assert_eq!(serde_json::from_str::(r#""\u87f9""#).unwrap(), E0::V3); +}