diff --git a/src/hex.rs b/src/hex.rs index 4717730..99d6d20 100644 --- a/src/hex.rs +++ b/src/hex.rs @@ -125,6 +125,8 @@ pub fn byte_to_hex_lower(byte: u8) -> (u8, u8) { ) } +/// Returns the ASCII byte corresponding to the given hex nybble, using lowercase for the digits A +/// to F. Assumes the given value is less than 16. #[inline] fn nybble_to_hex_lower(nybble: u8) -> u8 { match nybble { @@ -133,6 +135,14 @@ fn nybble_to_hex_lower(nybble: u8) -> u8 { } } +/// Converts the given byte to its uppercase hexadecimal representation. The first byte returned +/// encodes the most significant 4 bits, and the second byte encodes the least significant 4 bits. +/// +/// ``` +/// # use libshire::hex::byte_to_hex_upper; +/// assert_eq!(byte_to_hex_upper(15), (b'0', b'F')); +/// assert_eq!(byte_to_hex_upper(139), (b'8', b'B')); +/// ``` #[inline] #[must_use] pub fn byte_to_hex_upper(byte: u8) -> (u8, u8) { @@ -142,6 +152,8 @@ pub fn byte_to_hex_upper(byte: u8) -> (u8, u8) { ) } +/// Returns the ASCII byte corresponding to the given hex nybble, using uppercase for the digits A +/// to F. Assumes the given value is less than 16. 
#[inline] fn nybble_to_hex_upper(nybble: u8) -> u8 { match nybble { @@ -298,6 +310,32 @@ mod tests { assert_eq!(nybble_to_hex_upper(15), b'F'); } + #[test] + fn test_byte_to_hex_lower() { + assert_eq!(byte_to_hex_lower(0x00), (b'0', b'0')); + assert_eq!(byte_to_hex_lower(0x01), (b'0', b'1')); + assert_eq!(byte_to_hex_lower(0x0F), (b'0', b'f')); + assert_eq!(byte_to_hex_lower(0x10), (b'1', b'0')); + assert_eq!(byte_to_hex_lower(0x1F), (b'1', b'f')); + assert_eq!(byte_to_hex_lower(0x9A), (b'9', b'a')); + assert_eq!(byte_to_hex_lower(0xA9), (b'a', b'9')); + assert_eq!(byte_to_hex_lower(0xF0), (b'f', b'0')); + assert_eq!(byte_to_hex_lower(0xFF), (b'f', b'f')); + } + + #[test] + fn test_byte_to_hex_upper() { + assert_eq!(byte_to_hex_upper(0x00), (b'0', b'0')); + assert_eq!(byte_to_hex_upper(0x01), (b'0', b'1')); + assert_eq!(byte_to_hex_upper(0x0F), (b'0', b'F')); + assert_eq!(byte_to_hex_upper(0x10), (b'1', b'0')); + assert_eq!(byte_to_hex_upper(0x1F), (b'1', b'F')); + assert_eq!(byte_to_hex_upper(0x9A), (b'9', b'A')); + assert_eq!(byte_to_hex_upper(0xA9), (b'A', b'9')); + assert_eq!(byte_to_hex_upper(0xF0), (b'F', b'0')); + assert_eq!(byte_to_hex_upper(0xFF), (b'F', b'F')); + } + #[test] fn test_hex_to_nybble() { assert_eq!(hex_to_nybble('0').unwrap(), 0x0); diff --git a/src/strings/fixed_string.rs b/src/strings/fixed_string.rs new file mode 100644 index 0000000..33ab3fc --- /dev/null +++ b/src/strings/fixed_string.rs @@ -0,0 +1,218 @@ +use std::{ + borrow, + cmp::Ordering, + error, + fmt, + hash::{Hash, Hasher}, + ops, + str, +}; + +pub struct FixedString { + buf: [u8; N], +} + +impl FixedString { + #[inline] + pub fn new(s: &str) -> Result { + // SAFETY: + // A `&str` is always valid UTF-8. + unsafe { Self::from_raw_slice(s.as_bytes()) } + } + + /// # Safety + /// The provided byte slice must be valid UTF-8. 
+ #[inline] + pub unsafe fn from_raw_slice(bytes: &[u8]) -> Result { + match bytes.try_into() { + Ok(bytes) => Ok(Self::from_raw_array(bytes)), + Err(_) => Err(Error::BadLength { + expected: N, + actual: bytes.len(), + }), + } + } + + /// # Safety + /// The provided byte array must be valid UTF-8. + #[inline] + #[must_use] + pub unsafe fn from_raw_array(bytes: [u8; N]) -> Self { + Self { buf: bytes } + } + + #[inline] + #[must_use] + pub fn as_str(&self) -> &str { + // SAFETY: + // `buf` is always valid UTF-8 since that is an invariant of `FixedString`. + unsafe { str::from_utf8_unchecked(&self.buf) } + } + + #[inline] + #[must_use] + pub fn as_str_mut(&mut self) -> &mut str { + // SAFETY: + // `buf` is always valid UTF-8 since that is an invariant of `FixedString`. + unsafe { str::from_utf8_unchecked_mut(&mut self.buf) } + } + + #[inline] + #[must_use] + pub fn as_bytes(&self) -> &[u8; N] { + &self.buf + } + + #[inline] + #[must_use] + pub fn into_raw(self) -> [u8; N] { + self.buf + } +} + +impl ops::Deref for FixedString { + type Target = str; + + #[inline] + fn deref(&self) -> &Self::Target { + self.as_str() + } +} + +impl ops::DerefMut for FixedString { + #[inline] + fn deref_mut(&mut self) -> &mut Self::Target { + self.as_str_mut() + } +} + +impl AsRef for FixedString { + #[inline] + fn as_ref(&self) -> &str { + self + } +} + +impl AsMut for FixedString { + #[inline] + fn as_mut(&mut self) -> &mut str { + self + } +} + +impl borrow::Borrow for FixedString { + #[inline] + fn borrow(&self) -> &str { + self + } +} + +impl borrow::BorrowMut for FixedString { + #[inline] + fn borrow_mut(&mut self) -> &mut str { + self + } +} + +impl str::FromStr for FixedString { + type Err = Error; + + #[inline] + fn from_str(s: &str) -> Result { + Self::new(s) + } +} + +impl<'a, const N: usize> TryFrom<&'a str> for FixedString { + type Error = Error; + + #[inline] + fn try_from(value: &'a str) -> Result { + Self::new(value) + } +} + +impl PartialEq for FixedString { + #[inline] + fn
eq(&self, other: &Self) -> bool { + **self == **other + } +} + +impl Eq for FixedString {} + +impl PartialOrd for FixedString { + #[inline] + fn partial_cmp(&self, other: &Self) -> Option { + Some(self.cmp(other)) + } +} + +impl Ord for FixedString { + #[inline] + fn cmp(&self, other: &Self) -> Ordering { + (**self).cmp(&**other) + } +} + +impl Hash for FixedString { + #[inline] + fn hash(&self, state: &mut H) { + (**self).hash(state); + } +} + +impl fmt::Debug for FixedString { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + fmt::Debug::fmt(&**self, f) + } +} + +impl fmt::Display for FixedString { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + fmt::Display::fmt(&**self, f) + } +} + +#[derive(Debug)] +pub enum Error { + BadLength { expected: usize, actual: usize }, +} + +impl fmt::Display for Error { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + Error::BadLength { expected, actual } => { + write!( + f, + "expected {} bytes of string data, found {} bytes", + expected, actual + ) + } + } + } +} + +impl error::Error for Error {} + +#[cfg(test)] +mod tests { + use super::FixedString; + + #[test] + fn test_fixed_string() { + assert!(FixedString::<5>::new("hello").is_ok()); + assert!(FixedString::<5>::new("hello!").is_err()); + assert!(FixedString::<5>::new("helo").is_err()); + assert!(FixedString::<5>::new("").is_err()); + assert_eq!(FixedString::<5>::new("hello").unwrap().as_bytes(), "hello".as_bytes()); + } + + #[test] + fn test_fixed_string_zero() { + assert!(FixedString::<0>::new("").is_ok()); + assert!(FixedString::<0>::new("a").is_err()); + assert!(FixedString::<0>::new("abc").is_err()); + assert_eq!(FixedString::<0>::new("").unwrap().as_bytes(), &[]); + } +} diff --git a/src/strings/mod.rs b/src/strings/mod.rs index b6dce5b..d5f6f52 100644 --- a/src/strings/mod.rs +++ b/src/strings/mod.rs @@ -1,3 +1,5 @@ +pub mod fixed_string; pub mod shstring; +pub use fixed_string::{FixedString, Error as 
FixedStringError}; pub use shstring::{ShString, ShString22}; diff --git a/src/uuid.rs b/src/uuid.rs index e4e7b9c..0f1a552 100644 --- a/src/uuid.rs +++ b/src/uuid.rs @@ -1,6 +1,6 @@ use std::{error, fmt, str}; -use crate::hex::{self, HexBytes}; +use crate::{hex, strings::FixedString}; // TODO: make conformity to RFC 4122 an invariant of this type (which means it cannot be created // safely from an arbitrary [u8; 16]). @@ -69,6 +69,37 @@ impl Uuid { pub fn to_bytes(self) -> [u8; 16] { self.0 } + + #[must_use] + pub fn as_string(&self) -> FixedString<36> { + let mut buf = [0u8; 36]; + + for (i, byte) in self.0.iter().copied().enumerate() { + let (b0, b1) = hex::byte_to_hex_lower(byte); + let offset = match i { + 0..=3 => 0, + 4..=5 => 1, + 6..=7 => 2, + 8..=9 => 3, + _ => 4, + }; + buf[i * 2 + offset] = b0; + buf[i * 2 + 1 + offset] = b1; + } + + buf[8] = b'-'; + buf[13] = b'-'; + buf[18] = b'-'; + buf[23] = b'-'; + + debug_assert!(str::from_utf8(&buf).is_ok()); + + // SAFETY: + // `byte_to_hex_lower` always returns a pair of ASCII characters, and `b'-'` is a valid + // ASCII character, so `buf` contains a valid ASCII string. All valid ASCII strings are + // also valid UTF-8, so `buf` is valid UTF-8. + unsafe { FixedString::from_raw_array(buf) } + } } impl Default for Uuid { @@ -121,20 +152,9 @@ impl str::FromStr for Uuid { } } -// TODO: UUIDs have a fixed-length string representation, so write a function which either creates -// a string with that capacity or returns a string type with a compile-time known length. 
- impl fmt::Display for Uuid { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - write!( - f, - "{}-{}-{}-{}-{}", - HexBytes::::new(&self.0[..4]), - HexBytes::::new(&self.0[4..6]), - HexBytes::::new(&self.0[6..8]), - HexBytes::::new(&self.0[8..10]), - HexBytes::::new(&self.0[10..]) - ) + f.write_str(&self.as_string()) } } @@ -144,8 +164,7 @@ impl serde::Serialize for Uuid { where S: serde::Serializer { - // TODO: replace with a better string conversion function - serializer.serialize_str(&self.to_string()) + serializer.serialize_str(&self.as_string()) } }