FixedString data structure, improved UUID string encoding

main
Pantonshire 4 years ago
parent da5f2235d2
commit 4dd61aeaa2

@ -125,6 +125,8 @@ pub fn byte_to_hex_lower(byte: u8) -> (u8, u8) {
)
}
/// Returns the ASCII byte corresponding to the given hex nybble, using lowercase for the digits A
/// to F. Assumes the given value is less than 16.
#[inline]
fn nybble_to_hex_lower(nybble: u8) -> u8 {
match nybble {
@ -133,6 +135,14 @@ fn nybble_to_hex_lower(nybble: u8) -> u8 {
}
}
/// Converts the given byte to its uppercase hexadecimal representation. The first byte returned
/// encodes the most significant 4 bits, and the second byte encodes the least significant 4 bits.
///
/// ```
/// # use libshire::hex::byte_to_hex_upper;
/// assert_eq!(byte_to_hex_upper(15), (b'0', b'F'));
/// assert_eq!(byte_to_hex_upper(139), (b'8', b'B'));
/// ```
#[inline]
#[must_use]
pub fn byte_to_hex_upper(byte: u8) -> (u8, u8) {
@ -142,6 +152,8 @@ pub fn byte_to_hex_upper(byte: u8) -> (u8, u8) {
)
}
/// Returns the ASCII byte corresponding to the given hex nybble, using uppercase for the digits A
/// to F. Assumes the given value is less than 16.
#[inline]
fn nybble_to_hex_upper(nybble: u8) -> u8 {
match nybble {
@ -298,6 +310,32 @@ mod tests {
assert_eq!(nybble_to_hex_upper(15), b'F');
}
/// Checks `byte_to_hex_lower` against a table of bytes covering both nybble extremes and the
/// digit/letter boundary.
#[test]
fn test_byte_to_hex_lower() {
    // Each entry is (input byte, expected (high digit, low digit)).
    let cases: [(u8, (u8, u8)); 9] = [
        (0x00, (b'0', b'0')),
        (0x01, (b'0', b'1')),
        (0x0F, (b'0', b'f')),
        (0x10, (b'1', b'0')),
        (0x1F, (b'1', b'f')),
        (0x9A, (b'9', b'a')),
        (0xA9, (b'a', b'9')),
        (0xF0, (b'f', b'0')),
        (0xFF, (b'f', b'f')),
    ];
    for (input, expected) in cases.iter().copied() {
        assert_eq!(byte_to_hex_lower(input), expected);
    }
}
/// Checks `byte_to_hex_upper` against a table of bytes covering both nybble extremes and the
/// digit/letter boundary.
#[test]
fn test_byte_to_hex_upper() {
    // Each entry is (input byte, expected (high digit, low digit)).
    let cases: [(u8, (u8, u8)); 9] = [
        (0x00, (b'0', b'0')),
        (0x01, (b'0', b'1')),
        (0x0F, (b'0', b'F')),
        (0x10, (b'1', b'0')),
        (0x1F, (b'1', b'F')),
        (0x9A, (b'9', b'A')),
        (0xA9, (b'A', b'9')),
        (0xF0, (b'F', b'0')),
        (0xFF, (b'F', b'F')),
    ];
    for (input, expected) in cases.iter().copied() {
        assert_eq!(byte_to_hex_upper(input), expected);
    }
}
#[test]
fn test_hex_to_nybble() {
assert_eq!(hex_to_nybble('0').unwrap(), 0x0);

@ -0,0 +1,218 @@
use std::{
borrow,
cmp::Ordering,
error,
fmt,
hash::{Hash, Hasher},
ops,
str,
};
/// A string whose length is exactly `N` bytes, stored inline in a fixed-size array.
///
/// The buffer is expected to always hold valid UTF-8; the constructors that cannot check this
/// are `unsafe` for that reason.
///
/// The type is `Clone` and `Copy` because it is nothing more than an inline `[u8; N]`, which is
/// itself `Copy` for every `N`.
#[derive(Clone, Copy)]
pub struct FixedString<const N: usize> {
    /// The string data. Must be valid UTF-8.
    buf: [u8; N],
}
impl<const N: usize> FixedString<N> {
#[inline]
pub fn new(s: &str) -> Result<Self, Error> {
// SAFETY:
// A `&str` is always valid UTF-8.
unsafe { Self::from_raw_slice(s.as_bytes()) }
}
/// # Safety
/// The provided byte slice must be valid UTF-8.
#[inline]
pub unsafe fn from_raw_slice(bytes: &[u8]) -> Result<Self, Error> {
match bytes.try_into() {
Ok(bytes) => Ok(Self::from_raw_array(bytes)),
Err(_) => Err(Error::BadLength {
expected: N,
actual: bytes.len(),
}),
}
}
/// # Safety
/// The provided byte array must be valid UTF-8.
#[inline]
#[must_use]
pub unsafe fn from_raw_array(bytes: [u8; N]) -> Self {
Self { buf: bytes }
}
#[inline]
#[must_use]
pub fn as_str(&self) -> &str {
// SAFETY:
// `buf` is always valid UTF-8 since that is an invariant `FixedString`.
unsafe { str::from_utf8_unchecked(&self.buf) }
}
#[inline]
#[must_use]
pub fn as_str_mut(&mut self) -> &mut str {
// SAFETY:
// `buf` is always valid UTF-8 since that is an invariant `FixedString`.
unsafe { str::from_utf8_unchecked_mut(&mut self.buf) }
}
#[inline]
#[must_use]
pub fn as_bytes(&self) -> &[u8; N] {
&self.buf
}
#[inline]
#[must_use]
pub fn into_raw(self) -> [u8; N] {
self.buf
}
}
/// Lets a `FixedString` be used wherever a `&str` is expected.
impl<const N: usize> ops::Deref for FixedString<N> {
    type Target = str;

    #[inline]
    fn deref(&self) -> &str {
        Self::as_str(self)
    }
}
/// Lets a `FixedString` be used wherever a `&mut str` is expected.
impl<const N: usize> ops::DerefMut for FixedString<N> {
    #[inline]
    fn deref_mut(&mut self) -> &mut str {
        Self::as_str_mut(self)
    }
}
/// Cheap reference conversion to `&str`.
impl<const N: usize> AsRef<str> for FixedString<N> {
    #[inline]
    fn as_ref(&self) -> &str {
        self.as_str()
    }
}
/// Cheap mutable reference conversion to `&mut str`.
impl<const N: usize> AsMut<str> for FixedString<N> {
    #[inline]
    fn as_mut(&mut self) -> &mut str {
        self.as_str_mut()
    }
}
/// Borrows the contents as a `str`.
impl<const N: usize> borrow::Borrow<str> for FixedString<N> {
    #[inline]
    fn borrow(&self) -> &str {
        self.as_str()
    }
}
/// Mutably borrows the contents as a `str`.
impl<const N: usize> borrow::BorrowMut<str> for FixedString<N> {
    #[inline]
    fn borrow_mut(&mut self) -> &mut str {
        self.as_str_mut()
    }
}
impl<const N: usize> str::FromStr for FixedString<N> {
type Err = Error;
#[inline]
fn from_str(s: &str) -> Result<Self, Self::Err> {
Self::new(s)
}
}
impl<'a, const N: usize> TryFrom<&'a str> for FixedString<N> {
type Error = Error;
#[inline]
fn try_from(value: &'a str) -> Result<Self, Self::Error> {
Self::new(value)
}
}
/// Two `FixedString`s are equal when their string contents are equal.
impl<const N: usize> PartialEq for FixedString<N> {
    #[inline]
    fn eq(&self, other: &Self) -> bool {
        self.as_str() == other.as_str()
    }
}
// Marker impl: `PartialEq` for `FixedString` delegates to `str` equality.
impl<const N: usize> Eq for FixedString<N> {}
/// Always yields `Some`, deferring to the total order defined by the `Ord` impl.
impl<const N: usize> PartialOrd for FixedString<N> {
    #[inline]
    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
        Some(Ord::cmp(self, other))
    }
}
/// Orders `FixedString`s by comparing their string contents.
impl<const N: usize> Ord for FixedString<N> {
    #[inline]
    fn cmp(&self, other: &Self) -> Ordering {
        self.as_str().cmp(other.as_str())
    }
}
/// Hashes the string contents exactly as the equivalent `str` would be hashed.
impl<const N: usize> Hash for FixedString<N> {
    #[inline]
    fn hash<H: Hasher>(&self, state: &mut H) {
        self.as_str().hash(state);
    }
}
/// Formats the contents using the `Debug` implementation of `str`.
impl<const N: usize> fmt::Debug for FixedString<N> {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        fmt::Debug::fmt(self.as_str(), f)
    }
}
/// Formats the contents using the `Display` implementation of `str`.
impl<const N: usize> fmt::Display for FixedString<N> {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        fmt::Display::fmt(self.as_str(), f)
    }
}
/// The error returned when a `FixedString` cannot be constructed.
///
/// Derives `Clone`, `PartialEq` and `Eq` in addition to `Debug` so that callers and tests can
/// compare and duplicate error values.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Error {
    /// The input did not have the required length in bytes.
    BadLength {
        /// The number of bytes that was required.
        expected: usize,
        /// The number of bytes that was actually provided.
        actual: usize,
    },
}
impl fmt::Display for Error {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self {
Error::BadLength { expected, actual } => {
write!(
f,
"expected {} bytes of string data, found {} bytes",
expected, actual
)
}
}
}
}
// Uses the default method implementations of `std::error::Error`.
impl error::Error for Error {}
#[cfg(test)]
mod tests {
    use super::FixedString;

    /// A 5-byte string is accepted only when the input is exactly 5 bytes long.
    #[test]
    fn test_fixed_string() {
        let exact = FixedString::<5>::new("hello");
        assert!(exact.is_ok());

        // Too long, too short, and empty inputs must all be rejected.
        for wrong_len in ["hello!", "helo", ""].iter().copied() {
            assert!(FixedString::<5>::new(wrong_len).is_err());
        }

        assert_eq!(exact.unwrap().as_bytes(), "hello".as_bytes());
    }

    /// A zero-length string accepts only the empty input.
    #[test]
    fn test_fixed_string_zero() {
        let empty = FixedString::<0>::new("");
        assert!(empty.is_ok());

        for too_long in ["a", "abc"].iter().copied() {
            assert!(FixedString::<0>::new(too_long).is_err());
        }

        assert_eq!(empty.unwrap().as_bytes(), &[]);
    }
}

@ -1,3 +1,5 @@
pub mod fixed_string;
pub mod shstring;
pub use fixed_string::{FixedString, Error as FixedStringError};
pub use shstring::{ShString, ShString22};

@ -1,6 +1,6 @@
use std::{error, fmt, str};
use crate::hex::{self, HexBytes};
use crate::{hex, strings::FixedString};
// TODO: make conformity to RFC 4122 an invariant of this type (which means it cannot be created
// safely from an arbitrary [u8; 16]).
@ -69,6 +69,37 @@ impl Uuid {
pub fn to_bytes(self) -> [u8; 16] {
self.0
}
/// Encodes this UUID as a 36-byte lowercase hexadecimal string in the hyphenated 8-4-4-4-12
/// layout, returned as a stack-allocated `FixedString`.
#[must_use]
pub fn as_string(&self) -> FixedString<36> {
    // Start with a hyphen in every position. The loop below overwrites all 32 hex-digit
    // positions, so hyphens remain only at indices 8, 13, 18 and 23.
    let mut out = [b'-'; 36];

    for (index, &byte) in self.0.iter().enumerate() {
        // Number of hyphens that precede the two digits of the byte at `index`.
        let hyphens_before = match index {
            0..=3 => 0,
            4..=5 => 1,
            6..=7 => 2,
            8..=9 => 3,
            _ => 4,
        };
        let pos = index * 2 + hyphens_before;
        let (high, low) = hex::byte_to_hex_lower(byte);
        out[pos] = high;
        out[pos + 1] = low;
    }

    debug_assert!(str::from_utf8(&out).is_ok());

    // SAFETY:
    // `byte_to_hex_lower` always returns a pair of ASCII characters, and `b'-'` is a valid
    // ASCII character, so `out` contains a valid ASCII string. All valid ASCII strings are
    // also valid UTF-8, so `out` is valid UTF-8.
    unsafe { FixedString::from_raw_array(out) }
}
}
impl Default for Uuid {
@ -121,20 +152,9 @@ impl str::FromStr for Uuid {
}
}
// TODO: UUIDs have a fixed-length string representation, so write a function which either creates
// a string with that capacity or returns a string type with a compile-time known length.
impl fmt::Display for Uuid {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(
f,
"{}-{}-{}-{}-{}",
HexBytes::<hex::Lowercase>::new(&self.0[..4]),
HexBytes::<hex::Lowercase>::new(&self.0[4..6]),
HexBytes::<hex::Lowercase>::new(&self.0[6..8]),
HexBytes::<hex::Lowercase>::new(&self.0[8..10]),
HexBytes::<hex::Lowercase>::new(&self.0[10..])
)
f.write_str(&self.as_string())
}
}
@ -144,8 +164,7 @@ impl serde::Serialize for Uuid {
where
S: serde::Serializer
{
// TODO: replace with a better string conversion function
serializer.serialize_str(&self.to_string())
serializer.serialize_str(&self.as_string())
}
}

Loading…
Cancel
Save