use std::{error, fmt, str}; use crate::{hex, strings::FixedString}; // TODO: make conformity to RFC 4122 an invariant of this type (which means it cannot be created // safely from an arbitrary [u8; 16]). #[derive(Clone, Copy, PartialEq, Eq, Debug)] #[repr(transparent)] pub struct Uuid([u8; 16]); // UUID ANATOMY // // Offset | Field // -------+--------------------------- // 0 | time_low // 1 | // 2 | // 3 | // -------+--------------------------- // 4 | time_mid // 5 | // -------+--------------------------- // 6 | time_hi_and_version // 7 | // -------+--------------------------- // 8 | clock_seq_hi_and_reserved // -------+--------------------------- // 9 | clock_seq_low // -------+--------------------------- // 10 | node // 11 | // 12 | // 13 | // 14 | // 15 | impl Uuid { #[inline] #[must_use] pub const fn nil() -> Self { Self([0; 16]) } #[inline] #[must_use] pub const fn from_bytes(bytes: [u8; 16]) -> Self { Self(bytes) } pub fn new_v5(namespace: Uuid, name: &T) -> Result where T: AsRef<[u8]> + ?Sized, { const UUID_VERSION: u8 = 5; let hash = sha1(namespace.to_bytes(), >::as_ref(name))?; let mut buf = [0u8; 16]; buf.copy_from_slice(&hash[..16]); buf[6] = (buf[6] & 0xF) | (UUID_VERSION << 4); buf[8] = (buf[8] & 0x3F) | 0x80; Ok(Self::from_bytes(buf)) } #[inline] #[must_use] pub fn to_bytes(self) -> [u8; 16] { self.0 } #[must_use] pub fn as_string(&self) -> FixedString<36> { let mut buf = [0u8; 36]; for (i, byte) in self.0.iter().copied().enumerate() { let (b0, b1) = hex::byte_to_hex_lower(byte); let offset = match i { 0..=3 => 0, 4..=5 => 1, 6..=7 => 2, 8..=9 => 3, _ => 4, }; buf[i * 2 + offset] = b0; buf[i * 2 + 1 + offset] = b1; } buf[8] = b'-'; buf[13] = b'-'; buf[18] = b'-'; buf[23] = b'-'; debug_assert!(str::from_utf8(&buf).is_ok()); // SAFETY: // `byte_to_hex_lower` always returns a pair of ASCII characters, and `b'-'` is a valid // ASCII character, so `buf` contains a valid ASCII string. All valid ASCII strings are // also valid UTF-8, so `buf` is valid UTF-8. unsafe { FixedString::from_raw_array(buf) } } } impl Default for Uuid { #[inline] fn default() -> Self { Self::nil() } } impl str::FromStr for Uuid { type Err = ParseError; fn from_str(s: &str) -> Result { let groups = { let mut groups_iter = s.split('-'); let mut groups_buf = [""; 5]; let mut num_groups = 0; for (i, group) in groups_iter.by_ref().take(5).enumerate() { groups_buf[i] = group; num_groups += 1; } if num_groups < 5 { return Err(ParseError::NotEnoughGroups(num_groups)); } if groups_iter.next().is_some() { return Err(ParseError::TooManyGroups); } groups_buf }; let mut buf = [0u8; 16]; buf[..4].copy_from_slice( &hex::hex_to_be_byte_array::<4>(groups[0]).map_err(ParseError::BadTimeLow)?, ); buf[4..6].copy_from_slice( &hex::hex_to_be_byte_array::<2>(groups[1]).map_err(ParseError::BadTimeMid)?, ); buf[6..8].copy_from_slice( &hex::hex_to_be_byte_array::<2>(groups[2]).map_err(ParseError::BadTimeHi)?, ); buf[8..10].copy_from_slice( &hex::hex_to_be_byte_array::<2>(groups[3]).map_err(ParseError::BadClockSeq)?, ); buf[10..].copy_from_slice( &hex::hex_to_be_byte_array::<6>(groups[4]).map_err(ParseError::BadNode)?, ); Ok(Self::from_bytes(buf)) } } impl fmt::Display for Uuid { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { f.write_str(&self.as_string()) } } #[derive(Debug)] pub enum ParseError { NotEnoughGroups(usize), TooManyGroups, BadTimeLow(hex::ArrayParseError), BadTimeMid(hex::ArrayParseError), BadTimeHi(hex::ArrayParseError), BadClockSeq(hex::ArrayParseError), BadNode(hex::ArrayParseError), } impl fmt::Display for ParseError { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { match self { Self::NotEnoughGroups(groups) => { write!(f, "expected 5 groups of digits, found {}", groups) } Self::TooManyGroups => { write!( f, "found an unexpected extra group after the first 5 groups" ) } Self::BadTimeLow(err) => { write!(f, "error decoding `time_low` (first) group: {}", err) } Self::BadTimeMid(err) => { write!(f, "error decoding `time_mid` (second) group: {}", err) } Self::BadTimeHi(err) => { write!(f, "error decoding `time_hi` (third) group: {}", err) } Self::BadClockSeq(err) => { write!(f, "error decoding `clock_seq` (fourth) group: {}", err) } Self::BadNode(err) => { write!(f, "error decoding `node` (fifth) group: {}", err) } } } } impl error::Error for ParseError {} #[derive(Debug)] pub enum UuidV5Error { NameTooLong(usize), } impl fmt::Display for UuidV5Error { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { match self { Self::NameTooLong(size) => write!(f, "uuid v5 name too long ({} bytes)", size), } } } impl error::Error for UuidV5Error {} fn sha1(namespace: [u8; 16], name: &[u8]) -> Result<[u8; 20], UuidV5Error> { let (mut h0, mut h1, mut h2, mut h3, mut h4) = ( 0x67452301u32, 0xefcdab89u32, 0x98badcfeu32, 0x10325476u32, 0xc3d2e1f0u32, ); // Break the namespace and name (and some additional trailing bytes) into 64-byte chunks and // update the state for each chunk. for chunk in Sha1ChunkIter::new(namespace, name)? { let mut words = [0u32; 80]; // Break the 64-byte chunk into 16 4-byte words, and store them as the first 16 values in // the `words` buffer. for (i, word) in words.iter_mut().take(16).enumerate() { *word = u32::from_be_bytes(chunk[(i * 4)..(i * 4 + 4)].try_into().unwrap()); } // Calculate the remaining 64 4-byte words. for i in 16..80 { words[i] = (words[i - 3] ^ words[i - 8] ^ words[i - 14] ^ words[i - 16]).rotate_left(1); } let (mut a, mut b, mut c, mut d, mut e) = (h0, h1, h2, h3, h4); // 80-round main loop for the current chunk. for (i, word) in words.iter().copied().enumerate() { let (f, k) = match i { 0..=19 => ((b & c) | (!b & d), 0x5a827999), 20..=39 => (b ^ c ^ d, 0x6ed9eba1), 40..=59 => ((b & c) | (b & d) | (c & d), 0x8f1bbcdc), _ => (b ^ c ^ d, 0xca62c1d6), }; let temp = (a.rotate_left(5)) .wrapping_add(f) .wrapping_add(e) .wrapping_add(k) .wrapping_add(word); e = d; d = c; c = b.rotate_left(30); b = a; a = temp; } h0 = h0.wrapping_add(a); h1 = h1.wrapping_add(b); h2 = h2.wrapping_add(c); h3 = h3.wrapping_add(d); h4 = h4.wrapping_add(e); } // Copy the 5 4-byte hash values into a single 20-byte array, to be returned as the final hash // value. let mut hash = [0u8; 20]; hash[..4].copy_from_slice(&h0.to_be_bytes()); hash[4..8].copy_from_slice(&h1.to_be_bytes()); hash[8..12].copy_from_slice(&h2.to_be_bytes()); hash[12..16].copy_from_slice(&h3.to_be_bytes()); hash[16..].copy_from_slice(&h4.to_be_bytes()); Ok(hash) } struct Sha1ChunkIter<'a> { namespace: [u8; 16], name: &'a [u8], message_len_bits: u64, namespace_added: bool, one_bit_added: bool, message_len_added: bool, } impl<'a> Sha1ChunkIter<'a> { fn new(namespace: [u8; 16], name: &'a [u8]) -> Result { let message_len_bits = u64::try_from(name.len()) .ok() .and_then(|len| len.checked_add(16)) .and_then(|len| len.checked_mul(8)) .ok_or(UuidV5Error::NameTooLong(name.len()))?; Ok(Self { namespace, name, message_len_bits, namespace_added: false, one_bit_added: false, message_len_added: false, }) } } impl<'a> Iterator for Sha1ChunkIter<'a> { type Item = [u8; 64]; fn next(&mut self) -> Option { if self.message_len_added { None } else { let mut chunk = [0u8; 64]; let mut chunk_offset = 0; // If the 16-byte namespace has not already appeared in a chunk, then we need to add it // to the current chunk. if !self.namespace_added { // Copy the namespace into the start of the current chunk. chunk[..16].copy_from_slice(&self.namespace); // Since the 16-byte namespace is currently the only thing in the chunk, we can // just set the chunk offset to 16 rather than incrementing it by 16. chunk_offset = 16; self.namespace_added = true; } // Check whether there are any bytes of the name remaining that have not appeared in // any chunk. if !self.name.is_empty() { // Calculate how many bytes of the name to add to this chunk by taking the minimum // of the length of the remaining part of the name and the free space in the // current chunk. This will always be non-zero because a maximum of 16 bytes of the // chunk have been used at this point (to store the namespace), so there will // always be at least 48 bytes available to store a portion of the name. let name_slice_len = self.name.len().min(64 - chunk_offset); // Copy the selected portion of the name into the chunk. chunk[chunk_offset..(chunk_offset + name_slice_len)] .copy_from_slice(&self.name[..name_slice_len]); // Advance the chunk offset by the number of bytes we just wrote to it. chunk_offset += name_slice_len; // Shrink the size of the name slice so the portion of the name we just added is no // longer included. self.name = &self.name[name_slice_len..]; } // Once we've written the entire name to the chunk, we now need to add the "1" bit // after the name, the zero-padding and the length of the hashed message in bits. Note // that this is a separate `if` statement rather than an `else` statement because `if` // statement above may have changed the value of `self.name`. if self.name.is_empty() { // If there's space in the chunk, add the byte 0x80 to the chunk in order to add a // "1" bit at the end of the message, as required by SHA-1. if !self.one_bit_added && chunk_offset < 64 { chunk[chunk_offset] = 0x80; chunk_offset += 1; self.one_bit_added = true; } // If we've already added the "1" bit and there's space in the chunk, add the // message length to the end of the chunk. if self.one_bit_added && chunk_offset < 57 { chunk[56..].copy_from_slice(&self.message_len_bits.to_be_bytes()); self.message_len_added = true; } } Some(chunk) } } } #[cfg(test)] mod tests { use super::Uuid; #[test] fn test_uuid_display() { assert_eq!( Uuid::from_bytes([ 0x12, 0x3e, 0x45, 0x67, 0xe8, 0x9b, 0x12, 0xd3, 0xa4, 0x56, 0x42, 0x66, 0x14, 0x17, 0x40, 0x00 ]) .to_string(), "123e4567-e89b-12d3-a456-426614174000" ); } #[test] fn test_uuid_parse() { assert_eq!( "123e4567-e89b-12d3-a456-426614174000" .parse::() .unwrap(), Uuid::from_bytes([ 0x12, 0x3e, 0x45, 0x67, 0xe8, 0x9b, 0x12, 0xd3, 0xa4, 0x56, 0x42, 0x66, 0x14, 0x17, 0x40, 0x00 ]) ); } #[test] fn test_uuidv5() { let namespace = "123e4567-e89b-12d3-a456-426614174000".parse().unwrap(); assert_eq!( Uuid::new_v5(namespace, "hello").unwrap(), "971690ef-3543-5938-95a4-29c6b029d731".parse().unwrap() ); } #[test] fn test_sha1() { use super::sha1; assert_eq!( sha1(*b"abcdefghijklmnop", b"").unwrap(), [ 0x14, 0xf3, 0x99, 0x52, 0x88, 0xac, 0xd1, 0x89, 0xe6, 0xe5, 0x0a, 0x7a, 0xf4, 0x7e, 0xe7, 0x09, 0x9a, 0xa6, 0x82, 0xb9 ] ); assert_eq!( sha1(*b"abcdefghijklmnop", b"hello").unwrap(), [ 0x35, 0x52, 0x6e, 0x86, 0xd8, 0x66, 0x5b, 0x7e, 0x91, 0x68, 0xf9, 0x94, 0x4d, 0xff, 0x3b, 0x5e, 0xd4, 0xb9, 0x30, 0x1c ] ); assert_eq!( sha1( *b"abcdefghijklmnop", b"abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLM" ) .unwrap(), [ 0x4e, 0xbd, 0x2c, 0x7f, 0xff, 0xa5, 0x75, 0xe0, 0xe0, 0xc4, 0xed, 0x50, 0x7b, 0x8b, 0x3c, 0x93, 0xdc, 0x2d, 0xad, 0x0d ] ); assert_eq!( sha1( *b"abcdefghijklmnop", b"abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMN" ) .unwrap(), [ 0xb3, 0x5b, 0x3d, 0x3d, 0xaa, 0xa5, 0x92, 0xfd, 0x15, 0x49, 0xd3, 0xa9, 0x81, 0xad, 0x85, 0x58, 0x03, 0xce, 0x01, 0x21 ] ); assert_eq!( sha1( *b"abcdefghijklmnop", b"abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTU" ) .unwrap(), [ 0xae, 0x12, 0xd0, 0xa7, 0x54, 0x8b, 0xa5, 0x99, 0x41, 0x15, 0xfa, 0x04, 0x8f, 0xe1, 0xcd, 0x7d, 0x91, 0xd8, 0x61, 0xc7, ] ); assert_eq!( sha1( *b"abcdefghijklmnop", b"abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUV" ) .unwrap(), [ 0x72, 0xd2, 0x58, 0xb5, 0x62, 0x4a, 0x55, 0x72, 0xd7, 0x55, 0x2d, 0xd9, 0xe2, 0x11, 0x4d, 0x4a, 0xc3, 0x8b, 0xd7, 0x61 ] ); assert_eq!( sha1( *b"abcdefghijklmnop", b"abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUW" ) .unwrap(), [ 0xa1, 0x75, 0x25, 0xb5, 0xd3, 0x15, 0x31, 0x63, 0x18, 0xf4, 0x83, 0x5c, 0x05, 0xbb, 0xf2, 0x5d, 0x8f, 0x89, 0x55, 0x13 ] ); } }