From f0cc229a65f0e49a8e1f6c5220ed46bd80109680 Mon Sep 17 00:00:00 2001 From: pantonshire Date: Tue, 6 Sep 2022 15:42:32 +0100 Subject: [PATCH] Percent decoding compliant with URL standard, export percent encoder and decoder types --- src/convert.rs | 12 ++ src/encoding/mod.rs | 2 +- src/encoding/rfc3986.rs | 308 ----------------------------------- src/encoding/url.rs | 351 ++++++++++++++++++++++++++++++++++++++++ src/lib.rs | 1 + src/sink.rs | 97 +++++++++++ 6 files changed, 462 insertions(+), 309 deletions(-) delete mode 100644 src/encoding/rfc3986.rs create mode 100644 src/encoding/url.rs create mode 100644 src/sink.rs diff --git a/src/convert.rs b/src/convert.rs index 4f382e7..d04cd92 100644 --- a/src/convert.rs +++ b/src/convert.rs @@ -42,6 +42,18 @@ impl Empty for Infallible { } } +#[inline] +#[must_use] +pub fn result_elim(res: Result) -> T +where + E: Empty, +{ + match res { + Ok(x) => x, + Err(e) => e.elim(), + } +} + #[inline] #[must_use] pub fn clone(x: &T) -> T { diff --git a/src/encoding/mod.rs b/src/encoding/mod.rs index 44a657e..c11931b 100644 --- a/src/encoding/mod.rs +++ b/src/encoding/mod.rs @@ -1,2 +1,2 @@ pub mod hex; -pub mod rfc3986; +pub mod url; diff --git a/src/encoding/rfc3986.rs b/src/encoding/rfc3986.rs deleted file mode 100644 index c005692..0000000 --- a/src/encoding/rfc3986.rs +++ /dev/null @@ -1,308 +0,0 @@ -// Following RFC3986 (https://www.rfc-editor.org/rfc/rfc3986#section-2.1) - -use core::{ - fmt::{self, Write}, - str, -}; - -#[cfg(all(feature = "alloc", not(feature = "std")))] -use alloc::{borrow::Cow, string::String, vec::Vec}; - -#[cfg(feature = "std")] -use std::borrow::Cow; - -use crate::{either::{Either, Inl, Inr}, strings::FixedString}; - -use super::hex; - -/// Finds the first element of the slice which does not match the given predicate and returns the -/// sub-slice preceding that element, the element itself, and the sub-slice following the element. -#[inline] -fn split_at_non_matching(xs: &[T], predicate: P) -> (&[T], Option<(T, &[T])>) -where - T: Copy, - P: Fn(T) -> bool, -{ - let mut i = 0; - while i < xs.len() { - let x = xs[i]; - if !predicate(x) { - // `get_unchecked` is used here because the compiler currently seems to struggle to - // reason about the correctness of the start and end indexes here, and can end up - // leaving in unnecessary bound checks. - // SAFETY: - // We have already checked that `i < xs.len()`, so `..i` is in bounds for `xs`. - let prefix = unsafe { xs.get_unchecked(..i) }; - // SAFETY: - // We have already checked that `i < xs.len()`, so `i + 1 <= xs.len()` must hold. - // Therefore, `(i + 1)..` is in bounds for `xs`. - let suffix = unsafe { xs.get_unchecked((i + 1)..) }; - return (prefix, Some((x, suffix))); - } - i += 1; - } - (xs, None) -} - -fn byte_unreserved(byte: u8) -> bool { - matches!(byte, b'0'..=b'9' | b'A'..=b'Z' | b'a'..=b'z' | b'-' | b'.' | b'_' | b'~') -} - -struct PercentEncoder<'a>(&'a [u8]); - -impl<'a> PercentEncoder<'a> { - pub fn partial_encode(&mut self) -> Option<(&'a str, Option>)> { - if self.0.is_empty() { - return None; - } - - let (prefix, suffix) = split_at_non_matching(self.0, byte_unreserved); - - // SAFETY: - // `prefix` only contains bytes which satisfy `byte_unreserved`, which are all valid ASCII - // characters. Therefore, it is valid UTF-8. - let prefix = unsafe { str::from_utf8_unchecked(prefix) }; - - match suffix { - Some((byte, suffix)) => { - self.0 = suffix; - Some((prefix, Some(Self::percent_encode_byte(byte)))) - }, - - None => { - self.0 = &self.0[self.0.len()..]; - Some((prefix, None)) - }, - } - } - - fn percent_encode_byte(byte: u8) -> FixedString<3> { - let [msb, lsb] = hex::byte_to_hex_upper(byte).into_raw(); - // SAFETY: - // The bytes obtained from `hex::byte_to_hex_upper` are valid UTF-8, and `b'%'` is a valid - // UTF-8 codepoint, so the byte array is valid UTF-8. - unsafe { FixedString::from_raw_array([b'%', msb, lsb]) } - } -} - -#[cfg(feature = "alloc")] -pub fn percent_encode(bytes: &B) -> Cow -where - B: AsRef<[u8]> + ?Sized, -{ - let mut encoder = PercentEncoder(bytes.as_ref()); - - match encoder.partial_encode().unwrap_or(("", None)) { - (prefix, Some(encoded_byte)) => { - let mut buf = String::new(); - buf.push_str(prefix); - buf.push_str(&encoded_byte); - - while let Some((prefix, encoded_byte)) = encoder.partial_encode() { - buf.push_str(prefix); - if let Some(encoded_byte) = encoded_byte { - buf.push_str(&encoded_byte); - } - } - - Cow::Owned(buf) - }, - - (prefix, None) => Cow::Borrowed(prefix), - } -} - -#[cfg(feature = "alloc")] -pub fn percent_encode_to_buf(buf: &mut String, bytes: &B) -where - B: AsRef<[u8]> + ?Sized, -{ - percent_encode_to_fmt_writer(buf, bytes) - .expect("writing to a String should never return an error") -} - -pub fn percent_encode_to_fmt_writer(writer: &mut W, bytes: &B) -> fmt::Result -where - W: Write + ?Sized, - B: AsRef<[u8]> + ?Sized, -{ - let mut encoder = PercentEncoder(bytes.as_ref()); - - while let Some((prefix, encoded_byte)) = encoder.partial_encode() { - if !prefix.is_empty() { - writer.write_str(prefix)?; - } - if let Some(encoded_byte) = encoded_byte { - writer.write_str(&encoded_byte)?; - } - } - - Ok(()) -} - -struct PercentDecoder<'a>(&'a [u8]); - -impl<'a> PercentDecoder<'a> { - fn partial_decode(&mut self) -> Result)>, PercentDecodeError> { - if self.0.is_empty() { - return Ok(None); - } - - let (prefix, suffix) = split_at_non_matching(self.0, byte_unreserved); - - // SAFETY: - // `prefix` only contains bytes which satisfy `byte_unreserved`, which are all valid ASCII - // characters. Therefore, it is valid UTF-8. - let prefix = unsafe { str::from_utf8_unchecked(prefix) }; - - match suffix { - Some((byte, suffix)) => { - if byte != b'%' { - return Err(PercentDecodeError); - } - - let [hex_msb, hex_lsb]: [u8; 2] = suffix - .get(..2) - .and_then(|hex_bytes| hex_bytes.try_into().ok()) - .ok_or(PercentDecodeError)?; - - let hex_byte = hex::hex_to_byte(hex_msb, hex_lsb) - .map_err(|_| PercentDecodeError)?; - - self.0 = &suffix[2..]; - - Ok(Some((prefix, Some(hex_byte)))) - }, - - None => { - self.0 = &self.0[self.0.len()..]; - Ok(Some((prefix, None))) - }, - } - } -} - -#[cfg(feature = "alloc")] -fn percent_decode_internal(bytes: &B) -> Result>, PercentDecodeError> -where - B: AsRef<[u8]> + ?Sized, -{ - let mut decoder = PercentDecoder(bytes.as_ref()); - - match decoder.partial_decode()?.unwrap_or(("", None)) { - (prefix, Some(byte)) => { - let mut buf = Vec::new(); - buf.extend(prefix.bytes()); - buf.push(byte); - - while let Some((prefix, byte)) = decoder.partial_decode()? { - buf.extend(prefix.bytes()); - if let Some(byte) = byte { - buf.push(byte); - } - } - - Ok(Inr(buf)) - }, - - (prefix, None) => Ok(Inl(prefix)) - } -} - -#[cfg(feature = "alloc")] -pub fn percent_decode_to_utf8(bytes: &B) -> Result, PercentDecodeError> -where - B: AsRef<[u8]> + ?Sized, -{ - percent_decode_internal(bytes).and_then(|decoded| match decoded { - Inl(decoded_str) => Ok(Cow::Borrowed(decoded_str)), - Inr(decoded_bytes) => String::from_utf8(decoded_bytes) - .map(Cow::Owned) - .map_err(|_| PercentDecodeError), - }) -} - -#[cfg(feature = "alloc")] -pub fn percent_decode_to_bytes(bytes: &B) -> Result, PercentDecodeError> -where - B: AsRef<[u8]> + ?Sized, -{ - percent_decode_internal(bytes).map(|decoded| match decoded { - Inl(decoded_str) => Cow::Borrowed(decoded_str.as_bytes()), - Inr(decoded_bytes) => Cow::Owned(decoded_bytes), - }) -} - -#[derive(Debug)] -pub struct PercentDecodeError; - -impl fmt::Display for PercentDecodeError { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - write!(f, "invalid rfc 3986 percent-encoded string") - } -} - -#[cfg(feature = "std")] -impl std::error::Error for PercentDecodeError {} - -#[cfg(test)] -mod tests { - #[cfg(feature = "alloc")] - #[test] - fn test_percent_encode() { - #[cfg(all(feature = "alloc", not(feature = "std")))] - use alloc::borrow::Cow; - - #[cfg(feature = "std")] - use std::borrow::Cow; - - use super::percent_encode; - - assert!(matches!(percent_encode(""), Cow::Borrowed(""))); - assert!(matches!(percent_encode("foobar"), Cow::Borrowed("foobar"))); - - assert_eq!(&*percent_encode("Ladies + Gentlemen"), "Ladies%20%2B%20Gentlemen"); - assert_eq!(&*percent_encode("An encoded string!"), "An%20encoded%20string%21"); - assert_eq!(&*percent_encode("Dogs, Cats & Mice"), "Dogs%2C%20Cats%20%26%20Mice"); - assert_eq!(&*percent_encode("☃"), "%E2%98%83"); - } - - #[cfg(feature = "alloc")] - #[test] - fn test_percent_decode() { - #[cfg(all(feature = "alloc", not(feature = "std")))] - use alloc::borrow::Cow; - - #[cfg(feature = "std")] - use std::borrow::Cow; - - use super::{percent_decode_to_utf8, percent_decode_to_bytes}; - - assert!(matches!(percent_decode_to_utf8(""), Ok(Cow::Borrowed("")))); - assert!(matches!(percent_decode_to_bytes(""), Ok(Cow::Borrowed(b"")))); - assert!(matches!(percent_decode_to_utf8("foobar"), Ok(Cow::Borrowed("foobar")))); - assert!(matches!(percent_decode_to_bytes("foobar"), Ok(Cow::Borrowed(b"foobar")))); - - assert!(matches!(percent_decode_to_utf8("Ladies%20%2B%20Gentlemen").as_deref(), Ok("Ladies + Gentlemen"))); - assert!(matches!(percent_decode_to_bytes("Ladies%20%2B%20Gentlemen").as_deref(), Ok(b"Ladies + Gentlemen"))); - assert!(matches!(percent_decode_to_utf8("An%20encoded%20string%21").as_deref(), Ok("An encoded string!"))); - assert!(matches!(percent_decode_to_bytes("An%20encoded%20string%21").as_deref(), Ok(b"An encoded string!"))); - assert!(matches!(percent_decode_to_utf8("Dogs%2C%20Cats%20%26%20Mice").as_deref(), Ok("Dogs, Cats & Mice"))); - assert!(matches!(percent_decode_to_bytes("Dogs%2C%20Cats%20%26%20Mice").as_deref(), Ok(b"Dogs, Cats & Mice"))); - assert!(matches!(percent_decode_to_utf8("%E2%98%83").as_deref(), Ok("☃"))); - - assert!(matches!(percent_decode_to_utf8("%e2%98%83").as_deref(), Ok("☃"))); - - assert!(matches!(percent_decode_to_utf8("%41%6E%20%65%6E%63%6F%64%65%64%20%73%74%72%69%6E%67%21").as_deref(), Ok("An encoded string!"))); - - assert!(matches!(percent_decode_to_utf8("hello!"), Err(_))); - assert!(matches!(percent_decode_to_bytes("hello!"), Err(_))); - assert!(matches!(percent_decode_to_utf8("%2"), Err(_))); - assert!(matches!(percent_decode_to_bytes("%2"), Err(_))); - assert!(matches!(percent_decode_to_utf8("%2!"), Err(_))); - assert!(matches!(percent_decode_to_bytes("%2!"), Err(_))); - - assert!(matches!(percent_decode_to_utf8("%FF"), Err(_))); - assert!(matches!(percent_decode_to_bytes("%FF").as_deref(), Ok(&[0xff]))); - } -} diff --git a/src/encoding/url.rs b/src/encoding/url.rs new file mode 100644 index 0000000..1139966 --- /dev/null +++ b/src/encoding/url.rs @@ -0,0 +1,351 @@ +// Following: +// - RFC 3986 (https://www.rfc-editor.org/rfc/rfc3986#section-2.1) +// - URL standard (https://url.spec.whatwg.org/#application/x-www-form-urlencoded) + +use core::str; + +#[cfg(all(feature = "alloc", not(feature = "std")))] +use alloc::{borrow::Cow, string::String, vec::Vec}; + +#[cfg(feature = "std")] +use std::borrow::Cow; + +use crate::{strings::FixedString, sink::StrSink}; + +use super::hex; + +/// Finds the first element of the slice which matches the given predicate and returns the sub-slice +/// preceding that element, the element itself, and the sub-slice following the element. +#[inline] +fn split_at(xs: &[T], predicate: P) -> (&[T], Option<(T, &[T])>) +where + T: Copy, + P: Fn(T) -> bool, +{ + let mut i = 0; + + while i < xs.len() { + // Since we required that `T: Copy`, we can copy the `i`th element from the slice. + let x = xs[i]; + + if predicate(x) { + // `get_unchecked` is used here because the compiler currently seems to struggle to + // reason about the correctness of the start and end indexes here, and can end up + // leaving in unnecessary bound checks. + // SAFETY: + // We have already checked that `i < xs.len()`, so `..i` is in bounds for `xs`. + let prefix = unsafe { xs.get_unchecked(..i) }; + + // SAFETY: + // We have already checked that `i < xs.len()`, so `i + 1 <= xs.len()` must hold. + // Therefore, `(i + 1)..` is in bounds for `xs`. + let suffix = unsafe { xs.get_unchecked((i + 1)..) }; + + return (prefix, Some((x, suffix))); + } + + i += 1; + } + + (xs, None) +} + +pub struct PercentEncoder<'a> { + remaining: &'a [u8], +} + +impl<'a> PercentEncoder<'a> { + #[must_use] + pub fn new(bytes: &'a B) -> Self + where + B: AsRef<[u8]> + ?Sized, + { + Self { + remaining: bytes.as_ref(), + } + } + + fn percent_encode_byte(byte: u8) -> FixedString<3> { + let [msb, lsb] = hex::byte_to_hex_upper(byte).into_raw(); + // SAFETY: + // The bytes obtained from `hex::byte_to_hex_upper` are valid UTF-8, and `b'%'` is a valid + // UTF-8 codepoint, so the byte array is valid UTF-8. + unsafe { FixedString::from_raw_array([b'%', msb, lsb]) } + } +} + +impl<'a> Iterator for PercentEncoder<'a> { + type Item = (&'a str, Option>); + + fn next(&mut self) -> Option { + if self.remaining.is_empty() { + return None; + } + + // Split at the first character which does not belong to RFC 3986's "unreserved" set, + // which is the set of characters which do not need to be percent-encoded. This will give us + // a `prefix` consisting entirely of characters which do not need to be percent-encoded, + // followed by a `suffix` which is either `None` or starts which a character which needs + // to be percent-encoded. + let (prefix, suffix) = split_at(self.remaining, |b| { + !matches!(b, b'0'..=b'9' | b'A'..=b'Z' | b'a'..=b'z' | b'-' | b'.' | b'_' | b'~') + }); + + // SAFETY: + // `prefix` only contains characters in the unreserved set, which are all valid ASCII + // characters. Therefore, it is valid UTF-8. + let prefix = unsafe { str::from_utf8_unchecked(prefix) }; + + match suffix { + // If there is a suffix, return the prefix and the percent-encoded first byte of the + // suffix. Set the iterator's slice to the remainder of the suffix, ready to be + // percent-encoded at the next call to `next`. + Some((byte, suffix)) => { + self.remaining = suffix; + Some((prefix, Some(Self::percent_encode_byte(byte)))) + }, + + // If there's no suffix, then we've reached the end of the input string. Therefore, we + // set the length of the iterator's slice to 0 to indicate that we are done, and then + // return the prefix. + None => { + self.remaining = &self.remaining[self.remaining.len()..]; + Some((prefix, None)) + }, + } + } +} + +#[cfg(feature = "alloc")] +#[must_use] +pub fn percent_encode(bytes: &B) -> Cow +where + B: AsRef<[u8]> + ?Sized, +{ + let mut encoder = PercentEncoder::new(bytes); + + match encoder.next() { + Some((prefix, Some(encoded_byte))) => { + let mut buf = String::new(); + buf.push_str(prefix); + buf.push_str(&encoded_byte); + + for (prefix, encoded_byte) in encoder { + buf.push_str(prefix); + if let Some(encoded_byte) = encoded_byte { + buf.push_str(&encoded_byte); + } + } + + Cow::Owned(buf) + }, + + Some((prefix, None)) => Cow::Borrowed(prefix), + + None => Cow::Borrowed(""), + } +} + +#[cfg(feature = "alloc")] +pub fn percent_encode_to_buf(buf: &mut String, bytes: &B) +where + B: AsRef<[u8]> + ?Sized, +{ + use crate::{convert::result_elim, sink::StringSink}; + + let sink = StringSink::from_string_mut(buf); + result_elim(percent_encode_to(sink, bytes)) +} + +pub fn percent_encode_to(sink: &mut S, bytes: &B) -> Result<(), S::Error> +where + S: StrSink + ?Sized, + B: AsRef<[u8]> + ?Sized, +{ + for (prefix, encoded_byte) in PercentEncoder::new(bytes) { + if !prefix.is_empty() { + sink.sink_str(prefix)?; + } + if let Some(encoded_byte) = encoded_byte { + sink.sink_str(&encoded_byte)?; + } + } + + Ok(()) +} + +pub struct PercentDecoder<'a> { + remaining: &'a [u8], +} + +impl<'a> PercentDecoder<'a> { + pub fn new(bytes: &'a B) -> Self + where + B: AsRef<[u8]> + ?Sized, + { + Self { + remaining: bytes.as_ref(), + } + } +} + +impl<'a> Iterator for PercentDecoder<'a> { + type Item = (&'a [u8], Option); + + fn next(&mut self) -> Option { + if self.remaining.is_empty() { + return None; + } + + let mut i = 0; + + while i < self.remaining.len() { + // According to the URL standard, the only special case we need to handle is when the + // percent character '%' is followed immediately by two hex digits. We check that there + // are at least two characters after the percent with `self.remaining.len() - i > 2`, + // using a subtraction rather than an addition to avoid overflow in the (rather far- + // fetched) case where `self.remaining.len() == usize::MAX` and `i == usize::MAX - 1`. + if self.remaining[i] == b'%' && self.remaining.len() - i > 2 { + // Get the next two bytes after the percent character. We use unchecked methods here + // because the current compiler does not seem to be able to eliminate the bounds + // checks on its own. + // SAFETY: + // We have just checked that `self.remaining.len() - i > 2` holds. Rearranging this + // gives `i + 2 < self.remaining.len()`. Therefore, `i + 1` and `i + 2` are valid + // indexes into the slice. + let (msb, lsb) = unsafe { + (*self.remaining.get_unchecked(i + 1), *self.remaining.get_unchecked(i + 2)) + }; + + // If the two bytes are valid hex digits, decode the hex number. + if let Ok(decoded) = hex::hex_to_byte(msb, lsb) { + // SAFETY: + // `i < self.remaining.len()`, so `..i` is a valid range over the slice. + let prefix = unsafe { self.remaining.get_unchecked(..i) }; + + // SAFETY: + // As explained above, `i + 2 < self.remaining.len()` must hold at this point. + // Therefore, `i + 3 <= self.remaining.len()`, so `(i + 3)..` is a valid range + // over the slice. + self.remaining = unsafe { self.remaining.get_unchecked((i + 3)..) }; + + return Some((prefix, Some(decoded))); + } + } + + i += 1; + } + + let bytes = self.remaining; + self.remaining = &self.remaining[i..]; + + Some((bytes, None)) + } +} + +#[cfg(feature = "alloc")] +pub fn percent_decode(bytes: &B) -> Cow<[u8]> +where + B: AsRef<[u8]> + ?Sized, +{ + let mut decoder = PercentDecoder::new(bytes); + + match decoder.next() { + Some((prefix, Some(byte))) => { + let mut buf = Vec::new(); + buf.extend(prefix); + buf.push(byte); + + for (prefix, byte) in decoder { + buf.extend(prefix); + if let Some(byte) = byte { + buf.push(byte); + } + } + + Cow::Owned(buf) + }, + + Some((prefix, None)) => Cow::Borrowed(prefix), + + None => Cow::Borrowed(&[]), + } +} + +#[cfg(feature = "alloc")] +pub fn percent_decode_utf8(bytes: &B) -> Cow +where + B: AsRef<[u8]> + ?Sized, +{ + match percent_decode(bytes) { + Cow::Borrowed(decoded) => String::from_utf8_lossy(decoded), + Cow::Owned(decoded) => match String::from_utf8_lossy(&decoded) { + Cow::Borrowed(decoded_str) => { + debug_assert_eq!(decoded_str.len(), decoded.len()); + debug_assert_eq!(decoded_str.as_bytes().as_ptr() as *const u8, decoded.as_ptr()); + + // SAFETY: + // `String::from_utf8_lossy` returned a `Cow::Borrowed`, which means that + // `decoded` is valid UTF-8. + let decoded = unsafe { String::from_utf8_unchecked(decoded) }; + Cow::Owned(decoded) + }, + Cow::Owned(decoded) => Cow::Owned(decoded), + }, + } +} + +#[cfg(test)] +mod tests { + #[cfg(feature = "alloc")] + #[test] + fn test_percent_encode() { + #[cfg(all(feature = "alloc", not(feature = "std")))] + use alloc::borrow::Cow; + + #[cfg(feature = "std")] + use std::borrow::Cow; + + use super::percent_encode; + + assert!(matches!(percent_encode(""), Cow::Borrowed(""))); + assert!(matches!(percent_encode("foobar"), Cow::Borrowed("foobar"))); + + assert_eq!(&*percent_encode("Ladies + Gentlemen"), "Ladies%20%2B%20Gentlemen"); + assert_eq!(&*percent_encode("An encoded string!"), "An%20encoded%20string%21"); + assert_eq!(&*percent_encode("Dogs, Cats & Mice"), "Dogs%2C%20Cats%20%26%20Mice"); + assert_eq!(&*percent_encode("☃"), "%E2%98%83"); + } + + #[cfg(feature = "alloc")] + #[test] + fn test_percent_decode() { + #[cfg(all(feature = "alloc", not(feature = "std")))] + use alloc::borrow::Cow; + + #[cfg(feature = "std")] + use std::borrow::Cow; + + use super::{percent_decode_utf8}; + + assert!(matches!(percent_decode_utf8(""), Cow::Borrowed(""))); + assert!(matches!(percent_decode_utf8("foobar"), Cow::Borrowed("foobar"))); + + assert_eq!(&*percent_decode_utf8("Ladies%20%2B%20Gentlemen"), "Ladies + Gentlemen"); + assert_eq!(&*percent_decode_utf8("An%20encoded%20string%21"), "An encoded string!"); + assert_eq!(&*percent_decode_utf8("Dogs%2C%20Cats%20%26%20Mice"), "Dogs, Cats & Mice"); + assert_eq!(&*percent_decode_utf8("%E2%98%83"), "☃"); + + assert_eq!(&*percent_decode_utf8("%e2%98%83"), "☃"); + + assert_eq!(&*percent_decode_utf8("%41%6E%20%65%6E%63%6F%64%65%64%20%73%74%72%69%6E%67%21"), "An encoded string!"); + + assert_eq!(&*percent_decode_utf8("hello!"), "hello!"); + assert_eq!(&*percent_decode_utf8("hello%"), "hello%"); + assert_eq!(&*percent_decode_utf8("%a"), "%a"); + assert_eq!(&*percent_decode_utf8("%za"), "%za"); + assert_eq!(&*percent_decode_utf8("%az"), "%az"); + + assert_eq!(&*percent_decode_utf8("hello%FFworld"), "hello�world"); + } +} diff --git a/src/lib.rs b/src/lib.rs index 064f823..8271a2f 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -6,5 +6,6 @@ extern crate alloc; pub mod convert; pub mod either; pub mod encoding; +pub mod sink; pub mod strings; pub mod uuid; diff --git a/src/sink.rs b/src/sink.rs new file mode 100644 index 0000000..30d5bbf --- /dev/null +++ b/src/sink.rs @@ -0,0 +1,97 @@ +use core::fmt; + +pub trait StrSink { + type Error; + + fn sink_str(&mut self, s: &str) -> Result<(), Self::Error>; + + fn sink_char(&mut self, c: char) -> Result<(), Self::Error> { + let mut buf = [0u8; 4]; + let s = c.encode_utf8(&mut buf); + self.sink_str(s) + } +} + +impl StrSink for W +where + W: fmt::Write, +{ + type Error = fmt::Error; + + #[inline] + fn sink_str(&mut self, s: &str) -> Result<(), Self::Error> { + self.write_str(s) + } + + #[inline] + fn sink_char(&mut self, c: char) -> Result<(), Self::Error> { + self.write_char(c) + } +} + +#[cfg(feature = "alloc")] +pub use string_sink::StringSink; + +#[cfg(feature = "alloc")] +mod string_sink { + use core::convert::Infallible; + + #[cfg(not(feature = "std"))] + use alloc::string::String; + + use super::StrSink; + + #[repr(transparent)] + #[derive(Clone, PartialEq, Eq, PartialOrd, Ord, Hash, Debug)] + pub struct StringSink(pub String); + + impl StringSink { + #[inline] + #[must_use] + pub fn from_string_ref(s: &String) -> &Self { + // SAFETY: + // Since `StringSink` uses `repr(transparent)`, it has the same memory layout as + // `String`. + unsafe { &*(s as *const String as *const Self) } + } + + #[inline] + #[must_use] + pub fn from_string_mut(s: &mut String) -> &mut Self { + // SAFETY: + // Since `StringSink` uses `repr(transparent)`, it has the same memory layout as + // `String`. + unsafe { &mut *(s as *mut String as *mut Self) } + } + } + + impl AsRef for String { + #[inline] + fn as_ref(&self) -> &StringSink { + StringSink::from_string_ref(self) + } + } + + impl AsMut for String { + #[inline] + fn as_mut(&mut self) -> &mut StringSink { + StringSink::from_string_mut(self) + } + } + + impl StrSink for StringSink { + type Error = Infallible; + + #[inline] + fn sink_str(&mut self, s: &str) -> Result<(), Self::Error> { + self.0.push_str(s); + Ok(()) + } + + #[inline] + fn sink_char(&mut self, c: char) -> Result<(), Self::Error> { + self.0.push(c); + Ok(()) + } + } +}