From f0cc229a65f0e49a8e1f6c5220ed46bd80109680 Mon Sep 17 00:00:00 2001
From: pantonshire <tom@tomandtally.co.uk>
Date: Tue, 6 Sep 2022 15:42:32 +0100
Subject: [PATCH] Percent decoding compliant with URL standard, export percent
 encoder and decoder types

---
 src/convert.rs          |  12 ++
 src/encoding/mod.rs     |   2 +-
 src/encoding/rfc3986.rs | 308 -----------------------------------
 src/encoding/url.rs     | 351 ++++++++++++++++++++++++++++++++++++++++
 src/lib.rs              |   1 +
 src/sink.rs             |  97 +++++++++++
 6 files changed, 462 insertions(+), 309 deletions(-)
 delete mode 100644 src/encoding/rfc3986.rs
 create mode 100644 src/encoding/url.rs
 create mode 100644 src/sink.rs

diff --git a/src/convert.rs b/src/convert.rs
index 4f382e7..d04cd92 100644
--- a/src/convert.rs
+++ b/src/convert.rs
@@ -42,6 +42,18 @@ impl Empty for Infallible {
     }
 }
 
+#[inline]
+#[must_use]
+pub fn result_elim<T, E>(res: Result<T, E>) -> T
+where
+    E: Empty,
+{
+    match res {
+        Ok(x) => x,
+        Err(e) => e.elim(),
+    }
+}
+
 #[inline]
 #[must_use]
 pub fn clone<T: Clone>(x: &T) -> T {
diff --git a/src/encoding/mod.rs b/src/encoding/mod.rs
index 44a657e..c11931b 100644
--- a/src/encoding/mod.rs
+++ b/src/encoding/mod.rs
@@ -1,2 +1,2 @@
 pub mod hex;
-pub mod rfc3986;
+pub mod url;
diff --git a/src/encoding/rfc3986.rs b/src/encoding/rfc3986.rs
deleted file mode 100644
index c005692..0000000
--- a/src/encoding/rfc3986.rs
+++ /dev/null
@@ -1,308 +0,0 @@
-// Following RFC3986 (https://www.rfc-editor.org/rfc/rfc3986#section-2.1)
-
-use core::{
-    fmt::{self, Write},
-    str,
-};
-
-#[cfg(all(feature = "alloc", not(feature = "std")))]
-use alloc::{borrow::Cow, string::String, vec::Vec};
-
-#[cfg(feature = "std")]
-use std::borrow::Cow;
-
-use crate::{either::{Either, Inl, Inr}, strings::FixedString};
-
-use super::hex;
-
-/// Finds the first element of the slice which does not match the given predicate and returns the
-/// sub-slice preceding that element, the element itself, and the sub-slice following the element.
-#[inline]
-fn split_at_non_matching<T, P>(xs: &[T], predicate: P) -> (&[T], Option<(T, &[T])>)
-where
-    T: Copy,
-    P: Fn(T) -> bool,
-{
-    let mut i = 0;
-    while i < xs.len() {
-        let x = xs[i];
-        if !predicate(x) {
-            // `get_unchecked` is used here because the compiler currently seems to struggle to
-            // reason about the correctness of the start and end indexes here, and can end up
-            // leaving in unnecessary bound checks.
-            // SAFETY:
-            // We have already checked that `i < xs.len()`, so `..i` is in bounds for `xs`.
-            let prefix = unsafe { xs.get_unchecked(..i) };
-            // SAFETY:
-            // We have already checked that `i < xs.len()`, so `i + 1 <= xs.len()` must hold.
-            // Therefore, `(i + 1)..` is in bounds for `xs`.
-            let suffix = unsafe { xs.get_unchecked((i + 1)..) };
-            return (prefix, Some((x, suffix)));
-        }
-        i += 1;
-    }
-    (xs, None)
-}
-
-fn byte_unreserved(byte: u8) -> bool {
-    matches!(byte, b'0'..=b'9' | b'A'..=b'Z' | b'a'..=b'z' | b'-' | b'.' | b'_' | b'~')
-}
-
-struct PercentEncoder<'a>(&'a [u8]);
-
-impl<'a> PercentEncoder<'a> {
-    pub fn partial_encode(&mut self) -> Option<(&'a str, Option<FixedString<3>>)> {
-        if self.0.is_empty() {
-            return None;
-        }
-
-        let (prefix, suffix) = split_at_non_matching(self.0, byte_unreserved);
-
-        // SAFETY:
-        // `prefix` only contains bytes which satisfy `byte_unreserved`, which are all valid ASCII
-        // characters. Therefore, it is valid UTF-8.
-        let prefix = unsafe { str::from_utf8_unchecked(prefix) };
-
-        match suffix {
-            Some((byte, suffix)) => {
-                self.0 = suffix;
-                Some((prefix, Some(Self::percent_encode_byte(byte))))
-            },
-
-            None => {
-                self.0 = &self.0[self.0.len()..];
-                Some((prefix, None))
-            },
-        }
-    }
-
-    fn percent_encode_byte(byte: u8) -> FixedString<3> {
-        let [msb, lsb] = hex::byte_to_hex_upper(byte).into_raw();
-        // SAFETY:
-        // The bytes obtained from `hex::byte_to_hex_upper` are valid UTF-8, and `b'%'` is a valid
-        // UTF-8 codepoint, so the byte array is valid UTF-8.
-        unsafe { FixedString::from_raw_array([b'%', msb, lsb]) }
-    }
-}
-
-#[cfg(feature = "alloc")]
-pub fn percent_encode<B>(bytes: &B) -> Cow<str>
-where
-    B: AsRef<[u8]> + ?Sized,
-{
-    let mut encoder = PercentEncoder(bytes.as_ref());
-
-    match encoder.partial_encode().unwrap_or(("", None)) {
-        (prefix, Some(encoded_byte)) => {
-            let mut buf = String::new();
-            buf.push_str(prefix);
-            buf.push_str(&encoded_byte);
-
-            while let Some((prefix, encoded_byte)) = encoder.partial_encode() {
-                buf.push_str(prefix);
-                if let Some(encoded_byte) = encoded_byte {
-                    buf.push_str(&encoded_byte);
-                }
-            }
-
-            Cow::Owned(buf)
-        },
-
-        (prefix, None) => Cow::Borrowed(prefix),
-    }
-}
-
-#[cfg(feature = "alloc")]
-pub fn percent_encode_to_buf<B>(buf: &mut String, bytes: &B)
-where
-    B: AsRef<[u8]> + ?Sized,
-{
-    percent_encode_to_fmt_writer(buf, bytes)
-        .expect("writing to a String should never return an error")
-}
-
-pub fn percent_encode_to_fmt_writer<W, B>(writer: &mut W, bytes: &B) -> fmt::Result
-where
-    W: Write + ?Sized,
-    B: AsRef<[u8]> + ?Sized,
-{
-    let mut encoder = PercentEncoder(bytes.as_ref());
-
-    while let Some((prefix, encoded_byte)) = encoder.partial_encode() {
-        if !prefix.is_empty() {
-            writer.write_str(prefix)?;
-        }
-        if let Some(encoded_byte) = encoded_byte {
-            writer.write_str(&encoded_byte)?;
-        }
-    }
-
-    Ok(())
-}
-
-struct PercentDecoder<'a>(&'a [u8]);
-
-impl<'a> PercentDecoder<'a> {
-    fn partial_decode(&mut self) -> Result<Option<(&'a str, Option<u8>)>, PercentDecodeError> {
-        if self.0.is_empty() {
-            return Ok(None);
-        }
-
-        let (prefix, suffix) = split_at_non_matching(self.0, byte_unreserved);
-        
-        // SAFETY:
-        // `prefix` only contains bytes which satisfy `byte_unreserved`, which are all valid ASCII
-        // characters. Therefore, it is valid UTF-8.
-        let prefix = unsafe { str::from_utf8_unchecked(prefix) };
-
-        match suffix {
-            Some((byte, suffix)) => {
-                if byte != b'%' {
-                    return Err(PercentDecodeError);
-                }
-                
-                let [hex_msb, hex_lsb]: [u8; 2] = suffix
-                    .get(..2)
-                    .and_then(|hex_bytes| hex_bytes.try_into().ok())
-                    .ok_or(PercentDecodeError)?;
-
-                let hex_byte = hex::hex_to_byte(hex_msb, hex_lsb)
-                    .map_err(|_| PercentDecodeError)?;
-
-                self.0 = &suffix[2..];
-
-                Ok(Some((prefix, Some(hex_byte))))
-            },
-
-            None => {
-                self.0 = &self.0[self.0.len()..];
-                Ok(Some((prefix, None)))
-            },
-        }
-    }
-}
-
-#[cfg(feature = "alloc")]
-fn percent_decode_internal<B>(bytes: &B) -> Result<Either<&str, Vec<u8>>, PercentDecodeError>
-where
-    B: AsRef<[u8]> + ?Sized,
-{
-    let mut decoder = PercentDecoder(bytes.as_ref());
-
-    match decoder.partial_decode()?.unwrap_or(("", None)) {
-        (prefix, Some(byte)) => {
-            let mut buf = Vec::new();
-            buf.extend(prefix.bytes());
-            buf.push(byte);
-
-            while let Some((prefix, byte)) = decoder.partial_decode()? {
-                buf.extend(prefix.bytes());
-                if let Some(byte) = byte {
-                    buf.push(byte);
-                }
-            }
-
-            Ok(Inr(buf))
-        },
-
-        (prefix, None) => Ok(Inl(prefix))
-    }
-}
-
-#[cfg(feature = "alloc")]
-pub fn percent_decode_to_utf8<B>(bytes: &B) -> Result<Cow<str>, PercentDecodeError>
-where
-    B: AsRef<[u8]> + ?Sized,
-{
-    percent_decode_internal(bytes).and_then(|decoded| match decoded {
-        Inl(decoded_str) => Ok(Cow::Borrowed(decoded_str)),
-        Inr(decoded_bytes) => String::from_utf8(decoded_bytes)
-            .map(Cow::Owned)
-            .map_err(|_| PercentDecodeError),
-    })
-}
-
-#[cfg(feature = "alloc")]
-pub fn percent_decode_to_bytes<B>(bytes: &B) -> Result<Cow<[u8]>, PercentDecodeError>
-where
-    B: AsRef<[u8]> + ?Sized,
-{
-    percent_decode_internal(bytes).map(|decoded| match decoded {
-        Inl(decoded_str) => Cow::Borrowed(decoded_str.as_bytes()),
-        Inr(decoded_bytes) => Cow::Owned(decoded_bytes),
-    })
-}
-
-#[derive(Debug)]
-pub struct PercentDecodeError;
-
-impl fmt::Display for PercentDecodeError {
-    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
-        write!(f, "invalid rfc 3986 percent-encoded string")
-    }
-}
-
-#[cfg(feature = "std")]
-impl std::error::Error for PercentDecodeError {}
-
-#[cfg(test)]
-mod tests {
-    #[cfg(feature = "alloc")]
-    #[test]
-    fn test_percent_encode() {
-        #[cfg(all(feature = "alloc", not(feature = "std")))]
-        use alloc::borrow::Cow;
-
-        #[cfg(feature = "std")]
-        use std::borrow::Cow;
-
-        use super::percent_encode;
-
-        assert!(matches!(percent_encode(""), Cow::Borrowed("")));
-        assert!(matches!(percent_encode("foobar"), Cow::Borrowed("foobar")));
-
-        assert_eq!(&*percent_encode("Ladies + Gentlemen"), "Ladies%20%2B%20Gentlemen");
-        assert_eq!(&*percent_encode("An encoded string!"), "An%20encoded%20string%21");
-        assert_eq!(&*percent_encode("Dogs, Cats & Mice"), "Dogs%2C%20Cats%20%26%20Mice");
-        assert_eq!(&*percent_encode("☃"), "%E2%98%83");
-    }
-
-    #[cfg(feature = "alloc")]
-    #[test]
-    fn test_percent_decode() {
-        #[cfg(all(feature = "alloc", not(feature = "std")))]
-        use alloc::borrow::Cow;
-
-        #[cfg(feature = "std")]
-        use std::borrow::Cow;
-
-        use super::{percent_decode_to_utf8, percent_decode_to_bytes};
-
-        assert!(matches!(percent_decode_to_utf8(""), Ok(Cow::Borrowed(""))));
-        assert!(matches!(percent_decode_to_bytes(""), Ok(Cow::Borrowed(b""))));
-        assert!(matches!(percent_decode_to_utf8("foobar"), Ok(Cow::Borrowed("foobar"))));
-        assert!(matches!(percent_decode_to_bytes("foobar"), Ok(Cow::Borrowed(b"foobar"))));
-
-        assert!(matches!(percent_decode_to_utf8("Ladies%20%2B%20Gentlemen").as_deref(), Ok("Ladies + Gentlemen")));
-        assert!(matches!(percent_decode_to_bytes("Ladies%20%2B%20Gentlemen").as_deref(), Ok(b"Ladies + Gentlemen")));
-        assert!(matches!(percent_decode_to_utf8("An%20encoded%20string%21").as_deref(), Ok("An encoded string!")));
-        assert!(matches!(percent_decode_to_bytes("An%20encoded%20string%21").as_deref(), Ok(b"An encoded string!")));
-        assert!(matches!(percent_decode_to_utf8("Dogs%2C%20Cats%20%26%20Mice").as_deref(), Ok("Dogs, Cats & Mice")));
-        assert!(matches!(percent_decode_to_bytes("Dogs%2C%20Cats%20%26%20Mice").as_deref(), Ok(b"Dogs, Cats & Mice")));
-        assert!(matches!(percent_decode_to_utf8("%E2%98%83").as_deref(), Ok("☃")));
-
-        assert!(matches!(percent_decode_to_utf8("%e2%98%83").as_deref(), Ok("☃")));
-
-        assert!(matches!(percent_decode_to_utf8("%41%6E%20%65%6E%63%6F%64%65%64%20%73%74%72%69%6E%67%21").as_deref(), Ok("An encoded string!")));
-
-        assert!(matches!(percent_decode_to_utf8("hello!"), Err(_)));
-        assert!(matches!(percent_decode_to_bytes("hello!"), Err(_)));
-        assert!(matches!(percent_decode_to_utf8("%2"), Err(_)));
-        assert!(matches!(percent_decode_to_bytes("%2"), Err(_)));
-        assert!(matches!(percent_decode_to_utf8("%2!"), Err(_)));
-        assert!(matches!(percent_decode_to_bytes("%2!"), Err(_)));
-
-        assert!(matches!(percent_decode_to_utf8("%FF"), Err(_)));
-        assert!(matches!(percent_decode_to_bytes("%FF").as_deref(), Ok(&[0xff])));
-    }
-}
diff --git a/src/encoding/url.rs b/src/encoding/url.rs
new file mode 100644
index 0000000..1139966
--- /dev/null
+++ b/src/encoding/url.rs
@@ -0,0 +1,351 @@
+// Following:
+// - RFC 3986 (https://www.rfc-editor.org/rfc/rfc3986#section-2.1)
+// - URL standard (https://url.spec.whatwg.org/#application/x-www-form-urlencoded)
+
+use core::str;
+
+#[cfg(all(feature = "alloc", not(feature = "std")))]
+use alloc::{borrow::Cow, string::String, vec::Vec};
+
+#[cfg(feature = "std")]
+use std::borrow::Cow;
+
+use crate::{strings::FixedString, sink::StrSink};
+
+use super::hex;
+
+/// Finds the first element of the slice which matches the given predicate and returns the sub-slice
+/// preceding that element, the element itself, and the sub-slice following the element.
+#[inline]
+fn split_at<T, P>(xs: &[T], predicate: P) -> (&[T], Option<(T, &[T])>)
+where
+    T: Copy,
+    P: Fn(T) -> bool,
+{
+    let mut i = 0;
+
+    while i < xs.len() {
+        // Since we required that `T: Copy`, we can copy the `i`th element from the slice.
+        let x = xs[i];
+
+        if predicate(x) {
+            // `get_unchecked` is used here because the compiler currently seems to struggle to
+            // reason about the correctness of the start and end indexes here, and can end up
+            // leaving in unnecessary bound checks.
+            // SAFETY:
+            // We have already checked that `i < xs.len()`, so `..i` is in bounds for `xs`.
+            let prefix = unsafe { xs.get_unchecked(..i) };
+            
+            // SAFETY:
+            // We have already checked that `i < xs.len()`, so `i + 1 <= xs.len()` must hold.
+            // Therefore, `(i + 1)..` is in bounds for `xs`.
+            let suffix = unsafe { xs.get_unchecked((i + 1)..) };
+            
+            return (prefix, Some((x, suffix)));
+        }
+
+        i += 1;
+    }
+
+    (xs, None)
+}
+
+pub struct PercentEncoder<'a> {
+    remaining: &'a [u8],
+}
+
+impl<'a> PercentEncoder<'a> {
+    #[must_use]
+    pub fn new<B>(bytes: &'a B) -> Self
+    where
+        B: AsRef<[u8]> + ?Sized,
+    {
+        Self {
+            remaining: bytes.as_ref(),
+        }
+    }
+
+    fn percent_encode_byte(byte: u8) -> FixedString<3> {
+        let [msb, lsb] = hex::byte_to_hex_upper(byte).into_raw();
+        // SAFETY:
+        // The bytes obtained from `hex::byte_to_hex_upper` are valid UTF-8, and `b'%'` is a valid
+        // UTF-8 codepoint, so the byte array is valid UTF-8.
+        unsafe { FixedString::from_raw_array([b'%', msb, lsb]) }
+    }
+}
+
+impl<'a> Iterator for PercentEncoder<'a> {
+    type Item = (&'a str, Option<FixedString<3>>);
+
+    fn next(&mut self) -> Option<Self::Item> {
+        if self.remaining.is_empty() {
+            return None;
+        }
+
+        // Split at the first byte which does not belong to RFC 3986's "unreserved" set, which is
+        // the set of characters which do not need to be percent-encoded. This will give us a
+        // `prefix` consisting entirely of characters which do not need to be percent-encoded,
+        // followed by a `suffix` which is either `None` or holds the first byte which needs to
+        // be percent-encoded together with the bytes after it.
+        let (prefix, suffix) = split_at(self.remaining, |b| {
+            !matches!(b, b'0'..=b'9' | b'A'..=b'Z' | b'a'..=b'z' | b'-' | b'.' | b'_' | b'~')
+        });
+
+        // SAFETY:
+        // `prefix` only contains characters in the unreserved set, which are all valid ASCII
+        // characters. Therefore, it is valid UTF-8.
+        let prefix = unsafe { str::from_utf8_unchecked(prefix) };
+
+        match suffix {
+            // If there is a suffix, return the prefix and the percent-encoded first byte of the
+            // suffix. Set the iterator's slice to the remainder of the suffix, ready to be
+            // percent-encoded at the next call to `next`.
+            Some((byte, suffix)) => {
+                self.remaining = suffix;
+                Some((prefix, Some(Self::percent_encode_byte(byte))))
+            },
+
+            // If there's no suffix, then we've reached the end of the input string. Therefore, we
+            // set the length of the iterator's slice to 0 to indicate that we are done, and then
+            // return the prefix.
+            None => {
+                self.remaining = &self.remaining[self.remaining.len()..];
+                Some((prefix, None))
+            },
+        }
+    }
+}
+
+/// Percent-encodes `bytes`, leaving only RFC 3986 "unreserved" characters unescaped.
+/// Returns `Cow::Borrowed` when nothing needed encoding, so that case does not allocate.
+#[cfg(feature = "alloc")]
+#[must_use]
+pub fn percent_encode<B>(bytes: &B) -> Cow<str>
+where
+    B: AsRef<[u8]> + ?Sized,
+{
+    let mut encoder = PercentEncoder::new(bytes);
+
+    match encoder.next() {
+        Some((prefix, Some(encoded_byte))) => {
+            // The encoded output is never shorter than the input, so reserve that much up front.
+            let mut buf = String::with_capacity(bytes.as_ref().len());
+            buf.push_str(prefix);
+            buf.push_str(&encoded_byte);
+
+            for (prefix, encoded_byte) in encoder {
+                buf.push_str(prefix);
+                if let Some(encoded_byte) = encoded_byte {
+                    buf.push_str(&encoded_byte);
+                }
+            }
+            Cow::Owned(buf)
+        },
+        Some((prefix, None)) => Cow::Borrowed(prefix),
+        None => Cow::Borrowed(""),
+    }
+}
+
+#[cfg(feature = "alloc")]
+pub fn percent_encode_to_buf<B>(buf: &mut String, bytes: &B)
+where
+    B: AsRef<[u8]> + ?Sized,
+{
+    use crate::{convert::result_elim, sink::StringSink};
+
+    let sink = StringSink::from_string_mut(buf);
+    result_elim(percent_encode_to(sink, bytes))
+}
+
+pub fn percent_encode_to<S, B>(sink: &mut S, bytes: &B) -> Result<(), S::Error>
+where
+    S: StrSink + ?Sized,
+    B: AsRef<[u8]> + ?Sized,
+{
+    for (prefix, encoded_byte) in PercentEncoder::new(bytes) {
+        if !prefix.is_empty() {
+            sink.sink_str(prefix)?;
+        }
+        if let Some(encoded_byte) = encoded_byte {
+            sink.sink_str(&encoded_byte)?;
+        }
+    }
+
+    Ok(())
+}
+
+pub struct PercentDecoder<'a> {
+    remaining: &'a [u8],
+}
+
+impl<'a> PercentDecoder<'a> {
+    /// Creates a decoder over `bytes`; decoding happens lazily as the iterator is advanced.
+    #[must_use]
+    pub fn new<B>(bytes: &'a B) -> Self
+    where
+        B: AsRef<[u8]> + ?Sized,
+    {
+        Self { remaining: bytes.as_ref() }
+    }
+}
+
+impl<'a> Iterator for PercentDecoder<'a> {
+    type Item = (&'a [u8], Option<u8>);
+
+    fn next(&mut self) -> Option<Self::Item> {
+        if self.remaining.is_empty() {
+            return None;
+        }
+
+        let mut i = 0;
+
+        while i < self.remaining.len() {
+            // According to the URL standard, the only special case we need to handle is when the
+            // percent character '%' is followed immediately by two hex digits. We check that there
+            // are at least two bytes after the percent with `self.remaining.len() - i > 2`, using
+            // a subtraction rather than an addition to avoid overflow in the (rather far-fetched)
+            // case where `self.remaining.len() == usize::MAX` and `i == usize::MAX - 1`.
+            if self.remaining[i] == b'%' && self.remaining.len() - i > 2 {
+                // Get the next two bytes after the percent character. We use unchecked methods here
+                // because the current compiler does not seem to be able to eliminate the bounds
+                // checks on its own.
+                // SAFETY:
+                // We have just checked that `self.remaining.len() - i > 2` holds. Rearranging this
+                // gives `i + 2 < self.remaining.len()`. Therefore, `i + 1` and `i + 2` are valid
+                // indexes into the slice.
+                let (msb, lsb) = unsafe {
+                    (*self.remaining.get_unchecked(i + 1), *self.remaining.get_unchecked(i + 2))
+                };
+                
+                // If the two bytes are valid hex digits, decode the hex number.
+                if let Ok(decoded) = hex::hex_to_byte(msb, lsb) {
+                    // SAFETY:
+                    // `i < self.remaining.len()`, so `..i` is a valid range over the slice.
+                    let prefix = unsafe { self.remaining.get_unchecked(..i) };
+
+                    // SAFETY:
+                    // As explained above, `i + 2 < self.remaining.len()` must hold at this point.
+                    // Therefore, `i + 3 <= self.remaining.len()`, so `(i + 3)..` is a valid range
+                    // over the slice.
+                    self.remaining = unsafe { self.remaining.get_unchecked((i + 3)..) };
+
+                    return Some((prefix, Some(decoded)));
+                }
+            }
+
+            i += 1;
+        }
+        // No more escapes: yield everything that is left and leave the decoder exhausted.
+        let bytes = self.remaining;
+        self.remaining = &self.remaining[i..];
+
+        Some((bytes, None))
+    }
+}
+
+/// Percent-decodes `bytes`, replacing each `%` followed by two hex digits with the decoded byte.
+/// Any other input is passed through; `Cow::Borrowed` is returned if nothing was decoded.
+#[cfg(feature = "alloc")]
+#[must_use]
+pub fn percent_decode<B>(bytes: &B) -> Cow<[u8]>
+where
+    B: AsRef<[u8]> + ?Sized,
+{
+    let mut decoder = PercentDecoder::new(bytes);
+    match decoder.next() {
+        Some((prefix, Some(byte))) => {
+            // Decoding never grows the data, so the input length is an upper bound on the output.
+            let mut buf = Vec::with_capacity(bytes.as_ref().len());
+            buf.extend_from_slice(prefix);
+            buf.push(byte);
+
+            for (prefix, byte) in decoder {
+                buf.extend_from_slice(prefix);
+                if let Some(byte) = byte {
+                    buf.push(byte);
+                }
+            }
+            Cow::Owned(buf)
+        },
+        Some((prefix, None)) => Cow::Borrowed(prefix),
+        None => Cow::Borrowed(&[]),
+    }
+}
+
+#[cfg(feature = "alloc")]
+pub fn percent_decode_utf8<B>(bytes: &B) -> Cow<str>
+where
+    B: AsRef<[u8]> + ?Sized,
+{
+    match percent_decode(bytes) {
+        Cow::Borrowed(decoded) => String::from_utf8_lossy(decoded),
+        Cow::Owned(decoded) => match String::from_utf8_lossy(&decoded) {
+            Cow::Borrowed(decoded_str) => {
+                debug_assert_eq!(decoded_str.len(), decoded.len());
+                debug_assert_eq!(decoded_str.as_bytes().as_ptr() as *const u8, decoded.as_ptr());
+
+                // SAFETY:
+                // `String::from_utf8_lossy` returned a `Cow::Borrowed`, which means that
+                // `decoded` is valid UTF-8.
+                let decoded = unsafe { String::from_utf8_unchecked(decoded) };
+                Cow::Owned(decoded)
+            },
+            Cow::Owned(decoded) => Cow::Owned(decoded),
+        },
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    #[cfg(feature = "alloc")]
+    #[test]
+    fn test_percent_encode() {
+        #[cfg(all(feature = "alloc", not(feature = "std")))]
+        use alloc::borrow::Cow;
+
+        #[cfg(feature = "std")]
+        use std::borrow::Cow;
+
+        use super::percent_encode;
+
+        assert!(matches!(percent_encode(""), Cow::Borrowed("")));
+        assert!(matches!(percent_encode("foobar"), Cow::Borrowed("foobar")));
+
+        assert_eq!(&*percent_encode("Ladies + Gentlemen"), "Ladies%20%2B%20Gentlemen");
+        assert_eq!(&*percent_encode("An encoded string!"), "An%20encoded%20string%21");
+        assert_eq!(&*percent_encode("Dogs, Cats & Mice"), "Dogs%2C%20Cats%20%26%20Mice");
+        assert_eq!(&*percent_encode("☃"), "%E2%98%83");
+    }
+
+    #[cfg(feature = "alloc")]
+    #[test]
+    fn test_percent_decode() {
+        #[cfg(all(feature = "alloc", not(feature = "std")))]
+        use alloc::borrow::Cow;
+
+        #[cfg(feature = "std")]
+        use std::borrow::Cow;
+
+        use super::{percent_decode_utf8};
+
+        assert!(matches!(percent_decode_utf8(""), Cow::Borrowed("")));
+        assert!(matches!(percent_decode_utf8("foobar"), Cow::Borrowed("foobar")));
+
+        assert_eq!(&*percent_decode_utf8("Ladies%20%2B%20Gentlemen"), "Ladies + Gentlemen");
+        assert_eq!(&*percent_decode_utf8("An%20encoded%20string%21"), "An encoded string!");
+        assert_eq!(&*percent_decode_utf8("Dogs%2C%20Cats%20%26%20Mice"), "Dogs, Cats & Mice");
+        assert_eq!(&*percent_decode_utf8("%E2%98%83"), "☃");
+
+        assert_eq!(&*percent_decode_utf8("%e2%98%83"), "☃");
+
+        assert_eq!(&*percent_decode_utf8("%41%6E%20%65%6E%63%6F%64%65%64%20%73%74%72%69%6E%67%21"), "An encoded string!");
+
+        assert_eq!(&*percent_decode_utf8("hello!"), "hello!");
+        assert_eq!(&*percent_decode_utf8("hello%"), "hello%");
+        assert_eq!(&*percent_decode_utf8("%a"), "%a");
+        assert_eq!(&*percent_decode_utf8("%za"), "%za");
+        assert_eq!(&*percent_decode_utf8("%az"), "%az");
+
+        assert_eq!(&*percent_decode_utf8("hello%FFworld"), "hello�world");
+    }
+}
diff --git a/src/lib.rs b/src/lib.rs
index 064f823..8271a2f 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -6,5 +6,6 @@ extern crate alloc;
 pub mod convert;
 pub mod either;
 pub mod encoding;
+pub mod sink;
 pub mod strings;
 pub mod uuid;
diff --git a/src/sink.rs b/src/sink.rs
new file mode 100644
index 0000000..30d5bbf
--- /dev/null
+++ b/src/sink.rs
@@ -0,0 +1,97 @@
+use core::fmt;
+
+pub trait StrSink {
+    type Error;
+
+    fn sink_str(&mut self, s: &str) -> Result<(), Self::Error>;
+
+    fn sink_char(&mut self, c: char) -> Result<(), Self::Error> {
+        let mut buf = [0u8; 4];
+        let s = c.encode_utf8(&mut buf);
+        self.sink_str(s)
+    }
+}
+
+impl<W> StrSink for W
+where
+    W: fmt::Write,
+{
+    type Error = fmt::Error;
+
+    #[inline]
+    fn sink_str(&mut self, s: &str) -> Result<(), Self::Error> {
+        self.write_str(s)
+    }
+
+    #[inline]
+    fn sink_char(&mut self, c: char) -> Result<(), Self::Error> {
+        self.write_char(c)
+    }
+}
+
+#[cfg(feature = "alloc")]
+pub use string_sink::StringSink;
+
+#[cfg(feature = "alloc")]
+mod string_sink {
+    use core::convert::Infallible;
+
+    #[cfg(not(feature = "std"))]
+    use alloc::string::String;
+
+    use super::StrSink;
+
+    #[repr(transparent)]
+    #[derive(Clone, PartialEq, Eq, PartialOrd, Ord, Hash, Debug)]
+    pub struct StringSink(pub String);
+
+    impl StringSink {
+        #[inline]
+        #[must_use]
+        pub fn from_string_ref(s: &String) -> &Self {
+            // SAFETY:
+            // Since `StringSink` uses `repr(transparent)`, it has the same memory layout as
+            // `String`.
+            unsafe { &*(s as *const String as *const Self) }
+        }
+
+        #[inline]
+        #[must_use]
+        pub fn from_string_mut(s: &mut String) -> &mut Self {
+            // SAFETY:
+            // Since `StringSink` uses `repr(transparent)`, it has the same memory layout as
+            // `String`.
+            unsafe { &mut *(s as *mut String as *mut Self) }
+        }
+    }
+
+    impl AsRef<StringSink> for String {
+        #[inline]
+        fn as_ref(&self) -> &StringSink {
+            StringSink::from_string_ref(self)
+        }
+    }
+
+    impl AsMut<StringSink> for String {
+        #[inline]
+        fn as_mut(&mut self) -> &mut StringSink {
+            StringSink::from_string_mut(self)
+        }
+    }
+
+    impl StrSink for StringSink {
+        type Error = Infallible;
+
+        #[inline]
+        fn sink_str(&mut self, s: &str) -> Result<(), Self::Error> {
+            self.0.push_str(s);
+            Ok(())
+        }
+
+        #[inline]
+        fn sink_char(&mut self, c: char) -> Result<(), Self::Error> {
+            self.0.push(c);
+            Ok(())
+        }
+    }
+}