|
|
|
@ -0,0 +1,124 @@
|
|
|
|
|
|
|
|
pub fn truncate_str_to_array<const N: usize>(s: &str) -> ([u8; N], usize) {
|
|
|
|
|
|
|
|
let mut buf = [0u8; N];
|
|
|
|
|
|
|
|
let tr_len = truncated_str_len(s, N);
|
|
|
|
|
|
|
|
// SAFETY:
|
|
|
|
|
|
|
|
// `truncated_str_len` is guaranteed to return a length less than or equal to both the string
|
|
|
|
|
|
|
|
// length and the maximum truncated length `N`.
|
|
|
|
|
|
|
|
let src = unsafe { s.as_bytes().get_unchecked(..tr_len) };
|
|
|
|
|
|
|
|
let dst = unsafe { buf.get_unchecked_mut(..tr_len) };
|
|
|
|
|
|
|
|
dst.copy_from_slice(src);
|
|
|
|
|
|
|
|
(buf, tr_len)
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/// Returns the largest length `len <= n` (in bytes) at which `s` can be cut so that `&s[..len]`
/// is still valid UTF-8, i.e. the cut falls on a code point boundary.
///
/// If `s` already fits in `n` bytes its full length is returned. Otherwise the cut point is moved
/// back from `n` until it no longer lands inside a multi-byte code point, so the result can be up
/// to 3 bytes shorter than `n`, and is `0` when not even the first code point fits.
///
/// Only code point boundaries are honoured: a combining mark may still be separated from the
/// character it modifies.
pub fn truncated_str_len(s: &str, n: usize) -> usize {
    let len = s.len();
    if len <= n {
        return len;
    }

    // From here on `n < len`, so every offset probed below lies inside the string.
    //
    // A UTF-8 code point occupies at most 4 bytes, so at least one of the offsets `n`, `n - 1`,
    // `n - 2`, `n - 3` is a code point boundary; the search therefore never needs more than four
    // probes. Offset 0 is always a boundary, so clamping the lower end with `saturating_sub`
    // keeps the search correct for `n < 3` without any subtraction that could underflow.
    let lowest = n.saturating_sub(3);
    (lowest..=n)
        .rev()
        .find(|&idx| s.is_char_boundary(idx))
        // Not reachable for a valid `&str`; kept so the function is total without panicking.
        .unwrap_or(lowest)
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/// Reports whether `b` is a UTF-8 continuation byte, i.e. a byte of the form `10xxxxxx`
/// (`0x80..=0xBF`).
///
/// The spelling of the function name is kept as-is because it is part of the public interface.
pub fn is_utf8_continutation(b: u8) -> bool {
    matches!(b, 0x80..=0xBF)
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#[cfg(test)]
mod tests {
    use std::str;

    use super::{truncated_str_len, truncate_str_to_array};

    /// "cafe" + combining acute accent (U+0301, two bytes in UTF-8) + "s": seven bytes in total.
    const CAFES: &str = "cafe\u{0301}s";

    #[test]
    fn test_truncate_str_len() {
        // Each row is `(input, n, expected truncated length)`.
        let cases: &[(&str, usize, usize)] = &[
            // Empty input always truncates to nothing.
            ("", 0, 0),
            ("", 1, 0),
            ("", 2, 0),
            ("", 3, 0),
            ("", usize::MAX, 0),
            // Pure ASCII: every byte offset is a valid cut point.
            ("hi", 0, 0),
            ("hi", 1, 1),
            ("hi", 2, 2),
            ("hi", 3, 2),
            ("hi", usize::MAX, 2),
            // Two three-byte code points: only offsets 0, 3 and 6 are valid cut points.
            ("日本", 0, 0),
            ("日本", 1, 0),
            ("日本", 2, 0),
            ("日本", 3, 3),
            ("日本", 4, 3),
            ("日本", 5, 3),
            ("日本", 6, 6),
            ("日本", 7, 6),
            ("日本", 8, 6),
            ("日本", 9, 6),
            ("日本", usize::MAX, 6),
            // Mixed widths: the two-byte combining accent occupies offsets 4..6.
            (CAFES, 0, 0),
            (CAFES, 1, 1),
            (CAFES, 2, 2),
            (CAFES, 3, 3),
            (CAFES, 4, 4),
            (CAFES, 5, 4),
            (CAFES, 6, 6),
            (CAFES, 7, 7),
            (CAFES, 8, 7),
            (CAFES, usize::MAX, 7),
        ];

        for &(input, n, expected) in cases {
            assert_eq!(
                truncated_str_len(input, n),
                expected,
                "input = {:?}, n = {}",
                input,
                n
            );
        }
    }

    #[test]
    fn test_truncate_str_to_array() {
        /// Truncates `input` into an `N`-byte array and checks that the filled prefix decodes to
        /// `expected`.
        fn check<const N: usize>(input: &str, expected: &str) {
            let (buf, len) = truncate_str_to_array::<N>(input);
            let truncated = str::from_utf8(&buf[..len]).unwrap();
            assert_eq!(truncated, expected, "N = {}, input = {:?}", N, input);
        }

        check::<0>("", "");
        check::<1>("", "");
        check::<2>("", "");
        check::<3>("", "");

        check::<0>("hi", "");
        check::<1>("hi", "h");
        check::<2>("hi", "hi");
        check::<3>("hi", "hi");

        check::<0>("日本", "");
        check::<1>("日本", "");
        check::<2>("日本", "");
        check::<3>("日本", "日");
        check::<4>("日本", "日");
        check::<5>("日本", "日");
        check::<6>("日本", "日本");
        check::<7>("日本", "日本");

        check::<0>(CAFES, "");
        check::<1>(CAFES, "c");
        check::<2>(CAFES, "ca");
        check::<3>(CAFES, "caf");
        check::<4>(CAFES, "cafe");
        check::<5>(CAFES, "cafe");
        check::<6>(CAFES, "cafe\u{0301}");
        check::<7>(CAFES, "cafe\u{0301}s");
        check::<8>(CAFES, "cafe\u{0301}s");
    }
}
|
|
|
|
|
|
|
|
|