-
-
Save Yaulendil/7ec3d5bc961d844c945202080b98f4c1 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
use std::{ | |
marker::PhantomData, | |
slice::{from_raw_parts, from_raw_parts_mut}, | |
str::{from_utf8_unchecked, from_utf8_unchecked_mut}, | |
}; | |
/// Determine the number of bytes in a UTF-8 encoded character, given the first
/// byte of the sequence.
///
/// Only the leading byte is inspected; continuation bytes are not validated.
///
/// # Panics
///
/// Panics if `start_byte` can never begin a well-formed UTF-8 sequence: a
/// continuation byte (`0x80..=0xBF`), the overlong-only lead bytes `0xC0` and
/// `0xC1`, or any byte above `0xF4` (which would encode past U+10FFFF).
#[inline]
fn utf8_char_len(start_byte: u8) -> usize {
    // The ranges are the lead bytes of well-formed UTF-8; every byte of a
    // valid `str` that starts a character falls into exactly one of them.
    match start_byte {
        0x00..=0x7F => 1, // 0xxx xxxx
        0xC2..=0xDF => 2, // 110x xxxx  10xxxxxx
        0xE0..=0xEF => 3, // 1110 xxxx  10xxxxxx 10xxxxxx
        0xF0..=0xF4 => 4, // 1111 0xxx  10xxxxxx 10xxxxxx 10xxxxxx
        _ => panic!(
            "Invalid UTF-8 starting byte: {b:#04X} [{b:#010b}]",
            b = start_byte,
        ),
    }
}
/// An iterator of mutable string slices over a mutable string slice. | |
/// | |
/// Each slice returned corresponds to one `char` in the original string, in | |
/// UTF-8 encoding. | |
pub struct MutChars<'i> { | |
data: *mut u8, | |
end: *mut u8, | |
_p: PhantomData<&'i mut str>, | |
} | |
impl<'i> MutChars<'i> { | |
pub fn from(string: &'i mut str) -> Self { | |
let data: *mut u8 = string.as_mut_ptr(); | |
let end: *mut u8 = unsafe { data.add(string.len()) }; | |
Self { data, end, _p: PhantomData } | |
} | |
} | |
// impl<'i, T> From<T> for MutChars<'i> where | |
// T: AsMut<str> + 'i, | |
// { | |
// fn from(mut asmut: T) -> Self { | |
// let string: &mut str = asmut.as_mut(); | |
// | |
// let data: *mut u8 = string.as_mut_ptr(); | |
// let end: *mut u8 = unsafe { data.add(string.len()) }; | |
// | |
// Self { data, end, _p: PhantomData } | |
// } | |
// } | |
impl<'i> Iterator for MutChars<'i> { | |
type Item = &'i mut str; | |
fn next(&mut self) -> Option<Self::Item> { | |
if self.data >= self.end { return None; } | |
unsafe { | |
let len: usize = utf8_char_len(*self.data); | |
let s = from_utf8_unchecked_mut(from_raw_parts_mut(self.data, len)); | |
self.data = self.data.add(len); | |
Some(s) | |
} | |
} | |
} | |
/// An iterator of immutable string slices over an immutable string slice. | |
/// | |
/// Each slice returned corresponds to one `char` in the original string, in | |
/// UTF-8 encoding. | |
pub struct StrChars<'i> { | |
data: *const u8, | |
end: *const u8, | |
_p: PhantomData<&'i str>, | |
} | |
impl<'i, T> From<T> for StrChars<'i> where | |
T: AsRef<str> + 'i, | |
{ | |
fn from(asref: T) -> Self { | |
let string: &str = asref.as_ref(); | |
let data: *const u8 = string.as_ptr(); | |
let end: *const u8 = unsafe { data.add(string.len()) }; | |
Self { data, end, _p: PhantomData } | |
} | |
} | |
impl<'i> Iterator for StrChars<'i> { | |
type Item = &'i str; | |
fn next(&mut self) -> Option<Self::Item> { | |
if self.data >= self.end { return None; } | |
unsafe { | |
let len: usize = utf8_char_len(*self.data); | |
let s = from_utf8_unchecked(from_raw_parts(self.data, len)); | |
self.data = self.data.add(len); | |
Some(s) | |
} | |
} | |
} | |
#[cfg(test)]
mod tests {
    use super::*;

    /// Slices yielded by `MutChars` alias the original string, so edits made
    /// through them must show up in the string afterwards. The three passes
    /// build on each other; their order matters.
    #[test]
    fn test_mut_chars() {
        let mut s: String = "äßdf 🐱 qwærþ".into();

        for c in MutChars::from(s.as_mut_str()) {
            if c.len() == 4 {
                // Take the slice as its mutable bytes, and write a different
                // four-byte character into it.
                unsafe { '🗡'.encode_utf8(c.as_bytes_mut()); }
            }
        }
        assert_eq!("äßdf 🗡 qwærþ", &s);

        for c in MutChars::from(&mut s) {
            if c.len() == 1 {
                // Increment the value of this single-byte character.
                unsafe { c.as_bytes_mut()[0] += 1; }
            }
        }
        assert_eq!("äßeg!🗡!rxæsþ", &s);

        for c in MutChars::from(&mut s[..]) {
            if c.len() == 2 {
                // Clobber this two-byte character, writing two new single-byte
                // characters in its place.
                unsafe { c.as_bytes_mut().copy_from_slice(b"ZW"); }
            }
        }
        assert_eq!("ZWZWeg!🗡!rxZWsZW", &s);
    }

    /// `StrChars` must yield exactly one slice per `char`, in order, and then
    /// stop -- neither earlier nor later than `str::chars`.
    #[test]
    fn test_str_chars() {
        let string: String = "äßdf 🐱 qwærþ".into();
        let mut slices = StrChars::from(&string[..]);

        for c in string.chars() {
            let s = slices.next().expect("StrChars exhausted early");

            assert_eq!(
                Some(c),
                s.chars().next(),
                "char {:?} is not equal to string slice {:?}.", c, s,
            );
            assert_eq!(
                c.len_utf8(),
                s.len(),
                "string slice {:?} holds more than the single char {:?}.", s, c,
            );
        }

        assert!(
            slices.next().is_none(),
            "StrChars yielded more slices than the string has chars.",
        );
        assert!(StrChars::from("").next().is_none());
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment