1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
use simd_abstraction::ascii as sa_ascii;
/// Forwards `data` to `sa_ascii::multiversion::find_non_ascii_whitespace::auto_indirect`
/// and returns whatever that call returns.
///
/// NOTE(review): the exact meaning of the returned index is defined by `simd_abstraction`
/// and is not visible here. Judging by how `remove_ascii_whitespace` consumes it, the value
/// is treated as the offset at which whitespace removal has to start, and a value
/// `>= data.len()` is treated as "nothing to remove" — confirm against the helper's docs.
#[inline(always)]
fn find_non_ascii_whitespace(data: &[u8]) -> usize {
sa_ascii::multiversion::find_non_ascii_whitespace::auto_indirect(data)
}
/// Removes ASCII whitespace from `data` in place and returns the leading part of `data`
/// that holds the remaining bytes.
///
/// The prefix reported by `find_non_ascii_whitespace` is kept untouched; only the bytes after
/// it are handed to `sa_ascii::remove_ascii_whitespace_raw_fallback` for compaction. When that
/// prefix already spans the whole slice, `data` is returned unchanged.
#[inline(always)]
fn remove_ascii_whitespace(data: &mut [u8]) -> &mut [u8] {
    let total_len = data.len();
    let prefix_len = find_non_ascii_whitespace(data);

    if prefix_len < total_len {
        let tail_len = total_len - prefix_len;

        // SAFETY: `prefix_len < total_len`, so the offset pointer stays inside `data` and
        // `tail_len` bytes starting there are exactly the rest of the slice.
        let kept_len = unsafe {
            let tail_ptr = data.as_mut_ptr().add(prefix_len);
            sa_ascii::remove_ascii_whitespace_raw_fallback(tail_ptr, tail_len)
        };

        // SAFETY: relies on the helper returning the compacted length of the tail, which
        // cannot exceed `tail_len` — that is the helper's contract, not something visible
        // in this file (TODO confirm). Under it, `prefix_len + kept_len <= total_len`.
        unsafe { data.get_unchecked_mut(..prefix_len + kept_len) }
    } else {
        data
    }
}
/// Checks `remove_ascii_whitespace` against a straightforward filter over the same bytes.
#[test]
fn test_remove_ascii_whitespace() {
    // No whitespace, each ASCII whitespace byte on its own (tab, LF, FF, CR, space),
    // a mixed run, and mixed runs interleaved with `=` including a trailing run.
    const INPUTS: [&str; 8] = [
        "abcd",
        "ab\tcd",
        "ab\ncd",
        "ab\x0Ccd",
        "ab\rcd",
        "ab cd",
        "ab\t\n\x0C\r cd",
        "ab\t\n\x0C\r =\t\n\x0C\r =\t\n\x0C\r ",
    ];

    for case in INPUTS {
        // Reference answer: every byte that is not ASCII whitespace, in original order.
        let expected: Vec<u8> = case
            .bytes()
            .filter(|byte| !byte.is_ascii_whitespace())
            .collect();

        let mut buf = case.as_bytes().to_vec();
        let ans = remove_ascii_whitespace(&mut buf);
        assert_eq!(ans, &*expected, "case = {:?}", case);
    }
}
/// Builds a 256-entry byte-to-byte lookup table at compile time.
///
/// Every byte maps to itself, except the 64 bytes listed in `crate::STANDARD_CHARSET`:
/// the byte at charset index `i` maps to the charset byte at index `i & mask`, i.e. the
/// index bits that are zero in `mask` are cleared.
const fn discard_table(mask: u8) -> [u8; 256] {
    let alphabet = crate::STANDARD_CHARSET;
    // Widen once so the masking below is plain `usize & usize`.
    let index_mask = mask as usize;

    // Identity mapping for all 256 byte values.
    let mut table = [0u8; 256];
    let mut byte = 0usize;
    while byte < 256 {
        table[byte] = byte as u8;
        byte += 1;
    }

    // Override the charset members with their masked-index counterparts.
    let mut index = 0usize;
    while index < 64 {
        table[alphabet[index] as usize] = alphabet[index & index_mask];
        index += 1;
    }

    table
}
/// Replaces `*ch` in place with its entry in the lookup table built by `discard_table(0xf0)`.
///
/// Per `discard_table`, a byte found in `crate::STANDARD_CHARSET` at index `i` becomes the
/// charset byte at index `i & 0xf0` (the low four bits of its index are cleared); any other
/// byte is left as it is.
#[inline(always)]
fn discard4(ch: &mut u8) {
    // Evaluated once at compile time.
    const TABLE: &[u8; 256] = &discard_table(0xf0);
    // A `u8` can only take the values 0..=255, so it is always a valid index into the
    // 256-entry table; plain indexing needs no `unsafe` here.
    *ch = TABLE[usize::from(*ch)];
}
/// Replaces `*ch` in place with its entry in the lookup table built by `discard_table(0xfc)`.
///
/// Per `discard_table`, a byte found in `crate::STANDARD_CHARSET` at index `i` becomes the
/// charset byte at index `i & 0xfc` (the low two bits of its index are cleared); any other
/// byte is left as it is.
#[inline(always)]
fn discard2(ch: &mut u8) {
    // Evaluated once at compile time.
    const TABLE: &[u8; 256] = &discard_table(0xfc);
    // A `u8` can only take the values 0..=255, so it is always a valid index into the
    // 256-entry table; plain indexing needs no `unsafe` here.
    *ch = TABLE[usize::from(*ch)];
}
/// Strips ASCII whitespace from `buf` in place, then tidies the tail of what is left and
/// returns the resulting leading part of `buf`.
///
/// With `len` being the length after whitespace removal:
///
/// * `len == 0`: returned as is.
/// * `len % 4 == 0`: a trailing `==` is cut off and the byte before it goes through
///   `discard4`; a single trailing `=` is cut off and the byte before it goes through
///   `discard2`; without a trailing `=` nothing changes.
/// * `len % 4 == 1`: nothing changes.
/// * `len % 4 == 2`: the last byte goes through `discard4`.
/// * `len % 4 == 3`: the last byte goes through `discard2`.
///
/// NOTE(review): this looks like canonicalising the padding and the unused trailing bits of
/// base64 text — confirm against the callers.
pub fn normalize(buf: &mut [u8]) -> &mut [u8] {
    let buf = remove_ascii_whitespace(buf);
    let len = buf.len();
    if len == 0 {
        return buf;
    }

    // Length of the slice to hand back; only the `len % 4 == 0` case can shorten it.
    let kept = match len % 4 {
        0 => {
            // SAFETY: `len` is a non-zero multiple of 4, hence `len >= 4`; the indices
            // `len - 1`, `len - 2` and `len - 3` used in this arm are all in bounds.
            let (second_last, last) =
                unsafe { (*buf.get_unchecked(len - 2), *buf.get_unchecked(len - 1)) };
            match (second_last, last) {
                (b'=', b'=') => {
                    // SAFETY: see the top of this arm (`len >= 4`).
                    discard4(unsafe { buf.get_unchecked_mut(len - 3) });
                    len - 2
                }
                (_, b'=') => {
                    // SAFETY: see the top of this arm (`len >= 4`).
                    discard2(unsafe { buf.get_unchecked_mut(len - 2) });
                    len - 1
                }
                _ => len,
            }
        }
        1 => len,
        2 => {
            // SAFETY: `len % 4 == 2` implies `len >= 2`, so `len - 1` is in bounds.
            discard4(unsafe { buf.get_unchecked_mut(len - 1) });
            len
        }
        // `len % 4` is at most 3, so this arm is exactly the remainder-3 case.
        _ => {
            // SAFETY: `len % 4 == 3` implies `len >= 3`, so `len - 1` is in bounds.
            discard2(unsafe { buf.get_unchecked_mut(len - 1) });
            len
        }
    };

    // SAFETY: every arm above yields `len`, `len - 1` or `len - 2`, none exceeding `len`.
    unsafe { buf.get_unchecked_mut(..kept) }
}