1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
use simd_abstraction::ascii as sa_ascii;

/// Forwards `data` to the `auto_indirect` entry point of
/// `sa_ascii::multiversion::find_non_ascii_whitespace` and returns its result unchanged.
///
/// NOTE(review): the only caller visible in this file, `remove_ascii_whitespace`, treats the
/// result as the length of the leading part of `data` that needs no whitespace removal, with a
/// value `>= data.len()` meaning "nothing to remove". Confirm against the helper's own docs.
#[inline(always)]
fn find_non_ascii_whitespace(data: &[u8]) -> usize {
    use sa_ascii::multiversion::find_non_ascii_whitespace as scanner;

    scanner::auto_indirect(data)
}

/// Removes ASCII whitespace from `data` in place and returns the leading sub-slice that holds
/// the surviving bytes.
///
/// The prefix reported by `find_non_ascii_whitespace` is left untouched; only the tail that
/// starts at that offset is handed to `sa_ascii::remove_ascii_whitespace_raw_fallback`. When the
/// reported offset is at or past the end of `data`, the slice is returned unchanged.
///
/// The unit test in this file checks the result against
/// `Vec::retain(|c| !c.is_ascii_whitespace())`.
#[inline(always)]
fn remove_ascii_whitespace(data: &mut [u8]) -> &mut [u8] {
    let total_len = data.len();
    let prefix_len = find_non_ascii_whitespace(data);
    if prefix_len >= total_len {
        // Nothing after the prefix needs cleaning.
        return data;
    }

    let tail_len = total_len - prefix_len;

    // SAFETY: `prefix_len < total_len` was checked above, so the offset pointer stays inside
    // `data` and exactly `tail_len` bytes are valid behind it.
    // NOTE(review): assumes the raw helper compacts those `tail_len` bytes in place and returns
    // how many it kept -- confirm against its documentation.
    let kept_in_tail = unsafe {
        let tail_ptr = data.as_mut_ptr().add(prefix_len);
        sa_ascii::remove_ascii_whitespace_raw_fallback(tail_ptr, tail_len)
    };

    // SAFETY: relies on `kept_in_tail <= tail_len` (see the note above), which keeps the end of
    // the range within `total_len`.
    unsafe { data.get_unchecked_mut(..(prefix_len + kept_in_tail)) }
}

/// Checks `remove_ascii_whitespace` against a straightforward filter over the same bytes, for
/// inputs containing tab, line feed, form feed, carriage return and space.
#[test]
fn test_remove_ascii_whitespace() {
    let cases = [
        "abcd",
        "ab\tcd",
        "ab\ncd",
        "ab\x0Ccd",
        "ab\rcd",
        "ab cd",
        "ab\t\n\x0C\r cd",
        "ab\t\n\x0C\r =\t\n\x0C\r =\t\n\x0C\r ",
    ];
    for &case in &cases {
        // Reference answer: every byte that is not ASCII whitespace, in order.
        let expected: Vec<u8> = case
            .bytes()
            .filter(|byte| !byte.is_ascii_whitespace())
            .collect();

        let mut buf = case.as_bytes().to_vec();
        let ans = remove_ascii_whitespace(&mut buf);
        assert_eq!(&*ans, expected.as_slice(), "case = {:?}", case);
    }
}

/// Builds a 256-entry byte-to-byte remapping table at compile time.
///
/// Every byte maps to itself, except the 64 bytes listed in `crate::STANDARD_CHARSET`: the
/// charset byte at index `i` maps to the charset byte at index `i & mask`. The callers in this
/// file pass `0xf0` and `0xfc`, which clear the low four and low two bits of that index.
///
/// NOTE(review): `STANDARD_CHARSET` is defined elsewhere; presumably the standard base64
/// alphabet -- confirm.
const fn discard_table(mask: u8) -> [u8; 256] {
    let alphabet = crate::STANDARD_CHARSET;
    let index_mask = mask as usize;

    // Start from the identity mapping.
    let mut table = [0u8; 256];
    let mut byte = 0usize;
    while byte < 256 {
        // `byte < 256`, so the cast is lossless.
        table[byte] = byte as u8;
        byte += 1;
    }

    // Redirect each charset byte to the charset byte at its masked index.
    let mut pos = 0usize;
    while pos < 64 {
        table[alphabet[pos] as usize] = alphabet[pos & index_mask];
        pos += 1;
    }

    table
}

/// Replaces `*ch` with its entry in the lookup table built by `discard_table(0xf0)`.
///
/// Per `discard_table`, a byte found at index `i` of the charset becomes the charset byte at
/// index `i & 0xf0` (low four index bits cleared); any other byte is left as it is.
#[inline(always)]
fn discard4(ch: &mut u8) {
    const TABLE: &[u8; 256] = &discard_table(0xf0);
    // A `u8` index is at most 255 and the table has 256 entries, so plain indexing can never be
    // out of bounds; no `unsafe` is required here.
    *ch = TABLE[*ch as usize];
}

/// Replaces `*ch` with its entry in the lookup table built by `discard_table(0xfc)`.
///
/// Per `discard_table`, a byte found at index `i` of the charset becomes the charset byte at
/// index `i & 0xfc` (low two index bits cleared); any other byte is left as it is.
#[inline(always)]
fn discard2(ch: &mut u8) {
    const TABLE: &[u8; 256] = &discard_table(0xfc);
    // A `u8` index is at most 255 and the table has 256 entries, so plain indexing can never be
    // out of bounds; no `unsafe` is required here.
    *ch = TABLE[*ch as usize];
}

/// Strips ASCII whitespace from `buf` in place, then drops trailing `=` padding and remaps the
/// last remaining byte, returning the leading sub-slice that holds the result.
///
/// With `len` being the length after `remove_ascii_whitespace`:
///
/// * `len == 0` or `len % 4 == 1`: returned as is;
/// * `len % 4 == 2`: the last byte is passed through `discard4`;
/// * `len % 4 == 3`: the last byte is passed through `discard2`;
/// * `len % 4 == 0`, ending in `==`: the byte before the padding is passed through `discard4`
///   and both `=` are cut off;
/// * `len % 4 == 0`, ending in a single `=`: the byte before it is passed through `discard2`
///   and the `=` is cut off;
/// * `len % 4 == 0`, not ending in `=`: returned as is.
///
/// NOTE(review): this reads like lenient normalisation of base64 text ahead of decoding
/// (padding removed, unused trailing bits zeroed) -- confirm with the caller.
pub fn normalize(buf: &mut [u8]) -> &mut [u8] {
    let text = remove_ascii_whitespace(buf);
    let len = text.len();
    if len == 0 {
        return text;
    }

    match len % 4 {
        1 => text,
        2 => {
            // SAFETY: `len` is non-zero, so `len - 1` is a valid index.
            discard4(unsafe { text.get_unchecked_mut(len - 1) });
            text
        }
        3 => {
            // SAFETY: `len` is non-zero, so `len - 1` is a valid index.
            discard2(unsafe { text.get_unchecked_mut(len - 1) });
            text
        }
        // Only remainder 0 is left, and `len` is non-zero, hence `len >= 4`.
        _ => {
            // SAFETY: `len >= 4`, so `len - 1` and `len - 2` are valid indices.
            let (last, before_last) =
                unsafe { (*text.get_unchecked(len - 1), *text.get_unchecked(len - 2)) };
            if last != b'=' {
                // No padding present.
                return text;
            }

            // SAFETY: `len >= 4`, so `len - 3` and `len - 2` are valid indices and both
            // shortened ranges end inside the slice.
            unsafe {
                if before_last == b'=' {
                    // Two padding bytes: fix the byte in front of them, then cut both.
                    discard4(text.get_unchecked_mut(len - 3));
                    text.get_unchecked_mut(..len - 2)
                } else {
                    // One padding byte: fix the byte in front of it, then cut it.
                    discard2(text.get_unchecked_mut(len - 2));
                    text.get_unchecked_mut(..len - 1)
                }
            }
        }
    }
}