1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351
//! Efficiently insert line endings.
//!
//! If you have a buffer full of data and want to insert any sort of regularly-spaced separator,
//! this will do it with a minimum of data copying. Commonly, this is to insert `\n` (see `lf()`) or `\r\n` (`crlf()`), but
//! any byte sequence can be used.
//!
//! 1. Pick a line ending. For single byte separators, see `ByteLineEnding`, or for two bytes, `TwoByteLineEnding`. For
//! arbitrary byte slices, use `SliceLineEnding`.
//! 2. Call `line_wrap`.
//! 3. Your data has been rearranged in place with the specified line ending inserted.
//!
//! # Examples
//!
//! ```
//! use line_wrap::*;
//! // suppose we have 80 bytes of data in a buffer and we want to wrap as per MIME.
//! // Buffer is large enough to hold line endings.
//! let mut data = vec![0; 82];
//!
//! assert_eq!(2, line_wrap(&mut data, 80, 76, &crlf()));
//!
//! // first line of zeroes
//! let mut expected_data = vec![0; 76];
//! // line ending
//! expected_data.extend_from_slice(b"\r\n");
//! // next line
//! expected_data.extend_from_slice(&[0, 0, 0, 0]);
//! assert_eq!(expected_data, data);
//! ```
//!
//! # Performance
//!
//! On an i7 6850k:
//!
//! - 10 byte input, 1 byte line length takes ~60ns (~160MiB/s)
//! - 100 byte input, 10 byte lines takes ~60ns (~1.6GiB/s)
//! - Startup costs dominate at these small lengths
//! - 1,000 byte input, 100 byte lines takes ~65ns (~15GiB/s)
//! - 10,000 byte input, 100 byte lines takes ~550ns (~17GiB/s)
//! - In general, `SliceLineEncoding` is about 75% the speed of the fixed-length impls.
//!
//! Naturally, try `cargo +nightly bench` on your hardware to get more representative data.
extern crate safemem;
/// Unix-style line ending.
pub fn lf() -> ByteLineEnding { ByteLineEnding::new(b'\n') }
/// Windows-style line ending.
pub fn crlf() -> TwoByteLineEnding { TwoByteLineEnding::new(b'\r', b'\n') }
/// Writes line endings.
///
/// The trait allows specialization for the common single and double byte cases, netting nice
/// throughput improvements over simply using a slice for everything.
pub trait LineEnding {
/// Write the line ending into the slice, which starts at the point where the ending should be written and is len() in length
fn write_ending(&self, slice: &mut [u8]);
/// The length of this particular line ending (must be constant and > 0)
fn len(&self) -> usize;
}
/// A single byte line ending.
///
/// See `lf()`.
///
/// # Examples
///
/// ```
/// use line_wrap::*;
///
/// let ending = ByteLineEnding::new(b'\n');
///
/// let mut data = vec![1, 2, 3, 4, 5, 6, 255, 255];
///
/// assert_eq!(2, line_wrap(&mut data[..], 6, 2, &ending));
///
/// assert_eq!(vec![1, 2, b'\n', 3, 4, b'\n', 5, 6], data);
/// ```
pub struct ByteLineEnding {
byte: u8
}
impl ByteLineEnding {
pub fn new(byte: u8) -> ByteLineEnding {
ByteLineEnding {
byte
}
}
}
impl LineEnding for ByteLineEnding {
#[inline]
fn write_ending(&self, slice: &mut [u8]) {
slice[0] = self.byte;
}
#[inline]
fn len(&self) -> usize {
1
}
}
/// A double byte line ending.
///
/// See `crlf()`.
///
/// # Examples
///
/// ```
/// use line_wrap::*;
///
/// let ending = TwoByteLineEnding::new(b'\r', b'\n');
///
/// let mut data = vec![1, 2, 3, 4, 5, 6, 255, 255, 255, 255];
///
/// assert_eq!(4, line_wrap(&mut data[..], 6, 2, &ending));
///
/// assert_eq!(vec![1, 2, b'\r', b'\n', 3, 4, b'\r', b'\n', 5, 6], data);
/// ```
pub struct TwoByteLineEnding {
byte0: u8,
byte1: u8,
}
impl TwoByteLineEnding {
pub fn new(byte0: u8, byte1: u8) -> TwoByteLineEnding {
TwoByteLineEnding {
byte0,
byte1,
}
}
}
impl LineEnding for TwoByteLineEnding {
#[inline]
fn write_ending(&self, slice: &mut [u8]) {
slice[0] = self.byte0;
slice[1] = self.byte1;
}
#[inline]
fn len(&self) -> usize {
2
}
}
/// A byte slice line ending.
///
/// Gives up some throughput compared to the specialized single/double byte impls, but works with
/// any length.
///
/// # Examples
///
/// ```
/// use line_wrap::*;
///
/// let ending = SliceLineEnding::new(b"xyz");
///
/// let mut data = vec![1, 2, 3, 4, 5, 6, 255, 255, 255, 255, 255, 255];
///
/// assert_eq!(6, line_wrap(&mut data[..], 6, 2, &ending));
///
/// assert_eq!(vec![1, 2, b'x', b'y', b'z', 3, 4, b'x', b'y', b'z', 5, 6], data);
/// ```
pub struct SliceLineEnding<'a> {
slice: &'a [u8]
}
impl<'a> SliceLineEnding<'a> {
pub fn new(slice: &[u8]) -> SliceLineEnding {
SliceLineEnding {
slice
}
}
}
impl<'a> LineEnding for SliceLineEnding<'a> {
#[inline]
fn write_ending(&self, slice: &mut [u8]) {
slice.copy_from_slice(self.slice);
}
#[inline]
fn len(&self) -> usize {
self.slice.len()
}
}
/// Insert line endings into the input.
///
/// Endings are inserted after each complete line, except the last line, even if the last line takes
/// up the full line width.
///
/// - `buf` must be large enough to handle the increased size after endings are inserted. In other
/// words, `buf.len() >= input_len / line_len * line_ending.len()`.
/// - `input_len` is the length of the unwrapped in `buf`.
/// - `line_len` is the desired line width without line ending characters.
///
/// Returns the number of line ending bytes added.
///
/// # Panics
///
/// - When `line_ending.len() == 0`
/// - When `buf` is too small to contain the original input and its new line endings
pub fn line_wrap<L: LineEnding>(
buf: &mut [u8],
input_len: usize,
line_len: usize,
line_ending: &L,
) -> usize {
assert!(line_ending.len() > 0);
if input_len <= line_len {
return 0;
}
let line_ending_len = line_ending.len();
let line_wrap_params = line_wrap_parameters(input_len, line_len, line_ending_len);
// ptr.offset() is undefined if it wraps, and there is no checked_offset(). However, because
// we perform this check up front to make sure we have enough capacity, we know that none of
// the subsequent pointer operations (assuming they implement the desired behavior of course!)
// will overflow.
assert!(
buf.len() >= line_wrap_params.total_len,
"Buffer must be able to hold encoded data after line wrapping"
);
// Move the last line, either partial or full, by itself as it does not have a line ending
// afterwards
let last_line_start = input_len.checked_sub(line_wrap_params.last_line_len)
.expect("Last line start index underflow");
// last line starts immediately after all the wrapped full lines
let new_line_start = line_wrap_params.total_full_wrapped_lines_len;
safemem::copy_over(
buf,
last_line_start,
new_line_start,
line_wrap_params.last_line_len,
);
let mut total_line_ending_bytes = 0;
// initialize so that the initial decrement will set them correctly
let mut old_line_start = last_line_start;
let mut new_line_start = line_wrap_params.total_full_wrapped_lines_len;
// handle the full lines
for _ in 0..line_wrap_params.lines_with_endings {
// the index after the end of the line ending we're about to write is the start of the next
// line
let end_of_line_ending = new_line_start;
let start_of_line_ending = end_of_line_ending
.checked_sub(line_ending_len)
.expect("Line ending start index underflow");
// doesn't underflow because it's decremented `line_wrap_params.lines_with_endings` times
old_line_start = old_line_start.checked_sub(line_len)
.expect("Old line start index underflow");
new_line_start = new_line_start.checked_sub(line_wrap_params.line_with_ending_len)
.expect("New line start index underflow");
safemem::copy_over(buf, old_line_start, new_line_start, line_len);
line_ending.write_ending(&mut buf[start_of_line_ending..(end_of_line_ending)]);
total_line_ending_bytes += line_ending_len;
}
assert_eq!(line_wrap_params.total_line_endings_len, total_line_ending_bytes);
total_line_ending_bytes
}
#[derive(Debug, PartialEq)]
struct LineWrapParameters {
line_with_ending_len: usize,
// number of lines that need an ending
lines_with_endings: usize,
// length of last line (which never needs an ending)
last_line_len: usize,
// length of lines that need an ending (which are always full lines), with their endings
total_full_wrapped_lines_len: usize,
// length of all lines, including endings for the ones that need them
total_len: usize,
// length of the line endings only
total_line_endings_len: usize,
}
/// Calculations about how many lines we'll get for a given line length, line ending, etc.
/// This assumes that the last line will not get an ending, even if it is the full line length.
// Inlining improves short input single-byte by 25%.
#[inline]
fn line_wrap_parameters(
input_len: usize,
line_len: usize,
line_ending_len: usize,
) -> LineWrapParameters {
let line_with_ending_len = line_len
.checked_add(line_ending_len)
.expect("Line length with ending exceeds usize");
if input_len <= line_len {
// no wrapping needed
return LineWrapParameters {
line_with_ending_len,
lines_with_endings: 0,
last_line_len: input_len,
total_full_wrapped_lines_len: 0,
total_len: input_len,
total_line_endings_len: 0,
};
};
// lines_with_endings > 0, last_line_len > 0
let (lines_with_endings, last_line_len) = if input_len % line_len > 0 {
// Every full line has an ending since there is a partial line at the end
(input_len / line_len, input_len % line_len)
} else {
// Every line is a full line, but no trailing ending.
// Subtraction will not underflow since we know input_len > line_len.
(input_len / line_len - 1, line_len)
};
// Should we expose exceeding usize via Result to be kind to 16-bit users? Or is that
// always going to be a panic anyway in practice?
// length of just the full lines with line endings
let total_full_wrapped_lines_len = lines_with_endings
.checked_mul(line_with_ending_len)
.expect("Full lines with endings length exceeds usize");
// all lines with appropriate endings, including the last line
let total_len = total_full_wrapped_lines_len
.checked_add(last_line_len)
.expect("All lines with endings length exceeds usize");
let total_line_endings_len = lines_with_endings
.checked_mul(line_ending_len)
.expect("Total line endings length exceeds usize");
LineWrapParameters {
line_with_ending_len,
lines_with_endings,
last_line_len,
total_full_wrapped_lines_len,
total_len,
total_line_endings_len,
}
}
#[cfg(test)]
mod tests;