1 //! Efficiently insert line endings.
2 //!
3 //! If you have a buffer full of data and want to insert any sort of regularly-spaced separator,
4 //! this will do it with a minimum of data copying. Commonly, this is to insert `\n` (see `lf()`) or `\r\n` (`crlf()`), but
5 //! any byte sequence can be used.
6 //!
7 //! 1. Pick a line ending. For single byte separators, see `ByteLineEnding`, or for two bytes, `TwoByteLineEnding`. For
8 //! arbitrary byte slices, use `SliceLineEnding`.
9 //! 2. Call `line_wrap`.
10 //! 3. Your data has been rearranged in place with the specified line ending inserted.
11 //!
12 //! # Examples
13 //!
14 //! ```
15 //! use line_wrap::*;
16 //! // suppose we have 80 bytes of data in a buffer and we want to wrap as per MIME.
17 //! // Buffer is large enough to hold line endings.
18 //! let mut data = vec![0; 82];
19 //!
20 //! assert_eq!(2, line_wrap(&mut data, 80, 76, &crlf()));
21 //!
22 //! // first line of zeroes
23 //! let mut expected_data = vec![0; 76];
24 //! // line ending
25 //! expected_data.extend_from_slice(b"\r\n");
26 //! // next line
27 //! expected_data.extend_from_slice(&[0, 0, 0, 0]);
28 //! assert_eq!(expected_data, data);
29 //! ```
30 //!
31 //! # Performance
32 //!
33 //! On an i7 6850k:
34 //!
35 //! - 10 byte input, 1 byte line length takes ~60ns (~160MiB/s)
36 //! - 100 byte input, 10 byte lines takes ~60ns (~1.6GiB/s)
37 //!     - Startup costs dominate at these small lengths
38 //! - 1,000 byte input, 100 byte lines takes ~65ns (~15GiB/s)
39 //! - 10,000 byte input, 100 byte lines takes ~550ns (~17GiB/s)
40 //! - In general, `SliceLineEncoding` is about 75% the speed of the fixed-length impls.
41 //!
42 //! Naturally, try `cargo +nightly bench` on your hardware to get more representative data.
43 extern crate safemem;
44 
45 /// Unix-style line ending.
lf() -> ByteLineEnding46 pub fn lf() -> ByteLineEnding { ByteLineEnding::new(b'\n') }
47 
48 /// Windows-style line ending.
crlf() -> TwoByteLineEnding49 pub fn crlf() -> TwoByteLineEnding { TwoByteLineEnding::new(b'\r', b'\n') }
50 
51 /// Writes line endings.
52 ///
53 /// The trait allows specialization for the common single and double byte cases, netting nice
54 /// throughput improvements over simply using a slice for everything.
55 pub trait LineEnding {
56     /// Write the line ending into the slice, which starts at the point where the ending should be written and is len() in length
write_ending(&self, slice: &mut [u8])57     fn write_ending(&self, slice: &mut [u8]);
58     /// The length of this particular line ending (must be constant and > 0)
len(&self) -> usize59     fn len(&self) -> usize;
60 }
61 
62 /// A single byte line ending.
63 ///
64 /// See `lf()`.
65 ///
66 /// # Examples
67 ///
68 /// ```
69 /// use line_wrap::*;
70 ///
71 /// let ending = ByteLineEnding::new(b'\n');
72 ///
73 /// let mut data = vec![1, 2, 3, 4, 5, 6, 255, 255];
74 ///
75 /// assert_eq!(2, line_wrap(&mut data[..], 6, 2, &ending));
76 ///
77 /// assert_eq!(vec![1, 2, b'\n', 3, 4, b'\n', 5, 6], data);
78 /// ```
79 pub struct ByteLineEnding {
80     byte: u8
81 }
82 
83 impl ByteLineEnding {
new(byte: u8) -> ByteLineEnding84     pub fn new(byte: u8) -> ByteLineEnding {
85         ByteLineEnding {
86             byte
87         }
88     }
89 }
90 
91 impl LineEnding for ByteLineEnding {
92     #[inline]
write_ending(&self, slice: &mut [u8])93     fn write_ending(&self, slice: &mut [u8]) {
94         slice[0] = self.byte;
95     }
96 
97     #[inline]
len(&self) -> usize98     fn len(&self) -> usize {
99         1
100     }
101 }
102 
103 /// A double byte line ending.
104 ///
105 /// See `crlf()`.
106 ///
107 /// # Examples
108 ///
109 /// ```
110 /// use line_wrap::*;
111 ///
112 /// let ending = TwoByteLineEnding::new(b'\r', b'\n');
113 ///
114 /// let mut data = vec![1, 2, 3, 4, 5, 6, 255, 255, 255, 255];
115 ///
116 /// assert_eq!(4, line_wrap(&mut data[..], 6, 2, &ending));
117 ///
118 /// assert_eq!(vec![1, 2, b'\r', b'\n', 3, 4, b'\r', b'\n', 5, 6], data);
119 /// ```
120 pub struct TwoByteLineEnding {
121     byte0: u8,
122     byte1: u8,
123 }
124 
125 impl TwoByteLineEnding {
new(byte0: u8, byte1: u8) -> TwoByteLineEnding126     pub fn new(byte0: u8, byte1: u8) -> TwoByteLineEnding {
127         TwoByteLineEnding {
128             byte0,
129             byte1,
130         }
131     }
132 }
133 
134 impl LineEnding for TwoByteLineEnding {
135     #[inline]
write_ending(&self, slice: &mut [u8])136     fn write_ending(&self, slice: &mut [u8]) {
137         slice[0] = self.byte0;
138         slice[1] = self.byte1;
139     }
140 
141     #[inline]
len(&self) -> usize142     fn len(&self) -> usize {
143         2
144     }
145 }
146 
147 /// A byte slice line ending.
148 ///
149 /// Gives up some throughput compared to the specialized single/double byte impls, but works with
150 /// any length.
151 ///
152 /// # Examples
153 ///
154 /// ```
155 /// use line_wrap::*;
156 ///
157 /// let ending = SliceLineEnding::new(b"xyz");
158 ///
159 /// let mut data = vec![1, 2, 3, 4, 5, 6, 255, 255, 255, 255, 255, 255];
160 ///
161 /// assert_eq!(6, line_wrap(&mut data[..], 6, 2, &ending));
162 ///
163 /// assert_eq!(vec![1, 2, b'x', b'y', b'z', 3, 4, b'x', b'y', b'z', 5, 6], data);
164 /// ```
165 pub struct SliceLineEnding<'a> {
166     slice: &'a [u8]
167 }
168 
169 impl<'a> SliceLineEnding<'a> {
new(slice: &[u8]) -> SliceLineEnding170     pub fn new(slice: &[u8]) -> SliceLineEnding {
171         SliceLineEnding {
172             slice
173         }
174     }
175 }
176 
177 impl<'a> LineEnding for SliceLineEnding<'a> {
178     #[inline]
write_ending(&self, slice: &mut [u8])179     fn write_ending(&self, slice: &mut [u8]) {
180         slice.copy_from_slice(self.slice);
181     }
182 
183     #[inline]
len(&self) -> usize184     fn len(&self) -> usize {
185         self.slice.len()
186     }
187 }
188 
189 /// Insert line endings into the input.
190 ///
191 /// Endings are inserted after each complete line, except the last line, even if the last line takes
192 /// up the full line width.
193 ///
194 /// - `buf` must be large enough to handle the increased size after endings are inserted. In other
195 /// words, `buf.len() >= input_len / line_len * line_ending.len()`.
196 /// - `input_len` is the length of the unwrapped  in `buf`.
197 /// - `line_len` is the desired line width without line ending characters.
198 ///
199 /// Returns the number of line ending bytes added.
200 ///
201 /// # Panics
202 ///
203 /// - When `line_ending.len() == 0`
204 /// - When `buf` is too small to contain the original input and its new line endings
line_wrap<L: LineEnding>( buf: &mut [u8], input_len: usize, line_len: usize, line_ending: &L, ) -> usize205 pub fn line_wrap<L: LineEnding>(
206     buf: &mut [u8],
207     input_len: usize,
208     line_len: usize,
209     line_ending: &L,
210 ) -> usize {
211     assert!(line_ending.len() > 0);
212 
213     if input_len <= line_len {
214         return 0;
215     }
216 
217     let line_ending_len = line_ending.len();
218     let line_wrap_params = line_wrap_parameters(input_len, line_len, line_ending_len);
219 
220     // ptr.offset() is undefined if it wraps, and there is no checked_offset(). However, because
221     // we perform this check up front to make sure we have enough capacity, we know that none of
222     // the subsequent pointer operations (assuming they implement the desired behavior of course!)
223     // will overflow.
224     assert!(
225         buf.len() >= line_wrap_params.total_len,
226         "Buffer must be able to hold encoded data after line wrapping"
227     );
228 
229     // Move the last line, either partial or full, by itself as it does not have a line ending
230     // afterwards
231     let last_line_start = input_len.checked_sub(line_wrap_params.last_line_len)
232         .expect("Last line start index underflow");
233     // last line starts immediately after all the wrapped full lines
234     let new_line_start = line_wrap_params.total_full_wrapped_lines_len;
235 
236     safemem::copy_over(
237         buf,
238         last_line_start,
239         new_line_start,
240         line_wrap_params.last_line_len,
241     );
242 
243     let mut total_line_ending_bytes = 0;
244 
245     // initialize so that the initial decrement will set them correctly
246     let mut old_line_start = last_line_start;
247     let mut new_line_start = line_wrap_params.total_full_wrapped_lines_len;
248 
249     // handle the full lines
250     for _ in 0..line_wrap_params.lines_with_endings {
251         // the index after the end of the line ending we're about to write is the start of the next
252         // line
253         let end_of_line_ending = new_line_start;
254         let start_of_line_ending = end_of_line_ending
255             .checked_sub(line_ending_len)
256             .expect("Line ending start index underflow");
257 
258         // doesn't underflow because it's decremented `line_wrap_params.lines_with_endings` times
259         old_line_start = old_line_start.checked_sub(line_len)
260             .expect("Old line start index underflow");
261         new_line_start = new_line_start.checked_sub(line_wrap_params.line_with_ending_len)
262             .expect("New line start index underflow");
263 
264         safemem::copy_over(buf, old_line_start, new_line_start, line_len);
265 
266         line_ending.write_ending(&mut buf[start_of_line_ending..(end_of_line_ending)]);
267         total_line_ending_bytes += line_ending_len;
268     }
269 
270     assert_eq!(line_wrap_params.total_line_endings_len, total_line_ending_bytes);
271 
272     total_line_ending_bytes
273 }
274 
275 #[derive(Debug, PartialEq)]
276 struct LineWrapParameters {
277     line_with_ending_len: usize,
278     // number of lines that need an ending
279     lines_with_endings: usize,
280     // length of last line (which never needs an ending)
281     last_line_len: usize,
282     // length of lines that need an ending (which are always full lines), with their endings
283     total_full_wrapped_lines_len: usize,
284     // length of all lines, including endings for the ones that need them
285     total_len: usize,
286     // length of the line endings only
287     total_line_endings_len: usize,
288 }
289 
290 /// Calculations about how many lines we'll get for a given line length, line ending, etc.
291 /// This assumes that the last line will not get an ending, even if it is the full line length.
292 // Inlining improves short input single-byte by 25%.
293 #[inline]
line_wrap_parameters( input_len: usize, line_len: usize, line_ending_len: usize, ) -> LineWrapParameters294 fn line_wrap_parameters(
295     input_len: usize,
296     line_len: usize,
297     line_ending_len: usize,
298 ) -> LineWrapParameters {
299     let line_with_ending_len = line_len
300         .checked_add(line_ending_len)
301         .expect("Line length with ending exceeds usize");
302 
303     if input_len <= line_len {
304         // no wrapping needed
305         return LineWrapParameters {
306             line_with_ending_len,
307             lines_with_endings: 0,
308             last_line_len: input_len,
309             total_full_wrapped_lines_len: 0,
310             total_len: input_len,
311             total_line_endings_len: 0,
312         };
313     };
314 
315     // lines_with_endings > 0, last_line_len > 0
316     let (lines_with_endings, last_line_len) = if input_len % line_len > 0 {
317         // Every full line has an ending since there is a partial line at the end
318         (input_len / line_len, input_len % line_len)
319     } else {
320         // Every line is a full line, but no trailing ending.
321         // Subtraction will not underflow since we know input_len > line_len.
322         (input_len / line_len - 1, line_len)
323     };
324 
325     // Should we expose exceeding usize via Result to be kind to 16-bit users? Or is that
326     // always going to be a panic anyway in practice?
327 
328     // length of just the full lines with line endings
329     let total_full_wrapped_lines_len = lines_with_endings
330         .checked_mul(line_with_ending_len)
331         .expect("Full lines with endings length exceeds usize");
332     // all lines with appropriate endings, including the last line
333     let total_len = total_full_wrapped_lines_len
334         .checked_add(last_line_len)
335         .expect("All lines with endings length exceeds usize");
336     let total_line_endings_len = lines_with_endings
337         .checked_mul(line_ending_len)
338         .expect("Total line endings length exceeds usize");
339 
340     LineWrapParameters {
341         line_with_ending_len,
342         lines_with_endings,
343         last_line_len,
344         total_full_wrapped_lines_len,
345         total_len,
346         total_line_endings_len,
347     }
348 }
349 
350 #[cfg(test)]
351 mod tests;