1 use std::cmp;
2 use std::io;
3 
4 use bstr::ByteSlice;
5 
/// The number of bytes (64 KiB) a line buffer is given when no other
/// capacity has been configured.
pub(crate) const DEFAULT_BUFFER_CAPACITY: usize = 64 * 1024;
8 
/// How a searcher copes with long lines and big contexts.
///
/// Incremental search reads data through a fixed size buffer. This setting
/// governs how much *additional* memory, beyond the size of that buffer, may
/// be allocated so that lines which do not fit (possibly together with the
/// lines of an enabled context window) can still be held in memory.
///
/// The default is to eagerly allocate without a limit.
#[derive(Clone, Copy, Debug)]
pub enum BufferAllocation {
    /// Keep growing the buffer until at least the next line fits into memory
    /// or until all available memory is exhausted.
    ///
    /// This is the default.
    Eager,
    /// Cap the amount of additional memory at the given size. When a line
    /// needs more memory than this permits, reading stops and an error is
    /// returned.
    Error(usize),
}

impl Default for BufferAllocation {
    /// The default strategy is `BufferAllocation::Eager`.
    fn default() -> Self {
        BufferAllocation::Eager
    }
}
35 
/// Build the I/O error that is reported once a configured allocation limit
/// has been reached.
///
/// `limit` is interpolated into the error message. The resulting error has
/// the kind `io::ErrorKind::Other`.
pub fn alloc_error(limit: usize) -> io::Error {
    io::Error::new(
        io::ErrorKind::Other,
        format!("configured allocation limit ({}) exceeded", limit),
    )
}
42 
/// The behavior of binary detection in the line buffer.
///
/// Binary detection is the process of _heuristically_ identifying whether a
/// given chunk of data is binary or not, and then taking an action based on
/// the result of that heuristic. The motivation behind detecting binary data
/// is that binary data often indicates data that is undesirable to search
/// using textual patterns. Of course, there are many cases in which this isn't
/// true, which is why binary detection is disabled by default.
#[derive(Clone, Copy, Debug)]
pub enum BinaryDetection {
    /// No binary detection is performed. Data reported by the line buffer may
    /// contain arbitrary bytes.
    None,
    /// The given byte is searched in all contents read by the line buffer. If
    /// it occurs, then the data is considered binary and the line buffer acts
    /// as if it reached EOF. The line buffer guarantees that this byte will
    /// never be observable by callers.
    Quit(u8),
    /// The given byte is searched in all contents read by the line buffer. If
    /// it occurs, then it is replaced by the line terminator. The line buffer
    /// guarantees that this byte will never be observable by callers.
    Convert(u8),
}

impl Default for BinaryDetection {
    /// Binary detection is disabled by default.
    fn default() -> BinaryDetection {
        BinaryDetection::None
    }
}

impl BinaryDetection {
    /// Returns true if and only if the detection heuristic demands that
    /// the line buffer stop reading data once binary data is observed.
    fn is_quit(&self) -> bool {
        // Every variant is listed explicitly so that adding a new one forces
        // a decision here instead of silently falling into a wildcard arm.
        match *self {
            BinaryDetection::Quit(_) => true,
            BinaryDetection::None | BinaryDetection::Convert(_) => false,
        }
    }
}
83 
84 /// The configuration of a buffer. This contains options that are fixed once
85 /// a buffer has been constructed.
86 #[derive(Clone, Copy, Debug)]
87 struct Config {
88     /// The number of bytes to attempt to read at a time.
89     capacity: usize,
90     /// The line terminator.
91     lineterm: u8,
92     /// The behavior for handling long lines.
93     buffer_alloc: BufferAllocation,
94     /// When set, the presence of the given byte indicates binary content.
95     binary: BinaryDetection,
96 }
97 
98 impl Default for Config {
default() -> Config99     fn default() -> Config {
100         Config {
101             capacity: DEFAULT_BUFFER_CAPACITY,
102             lineterm: b'\n',
103             buffer_alloc: BufferAllocation::default(),
104             binary: BinaryDetection::default(),
105         }
106     }
107 }
108 
109 /// A builder for constructing line buffers.
110 #[derive(Clone, Debug, Default)]
111 pub struct LineBufferBuilder {
112     config: Config,
113 }
114 
115 impl LineBufferBuilder {
116     /// Create a new builder for a buffer.
new() -> LineBufferBuilder117     pub fn new() -> LineBufferBuilder {
118         LineBufferBuilder { config: Config::default() }
119     }
120 
121     /// Create a new line buffer from this builder's configuration.
build(&self) -> LineBuffer122     pub fn build(&self) -> LineBuffer {
123         LineBuffer {
124             config: self.config,
125             buf: vec![0; self.config.capacity],
126             pos: 0,
127             last_lineterm: 0,
128             end: 0,
129             absolute_byte_offset: 0,
130             binary_byte_offset: None,
131         }
132     }
133 
134     /// Set the default capacity to use for a buffer.
135     ///
136     /// In general, the capacity of a buffer corresponds to the amount of data
137     /// to hold in memory, and the size of the reads to make to the underlying
138     /// reader.
139     ///
140     /// This is set to a reasonable default and probably shouldn't be changed
141     /// unless there's a specific reason to do so.
capacity(&mut self, capacity: usize) -> &mut LineBufferBuilder142     pub fn capacity(&mut self, capacity: usize) -> &mut LineBufferBuilder {
143         self.config.capacity = capacity;
144         self
145     }
146 
147     /// Set the line terminator for the buffer.
148     ///
149     /// Every buffer has a line terminator, and this line terminator is used
150     /// to determine how to roll the buffer forward. For example, when a read
151     /// to the buffer's underlying reader occurs, the end of the data that is
152     /// read is likely to correspond to an incomplete line. As a line buffer,
153     /// callers should not access this data since it is incomplete. The line
154     /// terminator is how the line buffer determines the part of the read that
155     /// is incomplete.
156     ///
157     /// By default, this is set to `b'\n'`.
line_terminator(&mut self, lineterm: u8) -> &mut LineBufferBuilder158     pub fn line_terminator(&mut self, lineterm: u8) -> &mut LineBufferBuilder {
159         self.config.lineterm = lineterm;
160         self
161     }
162 
163     /// Set the maximum amount of additional memory to allocate for long lines.
164     ///
165     /// In order to enable line oriented search, a fundamental requirement is
166     /// that, at a minimum, each line must be able to fit into memory. This
167     /// setting controls how big that line is allowed to be. By default, this
168     /// is set to `BufferAllocation::Eager`, which means a line buffer will
169     /// attempt to allocate as much memory as possible to fit a line, and will
170     /// only be limited by available memory.
171     ///
172     /// Note that this setting only applies to the amount of *additional*
173     /// memory to allocate, beyond the capacity of the buffer. That means that
174     /// a value of `0` is sensible, and in particular, will guarantee that a
175     /// line buffer will never allocate additional memory beyond its initial
176     /// capacity.
buffer_alloc( &mut self, behavior: BufferAllocation, ) -> &mut LineBufferBuilder177     pub fn buffer_alloc(
178         &mut self,
179         behavior: BufferAllocation,
180     ) -> &mut LineBufferBuilder {
181         self.config.buffer_alloc = behavior;
182         self
183     }
184 
185     /// Whether to enable binary detection or not. Depending on the setting,
186     /// this can either cause the line buffer to report EOF early or it can
187     /// cause the line buffer to clean the data.
188     ///
189     /// By default, this is disabled. In general, binary detection should be
190     /// viewed as an imperfect heuristic.
binary_detection( &mut self, detection: BinaryDetection, ) -> &mut LineBufferBuilder191     pub fn binary_detection(
192         &mut self,
193         detection: BinaryDetection,
194     ) -> &mut LineBufferBuilder {
195         self.config.binary = detection;
196         self
197     }
198 }
199 
200 /// A line buffer reader efficiently reads a line oriented buffer from an
201 /// arbitrary reader.
202 #[derive(Debug)]
203 pub struct LineBufferReader<'b, R> {
204     rdr: R,
205     line_buffer: &'b mut LineBuffer,
206 }
207 
208 impl<'b, R: io::Read> LineBufferReader<'b, R> {
209     /// Create a new buffered reader that reads from `rdr` and uses the given
210     /// `line_buffer` as an intermediate buffer.
211     ///
212     /// This does not change the binary detection behavior of the given line
213     /// buffer.
new( rdr: R, line_buffer: &'b mut LineBuffer, ) -> LineBufferReader<'b, R>214     pub fn new(
215         rdr: R,
216         line_buffer: &'b mut LineBuffer,
217     ) -> LineBufferReader<'b, R> {
218         line_buffer.clear();
219         LineBufferReader { rdr, line_buffer }
220     }
221 
222     /// The absolute byte offset which corresponds to the starting offsets
223     /// of the data returned by `buffer` relative to the beginning of the
224     /// underlying reader's contents. As such, this offset does not generally
225     /// correspond to an offset in memory. It is typically used for reporting
226     /// purposes. It can also be used for counting the number of bytes that
227     /// have been searched.
absolute_byte_offset(&self) -> u64228     pub fn absolute_byte_offset(&self) -> u64 {
229         self.line_buffer.absolute_byte_offset()
230     }
231 
232     /// If binary data was detected, then this returns the absolute byte offset
233     /// at which binary data was initially found.
binary_byte_offset(&self) -> Option<u64>234     pub fn binary_byte_offset(&self) -> Option<u64> {
235         self.line_buffer.binary_byte_offset()
236     }
237 
238     /// Fill the contents of this buffer by discarding the part of the buffer
239     /// that has been consumed. The free space created by discarding the
240     /// consumed part of the buffer is then filled with new data from the
241     /// reader.
242     ///
243     /// If EOF is reached, then `false` is returned. Otherwise, `true` is
244     /// returned. (Note that if this line buffer's binary detection is set to
245     /// `Quit`, then the presence of binary data will cause this buffer to
246     /// behave as if it had seen EOF at the first occurrence of binary data.)
247     ///
248     /// This forwards any errors returned by the underlying reader, and will
249     /// also return an error if the buffer must be expanded past its allocation
250     /// limit, as governed by the buffer allocation strategy.
fill(&mut self) -> Result<bool, io::Error>251     pub fn fill(&mut self) -> Result<bool, io::Error> {
252         self.line_buffer.fill(&mut self.rdr)
253     }
254 
255     /// Return the contents of this buffer.
buffer(&self) -> &[u8]256     pub fn buffer(&self) -> &[u8] {
257         self.line_buffer.buffer()
258     }
259 
260     /// Return the buffer as a BStr, used for convenient equality checking
261     /// in tests only.
262     #[cfg(test)]
bstr(&self) -> &::bstr::BStr263     fn bstr(&self) -> &::bstr::BStr {
264         self.buffer().as_bstr()
265     }
266 
267     /// Consume the number of bytes provided. This must be less than or equal
268     /// to the number of bytes returned by `buffer`.
consume(&mut self, amt: usize)269     pub fn consume(&mut self, amt: usize) {
270         self.line_buffer.consume(amt);
271     }
272 
273     /// Consumes the remainder of the buffer. Subsequent calls to `buffer` are
274     /// guaranteed to return an empty slice until the buffer is refilled.
275     ///
276     /// This is a convenience function for `consume(buffer.len())`.
277     #[cfg(test)]
consume_all(&mut self)278     fn consume_all(&mut self) {
279         self.line_buffer.consume_all();
280     }
281 }
282 
283 /// A line buffer manages a (typically fixed) buffer for holding lines.
284 ///
285 /// Callers should create line buffers sparingly and reuse them when possible.
286 /// Line buffers cannot be used directly, but instead must be used via the
287 /// LineBufferReader.
288 #[derive(Clone, Debug)]
289 pub struct LineBuffer {
290     /// The configuration of this buffer.
291     config: Config,
292     /// The primary buffer with which to hold data.
293     buf: Vec<u8>,
294     /// The current position of this buffer. This is always a valid sliceable
295     /// index into `buf`, and its maximum value is the length of `buf`.
296     pos: usize,
297     /// The end position of searchable content in this buffer. This is either
298     /// set to just after the final line terminator in the buffer, or to just
299     /// after the end of the last byte emitted by the reader when the reader
300     /// has been exhausted.
301     last_lineterm: usize,
302     /// The end position of the buffer. This is always greater than or equal to
303     /// last_lineterm. The bytes between last_lineterm and end, if any, always
304     /// correspond to a partial line.
305     end: usize,
306     /// The absolute byte offset corresponding to `pos`. This is most typically
307     /// not a valid index into addressable memory, but rather, an offset that
308     /// is relative to all data that passes through a line buffer (since
309     /// construction or since the last time `clear` was called).
310     ///
311     /// When the line buffer reaches EOF, this is set to the position just
312     /// after the last byte read from the underlying reader. That is, it
313     /// becomes the total count of bytes that have been read.
314     absolute_byte_offset: u64,
315     /// If binary data was found, this records the absolute byte offset at
316     /// which it was first detected.
317     binary_byte_offset: Option<u64>,
318 }
319 
320 impl LineBuffer {
321     /// Set the binary detection method used on this line buffer.
322     ///
323     /// This permits dynamically changing the binary detection strategy on
324     /// an existing line buffer without needing to create a new one.
set_binary_detection(&mut self, binary: BinaryDetection)325     pub fn set_binary_detection(&mut self, binary: BinaryDetection) {
326         self.config.binary = binary;
327     }
328 
329     /// Reset this buffer, such that it can be used with a new reader.
clear(&mut self)330     fn clear(&mut self) {
331         self.pos = 0;
332         self.last_lineterm = 0;
333         self.end = 0;
334         self.absolute_byte_offset = 0;
335         self.binary_byte_offset = None;
336     }
337 
338     /// The absolute byte offset which corresponds to the starting offsets
339     /// of the data returned by `buffer` relative to the beginning of the
340     /// reader's contents. As such, this offset does not generally correspond
341     /// to an offset in memory. It is typically used for reporting purposes,
342     /// particularly in error messages.
343     ///
344     /// This is reset to `0` when `clear` is called.
absolute_byte_offset(&self) -> u64345     fn absolute_byte_offset(&self) -> u64 {
346         self.absolute_byte_offset
347     }
348 
349     /// If binary data was detected, then this returns the absolute byte offset
350     /// at which binary data was initially found.
binary_byte_offset(&self) -> Option<u64>351     fn binary_byte_offset(&self) -> Option<u64> {
352         self.binary_byte_offset
353     }
354 
355     /// Return the contents of this buffer.
buffer(&self) -> &[u8]356     fn buffer(&self) -> &[u8] {
357         &self.buf[self.pos..self.last_lineterm]
358     }
359 
360     /// Return the contents of the free space beyond the end of the buffer as
361     /// a mutable slice.
free_buffer(&mut self) -> &mut [u8]362     fn free_buffer(&mut self) -> &mut [u8] {
363         &mut self.buf[self.end..]
364     }
365 
366     /// Consume the number of bytes provided. This must be less than or equal
367     /// to the number of bytes returned by `buffer`.
consume(&mut self, amt: usize)368     fn consume(&mut self, amt: usize) {
369         assert!(amt <= self.buffer().len());
370         self.pos += amt;
371         self.absolute_byte_offset += amt as u64;
372     }
373 
374     /// Consumes the remainder of the buffer. Subsequent calls to `buffer` are
375     /// guaranteed to return an empty slice until the buffer is refilled.
376     ///
377     /// This is a convenience function for `consume(buffer.len())`.
378     #[cfg(test)]
consume_all(&mut self)379     fn consume_all(&mut self) {
380         let amt = self.buffer().len();
381         self.consume(amt);
382     }
383 
384     /// Fill the contents of this buffer by discarding the part of the buffer
385     /// that has been consumed. The free space created by discarding the
386     /// consumed part of the buffer is then filled with new data from the given
387     /// reader.
388     ///
389     /// Callers should provide the same reader to this line buffer in
390     /// subsequent calls to fill. A different reader can only be used
391     /// immediately following a call to `clear`.
392     ///
393     /// If EOF is reached, then `false` is returned. Otherwise, `true` is
394     /// returned. (Note that if this line buffer's binary detection is set to
395     /// `Quit`, then the presence of binary data will cause this buffer to
396     /// behave as if it had seen EOF.)
397     ///
398     /// This forwards any errors returned by `rdr`, and will also return an
399     /// error if the buffer must be expanded past its allocation limit, as
400     /// governed by the buffer allocation strategy.
fill<R: io::Read>(&mut self, mut rdr: R) -> Result<bool, io::Error>401     fn fill<R: io::Read>(&mut self, mut rdr: R) -> Result<bool, io::Error> {
402         // If the binary detection heuristic tells us to quit once binary data
403         // has been observed, then we no longer read new data and reach EOF
404         // once the current buffer has been consumed.
405         if self.config.binary.is_quit() && self.binary_byte_offset.is_some() {
406             return Ok(!self.buffer().is_empty());
407         }
408 
409         self.roll();
410         assert_eq!(self.pos, 0);
411         loop {
412             self.ensure_capacity()?;
413             let readlen = rdr.read(self.free_buffer().as_bytes_mut())?;
414             if readlen == 0 {
415                 // We're only done reading for good once the caller has
416                 // consumed everything.
417                 self.last_lineterm = self.end;
418                 return Ok(!self.buffer().is_empty());
419             }
420 
421             // Get a mutable view into the bytes we've just read. These are
422             // the bytes that we do binary detection on, and also the bytes we
423             // search to find the last line terminator. We need a mutable slice
424             // in the case of binary conversion.
425             let oldend = self.end;
426             self.end += readlen;
427             let newbytes = &mut self.buf[oldend..self.end];
428 
429             // Binary detection.
430             match self.config.binary {
431                 BinaryDetection::None => {} // nothing to do
432                 BinaryDetection::Quit(byte) => {
433                     if let Some(i) = newbytes.find_byte(byte) {
434                         self.end = oldend + i;
435                         self.last_lineterm = self.end;
436                         self.binary_byte_offset =
437                             Some(self.absolute_byte_offset + self.end as u64);
438                         // If the first byte in our buffer is a binary byte,
439                         // then our buffer is empty and we should report as
440                         // such to the caller.
441                         return Ok(self.pos < self.end);
442                     }
443                 }
444                 BinaryDetection::Convert(byte) => {
445                     if let Some(i) =
446                         replace_bytes(newbytes, byte, self.config.lineterm)
447                     {
448                         // Record only the first binary offset.
449                         if self.binary_byte_offset.is_none() {
450                             self.binary_byte_offset = Some(
451                                 self.absolute_byte_offset
452                                     + (oldend + i) as u64,
453                             );
454                         }
455                     }
456                 }
457             }
458 
459             // Update our `last_lineterm` positions if we read one.
460             if let Some(i) = newbytes.rfind_byte(self.config.lineterm) {
461                 self.last_lineterm = oldend + i + 1;
462                 return Ok(true);
463             }
464             // At this point, if we couldn't find a line terminator, then we
465             // don't have a complete line. Therefore, we try to read more!
466         }
467     }
468 
469     /// Roll the unconsumed parts of the buffer to the front.
470     ///
471     /// This operation is idempotent.
472     ///
473     /// After rolling, `last_lineterm` and `end` point to the same location,
474     /// and `pos` is always set to `0`.
roll(&mut self)475     fn roll(&mut self) {
476         if self.pos == self.end {
477             self.pos = 0;
478             self.last_lineterm = 0;
479             self.end = 0;
480             return;
481         }
482 
483         let roll_len = self.end - self.pos;
484         self.buf.copy_within_str(self.pos..self.end, 0);
485         self.pos = 0;
486         self.last_lineterm = roll_len;
487         self.end = roll_len;
488     }
489 
490     /// Ensures that the internal buffer has a non-zero amount of free space
491     /// in which to read more data. If there is no free space, then more is
492     /// allocated. If the allocation must exceed the configured limit, then
493     /// this returns an error.
ensure_capacity(&mut self) -> Result<(), io::Error>494     fn ensure_capacity(&mut self) -> Result<(), io::Error> {
495         if !self.free_buffer().is_empty() {
496             return Ok(());
497         }
498         // `len` is used for computing the next allocation size. The capacity
499         // is permitted to start at `0`, so we make sure it's at least `1`.
500         let len = cmp::max(1, self.buf.len());
501         let additional = match self.config.buffer_alloc {
502             BufferAllocation::Eager => len * 2,
503             BufferAllocation::Error(limit) => {
504                 let used = self.buf.len() - self.config.capacity;
505                 let n = cmp::min(len * 2, limit - used);
506                 if n == 0 {
507                     return Err(alloc_error(self.config.capacity + limit));
508                 }
509                 n
510             }
511         };
512         assert!(additional > 0);
513         let newlen = self.buf.len() + additional;
514         self.buf.resize(newlen, 0);
515         assert!(!self.free_buffer().is_empty());
516         Ok(())
517     }
518 }
519 
520 /// Replaces `src` with `replacement` in bytes, and return the offset of the
521 /// first replacement, if one exists.
replace_bytes(bytes: &mut [u8], src: u8, replacement: u8) -> Option<usize>522 fn replace_bytes(bytes: &mut [u8], src: u8, replacement: u8) -> Option<usize> {
523     if src == replacement {
524         return None;
525     }
526     let mut first_pos = None;
527     let mut pos = 0;
528     while let Some(i) = bytes[pos..].find_byte(src).map(|i| pos + i) {
529         if first_pos.is_none() {
530             first_pos = Some(i);
531         }
532         bytes[i] = replacement;
533         pos = i + 1;
534         while bytes.get(pos) == Some(&src) {
535             bytes[pos] = replacement;
536             pos += 1;
537         }
538     }
539     first_pos
540 }
541 
542 #[cfg(test)]
543 mod tests {
544     use super::*;
545     use bstr::{ByteSlice, ByteVec};
546     use std::str;
547 
548     const SHERLOCK: &'static str = "\
549 For the Doctor Watsons of this world, as opposed to the Sherlock
550 Holmeses, success in the province of detective work must always
551 be, to a very large extent, the result of luck. Sherlock Holmes
552 can extract a clew from a wisp of straw or a flake of cigar ash;
553 but Doctor Watson has to have it taken out for him and dusted,
554 and exhibited clearly, with a label attached.\
555 ";
556 
s(slice: &str) -> String557     fn s(slice: &str) -> String {
558         slice.to_string()
559     }
560 
replace_str( slice: &str, src: u8, replacement: u8, ) -> (String, Option<usize>)561     fn replace_str(
562         slice: &str,
563         src: u8,
564         replacement: u8,
565     ) -> (String, Option<usize>) {
566         let mut dst = Vec::from(slice);
567         let result = replace_bytes(&mut dst, src, replacement);
568         (dst.into_string().unwrap(), result)
569     }
570 
571     #[test]
replace()572     fn replace() {
573         assert_eq!(replace_str("abc", b'b', b'z'), (s("azc"), Some(1)));
574         assert_eq!(replace_str("abb", b'b', b'z'), (s("azz"), Some(1)));
575         assert_eq!(replace_str("aba", b'a', b'z'), (s("zbz"), Some(0)));
576         assert_eq!(replace_str("bbb", b'b', b'z'), (s("zzz"), Some(0)));
577         assert_eq!(replace_str("bac", b'b', b'z'), (s("zac"), Some(0)));
578     }
579 
580     #[test]
buffer_basics1()581     fn buffer_basics1() {
582         let bytes = "homer\nlisa\nmaggie";
583         let mut linebuf = LineBufferBuilder::new().build();
584         let mut rdr = LineBufferReader::new(bytes.as_bytes(), &mut linebuf);
585 
586         assert!(rdr.buffer().is_empty());
587 
588         assert!(rdr.fill().unwrap());
589         assert_eq!(rdr.bstr(), "homer\nlisa\n");
590         assert_eq!(rdr.absolute_byte_offset(), 0);
591         rdr.consume(5);
592         assert_eq!(rdr.absolute_byte_offset(), 5);
593         rdr.consume_all();
594         assert_eq!(rdr.absolute_byte_offset(), 11);
595 
596         assert!(rdr.fill().unwrap());
597         assert_eq!(rdr.bstr(), "maggie");
598         rdr.consume_all();
599 
600         assert!(!rdr.fill().unwrap());
601         assert_eq!(rdr.absolute_byte_offset(), bytes.len() as u64);
602         assert_eq!(rdr.binary_byte_offset(), None);
603     }
604 
605     #[test]
buffer_basics2()606     fn buffer_basics2() {
607         let bytes = "homer\nlisa\nmaggie\n";
608         let mut linebuf = LineBufferBuilder::new().build();
609         let mut rdr = LineBufferReader::new(bytes.as_bytes(), &mut linebuf);
610 
611         assert!(rdr.fill().unwrap());
612         assert_eq!(rdr.bstr(), "homer\nlisa\nmaggie\n");
613         rdr.consume_all();
614 
615         assert!(!rdr.fill().unwrap());
616         assert_eq!(rdr.absolute_byte_offset(), bytes.len() as u64);
617         assert_eq!(rdr.binary_byte_offset(), None);
618     }
619 
620     #[test]
buffer_basics3()621     fn buffer_basics3() {
622         let bytes = "\n";
623         let mut linebuf = LineBufferBuilder::new().build();
624         let mut rdr = LineBufferReader::new(bytes.as_bytes(), &mut linebuf);
625 
626         assert!(rdr.fill().unwrap());
627         assert_eq!(rdr.bstr(), "\n");
628         rdr.consume_all();
629 
630         assert!(!rdr.fill().unwrap());
631         assert_eq!(rdr.absolute_byte_offset(), bytes.len() as u64);
632         assert_eq!(rdr.binary_byte_offset(), None);
633     }
634 
635     #[test]
buffer_basics4()636     fn buffer_basics4() {
637         let bytes = "\n\n";
638         let mut linebuf = LineBufferBuilder::new().build();
639         let mut rdr = LineBufferReader::new(bytes.as_bytes(), &mut linebuf);
640 
641         assert!(rdr.fill().unwrap());
642         assert_eq!(rdr.bstr(), "\n\n");
643         rdr.consume_all();
644 
645         assert!(!rdr.fill().unwrap());
646         assert_eq!(rdr.absolute_byte_offset(), bytes.len() as u64);
647         assert_eq!(rdr.binary_byte_offset(), None);
648     }
649 
650     #[test]
buffer_empty()651     fn buffer_empty() {
652         let bytes = "";
653         let mut linebuf = LineBufferBuilder::new().build();
654         let mut rdr = LineBufferReader::new(bytes.as_bytes(), &mut linebuf);
655 
656         assert!(!rdr.fill().unwrap());
657         assert_eq!(rdr.absolute_byte_offset(), bytes.len() as u64);
658         assert_eq!(rdr.binary_byte_offset(), None);
659     }
660 
661     #[test]
buffer_zero_capacity()662     fn buffer_zero_capacity() {
663         let bytes = "homer\nlisa\nmaggie";
664         let mut linebuf = LineBufferBuilder::new().capacity(0).build();
665         let mut rdr = LineBufferReader::new(bytes.as_bytes(), &mut linebuf);
666 
667         while rdr.fill().unwrap() {
668             rdr.consume_all();
669         }
670         assert_eq!(rdr.absolute_byte_offset(), bytes.len() as u64);
671         assert_eq!(rdr.binary_byte_offset(), None);
672     }
673 
674     #[test]
buffer_small_capacity()675     fn buffer_small_capacity() {
676         let bytes = "homer\nlisa\nmaggie";
677         let mut linebuf = LineBufferBuilder::new().capacity(1).build();
678         let mut rdr = LineBufferReader::new(bytes.as_bytes(), &mut linebuf);
679 
680         let mut got = vec![];
681         while rdr.fill().unwrap() {
682             got.push_str(rdr.buffer());
683             rdr.consume_all();
684         }
685         assert_eq!(bytes, got.as_bstr());
686         assert_eq!(rdr.absolute_byte_offset(), bytes.len() as u64);
687         assert_eq!(rdr.binary_byte_offset(), None);
688     }
689 
690     #[test]
buffer_limited_capacity1()691     fn buffer_limited_capacity1() {
692         let bytes = "homer\nlisa\nmaggie";
693         let mut linebuf = LineBufferBuilder::new()
694             .capacity(1)
695             .buffer_alloc(BufferAllocation::Error(5))
696             .build();
697         let mut rdr = LineBufferReader::new(bytes.as_bytes(), &mut linebuf);
698 
699         assert!(rdr.fill().unwrap());
700         assert_eq!(rdr.bstr(), "homer\n");
701         rdr.consume_all();
702 
703         assert!(rdr.fill().unwrap());
704         assert_eq!(rdr.bstr(), "lisa\n");
705         rdr.consume_all();
706 
707         // This returns an error because while we have just enough room to
708         // store maggie in the buffer, we *don't* have enough room to read one
709         // more byte, so we don't know whether we're at EOF or not, and
710         // therefore must give up.
711         assert!(rdr.fill().is_err());
712 
713         // We can mush on though!
714         assert_eq!(rdr.bstr(), "m");
715         rdr.consume_all();
716 
717         assert!(rdr.fill().unwrap());
718         assert_eq!(rdr.bstr(), "aggie");
719         rdr.consume_all();
720 
721         assert!(!rdr.fill().unwrap());
722     }
723 
724     #[test]
buffer_limited_capacity2()725     fn buffer_limited_capacity2() {
726         let bytes = "homer\nlisa\nmaggie";
727         let mut linebuf = LineBufferBuilder::new()
728             .capacity(1)
729             .buffer_alloc(BufferAllocation::Error(6))
730             .build();
731         let mut rdr = LineBufferReader::new(bytes.as_bytes(), &mut linebuf);
732 
733         assert!(rdr.fill().unwrap());
734         assert_eq!(rdr.bstr(), "homer\n");
735         rdr.consume_all();
736 
737         assert!(rdr.fill().unwrap());
738         assert_eq!(rdr.bstr(), "lisa\n");
739         rdr.consume_all();
740 
741         // We have just enough space.
742         assert!(rdr.fill().unwrap());
743         assert_eq!(rdr.bstr(), "maggie");
744         rdr.consume_all();
745 
746         assert!(!rdr.fill().unwrap());
747     }
748 
749     #[test]
buffer_limited_capacity3()750     fn buffer_limited_capacity3() {
751         let bytes = "homer\nlisa\nmaggie";
752         let mut linebuf = LineBufferBuilder::new()
753             .capacity(1)
754             .buffer_alloc(BufferAllocation::Error(0))
755             .build();
756         let mut rdr = LineBufferReader::new(bytes.as_bytes(), &mut linebuf);
757 
758         assert!(rdr.fill().is_err());
759         assert_eq!(rdr.bstr(), "");
760     }
761 
762     #[test]
buffer_binary_none()763     fn buffer_binary_none() {
764         let bytes = "homer\nli\x00sa\nmaggie\n";
765         let mut linebuf = LineBufferBuilder::new().build();
766         let mut rdr = LineBufferReader::new(bytes.as_bytes(), &mut linebuf);
767 
768         assert!(rdr.buffer().is_empty());
769 
770         assert!(rdr.fill().unwrap());
771         assert_eq!(rdr.bstr(), "homer\nli\x00sa\nmaggie\n");
772         rdr.consume_all();
773 
774         assert!(!rdr.fill().unwrap());
775         assert_eq!(rdr.absolute_byte_offset(), bytes.len() as u64);
776         assert_eq!(rdr.binary_byte_offset(), None);
777     }
778 
779     #[test]
buffer_binary_quit1()780     fn buffer_binary_quit1() {
781         let bytes = "homer\nli\x00sa\nmaggie\n";
782         let mut linebuf = LineBufferBuilder::new()
783             .binary_detection(BinaryDetection::Quit(b'\x00'))
784             .build();
785         let mut rdr = LineBufferReader::new(bytes.as_bytes(), &mut linebuf);
786 
787         assert!(rdr.buffer().is_empty());
788 
789         assert!(rdr.fill().unwrap());
790         assert_eq!(rdr.bstr(), "homer\nli");
791         rdr.consume_all();
792 
793         assert!(!rdr.fill().unwrap());
794         assert_eq!(rdr.absolute_byte_offset(), 8);
795         assert_eq!(rdr.binary_byte_offset(), Some(8));
796     }
797 
798     #[test]
buffer_binary_quit2()799     fn buffer_binary_quit2() {
800         let bytes = "\x00homer\nlisa\nmaggie\n";
801         let mut linebuf = LineBufferBuilder::new()
802             .binary_detection(BinaryDetection::Quit(b'\x00'))
803             .build();
804         let mut rdr = LineBufferReader::new(bytes.as_bytes(), &mut linebuf);
805 
806         assert!(!rdr.fill().unwrap());
807         assert_eq!(rdr.bstr(), "");
808         assert_eq!(rdr.absolute_byte_offset(), 0);
809         assert_eq!(rdr.binary_byte_offset(), Some(0));
810     }
811 
812     #[test]
buffer_binary_quit3()813     fn buffer_binary_quit3() {
814         let bytes = "homer\nlisa\nmaggie\n\x00";
815         let mut linebuf = LineBufferBuilder::new()
816             .binary_detection(BinaryDetection::Quit(b'\x00'))
817             .build();
818         let mut rdr = LineBufferReader::new(bytes.as_bytes(), &mut linebuf);
819 
820         assert!(rdr.buffer().is_empty());
821 
822         assert!(rdr.fill().unwrap());
823         assert_eq!(rdr.bstr(), "homer\nlisa\nmaggie\n");
824         rdr.consume_all();
825 
826         assert!(!rdr.fill().unwrap());
827         assert_eq!(rdr.absolute_byte_offset(), bytes.len() as u64 - 1);
828         assert_eq!(rdr.binary_byte_offset(), Some(bytes.len() as u64 - 1));
829     }
830 
831     #[test]
buffer_binary_quit4()832     fn buffer_binary_quit4() {
833         let bytes = "homer\nlisa\nmaggie\x00\n";
834         let mut linebuf = LineBufferBuilder::new()
835             .binary_detection(BinaryDetection::Quit(b'\x00'))
836             .build();
837         let mut rdr = LineBufferReader::new(bytes.as_bytes(), &mut linebuf);
838 
839         assert!(rdr.buffer().is_empty());
840 
841         assert!(rdr.fill().unwrap());
842         assert_eq!(rdr.bstr(), "homer\nlisa\nmaggie");
843         rdr.consume_all();
844 
845         assert!(!rdr.fill().unwrap());
846         assert_eq!(rdr.absolute_byte_offset(), bytes.len() as u64 - 2);
847         assert_eq!(rdr.binary_byte_offset(), Some(bytes.len() as u64 - 2));
848     }
849 
850     #[test]
buffer_binary_quit5()851     fn buffer_binary_quit5() {
852         let mut linebuf = LineBufferBuilder::new()
853             .binary_detection(BinaryDetection::Quit(b'u'))
854             .build();
855         let mut rdr = LineBufferReader::new(SHERLOCK.as_bytes(), &mut linebuf);
856 
857         assert!(rdr.buffer().is_empty());
858 
859         assert!(rdr.fill().unwrap());
860         assert_eq!(
861             rdr.bstr(),
862             "\
863 For the Doctor Watsons of this world, as opposed to the Sherlock
864 Holmeses, s\
865 "
866         );
867         rdr.consume_all();
868 
869         assert!(!rdr.fill().unwrap());
870         assert_eq!(rdr.absolute_byte_offset(), 76);
871         assert_eq!(rdr.binary_byte_offset(), Some(76));
872         assert_eq!(SHERLOCK.as_bytes()[76], b'u');
873     }
874 
875     #[test]
buffer_binary_convert1()876     fn buffer_binary_convert1() {
877         let bytes = "homer\nli\x00sa\nmaggie\n";
878         let mut linebuf = LineBufferBuilder::new()
879             .binary_detection(BinaryDetection::Convert(b'\x00'))
880             .build();
881         let mut rdr = LineBufferReader::new(bytes.as_bytes(), &mut linebuf);
882 
883         assert!(rdr.buffer().is_empty());
884 
885         assert!(rdr.fill().unwrap());
886         assert_eq!(rdr.bstr(), "homer\nli\nsa\nmaggie\n");
887         rdr.consume_all();
888 
889         assert!(!rdr.fill().unwrap());
890         assert_eq!(rdr.absolute_byte_offset(), bytes.len() as u64);
891         assert_eq!(rdr.binary_byte_offset(), Some(8));
892     }
893 
894     #[test]
buffer_binary_convert2()895     fn buffer_binary_convert2() {
896         let bytes = "\x00homer\nlisa\nmaggie\n";
897         let mut linebuf = LineBufferBuilder::new()
898             .binary_detection(BinaryDetection::Convert(b'\x00'))
899             .build();
900         let mut rdr = LineBufferReader::new(bytes.as_bytes(), &mut linebuf);
901 
902         assert!(rdr.buffer().is_empty());
903 
904         assert!(rdr.fill().unwrap());
905         assert_eq!(rdr.bstr(), "\nhomer\nlisa\nmaggie\n");
906         rdr.consume_all();
907 
908         assert!(!rdr.fill().unwrap());
909         assert_eq!(rdr.absolute_byte_offset(), bytes.len() as u64);
910         assert_eq!(rdr.binary_byte_offset(), Some(0));
911     }
912 
913     #[test]
buffer_binary_convert3()914     fn buffer_binary_convert3() {
915         let bytes = "homer\nlisa\nmaggie\n\x00";
916         let mut linebuf = LineBufferBuilder::new()
917             .binary_detection(BinaryDetection::Convert(b'\x00'))
918             .build();
919         let mut rdr = LineBufferReader::new(bytes.as_bytes(), &mut linebuf);
920 
921         assert!(rdr.buffer().is_empty());
922 
923         assert!(rdr.fill().unwrap());
924         assert_eq!(rdr.bstr(), "homer\nlisa\nmaggie\n\n");
925         rdr.consume_all();
926 
927         assert!(!rdr.fill().unwrap());
928         assert_eq!(rdr.absolute_byte_offset(), bytes.len() as u64);
929         assert_eq!(rdr.binary_byte_offset(), Some(bytes.len() as u64 - 1));
930     }
931 
932     #[test]
buffer_binary_convert4()933     fn buffer_binary_convert4() {
934         let bytes = "homer\nlisa\nmaggie\x00\n";
935         let mut linebuf = LineBufferBuilder::new()
936             .binary_detection(BinaryDetection::Convert(b'\x00'))
937             .build();
938         let mut rdr = LineBufferReader::new(bytes.as_bytes(), &mut linebuf);
939 
940         assert!(rdr.buffer().is_empty());
941 
942         assert!(rdr.fill().unwrap());
943         assert_eq!(rdr.bstr(), "homer\nlisa\nmaggie\n\n");
944         rdr.consume_all();
945 
946         assert!(!rdr.fill().unwrap());
947         assert_eq!(rdr.absolute_byte_offset(), bytes.len() as u64);
948         assert_eq!(rdr.binary_byte_offset(), Some(bytes.len() as u64 - 2));
949     }
950 }
951