1 use std::cmp;
2 use std::io;
3
4 use bstr::ByteSlice;
5
/// Capacity, in bytes, given to a line buffer when the builder is not told
/// otherwise: 64 KiB.
pub(crate) const DEFAULT_BUFFER_CAPACITY: usize = 64 * 1024;
8
/// Controls how far a buffer may grow when a line does not fit into it.
///
/// Data is searched incrementally through a buffer with a fixed initial
/// capacity. When a line (or, with context enabled, the lines of a context
/// window) is larger than that, this setting decides how much *additional*
/// memory, beyond the initial capacity, may be allocated to hold it.
///
/// The default is `BufferAllocation::Eager`, i.e. no limit.
#[derive(Clone, Copy, Debug)]
pub enum BufferAllocation {
    /// Keep growing the buffer until the next line fits. No limit is
    /// enforced, so growth only stops once memory is exhausted.
    ///
    /// This is the default.
    Eager,
    /// Allow at most the given number of additional bytes. If a line needs
    /// more memory than that, reading stops and an error is returned.
    Error(usize),
}
29
30 impl Default for BufferAllocation {
default() -> BufferAllocation31 fn default() -> BufferAllocation {
32 BufferAllocation::Eager
33 }
34 }
35
/// Build the error that is reported when a configured allocation limit has
/// been reached.
///
/// The returned error has kind `io::ErrorKind::Other` and a message that
/// embeds `limit`.
pub fn alloc_error(limit: usize) -> io::Error {
    io::Error::new(
        io::ErrorKind::Other,
        format!("configured allocation limit ({}) exceeded", limit),
    )
}
42
/// What the line buffer does when it runs into binary data.
///
/// Binary detection is a _heuristic_: a chunk of data is classified as
/// binary or not, and an action is taken based on that classification. It
/// exists because binary data is often undesirable to search with textual
/// patterns. That is far from always true, which is why binary detection is
/// disabled by default.
#[derive(Clone, Copy, Debug)]
pub enum BinaryDetection {
    /// Perform no binary detection. Data reported by the line buffer may
    /// contain arbitrary bytes.
    None,
    /// Search everything read by the line buffer for the given byte. If it
    /// occurs, the data is considered binary and the line buffer behaves as
    /// if it had reached EOF. Callers never observe this byte.
    Quit(u8),
    /// Search everything read by the line buffer for the given byte. Every
    /// occurrence is replaced by the line terminator. Callers never observe
    /// this byte.
    Convert(u8),
}
66
67 impl Default for BinaryDetection {
default() -> BinaryDetection68 fn default() -> BinaryDetection {
69 BinaryDetection::None
70 }
71 }
72
73 impl BinaryDetection {
74 /// Returns true if and only if the detection heuristic demands that
75 /// the line buffer stop read data once binary data is observed.
is_quit(&self) -> bool76 fn is_quit(&self) -> bool {
77 match *self {
78 BinaryDetection::Quit(_) => true,
79 _ => false,
80 }
81 }
82 }
83
84 /// The configuration of a buffer. This contains options that are fixed once
85 /// a buffer has been constructed.
86 #[derive(Clone, Copy, Debug)]
87 struct Config {
88 /// The number of bytes to attempt to read at a time.
89 capacity: usize,
90 /// The line terminator.
91 lineterm: u8,
92 /// The behavior for handling long lines.
93 buffer_alloc: BufferAllocation,
94 /// When set, the presence of the given byte indicates binary content.
95 binary: BinaryDetection,
96 }
97
98 impl Default for Config {
default() -> Config99 fn default() -> Config {
100 Config {
101 capacity: DEFAULT_BUFFER_CAPACITY,
102 lineterm: b'\n',
103 buffer_alloc: BufferAllocation::default(),
104 binary: BinaryDetection::default(),
105 }
106 }
107 }
108
109 /// A builder for constructing line buffers.
110 #[derive(Clone, Debug, Default)]
111 pub struct LineBufferBuilder {
112 config: Config,
113 }
114
115 impl LineBufferBuilder {
116 /// Create a new builder for a buffer.
new() -> LineBufferBuilder117 pub fn new() -> LineBufferBuilder {
118 LineBufferBuilder { config: Config::default() }
119 }
120
121 /// Create a new line buffer from this builder's configuration.
build(&self) -> LineBuffer122 pub fn build(&self) -> LineBuffer {
123 LineBuffer {
124 config: self.config,
125 buf: vec![0; self.config.capacity],
126 pos: 0,
127 last_lineterm: 0,
128 end: 0,
129 absolute_byte_offset: 0,
130 binary_byte_offset: None,
131 }
132 }
133
134 /// Set the default capacity to use for a buffer.
135 ///
136 /// In general, the capacity of a buffer corresponds to the amount of data
137 /// to hold in memory, and the size of the reads to make to the underlying
138 /// reader.
139 ///
140 /// This is set to a reasonable default and probably shouldn't be changed
141 /// unless there's a specific reason to do so.
capacity(&mut self, capacity: usize) -> &mut LineBufferBuilder142 pub fn capacity(&mut self, capacity: usize) -> &mut LineBufferBuilder {
143 self.config.capacity = capacity;
144 self
145 }
146
147 /// Set the line terminator for the buffer.
148 ///
149 /// Every buffer has a line terminator, and this line terminator is used
150 /// to determine how to roll the buffer forward. For example, when a read
151 /// to the buffer's underlying reader occurs, the end of the data that is
152 /// read is likely to correspond to an incomplete line. As a line buffer,
153 /// callers should not access this data since it is incomplete. The line
154 /// terminator is how the line buffer determines the part of the read that
155 /// is incomplete.
156 ///
157 /// By default, this is set to `b'\n'`.
line_terminator(&mut self, lineterm: u8) -> &mut LineBufferBuilder158 pub fn line_terminator(&mut self, lineterm: u8) -> &mut LineBufferBuilder {
159 self.config.lineterm = lineterm;
160 self
161 }
162
163 /// Set the maximum amount of additional memory to allocate for long lines.
164 ///
165 /// In order to enable line oriented search, a fundamental requirement is
166 /// that, at a minimum, each line must be able to fit into memory. This
167 /// setting controls how big that line is allowed to be. By default, this
168 /// is set to `BufferAllocation::Eager`, which means a line buffer will
169 /// attempt to allocate as much memory as possible to fit a line, and will
170 /// only be limited by available memory.
171 ///
172 /// Note that this setting only applies to the amount of *additional*
173 /// memory to allocate, beyond the capacity of the buffer. That means that
174 /// a value of `0` is sensible, and in particular, will guarantee that a
175 /// line buffer will never allocate additional memory beyond its initial
176 /// capacity.
buffer_alloc( &mut self, behavior: BufferAllocation, ) -> &mut LineBufferBuilder177 pub fn buffer_alloc(
178 &mut self,
179 behavior: BufferAllocation,
180 ) -> &mut LineBufferBuilder {
181 self.config.buffer_alloc = behavior;
182 self
183 }
184
185 /// Whether to enable binary detection or not. Depending on the setting,
186 /// this can either cause the line buffer to report EOF early or it can
187 /// cause the line buffer to clean the data.
188 ///
189 /// By default, this is disabled. In general, binary detection should be
190 /// viewed as an imperfect heuristic.
binary_detection( &mut self, detection: BinaryDetection, ) -> &mut LineBufferBuilder191 pub fn binary_detection(
192 &mut self,
193 detection: BinaryDetection,
194 ) -> &mut LineBufferBuilder {
195 self.config.binary = detection;
196 self
197 }
198 }
199
200 /// A line buffer reader efficiently reads a line oriented buffer from an
201 /// arbitrary reader.
202 #[derive(Debug)]
203 pub struct LineBufferReader<'b, R> {
204 rdr: R,
205 line_buffer: &'b mut LineBuffer,
206 }
207
208 impl<'b, R: io::Read> LineBufferReader<'b, R> {
209 /// Create a new buffered reader that reads from `rdr` and uses the given
210 /// `line_buffer` as an intermediate buffer.
211 ///
212 /// This does not change the binary detection behavior of the given line
213 /// buffer.
new( rdr: R, line_buffer: &'b mut LineBuffer, ) -> LineBufferReader<'b, R>214 pub fn new(
215 rdr: R,
216 line_buffer: &'b mut LineBuffer,
217 ) -> LineBufferReader<'b, R> {
218 line_buffer.clear();
219 LineBufferReader { rdr, line_buffer }
220 }
221
222 /// The absolute byte offset which corresponds to the starting offsets
223 /// of the data returned by `buffer` relative to the beginning of the
224 /// underlying reader's contents. As such, this offset does not generally
225 /// correspond to an offset in memory. It is typically used for reporting
226 /// purposes. It can also be used for counting the number of bytes that
227 /// have been searched.
absolute_byte_offset(&self) -> u64228 pub fn absolute_byte_offset(&self) -> u64 {
229 self.line_buffer.absolute_byte_offset()
230 }
231
232 /// If binary data was detected, then this returns the absolute byte offset
233 /// at which binary data was initially found.
binary_byte_offset(&self) -> Option<u64>234 pub fn binary_byte_offset(&self) -> Option<u64> {
235 self.line_buffer.binary_byte_offset()
236 }
237
238 /// Fill the contents of this buffer by discarding the part of the buffer
239 /// that has been consumed. The free space created by discarding the
240 /// consumed part of the buffer is then filled with new data from the
241 /// reader.
242 ///
243 /// If EOF is reached, then `false` is returned. Otherwise, `true` is
244 /// returned. (Note that if this line buffer's binary detection is set to
245 /// `Quit`, then the presence of binary data will cause this buffer to
246 /// behave as if it had seen EOF at the first occurrence of binary data.)
247 ///
248 /// This forwards any errors returned by the underlying reader, and will
249 /// also return an error if the buffer must be expanded past its allocation
250 /// limit, as governed by the buffer allocation strategy.
fill(&mut self) -> Result<bool, io::Error>251 pub fn fill(&mut self) -> Result<bool, io::Error> {
252 self.line_buffer.fill(&mut self.rdr)
253 }
254
255 /// Return the contents of this buffer.
buffer(&self) -> &[u8]256 pub fn buffer(&self) -> &[u8] {
257 self.line_buffer.buffer()
258 }
259
260 /// Return the buffer as a BStr, used for convenient equality checking
261 /// in tests only.
262 #[cfg(test)]
bstr(&self) -> &::bstr::BStr263 fn bstr(&self) -> &::bstr::BStr {
264 self.buffer().as_bstr()
265 }
266
267 /// Consume the number of bytes provided. This must be less than or equal
268 /// to the number of bytes returned by `buffer`.
consume(&mut self, amt: usize)269 pub fn consume(&mut self, amt: usize) {
270 self.line_buffer.consume(amt);
271 }
272
273 /// Consumes the remainder of the buffer. Subsequent calls to `buffer` are
274 /// guaranteed to return an empty slice until the buffer is refilled.
275 ///
276 /// This is a convenience function for `consume(buffer.len())`.
277 #[cfg(test)]
consume_all(&mut self)278 fn consume_all(&mut self) {
279 self.line_buffer.consume_all();
280 }
281 }
282
283 /// A line buffer manages a (typically fixed) buffer for holding lines.
284 ///
285 /// Callers should create line buffers sparingly and reuse them when possible.
286 /// Line buffers cannot be used directly, but instead must be used via the
287 /// LineBufferReader.
288 #[derive(Clone, Debug)]
289 pub struct LineBuffer {
290 /// The configuration of this buffer.
291 config: Config,
292 /// The primary buffer with which to hold data.
293 buf: Vec<u8>,
294 /// The current position of this buffer. This is always a valid sliceable
295 /// index into `buf`, and its maximum value is the length of `buf`.
296 pos: usize,
297 /// The end position of searchable content in this buffer. This is either
298 /// set to just after the final line terminator in the buffer, or to just
299 /// after the end of the last byte emitted by the reader when the reader
300 /// has been exhausted.
301 last_lineterm: usize,
302 /// The end position of the buffer. This is always greater than or equal to
303 /// last_lineterm. The bytes between last_lineterm and end, if any, always
304 /// correspond to a partial line.
305 end: usize,
306 /// The absolute byte offset corresponding to `pos`. This is most typically
307 /// not a valid index into addressable memory, but rather, an offset that
308 /// is relative to all data that passes through a line buffer (since
309 /// construction or since the last time `clear` was called).
310 ///
311 /// When the line buffer reaches EOF, this is set to the position just
312 /// after the last byte read from the underlying reader. That is, it
313 /// becomes the total count of bytes that have been read.
314 absolute_byte_offset: u64,
315 /// If binary data was found, this records the absolute byte offset at
316 /// which it was first detected.
317 binary_byte_offset: Option<u64>,
318 }
319
320 impl LineBuffer {
321 /// Set the binary detection method used on this line buffer.
322 ///
323 /// This permits dynamically changing the binary detection strategy on
324 /// an existing line buffer without needing to create a new one.
set_binary_detection(&mut self, binary: BinaryDetection)325 pub fn set_binary_detection(&mut self, binary: BinaryDetection) {
326 self.config.binary = binary;
327 }
328
329 /// Reset this buffer, such that it can be used with a new reader.
clear(&mut self)330 fn clear(&mut self) {
331 self.pos = 0;
332 self.last_lineterm = 0;
333 self.end = 0;
334 self.absolute_byte_offset = 0;
335 self.binary_byte_offset = None;
336 }
337
338 /// The absolute byte offset which corresponds to the starting offsets
339 /// of the data returned by `buffer` relative to the beginning of the
340 /// reader's contents. As such, this offset does not generally correspond
341 /// to an offset in memory. It is typically used for reporting purposes,
342 /// particularly in error messages.
343 ///
344 /// This is reset to `0` when `clear` is called.
absolute_byte_offset(&self) -> u64345 fn absolute_byte_offset(&self) -> u64 {
346 self.absolute_byte_offset
347 }
348
349 /// If binary data was detected, then this returns the absolute byte offset
350 /// at which binary data was initially found.
binary_byte_offset(&self) -> Option<u64>351 fn binary_byte_offset(&self) -> Option<u64> {
352 self.binary_byte_offset
353 }
354
355 /// Return the contents of this buffer.
buffer(&self) -> &[u8]356 fn buffer(&self) -> &[u8] {
357 &self.buf[self.pos..self.last_lineterm]
358 }
359
360 /// Return the contents of the free space beyond the end of the buffer as
361 /// a mutable slice.
free_buffer(&mut self) -> &mut [u8]362 fn free_buffer(&mut self) -> &mut [u8] {
363 &mut self.buf[self.end..]
364 }
365
366 /// Consume the number of bytes provided. This must be less than or equal
367 /// to the number of bytes returned by `buffer`.
consume(&mut self, amt: usize)368 fn consume(&mut self, amt: usize) {
369 assert!(amt <= self.buffer().len());
370 self.pos += amt;
371 self.absolute_byte_offset += amt as u64;
372 }
373
374 /// Consumes the remainder of the buffer. Subsequent calls to `buffer` are
375 /// guaranteed to return an empty slice until the buffer is refilled.
376 ///
377 /// This is a convenience function for `consume(buffer.len())`.
378 #[cfg(test)]
consume_all(&mut self)379 fn consume_all(&mut self) {
380 let amt = self.buffer().len();
381 self.consume(amt);
382 }
383
384 /// Fill the contents of this buffer by discarding the part of the buffer
385 /// that has been consumed. The free space created by discarding the
386 /// consumed part of the buffer is then filled with new data from the given
387 /// reader.
388 ///
389 /// Callers should provide the same reader to this line buffer in
390 /// subsequent calls to fill. A different reader can only be used
391 /// immediately following a call to `clear`.
392 ///
393 /// If EOF is reached, then `false` is returned. Otherwise, `true` is
394 /// returned. (Note that if this line buffer's binary detection is set to
395 /// `Quit`, then the presence of binary data will cause this buffer to
396 /// behave as if it had seen EOF.)
397 ///
398 /// This forwards any errors returned by `rdr`, and will also return an
399 /// error if the buffer must be expanded past its allocation limit, as
400 /// governed by the buffer allocation strategy.
fill<R: io::Read>(&mut self, mut rdr: R) -> Result<bool, io::Error>401 fn fill<R: io::Read>(&mut self, mut rdr: R) -> Result<bool, io::Error> {
402 // If the binary detection heuristic tells us to quit once binary data
403 // has been observed, then we no longer read new data and reach EOF
404 // once the current buffer has been consumed.
405 if self.config.binary.is_quit() && self.binary_byte_offset.is_some() {
406 return Ok(!self.buffer().is_empty());
407 }
408
409 self.roll();
410 assert_eq!(self.pos, 0);
411 loop {
412 self.ensure_capacity()?;
413 let readlen = rdr.read(self.free_buffer().as_bytes_mut())?;
414 if readlen == 0 {
415 // We're only done reading for good once the caller has
416 // consumed everything.
417 self.last_lineterm = self.end;
418 return Ok(!self.buffer().is_empty());
419 }
420
421 // Get a mutable view into the bytes we've just read. These are
422 // the bytes that we do binary detection on, and also the bytes we
423 // search to find the last line terminator. We need a mutable slice
424 // in the case of binary conversion.
425 let oldend = self.end;
426 self.end += readlen;
427 let newbytes = &mut self.buf[oldend..self.end];
428
429 // Binary detection.
430 match self.config.binary {
431 BinaryDetection::None => {} // nothing to do
432 BinaryDetection::Quit(byte) => {
433 if let Some(i) = newbytes.find_byte(byte) {
434 self.end = oldend + i;
435 self.last_lineterm = self.end;
436 self.binary_byte_offset =
437 Some(self.absolute_byte_offset + self.end as u64);
438 // If the first byte in our buffer is a binary byte,
439 // then our buffer is empty and we should report as
440 // such to the caller.
441 return Ok(self.pos < self.end);
442 }
443 }
444 BinaryDetection::Convert(byte) => {
445 if let Some(i) =
446 replace_bytes(newbytes, byte, self.config.lineterm)
447 {
448 // Record only the first binary offset.
449 if self.binary_byte_offset.is_none() {
450 self.binary_byte_offset = Some(
451 self.absolute_byte_offset
452 + (oldend + i) as u64,
453 );
454 }
455 }
456 }
457 }
458
459 // Update our `last_lineterm` positions if we read one.
460 if let Some(i) = newbytes.rfind_byte(self.config.lineterm) {
461 self.last_lineterm = oldend + i + 1;
462 return Ok(true);
463 }
464 // At this point, if we couldn't find a line terminator, then we
465 // don't have a complete line. Therefore, we try to read more!
466 }
467 }
468
469 /// Roll the unconsumed parts of the buffer to the front.
470 ///
471 /// This operation is idempotent.
472 ///
473 /// After rolling, `last_lineterm` and `end` point to the same location,
474 /// and `pos` is always set to `0`.
roll(&mut self)475 fn roll(&mut self) {
476 if self.pos == self.end {
477 self.pos = 0;
478 self.last_lineterm = 0;
479 self.end = 0;
480 return;
481 }
482
483 let roll_len = self.end - self.pos;
484 self.buf.copy_within_str(self.pos..self.end, 0);
485 self.pos = 0;
486 self.last_lineterm = roll_len;
487 self.end = roll_len;
488 }
489
490 /// Ensures that the internal buffer has a non-zero amount of free space
491 /// in which to read more data. If there is no free space, then more is
492 /// allocated. If the allocation must exceed the configured limit, then
493 /// this returns an error.
ensure_capacity(&mut self) -> Result<(), io::Error>494 fn ensure_capacity(&mut self) -> Result<(), io::Error> {
495 if !self.free_buffer().is_empty() {
496 return Ok(());
497 }
498 // `len` is used for computing the next allocation size. The capacity
499 // is permitted to start at `0`, so we make sure it's at least `1`.
500 let len = cmp::max(1, self.buf.len());
501 let additional = match self.config.buffer_alloc {
502 BufferAllocation::Eager => len * 2,
503 BufferAllocation::Error(limit) => {
504 let used = self.buf.len() - self.config.capacity;
505 let n = cmp::min(len * 2, limit - used);
506 if n == 0 {
507 return Err(alloc_error(self.config.capacity + limit));
508 }
509 n
510 }
511 };
512 assert!(additional > 0);
513 let newlen = self.buf.len() + additional;
514 self.buf.resize(newlen, 0);
515 assert!(!self.free_buffer().is_empty());
516 Ok(())
517 }
518 }
519
520 /// Replaces `src` with `replacement` in bytes, and return the offset of the
521 /// first replacement, if one exists.
replace_bytes(bytes: &mut [u8], src: u8, replacement: u8) -> Option<usize>522 fn replace_bytes(bytes: &mut [u8], src: u8, replacement: u8) -> Option<usize> {
523 if src == replacement {
524 return None;
525 }
526 let mut first_pos = None;
527 let mut pos = 0;
528 while let Some(i) = bytes[pos..].find_byte(src).map(|i| pos + i) {
529 if first_pos.is_none() {
530 first_pos = Some(i);
531 }
532 bytes[i] = replacement;
533 pos = i + 1;
534 while bytes.get(pos) == Some(&src) {
535 bytes[pos] = replacement;
536 pos += 1;
537 }
538 }
539 first_pos
540 }
541
#[cfg(test)]
mod tests {
    use super::*;
    use bstr::{ByteSlice, ByteVec};

    // The continuation lines of this literal must start in column 0, since
    // any leading whitespace would become part of the string.
    const SHERLOCK: &str = "\
For the Doctor Watsons of this world, as opposed to the Sherlock
Holmeses, success in the province of detective work must always
be, to a very large extent, the result of luck. Sherlock Holmes
can extract a clew from a wisp of straw or a flake of cigar ash;
but Doctor Watson has to have it taken out for him and dusted,
and exhibited clearly, with a label attached.\
";

    /// Shorthand for building an owned `String` in assertions.
    fn s(slice: &str) -> String {
        slice.to_string()
    }

    /// Runs `replace_bytes` on a copy of `slice` and returns the modified
    /// copy together with the reported offset.
    fn replace_str(
        slice: &str,
        src: u8,
        replacement: u8,
    ) -> (String, Option<usize>) {
        let mut dst = Vec::from(slice);
        let result = replace_bytes(&mut dst, src, replacement);
        (dst.into_string().unwrap(), result)
    }

    #[test]
    fn replace() {
        assert_eq!(replace_str("abc", b'b', b'z'), (s("azc"), Some(1)));
        assert_eq!(replace_str("abb", b'b', b'z'), (s("azz"), Some(1)));
        assert_eq!(replace_str("aba", b'a', b'z'), (s("zbz"), Some(0)));
        assert_eq!(replace_str("bbb", b'b', b'z'), (s("zzz"), Some(0)));
        assert_eq!(replace_str("bac", b'b', b'z'), (s("zac"), Some(0)));

        // Nothing to replace: no match, empty input, or identical bytes.
        assert_eq!(replace_str("abc", b'x', b'z'), (s("abc"), None));
        assert_eq!(replace_str("", b'b', b'z'), (s(""), None));
        assert_eq!(replace_str("abc", b'b', b'b'), (s("abc"), None));
    }

    #[test]
    fn buffer_basics1() {
        let bytes = "homer\nlisa\nmaggie";
        let mut linebuf = LineBufferBuilder::new().build();
        let mut rdr = LineBufferReader::new(bytes.as_bytes(), &mut linebuf);

        assert!(rdr.buffer().is_empty());

        assert!(rdr.fill().unwrap());
        assert_eq!(rdr.bstr(), "homer\nlisa\n");
        assert_eq!(rdr.absolute_byte_offset(), 0);
        rdr.consume(5);
        assert_eq!(rdr.absolute_byte_offset(), 5);
        rdr.consume_all();
        assert_eq!(rdr.absolute_byte_offset(), 11);

        assert!(rdr.fill().unwrap());
        assert_eq!(rdr.bstr(), "maggie");
        rdr.consume_all();

        assert!(!rdr.fill().unwrap());
        assert_eq!(rdr.absolute_byte_offset(), bytes.len() as u64);
        assert_eq!(rdr.binary_byte_offset(), None);
    }

    #[test]
    fn buffer_basics2() {
        let bytes = "homer\nlisa\nmaggie\n";
        let mut linebuf = LineBufferBuilder::new().build();
        let mut rdr = LineBufferReader::new(bytes.as_bytes(), &mut linebuf);

        assert!(rdr.fill().unwrap());
        assert_eq!(rdr.bstr(), "homer\nlisa\nmaggie\n");
        rdr.consume_all();

        assert!(!rdr.fill().unwrap());
        assert_eq!(rdr.absolute_byte_offset(), bytes.len() as u64);
        assert_eq!(rdr.binary_byte_offset(), None);
    }

    #[test]
    fn buffer_basics3() {
        let bytes = "\n";
        let mut linebuf = LineBufferBuilder::new().build();
        let mut rdr = LineBufferReader::new(bytes.as_bytes(), &mut linebuf);

        assert!(rdr.fill().unwrap());
        assert_eq!(rdr.bstr(), "\n");
        rdr.consume_all();

        assert!(!rdr.fill().unwrap());
        assert_eq!(rdr.absolute_byte_offset(), bytes.len() as u64);
        assert_eq!(rdr.binary_byte_offset(), None);
    }

    #[test]
    fn buffer_basics4() {
        let bytes = "\n\n";
        let mut linebuf = LineBufferBuilder::new().build();
        let mut rdr = LineBufferReader::new(bytes.as_bytes(), &mut linebuf);

        assert!(rdr.fill().unwrap());
        assert_eq!(rdr.bstr(), "\n\n");
        rdr.consume_all();

        assert!(!rdr.fill().unwrap());
        assert_eq!(rdr.absolute_byte_offset(), bytes.len() as u64);
        assert_eq!(rdr.binary_byte_offset(), None);
    }

    #[test]
    fn buffer_empty() {
        let bytes = "";
        let mut linebuf = LineBufferBuilder::new().build();
        let mut rdr = LineBufferReader::new(bytes.as_bytes(), &mut linebuf);

        assert!(!rdr.fill().unwrap());
        assert_eq!(rdr.absolute_byte_offset(), bytes.len() as u64);
        assert_eq!(rdr.binary_byte_offset(), None);
    }

    #[test]
    fn buffer_zero_capacity() {
        let bytes = "homer\nlisa\nmaggie";
        let mut linebuf = LineBufferBuilder::new().capacity(0).build();
        let mut rdr = LineBufferReader::new(bytes.as_bytes(), &mut linebuf);

        // Besides terminating, a buffer that starts out empty must still
        // hand back every byte of the input, in order.
        let mut got = vec![];
        while rdr.fill().unwrap() {
            got.push_str(rdr.buffer());
            rdr.consume_all();
        }
        assert_eq!(bytes, got.as_bstr());
        assert_eq!(rdr.absolute_byte_offset(), bytes.len() as u64);
        assert_eq!(rdr.binary_byte_offset(), None);
    }

    #[test]
    fn buffer_small_capacity() {
        let bytes = "homer\nlisa\nmaggie";
        let mut linebuf = LineBufferBuilder::new().capacity(1).build();
        let mut rdr = LineBufferReader::new(bytes.as_bytes(), &mut linebuf);

        let mut got = vec![];
        while rdr.fill().unwrap() {
            got.push_str(rdr.buffer());
            rdr.consume_all();
        }
        assert_eq!(bytes, got.as_bstr());
        assert_eq!(rdr.absolute_byte_offset(), bytes.len() as u64);
        assert_eq!(rdr.binary_byte_offset(), None);
    }

    #[test]
    fn buffer_limited_capacity1() {
        let bytes = "homer\nlisa\nmaggie";
        let mut linebuf = LineBufferBuilder::new()
            .capacity(1)
            .buffer_alloc(BufferAllocation::Error(5))
            .build();
        let mut rdr = LineBufferReader::new(bytes.as_bytes(), &mut linebuf);

        assert!(rdr.fill().unwrap());
        assert_eq!(rdr.bstr(), "homer\n");
        rdr.consume_all();

        assert!(rdr.fill().unwrap());
        assert_eq!(rdr.bstr(), "lisa\n");
        rdr.consume_all();

        // This returns an error because while we have just enough room to
        // store maggie in the buffer, we *don't* have enough room to read one
        // more byte, so we don't know whether we're at EOF or not, and
        // therefore must give up.
        assert!(rdr.fill().is_err());

        // We can mush on though!
        assert_eq!(rdr.bstr(), "m");
        rdr.consume_all();

        assert!(rdr.fill().unwrap());
        assert_eq!(rdr.bstr(), "aggie");
        rdr.consume_all();

        assert!(!rdr.fill().unwrap());
    }

    #[test]
    fn buffer_limited_capacity2() {
        let bytes = "homer\nlisa\nmaggie";
        let mut linebuf = LineBufferBuilder::new()
            .capacity(1)
            .buffer_alloc(BufferAllocation::Error(6))
            .build();
        let mut rdr = LineBufferReader::new(bytes.as_bytes(), &mut linebuf);

        assert!(rdr.fill().unwrap());
        assert_eq!(rdr.bstr(), "homer\n");
        rdr.consume_all();

        assert!(rdr.fill().unwrap());
        assert_eq!(rdr.bstr(), "lisa\n");
        rdr.consume_all();

        // We have just enough space.
        assert!(rdr.fill().unwrap());
        assert_eq!(rdr.bstr(), "maggie");
        rdr.consume_all();

        assert!(!rdr.fill().unwrap());
    }

    #[test]
    fn buffer_limited_capacity3() {
        let bytes = "homer\nlisa\nmaggie";
        let mut linebuf = LineBufferBuilder::new()
            .capacity(1)
            .buffer_alloc(BufferAllocation::Error(0))
            .build();
        let mut rdr = LineBufferReader::new(bytes.as_bytes(), &mut linebuf);

        assert!(rdr.fill().is_err());
        assert_eq!(rdr.bstr(), "");
    }

    #[test]
    fn buffer_binary_none() {
        let bytes = "homer\nli\x00sa\nmaggie\n";
        let mut linebuf = LineBufferBuilder::new().build();
        let mut rdr = LineBufferReader::new(bytes.as_bytes(), &mut linebuf);

        assert!(rdr.buffer().is_empty());

        assert!(rdr.fill().unwrap());
        assert_eq!(rdr.bstr(), "homer\nli\x00sa\nmaggie\n");
        rdr.consume_all();

        assert!(!rdr.fill().unwrap());
        assert_eq!(rdr.absolute_byte_offset(), bytes.len() as u64);
        assert_eq!(rdr.binary_byte_offset(), None);
    }

    #[test]
    fn buffer_binary_quit1() {
        let bytes = "homer\nli\x00sa\nmaggie\n";
        let mut linebuf = LineBufferBuilder::new()
            .binary_detection(BinaryDetection::Quit(b'\x00'))
            .build();
        let mut rdr = LineBufferReader::new(bytes.as_bytes(), &mut linebuf);

        assert!(rdr.buffer().is_empty());

        assert!(rdr.fill().unwrap());
        assert_eq!(rdr.bstr(), "homer\nli");
        rdr.consume_all();

        assert!(!rdr.fill().unwrap());
        assert_eq!(rdr.absolute_byte_offset(), 8);
        assert_eq!(rdr.binary_byte_offset(), Some(8));
    }

    #[test]
    fn buffer_binary_quit2() {
        let bytes = "\x00homer\nlisa\nmaggie\n";
        let mut linebuf = LineBufferBuilder::new()
            .binary_detection(BinaryDetection::Quit(b'\x00'))
            .build();
        let mut rdr = LineBufferReader::new(bytes.as_bytes(), &mut linebuf);

        assert!(!rdr.fill().unwrap());
        assert_eq!(rdr.bstr(), "");
        assert_eq!(rdr.absolute_byte_offset(), 0);
        assert_eq!(rdr.binary_byte_offset(), Some(0));
    }

    #[test]
    fn buffer_binary_quit3() {
        let bytes = "homer\nlisa\nmaggie\n\x00";
        let mut linebuf = LineBufferBuilder::new()
            .binary_detection(BinaryDetection::Quit(b'\x00'))
            .build();
        let mut rdr = LineBufferReader::new(bytes.as_bytes(), &mut linebuf);

        assert!(rdr.buffer().is_empty());

        assert!(rdr.fill().unwrap());
        assert_eq!(rdr.bstr(), "homer\nlisa\nmaggie\n");
        rdr.consume_all();

        assert!(!rdr.fill().unwrap());
        assert_eq!(rdr.absolute_byte_offset(), bytes.len() as u64 - 1);
        assert_eq!(rdr.binary_byte_offset(), Some(bytes.len() as u64 - 1));
    }

    #[test]
    fn buffer_binary_quit4() {
        let bytes = "homer\nlisa\nmaggie\x00\n";
        let mut linebuf = LineBufferBuilder::new()
            .binary_detection(BinaryDetection::Quit(b'\x00'))
            .build();
        let mut rdr = LineBufferReader::new(bytes.as_bytes(), &mut linebuf);

        assert!(rdr.buffer().is_empty());

        assert!(rdr.fill().unwrap());
        assert_eq!(rdr.bstr(), "homer\nlisa\nmaggie");
        rdr.consume_all();

        assert!(!rdr.fill().unwrap());
        assert_eq!(rdr.absolute_byte_offset(), bytes.len() as u64 - 2);
        assert_eq!(rdr.binary_byte_offset(), Some(bytes.len() as u64 - 2));
    }

    #[test]
    fn buffer_binary_quit5() {
        let mut linebuf = LineBufferBuilder::new()
            .binary_detection(BinaryDetection::Quit(b'u'))
            .build();
        let mut rdr = LineBufferReader::new(SHERLOCK.as_bytes(), &mut linebuf);

        assert!(rdr.buffer().is_empty());

        assert!(rdr.fill().unwrap());
        assert_eq!(
            rdr.bstr(),
            "\
For the Doctor Watsons of this world, as opposed to the Sherlock
Holmeses, s\
"
        );
        rdr.consume_all();

        assert!(!rdr.fill().unwrap());
        assert_eq!(rdr.absolute_byte_offset(), 76);
        assert_eq!(rdr.binary_byte_offset(), Some(76));
        assert_eq!(SHERLOCK.as_bytes()[76], b'u');
    }

    #[test]
    fn buffer_binary_convert1() {
        let bytes = "homer\nli\x00sa\nmaggie\n";
        let mut linebuf = LineBufferBuilder::new()
            .binary_detection(BinaryDetection::Convert(b'\x00'))
            .build();
        let mut rdr = LineBufferReader::new(bytes.as_bytes(), &mut linebuf);

        assert!(rdr.buffer().is_empty());

        assert!(rdr.fill().unwrap());
        assert_eq!(rdr.bstr(), "homer\nli\nsa\nmaggie\n");
        rdr.consume_all();

        assert!(!rdr.fill().unwrap());
        assert_eq!(rdr.absolute_byte_offset(), bytes.len() as u64);
        assert_eq!(rdr.binary_byte_offset(), Some(8));
    }

    #[test]
    fn buffer_binary_convert2() {
        let bytes = "\x00homer\nlisa\nmaggie\n";
        let mut linebuf = LineBufferBuilder::new()
            .binary_detection(BinaryDetection::Convert(b'\x00'))
            .build();
        let mut rdr = LineBufferReader::new(bytes.as_bytes(), &mut linebuf);

        assert!(rdr.buffer().is_empty());

        assert!(rdr.fill().unwrap());
        assert_eq!(rdr.bstr(), "\nhomer\nlisa\nmaggie\n");
        rdr.consume_all();

        assert!(!rdr.fill().unwrap());
        assert_eq!(rdr.absolute_byte_offset(), bytes.len() as u64);
        assert_eq!(rdr.binary_byte_offset(), Some(0));
    }

    #[test]
    fn buffer_binary_convert3() {
        let bytes = "homer\nlisa\nmaggie\n\x00";
        let mut linebuf = LineBufferBuilder::new()
            .binary_detection(BinaryDetection::Convert(b'\x00'))
            .build();
        let mut rdr = LineBufferReader::new(bytes.as_bytes(), &mut linebuf);

        assert!(rdr.buffer().is_empty());

        assert!(rdr.fill().unwrap());
        assert_eq!(rdr.bstr(), "homer\nlisa\nmaggie\n\n");
        rdr.consume_all();

        assert!(!rdr.fill().unwrap());
        assert_eq!(rdr.absolute_byte_offset(), bytes.len() as u64);
        assert_eq!(rdr.binary_byte_offset(), Some(bytes.len() as u64 - 1));
    }

    #[test]
    fn buffer_binary_convert4() {
        let bytes = "homer\nlisa\nmaggie\x00\n";
        let mut linebuf = LineBufferBuilder::new()
            .binary_detection(BinaryDetection::Convert(b'\x00'))
            .build();
        let mut rdr = LineBufferReader::new(bytes.as_bytes(), &mut linebuf);

        assert!(rdr.buffer().is_empty());

        assert!(rdr.fill().unwrap());
        assert_eq!(rdr.bstr(), "homer\nlisa\nmaggie\n\n");
        rdr.consume_all();

        assert!(!rdr.fill().unwrap());
        assert_eq!(rdr.absolute_byte_offset(), bytes.len() as u64);
        assert_eq!(rdr.binary_byte_offset(), Some(bytes.len() as u64 - 2));
    }
}
951