1 use std::cmp;
2 use std::fmt;
3 use std::iter::FromIterator;
4 use std::ops::{self, Range};
5 use std::result;
6 
7 use bstr::{BString, ByteSlice};
8 use serde::de::Deserialize;
9 
10 use crate::deserializer::deserialize_byte_record;
11 use crate::error::{new_utf8_error, Result, Utf8Error};
12 use crate::string_record::StringRecord;
13 
14 /// A single CSV record stored as raw bytes.
15 ///
16 /// A byte record permits reading or writing CSV rows that are not UTF-8.
17 /// In general, you should prefer using a
18 /// [`StringRecord`](struct.StringRecord.html)
19 /// since it is more ergonomic, but a `ByteRecord` is provided in case you need
20 /// it.
21 ///
22 /// If you are using the Serde (de)serialization APIs, then you probably never
23 /// need to interact with a `ByteRecord` or a `StringRecord`. However, there
24 /// are some circumstances in which you might need to use a raw record type
25 /// while still using Serde. For example, if you need to deserialize possibly
26 /// invalid UTF-8 fields, then you'll need to first read your record into a
27 /// `ByteRecord`, and then use `ByteRecord::deserialize` to run Serde. Another
28 /// reason for using the raw record deserialization APIs is if you're using
29 /// Serde to read into borrowed data such as a `&'a str` or a `&'a [u8]`.
30 ///
31 /// Two `ByteRecord`s are compared on the basis of their field data. Any
32 /// position information associated with the records is ignored.
// `PartialEq` is implemented by hand rather than derived so that equality
// looks only at field data and never at the record's position. The record
// itself is a single boxed pointer to its inner state.
#[derive(Clone, Eq)]
pub struct ByteRecord(Box<ByteRecordInner>);
35 
36 impl PartialEq for ByteRecord {
eq(&self, other: &ByteRecord) -> bool37     fn eq(&self, other: &ByteRecord) -> bool {
38         if self.len() != other.len() {
39             return false;
40         }
41         self.iter().zip(other.iter()).all(|e| e.0 == e.1)
42     }
43 }
44 
45 impl<T: AsRef<[u8]>> PartialEq<Vec<T>> for ByteRecord {
eq(&self, other: &Vec<T>) -> bool46     fn eq(&self, other: &Vec<T>) -> bool {
47         self.iter_eq(other)
48     }
49 }
50 
51 impl<'a, T: AsRef<[u8]>> PartialEq<Vec<T>> for &'a ByteRecord {
eq(&self, other: &Vec<T>) -> bool52     fn eq(&self, other: &Vec<T>) -> bool {
53         self.iter_eq(other)
54     }
55 }
56 
57 impl<T: AsRef<[u8]>> PartialEq<[T]> for ByteRecord {
eq(&self, other: &[T]) -> bool58     fn eq(&self, other: &[T]) -> bool {
59         self.iter_eq(other)
60     }
61 }
62 
63 impl<'a, T: AsRef<[u8]>> PartialEq<[T]> for &'a ByteRecord {
eq(&self, other: &[T]) -> bool64     fn eq(&self, other: &[T]) -> bool {
65         self.iter_eq(other)
66     }
67 }
68 
69 impl fmt::Debug for ByteRecord {
fmt(&self, f: &mut fmt::Formatter) -> fmt::Result70     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
71         let mut fields = vec![];
72         for field in self {
73             fields.push(BString::from(field.to_vec()));
74         }
75         write!(f, "ByteRecord({:?})", fields)
76     }
77 }
78 
79 /// The inner portion of a byte record.
80 ///
81 /// We use this memory layout so that moving a `ByteRecord` only requires
82 /// moving a single pointer. The optimization is dubious at best, but does
83 /// seem to result in slightly better numbers in microbenchmarks. Methinks this
84 /// may heavily depend on the underlying allocator.
#[derive(Clone, Debug, Eq, PartialEq)]
struct ByteRecordInner {
    /// The position of this byte record, or `None` when no position has
    /// been set.
    pos: Option<Position>,
    /// All fields in this record, stored contiguously.
    ///
    /// The buffer may be longer than the data in use: only the bytes up to
    /// the end of the last field (as reported by `bounds`) belong to the
    /// record.
    fields: Vec<u8>,
    /// The number of and location of each field in this record.
    bounds: Bounds,
}
94 
95 impl Default for ByteRecord {
96     #[inline]
default() -> ByteRecord97     fn default() -> ByteRecord {
98         ByteRecord::new()
99     }
100 }
101 
102 impl ByteRecord {
103     /// Create a new empty `ByteRecord`.
104     ///
105     /// Note that you may find the `ByteRecord::from` constructor more
106     /// convenient, which is provided by an impl on the `From` trait.
107     ///
108     /// # Example: create an empty record
109     ///
110     /// ```
111     /// use csv::ByteRecord;
112     ///
113     /// let record = ByteRecord::new();
114     /// assert_eq!(record.len(), 0);
115     /// ```
116     ///
117     /// # Example: initialize a record from a `Vec`
118     ///
119     /// ```
120     /// use csv::ByteRecord;
121     ///
122     /// let record = ByteRecord::from(vec!["a", "b", "c"]);
123     /// assert_eq!(record.len(), 3);
124     /// ```
125     #[inline]
new() -> ByteRecord126     pub fn new() -> ByteRecord {
127         ByteRecord::with_capacity(0, 0)
128     }
129 
130     /// Create a new empty `ByteRecord` with the given capacity settings.
131     ///
132     /// `buffer` refers to the capacity of the buffer used to store the
133     /// actual row contents. `fields` refers to the number of fields one
134     /// might expect to store.
135     #[inline]
with_capacity(buffer: usize, fields: usize) -> ByteRecord136     pub fn with_capacity(buffer: usize, fields: usize) -> ByteRecord {
137         ByteRecord(Box::new(ByteRecordInner {
138             pos: None,
139             fields: vec![0; buffer],
140             bounds: Bounds::with_capacity(fields),
141         }))
142     }
143 
144     /// Deserialize this record.
145     ///
146     /// The `D` type parameter refers to the type that this record should be
147     /// deserialized into. The `'de` lifetime refers to the lifetime of the
148     /// `ByteRecord`. The `'de` lifetime permits deserializing into structs
149     /// that borrow field data from this record.
150     ///
151     /// An optional `headers` parameter permits deserializing into a struct
152     /// based on its field names (corresponding to header values) rather than
153     /// the order in which the fields are defined.
154     ///
155     /// # Example: without headers
156     ///
157     /// This shows how to deserialize a single row into a struct based on the
158     /// order in which fields occur. This example also shows how to borrow
159     /// fields from the `ByteRecord`, which results in zero allocation
160     /// deserialization.
161     ///
162     /// ```
163     /// use std::error::Error;
164     ///
165     /// use csv::ByteRecord;
166     /// use serde::Deserialize;
167     ///
168     /// #[derive(Deserialize)]
169     /// struct Row<'a> {
170     ///     city: &'a str,
171     ///     country: &'a str,
172     ///     population: u64,
173     /// }
174     ///
175     /// # fn main() { example().unwrap() }
176     /// fn example() -> Result<(), Box<dyn Error>> {
177     ///     let record = ByteRecord::from(vec![
178     ///         "Boston", "United States", "4628910",
179     ///     ]);
180     ///
181     ///     let row: Row = record.deserialize(None)?;
182     ///     assert_eq!(row.city, "Boston");
183     ///     assert_eq!(row.country, "United States");
184     ///     assert_eq!(row.population, 4628910);
185     ///     Ok(())
186     /// }
187     /// ```
188     ///
189     /// # Example: with headers
190     ///
191     /// This example is like the previous one, but shows how to deserialize
192     /// into a struct based on the struct's field names. For this to work,
193     /// you must provide a header row.
194     ///
195     /// This example also shows that you can deserialize into owned data
196     /// types (e.g., `String`) instead of borrowed data types (e.g., `&str`).
197     ///
198     /// ```
199     /// use std::error::Error;
200     ///
201     /// use csv::ByteRecord;
202     /// use serde::Deserialize;
203     ///
204     /// #[derive(Deserialize)]
205     /// struct Row {
206     ///     city: String,
207     ///     country: String,
208     ///     population: u64,
209     /// }
210     ///
211     /// # fn main() { example().unwrap() }
212     /// fn example() -> Result<(), Box<dyn Error>> {
213     ///     // Notice that the fields are not in the same order
214     ///     // as the fields in the struct!
215     ///     let header = ByteRecord::from(vec![
216     ///         "country", "city", "population",
217     ///     ]);
218     ///     let record = ByteRecord::from(vec![
219     ///         "United States", "Boston", "4628910",
220     ///     ]);
221     ///
222     ///     let row: Row = record.deserialize(Some(&header))?;
223     ///     assert_eq!(row.city, "Boston");
224     ///     assert_eq!(row.country, "United States");
225     ///     assert_eq!(row.population, 4628910);
226     ///     Ok(())
227     /// }
228     /// ```
deserialize<'de, D: Deserialize<'de>>( &'de self, headers: Option<&'de ByteRecord>, ) -> Result<D>229     pub fn deserialize<'de, D: Deserialize<'de>>(
230         &'de self,
231         headers: Option<&'de ByteRecord>,
232     ) -> Result<D> {
233         deserialize_byte_record(self, headers)
234     }
235 
236     /// Returns an iterator over all fields in this record.
237     ///
238     /// # Example
239     ///
240     /// This example shows how to iterate over each field in a `ByteRecord`.
241     ///
242     /// ```
243     /// use csv::ByteRecord;
244     ///
245     /// let record = ByteRecord::from(vec!["a", "b", "c"]);
246     /// for field in record.iter() {
247     ///     assert!(field == b"a" || field == b"b" || field == b"c");
248     /// }
249     /// ```
250     #[inline]
iter(&self) -> ByteRecordIter251     pub fn iter(&self) -> ByteRecordIter {
252         self.into_iter()
253     }
254 
255     /// Return the field at index `i`.
256     ///
257     /// If no field at index `i` exists, then this returns `None`.
258     ///
259     /// # Example
260     ///
261     /// ```
262     /// use csv::ByteRecord;
263     ///
264     /// let record = ByteRecord::from(vec!["a", "b", "c"]);
265     /// assert_eq!(record.get(1), Some(&b"b"[..]));
266     /// assert_eq!(record.get(3), None);
267     /// ```
268     #[inline]
get(&self, i: usize) -> Option<&[u8]>269     pub fn get(&self, i: usize) -> Option<&[u8]> {
270         self.0.bounds.get(i).map(|range| &self.0.fields[range])
271     }
272 
273     /// Returns true if and only if this record is empty.
274     ///
275     /// # Example
276     ///
277     /// ```
278     /// use csv::ByteRecord;
279     ///
280     /// assert!(ByteRecord::new().is_empty());
281     /// ```
282     #[inline]
is_empty(&self) -> bool283     pub fn is_empty(&self) -> bool {
284         self.len() == 0
285     }
286 
287     /// Returns the number of fields in this record.
288     ///
289     /// # Example
290     ///
291     /// ```
292     /// use csv::ByteRecord;
293     ///
294     /// let record = ByteRecord::from(vec!["a", "b", "c"]);
295     /// assert_eq!(record.len(), 3);
296     /// ```
297     #[inline]
len(&self) -> usize298     pub fn len(&self) -> usize {
299         self.0.bounds.len()
300     }
301 
302     /// Truncate this record to `n` fields.
303     ///
304     /// If `n` is greater than the number of fields in this record, then this
305     /// has no effect.
306     ///
307     /// # Example
308     ///
309     /// ```
310     /// use csv::ByteRecord;
311     ///
312     /// let mut record = ByteRecord::from(vec!["a", "b", "c"]);
313     /// assert_eq!(record.len(), 3);
314     /// record.truncate(1);
315     /// assert_eq!(record.len(), 1);
316     /// assert_eq!(record, vec!["a"]);
317     /// ```
318     #[inline]
truncate(&mut self, n: usize)319     pub fn truncate(&mut self, n: usize) {
320         if n <= self.len() {
321             self.0.bounds.len = n;
322         }
323     }
324 
325     /// Clear this record so that it has zero fields.
326     ///
327     /// This is equivalent to calling `truncate(0)`.
328     ///
329     /// Note that it is not necessary to clear the record to reuse it with
330     /// the CSV reader.
331     ///
332     /// # Example
333     ///
334     /// ```
335     /// use csv::ByteRecord;
336     ///
337     /// let mut record = ByteRecord::from(vec!["a", "b", "c"]);
338     /// assert_eq!(record.len(), 3);
339     /// record.clear();
340     /// assert_eq!(record.len(), 0);
341     /// ```
342     #[inline]
clear(&mut self)343     pub fn clear(&mut self) {
344         self.truncate(0);
345     }
346 
347     /// Trim the fields of this record so that leading and trailing whitespace
348     /// is removed.
349     ///
350     /// This method uses the ASCII definition of whitespace. That is, only
351     /// bytes in the class `[\t\n\v\f\r ]` are trimmed.
352     ///
353     /// # Example
354     ///
355     /// ```
356     /// use csv::ByteRecord;
357     ///
358     /// let mut record = ByteRecord::from(vec![
359     ///     "  ", "\tfoo", "bar  ", "b a z",
360     /// ]);
361     /// record.trim();
362     /// assert_eq!(record, vec!["", "foo", "bar", "b a z"]);
363     /// ```
trim(&mut self)364     pub fn trim(&mut self) {
365         let length = self.len();
366         if length == 0 {
367             return;
368         }
369         // TODO: We could likely do this in place, but for now, we allocate.
370         let mut trimmed =
371             ByteRecord::with_capacity(self.as_slice().len(), self.len());
372         trimmed.set_position(self.position().cloned());
373         for field in &*self {
374             trimmed.push_field(field.trim());
375         }
376         *self = trimmed;
377     }
378 
379     /// Add a new field to this record.
380     ///
381     /// # Example
382     ///
383     /// ```
384     /// use csv::ByteRecord;
385     ///
386     /// let mut record = ByteRecord::new();
387     /// record.push_field(b"foo");
388     /// assert_eq!(&record[0], b"foo");
389     /// ```
390     #[inline]
push_field(&mut self, field: &[u8])391     pub fn push_field(&mut self, field: &[u8]) {
392         let (s, e) = (self.0.bounds.end(), self.0.bounds.end() + field.len());
393         while e > self.0.fields.len() {
394             self.expand_fields();
395         }
396         self.0.fields[s..e].copy_from_slice(field);
397         self.0.bounds.add(e);
398     }
399 
400     /// Return the position of this record, if available.
401     ///
402     /// # Example
403     ///
404     /// ```
405     /// use std::error::Error;
406     ///
407     /// use csv::{ByteRecord, ReaderBuilder};
408     ///
409     /// # fn main() { example().unwrap(); }
410     /// fn example() -> Result<(), Box<dyn Error>> {
411     ///     let mut record = ByteRecord::new();
412     ///     let mut rdr = ReaderBuilder::new()
413     ///         .has_headers(false)
414     ///         .from_reader("a,b,c\nx,y,z".as_bytes());
415     ///
416     ///     assert!(rdr.read_byte_record(&mut record)?);
417     ///     {
418     ///         let pos = record.position().expect("a record position");
419     ///         assert_eq!(pos.byte(), 0);
420     ///         assert_eq!(pos.line(), 1);
421     ///         assert_eq!(pos.record(), 0);
422     ///     }
423     ///
424     ///     assert!(rdr.read_byte_record(&mut record)?);
425     ///     {
426     ///         let pos = record.position().expect("a record position");
427     ///         assert_eq!(pos.byte(), 6);
428     ///         assert_eq!(pos.line(), 2);
429     ///         assert_eq!(pos.record(), 1);
430     ///     }
431     ///
432     ///     // Finish the CSV reader for good measure.
433     ///     assert!(!rdr.read_byte_record(&mut record)?);
434     ///     Ok(())
435     /// }
436     /// ```
437     #[inline]
position(&self) -> Option<&Position>438     pub fn position(&self) -> Option<&Position> {
439         self.0.pos.as_ref()
440     }
441 
442     /// Set the position of this record.
443     ///
444     /// # Example
445     ///
446     /// ```
447     /// use csv::{ByteRecord, Position};
448     ///
449     /// let mut record = ByteRecord::from(vec!["a", "b", "c"]);
450     /// let mut pos = Position::new();
451     /// pos.set_byte(100);
452     /// pos.set_line(4);
453     /// pos.set_record(2);
454     ///
455     /// record.set_position(Some(pos.clone()));
456     /// assert_eq!(record.position(), Some(&pos));
457     /// ```
458     #[inline]
set_position(&mut self, pos: Option<Position>)459     pub fn set_position(&mut self, pos: Option<Position>) {
460         self.0.pos = pos;
461     }
462 
463     /// Return the start and end position of a field in this record.
464     ///
465     /// If no such field exists at the given index, then return `None`.
466     ///
467     /// The range returned can be used with the slice returned by `as_slice`.
468     ///
469     /// # Example
470     ///
471     /// ```
472     /// use csv::ByteRecord;
473     ///
474     /// let record = ByteRecord::from(vec!["foo", "quux", "z"]);
475     /// let range = record.range(1).expect("a record range");
476     /// assert_eq!(&record.as_slice()[range], &b"quux"[..]);
477     /// ```
478     #[inline]
range(&self, i: usize) -> Option<Range<usize>>479     pub fn range(&self, i: usize) -> Option<Range<usize>> {
480         self.0.bounds.get(i)
481     }
482 
483     /// Return the entire row as a single byte slice. The slice returned stores
484     /// all fields contiguously. The boundaries of each field can be determined
485     /// via the `range` method.
486     ///
487     /// # Example
488     ///
489     /// ```
490     /// use csv::ByteRecord;
491     ///
492     /// let record = ByteRecord::from(vec!["foo", "quux", "z"]);
493     /// assert_eq!(record.as_slice(), &b"fooquuxz"[..]);
494     /// ```
495     #[inline]
as_slice(&self) -> &[u8]496     pub fn as_slice(&self) -> &[u8] {
497         &self.0.fields[..self.0.bounds.end()]
498     }
499 
500     /// Clone this record, but only copy `fields` up to the end of bounds. This
501     /// is useful when one wants to copy a record, but not necessarily any
502     /// excess capacity in that record.
503     #[inline]
clone_truncated(&self) -> ByteRecord504     pub(crate) fn clone_truncated(&self) -> ByteRecord {
505         let mut br = ByteRecord::new();
506         br.0.pos = self.0.pos.clone();
507         br.0.bounds = self.0.bounds.clone();
508         br.0.fields = self.0.fields[..self.0.bounds.end()].to_vec();
509         br
510     }
511 
512     /// Retrieve the underlying parts of a byte record.
513     #[inline]
as_parts(&mut self) -> (&mut Vec<u8>, &mut Vec<usize>)514     pub(crate) fn as_parts(&mut self) -> (&mut Vec<u8>, &mut Vec<usize>) {
515         let inner = &mut *self.0;
516         (&mut inner.fields, &mut inner.bounds.ends)
517     }
518 
519     /// Set the number of fields in the given record record.
520     #[inline]
set_len(&mut self, len: usize)521     pub(crate) fn set_len(&mut self, len: usize) {
522         self.0.bounds.len = len;
523     }
524 
525     /// Expand the capacity for storing fields.
526     #[inline]
expand_fields(&mut self)527     pub(crate) fn expand_fields(&mut self) {
528         let new_len = self.0.fields.len().checked_mul(2).unwrap();
529         self.0.fields.resize(cmp::max(4, new_len), 0);
530     }
531 
532     /// Expand the capacity for storing field ending positions.
533     #[inline]
expand_ends(&mut self)534     pub(crate) fn expand_ends(&mut self) {
535         self.0.bounds.expand();
536     }
537 
538     /// Validate the given record as UTF-8.
539     ///
540     /// If it's not UTF-8, return an error.
541     #[inline]
validate(&self) -> result::Result<(), Utf8Error>542     pub(crate) fn validate(&self) -> result::Result<(), Utf8Error> {
543         // If the entire buffer is ASCII, then we have nothing to fear.
544         if self.0.fields[..self.0.bounds.end()].is_ascii() {
545             return Ok(());
546         }
547         // Otherwise, we must check each field individually to ensure that
548         // it's valid UTF-8.
549         for (i, field) in self.iter().enumerate() {
550             if let Err(err) = field.to_str() {
551                 return Err(new_utf8_error(i, err.valid_up_to()));
552             }
553         }
554         Ok(())
555     }
556 
557     /// Compare the given byte record with the iterator of fields for equality.
iter_eq<I, T>(&self, other: I) -> bool where I: IntoIterator<Item = T>, T: AsRef<[u8]>,558     pub(crate) fn iter_eq<I, T>(&self, other: I) -> bool
559     where
560         I: IntoIterator<Item = T>,
561         T: AsRef<[u8]>,
562     {
563         let mut it_record = self.iter();
564         let mut it_other = other.into_iter();
565         loop {
566             match (it_record.next(), it_other.next()) {
567                 (None, None) => return true,
568                 (None, Some(_)) | (Some(_), None) => return false,
569                 (Some(x), Some(y)) => {
570                     if x != y.as_ref() {
571                         return false;
572                     }
573                 }
574             }
575         }
576     }
577 }
578 
579 /// A position in CSV data.
580 ///
581 /// A position is used to report errors in CSV data. All positions include the
582 /// byte offset, line number and record index at which the error occurred.
583 ///
584 /// Byte offsets and record indices start at `0`. Line numbers start at `1`.
585 ///
586 /// A CSV reader will automatically assign the position of each record.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct Position {
    /// Byte offset, counted from `0`.
    byte: u64,
    /// Line number, counted from `1`.
    line: u64,
    /// Record index, counted from `0`.
    record: u64,
}

impl Position {
    /// Create a position pointing at the very start of CSV data: byte `0`,
    /// line `1`, record `0`.
    #[inline]
    pub fn new() -> Self {
        Self { byte: 0, line: 1, record: 0 }
    }

    /// The byte offset of this position. Offsets start at `0`.
    #[inline]
    pub fn byte(&self) -> u64 {
        self.byte
    }

    /// The line number of this position. Line numbers start at `1`.
    #[inline]
    pub fn line(&self) -> u64 {
        self.line
    }

    /// The record index of this position. The first record has index `0`.
    #[inline]
    pub fn record(&self) -> u64 {
        self.record
    }

    /// Replace the byte offset and return `self` so calls can be chained.
    #[inline]
    pub fn set_byte(&mut self, byte: u64) -> &mut Self {
        self.byte = byte;
        self
    }

    /// Replace the line number and return `self` so calls can be chained.
    ///
    /// # Panics
    ///
    /// Panics if `line` is less than `1`.
    #[inline]
    pub fn set_line(&mut self, line: u64) -> &mut Self {
        assert!(line > 0);
        self.line = line;
        self
    }

    /// Replace the record index and return `self` so calls can be chained.
    #[inline]
    pub fn set_record(&mut self, record: u64) -> &mut Self {
        self.record = record;
        self
    }
}
641 
/// Bookkeeping for where each field of a record ends inside the shared
/// data buffer.
#[derive(Clone, Debug, Eq, PartialEq)]
struct Bounds {
    /// The ending index of each field.
    ///
    /// This vector may be longer than `len`; entries at `len` and beyond
    /// are spare slots.
    ends: Vec<usize>,
    /// The number of fields in this record.
    ///
    /// Strictly speaking this could be dropped in favor of an invariant that
    /// `ends.len()` equals the field count, but maintaining that efficiently
    /// requires attention to safety. Tracking it separately is the safe
    /// choice and costs essentially nothing.
    len: usize,
}

impl Default for Bounds {
    /// Bounds with no fields and no spare slots.
    #[inline]
    fn default() -> Self {
        Self::with_capacity(0)
    }
}

impl Bounds {
    /// Create bounds holding zero fields, with `capacity` zero-filled slots
    /// ready to receive field ends.
    #[inline]
    fn with_capacity(capacity: usize) -> Self {
        Self { ends: vec![0; capacity], len: 0 }
    }

    /// Returns the byte range of field `i`, or `None` if there is no such
    /// field.
    #[inline]
    fn get(&self, i: usize) -> Option<Range<usize>> {
        if i >= self.len {
            return None;
        }
        let end = *self.ends.get(i)?;
        // A field starts where its predecessor ends; the first field starts
        // at offset zero.
        let start = match i {
            0 => 0,
            _ => match self.ends.get(i - 1) {
                Some(&prev_end) => prev_end,
                None => 0,
            },
        };
        Some(start..end)
    }

    /// Returns the ending positions of the fields currently in use.
    #[inline]
    fn ends(&self) -> &[usize] {
        let in_use = self.len;
        &self.ends[..in_use]
    }

    /// Returns the ending position of the last field, or `0` when there are
    /// no fields.
    #[inline]
    fn end(&self) -> usize {
        match self.ends().last() {
            Some(&last) => last,
            None => 0,
        }
    }

    /// Returns the number of fields in these bounds.
    #[inline]
    fn len(&self) -> usize {
        self.len
    }

    /// Grow the storage for field ends: double it, with a minimum of 4
    /// slots, zero-filling the new slots. Panics if doubling overflows.
    #[inline]
    fn expand(&mut self) {
        let doubled = self.ends.len().checked_mul(2).unwrap();
        self.ends.resize(cmp::max(4, doubled), 0);
    }

    /// Record a new field that ends at `pos`, growing the storage first if
    /// every slot is taken.
    #[inline]
    fn add(&mut self, pos: usize) {
        if self.ends.len() <= self.len {
            self.expand();
        }
        let slot = self.len;
        self.ends[slot] = pos;
        self.len = slot + 1;
    }
}
724 
725 impl ops::Index<usize> for ByteRecord {
726     type Output = [u8];
727     #[inline]
index(&self, i: usize) -> &[u8]728     fn index(&self, i: usize) -> &[u8] {
729         self.get(i).unwrap()
730     }
731 }
732 
733 impl From<StringRecord> for ByteRecord {
734     #[inline]
from(record: StringRecord) -> ByteRecord735     fn from(record: StringRecord) -> ByteRecord {
736         record.into_byte_record()
737     }
738 }
739 
740 impl<T: AsRef<[u8]>> From<Vec<T>> for ByteRecord {
741     #[inline]
from(xs: Vec<T>) -> ByteRecord742     fn from(xs: Vec<T>) -> ByteRecord {
743         ByteRecord::from_iter(&xs)
744     }
745 }
746 
747 impl<'a, T: AsRef<[u8]>> From<&'a [T]> for ByteRecord {
748     #[inline]
from(xs: &'a [T]) -> ByteRecord749     fn from(xs: &'a [T]) -> ByteRecord {
750         ByteRecord::from_iter(xs)
751     }
752 }
753 
754 impl<T: AsRef<[u8]>> FromIterator<T> for ByteRecord {
755     #[inline]
from_iter<I: IntoIterator<Item = T>>(iter: I) -> ByteRecord756     fn from_iter<I: IntoIterator<Item = T>>(iter: I) -> ByteRecord {
757         let mut record = ByteRecord::new();
758         record.extend(iter);
759         record
760     }
761 }
762 
763 impl<T: AsRef<[u8]>> Extend<T> for ByteRecord {
764     #[inline]
extend<I: IntoIterator<Item = T>>(&mut self, iter: I)765     fn extend<I: IntoIterator<Item = T>>(&mut self, iter: I) {
766         for x in iter {
767             self.push_field(x.as_ref());
768         }
769     }
770 }
771 
772 /// A double-ended iterator over the fields in a byte record.
773 ///
774 /// The `'r` lifetime variable refers to the lifetime of the `ByteRecord` that
775 /// is being iterated over.
#[derive(Clone)]
pub struct ByteRecordIter<'r> {
    /// The record we are iterating over.
    r: &'r ByteRecord,
    /// The starting index of the previous field. (For reverse iteration.)
    ///
    /// Starts out as the total byte length of the record's data, so that the
    /// first field taken from the back ends there.
    last_start: usize,
    /// The ending index of the previous field. (For forward iteration.)
    ///
    /// Starts out as `0`, so that the first field taken from the front
    /// begins there.
    last_end: usize,
    /// The index of forward iteration: the next field to yield from the
    /// front.
    i_forward: usize,
    /// The index of reverse iteration: one past the next field to yield from
    /// the back. Iteration is finished once this equals `i_forward`.
    i_reverse: usize,
}
789 
790 impl<'r> IntoIterator for &'r ByteRecord {
791     type IntoIter = ByteRecordIter<'r>;
792     type Item = &'r [u8];
793 
794     #[inline]
into_iter(self) -> ByteRecordIter<'r>795     fn into_iter(self) -> ByteRecordIter<'r> {
796         ByteRecordIter {
797             r: self,
798             last_start: self.as_slice().len(),
799             last_end: 0,
800             i_forward: 0,
801             i_reverse: self.len(),
802         }
803     }
804 }
805 
806 impl<'r> ExactSizeIterator for ByteRecordIter<'r> {}
807 
808 impl<'r> Iterator for ByteRecordIter<'r> {
809     type Item = &'r [u8];
810 
811     #[inline]
next(&mut self) -> Option<&'r [u8]>812     fn next(&mut self) -> Option<&'r [u8]> {
813         if self.i_forward == self.i_reverse {
814             None
815         } else {
816             let start = self.last_end;
817             let end = self.r.0.bounds.ends()[self.i_forward];
818             self.i_forward += 1;
819             self.last_end = end;
820             Some(&self.r.0.fields[start..end])
821         }
822     }
823 
824     #[inline]
size_hint(&self) -> (usize, Option<usize>)825     fn size_hint(&self) -> (usize, Option<usize>) {
826         let x = self.i_reverse - self.i_forward;
827         (x, Some(x))
828     }
829 
830     #[inline]
count(self) -> usize831     fn count(self) -> usize {
832         self.len()
833     }
834 }
835 
836 impl<'r> DoubleEndedIterator for ByteRecordIter<'r> {
837     #[inline]
next_back(&mut self) -> Option<&'r [u8]>838     fn next_back(&mut self) -> Option<&'r [u8]> {
839         if self.i_forward == self.i_reverse {
840             None
841         } else {
842             self.i_reverse -= 1;
843             let start = self
844                 .i_reverse
845                 .checked_sub(1)
846                 .map(|i| self.r.0.bounds.ends()[i])
847                 .unwrap_or(0);
848             let end = self.last_start;
849             self.last_start = start;
850             Some(&self.r.0.fields[start..end])
851         }
852     }
853 }
854 
855 #[cfg(test)]
856 mod tests {
857     use crate::string_record::StringRecord;
858 
859     use super::ByteRecord;
860 
b(s: &str) -> &[u8]861     fn b(s: &str) -> &[u8] {
862         s.as_bytes()
863     }
864 
865     #[test]
record_1()866     fn record_1() {
867         let mut rec = ByteRecord::new();
868         rec.push_field(b"foo");
869 
870         assert_eq!(rec.len(), 1);
871         assert_eq!(rec.get(0), Some(b("foo")));
872         assert_eq!(rec.get(1), None);
873         assert_eq!(rec.get(2), None);
874     }
875 
876     #[test]
record_2()877     fn record_2() {
878         let mut rec = ByteRecord::new();
879         rec.push_field(b"foo");
880         rec.push_field(b"quux");
881 
882         assert_eq!(rec.len(), 2);
883         assert_eq!(rec.get(0), Some(b("foo")));
884         assert_eq!(rec.get(1), Some(b("quux")));
885         assert_eq!(rec.get(2), None);
886         assert_eq!(rec.get(3), None);
887     }
888 
889     #[test]
empty_record()890     fn empty_record() {
891         let rec = ByteRecord::new();
892 
893         assert_eq!(rec.len(), 0);
894         assert_eq!(rec.get(0), None);
895         assert_eq!(rec.get(1), None);
896     }
897 
898     #[test]
trim_whitespace_only()899     fn trim_whitespace_only() {
900         let mut rec = ByteRecord::from(vec![b" \t\n\r\x0c"]);
901         rec.trim();
902         assert_eq!(rec.get(0), Some(b("")));
903     }
904 
905     #[test]
trim_front()906     fn trim_front() {
907         let mut rec = ByteRecord::from(vec![b" abc"]);
908         rec.trim();
909         assert_eq!(rec.get(0), Some(b("abc")));
910 
911         let mut rec = ByteRecord::from(vec![b(" abc"), b("  xyz")]);
912         rec.trim();
913         assert_eq!(rec.get(0), Some(b("abc")));
914         assert_eq!(rec.get(1), Some(b("xyz")));
915     }
916 
917     #[test]
trim_back()918     fn trim_back() {
919         let mut rec = ByteRecord::from(vec![b"abc "]);
920         rec.trim();
921         assert_eq!(rec.get(0), Some(b("abc")));
922 
923         let mut rec = ByteRecord::from(vec![b("abc "), b("xyz  ")]);
924         rec.trim();
925         assert_eq!(rec.get(0), Some(b("abc")));
926         assert_eq!(rec.get(1), Some(b("xyz")));
927     }
928 
929     #[test]
trim_both()930     fn trim_both() {
931         let mut rec = ByteRecord::from(vec![b" abc "]);
932         rec.trim();
933         assert_eq!(rec.get(0), Some(b("abc")));
934 
935         let mut rec = ByteRecord::from(vec![b(" abc "), b("  xyz  ")]);
936         rec.trim();
937         assert_eq!(rec.get(0), Some(b("abc")));
938         assert_eq!(rec.get(1), Some(b("xyz")));
939     }
940 
941     #[test]
trim_does_not_panic_on_empty_records_1()942     fn trim_does_not_panic_on_empty_records_1() {
943         let mut rec = ByteRecord::from(vec![b""]);
944         rec.trim();
945         assert_eq!(rec.get(0), Some(b("")));
946     }
947 
948     #[test]
trim_does_not_panic_on_empty_records_2()949     fn trim_does_not_panic_on_empty_records_2() {
950         let mut rec = ByteRecord::from(vec![b"", b""]);
951         rec.trim();
952         assert_eq!(rec.get(0), Some(b("")));
953         assert_eq!(rec.get(1), Some(b("")));
954     }
955 
956     #[test]
trim_does_not_panic_on_empty_records_3()957     fn trim_does_not_panic_on_empty_records_3() {
958         let mut rec = ByteRecord::new();
959         rec.trim();
960         assert_eq!(rec.as_slice().len(), 0);
961     }
962 
963     #[test]
empty_field_1()964     fn empty_field_1() {
965         let mut rec = ByteRecord::new();
966         rec.push_field(b"");
967 
968         assert_eq!(rec.len(), 1);
969         assert_eq!(rec.get(0), Some(b("")));
970         assert_eq!(rec.get(1), None);
971         assert_eq!(rec.get(2), None);
972     }
973 
974     #[test]
empty_field_2()975     fn empty_field_2() {
976         let mut rec = ByteRecord::new();
977         rec.push_field(b"");
978         rec.push_field(b"");
979 
980         assert_eq!(rec.len(), 2);
981         assert_eq!(rec.get(0), Some(b("")));
982         assert_eq!(rec.get(1), Some(b("")));
983         assert_eq!(rec.get(2), None);
984         assert_eq!(rec.get(3), None);
985     }
986 
987     #[test]
empty_surround_1()988     fn empty_surround_1() {
989         let mut rec = ByteRecord::new();
990         rec.push_field(b"foo");
991         rec.push_field(b"");
992         rec.push_field(b"quux");
993 
994         assert_eq!(rec.len(), 3);
995         assert_eq!(rec.get(0), Some(b("foo")));
996         assert_eq!(rec.get(1), Some(b("")));
997         assert_eq!(rec.get(2), Some(b("quux")));
998         assert_eq!(rec.get(3), None);
999         assert_eq!(rec.get(4), None);
1000     }
1001 
1002     #[test]
empty_surround_2()1003     fn empty_surround_2() {
1004         let mut rec = ByteRecord::new();
1005         rec.push_field(b"foo");
1006         rec.push_field(b"");
1007         rec.push_field(b"quux");
1008         rec.push_field(b"");
1009 
1010         assert_eq!(rec.len(), 4);
1011         assert_eq!(rec.get(0), Some(b("foo")));
1012         assert_eq!(rec.get(1), Some(b("")));
1013         assert_eq!(rec.get(2), Some(b("quux")));
1014         assert_eq!(rec.get(3), Some(b("")));
1015         assert_eq!(rec.get(4), None);
1016         assert_eq!(rec.get(5), None);
1017     }
1018 
1019     #[test]
utf8_error_1()1020     fn utf8_error_1() {
1021         let mut rec = ByteRecord::new();
1022         rec.push_field(b"foo");
1023         rec.push_field(b"b\xFFar");
1024 
1025         let err = StringRecord::from_byte_record(rec).unwrap_err();
1026         assert_eq!(err.utf8_error().field(), 1);
1027         assert_eq!(err.utf8_error().valid_up_to(), 1);
1028     }
1029 
1030     #[test]
utf8_error_2()1031     fn utf8_error_2() {
1032         let mut rec = ByteRecord::new();
1033         rec.push_field(b"\xFF");
1034 
1035         let err = StringRecord::from_byte_record(rec).unwrap_err();
1036         assert_eq!(err.utf8_error().field(), 0);
1037         assert_eq!(err.utf8_error().valid_up_to(), 0);
1038     }
1039 
1040     #[test]
utf8_error_3()1041     fn utf8_error_3() {
1042         let mut rec = ByteRecord::new();
1043         rec.push_field(b"a\xFF");
1044 
1045         let err = StringRecord::from_byte_record(rec).unwrap_err();
1046         assert_eq!(err.utf8_error().field(), 0);
1047         assert_eq!(err.utf8_error().valid_up_to(), 1);
1048     }
1049 
1050     #[test]
utf8_error_4()1051     fn utf8_error_4() {
1052         let mut rec = ByteRecord::new();
1053         rec.push_field(b"a");
1054         rec.push_field(b"b");
1055         rec.push_field(b"c");
1056         rec.push_field(b"d");
1057         rec.push_field(b"xyz\xFF");
1058 
1059         let err = StringRecord::from_byte_record(rec).unwrap_err();
1060         assert_eq!(err.utf8_error().field(), 4);
1061         assert_eq!(err.utf8_error().valid_up_to(), 3);
1062     }
1063 
1064     #[test]
utf8_error_5()1065     fn utf8_error_5() {
1066         let mut rec = ByteRecord::new();
1067         rec.push_field(b"a");
1068         rec.push_field(b"b");
1069         rec.push_field(b"c");
1070         rec.push_field(b"d");
1071         rec.push_field(b"\xFFxyz");
1072 
1073         let err = StringRecord::from_byte_record(rec).unwrap_err();
1074         assert_eq!(err.utf8_error().field(), 4);
1075         assert_eq!(err.utf8_error().valid_up_to(), 0);
1076     }
1077 
1078     // This tests a tricky case where a single field on its own isn't valid
1079     // UTF-8, but the concatenation of all fields is.
1080     #[test]
utf8_error_6()1081     fn utf8_error_6() {
1082         let mut rec = ByteRecord::new();
1083         rec.push_field(b"a\xc9");
1084         rec.push_field(b"\x91b");
1085 
1086         let err = StringRecord::from_byte_record(rec).unwrap_err();
1087         assert_eq!(err.utf8_error().field(), 0);
1088         assert_eq!(err.utf8_error().valid_up_to(), 1);
1089     }
1090 
1091     // This tests that we can always clear a `ByteRecord` and get a guaranteed
1092     // successful conversion to UTF-8. This permits reusing the allocation.
1093     #[test]
utf8_clear_ok()1094     fn utf8_clear_ok() {
1095         let mut rec = ByteRecord::new();
1096         rec.push_field(b"\xFF");
1097         assert!(StringRecord::from_byte_record(rec).is_err());
1098 
1099         let mut rec = ByteRecord::new();
1100         rec.push_field(b"\xFF");
1101         rec.clear();
1102         assert!(StringRecord::from_byte_record(rec).is_ok());
1103     }
1104 
1105     #[test]
iter()1106     fn iter() {
1107         let data = vec!["foo", "bar", "baz", "quux", "wat"];
1108         let rec = ByteRecord::from(&*data);
1109         let got: Vec<&str> =
1110             rec.iter().map(|x| ::std::str::from_utf8(x).unwrap()).collect();
1111         assert_eq!(data, got);
1112     }
1113 
1114     #[test]
iter_reverse()1115     fn iter_reverse() {
1116         let mut data = vec!["foo", "bar", "baz", "quux", "wat"];
1117         let rec = ByteRecord::from(&*data);
1118         let got: Vec<&str> = rec
1119             .iter()
1120             .rev()
1121             .map(|x| ::std::str::from_utf8(x).unwrap())
1122             .collect();
1123         data.reverse();
1124         assert_eq!(data, got);
1125     }
1126 
1127     #[test]
iter_forward_and_reverse()1128     fn iter_forward_and_reverse() {
1129         let data = vec!["foo", "bar", "baz", "quux", "wat"];
1130         let rec = ByteRecord::from(data);
1131         let mut it = rec.iter();
1132 
1133         assert_eq!(it.next_back(), Some(b("wat")));
1134         assert_eq!(it.next(), Some(b("foo")));
1135         assert_eq!(it.next(), Some(b("bar")));
1136         assert_eq!(it.next_back(), Some(b("quux")));
1137         assert_eq!(it.next(), Some(b("baz")));
1138         assert_eq!(it.next_back(), None);
1139         assert_eq!(it.next(), None);
1140     }
1141 
1142     // Check that record equality respects field boundaries.
1143     //
1144     // Regression test for #138.
1145     #[test]
eq_field_boundaries()1146     fn eq_field_boundaries() {
1147         let test1 = ByteRecord::from(vec!["12", "34"]);
1148         let test2 = ByteRecord::from(vec!["123", "4"]);
1149 
1150         assert_ne!(test1, test2);
1151     }
1152 
1153     // Check that record equality respects number of fields.
1154     //
1155     // Regression test for #138.
1156     #[test]
eq_record_len()1157     fn eq_record_len() {
1158         let test1 = ByteRecord::from(vec!["12", "34", "56"]);
1159         let test2 = ByteRecord::from(vec!["12", "34"]);
1160         assert_ne!(test1, test2);
1161     }
1162 }
1163