1 use std::cmp;
2 use std::fmt;
3 use std::iter::FromIterator;
4 use std::ops::{self, Range};
5 use std::result;
6 
7 use bstr::{BString, ByteSlice};
8 use serde::de::Deserialize;
9 
10 use crate::deserializer::deserialize_byte_record;
11 use crate::error::{new_utf8_error, Result, Utf8Error};
12 use crate::string_record::StringRecord;
13 
14 /// A single CSV record stored as raw bytes.
15 ///
16 /// A byte record permits reading or writing CSV rows that are not UTF-8.
17 /// In general, you should prefer using a
18 /// [`StringRecord`](struct.StringRecord.html)
19 /// since it is more ergonomic, but a `ByteRecord` is provided in case you need
20 /// it.
21 ///
22 /// If you are using the Serde (de)serialization APIs, then you probably never
23 /// need to interact with a `ByteRecord` or a `StringRecord`. However, there
24 /// are some circumstances in which you might need to use a raw record type
25 /// while still using Serde. For example, if you need to deserialize possibly
26 /// invalid UTF-8 fields, then you'll need to first read your record into a
27 /// `ByteRecord`, and then use `ByteRecord::deserialize` to run Serde. Another
28 /// reason for using the raw record deserialization APIs is if you're using
29 /// Serde to read into borrowed data such as a `&'a str` or a `&'a [u8]`.
30 ///
31 /// Two `ByteRecord`s are compared on the basis of their field data. Any
32 /// position information associated with the records is ignored.
// `PartialEq` is written by hand (see the impl that follows) so that only
// field data takes part in comparisons; `Eq` is derived on top of it.
#[derive(Clone, Eq)]
pub struct ByteRecord(Box<ByteRecordInner>);
35 
36 impl PartialEq for ByteRecord {
eq(&self, other: &ByteRecord) -> bool37     fn eq(&self, other: &ByteRecord) -> bool {
38         if self.len() != other.len() {
39             return false;
40         }
41         self.iter().zip(other.iter()).all(|e| e.0 == e.1)
42     }
43 }
44 
45 impl<T: AsRef<[u8]>> PartialEq<Vec<T>> for ByteRecord {
eq(&self, other: &Vec<T>) -> bool46     fn eq(&self, other: &Vec<T>) -> bool {
47         self.iter_eq(other)
48     }
49 }
50 
51 impl<'a, T: AsRef<[u8]>> PartialEq<Vec<T>> for &'a ByteRecord {
eq(&self, other: &Vec<T>) -> bool52     fn eq(&self, other: &Vec<T>) -> bool {
53         self.iter_eq(other)
54     }
55 }
56 
57 impl<T: AsRef<[u8]>> PartialEq<[T]> for ByteRecord {
eq(&self, other: &[T]) -> bool58     fn eq(&self, other: &[T]) -> bool {
59         self.iter_eq(other)
60     }
61 }
62 
63 impl<'a, T: AsRef<[u8]>> PartialEq<[T]> for &'a ByteRecord {
eq(&self, other: &[T]) -> bool64     fn eq(&self, other: &[T]) -> bool {
65         self.iter_eq(other)
66     }
67 }
68 
69 impl fmt::Debug for ByteRecord {
fmt(&self, f: &mut fmt::Formatter) -> fmt::Result70     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
71         let mut fields = vec![];
72         for field in self {
73             fields.push(BString::from(field.to_vec()));
74         }
75         write!(f, "ByteRecord({:?})", fields)
76     }
77 }
78 
79 /// The inner portion of a byte record.
80 ///
81 /// We use this memory layout so that moving a `ByteRecord` only requires
82 /// moving a single pointer. The optimization is dubious at best, but does
83 /// seem to result in slightly better numbers in microbenchmarks. Methinks this
84 /// may heavily depend on the underlying allocator.
#[derive(Clone, Debug, Eq, PartialEq)]
struct ByteRecordInner {
    /// The position of this byte record, or `None` if no position has been
    /// assigned.
    pos: Option<Position>,
    /// All fields in this record, stored contiguously.
    ///
    /// Only the prefix ending at the last field's end offset holds field
    /// data; anything beyond that is spare room for fields added later.
    fields: Vec<u8>,
    /// The number of and location of each field in this record.
    bounds: Bounds,
}
94 
95 impl Default for ByteRecord {
96     #[inline]
default() -> ByteRecord97     fn default() -> ByteRecord {
98         ByteRecord::new()
99     }
100 }
101 
102 impl ByteRecord {
103     /// Create a new empty `ByteRecord`.
104     ///
105     /// Note that you may find the `ByteRecord::from` constructor more
106     /// convenient, which is provided by an impl on the `From` trait.
107     ///
108     /// # Example: create an empty record
109     ///
110     /// ```
111     /// use csv::ByteRecord;
112     ///
113     /// let record = ByteRecord::new();
114     /// assert_eq!(record.len(), 0);
115     /// ```
116     ///
117     /// # Example: initialize a record from a `Vec`
118     ///
119     /// ```
120     /// use csv::ByteRecord;
121     ///
122     /// let record = ByteRecord::from(vec!["a", "b", "c"]);
123     /// assert_eq!(record.len(), 3);
124     /// ```
125     #[inline]
new() -> ByteRecord126     pub fn new() -> ByteRecord {
127         ByteRecord::with_capacity(0, 0)
128     }
129 
130     /// Create a new empty `ByteRecord` with the given capacity settings.
131     ///
132     /// `buffer` refers to the capacity of the buffer used to store the
133     /// actual row contents. `fields` refers to the number of fields one
134     /// might expect to store.
135     #[inline]
with_capacity(buffer: usize, fields: usize) -> ByteRecord136     pub fn with_capacity(buffer: usize, fields: usize) -> ByteRecord {
137         ByteRecord(Box::new(ByteRecordInner {
138             pos: None,
139             fields: vec![0; buffer],
140             bounds: Bounds::with_capacity(fields),
141         }))
142     }
143 
144     /// Deserialize this record.
145     ///
146     /// The `D` type parameter refers to the type that this record should be
147     /// deserialized into. The `'de` lifetime refers to the lifetime of the
148     /// `ByteRecord`. The `'de` lifetime permits deserializing into structs
149     /// that borrow field data from this record.
150     ///
151     /// An optional `headers` parameter permits deserializing into a struct
152     /// based on its field names (corresponding to header values) rather than
153     /// the order in which the fields are defined.
154     ///
155     /// # Example: without headers
156     ///
157     /// This shows how to deserialize a single row into a struct based on the
158     /// order in which fields occur. This example also shows how to borrow
159     /// fields from the `ByteRecord`, which results in zero allocation
160     /// deserialization.
161     ///
162     /// ```
163     /// use std::error::Error;
164     ///
165     /// use csv::ByteRecord;
166     /// use serde::Deserialize;
167     ///
168     /// #[derive(Deserialize)]
169     /// struct Row<'a> {
170     ///     city: &'a str,
171     ///     country: &'a str,
172     ///     population: u64,
173     /// }
174     ///
175     /// # fn main() { example().unwrap() }
176     /// fn example() -> Result<(), Box<dyn Error>> {
177     ///     let record = ByteRecord::from(vec![
178     ///         "Boston", "United States", "4628910",
179     ///     ]);
180     ///
181     ///     let row: Row = record.deserialize(None)?;
182     ///     assert_eq!(row.city, "Boston");
183     ///     assert_eq!(row.country, "United States");
184     ///     assert_eq!(row.population, 4628910);
185     ///     Ok(())
186     /// }
187     /// ```
188     ///
189     /// # Example: with headers
190     ///
191     /// This example is like the previous one, but shows how to deserialize
192     /// into a struct based on the struct's field names. For this to work,
193     /// you must provide a header row.
194     ///
195     /// This example also shows that you can deserialize into owned data
196     /// types (e.g., `String`) instead of borrowed data types (e.g., `&str`).
197     ///
198     /// ```
199     /// use std::error::Error;
200     ///
201     /// use csv::ByteRecord;
202     /// use serde::Deserialize;
203     ///
204     /// #[derive(Deserialize)]
205     /// struct Row {
206     ///     city: String,
207     ///     country: String,
208     ///     population: u64,
209     /// }
210     ///
211     /// # fn main() { example().unwrap() }
212     /// fn example() -> Result<(), Box<dyn Error>> {
213     ///     // Notice that the fields are not in the same order
214     ///     // as the fields in the struct!
215     ///     let header = ByteRecord::from(vec![
216     ///         "country", "city", "population",
217     ///     ]);
218     ///     let record = ByteRecord::from(vec![
219     ///         "United States", "Boston", "4628910",
220     ///     ]);
221     ///
222     ///     let row: Row = record.deserialize(Some(&header))?;
223     ///     assert_eq!(row.city, "Boston");
224     ///     assert_eq!(row.country, "United States");
225     ///     assert_eq!(row.population, 4628910);
226     ///     Ok(())
227     /// }
228     /// ```
    pub fn deserialize<'de, D: Deserialize<'de>>(
        &'de self,
        headers: Option<&'de ByteRecord>,
    ) -> Result<D> {
        // All of the work is delegated to the crate's deserializer; both the
        // record and the optional headers are borrowed for `'de` so that the
        // output may borrow from them.
        deserialize_byte_record(self, headers)
    }
235 
236     /// Returns an iterator over all fields in this record.
237     ///
238     /// # Example
239     ///
240     /// This example shows how to iterate over each field in a `ByteRecord`.
241     ///
242     /// ```
243     /// use csv::ByteRecord;
244     ///
245     /// let record = ByteRecord::from(vec!["a", "b", "c"]);
246     /// for field in record.iter() {
247     ///     assert!(field == b"a" || field == b"b" || field == b"c");
248     /// }
249     /// ```
250     #[inline]
iter(&self) -> ByteRecordIter251     pub fn iter(&self) -> ByteRecordIter {
252         self.into_iter()
253     }
254 
255     /// Return the field at index `i`.
256     ///
257     /// If no field at index `i` exists, then this returns `None`.
258     ///
259     /// # Example
260     ///
261     /// ```
262     /// use csv::ByteRecord;
263     ///
264     /// let record = ByteRecord::from(vec!["a", "b", "c"]);
265     /// assert_eq!(record.get(1), Some(&b"b"[..]));
266     /// assert_eq!(record.get(3), None);
267     /// ```
268     #[inline]
get(&self, i: usize) -> Option<&[u8]>269     pub fn get(&self, i: usize) -> Option<&[u8]> {
270         self.0.bounds.get(i).map(|range| &self.0.fields[range])
271     }
272 
273     /// Returns true if and only if this record is empty.
274     ///
275     /// # Example
276     ///
277     /// ```
278     /// use csv::ByteRecord;
279     ///
280     /// assert!(ByteRecord::new().is_empty());
281     /// ```
282     #[inline]
is_empty(&self) -> bool283     pub fn is_empty(&self) -> bool {
284         self.len() == 0
285     }
286 
287     /// Returns the number of fields in this record.
288     ///
289     /// # Example
290     ///
291     /// ```
292     /// use csv::ByteRecord;
293     ///
294     /// let record = ByteRecord::from(vec!["a", "b", "c"]);
295     /// assert_eq!(record.len(), 3);
296     /// ```
297     #[inline]
len(&self) -> usize298     pub fn len(&self) -> usize {
299         self.0.bounds.len()
300     }
301 
302     /// Truncate this record to `n` fields.
303     ///
304     /// If `n` is greater than the number of fields in this record, then this
305     /// has no effect.
306     ///
307     /// # Example
308     ///
309     /// ```
310     /// use csv::ByteRecord;
311     ///
312     /// let mut record = ByteRecord::from(vec!["a", "b", "c"]);
313     /// assert_eq!(record.len(), 3);
314     /// record.truncate(1);
315     /// assert_eq!(record.len(), 1);
316     /// assert_eq!(record, vec!["a"]);
317     /// ```
318     #[inline]
truncate(&mut self, n: usize)319     pub fn truncate(&mut self, n: usize) {
320         if n <= self.len() {
321             self.0.bounds.len = n;
322         }
323     }
324 
325     /// Clear this record so that it has zero fields.
326     ///
327     /// This is equivalent to calling `truncate(0)`.
328     ///
329     /// Note that it is not necessary to clear the record to reuse it with
330     /// the CSV reader.
331     ///
332     /// # Example
333     ///
334     /// ```
335     /// use csv::ByteRecord;
336     ///
337     /// let mut record = ByteRecord::from(vec!["a", "b", "c"]);
338     /// assert_eq!(record.len(), 3);
339     /// record.clear();
340     /// assert_eq!(record.len(), 0);
341     /// ```
342     #[inline]
clear(&mut self)343     pub fn clear(&mut self) {
344         self.truncate(0);
345     }
346 
347     /// Trim the fields of this record so that leading and trailing whitespace
348     /// is removed.
349     ///
350     /// This method uses the ASCII definition of whitespace. That is, only
351     /// bytes in the class `[\t\n\v\f\r ]` are trimmed.
352     ///
353     /// # Example
354     ///
355     /// ```
356     /// use csv::ByteRecord;
357     ///
358     /// let mut record = ByteRecord::from(vec![
359     ///     "  ", "\tfoo", "bar  ", "b a z",
360     /// ]);
361     /// record.trim();
362     /// assert_eq!(record, vec!["", "foo", "bar", "b a z"]);
363     /// ```
trim(&mut self)364     pub fn trim(&mut self) {
365         let length = self.len();
366         if length == 0 {
367             return;
368         }
369         // TODO: We could likely do this in place, but for now, we allocate.
370         let mut trimmed =
371             ByteRecord::with_capacity(self.as_slice().len(), self.len());
372         trimmed.set_position(self.position().cloned());
373         for field in &*self {
374             trimmed.push_field(field.trim());
375         }
376         *self = trimmed;
377     }
378 
379     /// Add a new field to this record.
380     ///
381     /// # Example
382     ///
383     /// ```
384     /// use csv::ByteRecord;
385     ///
386     /// let mut record = ByteRecord::new();
387     /// record.push_field(b"foo");
388     /// assert_eq!(&record[0], b"foo");
389     /// ```
390     #[inline]
push_field(&mut self, field: &[u8])391     pub fn push_field(&mut self, field: &[u8]) {
392         let (s, e) = (self.0.bounds.end(), self.0.bounds.end() + field.len());
393         while e > self.0.fields.len() {
394             self.expand_fields();
395         }
396         self.0.fields[s..e].copy_from_slice(field);
397         self.0.bounds.add(e);
398     }
399 
400     /// Return the position of this record, if available.
401     ///
402     /// # Example
403     ///
404     /// ```
405     /// use std::error::Error;
406     ///
407     /// use csv::{ByteRecord, ReaderBuilder};
408     ///
409     /// # fn main() { example().unwrap(); }
410     /// fn example() -> Result<(), Box<dyn Error>> {
411     ///     let mut record = ByteRecord::new();
412     ///     let mut rdr = ReaderBuilder::new()
413     ///         .has_headers(false)
414     ///         .from_reader("a,b,c\nx,y,z".as_bytes());
415     ///
416     ///     assert!(rdr.read_byte_record(&mut record)?);
417     ///     {
418     ///         let pos = record.position().expect("a record position");
419     ///         assert_eq!(pos.byte(), 0);
420     ///         assert_eq!(pos.line(), 1);
421     ///         assert_eq!(pos.record(), 0);
422     ///     }
423     ///
424     ///     assert!(rdr.read_byte_record(&mut record)?);
425     ///     {
426     ///         let pos = record.position().expect("a record position");
427     ///         assert_eq!(pos.byte(), 6);
428     ///         assert_eq!(pos.line(), 2);
429     ///         assert_eq!(pos.record(), 1);
430     ///     }
431     ///
432     ///     // Finish the CSV reader for good measure.
433     ///     assert!(!rdr.read_byte_record(&mut record)?);
434     ///     Ok(())
435     /// }
436     /// ```
437     #[inline]
position(&self) -> Option<&Position>438     pub fn position(&self) -> Option<&Position> {
439         self.0.pos.as_ref()
440     }
441 
442     /// Set the position of this record.
443     ///
444     /// # Example
445     ///
446     /// ```
447     /// use csv::{ByteRecord, Position};
448     ///
449     /// let mut record = ByteRecord::from(vec!["a", "b", "c"]);
450     /// let mut pos = Position::new();
451     /// pos.set_byte(100);
452     /// pos.set_line(4);
453     /// pos.set_record(2);
454     ///
455     /// record.set_position(Some(pos.clone()));
456     /// assert_eq!(record.position(), Some(&pos));
457     /// ```
458     #[inline]
set_position(&mut self, pos: Option<Position>)459     pub fn set_position(&mut self, pos: Option<Position>) {
460         self.0.pos = pos;
461     }
462 
463     /// Return the start and end position of a field in this record.
464     ///
465     /// If no such field exists at the given index, then return `None`.
466     ///
467     /// The range returned can be used with the slice returned by `as_slice`.
468     ///
469     /// # Example
470     ///
471     /// ```
472     /// use csv::ByteRecord;
473     ///
474     /// let record = ByteRecord::from(vec!["foo", "quux", "z"]);
475     /// let range = record.range(1).expect("a record range");
476     /// assert_eq!(&record.as_slice()[range], &b"quux"[..]);
477     /// ```
478     #[inline]
range(&self, i: usize) -> Option<Range<usize>>479     pub fn range(&self, i: usize) -> Option<Range<usize>> {
480         self.0.bounds.get(i)
481     }
482 
483     /// Return the entire row as a single byte slice. The slice returned stores
484     /// all fields contiguously. The boundaries of each field can be determined
485     /// via the `range` method.
486     ///
487     /// # Example
488     ///
489     /// ```
490     /// use csv::ByteRecord;
491     ///
492     /// let record = ByteRecord::from(vec!["foo", "quux", "z"]);
493     /// assert_eq!(record.as_slice(), &b"fooquuxz"[..]);
494     /// ```
495     #[inline]
as_slice(&self) -> &[u8]496     pub fn as_slice(&self) -> &[u8] {
497         &self.0.fields[..self.0.bounds.end()]
498     }
499 
500     /// Retrieve the underlying parts of a byte record.
501     #[inline]
as_parts(&mut self) -> (&mut Vec<u8>, &mut Vec<usize>)502     pub(crate) fn as_parts(&mut self) -> (&mut Vec<u8>, &mut Vec<usize>) {
503         let inner = &mut *self.0;
504         (&mut inner.fields, &mut inner.bounds.ends)
505     }
506 
    /// Set the number of fields in the given record.
508     #[inline]
set_len(&mut self, len: usize)509     pub(crate) fn set_len(&mut self, len: usize) {
510         self.0.bounds.len = len;
511     }
512 
513     /// Expand the capacity for storing fields.
514     #[inline]
expand_fields(&mut self)515     pub(crate) fn expand_fields(&mut self) {
516         let new_len = self.0.fields.len().checked_mul(2).unwrap();
517         self.0.fields.resize(cmp::max(4, new_len), 0);
518     }
519 
520     /// Expand the capacity for storing field ending positions.
521     #[inline]
expand_ends(&mut self)522     pub(crate) fn expand_ends(&mut self) {
523         self.0.bounds.expand();
524     }
525 
526     /// Validate the given record as UTF-8.
527     ///
528     /// If it's not UTF-8, return an error.
529     #[inline]
validate(&self) -> result::Result<(), Utf8Error>530     pub(crate) fn validate(&self) -> result::Result<(), Utf8Error> {
531         // If the entire buffer is ASCII, then we have nothing to fear.
532         if self.0.fields[..self.0.bounds.end()].is_ascii() {
533             return Ok(());
534         }
535         // Otherwise, we must check each field individually to ensure that
536         // it's valid UTF-8.
537         for (i, field) in self.iter().enumerate() {
538             if let Err(err) = field.to_str() {
539                 return Err(new_utf8_error(i, err.valid_up_to()));
540             }
541         }
542         Ok(())
543     }
544 
545     /// Compare the given byte record with the iterator of fields for equality.
iter_eq<I, T>(&self, other: I) -> bool where I: IntoIterator<Item = T>, T: AsRef<[u8]>,546     pub(crate) fn iter_eq<I, T>(&self, other: I) -> bool
547     where
548         I: IntoIterator<Item = T>,
549         T: AsRef<[u8]>,
550     {
551         let mut it_record = self.iter();
552         let mut it_other = other.into_iter();
553         loop {
554             match (it_record.next(), it_other.next()) {
555                 (None, None) => return true,
556                 (None, Some(_)) | (Some(_), None) => return false,
557                 (Some(x), Some(y)) => {
558                     if x != y.as_ref() {
559                         return false;
560                     }
561                 }
562             }
563         }
564     }
565 }
566 
567 /// A position in CSV data.
568 ///
569 /// A position is used to report errors in CSV data. All positions include the
570 /// byte offset, line number and record index at which the error occurred.
571 ///
572 /// Byte offsets and record indices start at `0`. Line numbers start at `1`.
573 ///
574 /// A CSV reader will automatically assign the position of each record.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct Position {
    // Byte offset, counted from 0.
    byte: u64,
    // Line number, counted from 1.
    line: u64,
    // Record index, counted from 0.
    record: u64,
}

impl Position {
    /// Creates a position pointing at the very start of CSV data: byte
    /// offset `0`, line `1` and record index `0`.
    #[inline]
    pub fn new() -> Position {
        Self { record: 0, line: 1, byte: 0 }
    }

    /// Returns the byte offset of this position. Offsets start at `0`.
    #[inline]
    pub fn byte(&self) -> u64 {
        let Self { byte, .. } = *self;
        byte
    }
    /// Returns the line number of this position. Lines start at `1`.
    #[inline]
    pub fn line(&self) -> u64 {
        let Self { line, .. } = *self;
        line
    }
    /// Returns the record index of this position. The first record has
    /// index `0`.
    #[inline]
    pub fn record(&self) -> u64 {
        let Self { record, .. } = *self;
        record
    }

    /// Overwrites the byte offset and returns `self` for chaining.
    #[inline]
    pub fn set_byte(&mut self, byte: u64) -> &mut Position {
        self.byte = byte;
        self
    }

    /// Overwrites the line number and returns `self` for chaining.
    ///
    /// # Panics
    ///
    /// Panics if `line` is `0`, since line numbers start at `1`.
    #[inline]
    pub fn set_line(&mut self, line: u64) -> &mut Position {
        assert!(line > 0);
        self.line = line;
        self
    }

    /// Overwrites the record index and returns `self` for chaining.
    #[inline]
    pub fn set_record(&mut self, record: u64) -> &mut Position {
        self.record = record;
        self
    }
}
629 
630 /// The bounds of fields in a single record.
#[derive(Clone, Debug, Eq, PartialEq)]
struct Bounds {
    /// The ending index of each field.
    ///
    /// This vector may be longer than `len`; entries at or beyond `len` are
    /// spare slots and carry no meaning.
    ends: Vec<usize>,
    /// The number of fields in this record.
    ///
    /// Technically, we could drop this field and maintain an invariant that
    /// `ends.len()` is always the number of fields, but doing that efficiently
    /// requires attention to safety. We play it safe at essentially no cost.
    len: usize,
}

impl Default for Bounds {
    /// Returns bounds describing zero fields with no preallocated slots.
    #[inline]
    fn default() -> Bounds {
        Bounds::with_capacity(0)
    }
}

impl Bounds {
    /// Create a new set of bounds with the given capacity for storing the
    /// ends of fields.
    ///
    /// The returned bounds describe zero fields; `capacity` only controls
    /// how many end slots exist before `expand` is needed.
    #[inline]
    fn with_capacity(capacity: usize) -> Bounds {
        Bounds { ends: vec![0; capacity], len: 0 }
    }

    /// Returns the bounds of field `i`, or `None` if there is no such field.
    #[inline]
    fn get(&self, i: usize) -> Option<Range<usize>> {
        if i >= self.len {
            return None;
        }
        let end = *self.ends.get(i)?;
        // A field starts where its predecessor ends; the first field starts
        // at offset 0. `i - 1` is in bounds because slot `i` exists.
        let start = if i == 0 { 0 } else { self.ends[i - 1] };
        Some(ops::Range { start, end })
    }

    /// Returns a slice of ending positions of all fields.
    ///
    /// Spare slots beyond the current field count are not included.
    #[inline]
    fn ends(&self) -> &[usize] {
        &self.ends[..self.len]
    }

    /// Return the last position of the last field.
    ///
    /// If there are no fields, this returns `0`.
    #[inline]
    fn end(&self) -> usize {
        self.ends().last().map_or(0, |&last| last)
    }

    /// Returns the number of fields in these bounds.
    #[inline]
    fn len(&self) -> usize {
        self.len
    }

    /// Expand the capacity for storing field ending positions.
    ///
    /// The number of slots is doubled, with a minimum of 4 so that empty
    /// bounds can grow as well.
    ///
    /// # Panics
    ///
    /// Panics if doubling the number of slots overflows `usize`.
    #[inline]
    fn expand(&mut self) {
        let new_len = self.ends.len().checked_mul(2).unwrap();
        self.ends.resize(cmp::max(4, new_len), 0);
    }

    /// Add a new field with the given ending position.
    #[inline]
    fn add(&mut self, pos: usize) {
        // Make room first if every slot is already in use.
        if self.len >= self.ends.len() {
            self.expand();
        }
        self.ends[self.len] = pos;
        self.len += 1;
    }
}
712 
713 impl ops::Index<usize> for ByteRecord {
714     type Output = [u8];
715     #[inline]
index(&self, i: usize) -> &[u8]716     fn index(&self, i: usize) -> &[u8] {
717         self.get(i).unwrap()
718     }
719 }
720 
721 impl From<StringRecord> for ByteRecord {
722     #[inline]
from(record: StringRecord) -> ByteRecord723     fn from(record: StringRecord) -> ByteRecord {
724         record.into_byte_record()
725     }
726 }
727 
728 impl<T: AsRef<[u8]>> From<Vec<T>> for ByteRecord {
729     #[inline]
from(xs: Vec<T>) -> ByteRecord730     fn from(xs: Vec<T>) -> ByteRecord {
731         ByteRecord::from_iter(&xs)
732     }
733 }
734 
735 impl<'a, T: AsRef<[u8]>> From<&'a [T]> for ByteRecord {
736     #[inline]
from(xs: &'a [T]) -> ByteRecord737     fn from(xs: &'a [T]) -> ByteRecord {
738         ByteRecord::from_iter(xs)
739     }
740 }
741 
742 impl<T: AsRef<[u8]>> FromIterator<T> for ByteRecord {
743     #[inline]
from_iter<I: IntoIterator<Item = T>>(iter: I) -> ByteRecord744     fn from_iter<I: IntoIterator<Item = T>>(iter: I) -> ByteRecord {
745         let mut record = ByteRecord::new();
746         record.extend(iter);
747         record
748     }
749 }
750 
751 impl<T: AsRef<[u8]>> Extend<T> for ByteRecord {
752     #[inline]
extend<I: IntoIterator<Item = T>>(&mut self, iter: I)753     fn extend<I: IntoIterator<Item = T>>(&mut self, iter: I) {
754         for x in iter {
755             self.push_field(x.as_ref());
756         }
757     }
758 }
759 
760 /// A double-ended iterator over the fields in a byte record.
761 ///
762 /// The `'r` lifetime variable refers to the lifetime of the `ByteRecord` that
763 /// is being iterated over.
764 pub struct ByteRecordIter<'r> {
765     /// The record we are iterating over.
766     r: &'r ByteRecord,
767     /// The starting index of the previous field. (For reverse iteration.)
768     last_start: usize,
769     /// The ending index of the previous field. (For forward iteration.)
770     last_end: usize,
771     /// The index of forward iteration.
772     i_forward: usize,
773     /// The index of reverse iteration.
774     i_reverse: usize,
775 }
776 
777 impl<'r> IntoIterator for &'r ByteRecord {
778     type IntoIter = ByteRecordIter<'r>;
779     type Item = &'r [u8];
780 
781     #[inline]
into_iter(self) -> ByteRecordIter<'r>782     fn into_iter(self) -> ByteRecordIter<'r> {
783         ByteRecordIter {
784             r: self,
785             last_start: self.as_slice().len(),
786             last_end: 0,
787             i_forward: 0,
788             i_reverse: self.len(),
789         }
790     }
791 }
792 
// `size_hint` reports identical lower and upper bounds for this iterator, so
// the default `len` supplied by `ExactSizeIterator` can be used as is.
impl<'r> ExactSizeIterator for ByteRecordIter<'r> {}
794 
795 impl<'r> Iterator for ByteRecordIter<'r> {
796     type Item = &'r [u8];
797 
798     #[inline]
next(&mut self) -> Option<&'r [u8]>799     fn next(&mut self) -> Option<&'r [u8]> {
800         if self.i_forward == self.i_reverse {
801             None
802         } else {
803             let start = self.last_end;
804             let end = self.r.0.bounds.ends()[self.i_forward];
805             self.i_forward += 1;
806             self.last_end = end;
807             Some(&self.r.0.fields[start..end])
808         }
809     }
810 
811     #[inline]
size_hint(&self) -> (usize, Option<usize>)812     fn size_hint(&self) -> (usize, Option<usize>) {
813         let x = self.i_reverse - self.i_forward;
814         (x, Some(x))
815     }
816 
817     #[inline]
count(self) -> usize818     fn count(self) -> usize {
819         self.len()
820     }
821 }
822 
823 impl<'r> DoubleEndedIterator for ByteRecordIter<'r> {
824     #[inline]
next_back(&mut self) -> Option<&'r [u8]>825     fn next_back(&mut self) -> Option<&'r [u8]> {
826         if self.i_forward == self.i_reverse {
827             None
828         } else {
829             self.i_reverse -= 1;
830             let start = self
831                 .i_reverse
832                 .checked_sub(1)
833                 .map(|i| self.r.0.bounds.ends()[i])
834                 .unwrap_or(0);
835             let end = self.last_start;
836             self.last_start = start;
837             Some(&self.r.0.fields[start..end])
838         }
839     }
840 }
841 
842 #[cfg(test)]
843 mod tests {
844     use crate::string_record::StringRecord;
845 
846     use super::ByteRecord;
847 
b(s: &str) -> &[u8]848     fn b(s: &str) -> &[u8] {
849         s.as_bytes()
850     }
851 
852     #[test]
record_1()853     fn record_1() {
854         let mut rec = ByteRecord::new();
855         rec.push_field(b"foo");
856 
857         assert_eq!(rec.len(), 1);
858         assert_eq!(rec.get(0), Some(b("foo")));
859         assert_eq!(rec.get(1), None);
860         assert_eq!(rec.get(2), None);
861     }
862 
863     #[test]
record_2()864     fn record_2() {
865         let mut rec = ByteRecord::new();
866         rec.push_field(b"foo");
867         rec.push_field(b"quux");
868 
869         assert_eq!(rec.len(), 2);
870         assert_eq!(rec.get(0), Some(b("foo")));
871         assert_eq!(rec.get(1), Some(b("quux")));
872         assert_eq!(rec.get(2), None);
873         assert_eq!(rec.get(3), None);
874     }
875 
876     #[test]
empty_record()877     fn empty_record() {
878         let rec = ByteRecord::new();
879 
880         assert_eq!(rec.len(), 0);
881         assert_eq!(rec.get(0), None);
882         assert_eq!(rec.get(1), None);
883     }
884 
885     #[test]
trim_whitespace_only()886     fn trim_whitespace_only() {
887         let mut rec = ByteRecord::from(vec![b" \t\n\r\x0c"]);
888         rec.trim();
889         assert_eq!(rec.get(0), Some(b("")));
890     }
891 
892     #[test]
trim_front()893     fn trim_front() {
894         let mut rec = ByteRecord::from(vec![b" abc"]);
895         rec.trim();
896         assert_eq!(rec.get(0), Some(b("abc")));
897 
898         let mut rec = ByteRecord::from(vec![b(" abc"), b("  xyz")]);
899         rec.trim();
900         assert_eq!(rec.get(0), Some(b("abc")));
901         assert_eq!(rec.get(1), Some(b("xyz")));
902     }
903 
904     #[test]
trim_back()905     fn trim_back() {
906         let mut rec = ByteRecord::from(vec![b"abc "]);
907         rec.trim();
908         assert_eq!(rec.get(0), Some(b("abc")));
909 
910         let mut rec = ByteRecord::from(vec![b("abc "), b("xyz  ")]);
911         rec.trim();
912         assert_eq!(rec.get(0), Some(b("abc")));
913         assert_eq!(rec.get(1), Some(b("xyz")));
914     }
915 
916     #[test]
trim_both()917     fn trim_both() {
918         let mut rec = ByteRecord::from(vec![b" abc "]);
919         rec.trim();
920         assert_eq!(rec.get(0), Some(b("abc")));
921 
922         let mut rec = ByteRecord::from(vec![b(" abc "), b("  xyz  ")]);
923         rec.trim();
924         assert_eq!(rec.get(0), Some(b("abc")));
925         assert_eq!(rec.get(1), Some(b("xyz")));
926     }
927 
928     #[test]
trim_does_not_panic_on_empty_records_1()929     fn trim_does_not_panic_on_empty_records_1() {
930         let mut rec = ByteRecord::from(vec![b""]);
931         rec.trim();
932         assert_eq!(rec.get(0), Some(b("")));
933     }
934 
935     #[test]
trim_does_not_panic_on_empty_records_2()936     fn trim_does_not_panic_on_empty_records_2() {
937         let mut rec = ByteRecord::from(vec![b"", b""]);
938         rec.trim();
939         assert_eq!(rec.get(0), Some(b("")));
940         assert_eq!(rec.get(1), Some(b("")));
941     }
942 
943     #[test]
trim_does_not_panic_on_empty_records_3()944     fn trim_does_not_panic_on_empty_records_3() {
945         let mut rec = ByteRecord::new();
946         rec.trim();
947         assert_eq!(rec.as_slice().len(), 0);
948     }
949 
950     #[test]
empty_field_1()951     fn empty_field_1() {
952         let mut rec = ByteRecord::new();
953         rec.push_field(b"");
954 
955         assert_eq!(rec.len(), 1);
956         assert_eq!(rec.get(0), Some(b("")));
957         assert_eq!(rec.get(1), None);
958         assert_eq!(rec.get(2), None);
959     }
960 
961     #[test]
empty_field_2()962     fn empty_field_2() {
963         let mut rec = ByteRecord::new();
964         rec.push_field(b"");
965         rec.push_field(b"");
966 
967         assert_eq!(rec.len(), 2);
968         assert_eq!(rec.get(0), Some(b("")));
969         assert_eq!(rec.get(1), Some(b("")));
970         assert_eq!(rec.get(2), None);
971         assert_eq!(rec.get(3), None);
972     }
973 
974     #[test]
empty_surround_1()975     fn empty_surround_1() {
976         let mut rec = ByteRecord::new();
977         rec.push_field(b"foo");
978         rec.push_field(b"");
979         rec.push_field(b"quux");
980 
981         assert_eq!(rec.len(), 3);
982         assert_eq!(rec.get(0), Some(b("foo")));
983         assert_eq!(rec.get(1), Some(b("")));
984         assert_eq!(rec.get(2), Some(b("quux")));
985         assert_eq!(rec.get(3), None);
986         assert_eq!(rec.get(4), None);
987     }
988 
989     #[test]
empty_surround_2()990     fn empty_surround_2() {
991         let mut rec = ByteRecord::new();
992         rec.push_field(b"foo");
993         rec.push_field(b"");
994         rec.push_field(b"quux");
995         rec.push_field(b"");
996 
997         assert_eq!(rec.len(), 4);
998         assert_eq!(rec.get(0), Some(b("foo")));
999         assert_eq!(rec.get(1), Some(b("")));
1000         assert_eq!(rec.get(2), Some(b("quux")));
1001         assert_eq!(rec.get(3), Some(b("")));
1002         assert_eq!(rec.get(4), None);
1003         assert_eq!(rec.get(5), None);
1004     }
1005 
1006     #[test]
utf8_error_1()1007     fn utf8_error_1() {
1008         let mut rec = ByteRecord::new();
1009         rec.push_field(b"foo");
1010         rec.push_field(b"b\xFFar");
1011 
1012         let err = StringRecord::from_byte_record(rec).unwrap_err();
1013         assert_eq!(err.utf8_error().field(), 1);
1014         assert_eq!(err.utf8_error().valid_up_to(), 1);
1015     }
1016 
1017     #[test]
utf8_error_2()1018     fn utf8_error_2() {
1019         let mut rec = ByteRecord::new();
1020         rec.push_field(b"\xFF");
1021 
1022         let err = StringRecord::from_byte_record(rec).unwrap_err();
1023         assert_eq!(err.utf8_error().field(), 0);
1024         assert_eq!(err.utf8_error().valid_up_to(), 0);
1025     }
1026 
1027     #[test]
utf8_error_3()1028     fn utf8_error_3() {
1029         let mut rec = ByteRecord::new();
1030         rec.push_field(b"a\xFF");
1031 
1032         let err = StringRecord::from_byte_record(rec).unwrap_err();
1033         assert_eq!(err.utf8_error().field(), 0);
1034         assert_eq!(err.utf8_error().valid_up_to(), 1);
1035     }
1036 
1037     #[test]
utf8_error_4()1038     fn utf8_error_4() {
1039         let mut rec = ByteRecord::new();
1040         rec.push_field(b"a");
1041         rec.push_field(b"b");
1042         rec.push_field(b"c");
1043         rec.push_field(b"d");
1044         rec.push_field(b"xyz\xFF");
1045 
1046         let err = StringRecord::from_byte_record(rec).unwrap_err();
1047         assert_eq!(err.utf8_error().field(), 4);
1048         assert_eq!(err.utf8_error().valid_up_to(), 3);
1049     }
1050 
1051     #[test]
utf8_error_5()1052     fn utf8_error_5() {
1053         let mut rec = ByteRecord::new();
1054         rec.push_field(b"a");
1055         rec.push_field(b"b");
1056         rec.push_field(b"c");
1057         rec.push_field(b"d");
1058         rec.push_field(b"\xFFxyz");
1059 
1060         let err = StringRecord::from_byte_record(rec).unwrap_err();
1061         assert_eq!(err.utf8_error().field(), 4);
1062         assert_eq!(err.utf8_error().valid_up_to(), 0);
1063     }
1064 
1065     // This tests a tricky case where a single field on its own isn't valid
1066     // UTF-8, but the concatenation of all fields is.
1067     #[test]
utf8_error_6()1068     fn utf8_error_6() {
1069         let mut rec = ByteRecord::new();
1070         rec.push_field(b"a\xc9");
1071         rec.push_field(b"\x91b");
1072 
1073         let err = StringRecord::from_byte_record(rec).unwrap_err();
1074         assert_eq!(err.utf8_error().field(), 0);
1075         assert_eq!(err.utf8_error().valid_up_to(), 1);
1076     }
1077 
1078     // This tests that we can always clear a `ByteRecord` and get a guaranteed
1079     // successful conversion to UTF-8. This permits reusing the allocation.
1080     #[test]
utf8_clear_ok()1081     fn utf8_clear_ok() {
1082         let mut rec = ByteRecord::new();
1083         rec.push_field(b"\xFF");
1084         assert!(StringRecord::from_byte_record(rec).is_err());
1085 
1086         let mut rec = ByteRecord::new();
1087         rec.push_field(b"\xFF");
1088         rec.clear();
1089         assert!(StringRecord::from_byte_record(rec).is_ok());
1090     }
1091 
1092     #[test]
iter()1093     fn iter() {
1094         let data = vec!["foo", "bar", "baz", "quux", "wat"];
1095         let rec = ByteRecord::from(&*data);
1096         let got: Vec<&str> =
1097             rec.iter().map(|x| ::std::str::from_utf8(x).unwrap()).collect();
1098         assert_eq!(data, got);
1099     }
1100 
1101     #[test]
iter_reverse()1102     fn iter_reverse() {
1103         let mut data = vec!["foo", "bar", "baz", "quux", "wat"];
1104         let rec = ByteRecord::from(&*data);
1105         let got: Vec<&str> = rec
1106             .iter()
1107             .rev()
1108             .map(|x| ::std::str::from_utf8(x).unwrap())
1109             .collect();
1110         data.reverse();
1111         assert_eq!(data, got);
1112     }
1113 
1114     #[test]
iter_forward_and_reverse()1115     fn iter_forward_and_reverse() {
1116         let data = vec!["foo", "bar", "baz", "quux", "wat"];
1117         let rec = ByteRecord::from(data);
1118         let mut it = rec.iter();
1119 
1120         assert_eq!(it.next_back(), Some(b("wat")));
1121         assert_eq!(it.next(), Some(b("foo")));
1122         assert_eq!(it.next(), Some(b("bar")));
1123         assert_eq!(it.next_back(), Some(b("quux")));
1124         assert_eq!(it.next(), Some(b("baz")));
1125         assert_eq!(it.next_back(), None);
1126         assert_eq!(it.next(), None);
1127     }
1128 
1129     // Check that record equality respects field boundaries.
1130     //
1131     // Regression test for #138.
1132     #[test]
eq_field_boundaries()1133     fn eq_field_boundaries() {
1134         let test1 = ByteRecord::from(vec!["12", "34"]);
1135         let test2 = ByteRecord::from(vec!["123", "4"]);
1136 
1137         assert_ne!(test1, test2);
1138     }
1139 
1140     // Check that record equality respects number of fields.
1141     //
1142     // Regression test for #138.
1143     #[test]
eq_record_len()1144     fn eq_record_len() {
1145         let test1 = ByteRecord::from(vec!["12", "34", "56"]);
1146         let test2 = ByteRecord::from(vec!["12", "34"]);
1147         assert_ne!(test1, test2);
1148     }
1149 }
1150