csv-1.1.2/src/byte_record.rs

use std::cmp;
use std::fmt;
use std::iter::FromIterator;
use std::ops::{self, Range};
use std::result;

use bstr::{BString, ByteSlice};
use serde::de::Deserialize;

use crate::deserializer::deserialize_byte_record;
use crate::error::{new_utf8_error, Result, Utf8Error};
use crate::string_record::StringRecord;

/// A single CSV record stored as raw bytes.
///
/// A byte record permits reading or writing CSV rows that are not UTF-8.
/// In general, you should prefer using a
/// [`StringRecord`](struct.StringRecord.html)
/// since it is more ergonomic, but a `ByteRecord` is provided in case you need
/// it.
///
/// If you are using the Serde (de)serialization APIs, then you probably never
/// need to interact with a `ByteRecord` or a `StringRecord`. However, there
/// are some circumstances in which you might need to use a raw record type
/// while still using Serde. For example, if you need to deserialize possibly
/// invalid UTF-8 fields, then you'll need to first read your record into a
/// `ByteRecord`, and then use `ByteRecord::deserialize` to run Serde. Another
/// reason for using the raw record deserialization APIs is if you're using
/// Serde to read into borrowed data such as a `&'a str` or a `&'a [u8]`.
///
/// Two `ByteRecord`s are compared on the basis of their field data. Any
/// position information associated with the records is ignored.
// `PartialEq` is written by hand (see the impl below) instead of being
// derived, so that equality only looks at the fields yielded by `iter()` and
// never at the stored position. All state lives behind a single `Box`.
#[derive(Clone, Eq)]
pub struct ByteRecord(Box<ByteRecordInner>);

impl PartialEq for ByteRecord {
    /// Two records are equal when they hold the same number of fields and
    /// every pair of corresponding fields contains identical bytes.
    fn eq(&self, other: &ByteRecord) -> bool {
        // The field counts are compared first because `zip` stops at the
        // shorter iterator; without this, a record that is a strict prefix
        // of another would compare equal.
        self.len() == other.len()
            && self.iter().zip(other.iter()).all(|(lhs, rhs)| lhs == rhs)
    }
}

impl<T: AsRef<[u8]>> PartialEq<Vec<T>> for ByteRecord {
    fn eq(&self, other: &Vec<T>) -> bool {
        self.iter_eq(other)
    }
}

impl<'a, T: AsRef<[u8]>> PartialEq<Vec<T>> for &'a ByteRecord {
    fn eq(&self, other: &Vec<T>) -> bool {
        self.iter_eq(other)
    }
}

impl<T: AsRef<[u8]>> PartialEq<[T]> for ByteRecord {
    fn eq(&self, other: &[T]) -> bool {
        self.iter_eq(other)
    }
}

impl<'a, T: AsRef<[u8]>> PartialEq<[T]> for &'a ByteRecord {
    fn eq(&self, other: &[T]) -> bool {
        self.iter_eq(other)
    }
}

impl fmt::Debug for ByteRecord {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        let mut fields = vec![];
        for field in self {
            fields.push(BString::from(field.to_vec()));
        }
        write!(f, "ByteRecord({:?})", fields)
    }
}

/// The inner portion of a byte record.
///
/// We use this memory layout so that moving a `ByteRecord` only requires
/// moving a single pointer. The optimization is dubious at best, but does
/// seem to result in slightly better numbers in microbenchmarks. Methinks this
/// may heavily depend on the underlying allocator.
#[derive(Clone, Debug, Eq, PartialEq)]
struct ByteRecordInner {
    /// The position of this byte record.
    ///
    /// `ByteRecord::with_capacity` initializes this to `None`.
    pos: Option<Position>,
    /// All fields in this record, stored contiguously.
    ///
    /// The vector may be longer than the data currently in use: only the
    /// first `bounds.end()` bytes belong to fields (this is the prefix that
    /// `ByteRecord::as_slice` exposes).
    fields: Vec<u8>,
    /// The number of and location of each field in this record.
    bounds: Bounds,
}

impl Default for ByteRecord {
    #[inline]
    fn default() -> ByteRecord {
        ByteRecord::new()
    }
}

impl ByteRecord {
    /// Create a new empty `ByteRecord`.
    ///
    /// Note that you may find the `ByteRecord::from` constructor more
    /// convenient, which is provided by an impl on the `From` trait.
    ///
    /// # Example: create an empty record
    ///
    /// ```
    /// use csv::ByteRecord;
    ///
    /// let record = ByteRecord::new();
    /// assert_eq!(record.len(), 0);
    /// ```
    ///
    /// # Example: initialize a record from a `Vec`
    ///
    /// ```
    /// use csv::ByteRecord;
    ///
    /// let record = ByteRecord::from(vec!["a", "b", "c"]);
    /// assert_eq!(record.len(), 3);
    /// ```
    #[inline]
    pub fn new() -> ByteRecord {
        ByteRecord::with_capacity(0, 0)
    }

    /// Create a new empty `ByteRecord` with the given capacity settings.
    ///
    /// `buffer` refers to the capacity of the buffer used to store the
    /// actual row contents. `fields` refers to the number of fields one
    /// might expect to store.
    #[inline]
    pub fn with_capacity(buffer: usize, fields: usize) -> ByteRecord {
        // Both buffers are zero-filled to the requested sizes up front. The
        // record still reports zero fields because the bounds start out with
        // a field count of zero.
        let inner = ByteRecordInner {
            pos: None,
            fields: vec![0; buffer],
            bounds: Bounds::with_capacity(fields),
        };
        ByteRecord(Box::new(inner))
    }

    /// Deserialize this record.
    ///
    /// The `D` type parameter refers to the type that this record should be
    /// deserialized into. The `'de` lifetime refers to the lifetime of the
    /// `ByteRecord`. The `'de` lifetime permits deserializing into structs
    /// that borrow field data from this record.
    ///
    /// An optional `headers` parameter permits deserializing into a struct
    /// based on its field names (corresponding to header values) rather than
    /// the order in which the fields are defined.
    ///
    /// # Example: without headers
    ///
    /// This shows how to deserialize a single row into a struct based on the
    /// order in which fields occur. This example also shows how to borrow
    /// fields from the `ByteRecord`, which results in zero allocation
    /// deserialization.
    ///
    /// ```
    /// use std::error::Error;
    ///
    /// use csv::ByteRecord;
    /// use serde::Deserialize;
    ///
    /// #[derive(Deserialize)]
    /// struct Row<'a> {
    ///     city: &'a str,
    ///     country: &'a str,
    ///     population: u64,
    /// }
    ///
    /// # fn main() { example().unwrap() }
    /// fn example() -> Result<(), Box<dyn Error>> {
    ///     let record = ByteRecord::from(vec![
    ///         "Boston", "United States", "4628910",
    ///     ]);
    ///
    ///     let row: Row = record.deserialize(None)?;
    ///     assert_eq!(row.city, "Boston");
    ///     assert_eq!(row.country, "United States");
    ///     assert_eq!(row.population, 4628910);
    ///     Ok(())
    /// }
    /// ```
    ///
    /// # Example: with headers
    ///
    /// This example is like the previous one, but shows how to deserialize
    /// into a struct based on the struct's field names. For this to work,
    /// you must provide a header row.
    ///
    /// This example also shows that you can deserialize into owned data
    /// types (e.g., `String`) instead of borrowed data types (e.g., `&str`).
    ///
    /// ```
    /// use std::error::Error;
    ///
    /// use csv::ByteRecord;
    /// use serde::Deserialize;
    ///
    /// #[derive(Deserialize)]
    /// struct Row {
    ///     city: String,
    ///     country: String,
    ///     population: u64,
    /// }
    ///
    /// # fn main() { example().unwrap() }
    /// fn example() -> Result<(), Box<dyn Error>> {
    ///     // Notice that the fields are not in the same order
    ///     // as the fields in the struct!
    ///     let header = ByteRecord::from(vec![
    ///         "country", "city", "population",
    ///     ]);
    ///     let record = ByteRecord::from(vec![
    ///         "United States", "Boston", "4628910",
    ///     ]);
    ///
    ///     let row: Row = record.deserialize(Some(&header))?;
    ///     assert_eq!(row.city, "Boston");
    ///     assert_eq!(row.country, "United States");
    ///     assert_eq!(row.population, 4628910);
    ///     Ok(())
    /// }
    /// ```
    pub fn deserialize<'de, D: Deserialize<'de>>(
        &'de self,
        headers: Option<&'de ByteRecord>,
    ) -> Result<D> {
        // Delegates to the crate's `deserialize_byte_record` helper, passing
        // this record and the optional header record through unchanged.
        deserialize_byte_record(self, headers)
    }

    /// Returns an iterator over all fields in this record.
    ///
    /// # Example
    ///
    /// This example shows how to iterate over each field in a `ByteRecord`.
    ///
    /// ```
    /// use csv::ByteRecord;
    ///
    /// let record = ByteRecord::from(vec!["a", "b", "c"]);
    /// for field in record.iter() {
    ///     assert!(field == b"a" || field == b"b" || field == b"c");
    /// }
    /// ```
    #[inline]
    pub fn iter(&self) -> ByteRecordIter {
        // Iteration is implemented once, by `IntoIterator for &ByteRecord`.
        IntoIterator::into_iter(self)
    }

    /// Return the field at index `i`.
    ///
    /// If no field at index `i` exists, then this returns `None`.
    ///
    /// # Example
    ///
    /// ```
    /// use csv::ByteRecord;
    ///
    /// let record = ByteRecord::from(vec!["a", "b", "c"]);
    /// assert_eq!(record.get(1), Some(&b"b"[..]));
    /// assert_eq!(record.get(3), None);
    /// ```
    #[inline]
    pub fn get(&self, i: usize) -> Option<&[u8]> {
        // Look up the byte range of field `i`; bail out with `None` when the
        // index is past the last field.
        let range = self.0.bounds.get(i)?;
        Some(&self.0.fields[range])
    }

    /// Returns true if and only if this record is empty.
    ///
    /// # Example
    ///
    /// ```
    /// use csv::ByteRecord;
    ///
    /// assert!(ByteRecord::new().is_empty());
    /// ```
    #[inline]
    pub fn is_empty(&self) -> bool {
        // A record is empty when it has no fields at all; a record holding a
        // single empty field is not empty.
        self.0.bounds.len() == 0
    }

    /// Returns the number of fields in this record.
    ///
    /// # Example
    ///
    /// ```
    /// use csv::ByteRecord;
    ///
    /// let record = ByteRecord::from(vec!["a", "b", "c"]);
    /// assert_eq!(record.len(), 3);
    /// ```
    #[inline]
    pub fn len(&self) -> usize {
        self.0.bounds.len()
    }

    /// Truncate this record to `n` fields.
    ///
    /// If `n` is greater than the number of fields in this record, then this
    /// has no effect.
    ///
    /// # Example
    ///
    /// ```
    /// use csv::ByteRecord;
    ///
    /// let mut record = ByteRecord::from(vec!["a", "b", "c"]);
    /// assert_eq!(record.len(), 3);
    /// record.truncate(1);
    /// assert_eq!(record.len(), 1);
    /// assert_eq!(record, vec!["a"]);
    /// ```
    #[inline]
    pub fn truncate(&mut self, n: usize) {
        // Asking for more fields than the record has leaves it untouched.
        if n > self.len() {
            return;
        }
        // Only the field count changes; the underlying buffers keep their
        // contents and size.
        self.0.bounds.len = n;
    }

    /// Clear this record so that it has zero fields.
    ///
    /// This is equivalent to calling `truncate(0)`.
    ///
    /// Note that it is not necessary to clear the record to reuse it with
    /// the CSV reader.
    ///
    /// # Example
    ///
    /// ```
    /// use csv::ByteRecord;
    ///
    /// let mut record = ByteRecord::from(vec!["a", "b", "c"]);
    /// assert_eq!(record.len(), 3);
    /// record.clear();
    /// assert_eq!(record.len(), 0);
    /// ```
    #[inline]
    pub fn clear(&mut self) {
        // Zero never exceeds the current field count, so truncating to zero
        // amounts to resetting the count directly.
        self.0.bounds.len = 0;
    }

    /// Trim the fields of this record so that leading and trailing whitespace
    /// is removed.
    ///
    /// This method uses the ASCII definition of whitespace. That is, only
    /// bytes in the class `[\t\n\v\f\r ]` are trimmed.
    ///
    /// # Example
    ///
    /// ```
    /// use csv::ByteRecord;
    ///
    /// let mut record = ByteRecord::from(vec![
    ///     "  ", "\tfoo", "bar  ", "b a z",
    /// ]);
    /// record.trim();
    /// assert_eq!(record, vec!["", "foo", "bar", "b a z"]);
    /// ```
    pub fn trim(&mut self) {
        // Nothing to do for a record without fields.
        if self.is_empty() {
            return;
        }
        // TODO: We could likely do this in place, but for now, we allocate.
        //
        // The replacement record is sized after the current one, carries
        // over its position and receives every field with its surrounding
        // whitespace removed.
        let mut trimmed =
            ByteRecord::with_capacity(self.as_slice().len(), self.len());
        trimmed.set_position(self.position().cloned());
        trimmed.extend(self.iter().map(|field| field.trim()));
        *self = trimmed;
    }

    /// Add a new field to this record.
    ///
    /// # Example
    ///
    /// ```
    /// use csv::ByteRecord;
    ///
    /// let mut record = ByteRecord::new();
    /// record.push_field(b"foo");
    /// assert_eq!(&record[0], b"foo");
    /// ```
    #[inline]
    pub fn push_field(&mut self, field: &[u8]) {
        // The new field is appended directly after the last existing one.
        let start = self.0.bounds.end();
        let end = start + field.len();
        // Keep growing the buffer until the new field fits.
        while self.0.fields.len() < end {
            self.expand_fields();
        }
        self.0.fields[start..end].copy_from_slice(field);
        // Record where the field ends; this also bumps the field count.
        self.0.bounds.add(end);
    }

    /// Return the position of this record, if available.
    ///
    /// # Example
    ///
    /// ```
    /// use std::error::Error;
    ///
    /// use csv::{ByteRecord, ReaderBuilder};
    ///
    /// # fn main() { example().unwrap(); }
    /// fn example() -> Result<(), Box<dyn Error>> {
    ///     let mut record = ByteRecord::new();
    ///     let mut rdr = ReaderBuilder::new()
    ///         .has_headers(false)
    ///         .from_reader("a,b,c\nx,y,z".as_bytes());
    ///
    ///     assert!(rdr.read_byte_record(&mut record)?);
    ///     {
    ///         let pos = record.position().expect("a record position");
    ///         assert_eq!(pos.byte(), 0);
    ///         assert_eq!(pos.line(), 1);
    ///         assert_eq!(pos.record(), 0);
    ///     }
    ///
    ///     assert!(rdr.read_byte_record(&mut record)?);
    ///     {
    ///         let pos = record.position().expect("a record position");
    ///         assert_eq!(pos.byte(), 6);
    ///         assert_eq!(pos.line(), 2);
    ///         assert_eq!(pos.record(), 1);
    ///     }
    ///
    ///     // Finish the CSV reader for good measure.
    ///     assert!(!rdr.read_byte_record(&mut record)?);
    ///     Ok(())
    /// }
    /// ```
    #[inline]
    pub fn position(&self) -> Option<&Position> {
        self.0.pos.as_ref()
    }

    /// Set the position of this record.
    ///
    /// # Example
    ///
    /// ```
    /// use csv::{ByteRecord, Position};
    ///
    /// let mut record = ByteRecord::from(vec!["a", "b", "c"]);
    /// let mut pos = Position::new();
    /// pos.set_byte(100);
    /// pos.set_line(4);
    /// pos.set_record(2);
    ///
    /// record.set_position(Some(pos.clone()));
    /// assert_eq!(record.position(), Some(&pos));
    /// ```
    #[inline]
    pub fn set_position(&mut self, pos: Option<Position>) {
        self.0.pos = pos;
    }

    /// Return the start and end position of a field in this record.
    ///
    /// If no such field exists at the given index, then return `None`.
    ///
    /// The range returned can be used with the slice returned by `as_slice`.
    ///
    /// # Example
    ///
    /// ```
    /// use csv::ByteRecord;
    ///
    /// let record = ByteRecord::from(vec!["foo", "quux", "z"]);
    /// let range = record.range(1).expect("a record range");
    /// assert_eq!(&record.as_slice()[range], &b"quux"[..]);
    /// ```
    #[inline]
    pub fn range(&self, i: usize) -> Option<Range<usize>> {
        self.0.bounds.get(i)
    }

    /// Return the entire row as a single byte slice. The slice returned stores
    /// all fields contiguously. The boundaries of each field can be determined
    /// via the `range` method.
    ///
    /// # Example
    ///
    /// ```
    /// use csv::ByteRecord;
    ///
    /// let record = ByteRecord::from(vec!["foo", "quux", "z"]);
    /// assert_eq!(record.as_slice(), &b"fooquuxz"[..]);
    /// ```
    #[inline]
    pub fn as_slice(&self) -> &[u8] {
        // The buffer may be longer than the data in use, so cut it off at
        // the end of the last field.
        let used = self.0.bounds.end();
        &self.0.fields[..used]
    }

    /// Retrieve the underlying parts of a byte record.
    #[inline]
    pub(crate) fn as_parts(&mut self) -> (&mut Vec<u8>, &mut Vec<usize>) {
        let inner = &mut *self.0;
        (&mut inner.fields, &mut inner.bounds.ends)
    }

    /// Set the number of fields in this record.
    ///
    /// The new count is stored as is: no check is made against the number of
    /// end offsets actually held by the bounds.
    #[inline]
    pub(crate) fn set_len(&mut self, len: usize) {
        self.0.bounds.len = len;
    }

    /// Grow the buffer that stores field data.
    ///
    /// The buffer doubles in length, with a minimum of 4 bytes, and the new
    /// space is zero-filled. Panics if doubling the length overflows `usize`.
    #[inline]
    pub(crate) fn expand_fields(&mut self) {
        let doubled = self.0.fields.len().checked_mul(2).unwrap();
        let target = cmp::max(4, doubled);
        self.0.fields.resize(target, 0);
    }

    /// Expand the capacity for storing field ending positions.
    #[inline]
    pub(crate) fn expand_ends(&mut self) {
        self.0.bounds.expand();
    }

    /// Check that every field of this record is valid UTF-8.
    ///
    /// On failure, the error is built from the index of the first offending
    /// field and the number of leading bytes of that field that are valid.
    #[inline]
    pub(crate) fn validate(&self) -> result::Result<(), Utf8Error> {
        // Fast path: a buffer made up entirely of ASCII is always valid.
        if self.as_slice().is_ascii() {
            return Ok(());
        }
        // Slow path: fields must be checked one at a time, since a multi-byte
        // sequence split across two fields is invalid even when the
        // concatenated buffer happens to be valid.
        let first_invalid =
            self.iter().enumerate().find_map(|(i, field)| {
                field.to_str().err().map(|err| (i, err.valid_up_to()))
            });
        match first_invalid {
            Some((i, valid_up_to)) => Err(new_utf8_error(i, valid_up_to)),
            None => Ok(()),
        }
    }

    /// Report whether this record and `other` yield the same sequence of
    /// fields: the same number of them, each with identical bytes.
    pub(crate) fn iter_eq<I, T>(&self, other: I) -> bool
    where
        I: IntoIterator<Item = T>,
        T: AsRef<[u8]>,
    {
        let mut mine = self.iter();
        for theirs in other {
            match mine.next() {
                // Matching field: keep going.
                Some(field) if field == theirs.as_ref() => {}
                // Either the bytes differ or this record ran out first.
                _ => return false,
            }
        }
        // `other` is exhausted; equal only if this record is exhausted too.
        mine.next().is_none()
    }
}

/// A position in CSV data.
///
/// A position is used to report errors in CSV data. All positions include the
/// byte offset, line number and record index at which the error occurred.
///
/// Byte offsets and record indices start at `0`. Line numbers start at `1`.
///
/// A CSV reader will automatically assign the position of each record.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct Position {
    /// Byte offset, starting at `0`.
    byte: u64,
    /// Line number, starting at `1`.
    line: u64,
    /// Record index, starting at `0`.
    record: u64,
}

impl Default for Position {
    /// Returns the start position, exactly like `Position::new`.
    ///
    /// This is implemented by hand rather than derived because the line
    /// number of the start position is `1`, not `0`.
    #[inline]
    fn default() -> Position {
        Position::new()
    }
}

impl Position {
    /// Returns a new position initialized to the start value.
    ///
    /// The start value has byte offset `0`, line number `1` and record
    /// index `0`.
    #[inline]
    pub fn new() -> Position {
        Position { byte: 0, line: 1, record: 0 }
    }

    /// The byte offset, starting at `0`, of this position.
    #[inline]
    pub fn byte(&self) -> u64 {
        self.byte
    }
    /// The line number, starting at `1`, of this position.
    #[inline]
    pub fn line(&self) -> u64 {
        self.line
    }
    /// The record index, starting with the first record at `0`.
    #[inline]
    pub fn record(&self) -> u64 {
        self.record
    }

    /// Set the byte offset of this position.
    ///
    /// Returns `self` so that setter calls can be chained.
    #[inline]
    pub fn set_byte(&mut self, byte: u64) -> &mut Position {
        self.byte = byte;
        self
    }

    /// Set the line number of this position.
    ///
    /// Returns `self` so that setter calls can be chained.
    ///
    /// # Panics
    ///
    /// If the line number is less than `1`, then this method panics.
    #[inline]
    pub fn set_line(&mut self, line: u64) -> &mut Position {
        assert!(line > 0);
        self.line = line;
        self
    }

    /// Set the record index of this position.
    ///
    /// Returns `self` so that setter calls can be chained.
    #[inline]
    pub fn set_record(&mut self, record: u64) -> &mut Position {
        self.record = record;
        self
    }
}

/// Where each field of a single record ends inside the record's byte buffer.
#[derive(Clone, Debug, Eq, PartialEq)]
struct Bounds {
    /// The ending index of each field.
    ///
    /// This vector can hold more slots than there are fields; only the first
    /// `len` entries are meaningful.
    ends: Vec<usize>,
    /// The number of fields in this record.
    ///
    /// Technically, we could drop this field and maintain an invariant that
    /// `ends.len()` is always the number of fields, but doing that efficiently
    /// requires attention to safety. We play it safe at essentially no cost.
    len: usize,
}

impl Default for Bounds {
    /// Bounds describing zero fields, with no slots reserved.
    #[inline]
    fn default() -> Bounds {
        Self::with_capacity(0)
    }
}

impl Bounds {
    /// Create bounds describing zero fields, with `capacity` zero-filled
    /// slots ready to receive field ends.
    #[inline]
    fn with_capacity(capacity: usize) -> Bounds {
        let ends = vec![0; capacity];
        Bounds { ends, len: 0 }
    }

    /// Returns the byte range of field `i`, or `None` if there is no such
    /// field.
    #[inline]
    fn get(&self, i: usize) -> Option<Range<usize>> {
        if i >= self.len {
            return None;
        }
        let end = *self.ends.get(i)?;
        // A field starts where its predecessor ends; the first field starts
        // at offset zero.
        let start = match i {
            0 => 0,
            _ => self.ends.get(i - 1).cloned().unwrap_or(0),
        };
        Some(ops::Range { start, end })
    }

    /// Returns the ending positions of all fields, without any spare slots.
    #[inline]
    fn ends(&self) -> &[usize] {
        let live = self.len;
        &self.ends[..live]
    }

    /// Return the ending position of the last field.
    ///
    /// If there are no fields, this returns `0`.
    #[inline]
    fn end(&self) -> usize {
        match self.ends().last() {
            Some(&last) => last,
            None => 0,
        }
    }

    /// Returns the number of fields in these bounds.
    #[inline]
    fn len(&self) -> usize {
        self.len
    }

    /// Grow the storage for field ends.
    ///
    /// The number of slots doubles, with a minimum of 4, and new slots are
    /// zero-filled. Panics if doubling overflows `usize`.
    #[inline]
    fn expand(&mut self) {
        let doubled = self.ends.len().checked_mul(2).unwrap();
        self.ends.resize(cmp::max(4, doubled), 0);
    }

    /// Append a field that ends at byte position `pos`.
    #[inline]
    fn add(&mut self, pos: usize) {
        let slot = self.len;
        // Make room first if every slot is already taken.
        if slot >= self.ends.len() {
            self.expand();
        }
        self.ends[slot] = pos;
        self.len = slot + 1;
    }
}

impl ops::Index<usize> for ByteRecord {
    type Output = [u8];

    /// Returns the field at index `i`.
    ///
    /// Panics if the record has no field at that index.
    #[inline]
    fn index(&self, i: usize) -> &[u8] {
        let field = self.get(i);
        field.unwrap()
    }
}

impl From<StringRecord> for ByteRecord {
    #[inline]
    fn from(record: StringRecord) -> ByteRecord {
        record.into_byte_record()
    }
}

impl<T: AsRef<[u8]>> From<Vec<T>> for ByteRecord {
    #[inline]
    fn from(xs: Vec<T>) -> ByteRecord {
        ByteRecord::from_iter(&xs)
    }
}

impl<'a, T: AsRef<[u8]>> From<&'a [T]> for ByteRecord {
    #[inline]
    fn from(xs: &'a [T]) -> ByteRecord {
        ByteRecord::from_iter(xs)
    }
}

impl<T: AsRef<[u8]>> FromIterator<T> for ByteRecord {
    /// Collect the items of `iter` into a fresh record, one field per item.
    #[inline]
    fn from_iter<I: IntoIterator<Item = T>>(iter: I) -> ByteRecord {
        let mut record = ByteRecord::new();
        for field in iter {
            record.push_field(field.as_ref());
        }
        record
    }
}

impl<T: AsRef<[u8]>> Extend<T> for ByteRecord {
    /// Append every item of `iter` to this record as a new field.
    #[inline]
    fn extend<I: IntoIterator<Item = T>>(&mut self, iter: I) {
        iter.into_iter().for_each(|field| self.push_field(field.as_ref()));
    }
}

/// A double-ended iterator over the fields in a byte record.
///
/// The `'r` lifetime variable refers to the lifetime of the `ByteRecord` that
/// is being iterated over.
pub struct ByteRecordIter<'r> {
    /// The record we are iterating over.
    r: &'r ByteRecord,
    /// The starting index of the previous field. (For reverse iteration.)
    ///
    /// Starts out as the total length of the record's field data, so that it
    /// serves as the end of the first field yielded from the back.
    last_start: usize,
    /// The ending index of the previous field. (For forward iteration.)
    ///
    /// Starts out as `0`, so that it serves as the start of the first field
    /// yielded from the front.
    last_end: usize,
    /// The index of forward iteration.
    ///
    /// This is the index of the next field yielded by `next`.
    i_forward: usize,
    /// The index of reverse iteration.
    ///
    /// This is one past the index of the next field yielded by `next_back`.
    /// Iteration is finished once it equals `i_forward`.
    i_reverse: usize,
}

impl<'r> IntoIterator for &'r ByteRecord {
    type IntoIter = ByteRecordIter<'r>;
    type Item = &'r [u8];

    /// Create an iterator positioned before the first field and after the
    /// last one.
    #[inline]
    fn into_iter(self) -> ByteRecordIter<'r> {
        let field_count = self.len();
        let data_len = self.as_slice().len();
        ByteRecordIter {
            r: self,
            // Reverse iteration begins at the end of the field data.
            last_start: data_len,
            // Forward iteration begins at the start of the field data.
            last_end: 0,
            i_forward: 0,
            i_reverse: field_count,
        }
    }
}

// The `size_hint` of `ByteRecordIter` returns identical lower and upper
// bounds, so the default `ExactSizeIterator::len` can be used as is.
impl<'r> ExactSizeIterator for ByteRecordIter<'r> {}

impl<'r> Iterator for ByteRecordIter<'r> {
    type Item = &'r [u8];

    /// Yield the next field from the front, or `None` once the forward and
    /// reverse cursors have met.
    #[inline]
    fn next(&mut self) -> Option<&'r [u8]> {
        if self.i_forward == self.i_reverse {
            return None;
        }
        // The field begins where the previously yielded one ended.
        let (start, end) =
            (self.last_end, self.r.0.bounds.ends()[self.i_forward]);
        self.i_forward += 1;
        self.last_end = end;
        Some(&self.r.0.fields[start..end])
    }

    /// The exact number of fields that have not been yielded yet.
    #[inline]
    fn size_hint(&self) -> (usize, Option<usize>) {
        let remaining = self.i_reverse - self.i_forward;
        (remaining, Some(remaining))
    }

    /// Count the remaining fields without walking over them.
    #[inline]
    fn count(self) -> usize {
        ExactSizeIterator::len(&self)
    }
}

impl<'r> DoubleEndedIterator for ByteRecordIter<'r> {
    /// Yield the next field from the back, or `None` once the forward and
    /// reverse cursors have met.
    #[inline]
    fn next_back(&mut self) -> Option<&'r [u8]> {
        if self.i_forward == self.i_reverse {
            return None;
        }
        self.i_reverse -= 1;
        // A field starts where its predecessor ends; the very first field
        // starts at offset zero.
        let start = match self.i_reverse {
            0 => 0,
            idx => self.r.0.bounds.ends()[idx - 1],
        };
        // The field ends where the previously yielded (later) one started.
        let end = self.last_start;
        self.last_start = start;
        Some(&self.r.0.fields[start..end])
    }
}

// Unit tests for `ByteRecord`: field access, trimming, UTF-8 validation when
// converting to a `StringRecord`, iteration in both directions and equality.
#[cfg(test)]
mod tests {
    use crate::string_record::StringRecord;

    use super::ByteRecord;

    // Shorthand for viewing a string literal as a byte slice.
    fn b(s: &str) -> &[u8] {
        s.as_bytes()
    }

    // A record with one field: in-range lookup succeeds, others yield `None`.
    #[test]
    fn record_1() {
        let mut rec = ByteRecord::new();
        rec.push_field(b"foo");

        assert_eq!(rec.len(), 1);
        assert_eq!(rec.get(0), Some(b("foo")));
        assert_eq!(rec.get(1), None);
        assert_eq!(rec.get(2), None);
    }

    // A record with two fields of different lengths.
    #[test]
    fn record_2() {
        let mut rec = ByteRecord::new();
        rec.push_field(b"foo");
        rec.push_field(b"quux");

        assert_eq!(rec.len(), 2);
        assert_eq!(rec.get(0), Some(b("foo")));
        assert_eq!(rec.get(1), Some(b("quux")));
        assert_eq!(rec.get(2), None);
        assert_eq!(rec.get(3), None);
    }

    // A freshly created record has no fields at all.
    #[test]
    fn empty_record() {
        let rec = ByteRecord::new();

        assert_eq!(rec.len(), 0);
        assert_eq!(rec.get(0), None);
        assert_eq!(rec.get(1), None);
    }

    // A field made up solely of whitespace trims down to an empty field.
    #[test]
    fn trim_whitespace_only() {
        let mut rec = ByteRecord::from(vec![b" \t\n\r\x0c"]);
        rec.trim();
        assert_eq!(rec.get(0), Some(b("")));
    }

    // Leading whitespace is removed, for one field and for several.
    #[test]
    fn trim_front() {
        let mut rec = ByteRecord::from(vec![b" abc"]);
        rec.trim();
        assert_eq!(rec.get(0), Some(b("abc")));

        let mut rec = ByteRecord::from(vec![b(" abc"), b("  xyz")]);
        rec.trim();
        assert_eq!(rec.get(0), Some(b("abc")));
        assert_eq!(rec.get(1), Some(b("xyz")));
    }

    // Trailing whitespace is removed, for one field and for several.
    #[test]
    fn trim_back() {
        let mut rec = ByteRecord::from(vec![b"abc "]);
        rec.trim();
        assert_eq!(rec.get(0), Some(b("abc")));

        let mut rec = ByteRecord::from(vec![b("abc "), b("xyz  ")]);
        rec.trim();
        assert_eq!(rec.get(0), Some(b("abc")));
        assert_eq!(rec.get(1), Some(b("xyz")));
    }

    // Whitespace on both sides of a field is removed.
    #[test]
    fn trim_both() {
        let mut rec = ByteRecord::from(vec![b" abc "]);
        rec.trim();
        assert_eq!(rec.get(0), Some(b("abc")));

        let mut rec = ByteRecord::from(vec![b(" abc "), b("  xyz  ")]);
        rec.trim();
        assert_eq!(rec.get(0), Some(b("abc")));
        assert_eq!(rec.get(1), Some(b("xyz")));
    }

    // Trimming a record whose only field is empty keeps that empty field.
    #[test]
    fn trim_does_not_panic_on_empty_records_1() {
        let mut rec = ByteRecord::from(vec![b""]);
        rec.trim();
        assert_eq!(rec.get(0), Some(b("")));
    }

    // Trimming a record with two empty fields keeps both of them.
    #[test]
    fn trim_does_not_panic_on_empty_records_2() {
        let mut rec = ByteRecord::from(vec![b"", b""]);
        rec.trim();
        assert_eq!(rec.get(0), Some(b("")));
        assert_eq!(rec.get(1), Some(b("")));
    }

    // Trimming a record without any fields is a no-op.
    #[test]
    fn trim_does_not_panic_on_empty_records_3() {
        let mut rec = ByteRecord::new();
        rec.trim();
        assert_eq!(rec.as_slice().len(), 0);
    }

    // An empty field still counts as a field.
    #[test]
    fn empty_field_1() {
        let mut rec = ByteRecord::new();
        rec.push_field(b"");

        assert_eq!(rec.len(), 1);
        assert_eq!(rec.get(0), Some(b("")));
        assert_eq!(rec.get(1), None);
        assert_eq!(rec.get(2), None);
    }

    // Two consecutive empty fields are kept apart.
    #[test]
    fn empty_field_2() {
        let mut rec = ByteRecord::new();
        rec.push_field(b"");
        rec.push_field(b"");

        assert_eq!(rec.len(), 2);
        assert_eq!(rec.get(0), Some(b("")));
        assert_eq!(rec.get(1), Some(b("")));
        assert_eq!(rec.get(2), None);
        assert_eq!(rec.get(3), None);
    }

    // An empty field sandwiched between two non-empty fields.
    #[test]
    fn empty_surround_1() {
        let mut rec = ByteRecord::new();
        rec.push_field(b"foo");
        rec.push_field(b"");
        rec.push_field(b"quux");

        assert_eq!(rec.len(), 3);
        assert_eq!(rec.get(0), Some(b("foo")));
        assert_eq!(rec.get(1), Some(b("")));
        assert_eq!(rec.get(2), Some(b("quux")));
        assert_eq!(rec.get(3), None);
        assert_eq!(rec.get(4), None);
    }

    // Empty fields both between non-empty fields and at the very end.
    #[test]
    fn empty_surround_2() {
        let mut rec = ByteRecord::new();
        rec.push_field(b"foo");
        rec.push_field(b"");
        rec.push_field(b"quux");
        rec.push_field(b"");

        assert_eq!(rec.len(), 4);
        assert_eq!(rec.get(0), Some(b("foo")));
        assert_eq!(rec.get(1), Some(b("")));
        assert_eq!(rec.get(2), Some(b("quux")));
        assert_eq!(rec.get(3), Some(b("")));
        assert_eq!(rec.get(4), None);
        assert_eq!(rec.get(5), None);
    }

    // Invalid byte in the middle of the second field.
    #[test]
    fn utf8_error_1() {
        let mut rec = ByteRecord::new();
        rec.push_field(b"foo");
        rec.push_field(b"b\xFFar");

        let err = StringRecord::from_byte_record(rec).unwrap_err();
        assert_eq!(err.utf8_error().field(), 1);
        assert_eq!(err.utf8_error().valid_up_to(), 1);
    }

    // The only field consists of a single invalid byte.
    #[test]
    fn utf8_error_2() {
        let mut rec = ByteRecord::new();
        rec.push_field(b"\xFF");

        let err = StringRecord::from_byte_record(rec).unwrap_err();
        assert_eq!(err.utf8_error().field(), 0);
        assert_eq!(err.utf8_error().valid_up_to(), 0);
    }

    // Invalid byte at the end of the only field.
    #[test]
    fn utf8_error_3() {
        let mut rec = ByteRecord::new();
        rec.push_field(b"a\xFF");

        let err = StringRecord::from_byte_record(rec).unwrap_err();
        assert_eq!(err.utf8_error().field(), 0);
        assert_eq!(err.utf8_error().valid_up_to(), 1);
    }

    // Invalid byte at the end of the last of several fields.
    #[test]
    fn utf8_error_4() {
        let mut rec = ByteRecord::new();
        rec.push_field(b"a");
        rec.push_field(b"b");
        rec.push_field(b"c");
        rec.push_field(b"d");
        rec.push_field(b"xyz\xFF");

        let err = StringRecord::from_byte_record(rec).unwrap_err();
        assert_eq!(err.utf8_error().field(), 4);
        assert_eq!(err.utf8_error().valid_up_to(), 3);
    }

    // Invalid byte at the start of the last of several fields.
    #[test]
    fn utf8_error_5() {
        let mut rec = ByteRecord::new();
        rec.push_field(b"a");
        rec.push_field(b"b");
        rec.push_field(b"c");
        rec.push_field(b"d");
        rec.push_field(b"\xFFxyz");

        let err = StringRecord::from_byte_record(rec).unwrap_err();
        assert_eq!(err.utf8_error().field(), 4);
        assert_eq!(err.utf8_error().valid_up_to(), 0);
    }

    // This tests a tricky case where a single field on its own isn't valid
    // UTF-8, but the concatenation of all fields is.
    #[test]
    fn utf8_error_6() {
        let mut rec = ByteRecord::new();
        rec.push_field(b"a\xc9");
        rec.push_field(b"\x91b");

        let err = StringRecord::from_byte_record(rec).unwrap_err();
        assert_eq!(err.utf8_error().field(), 0);
        assert_eq!(err.utf8_error().valid_up_to(), 1);
    }

    // This tests that we can always clear a `ByteRecord` and get a guaranteed
    // successful conversion to UTF-8. This permits reusing the allocation.
    #[test]
    fn utf8_clear_ok() {
        let mut rec = ByteRecord::new();
        rec.push_field(b"\xFF");
        assert!(StringRecord::from_byte_record(rec).is_err());

        let mut rec = ByteRecord::new();
        rec.push_field(b"\xFF");
        rec.clear();
        assert!(StringRecord::from_byte_record(rec).is_ok());
    }

    // Forward iteration yields the fields in insertion order.
    #[test]
    fn iter() {
        let data = vec!["foo", "bar", "baz", "quux", "wat"];
        let rec = ByteRecord::from(&*data);
        let got: Vec<&str> =
            rec.iter().map(|x| ::std::str::from_utf8(x).unwrap()).collect();
        assert_eq!(data, got);
    }

    // Reverse iteration yields the fields in the opposite order.
    #[test]
    fn iter_reverse() {
        let mut data = vec!["foo", "bar", "baz", "quux", "wat"];
        let rec = ByteRecord::from(&*data);
        let got: Vec<&str> = rec
            .iter()
            .rev()
            .map(|x| ::std::str::from_utf8(x).unwrap())
            .collect();
        data.reverse();
        assert_eq!(data, got);
    }

    // Mixing `next` and `next_back` yields every field exactly once, after
    // which both ends report exhaustion.
    #[test]
    fn iter_forward_and_reverse() {
        let data = vec!["foo", "bar", "baz", "quux", "wat"];
        let rec = ByteRecord::from(data);
        let mut it = rec.iter();

        assert_eq!(it.next_back(), Some(b("wat")));
        assert_eq!(it.next(), Some(b("foo")));
        assert_eq!(it.next(), Some(b("bar")));
        assert_eq!(it.next_back(), Some(b("quux")));
        assert_eq!(it.next(), Some(b("baz")));
        assert_eq!(it.next_back(), None);
        assert_eq!(it.next(), None);
    }

    // Check that record equality respects field boundaries.
    //
    // Regression test for #138.
    #[test]
    fn eq_field_boundaries() {
        let test1 = ByteRecord::from(vec!["12", "34"]);
        let test2 = ByteRecord::from(vec!["123", "4"]);

        assert_ne!(test1, test2);
    }

    // Check that record equality respects number of fields.
    //
    // Regression test for #138.
    #[test]
    fn eq_record_len() {
        let test1 = ByteRecord::from(vec!["12", "34", "56"]);
        let test2 = ByteRecord::from(vec!["12", "34"]);
        assert_ne!(test1, test2);
    }
}