1 use std::cmp; 2 use std::fmt; 3 use std::iter::FromIterator; 4 use std::ops::{self, Range}; 5 use std::result; 6 7 use bstr::{BString, ByteSlice}; 8 use serde::de::Deserialize; 9 10 use crate::deserializer::deserialize_byte_record; 11 use crate::error::{new_utf8_error, Result, Utf8Error}; 12 use crate::string_record::StringRecord; 13 14 /// A single CSV record stored as raw bytes. 15 /// 16 /// A byte record permits reading or writing CSV rows that are not UTF-8. 17 /// In general, you should prefer using a 18 /// [`StringRecord`](struct.StringRecord.html) 19 /// since it is more ergonomic, but a `ByteRecord` is provided in case you need 20 /// it. 21 /// 22 /// If you are using the Serde (de)serialization APIs, then you probably never 23 /// need to interact with a `ByteRecord` or a `StringRecord`. However, there 24 /// are some circumstances in which you might need to use a raw record type 25 /// while still using Serde. For example, if you need to deserialize possibly 26 /// invalid UTF-8 fields, then you'll need to first read your record into a 27 /// `ByteRecord`, and then use `ByteRecord::deserialize` to run Serde. Another 28 /// reason for using the raw record deserialization APIs is if you're using 29 /// Serde to read into borrowed data such as a `&'a str` or a `&'a [u8]`. 30 /// 31 /// Two `ByteRecord`s are compared on the basis of their field data. Any 32 /// position information associated with the records is ignored. 33 #[derive(Clone, Eq)] 34 pub struct ByteRecord(Box<ByteRecordInner>); 35 36 impl PartialEq for ByteRecord { eq(&self, other: &ByteRecord) -> bool37 fn eq(&self, other: &ByteRecord) -> bool { 38 if self.len() != other.len() { 39 return false; 40 } 41 self.iter().zip(other.iter()).all(|e| e.0 == e.1) 42 } 43 } 44 45 impl<T: AsRef<[u8]>> PartialEq<Vec<T>> for ByteRecord { eq(&self, other: &Vec<T>) -> bool46 fn eq(&self, other: &Vec<T>) -> bool { 47 self.iter_eq(other) 48 } 49 } 50 51 impl<'a, T: AsRef<[u8]>> PartialEq<Vec<T>> for &'a ByteRecord { eq(&self, other: &Vec<T>) -> bool52 fn eq(&self, other: &Vec<T>) -> bool { 53 self.iter_eq(other) 54 } 55 } 56 57 impl<T: AsRef<[u8]>> PartialEq<[T]> for ByteRecord { eq(&self, other: &[T]) -> bool58 fn eq(&self, other: &[T]) -> bool { 59 self.iter_eq(other) 60 } 61 } 62 63 impl<'a, T: AsRef<[u8]>> PartialEq<[T]> for &'a ByteRecord { eq(&self, other: &[T]) -> bool64 fn eq(&self, other: &[T]) -> bool { 65 self.iter_eq(other) 66 } 67 } 68 69 impl fmt::Debug for ByteRecord { fmt(&self, f: &mut fmt::Formatter) -> fmt::Result70 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { 71 let mut fields = vec![]; 72 for field in self { 73 fields.push(BString::from(field.to_vec())); 74 } 75 write!(f, "ByteRecord({:?})", fields) 76 } 77 } 78 79 /// The inner portion of a byte record. 80 /// 81 /// We use this memory layout so that moving a `ByteRecord` only requires 82 /// moving a single pointer. The optimization is dubious at best, but does 83 /// seem to result in slightly better numbers in microbenchmarks. Methinks this 84 /// may heavily depend on the underlying allocator. 85 #[derive(Clone, Debug, Eq, PartialEq)] 86 struct ByteRecordInner { 87 /// The position of this byte record. 88 pos: Option<Position>, 89 /// All fields in this record, stored contiguously. 90 fields: Vec<u8>, 91 /// The number of and location of each field in this record. 92 bounds: Bounds, 93 } 94 95 impl Default for ByteRecord { 96 #[inline] default() -> ByteRecord97 fn default() -> ByteRecord { 98 ByteRecord::new() 99 } 100 } 101 102 impl ByteRecord { 103 /// Create a new empty `ByteRecord`. 104 /// 105 /// Note that you may find the `ByteRecord::from` constructor more 106 /// convenient, which is provided by an impl on the `From` trait. 107 /// 108 /// # Example: create an empty record 109 /// 110 /// ``` 111 /// use csv::ByteRecord; 112 /// 113 /// let record = ByteRecord::new(); 114 /// assert_eq!(record.len(), 0); 115 /// ``` 116 /// 117 /// # Example: initialize a record from a `Vec` 118 /// 119 /// ``` 120 /// use csv::ByteRecord; 121 /// 122 /// let record = ByteRecord::from(vec!["a", "b", "c"]); 123 /// assert_eq!(record.len(), 3); 124 /// ``` 125 #[inline] new() -> ByteRecord126 pub fn new() -> ByteRecord { 127 ByteRecord::with_capacity(0, 0) 128 } 129 130 /// Create a new empty `ByteRecord` with the given capacity settings. 131 /// 132 /// `buffer` refers to the capacity of the buffer used to store the 133 /// actual row contents. `fields` refers to the number of fields one 134 /// might expect to store. 135 #[inline] with_capacity(buffer: usize, fields: usize) -> ByteRecord136 pub fn with_capacity(buffer: usize, fields: usize) -> ByteRecord { 137 ByteRecord(Box::new(ByteRecordInner { 138 pos: None, 139 fields: vec![0; buffer], 140 bounds: Bounds::with_capacity(fields), 141 })) 142 } 143 144 /// Deserialize this record. 145 /// 146 /// The `D` type parameter refers to the type that this record should be 147 /// deserialized into. The `'de` lifetime refers to the lifetime of the 148 /// `ByteRecord`. The `'de` lifetime permits deserializing into structs 149 /// that borrow field data from this record. 150 /// 151 /// An optional `headers` parameter permits deserializing into a struct 152 /// based on its field names (corresponding to header values) rather than 153 /// the order in which the fields are defined. 154 /// 155 /// # Example: without headers 156 /// 157 /// This shows how to deserialize a single row into a struct based on the 158 /// order in which fields occur. This example also shows how to borrow 159 /// fields from the `ByteRecord`, which results in zero allocation 160 /// deserialization. 161 /// 162 /// ``` 163 /// use std::error::Error; 164 /// 165 /// use csv::ByteRecord; 166 /// use serde::Deserialize; 167 /// 168 /// #[derive(Deserialize)] 169 /// struct Row<'a> { 170 /// city: &'a str, 171 /// country: &'a str, 172 /// population: u64, 173 /// } 174 /// 175 /// # fn main() { example().unwrap() } 176 /// fn example() -> Result<(), Box<dyn Error>> { 177 /// let record = ByteRecord::from(vec![ 178 /// "Boston", "United States", "4628910", 179 /// ]); 180 /// 181 /// let row: Row = record.deserialize(None)?; 182 /// assert_eq!(row.city, "Boston"); 183 /// assert_eq!(row.country, "United States"); 184 /// assert_eq!(row.population, 4628910); 185 /// Ok(()) 186 /// } 187 /// ``` 188 /// 189 /// # Example: with headers 190 /// 191 /// This example is like the previous one, but shows how to deserialize 192 /// into a struct based on the struct's field names. For this to work, 193 /// you must provide a header row. 194 /// 195 /// This example also shows that you can deserialize into owned data 196 /// types (e.g., `String`) instead of borrowed data types (e.g., `&str`). 197 /// 198 /// ``` 199 /// use std::error::Error; 200 /// 201 /// use csv::ByteRecord; 202 /// use serde::Deserialize; 203 /// 204 /// #[derive(Deserialize)] 205 /// struct Row { 206 /// city: String, 207 /// country: String, 208 /// population: u64, 209 /// } 210 /// 211 /// # fn main() { example().unwrap() } 212 /// fn example() -> Result<(), Box<dyn Error>> { 213 /// // Notice that the fields are not in the same order 214 /// // as the fields in the struct! 215 /// let header = ByteRecord::from(vec![ 216 /// "country", "city", "population", 217 /// ]); 218 /// let record = ByteRecord::from(vec![ 219 /// "United States", "Boston", "4628910", 220 /// ]); 221 /// 222 /// let row: Row = record.deserialize(Some(&header))?; 223 /// assert_eq!(row.city, "Boston"); 224 /// assert_eq!(row.country, "United States"); 225 /// assert_eq!(row.population, 4628910); 226 /// Ok(()) 227 /// } 228 /// ``` deserialize<'de, D: Deserialize<'de>>( &'de self, headers: Option<&'de ByteRecord>, ) -> Result<D>229 pub fn deserialize<'de, D: Deserialize<'de>>( 230 &'de self, 231 headers: Option<&'de ByteRecord>, 232 ) -> Result<D> { 233 deserialize_byte_record(self, headers) 234 } 235 236 /// Returns an iterator over all fields in this record. 237 /// 238 /// # Example 239 /// 240 /// This example shows how to iterate over each field in a `ByteRecord`. 241 /// 242 /// ``` 243 /// use csv::ByteRecord; 244 /// 245 /// let record = ByteRecord::from(vec!["a", "b", "c"]); 246 /// for field in record.iter() { 247 /// assert!(field == b"a" || field == b"b" || field == b"c"); 248 /// } 249 /// ``` 250 #[inline] iter(&self) -> ByteRecordIter251 pub fn iter(&self) -> ByteRecordIter { 252 self.into_iter() 253 } 254 255 /// Return the field at index `i`. 256 /// 257 /// If no field at index `i` exists, then this returns `None`. 258 /// 259 /// # Example 260 /// 261 /// ``` 262 /// use csv::ByteRecord; 263 /// 264 /// let record = ByteRecord::from(vec!["a", "b", "c"]); 265 /// assert_eq!(record.get(1), Some(&b"b"[..])); 266 /// assert_eq!(record.get(3), None); 267 /// ``` 268 #[inline] get(&self, i: usize) -> Option<&[u8]>269 pub fn get(&self, i: usize) -> Option<&[u8]> { 270 self.0.bounds.get(i).map(|range| &self.0.fields[range]) 271 } 272 273 /// Returns true if and only if this record is empty. 274 /// 275 /// # Example 276 /// 277 /// ``` 278 /// use csv::ByteRecord; 279 /// 280 /// assert!(ByteRecord::new().is_empty()); 281 /// ``` 282 #[inline] is_empty(&self) -> bool283 pub fn is_empty(&self) -> bool { 284 self.len() == 0 285 } 286 287 /// Returns the number of fields in this record. 288 /// 289 /// # Example 290 /// 291 /// ``` 292 /// use csv::ByteRecord; 293 /// 294 /// let record = ByteRecord::from(vec!["a", "b", "c"]); 295 /// assert_eq!(record.len(), 3); 296 /// ``` 297 #[inline] len(&self) -> usize298 pub fn len(&self) -> usize { 299 self.0.bounds.len() 300 } 301 302 /// Truncate this record to `n` fields. 303 /// 304 /// If `n` is greater than the number of fields in this record, then this 305 /// has no effect. 306 /// 307 /// # Example 308 /// 309 /// ``` 310 /// use csv::ByteRecord; 311 /// 312 /// let mut record = ByteRecord::from(vec!["a", "b", "c"]); 313 /// assert_eq!(record.len(), 3); 314 /// record.truncate(1); 315 /// assert_eq!(record.len(), 1); 316 /// assert_eq!(record, vec!["a"]); 317 /// ``` 318 #[inline] truncate(&mut self, n: usize)319 pub fn truncate(&mut self, n: usize) { 320 if n <= self.len() { 321 self.0.bounds.len = n; 322 } 323 } 324 325 /// Clear this record so that it has zero fields. 326 /// 327 /// This is equivalent to calling `truncate(0)`. 328 /// 329 /// Note that it is not necessary to clear the record to reuse it with 330 /// the CSV reader. 331 /// 332 /// # Example 333 /// 334 /// ``` 335 /// use csv::ByteRecord; 336 /// 337 /// let mut record = ByteRecord::from(vec!["a", "b", "c"]); 338 /// assert_eq!(record.len(), 3); 339 /// record.clear(); 340 /// assert_eq!(record.len(), 0); 341 /// ``` 342 #[inline] clear(&mut self)343 pub fn clear(&mut self) { 344 self.truncate(0); 345 } 346 347 /// Trim the fields of this record so that leading and trailing whitespace 348 /// is removed. 349 /// 350 /// This method uses the ASCII definition of whitespace. That is, only 351 /// bytes in the class `[\t\n\v\f\r ]` are trimmed. 352 /// 353 /// # Example 354 /// 355 /// ``` 356 /// use csv::ByteRecord; 357 /// 358 /// let mut record = ByteRecord::from(vec![ 359 /// " ", "\tfoo", "bar ", "b a z", 360 /// ]); 361 /// record.trim(); 362 /// assert_eq!(record, vec!["", "foo", "bar", "b a z"]); 363 /// ``` trim(&mut self)364 pub fn trim(&mut self) { 365 let length = self.len(); 366 if length == 0 { 367 return; 368 } 369 // TODO: We could likely do this in place, but for now, we allocate. 370 let mut trimmed = 371 ByteRecord::with_capacity(self.as_slice().len(), self.len()); 372 trimmed.set_position(self.position().cloned()); 373 for field in &*self { 374 trimmed.push_field(field.trim()); 375 } 376 *self = trimmed; 377 } 378 379 /// Add a new field to this record. 380 /// 381 /// # Example 382 /// 383 /// ``` 384 /// use csv::ByteRecord; 385 /// 386 /// let mut record = ByteRecord::new(); 387 /// record.push_field(b"foo"); 388 /// assert_eq!(&record[0], b"foo"); 389 /// ``` 390 #[inline] push_field(&mut self, field: &[u8])391 pub fn push_field(&mut self, field: &[u8]) { 392 let (s, e) = (self.0.bounds.end(), self.0.bounds.end() + field.len()); 393 while e > self.0.fields.len() { 394 self.expand_fields(); 395 } 396 self.0.fields[s..e].copy_from_slice(field); 397 self.0.bounds.add(e); 398 } 399 400 /// Return the position of this record, if available. 401 /// 402 /// # Example 403 /// 404 /// ``` 405 /// use std::error::Error; 406 /// 407 /// use csv::{ByteRecord, ReaderBuilder}; 408 /// 409 /// # fn main() { example().unwrap(); } 410 /// fn example() -> Result<(), Box<dyn Error>> { 411 /// let mut record = ByteRecord::new(); 412 /// let mut rdr = ReaderBuilder::new() 413 /// .has_headers(false) 414 /// .from_reader("a,b,c\nx,y,z".as_bytes()); 415 /// 416 /// assert!(rdr.read_byte_record(&mut record)?); 417 /// { 418 /// let pos = record.position().expect("a record position"); 419 /// assert_eq!(pos.byte(), 0); 420 /// assert_eq!(pos.line(), 1); 421 /// assert_eq!(pos.record(), 0); 422 /// } 423 /// 424 /// assert!(rdr.read_byte_record(&mut record)?); 425 /// { 426 /// let pos = record.position().expect("a record position"); 427 /// assert_eq!(pos.byte(), 6); 428 /// assert_eq!(pos.line(), 2); 429 /// assert_eq!(pos.record(), 1); 430 /// } 431 /// 432 /// // Finish the CSV reader for good measure. 433 /// assert!(!rdr.read_byte_record(&mut record)?); 434 /// Ok(()) 435 /// } 436 /// ``` 437 #[inline] position(&self) -> Option<&Position>438 pub fn position(&self) -> Option<&Position> { 439 self.0.pos.as_ref() 440 } 441 442 /// Set the position of this record. 443 /// 444 /// # Example 445 /// 446 /// ``` 447 /// use csv::{ByteRecord, Position}; 448 /// 449 /// let mut record = ByteRecord::from(vec!["a", "b", "c"]); 450 /// let mut pos = Position::new(); 451 /// pos.set_byte(100); 452 /// pos.set_line(4); 453 /// pos.set_record(2); 454 /// 455 /// record.set_position(Some(pos.clone())); 456 /// assert_eq!(record.position(), Some(&pos)); 457 /// ``` 458 #[inline] set_position(&mut self, pos: Option<Position>)459 pub fn set_position(&mut self, pos: Option<Position>) { 460 self.0.pos = pos; 461 } 462 463 /// Return the start and end position of a field in this record. 464 /// 465 /// If no such field exists at the given index, then return `None`. 466 /// 467 /// The range returned can be used with the slice returned by `as_slice`. 468 /// 469 /// # Example 470 /// 471 /// ``` 472 /// use csv::ByteRecord; 473 /// 474 /// let record = ByteRecord::from(vec!["foo", "quux", "z"]); 475 /// let range = record.range(1).expect("a record range"); 476 /// assert_eq!(&record.as_slice()[range], &b"quux"[..]); 477 /// ``` 478 #[inline] range(&self, i: usize) -> Option<Range<usize>>479 pub fn range(&self, i: usize) -> Option<Range<usize>> { 480 self.0.bounds.get(i) 481 } 482 483 /// Return the entire row as a single byte slice. The slice returned stores 484 /// all fields contiguously. The boundaries of each field can be determined 485 /// via the `range` method. 486 /// 487 /// # Example 488 /// 489 /// ``` 490 /// use csv::ByteRecord; 491 /// 492 /// let record = ByteRecord::from(vec!["foo", "quux", "z"]); 493 /// assert_eq!(record.as_slice(), &b"fooquuxz"[..]); 494 /// ``` 495 #[inline] as_slice(&self) -> &[u8]496 pub fn as_slice(&self) -> &[u8] { 497 &self.0.fields[..self.0.bounds.end()] 498 } 499 500 /// Retrieve the underlying parts of a byte record. 501 #[inline] as_parts(&mut self) -> (&mut Vec<u8>, &mut Vec<usize>)502 pub(crate) fn as_parts(&mut self) -> (&mut Vec<u8>, &mut Vec<usize>) { 503 let inner = &mut *self.0; 504 (&mut inner.fields, &mut inner.bounds.ends) 505 } 506 507 /// Set the number of fields in the given record record. 508 #[inline] set_len(&mut self, len: usize)509 pub(crate) fn set_len(&mut self, len: usize) { 510 self.0.bounds.len = len; 511 } 512 513 /// Expand the capacity for storing fields. 514 #[inline] expand_fields(&mut self)515 pub(crate) fn expand_fields(&mut self) { 516 let new_len = self.0.fields.len().checked_mul(2).unwrap(); 517 self.0.fields.resize(cmp::max(4, new_len), 0); 518 } 519 520 /// Expand the capacity for storing field ending positions. 521 #[inline] expand_ends(&mut self)522 pub(crate) fn expand_ends(&mut self) { 523 self.0.bounds.expand(); 524 } 525 526 /// Validate the given record as UTF-8. 527 /// 528 /// If it's not UTF-8, return an error. 529 #[inline] validate(&self) -> result::Result<(), Utf8Error>530 pub(crate) fn validate(&self) -> result::Result<(), Utf8Error> { 531 // If the entire buffer is ASCII, then we have nothing to fear. 532 if self.0.fields[..self.0.bounds.end()].is_ascii() { 533 return Ok(()); 534 } 535 // Otherwise, we must check each field individually to ensure that 536 // it's valid UTF-8. 537 for (i, field) in self.iter().enumerate() { 538 if let Err(err) = field.to_str() { 539 return Err(new_utf8_error(i, err.valid_up_to())); 540 } 541 } 542 Ok(()) 543 } 544 545 /// Compare the given byte record with the iterator of fields for equality. iter_eq<I, T>(&self, other: I) -> bool where I: IntoIterator<Item = T>, T: AsRef<[u8]>,546 pub(crate) fn iter_eq<I, T>(&self, other: I) -> bool 547 where 548 I: IntoIterator<Item = T>, 549 T: AsRef<[u8]>, 550 { 551 let mut it_record = self.iter(); 552 let mut it_other = other.into_iter(); 553 loop { 554 match (it_record.next(), it_other.next()) { 555 (None, None) => return true, 556 (None, Some(_)) | (Some(_), None) => return false, 557 (Some(x), Some(y)) => { 558 if x != y.as_ref() { 559 return false; 560 } 561 } 562 } 563 } 564 } 565 } 566 567 /// A position in CSV data. 568 /// 569 /// A position is used to report errors in CSV data. All positions include the 570 /// byte offset, line number and record index at which the error occurred. 571 /// 572 /// Byte offsets and record indices start at `0`. Line numbers start at `1`. 573 /// 574 /// A CSV reader will automatically assign the position of each record. 575 #[derive(Clone, Debug, Eq, PartialEq)] 576 pub struct Position { 577 byte: u64, 578 line: u64, 579 record: u64, 580 } 581 582 impl Position { 583 /// Returns a new position initialized to the start value. 584 #[inline] new() -> Position585 pub fn new() -> Position { 586 Position { byte: 0, line: 1, record: 0 } 587 } 588 589 /// The byte offset, starting at `0`, of this position. 590 #[inline] byte(&self) -> u64591 pub fn byte(&self) -> u64 { 592 self.byte 593 } 594 /// The line number, starting at `1`, of this position. 595 #[inline] line(&self) -> u64596 pub fn line(&self) -> u64 { 597 self.line 598 } 599 /// The record index, starting with the first record at `0`. 600 #[inline] record(&self) -> u64601 pub fn record(&self) -> u64 { 602 self.record 603 } 604 605 /// Set the byte offset of this position. 606 #[inline] set_byte(&mut self, byte: u64) -> &mut Position607 pub fn set_byte(&mut self, byte: u64) -> &mut Position { 608 self.byte = byte; 609 self 610 } 611 612 /// Set the line number of this position. 613 /// 614 /// If the line number is less than `1`, then this method panics. 615 #[inline] set_line(&mut self, line: u64) -> &mut Position616 pub fn set_line(&mut self, line: u64) -> &mut Position { 617 assert!(line > 0); 618 self.line = line; 619 self 620 } 621 622 /// Set the record index of this position. 623 #[inline] set_record(&mut self, record: u64) -> &mut Position624 pub fn set_record(&mut self, record: u64) -> &mut Position { 625 self.record = record; 626 self 627 } 628 } 629 630 /// The bounds of fields in a single record. 631 #[derive(Clone, Debug, Eq, PartialEq)] 632 struct Bounds { 633 /// The ending index of each field. 634 ends: Vec<usize>, 635 /// The number of fields in this record. 636 /// 637 /// Technically, we could drop this field and maintain an invariant that 638 /// `ends.len()` is always the number of fields, but doing that efficiently 639 /// requires attention to safety. We play it safe at essentially no cost. 640 len: usize, 641 } 642 643 impl Default for Bounds { 644 #[inline] default() -> Bounds645 fn default() -> Bounds { 646 Bounds::with_capacity(0) 647 } 648 } 649 650 impl Bounds { 651 /// Create a new set of bounds with the given capacity for storing the 652 /// ends of fields. 653 #[inline] with_capacity(capacity: usize) -> Bounds654 fn with_capacity(capacity: usize) -> Bounds { 655 Bounds { ends: vec![0; capacity], len: 0 } 656 } 657 658 /// Returns the bounds of field `i`. 659 #[inline] get(&self, i: usize) -> Option<Range<usize>>660 fn get(&self, i: usize) -> Option<Range<usize>> { 661 if i >= self.len { 662 return None; 663 } 664 let end = match self.ends.get(i) { 665 None => return None, 666 Some(&end) => end, 667 }; 668 let start = match i.checked_sub(1).and_then(|i| self.ends.get(i)) { 669 None => 0, 670 Some(&start) => start, 671 }; 672 Some(ops::Range { start: start, end: end }) 673 } 674 675 /// Returns a slice of ending positions of all fields. 676 #[inline] ends(&self) -> &[usize]677 fn ends(&self) -> &[usize] { 678 &self.ends[..self.len] 679 } 680 681 /// Return the last position of the last field. 682 /// 683 /// If there are no fields, this returns `0`. 684 #[inline] end(&self) -> usize685 fn end(&self) -> usize { 686 self.ends().last().map(|&i| i).unwrap_or(0) 687 } 688 689 /// Returns the number of fields in these bounds. 690 #[inline] len(&self) -> usize691 fn len(&self) -> usize { 692 self.len 693 } 694 695 /// Expand the capacity for storing field ending positions. 696 #[inline] expand(&mut self)697 fn expand(&mut self) { 698 let new_len = self.ends.len().checked_mul(2).unwrap(); 699 self.ends.resize(cmp::max(4, new_len), 0); 700 } 701 702 /// Add a new field with the given ending position. 703 #[inline] add(&mut self, pos: usize)704 fn add(&mut self, pos: usize) { 705 if self.len >= self.ends.len() { 706 self.expand(); 707 } 708 self.ends[self.len] = pos; 709 self.len += 1; 710 } 711 } 712 713 impl ops::Index<usize> for ByteRecord { 714 type Output = [u8]; 715 #[inline] index(&self, i: usize) -> &[u8]716 fn index(&self, i: usize) -> &[u8] { 717 self.get(i).unwrap() 718 } 719 } 720 721 impl From<StringRecord> for ByteRecord { 722 #[inline] from(record: StringRecord) -> ByteRecord723 fn from(record: StringRecord) -> ByteRecord { 724 record.into_byte_record() 725 } 726 } 727 728 impl<T: AsRef<[u8]>> From<Vec<T>> for ByteRecord { 729 #[inline] from(xs: Vec<T>) -> ByteRecord730 fn from(xs: Vec<T>) -> ByteRecord { 731 ByteRecord::from_iter(&xs) 732 } 733 } 734 735 impl<'a, T: AsRef<[u8]>> From<&'a [T]> for ByteRecord { 736 #[inline] from(xs: &'a [T]) -> ByteRecord737 fn from(xs: &'a [T]) -> ByteRecord { 738 ByteRecord::from_iter(xs) 739 } 740 } 741 742 impl<T: AsRef<[u8]>> FromIterator<T> for ByteRecord { 743 #[inline] from_iter<I: IntoIterator<Item = T>>(iter: I) -> ByteRecord744 fn from_iter<I: IntoIterator<Item = T>>(iter: I) -> ByteRecord { 745 let mut record = ByteRecord::new(); 746 record.extend(iter); 747 record 748 } 749 } 750 751 impl<T: AsRef<[u8]>> Extend<T> for ByteRecord { 752 #[inline] extend<I: IntoIterator<Item = T>>(&mut self, iter: I)753 fn extend<I: IntoIterator<Item = T>>(&mut self, iter: I) { 754 for x in iter { 755 self.push_field(x.as_ref()); 756 } 757 } 758 } 759 760 /// A double-ended iterator over the fields in a byte record. 761 /// 762 /// The `'r` lifetime variable refers to the lifetime of the `ByteRecord` that 763 /// is being iterated over. 764 pub struct ByteRecordIter<'r> { 765 /// The record we are iterating over. 766 r: &'r ByteRecord, 767 /// The starting index of the previous field. (For reverse iteration.) 768 last_start: usize, 769 /// The ending index of the previous field. (For forward iteration.) 770 last_end: usize, 771 /// The index of forward iteration. 772 i_forward: usize, 773 /// The index of reverse iteration. 774 i_reverse: usize, 775 } 776 777 impl<'r> IntoIterator for &'r ByteRecord { 778 type IntoIter = ByteRecordIter<'r>; 779 type Item = &'r [u8]; 780 781 #[inline] into_iter(self) -> ByteRecordIter<'r>782 fn into_iter(self) -> ByteRecordIter<'r> { 783 ByteRecordIter { 784 r: self, 785 last_start: self.as_slice().len(), 786 last_end: 0, 787 i_forward: 0, 788 i_reverse: self.len(), 789 } 790 } 791 } 792 793 impl<'r> ExactSizeIterator for ByteRecordIter<'r> {} 794 795 impl<'r> Iterator for ByteRecordIter<'r> { 796 type Item = &'r [u8]; 797 798 #[inline] next(&mut self) -> Option<&'r [u8]>799 fn next(&mut self) -> Option<&'r [u8]> { 800 if self.i_forward == self.i_reverse { 801 None 802 } else { 803 let start = self.last_end; 804 let end = self.r.0.bounds.ends()[self.i_forward]; 805 self.i_forward += 1; 806 self.last_end = end; 807 Some(&self.r.0.fields[start..end]) 808 } 809 } 810 811 #[inline] size_hint(&self) -> (usize, Option<usize>)812 fn size_hint(&self) -> (usize, Option<usize>) { 813 let x = self.i_reverse - self.i_forward; 814 (x, Some(x)) 815 } 816 817 #[inline] count(self) -> usize818 fn count(self) -> usize { 819 self.len() 820 } 821 } 822 823 impl<'r> DoubleEndedIterator for ByteRecordIter<'r> { 824 #[inline] next_back(&mut self) -> Option<&'r [u8]>825 fn next_back(&mut self) -> Option<&'r [u8]> { 826 if self.i_forward == self.i_reverse { 827 None 828 } else { 829 self.i_reverse -= 1; 830 let start = self 831 .i_reverse 832 .checked_sub(1) 833 .map(|i| self.r.0.bounds.ends()[i]) 834 .unwrap_or(0); 835 let end = self.last_start; 836 self.last_start = start; 837 Some(&self.r.0.fields[start..end]) 838 } 839 } 840 } 841 842 #[cfg(test)] 843 mod tests { 844 use crate::string_record::StringRecord; 845 846 use super::ByteRecord; 847 b(s: &str) -> &[u8]848 fn b(s: &str) -> &[u8] { 849 s.as_bytes() 850 } 851 852 #[test] record_1()853 fn record_1() { 854 let mut rec = ByteRecord::new(); 855 rec.push_field(b"foo"); 856 857 assert_eq!(rec.len(), 1); 858 assert_eq!(rec.get(0), Some(b("foo"))); 859 assert_eq!(rec.get(1), None); 860 assert_eq!(rec.get(2), None); 861 } 862 863 #[test] record_2()864 fn record_2() { 865 let mut rec = ByteRecord::new(); 866 rec.push_field(b"foo"); 867 rec.push_field(b"quux"); 868 869 assert_eq!(rec.len(), 2); 870 assert_eq!(rec.get(0), Some(b("foo"))); 871 assert_eq!(rec.get(1), Some(b("quux"))); 872 assert_eq!(rec.get(2), None); 873 assert_eq!(rec.get(3), None); 874 } 875 876 #[test] empty_record()877 fn empty_record() { 878 let rec = ByteRecord::new(); 879 880 assert_eq!(rec.len(), 0); 881 assert_eq!(rec.get(0), None); 882 assert_eq!(rec.get(1), None); 883 } 884 885 #[test] trim_whitespace_only()886 fn trim_whitespace_only() { 887 let mut rec = ByteRecord::from(vec![b" \t\n\r\x0c"]); 888 rec.trim(); 889 assert_eq!(rec.get(0), Some(b(""))); 890 } 891 892 #[test] trim_front()893 fn trim_front() { 894 let mut rec = ByteRecord::from(vec![b" abc"]); 895 rec.trim(); 896 assert_eq!(rec.get(0), Some(b("abc"))); 897 898 let mut rec = ByteRecord::from(vec![b(" abc"), b(" xyz")]); 899 rec.trim(); 900 assert_eq!(rec.get(0), Some(b("abc"))); 901 assert_eq!(rec.get(1), Some(b("xyz"))); 902 } 903 904 #[test] trim_back()905 fn trim_back() { 906 let mut rec = ByteRecord::from(vec![b"abc "]); 907 rec.trim(); 908 assert_eq!(rec.get(0), Some(b("abc"))); 909 910 let mut rec = ByteRecord::from(vec![b("abc "), b("xyz ")]); 911 rec.trim(); 912 assert_eq!(rec.get(0), Some(b("abc"))); 913 assert_eq!(rec.get(1), Some(b("xyz"))); 914 } 915 916 #[test] trim_both()917 fn trim_both() { 918 let mut rec = ByteRecord::from(vec![b" abc "]); 919 rec.trim(); 920 assert_eq!(rec.get(0), Some(b("abc"))); 921 922 let mut rec = ByteRecord::from(vec![b(" abc "), b(" xyz ")]); 923 rec.trim(); 924 assert_eq!(rec.get(0), Some(b("abc"))); 925 assert_eq!(rec.get(1), Some(b("xyz"))); 926 } 927 928 #[test] trim_does_not_panic_on_empty_records_1()929 fn trim_does_not_panic_on_empty_records_1() { 930 let mut rec = ByteRecord::from(vec![b""]); 931 rec.trim(); 932 assert_eq!(rec.get(0), Some(b(""))); 933 } 934 935 #[test] trim_does_not_panic_on_empty_records_2()936 fn trim_does_not_panic_on_empty_records_2() { 937 let mut rec = ByteRecord::from(vec![b"", b""]); 938 rec.trim(); 939 assert_eq!(rec.get(0), Some(b(""))); 940 assert_eq!(rec.get(1), Some(b(""))); 941 } 942 943 #[test] trim_does_not_panic_on_empty_records_3()944 fn trim_does_not_panic_on_empty_records_3() { 945 let mut rec = ByteRecord::new(); 946 rec.trim(); 947 assert_eq!(rec.as_slice().len(), 0); 948 } 949 950 #[test] empty_field_1()951 fn empty_field_1() { 952 let mut rec = ByteRecord::new(); 953 rec.push_field(b""); 954 955 assert_eq!(rec.len(), 1); 956 assert_eq!(rec.get(0), Some(b(""))); 957 assert_eq!(rec.get(1), None); 958 assert_eq!(rec.get(2), None); 959 } 960 961 #[test] empty_field_2()962 fn empty_field_2() { 963 let mut rec = ByteRecord::new(); 964 rec.push_field(b""); 965 rec.push_field(b""); 966 967 assert_eq!(rec.len(), 2); 968 assert_eq!(rec.get(0), Some(b(""))); 969 assert_eq!(rec.get(1), Some(b(""))); 970 assert_eq!(rec.get(2), None); 971 assert_eq!(rec.get(3), None); 972 } 973 974 #[test] empty_surround_1()975 fn empty_surround_1() { 976 let mut rec = ByteRecord::new(); 977 rec.push_field(b"foo"); 978 rec.push_field(b""); 979 rec.push_field(b"quux"); 980 981 assert_eq!(rec.len(), 3); 982 assert_eq!(rec.get(0), Some(b("foo"))); 983 assert_eq!(rec.get(1), Some(b(""))); 984 assert_eq!(rec.get(2), Some(b("quux"))); 985 assert_eq!(rec.get(3), None); 986 assert_eq!(rec.get(4), None); 987 } 988 989 #[test] empty_surround_2()990 fn empty_surround_2() { 991 let mut rec = ByteRecord::new(); 992 rec.push_field(b"foo"); 993 rec.push_field(b""); 994 rec.push_field(b"quux"); 995 rec.push_field(b""); 996 997 assert_eq!(rec.len(), 4); 998 assert_eq!(rec.get(0), Some(b("foo"))); 999 assert_eq!(rec.get(1), Some(b(""))); 1000 assert_eq!(rec.get(2), Some(b("quux"))); 1001 assert_eq!(rec.get(3), Some(b(""))); 1002 assert_eq!(rec.get(4), None); 1003 assert_eq!(rec.get(5), None); 1004 } 1005 1006 #[test] utf8_error_1()1007 fn utf8_error_1() { 1008 let mut rec = ByteRecord::new(); 1009 rec.push_field(b"foo"); 1010 rec.push_field(b"b\xFFar"); 1011 1012 let err = StringRecord::from_byte_record(rec).unwrap_err(); 1013 assert_eq!(err.utf8_error().field(), 1); 1014 assert_eq!(err.utf8_error().valid_up_to(), 1); 1015 } 1016 1017 #[test] utf8_error_2()1018 fn utf8_error_2() { 1019 let mut rec = ByteRecord::new(); 1020 rec.push_field(b"\xFF"); 1021 1022 let err = StringRecord::from_byte_record(rec).unwrap_err(); 1023 assert_eq!(err.utf8_error().field(), 0); 1024 assert_eq!(err.utf8_error().valid_up_to(), 0); 1025 } 1026 1027 #[test] utf8_error_3()1028 fn utf8_error_3() { 1029 let mut rec = ByteRecord::new(); 1030 rec.push_field(b"a\xFF"); 1031 1032 let err = StringRecord::from_byte_record(rec).unwrap_err(); 1033 assert_eq!(err.utf8_error().field(), 0); 1034 assert_eq!(err.utf8_error().valid_up_to(), 1); 1035 } 1036 1037 #[test] utf8_error_4()1038 fn utf8_error_4() { 1039 let mut rec = ByteRecord::new(); 1040 rec.push_field(b"a"); 1041 rec.push_field(b"b"); 1042 rec.push_field(b"c"); 1043 rec.push_field(b"d"); 1044 rec.push_field(b"xyz\xFF"); 1045 1046 let err = StringRecord::from_byte_record(rec).unwrap_err(); 1047 assert_eq!(err.utf8_error().field(), 4); 1048 assert_eq!(err.utf8_error().valid_up_to(), 3); 1049 } 1050 1051 #[test] utf8_error_5()1052 fn utf8_error_5() { 1053 let mut rec = ByteRecord::new(); 1054 rec.push_field(b"a"); 1055 rec.push_field(b"b"); 1056 rec.push_field(b"c"); 1057 rec.push_field(b"d"); 1058 rec.push_field(b"\xFFxyz"); 1059 1060 let err = StringRecord::from_byte_record(rec).unwrap_err(); 1061 assert_eq!(err.utf8_error().field(), 4); 1062 assert_eq!(err.utf8_error().valid_up_to(), 0); 1063 } 1064 1065 // This tests a tricky case where a single field on its own isn't valid 1066 // UTF-8, but the concatenation of all fields is. 1067 #[test] utf8_error_6()1068 fn utf8_error_6() { 1069 let mut rec = ByteRecord::new(); 1070 rec.push_field(b"a\xc9"); 1071 rec.push_field(b"\x91b"); 1072 1073 let err = StringRecord::from_byte_record(rec).unwrap_err(); 1074 assert_eq!(err.utf8_error().field(), 0); 1075 assert_eq!(err.utf8_error().valid_up_to(), 1); 1076 } 1077 1078 // This tests that we can always clear a `ByteRecord` and get a guaranteed 1079 // successful conversion to UTF-8. This permits reusing the allocation. 1080 #[test] utf8_clear_ok()1081 fn utf8_clear_ok() { 1082 let mut rec = ByteRecord::new(); 1083 rec.push_field(b"\xFF"); 1084 assert!(StringRecord::from_byte_record(rec).is_err()); 1085 1086 let mut rec = ByteRecord::new(); 1087 rec.push_field(b"\xFF"); 1088 rec.clear(); 1089 assert!(StringRecord::from_byte_record(rec).is_ok()); 1090 } 1091 1092 #[test] iter()1093 fn iter() { 1094 let data = vec!["foo", "bar", "baz", "quux", "wat"]; 1095 let rec = ByteRecord::from(&*data); 1096 let got: Vec<&str> = 1097 rec.iter().map(|x| ::std::str::from_utf8(x).unwrap()).collect(); 1098 assert_eq!(data, got); 1099 } 1100 1101 #[test] iter_reverse()1102 fn iter_reverse() { 1103 let mut data = vec!["foo", "bar", "baz", "quux", "wat"]; 1104 let rec = ByteRecord::from(&*data); 1105 let got: Vec<&str> = rec 1106 .iter() 1107 .rev() 1108 .map(|x| ::std::str::from_utf8(x).unwrap()) 1109 .collect(); 1110 data.reverse(); 1111 assert_eq!(data, got); 1112 } 1113 1114 #[test] iter_forward_and_reverse()1115 fn iter_forward_and_reverse() { 1116 let data = vec!["foo", "bar", "baz", "quux", "wat"]; 1117 let rec = ByteRecord::from(data); 1118 let mut it = rec.iter(); 1119 1120 assert_eq!(it.next_back(), Some(b("wat"))); 1121 assert_eq!(it.next(), Some(b("foo"))); 1122 assert_eq!(it.next(), Some(b("bar"))); 1123 assert_eq!(it.next_back(), Some(b("quux"))); 1124 assert_eq!(it.next(), Some(b("baz"))); 1125 assert_eq!(it.next_back(), None); 1126 assert_eq!(it.next(), None); 1127 } 1128 1129 // Check that record equality respects field boundaries. 1130 // 1131 // Regression test for #138. 1132 #[test] eq_field_boundaries()1133 fn eq_field_boundaries() { 1134 let test1 = ByteRecord::from(vec!["12", "34"]); 1135 let test2 = ByteRecord::from(vec!["123", "4"]); 1136 1137 assert_ne!(test1, test2); 1138 } 1139 1140 // Check that record equality respects number of fields. 1141 // 1142 // Regression test for #138. 1143 #[test] eq_record_len()1144 fn eq_record_len() { 1145 let test1 = ByteRecord::from(vec!["12", "34", "56"]); 1146 let test2 = ByteRecord::from(vec!["12", "34"]); 1147 assert_ne!(test1, test2); 1148 } 1149 } 1150