1 // Licensed to the Apache Software Foundation (ASF) under one
2 // or more contributor license agreements. See the NOTICE file
3 // distributed with this work for additional information
4 // regarding copyright ownership. The ASF licenses this file
5 // to you under the Apache License, Version 2.0 (the
6 // "License"); you may not use this file except in compliance
7 // with the License. You may obtain a copy of the License at
8 //
9 // http://www.apache.org/licenses/LICENSE-2.0
10 //
11 // Unless required by applicable law or agreed to in writing,
12 // software distributed under the License is distributed on an
13 // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 // KIND, either express or implied. See the License for the
15 // specific language governing permissions and limitations
16 // under the License.
17
//! Contains the `Row` struct and the `Field` enum that are used to represent Parquet records in Rust.
19
20 use std::fmt;
21
22 use chrono::{Local, TimeZone};
23 use num_bigint::{BigInt, Sign};
24
25 use crate::basic::{LogicalType, Type as PhysicalType};
26 use crate::data_type::{ByteArray, Decimal, Int96};
27 use crate::errors::{ParquetError, Result};
28 use crate::schema::types::ColumnDescPtr;
29
/// Shortcut that panics with a 'not yet implemented' message for a conversion
/// that is not supported.
///
/// Takes the identifier of a column descriptor and the identifier of the value
/// being converted. The panic message reports the descriptor's physical type,
/// its logical type and the `Debug` form of the value.
macro_rules! nyi {
    ($column_descr:ident, $value:ident) => {
        unimplemented!(
            "Conversion for physical type {}, logical type {}, value {:?}",
            $column_descr.physical_type(),
            $column_descr.logical_type(),
            $value
        )
    };
}
41
/// `Row` represents a nested Parquet record.
///
/// A row is an ordered list of `(column name, value)` pairs; values are
/// accessed by position through `RowAccessor` or iterated with
/// `Row::get_column_iter`.
#[derive(Clone, Debug, PartialEq)]
pub struct Row {
    // Ordered `(column name, value)` pairs making up this record.
    fields: Vec<(String, Field)>,
}
47
48 impl Row {
49 /// Get the number of fields in this row.
len(&self) -> usize50 pub fn len(&self) -> usize {
51 self.fields.len()
52 }
53
54 /// Get an iterator to go through all columns in the row.
55 ///
56 /// # Example
57 ///
58 /// ```no_run
59 /// use std::fs::File;
60 /// use parquet::record::Row;
61 /// use parquet::file::reader::{FileReader, SerializedFileReader};
62 ///
63 /// let file = File::open("/path/to/file").unwrap();
64 /// let reader = SerializedFileReader::new(file).unwrap();
65 /// let row: Row = reader.get_row_iter(None).unwrap().next().unwrap();
66 /// for (idx, (name, field)) in row.get_column_iter().enumerate() {
67 /// println!("column index: {}, column name: {}, column value: {}", idx, name, field);
68 /// }
69 /// ```
get_column_iter(&self) -> RowColumnIter70 pub fn get_column_iter(&self) -> RowColumnIter {
71 RowColumnIter {
72 fields: &self.fields,
73 curr: 0,
74 count: self.fields.len(),
75 }
76 }
77 }
78
/// Iterator over the `(name, field)` pairs of a `Row`, created by
/// `Row::get_column_iter`.
pub struct RowColumnIter<'a> {
    // Borrowed columns of the row being iterated.
    fields: &'a Vec<(String, Field)>,
    // Index of the next column to yield.
    curr: usize,
    // Total number of columns; iteration stops once `curr` reaches it.
    count: usize,
}
84
85 impl<'a> Iterator for RowColumnIter<'a> {
86 type Item = (&'a String, &'a Field);
87
next(&mut self) -> Option<Self::Item>88 fn next(&mut self) -> Option<Self::Item> {
89 let idx = self.curr;
90 if idx >= self.count {
91 return None;
92 }
93 self.curr += 1;
94 Some((&self.fields[idx].0, &self.fields[idx].1))
95 }
96 }
97
/// Trait for type-safe convenient access to fields within a Row.
///
/// Every getter takes the position `i` of a field and returns its value as
/// the requested type wrapped in `Result`. Getters for primitive values
/// return the value itself; the remaining getters return a reference.
pub trait RowAccessor {
    // Getters returning primitive values by value.
    fn get_bool(&self, i: usize) -> Result<bool>;
    fn get_byte(&self, i: usize) -> Result<i8>;
    fn get_short(&self, i: usize) -> Result<i16>;
    fn get_int(&self, i: usize) -> Result<i32>;
    fn get_long(&self, i: usize) -> Result<i64>;
    fn get_ubyte(&self, i: usize) -> Result<u8>;
    fn get_ushort(&self, i: usize) -> Result<u16>;
    fn get_uint(&self, i: usize) -> Result<u32>;
    fn get_ulong(&self, i: usize) -> Result<u64>;
    fn get_float(&self, i: usize) -> Result<f32>;
    fn get_double(&self, i: usize) -> Result<f64>;
    fn get_timestamp_millis(&self, i: usize) -> Result<u64>;
    fn get_timestamp_micros(&self, i: usize) -> Result<u64>;
    // Getters returning references to non-primitive values.
    fn get_decimal(&self, i: usize) -> Result<&Decimal>;
    fn get_string(&self, i: usize) -> Result<&String>;
    fn get_bytes(&self, i: usize) -> Result<&ByteArray>;
    fn get_group(&self, i: usize) -> Result<&Row>;
    fn get_list(&self, i: usize) -> Result<&List>;
    fn get_map(&self, i: usize) -> Result<&Map>;
}
120
/// Trait for formatting fields within a Row.
pub trait RowFormatter {
    /// Returns a `Display` trait object for the field at position `i`.
    ///
    /// The return type is spelled with the explicit `dyn` keyword; it is the
    /// same type as the former bare `&fmt::Display`, so existing implementors
    /// keep compiling.
    fn fmt(&self, i: usize) -> &dyn fmt::Display;
}
125
/// Generates a type-safe `get_xxx` method for a primitive field variant,
/// e.g. `get_bool`, `get_short`.
///
/// The generated method indexes `self.fields[i]` directly (an out-of-range
/// `i` panics), returns the wrapped value by copy when the field is the
/// `$VARIANT` variant, and otherwise returns an error naming the actual and
/// the requested variant.
macro_rules! row_primitive_accessor {
    ($METHOD:ident, $VARIANT:ident, $TY:ty) => {
        fn $METHOD(&self, i: usize) -> Result<$TY> {
            let field = &self.fields[i].1;
            if let Field::$VARIANT(value) = *field {
                return Ok(value);
            }
            Err(general_err!(
                "Cannot access {} as {}",
                field.get_type_name(),
                stringify!($VARIANT)
            ))
        }
    };
}
142
/// Generates a type-safe `get_xxx` method for a reference-typed field
/// variant, e.g. `get_list`, `get_map`.
///
/// The generated method indexes `self.fields[i]` directly (an out-of-range
/// `i` panics), returns a reference to the wrapped value when the field is
/// the `$VARIANT` variant, and otherwise returns an error naming the actual
/// and the requested variant.
macro_rules! row_complex_accessor {
    ($METHOD:ident, $VARIANT:ident, $TY:ty) => {
        fn $METHOD(&self, i: usize) -> Result<&$TY> {
            let field = &self.fields[i].1;
            if let Field::$VARIANT(ref value) = *field {
                return Ok(value);
            }
            Err(general_err!(
                "Cannot access {} as {}",
                field.get_type_name(),
                stringify!($VARIANT)
            ))
        }
    };
}
159
160 impl RowFormatter for Row {
161 /// Get Display reference for a given field.
fmt(&self, i: usize) -> &fmt::Display162 fn fmt(&self, i: usize) -> &fmt::Display {
163 &self.fields[i].1
164 }
165 }
166
// Every accessor below is generated by a macro: it indexes `self.fields[i]`
// directly (so an out-of-range `i` panics) and returns an error when the
// field holds a different variant than the one requested.
impl RowAccessor for Row {
    // Primitive variants: the value is returned by copy.
    row_primitive_accessor!(get_bool, Bool, bool);

    row_primitive_accessor!(get_byte, Byte, i8);

    row_primitive_accessor!(get_short, Short, i16);

    row_primitive_accessor!(get_int, Int, i32);

    row_primitive_accessor!(get_long, Long, i64);

    row_primitive_accessor!(get_ubyte, UByte, u8);

    row_primitive_accessor!(get_ushort, UShort, u16);

    row_primitive_accessor!(get_uint, UInt, u32);

    row_primitive_accessor!(get_ulong, ULong, u64);

    row_primitive_accessor!(get_float, Float, f32);

    row_primitive_accessor!(get_double, Double, f64);

    row_primitive_accessor!(get_timestamp_millis, TimestampMillis, u64);

    row_primitive_accessor!(get_timestamp_micros, TimestampMicros, u64);

    // Non-primitive variants: a reference to the stored value is returned.
    row_complex_accessor!(get_decimal, Decimal, Decimal);

    row_complex_accessor!(get_string, Str, String);

    row_complex_accessor!(get_bytes, Bytes, ByteArray);

    row_complex_accessor!(get_group, Group, Row);

    row_complex_accessor!(get_list, ListInternal, List);

    row_complex_accessor!(get_map, MapInternal, Map);
}
206
/// Constructs a `Row` from the list of `fields` and returns it.
///
/// `fields` is taken by value and stored as-is: each entry is a
/// `(column name, value)` pair and the given order is preserved.
#[inline]
pub fn make_row(fields: Vec<(String, Field)>) -> Row {
    Row { fields }
}
212
213 impl fmt::Display for Row {
fmt(&self, f: &mut fmt::Formatter) -> fmt::Result214 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
215 write!(f, "{{")?;
216 for (i, &(ref key, ref value)) in self.fields.iter().enumerate() {
217 key.fmt(f)?;
218 write!(f, ": ")?;
219 value.fmt(f)?;
220 if i < self.fields.len() - 1 {
221 write!(f, ", ")?;
222 }
223 }
224 write!(f, "}}")
225 }
226 }
227
/// `List` represents a list which contains an array of elements.
///
/// Elements are accessed by position through `ListAccessor`.
#[derive(Clone, Debug, PartialEq)]
pub struct List {
    // Elements of the list, in order.
    elements: Vec<Field>,
}
233
impl List {
    /// Get the number of elements in this list.
    pub fn len(&self) -> usize {
        self.elements.len()
    }
}
240
/// Constructs a `List` from the list of `elements` and returns it.
///
/// `elements` is taken by value and stored as-is, preserving the given order.
#[inline]
pub fn make_list(elements: Vec<Field>) -> List {
    List { elements }
}
246
/// Trait for type-safe access of an index for a `List`.
/// Note that the get_XXX methods do not do bound checking.
///
/// Every getter takes the position `i` of an element and returns its value
/// as the requested type wrapped in `Result`. Getters for primitive values
/// return the value itself; the remaining getters return a reference.
pub trait ListAccessor {
    // Getters returning primitive values by value.
    fn get_bool(&self, i: usize) -> Result<bool>;
    fn get_byte(&self, i: usize) -> Result<i8>;
    fn get_short(&self, i: usize) -> Result<i16>;
    fn get_int(&self, i: usize) -> Result<i32>;
    fn get_long(&self, i: usize) -> Result<i64>;
    fn get_ubyte(&self, i: usize) -> Result<u8>;
    fn get_ushort(&self, i: usize) -> Result<u16>;
    fn get_uint(&self, i: usize) -> Result<u32>;
    fn get_ulong(&self, i: usize) -> Result<u64>;
    fn get_float(&self, i: usize) -> Result<f32>;
    fn get_double(&self, i: usize) -> Result<f64>;
    fn get_timestamp_millis(&self, i: usize) -> Result<u64>;
    fn get_timestamp_micros(&self, i: usize) -> Result<u64>;
    // Getters returning references to non-primitive values.
    fn get_decimal(&self, i: usize) -> Result<&Decimal>;
    fn get_string(&self, i: usize) -> Result<&String>;
    fn get_bytes(&self, i: usize) -> Result<&ByteArray>;
    fn get_group(&self, i: usize) -> Result<&Row>;
    fn get_list(&self, i: usize) -> Result<&List>;
    fn get_map(&self, i: usize) -> Result<&Map>;
}
270
/// Generates a type-safe `get_xxx` method for a primitive element variant,
/// e.g. `get_bool`, `get_short`.
///
/// The generated method indexes `self.elements[i]` directly (an out-of-range
/// `i` panics), returns the wrapped value by copy when the element is the
/// `$VARIANT` variant, and otherwise returns an error naming the actual and
/// the requested variant.
macro_rules! list_primitive_accessor {
    ($METHOD:ident, $VARIANT:ident, $TY:ty) => {
        fn $METHOD(&self, i: usize) -> Result<$TY> {
            let element = &self.elements[i];
            if let Field::$VARIANT(value) = *element {
                return Ok(value);
            }
            Err(general_err!(
                "Cannot access {} as {}",
                element.get_type_name(),
                stringify!($VARIANT)
            ))
        }
    };
}
287
/// Generates a type-safe `get_xxx` method for a reference-typed element
/// variant, e.g. `get_list`, `get_map`.
///
/// The generated method indexes `self.elements[i]` directly (an out-of-range
/// `i` panics), returns a reference to the wrapped value when the element is
/// the `$VARIANT` variant, and otherwise returns an error naming the actual
/// and the requested variant. The element is matched in place so the macro
/// works for containers of `Field` as well as containers of `&Field`.
macro_rules! list_complex_accessor {
    ($METHOD:ident, $VARIANT:ident, $TY:ty) => {
        fn $METHOD(&self, i: usize) -> Result<&$TY> {
            if let Field::$VARIANT(ref value) = self.elements[i] {
                return Ok(value);
            }
            Err(general_err!(
                "Cannot access {} as {}",
                self.elements[i].get_type_name(),
                stringify!($VARIANT)
            ))
        }
    };
}
304
// Every accessor below is generated by a macro: it indexes `self.elements[i]`
// directly (so an out-of-range `i` panics) and returns an error when the
// element holds a different variant than the one requested.
impl ListAccessor for List {
    // Primitive variants: the value is returned by copy.
    list_primitive_accessor!(get_bool, Bool, bool);

    list_primitive_accessor!(get_byte, Byte, i8);

    list_primitive_accessor!(get_short, Short, i16);

    list_primitive_accessor!(get_int, Int, i32);

    list_primitive_accessor!(get_long, Long, i64);

    list_primitive_accessor!(get_ubyte, UByte, u8);

    list_primitive_accessor!(get_ushort, UShort, u16);

    list_primitive_accessor!(get_uint, UInt, u32);

    list_primitive_accessor!(get_ulong, ULong, u64);

    list_primitive_accessor!(get_float, Float, f32);

    list_primitive_accessor!(get_double, Double, f64);

    list_primitive_accessor!(get_timestamp_millis, TimestampMillis, u64);

    list_primitive_accessor!(get_timestamp_micros, TimestampMicros, u64);

    // Non-primitive variants: a reference to the stored value is returned.
    list_complex_accessor!(get_decimal, Decimal, Decimal);

    list_complex_accessor!(get_string, Str, String);

    list_complex_accessor!(get_bytes, Bytes, ByteArray);

    list_complex_accessor!(get_group, Group, Row);

    list_complex_accessor!(get_list, ListInternal, List);

    list_complex_accessor!(get_map, MapInternal, Map);
}
344
/// `Map` represents a map which contains a list of key->value pairs.
///
/// Entries are stored as a vector of `(key, value)` pairs; keys and values
/// are exposed separately through `MapAccessor`.
#[derive(Clone, Debug, PartialEq)]
pub struct Map {
    // `(key, value)` pairs of the map, in order.
    entries: Vec<(Field, Field)>,
}
350
impl Map {
    /// Get the number of key-value entries in this map.
    pub fn len(&self) -> usize {
        self.entries.len()
    }
}
357
/// Constructs a `Map` from the list of `entries` and returns it.
///
/// `entries` is taken by value and stored as-is: each entry is a
/// `(key, value)` pair and the given order is preserved.
#[inline]
pub fn make_map(entries: Vec<(Field, Field)>) -> Map {
    Map { entries }
}
363
364 /// Trait for type-safe access of an index for a `Map`
365 pub trait MapAccessor {
get_keys<'a>(&'a self) -> Box<ListAccessor + 'a>366 fn get_keys<'a>(&'a self) -> Box<ListAccessor + 'a>;
get_values<'a>(&'a self) -> Box<ListAccessor + 'a>367 fn get_values<'a>(&'a self) -> Box<ListAccessor + 'a>;
368 }
369
/// Private `ListAccessor` view over borrowed fields.
///
/// `Map::get_keys` and `Map::get_values` build it from references to the
/// keys or the values of the map's entries.
struct MapList<'a> {
    // References to the fields exposed through the accessor, in order.
    elements: Vec<&'a Field>,
}
373
/// Generates a type-safe `get_xxx` method for a primitive element variant of
/// a container that stores `&Field` references, e.g. `get_bool`, `get_short`.
///
/// The generated method indexes `self.elements[i]` directly (an out-of-range
/// `i` panics), returns the wrapped value by copy when the referenced field
/// is the `$VARIANT` variant, and otherwise returns an error naming the
/// actual and the requested variant.
macro_rules! map_list_primitive_accessor {
    ($METHOD:ident, $VARIANT:ident, $TY:ty) => {
        fn $METHOD(&self, i: usize) -> Result<$TY> {
            // Copy the stored reference out and match on the field behind it.
            let element: &Field = self.elements[i];
            match *element {
                Field::$VARIANT(value) => Ok(value),
                _ => Err(general_err!(
                    "Cannot access {} as {}",
                    element.get_type_name(),
                    stringify!($VARIANT)
                )),
            }
        }
    };
}
390
// Every accessor below is generated by a macro: it indexes `self.elements[i]`
// directly (so an out-of-range `i` panics) and returns an error when the
// referenced field holds a different variant than the one requested.
impl<'a> ListAccessor for MapList<'a> {
    // Primitive variants: the value behind the stored reference is copied out.
    map_list_primitive_accessor!(get_bool, Bool, bool);

    map_list_primitive_accessor!(get_byte, Byte, i8);

    map_list_primitive_accessor!(get_short, Short, i16);

    map_list_primitive_accessor!(get_int, Int, i32);

    map_list_primitive_accessor!(get_long, Long, i64);

    map_list_primitive_accessor!(get_ubyte, UByte, u8);

    map_list_primitive_accessor!(get_ushort, UShort, u16);

    map_list_primitive_accessor!(get_uint, UInt, u32);

    map_list_primitive_accessor!(get_ulong, ULong, u64);

    map_list_primitive_accessor!(get_float, Float, f32);

    map_list_primitive_accessor!(get_double, Double, f64);

    map_list_primitive_accessor!(get_timestamp_millis, TimestampMillis, u64);

    map_list_primitive_accessor!(get_timestamp_micros, TimestampMicros, u64);

    // Non-primitive variants reuse the macro that is also used for `List`.
    list_complex_accessor!(get_decimal, Decimal, Decimal);

    list_complex_accessor!(get_string, Str, String);

    list_complex_accessor!(get_bytes, Bytes, ByteArray);

    list_complex_accessor!(get_group, Group, Row);

    list_complex_accessor!(get_list, ListInternal, List);

    list_complex_accessor!(get_map, MapInternal, Map);
}
430
431 impl MapAccessor for Map {
get_keys<'a>(&'a self) -> Box<ListAccessor + 'a>432 fn get_keys<'a>(&'a self) -> Box<ListAccessor + 'a> {
433 let map_list = MapList {
434 elements: self.entries.iter().map(|v| &v.0).collect(),
435 };
436 Box::new(map_list)
437 }
438
get_values<'a>(&'a self) -> Box<ListAccessor + 'a>439 fn get_values<'a>(&'a self) -> Box<ListAccessor + 'a> {
440 let map_list = MapList {
441 elements: self.entries.iter().map(|v| &v.1).collect(),
442 };
443 Box::new(map_list)
444 }
445 }
446
/// API to represent a single field in a `Row`.
#[derive(Clone, Debug, PartialEq)]
pub enum Field {
    // Primitive types
    /// Null value.
    Null,
    /// Boolean value (`true`, `false`).
    Bool(bool),
    /// Signed integer INT_8.
    Byte(i8),
    /// Signed integer INT_16.
    Short(i16),
    /// Signed integer INT_32.
    Int(i32),
    /// Signed integer INT_64.
    Long(i64),
    /// Unsigned integer UINT_8.
    UByte(u8),
    /// Unsigned integer UINT_16.
    UShort(u16),
    /// Unsigned integer UINT_32.
    UInt(u32),
    /// Unsigned integer UINT_64.
    ULong(u64),
    /// IEEE 32-bit floating point value.
    Float(f32),
    /// IEEE 64-bit floating point value.
    Double(f64),
    /// Decimal value.
    Decimal(Decimal),
    /// UTF-8 encoded character string.
    Str(String),
    /// General binary value.
    Bytes(ByteArray),
    /// Date without a time of day, stores the number of days from the
    /// Unix epoch, 1 January 1970.
    Date(u32),
    /// Milliseconds from the Unix epoch, 1 January 1970.
    TimestampMillis(u64),
    /// Microseconds from the Unix epoch, 1 January 1970.
    TimestampMicros(u64),

    // ----------------------------------------------------------------------
    // Complex types
    /// Struct, child elements are tuples of field-value pairs.
    Group(Row),
    /// List of elements.
    ListInternal(List),
    /// List of key-value pairs.
    MapInternal(Map),
}
498
499 impl Field {
500 /// Get the type name.
get_type_name(&self) -> &'static str501 fn get_type_name(&self) -> &'static str {
502 match *self {
503 Field::Null => "Null",
504 Field::Bool(_) => "Bool",
505 Field::Byte(_) => "Byte",
506 Field::Short(_) => "Short",
507 Field::Int(_) => "Int",
508 Field::Long(_) => "Long",
509 Field::UByte(_) => "UByte",
510 Field::UShort(_) => "UShort",
511 Field::UInt(_) => "UInt",
512 Field::ULong(_) => "ULong",
513 Field::Float(_) => "Float",
514 Field::Double(_) => "Double",
515 Field::Decimal(_) => "Decimal",
516 Field::Date(_) => "Date",
517 Field::Str(_) => "Str",
518 Field::Bytes(_) => "Bytes",
519 Field::TimestampMillis(_) => "TimestampMillis",
520 Field::TimestampMicros(_) => "TimestampMicros",
521 Field::Group(_) => "Group",
522 Field::ListInternal(_) => "ListInternal",
523 Field::MapInternal(_) => "MapInternal",
524 }
525 }
526
527 /// Determines if this Row represents a primitive value.
is_primitive(&self) -> bool528 pub fn is_primitive(&self) -> bool {
529 match *self {
530 Field::Group(_) => false,
531 Field::ListInternal(_) => false,
532 Field::MapInternal(_) => false,
533 _ => true,
534 }
535 }
536
537 /// Converts Parquet BOOLEAN type with logical type into `bool` value.
538 #[inline]
convert_bool(_descr: &ColumnDescPtr, value: bool) -> Self539 pub fn convert_bool(_descr: &ColumnDescPtr, value: bool) -> Self {
540 Field::Bool(value)
541 }
542
543 /// Converts Parquet INT32 type with logical type into `i32` value.
544 #[inline]
convert_int32(descr: &ColumnDescPtr, value: i32) -> Self545 pub fn convert_int32(descr: &ColumnDescPtr, value: i32) -> Self {
546 match descr.logical_type() {
547 LogicalType::INT_8 => Field::Byte(value as i8),
548 LogicalType::INT_16 => Field::Short(value as i16),
549 LogicalType::INT_32 | LogicalType::NONE => Field::Int(value),
550 LogicalType::UINT_8 => Field::UByte(value as u8),
551 LogicalType::UINT_16 => Field::UShort(value as u16),
552 LogicalType::UINT_32 => Field::UInt(value as u32),
553 LogicalType::DATE => Field::Date(value as u32),
554 LogicalType::DECIMAL => Field::Decimal(Decimal::from_i32(
555 value,
556 descr.type_precision(),
557 descr.type_scale(),
558 )),
559 _ => nyi!(descr, value),
560 }
561 }
562
563 /// Converts Parquet INT64 type with logical type into `i64` value.
564 #[inline]
convert_int64(descr: &ColumnDescPtr, value: i64) -> Self565 pub fn convert_int64(descr: &ColumnDescPtr, value: i64) -> Self {
566 match descr.logical_type() {
567 LogicalType::INT_64 | LogicalType::NONE => Field::Long(value),
568 LogicalType::UINT_64 => Field::ULong(value as u64),
569 LogicalType::TIMESTAMP_MILLIS => Field::TimestampMillis(value as u64),
570 LogicalType::TIMESTAMP_MICROS => Field::TimestampMicros(value as u64),
571 LogicalType::DECIMAL => Field::Decimal(Decimal::from_i64(
572 value,
573 descr.type_precision(),
574 descr.type_scale(),
575 )),
576 _ => nyi!(descr, value),
577 }
578 }
579
580 /// Converts Parquet INT96 (nanosecond timestamps) type and logical type into
581 /// `Timestamp` value.
582 #[inline]
convert_int96(_descr: &ColumnDescPtr, value: Int96) -> Self583 pub fn convert_int96(_descr: &ColumnDescPtr, value: Int96) -> Self {
584 Field::TimestampMillis(value.to_i64() as u64)
585 }
586
587 /// Converts Parquet FLOAT type with logical type into `f32` value.
588 #[inline]
convert_float(_descr: &ColumnDescPtr, value: f32) -> Self589 pub fn convert_float(_descr: &ColumnDescPtr, value: f32) -> Self {
590 Field::Float(value)
591 }
592
593 /// Converts Parquet DOUBLE type with logical type into `f64` value.
594 #[inline]
convert_double(_descr: &ColumnDescPtr, value: f64) -> Self595 pub fn convert_double(_descr: &ColumnDescPtr, value: f64) -> Self {
596 Field::Double(value)
597 }
598
599 /// Converts Parquet BYTE_ARRAY type with logical type into either UTF8 string or
600 /// array of bytes.
601 #[inline]
convert_byte_array(descr: &ColumnDescPtr, value: ByteArray) -> Self602 pub fn convert_byte_array(descr: &ColumnDescPtr, value: ByteArray) -> Self {
603 match descr.physical_type() {
604 PhysicalType::BYTE_ARRAY => match descr.logical_type() {
605 LogicalType::UTF8 | LogicalType::ENUM | LogicalType::JSON => {
606 let value = String::from_utf8(value.data().to_vec()).unwrap();
607 Field::Str(value)
608 }
609 LogicalType::BSON | LogicalType::NONE => Field::Bytes(value),
610 LogicalType::DECIMAL => Field::Decimal(Decimal::from_bytes(
611 value,
612 descr.type_precision(),
613 descr.type_scale(),
614 )),
615 _ => nyi!(descr, value),
616 },
617 PhysicalType::FIXED_LEN_BYTE_ARRAY => match descr.logical_type() {
618 LogicalType::DECIMAL => Field::Decimal(Decimal::from_bytes(
619 value,
620 descr.type_precision(),
621 descr.type_scale(),
622 )),
623 LogicalType::NONE => Field::Bytes(value),
624 _ => nyi!(descr, value),
625 },
626 _ => nyi!(descr, value),
627 }
628 }
629 }
630
631 impl fmt::Display for Field {
fmt(&self, f: &mut fmt::Formatter) -> fmt::Result632 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
633 match *self {
634 Field::Null => write!(f, "null"),
635 Field::Bool(value) => write!(f, "{}", value),
636 Field::Byte(value) => write!(f, "{}", value),
637 Field::Short(value) => write!(f, "{}", value),
638 Field::Int(value) => write!(f, "{}", value),
639 Field::Long(value) => write!(f, "{}", value),
640 Field::UByte(value) => write!(f, "{}", value),
641 Field::UShort(value) => write!(f, "{}", value),
642 Field::UInt(value) => write!(f, "{}", value),
643 Field::ULong(value) => write!(f, "{}", value),
644 Field::Float(value) => {
645 if value > 1e19 || value < 1e-15 {
646 write!(f, "{:E}", value)
647 } else {
648 write!(f, "{:?}", value)
649 }
650 }
651 Field::Double(value) => {
652 if value > 1e19 || value < 1e-15 {
653 write!(f, "{:E}", value)
654 } else {
655 write!(f, "{:?}", value)
656 }
657 }
658 Field::Decimal(ref value) => {
659 write!(f, "{}", convert_decimal_to_string(value))
660 }
661 Field::Str(ref value) => write!(f, "\"{}\"", value),
662 Field::Bytes(ref value) => write!(f, "{:?}", value.data()),
663 Field::Date(value) => write!(f, "{}", convert_date_to_string(value)),
664 Field::TimestampMillis(value) => {
665 write!(f, "{}", convert_timestamp_millis_to_string(value))
666 }
667 Field::TimestampMicros(value) => {
668 write!(f, "{}", convert_timestamp_micros_to_string(value))
669 }
670 Field::Group(ref fields) => write!(f, "{}", fields),
671 Field::ListInternal(ref list) => {
672 let elems = &list.elements;
673 write!(f, "[")?;
674 for (i, field) in elems.iter().enumerate() {
675 field.fmt(f)?;
676 if i < elems.len() - 1 {
677 write!(f, ", ")?;
678 }
679 }
680 write!(f, "]")
681 }
682 Field::MapInternal(ref map) => {
683 let entries = &map.entries;
684 write!(f, "{{")?;
685 for (i, &(ref key, ref value)) in entries.iter().enumerate() {
686 key.fmt(f)?;
687 write!(f, " -> ")?;
688 value.fmt(f)?;
689 if i < entries.len() - 1 {
690 write!(f, ", ")?;
691 }
692 }
693 write!(f, "}}")
694 }
695 }
696 }
697 }
698
699 /// Helper method to convert Parquet date into a string.
700 /// Input `value` is a number of days since the epoch in UTC.
701 /// Date is displayed in local timezone.
702 #[inline]
convert_date_to_string(value: u32) -> String703 fn convert_date_to_string(value: u32) -> String {
704 static NUM_SECONDS_IN_DAY: i64 = 60 * 60 * 24;
705 let dt = Local.timestamp(value as i64 * NUM_SECONDS_IN_DAY, 0).date();
706 format!("{}", dt.format("%Y-%m-%d %:z"))
707 }
708
709 /// Helper method to convert Parquet timestamp into a string.
710 /// Input `value` is a number of milliseconds since the epoch in UTC.
711 /// Datetime is displayed in local timezone.
712 #[inline]
convert_timestamp_millis_to_string(value: u64) -> String713 fn convert_timestamp_millis_to_string(value: u64) -> String {
714 let dt = Local.timestamp((value / 1000) as i64, 0);
715 format!("{}", dt.format("%Y-%m-%d %H:%M:%S %:z"))
716 }
717
718 /// Helper method to convert Parquet timestamp into a string.
719 /// Input `value` is a number of microseconds since the epoch in UTC.
720 /// Datetime is displayed in local timezone.
721 #[inline]
convert_timestamp_micros_to_string(value: u64) -> String722 fn convert_timestamp_micros_to_string(value: u64) -> String {
723 convert_timestamp_millis_to_string(value / 1000)
724 }
725
726 /// Helper method to convert Parquet decimal into a string.
727 /// We assert that `scale >= 0` and `precision > scale`, but this will be enforced
728 /// when constructing Parquet schema.
729 #[inline]
convert_decimal_to_string(decimal: &Decimal) -> String730 fn convert_decimal_to_string(decimal: &Decimal) -> String {
731 assert!(decimal.scale() >= 0 && decimal.precision() > decimal.scale());
732
733 // Specify as signed bytes to resolve sign as part of conversion.
734 let num = BigInt::from_signed_bytes_be(decimal.data());
735
736 // Offset of the first digit in a string.
737 let negative = if num.sign() == Sign::Minus { 1 } else { 0 };
738 let mut num_str = num.to_string();
739 let mut point = num_str.len() as i32 - decimal.scale() - negative;
740
741 // Convert to string form without scientific notation.
742 if point <= 0 {
743 // Zeros need to be prepended to the unscaled value.
744 while point < 0 {
745 num_str.insert(negative as usize, '0');
746 point += 1;
747 }
748 num_str.insert_str(negative as usize, "0.");
749 } else {
750 // No zeroes need to be prepended to the unscaled value, simply insert decimal
751 // point.
752 num_str.insert((point + negative) as usize, '.');
753 }
754
755 num_str
756 }
757
758 #[cfg(test)]
759 mod tests {
760 use super::*;
761
762 use chrono;
763 use std::rc::Rc;
764
765 use crate::schema::types::{ColumnDescriptor, ColumnPath, PrimitiveTypeBuilder};
766
767 /// Creates test column descriptor based on provided type parameters.
768 macro_rules! make_column_descr {
769 ($physical_type:expr, $logical_type:expr) => {{
770 let tpe = PrimitiveTypeBuilder::new("col", $physical_type)
771 .with_logical_type($logical_type)
772 .build()
773 .unwrap();
774 Rc::new(ColumnDescriptor::new(
775 Rc::new(tpe),
776 None,
777 0,
778 0,
779 ColumnPath::from("col"),
780 ))
781 }};
782 ($physical_type:expr, $logical_type:expr, $len:expr, $prec:expr, $scale:expr) => {{
783 let tpe = PrimitiveTypeBuilder::new("col", $physical_type)
784 .with_logical_type($logical_type)
785 .with_length($len)
786 .with_precision($prec)
787 .with_scale($scale)
788 .build()
789 .unwrap();
790 Rc::new(ColumnDescriptor::new(
791 Rc::new(tpe),
792 None,
793 0,
794 0,
795 ColumnPath::from("col"),
796 ))
797 }};
798 }
799
800 #[test]
test_row_convert_bool()801 fn test_row_convert_bool() {
802 // BOOLEAN value does not depend on logical type
803 let descr = make_column_descr![PhysicalType::BOOLEAN, LogicalType::NONE];
804
805 let row = Field::convert_bool(&descr, true);
806 assert_eq!(row, Field::Bool(true));
807
808 let row = Field::convert_bool(&descr, false);
809 assert_eq!(row, Field::Bool(false));
810 }
811
812 #[test]
test_row_convert_int32()813 fn test_row_convert_int32() {
814 let descr = make_column_descr![PhysicalType::INT32, LogicalType::INT_8];
815 let row = Field::convert_int32(&descr, 111);
816 assert_eq!(row, Field::Byte(111));
817
818 let descr = make_column_descr![PhysicalType::INT32, LogicalType::INT_16];
819 let row = Field::convert_int32(&descr, 222);
820 assert_eq!(row, Field::Short(222));
821
822 let descr = make_column_descr![PhysicalType::INT32, LogicalType::INT_32];
823 let row = Field::convert_int32(&descr, 333);
824 assert_eq!(row, Field::Int(333));
825
826 let descr = make_column_descr![PhysicalType::INT32, LogicalType::UINT_8];
827 let row = Field::convert_int32(&descr, -1);
828 assert_eq!(row, Field::UByte(255));
829
830 let descr = make_column_descr![PhysicalType::INT32, LogicalType::UINT_16];
831 let row = Field::convert_int32(&descr, 256);
832 assert_eq!(row, Field::UShort(256));
833
834 let descr = make_column_descr![PhysicalType::INT32, LogicalType::UINT_32];
835 let row = Field::convert_int32(&descr, 1234);
836 assert_eq!(row, Field::UInt(1234));
837
838 let descr = make_column_descr![PhysicalType::INT32, LogicalType::NONE];
839 let row = Field::convert_int32(&descr, 444);
840 assert_eq!(row, Field::Int(444));
841
842 let descr = make_column_descr![PhysicalType::INT32, LogicalType::DATE];
843 let row = Field::convert_int32(&descr, 14611);
844 assert_eq!(row, Field::Date(14611));
845
846 let descr =
847 make_column_descr![PhysicalType::INT32, LogicalType::DECIMAL, 0, 8, 2];
848 let row = Field::convert_int32(&descr, 444);
849 assert_eq!(row, Field::Decimal(Decimal::from_i32(444, 8, 2)));
850 }
851
852 #[test]
test_row_convert_int64()853 fn test_row_convert_int64() {
854 let descr = make_column_descr![PhysicalType::INT64, LogicalType::INT_64];
855 let row = Field::convert_int64(&descr, 1111);
856 assert_eq!(row, Field::Long(1111));
857
858 let descr = make_column_descr![PhysicalType::INT64, LogicalType::UINT_64];
859 let row = Field::convert_int64(&descr, 78239823);
860 assert_eq!(row, Field::ULong(78239823));
861
862 let descr =
863 make_column_descr![PhysicalType::INT64, LogicalType::TIMESTAMP_MILLIS];
864 let row = Field::convert_int64(&descr, 1541186529153);
865 assert_eq!(row, Field::TimestampMillis(1541186529153));
866
867 let descr =
868 make_column_descr![PhysicalType::INT64, LogicalType::TIMESTAMP_MICROS];
869 let row = Field::convert_int64(&descr, 1541186529153123);
870 assert_eq!(row, Field::TimestampMicros(1541186529153123));
871
872 let descr = make_column_descr![PhysicalType::INT64, LogicalType::NONE];
873 let row = Field::convert_int64(&descr, 2222);
874 assert_eq!(row, Field::Long(2222));
875
876 let descr =
877 make_column_descr![PhysicalType::INT64, LogicalType::DECIMAL, 0, 8, 2];
878 let row = Field::convert_int64(&descr, 3333);
879 assert_eq!(row, Field::Decimal(Decimal::from_i64(3333, 8, 2)));
880 }
881
882 #[test]
test_row_convert_int96()883 fn test_row_convert_int96() {
884 // INT96 value does not depend on logical type
885 let descr = make_column_descr![PhysicalType::INT96, LogicalType::NONE];
886
887 let value = Int96::from(vec![0, 0, 2454923]);
888 let row = Field::convert_int96(&descr, value);
889 assert_eq!(row, Field::TimestampMillis(1238544000000));
890
891 let value = Int96::from(vec![4165425152, 13, 2454923]);
892 let row = Field::convert_int96(&descr, value);
893 assert_eq!(row, Field::TimestampMillis(1238544060000));
894 }
895
896 #[test]
897 #[should_panic(expected = "Expected non-negative milliseconds when converting Int96")]
test_row_convert_int96_invalid()898 fn test_row_convert_int96_invalid() {
899 // INT96 value does not depend on logical type
900 let descr = make_column_descr![PhysicalType::INT96, LogicalType::NONE];
901
902 let value = Int96::from(vec![0, 0, 0]);
903 Field::convert_int96(&descr, value);
904 }
905
/// Verifies that `Field::convert_float` wraps the raw value in `Field::Float`.
#[test]
fn test_row_convert_float() {
    // No logical type is attached to the FLOAT column.
    let float_descr = make_column_descr![PhysicalType::FLOAT, LogicalType::NONE];
    let raw = 2.31;

    assert_eq!(Field::convert_float(&float_descr, raw), Field::Float(raw));
}
913
/// Verifies that `Field::convert_double` wraps the raw value in `Field::Double`.
#[test]
fn test_row_convert_double() {
    // No logical type is attached to the DOUBLE column.
    let double_descr = make_column_descr![PhysicalType::DOUBLE, LogicalType::NONE];
    let raw = 1.56;

    assert_eq!(Field::convert_double(&double_descr, raw), Field::Double(raw));
}
921
/// Verifies `Field::convert_byte_array` for every logical type exercised here:
/// UTF8, ENUM and JSON yield `Field::Str`; NONE and BSON yield `Field::Bytes`;
/// DECIMAL yields `Field::Decimal` for both BYTE_ARRAY and FIXED_LEN_BYTE_ARRAY.
#[test]
fn test_row_convert_byte_array() {
    // UTF8 bytes become a string field.
    let utf8_descr = make_column_descr![PhysicalType::BYTE_ARRAY, LogicalType::UTF8];
    let utf8_field =
        Field::convert_byte_array(&utf8_descr, ByteArray::from(b"ABCD".to_vec()));
    assert_eq!(utf8_field, Field::Str(String::from("ABCD")));

    // ENUM bytes become a string field.
    let enum_descr = make_column_descr![PhysicalType::BYTE_ARRAY, LogicalType::ENUM];
    let enum_field =
        Field::convert_byte_array(&enum_descr, ByteArray::from(b"123".to_vec()));
    assert_eq!(enum_field, Field::Str(String::from("123")));

    // JSON bytes become a string field.
    let json_descr = make_column_descr![PhysicalType::BYTE_ARRAY, LogicalType::JSON];
    let json_field =
        Field::convert_byte_array(&json_descr, ByteArray::from(b"{\"a\":1}".to_vec()));
    assert_eq!(json_field, Field::Str(String::from("{\"a\":1}")));

    // Without a logical type the bytes are passed through untouched.
    let plain_descr = make_column_descr![PhysicalType::BYTE_ARRAY, LogicalType::NONE];
    let plain_bytes = ByteArray::from(vec![1, 2, 3, 4, 5]);
    let plain_field = Field::convert_byte_array(&plain_descr, plain_bytes.clone());
    assert_eq!(plain_field, Field::Bytes(plain_bytes));

    // BSON is also kept as raw bytes.
    let bson_descr = make_column_descr![PhysicalType::BYTE_ARRAY, LogicalType::BSON];
    let bson_bytes = ByteArray::from(vec![1, 2, 3, 4, 5]);
    let bson_field = Field::convert_byte_array(&bson_descr, bson_bytes.clone());
    assert_eq!(bson_field, Field::Bytes(bson_bytes));

    // DECIMAL on BYTE_ARRAY: precision 8 and scale 2 come from the descriptor.
    let decimal_descr =
        make_column_descr![PhysicalType::BYTE_ARRAY, LogicalType::DECIMAL, 0, 8, 2];
    let decimal_bytes = ByteArray::from(vec![207, 200]);
    let decimal_field = Field::convert_byte_array(&decimal_descr, decimal_bytes.clone());
    assert_eq!(
        decimal_field,
        Field::Decimal(Decimal::from_bytes(decimal_bytes, 8, 2))
    );

    // DECIMAL on FIXED_LEN_BYTE_ARRAY: precision 17, scale 5.
    let fixed_decimal_descr = make_column_descr![
        PhysicalType::FIXED_LEN_BYTE_ARRAY,
        LogicalType::DECIMAL,
        8,
        17,
        5
    ];
    let fixed_decimal_bytes = ByteArray::from(vec![0, 0, 0, 0, 0, 4, 147, 224]);
    let fixed_decimal_field =
        Field::convert_byte_array(&fixed_decimal_descr, fixed_decimal_bytes.clone());
    assert_eq!(
        fixed_decimal_field,
        Field::Decimal(Decimal::from_bytes(fixed_decimal_bytes, 17, 5))
    );

    // FIXED_LEN_BYTE_ARRAY without a logical type stays raw bytes.
    let fixed_plain_descr = make_column_descr![
        PhysicalType::FIXED_LEN_BYTE_ARRAY,
        LogicalType::NONE,
        6,
        0,
        0
    ];
    let fixed_plain_bytes = ByteArray::from(vec![1, 2, 3, 4, 5, 6]);
    let fixed_plain_field =
        Field::convert_byte_array(&fixed_plain_descr, fixed_plain_bytes.clone());
    assert_eq!(fixed_plain_field, Field::Bytes(fixed_plain_bytes));
}
985
/// Verifies that `convert_date_to_string` renders a day count the same way
/// chrono renders the corresponding local date with the `%Y-%m-%d %:z` format.
#[test]
fn test_convert_date_to_string() {
    /// Builds the date `y-m-d` at midnight, converts its timestamp to whole
    /// days and compares the helper's output with chrono's formatting.
    fn check_date_conversion(y: i32, m: u32, d: u32) {
        // Seconds in one day; turns a timestamp in seconds into a day count.
        const SECONDS_PER_DAY: i64 = 60 * 60 * 24;

        let datetime = chrono::NaiveDate::from_ymd(y, m, d).and_hms(0, 0, 0);
        let dt = Local.from_utc_datetime(&datetime);
        // All dates used below are after 1970, so the day count is non-negative.
        let res = convert_date_to_string((dt.timestamp() / SECONDS_PER_DAY) as u32);
        let exp = dt.format("%Y-%m-%d %:z").to_string();
        assert_eq!(res, exp);
    }

    // Plain decimal literals: a leading zero would read like an octal literal.
    check_date_conversion(2010, 1, 2);
    check_date_conversion(2014, 5, 1);
    check_date_conversion(2016, 2, 29);
    check_date_conversion(2017, 9, 12);
    check_date_conversion(2018, 3, 31);
}
1002
/// Verifies that `convert_timestamp_millis_to_string` renders a millisecond
/// timestamp the same way chrono renders the corresponding local date-time
/// with the `%Y-%m-%d %H:%M:%S %:z` format.
#[test]
fn test_convert_timestamp_to_string() {
    /// Builds the given date-time, converts it to milliseconds and compares
    /// the helper's output with chrono's formatting.
    fn check_datetime_conversion(y: i32, m: u32, d: u32, h: u32, mi: u32, s: u32) {
        let datetime = chrono::NaiveDate::from_ymd(y, m, d).and_hms(h, mi, s);
        let dt = Local.from_utc_datetime(&datetime);
        // All date-times used below are after 1970, so the value is non-negative.
        let res = convert_timestamp_millis_to_string(dt.timestamp_millis() as u64);
        let exp = dt.format("%Y-%m-%d %H:%M:%S %:z").to_string();
        assert_eq!(res, exp);
    }

    // Plain decimal literals: a leading zero would read like an octal literal.
    check_datetime_conversion(2010, 1, 2, 13, 12, 54);
    check_datetime_conversion(2011, 1, 3, 8, 23, 1);
    check_datetime_conversion(2012, 4, 5, 11, 6, 32);
    check_datetime_conversion(2013, 5, 12, 16, 38, 0);
    check_datetime_conversion(2014, 11, 28, 21, 15, 12);
}
1019
/// Verifies the `Display` output of `Field::Float`, including the switch
/// between plain and `E` notation for very small and very large values.
#[test]
fn test_convert_float_to_string() {
    // (input, expected rendering)
    let cases: [(f32, &str); 8] = [
        (1.0, "1.0"),
        (9.63, "9.63"),
        // 1e-15 is still printed in full; 1e-16 uses exponent notation.
        (1e-15, "0.000000000000001"),
        (1e-16, "1E-16"),
        // 1e19 is still printed in full; 1e20 uses exponent notation.
        (1e19, "10000000000000000000.0"),
        (1e20, "1E20"),
        (1.7976931E30, "1.7976931E30"),
        (-1.7976931E30, "-1.7976931E30"),
    ];

    for &(value, expected) in cases.iter() {
        assert_eq!(Field::Float(value).to_string(), expected);
    }
}
1031
/// Verifies the `Display` output of `Field::Double`, including the switch
/// between plain and `E` notation for very small and very large values.
#[test]
fn test_convert_double_to_string() {
    // (input, expected rendering)
    let cases: [(f64, &str); 8] = [
        (1.0, "1.0"),
        (9.63, "9.63"),
        // 1e-15 is still printed in full; 1e-16 uses exponent notation.
        (1e-15, "0.000000000000001"),
        (1e-16, "1E-16"),
        // 1e19 is still printed in full; 1e20 uses exponent notation.
        (1e19, "10000000000000000000.0"),
        (1e20, "1E20"),
        (1.79769313486E308, "1.79769313486E308"),
        (-1.79769313486E308, "-1.79769313486E308"),
    ];

    for &(value, expected) in cases.iter() {
        assert_eq!(Field::Double(value).to_string(), expected);
    }
}
1049
/// Verifies `convert_decimal_to_string` for decimals built from raw bytes
/// with various precisions and scales, including negative values.
#[test]
fn test_convert_decimal_to_string() {
    /// Builds a decimal from `raw` with the given precision and scale and
    /// asserts that it renders as `expected`.
    fn assert_decimal_str(raw: Vec<u8>, precision: i32, scale: i32, expected: &str) {
        let unscaled = ByteArray::from(raw);
        let decimal = Decimal::from_bytes(unscaled, precision, scale);
        let rendered = convert_decimal_to_string(&decimal);
        assert_eq!(rendered, expected);
    }

    // This example previously used to fail in some engines
    let sixteen_bytes = vec![0, 0, 0, 0, 0, 0, 0, 0, 13, 224, 182, 179, 167, 100, 0, 0];
    assert_decimal_str(sixteen_bytes, 38, 18, "1.000000000000000000");

    // Negative value with a long fractional part.
    let negative_wide = vec![
        249, 233, 247, 16, 185, 192, 202, 223, 215, 165, 192, 166, 67, 72,
    ];
    assert_decimal_str(negative_wide, 36, 28, "-12344.0242342304923409234234293432");

    assert_decimal_str(vec![0, 0, 0, 0, 0, 4, 147, 224], 17, 5, "3.00000");
    assert_decimal_str(vec![0, 0, 0, 0, 1, 201, 195, 140], 18, 2, "300000.12");

    // The same two bytes rendered with different scales.
    assert_decimal_str(vec![207, 200], 10, 2, "-123.44");
    assert_decimal_str(vec![207, 200], 10, 8, "-0.00012344");
}
1078
/// Verifies the `Display` output of `Field` for the primitive variants and
/// for the group, list and map variants.
#[test]
fn test_row_display() {
    // Null, booleans and integers render as their plain value.
    assert_eq!(Field::Null.to_string(), "null");
    assert_eq!(Field::Bool(true).to_string(), "true");
    assert_eq!(Field::Bool(false).to_string(), "false");
    assert_eq!(Field::Byte(1).to_string(), "1");
    assert_eq!(Field::Short(2).to_string(), "2");
    assert_eq!(Field::Int(3).to_string(), "3");
    assert_eq!(Field::Long(4).to_string(), "4");
    assert_eq!(Field::UByte(1).to_string(), "1");
    assert_eq!(Field::UShort(2).to_string(), "2");
    assert_eq!(Field::UInt(3).to_string(), "3");
    assert_eq!(Field::ULong(4).to_string(), "4");

    // Floating point values always show a fractional part.
    assert_eq!(Field::Float(5.0).to_string(), "5.0");
    assert_eq!(Field::Float(5.1234).to_string(), "5.1234");
    assert_eq!(Field::Double(6.0).to_string(), "6.0");
    assert_eq!(Field::Double(6.1234).to_string(), "6.1234");

    // Strings are wrapped in double quotes; bytes render as a bracketed list.
    assert_eq!(Field::Str("abc".to_string()).to_string(), "\"abc\"");
    let bytes_field = Field::Bytes(ByteArray::from(vec![1, 2, 3]));
    assert_eq!(bytes_field.to_string(), "[1, 2, 3]");

    // Date, timestamp and decimal variants must agree with the helper functions.
    assert_eq!(Field::Date(14611).to_string(), convert_date_to_string(14611));
    assert_eq!(
        Field::TimestampMillis(1262391174000).to_string(),
        convert_timestamp_millis_to_string(1262391174000)
    );
    assert_eq!(
        Field::TimestampMicros(1262391174000000).to_string(),
        convert_timestamp_micros_to_string(1262391174000000)
    );
    let decimal = Decimal::from_i32(4, 8, 2);
    assert_eq!(
        Field::Decimal(Decimal::from_i32(4, 8, 2)).to_string(),
        convert_decimal_to_string(&decimal)
    );

    // Group: `{name: value, ...}`
    let group = Field::Group(make_row(vec![
        ("x".to_string(), Field::Null),
        ("Y".to_string(), Field::Int(2)),
        ("z".to_string(), Field::Float(3.1)),
        ("a".to_string(), Field::Str("abc".to_string())),
    ]));
    assert_eq!(group.to_string(), "{x: null, Y: 2, z: 3.1, a: \"abc\"}");

    // List: `[value, ...]`
    let elements = vec![Field::Int(2), Field::Int(1), Field::Null, Field::Int(12)];
    let list = Field::ListInternal(make_list(elements));
    assert_eq!(list.to_string(), "[2, 1, null, 12]");

    // Map: `{key -> value, ...}`
    let entries = vec![
        (Field::Int(1), Field::Float(1.2)),
        (Field::Int(2), Field::Float(4.5)),
        (Field::Int(3), Field::Float(2.3)),
    ];
    let map = Field::MapInternal(make_map(entries));
    assert_eq!(map.to_string(), "{1 -> 1.2, 2 -> 4.5, 3 -> 2.3}");
}
1144
/// Verifies that `Field::is_primitive` is true for the scalar variants listed
/// below and false for the group, list and map variants.
#[test]
fn test_is_primitive() {
    // primitives
    assert!(Field::Null.is_primitive());
    assert!(Field::Bool(true).is_primitive());
    assert!(Field::Bool(false).is_primitive());
    assert!(Field::Byte(1).is_primitive());
    assert!(Field::Short(2).is_primitive());
    assert!(Field::Int(3).is_primitive());
    assert!(Field::Long(4).is_primitive());
    assert!(Field::UByte(1).is_primitive());
    assert!(Field::UShort(2).is_primitive());
    assert!(Field::UInt(3).is_primitive());
    assert!(Field::ULong(4).is_primitive());
    assert!(Field::Float(5.0).is_primitive());
    assert!(Field::Float(5.1234).is_primitive());
    assert!(Field::Double(6.0).is_primitive());
    assert!(Field::Double(6.1234).is_primitive());
    assert!(Field::Str("abc".to_string()).is_primitive());
    assert!(Field::Bytes(ByteArray::from(vec![1, 2, 3])).is_primitive());
    assert!(Field::TimestampMillis(12345678).is_primitive());
    assert!(Field::TimestampMicros(12345678901).is_primitive());
    assert!(Field::Decimal(Decimal::from_i32(4, 8, 2)).is_primitive());

    // complex types: negated assertions instead of comparing against `false`
    assert!(!Field::Group(make_row(vec![
        ("x".to_string(), Field::Null),
        ("Y".to_string(), Field::Int(2)),
        ("z".to_string(), Field::Float(3.1)),
        ("a".to_string(), Field::Str("abc".to_string()))
    ]))
    .is_primitive());

    assert!(!Field::ListInternal(make_list(vec![
        Field::Int(2),
        Field::Int(1),
        Field::Null,
        Field::Int(12)
    ]))
    .is_primitive());

    assert!(!Field::MapInternal(make_map(vec![
        (Field::Int(1), Field::Float(1.2)),
        (Field::Int(2), Field::Float(4.5)),
        (Field::Int(3), Field::Float(2.3))
    ]))
    .is_primitive());
}
1202
/// Verifies that `row.fmt(i)` renders each primitive field of a row exactly
/// as expected, index by index.
#[test]
fn test_row_primitive_field_fmt() {
    // One field per primitive variant; names are the zero-padded positions.
    let bytes = ByteArray::from(vec![1, 2, 3, 4, 5]);
    let row = make_row(vec![
        ("00".to_string(), Field::Null),
        ("01".to_string(), Field::Bool(false)),
        ("02".to_string(), Field::Byte(3)),
        ("03".to_string(), Field::Short(4)),
        ("04".to_string(), Field::Int(5)),
        ("05".to_string(), Field::Long(6)),
        ("06".to_string(), Field::UByte(7)),
        ("07".to_string(), Field::UShort(8)),
        ("08".to_string(), Field::UInt(9)),
        ("09".to_string(), Field::ULong(10)),
        ("10".to_string(), Field::Float(11.1)),
        ("11".to_string(), Field::Double(12.1)),
        ("12".to_string(), Field::Str("abc".to_string())),
        ("13".to_string(), Field::Bytes(bytes)),
        ("14".to_string(), Field::Date(14611)),
        ("15".to_string(), Field::TimestampMillis(1262391174000)),
        ("16".to_string(), Field::TimestampMicros(1262391174000000)),
        ("17".to_string(), Field::Decimal(Decimal::from_i32(4, 7, 2))),
    ]);

    // Expected rendering for each position, in the same order as the fields.
    let expected: Vec<String> = vec![
        "null".to_string(),
        "false".to_string(),
        "3".to_string(),
        "4".to_string(),
        "5".to_string(),
        "6".to_string(),
        "7".to_string(),
        "8".to_string(),
        "9".to_string(),
        "10".to_string(),
        "11.1".to_string(),
        "12.1".to_string(),
        "\"abc\"".to_string(),
        "[1, 2, 3, 4, 5]".to_string(),
        // Date and timestamps are compared against the conversion helpers.
        convert_date_to_string(14611),
        convert_timestamp_millis_to_string(1262391174000),
        convert_timestamp_micros_to_string(1262391174000000),
        "0.04".to_string(),
    ];

    for (idx, want) in expected.iter().enumerate() {
        assert_eq!(want.as_str(), row.fmt(idx).to_string());
    }
}
1255
/// Verifies that `row.fmt(i)` renders group, list and map fields of a row.
#[test]
fn test_row_complex_field_fmt() {
    // Build the three nested values first, then place them in a row.
    let group = Field::Group(make_row(vec![
        ("x".to_string(), Field::Null),
        ("Y".to_string(), Field::Int(2)),
    ]));
    let list = Field::ListInternal(make_list(vec![
        Field::Int(2),
        Field::Int(1),
        Field::Null,
        Field::Int(12),
    ]));
    let map = Field::MapInternal(make_map(vec![
        (Field::Int(1), Field::Float(1.2)),
        (Field::Int(2), Field::Float(4.5)),
        (Field::Int(3), Field::Float(2.3)),
    ]));

    let row = make_row(vec![
        ("00".to_string(), group),
        ("01".to_string(), list),
        ("02".to_string(), map),
    ]);

    assert_eq!("{x: null, Y: 2}", row.fmt(0).to_string());
    assert_eq!("[2, 1, null, 12]", row.fmt(1).to_string());
    assert_eq!("{1 -> 1.2, 2 -> 4.5, 3 -> 2.3}", row.fmt(2).to_string());
}
1290
/// Verifies that each typed `get_*` accessor on a row returns the value
/// stored at the corresponding index.
#[test]
fn test_row_primitive_accessors() {
    // primitives
    let row = make_row(vec![
        ("a".to_string(), Field::Null),
        ("b".to_string(), Field::Bool(false)),
        ("c".to_string(), Field::Byte(3)),
        ("d".to_string(), Field::Short(4)),
        ("e".to_string(), Field::Int(5)),
        ("f".to_string(), Field::Long(6)),
        ("g".to_string(), Field::UByte(3)),
        ("h".to_string(), Field::UShort(4)),
        ("i".to_string(), Field::UInt(5)),
        ("j".to_string(), Field::ULong(6)),
        ("k".to_string(), Field::Float(7.1)),
        ("l".to_string(), Field::Double(8.1)),
        ("m".to_string(), Field::Str("abc".to_string())),
        (
            "n".to_string(),
            Field::Bytes(ByteArray::from(vec![1, 2, 3, 4, 5])),
        ),
        ("o".to_string(), Field::Decimal(Decimal::from_i32(4, 7, 2))),
    ]);

    // Index 0 holds `Field::Null` and is not read by any accessor here.
    // Boolean result is asserted directly rather than compared against `false`.
    assert!(!row.get_bool(1).unwrap());
    assert_eq!(3, row.get_byte(2).unwrap());
    assert_eq!(4, row.get_short(3).unwrap());
    assert_eq!(5, row.get_int(4).unwrap());
    assert_eq!(6, row.get_long(5).unwrap());
    assert_eq!(3, row.get_ubyte(6).unwrap());
    assert_eq!(4, row.get_ushort(7).unwrap());
    assert_eq!(5, row.get_uint(8).unwrap());
    assert_eq!(6, row.get_ulong(9).unwrap());
    assert_eq!(7.1, row.get_float(10).unwrap());
    assert_eq!(8.1, row.get_double(11).unwrap());
    assert_eq!("abc", row.get_string(12).unwrap());
    // Bytes and decimal are checked through their length and precision.
    assert_eq!(5, row.get_bytes(13).unwrap().len());
    assert_eq!(7, row.get_decimal(14).unwrap().precision());
}
1330
/// Verifies that `get_group` fails for every index of a row that contains
/// only primitive fields.
#[test]
fn test_row_primitive_invalid_accessors() {
    // A row made exclusively of primitive values.
    let bytes = ByteArray::from(vec![1, 2, 3, 4, 5]);
    let decimal = Decimal::from_i32(4, 7, 2);
    let row = make_row(vec![
        ("a".to_string(), Field::Null),
        ("b".to_string(), Field::Bool(false)),
        ("c".to_string(), Field::Byte(3)),
        ("d".to_string(), Field::Short(4)),
        ("e".to_string(), Field::Int(5)),
        ("f".to_string(), Field::Long(6)),
        ("g".to_string(), Field::UByte(3)),
        ("h".to_string(), Field::UShort(4)),
        ("i".to_string(), Field::UInt(5)),
        ("j".to_string(), Field::ULong(6)),
        ("k".to_string(), Field::Float(7.1)),
        ("l".to_string(), Field::Double(8.1)),
        ("m".to_string(), Field::Str("abc".to_string())),
        ("n".to_string(), Field::Bytes(bytes)),
        ("o".to_string(), Field::Decimal(decimal)),
    ]);

    // None of the positions holds a group, so every lookup must be an error.
    let field_count = row.len();
    (0..field_count).for_each(|idx| {
        assert!(row.get_group(idx).is_err());
    });
}
1359
/// Verifies that `get_group`, `get_list` and `get_map` on a row return the
/// nested values, checked through their lengths.
#[test]
fn test_row_complex_accessors() {
    // Nested values are built up front and then moved into the row.
    let group = Field::Group(make_row(vec![
        ("x".to_string(), Field::Null),
        ("Y".to_string(), Field::Int(2)),
    ]));
    let list = Field::ListInternal(make_list(vec![
        Field::Int(2),
        Field::Int(1),
        Field::Null,
        Field::Int(12),
    ]));
    let map = Field::MapInternal(make_map(vec![
        (Field::Int(1), Field::Float(1.2)),
        (Field::Int(2), Field::Float(4.5)),
        (Field::Int(3), Field::Float(2.3)),
    ]));

    let row = make_row(vec![
        ("a".to_string(), group),
        ("b".to_string(), list),
        ("c".to_string(), map),
    ]);

    // Two group fields, four list elements, three map entries.
    assert_eq!(2, row.get_group(0).unwrap().len());
    assert_eq!(4, row.get_list(1).unwrap().len());
    assert_eq!(3, row.get_map(2).unwrap().len());
}
1393
/// Verifies that calling `get_float` on group, list and map fields of a row
/// fails with a `ParquetError::General` that names the actual variant.
#[test]
fn test_row_complex_invalid_accessors() {
    // Nested values are built up front and then moved into the row.
    let group = Field::Group(make_row(vec![
        ("x".to_string(), Field::Null),
        ("Y".to_string(), Field::Int(2)),
    ]));
    let list = Field::ListInternal(make_list(vec![
        Field::Int(2),
        Field::Int(1),
        Field::Null,
        Field::Int(12),
    ]));
    let map = Field::MapInternal(make_map(vec![
        (Field::Int(1), Field::Float(1.2)),
        (Field::Int(2), Field::Float(4.5)),
        (Field::Int(3), Field::Float(2.3)),
    ]));

    let row = make_row(vec![
        ("a".to_string(), group),
        ("b".to_string(), list),
        ("c".to_string(), map),
    ]);

    // Expected error message for each field position, in row order.
    let expected_messages = [
        "Cannot access Group as Float",
        "Cannot access ListInternal as Float",
        "Cannot access MapInternal as Float",
    ];

    for (idx, &message) in expected_messages.iter().enumerate() {
        assert_eq!(
            ParquetError::General(message.to_string()),
            row.get_float(idx).unwrap_err()
        );
    }
}
1436
/// Verifies that each typed `get_*` accessor on a list returns the element
/// stored at the requested index.
#[test]
fn test_list_primitive_accessors() {
    // primitives
    let list = make_list(vec![Field::Bool(false)]);
    // Boolean result is asserted directly rather than compared against `false`.
    assert!(!list.get_bool(0).unwrap());

    let list = make_list(vec![Field::Byte(3), Field::Byte(4)]);
    assert_eq!(4, list.get_byte(1).unwrap());

    let list = make_list(vec![Field::Short(4), Field::Short(5), Field::Short(6)]);
    assert_eq!(6, list.get_short(2).unwrap());

    let list = make_list(vec![Field::Int(5)]);
    assert_eq!(5, list.get_int(0).unwrap());

    let list = make_list(vec![Field::Long(6), Field::Long(7)]);
    assert_eq!(7, list.get_long(1).unwrap());

    let list = make_list(vec![Field::UByte(3), Field::UByte(4)]);
    assert_eq!(4, list.get_ubyte(1).unwrap());

    let list = make_list(vec![Field::UShort(4), Field::UShort(5), Field::UShort(6)]);
    assert_eq!(6, list.get_ushort(2).unwrap());

    let list = make_list(vec![Field::UInt(5)]);
    assert_eq!(5, list.get_uint(0).unwrap());

    let list = make_list(vec![Field::ULong(6), Field::ULong(7)]);
    assert_eq!(7, list.get_ulong(1).unwrap());

    let list = make_list(vec![
        Field::Float(8.1),
        Field::Float(9.2),
        Field::Float(10.3),
    ]);
    assert_eq!(10.3, list.get_float(2).unwrap());

    let list = make_list(vec![Field::Double(3.1415)]);
    assert_eq!(3.1415, list.get_double(0).unwrap());

    let list = make_list(vec![Field::Str("abc".to_string())]);
    // Compared against the literal directly, as the row accessor test does,
    // so no temporary `String` is allocated.
    assert_eq!("abc", list.get_string(0).unwrap());

    let list = make_list(vec![Field::Bytes(ByteArray::from(vec![1, 2, 3, 4, 5]))]);
    assert_eq!(&[1, 2, 3, 4, 5], list.get_bytes(0).unwrap().data());

    // `Decimal::from_i32(4, ..)` is expected to expose the bytes `[0, 0, 0, 4]`.
    let list = make_list(vec![Field::Decimal(Decimal::from_i32(4, 5, 2))]);
    assert_eq!(&[0, 0, 0, 4], list.get_decimal(0).unwrap().data());
}
1486
/// Verifies that a list accessor whose type does not match the stored
/// element returns an error.
#[test]
fn test_list_primitive_invalid_accessors() {
    // Each case reads an element through an accessor of a different type.

    // Bool read as byte.
    assert!(make_list(vec![Field::Bool(false)]).get_byte(0).is_err());

    // Byte read as short.
    assert!(make_list(vec![Field::Byte(3), Field::Byte(4)])
        .get_short(1)
        .is_err());

    // Short read as int.
    assert!(
        make_list(vec![Field::Short(4), Field::Short(5), Field::Short(6)])
            .get_int(2)
            .is_err()
    );

    // Int read as long.
    assert!(make_list(vec![Field::Int(5)]).get_long(0).is_err());

    // Long read as float.
    assert!(make_list(vec![Field::Long(6), Field::Long(7)])
        .get_float(1)
        .is_err());

    // Unsigned byte read as (signed) short.
    assert!(make_list(vec![Field::UByte(3), Field::UByte(4)])
        .get_short(1)
        .is_err());

    // Unsigned short read as (signed) int.
    assert!(
        make_list(vec![Field::UShort(4), Field::UShort(5), Field::UShort(6)])
            .get_int(2)
            .is_err()
    );

    // Unsigned int read as (signed) long.
    assert!(make_list(vec![Field::UInt(5)]).get_long(0).is_err());

    // Unsigned long read as float.
    assert!(make_list(vec![Field::ULong(6), Field::ULong(7)])
        .get_float(1)
        .is_err());

    // Float read as double.
    let floats = vec![Field::Float(8.1), Field::Float(9.2), Field::Float(10.3)];
    assert!(make_list(floats).get_double(2).is_err());

    // Double read as string.
    assert!(make_list(vec![Field::Double(3.1415)]).get_string(0).is_err());

    // String read as bytes.
    assert!(make_list(vec![Field::Str("abc".to_string())])
        .get_bytes(0)
        .is_err());

    // Bytes read as bool.
    let raw = ByteArray::from(vec![1, 2, 3, 4, 5]);
    assert!(make_list(vec![Field::Bytes(raw)]).get_bool(0).is_err());

    // Decimal read as bool.
    let decimal = Decimal::from_i32(4, 5, 2);
    assert!(make_list(vec![Field::Decimal(decimal)]).get_bool(0).is_err());
}
1536
/// Verifies that `get_group`, `get_list` and `get_map` on a list return the
/// nested element, checked through its length.
#[test]
fn test_list_complex_accessors() {
    // A list holding a single group with two fields.
    let group = Field::Group(make_row(vec![
        ("x".to_string(), Field::Null),
        ("Y".to_string(), Field::Int(2)),
    ]));
    let outer = make_list(vec![group]);
    assert_eq!(2, outer.get_group(0).unwrap().len());

    // A list holding a single nested list with four elements.
    let inner_list = Field::ListInternal(make_list(vec![
        Field::Int(2),
        Field::Int(1),
        Field::Null,
        Field::Int(12),
    ]));
    let outer = make_list(vec![inner_list]);
    assert_eq!(4, outer.get_list(0).unwrap().len());

    // A list holding a single map with three entries.
    let inner_map = Field::MapInternal(make_map(vec![
        (Field::Int(1), Field::Float(1.2)),
        (Field::Int(2), Field::Float(4.5)),
        (Field::Int(3), Field::Float(2.3)),
    ]));
    let outer = make_list(vec![inner_map]);
    assert_eq!(3, outer.get_map(0).unwrap().len());
}
1560
/// Verifies that calling `get_float` on a list element that is a group, list
/// or map fails with a `ParquetError::General` naming the actual variant.
///
/// The expected errors are constructed directly as `ParquetError::General`,
/// the same way `test_row_complex_invalid_accessors` builds them.
#[test]
fn test_list_complex_invalid_accessors() {
    let list = make_list(vec![Field::Group(make_row(vec![
        ("x".to_string(), Field::Null),
        ("Y".to_string(), Field::Int(2)),
    ]))]);
    assert_eq!(
        ParquetError::General("Cannot access Group as Float".to_string()),
        list.get_float(0).unwrap_err()
    );

    let list = make_list(vec![Field::ListInternal(make_list(vec![
        Field::Int(2),
        Field::Int(1),
        Field::Null,
        Field::Int(12),
    ]))]);
    assert_eq!(
        ParquetError::General("Cannot access ListInternal as Float".to_string()),
        list.get_float(0).unwrap_err()
    );

    let list = make_list(vec![Field::MapInternal(make_map(vec![
        (Field::Int(1), Field::Float(1.2)),
        (Field::Int(2), Field::Float(4.5)),
        (Field::Int(3), Field::Float(2.3)),
    ]))]);
    assert_eq!(
        ParquetError::General("Cannot access MapInternal as Float".to_string()),
        list.get_float(0).unwrap_err()
    );
}
1593
/// Verifies `len`, `get_keys` and `get_values` on a map built from five
/// int-to-string entries.
#[test]
fn test_map_accessors() {
    // a map from int to string
    let map = make_map(vec![
        (Field::Int(1), Field::Str("a".to_string())),
        (Field::Int(2), Field::Str("b".to_string())),
        (Field::Int(3), Field::Str("c".to_string())),
        (Field::Int(4), Field::Str("d".to_string())),
        (Field::Int(5), Field::Str("e".to_string())),
    ]);

    assert_eq!(5, map.len());
    for i in 0..5 {
        // Entry `i` is expected to hold key `i + 1` ...
        assert_eq!((i + 1) as i32, map.get_keys().get_int(i).unwrap());
        // ... and the i-th lowercase letter, starting at "a". A byte literal
        // and `char::from` replace the `'a' as u8` / `as char` casts.
        assert_eq!(
            &char::from(b'a' + i as u8).to_string(),
            map.get_values().get_string(i).unwrap()
        );
    }
}
1614 }
1615