1 // Licensed to the Apache Software Foundation (ASF) under one
2 // or more contributor license agreements.  See the NOTICE file
3 // distributed with this work for additional information
4 // regarding copyright ownership.  The ASF licenses this file
5 // to you under the Apache License, Version 2.0 (the
6 // "License"); you may not use this file except in compliance
7 // with the License.  You may obtain a copy of the License at
8 //
9 //   http://www.apache.org/licenses/LICENSE-2.0
10 //
11 // Unless required by applicable law or agreed to in writing,
12 // software distributed under the License is distributed on an
13 // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 // KIND, either express or implied.  See the License for the
15 // specific language governing permissions and limitations
16 // under the License.
17 
//! Contains the `Row` struct and the `Field` enum that are used to represent records in Rust.
19 
20 use std::fmt;
21 
22 use chrono::{Local, TimeZone};
23 use num_bigint::{BigInt, Sign};
24 
25 use crate::basic::{LogicalType, Type as PhysicalType};
26 use crate::data_type::{ByteArray, Decimal, Int96};
27 use crate::errors::{ParquetError, Result};
28 use crate::schema::types::ColumnDescPtr;
29 
/// Macro as a shortcut to generate 'not yet implemented' panic error.
///
/// Takes two identifiers: a column descriptor and the value being converted.
/// Expands to an `unimplemented!` call whose message reports the descriptor's
/// physical type, its logical type and the `Debug` form of the value.
macro_rules! nyi {
    ($column_descr:ident, $value:ident) => {{
        unimplemented!(
            "Conversion for physical type {}, logical type {}, value {:?}",
            $column_descr.physical_type(),
            $column_descr.logical_type(),
            $value
        );
    }};
}
41 
/// `Row` represents a nested Parquet record.
///
/// A row is stored as an ordered list of `(column name, value)` pairs.
#[derive(Clone, Debug, PartialEq)]
pub struct Row {
    // Columns of the record, kept in the order in which they were supplied.
    fields: Vec<(String, Field)>,
}
47 
48 impl Row {
49     /// Get the number of fields in this row.
len(&self) -> usize50     pub fn len(&self) -> usize {
51         self.fields.len()
52     }
53 
54     /// Get an iterator to go through all columns in the row.
55     ///
56     /// # Example
57     ///
58     /// ```no_run
59     /// use std::fs::File;
60     /// use parquet::record::Row;
61     /// use parquet::file::reader::{FileReader, SerializedFileReader};
62     ///
63     /// let file = File::open("/path/to/file").unwrap();
64     /// let reader = SerializedFileReader::new(file).unwrap();
65     /// let row: Row = reader.get_row_iter(None).unwrap().next().unwrap();
66     /// for (idx, (name, field)) in row.get_column_iter().enumerate() {
67     ///     println!("column index: {}, column name: {}, column value: {}", idx, name, field);
68     /// }
69     /// ```
get_column_iter(&self) -> RowColumnIter70     pub fn get_column_iter(&self) -> RowColumnIter {
71         RowColumnIter {
72             fields: &self.fields,
73             curr: 0,
74             count: self.fields.len(),
75         }
76     }
77 }
78 
/// Iterator over the `(name, value)` pairs of a `Row`, created by
/// `Row::get_column_iter`.
pub struct RowColumnIter<'a> {
    // Borrowed column list of the row being iterated.
    fields: &'a Vec<(String, Field)>,
    // Index of the next column to yield.
    curr: usize,
    // Number of columns, captured when the iterator was created.
    count: usize,
}
84 
85 impl<'a> Iterator for RowColumnIter<'a> {
86     type Item = (&'a String, &'a Field);
87 
next(&mut self) -> Option<Self::Item>88     fn next(&mut self) -> Option<Self::Item> {
89         let idx = self.curr;
90         if idx >= self.count {
91             return None;
92         }
93         self.curr += 1;
94         Some((&self.fields[idx].0, &self.fields[idx].1))
95     }
96 }
97 
/// Trait for type-safe convenient access to fields within a Row.
///
/// Every getter takes the zero-based position `i` of the field. In the
/// implementation for `Row` in this file a getter returns `Ok` when the field
/// holds the matching `Field` variant and an error ("Cannot access .. as ..")
/// otherwise; the position itself is not range-checked, so an out-of-range
/// `i` panics on indexing.
pub trait RowAccessor {
    // Getters that return a copy of the stored primitive value.
    fn get_bool(&self, i: usize) -> Result<bool>;
    fn get_byte(&self, i: usize) -> Result<i8>;
    fn get_short(&self, i: usize) -> Result<i16>;
    fn get_int(&self, i: usize) -> Result<i32>;
    fn get_long(&self, i: usize) -> Result<i64>;
    fn get_ubyte(&self, i: usize) -> Result<u8>;
    fn get_ushort(&self, i: usize) -> Result<u16>;
    fn get_uint(&self, i: usize) -> Result<u32>;
    fn get_ulong(&self, i: usize) -> Result<u64>;
    fn get_float(&self, i: usize) -> Result<f32>;
    fn get_double(&self, i: usize) -> Result<f64>;
    fn get_timestamp_millis(&self, i: usize) -> Result<u64>;
    fn get_timestamp_micros(&self, i: usize) -> Result<u64>;
    // Getters that return a reference to the stored value.
    fn get_decimal(&self, i: usize) -> Result<&Decimal>;
    fn get_string(&self, i: usize) -> Result<&String>;
    fn get_bytes(&self, i: usize) -> Result<&ByteArray>;
    fn get_group(&self, i: usize) -> Result<&Row>;
    fn get_list(&self, i: usize) -> Result<&List>;
    fn get_map(&self, i: usize) -> Result<&Map>;
}
120 
/// Trait for formatting fields within a Row.
pub trait RowFormatter {
    /// Returns a `Display` trait object for the field at zero-based position `i`.
    ///
    /// Written with the explicit `dyn` keyword; the type is the same as the
    /// former bare `&fmt::Display`, so existing implementors keep compiling.
    fn fmt(&self, i: usize) -> &dyn fmt::Display;
}
125 
/// Generates a by-value `Row` getter for a primitive `Field` variant,
/// e.g. `get_bool`, `get_short`.
///
/// `$METHOD` is the getter name, `$VARIANT` the `Field` variant it accepts and
/// `$TY` the type stored in that variant.
macro_rules! row_primitive_accessor {
    ($METHOD:ident, $VARIANT:ident, $TY:ty) => {
        fn $METHOD(&self, i: usize) -> Result<$TY> {
            // Indexing is unchecked: an out-of-range `i` panics. Only a variant
            // mismatch is reported through `Err`.
            match &self.fields[i].1 {
                Field::$VARIANT(value) => Ok(*value),
                other => Err(general_err!(
                    "Cannot access {} as {}",
                    other.get_type_name(),
                    stringify!($VARIANT)
                )),
            }
        }
    };
}
142 
/// Generates a by-reference `Row` getter for a non-`Copy` `Field` variant,
/// e.g. `get_list`, `get_map`.
///
/// `$METHOD` is the getter name, `$VARIANT` the `Field` variant it accepts and
/// `$TY` the type stored in that variant; the getter hands out `&$TY`.
macro_rules! row_complex_accessor {
    ($METHOD:ident, $VARIANT:ident, $TY:ty) => {
        fn $METHOD(&self, i: usize) -> Result<&$TY> {
            // Indexing is unchecked: an out-of-range `i` panics. Only a variant
            // mismatch is reported through `Err`.
            match &self.fields[i].1 {
                Field::$VARIANT(value) => Ok(value),
                other => Err(general_err!(
                    "Cannot access {} as {}",
                    other.get_type_name(),
                    stringify!($VARIANT)
                )),
            }
        }
    };
}
159 
160 impl RowFormatter for Row {
161     /// Get Display reference for a given field.
fmt(&self, i: usize) -> &fmt::Display162     fn fmt(&self, i: usize) -> &fmt::Display {
163         &self.fields[i].1
164     }
165 }
166 
// Every getter below is generated by one of the accessor macros: the first
// argument is the method name, the second the `Field` variant it accepts and
// the third the type handed back to the caller.
impl RowAccessor for Row {
    // Primitive values are returned by value.
    row_primitive_accessor!(get_bool, Bool, bool);

    row_primitive_accessor!(get_byte, Byte, i8);

    row_primitive_accessor!(get_short, Short, i16);

    row_primitive_accessor!(get_int, Int, i32);

    row_primitive_accessor!(get_long, Long, i64);

    row_primitive_accessor!(get_ubyte, UByte, u8);

    row_primitive_accessor!(get_ushort, UShort, u16);

    row_primitive_accessor!(get_uint, UInt, u32);

    row_primitive_accessor!(get_ulong, ULong, u64);

    row_primitive_accessor!(get_float, Float, f32);

    row_primitive_accessor!(get_double, Double, f64);

    row_primitive_accessor!(get_timestamp_millis, TimestampMillis, u64);

    row_primitive_accessor!(get_timestamp_micros, TimestampMicros, u64);

    // Remaining values are returned by reference.
    row_complex_accessor!(get_decimal, Decimal, Decimal);

    row_complex_accessor!(get_string, Str, String);

    row_complex_accessor!(get_bytes, Bytes, ByteArray);

    row_complex_accessor!(get_group, Group, Row);

    row_complex_accessor!(get_list, ListInternal, List);

    row_complex_accessor!(get_map, MapInternal, Map);
}
206 
/// Constructs a `Row` from the list of `fields` and returns it.
///
/// Each entry of `fields` is a `(column name, value)` pair; the vector is
/// stored as given.
#[inline]
pub fn make_row(fields: Vec<(String, Field)>) -> Row {
    Row { fields }
}
212 
213 impl fmt::Display for Row {
fmt(&self, f: &mut fmt::Formatter) -> fmt::Result214     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
215         write!(f, "{{")?;
216         for (i, &(ref key, ref value)) in self.fields.iter().enumerate() {
217             key.fmt(f)?;
218             write!(f, ": ")?;
219             value.fmt(f)?;
220             if i < self.fields.len() - 1 {
221                 write!(f, ", ")?;
222             }
223         }
224         write!(f, "}}")
225     }
226 }
227 
/// `List` represents a list which contains an array of elements.
#[derive(Clone, Debug, PartialEq)]
pub struct List {
    // Elements of the list, kept in the order in which they were supplied.
    elements: Vec<Field>,
}
233 
234 impl List {
235     /// Get the number of fields in this row
len(&self) -> usize236     pub fn len(&self) -> usize {
237         self.elements.len()
238     }
239 }
240 
/// Constructs a `List` from the list of `elements` and returns it.
///
/// The vector is stored as given.
#[inline]
pub fn make_list(elements: Vec<Field>) -> List {
    List { elements }
}
246 
/// Trait for type-safe access of an index for a `List`.
/// Note that the get_XXX methods do not do bound checking.
///
/// Every getter takes the zero-based position `i` of the element. In the
/// implementations in this file a getter returns `Ok` when the element holds
/// the matching `Field` variant and an error ("Cannot access .. as ..")
/// otherwise; an out-of-range `i` panics on indexing.
pub trait ListAccessor {
    // Getters that return a copy of the stored primitive value.
    fn get_bool(&self, i: usize) -> Result<bool>;
    fn get_byte(&self, i: usize) -> Result<i8>;
    fn get_short(&self, i: usize) -> Result<i16>;
    fn get_int(&self, i: usize) -> Result<i32>;
    fn get_long(&self, i: usize) -> Result<i64>;
    fn get_ubyte(&self, i: usize) -> Result<u8>;
    fn get_ushort(&self, i: usize) -> Result<u16>;
    fn get_uint(&self, i: usize) -> Result<u32>;
    fn get_ulong(&self, i: usize) -> Result<u64>;
    fn get_float(&self, i: usize) -> Result<f32>;
    fn get_double(&self, i: usize) -> Result<f64>;
    fn get_timestamp_millis(&self, i: usize) -> Result<u64>;
    fn get_timestamp_micros(&self, i: usize) -> Result<u64>;
    // Getters that return a reference to the stored value.
    fn get_decimal(&self, i: usize) -> Result<&Decimal>;
    fn get_string(&self, i: usize) -> Result<&String>;
    fn get_bytes(&self, i: usize) -> Result<&ByteArray>;
    fn get_group(&self, i: usize) -> Result<&Row>;
    fn get_list(&self, i: usize) -> Result<&List>;
    fn get_map(&self, i: usize) -> Result<&Map>;
}
270 
/// Generates a by-value `List` getter for a primitive `Field` variant,
/// e.g. `get_bool`, `get_short`.
///
/// `$METHOD` is the getter name, `$VARIANT` the `Field` variant it accepts and
/// `$TY` the type stored in that variant.
macro_rules! list_primitive_accessor {
    ($METHOD:ident, $VARIANT:ident, $TY:ty) => {
        fn $METHOD(&self, i: usize) -> Result<$TY> {
            // No bounds check: an out-of-range `i` panics on indexing.
            match &self.elements[i] {
                Field::$VARIANT(value) => Ok(*value),
                other => Err(general_err!(
                    "Cannot access {} as {}",
                    other.get_type_name(),
                    stringify!($VARIANT)
                )),
            }
        }
    };
}
287 
/// Generates a by-reference list getter for a non-`Copy` `Field` variant,
/// e.g. `get_list`, `get_map`.
///
/// `$METHOD` is the getter name, `$VARIANT` the `Field` variant it accepts and
/// `$TY` the type stored in that variant; the getter hands out `&$TY`.
/// The macro is used both where `elements` holds `Field` values and where it
/// holds `&Field` references.
macro_rules! list_complex_accessor {
    ($METHOD:ident, $VARIANT:ident, $TY:ty) => {
        fn $METHOD(&self, i: usize) -> Result<&$TY> {
            // No bounds check: an out-of-range `i` panics on indexing.
            match &self.elements[i] {
                Field::$VARIANT(value) => Ok(value),
                other => Err(general_err!(
                    "Cannot access {} as {}",
                    other.get_type_name(),
                    stringify!($VARIANT)
                )),
            }
        }
    };
}
304 
// Every getter below is generated by one of the list accessor macros: the
// first argument is the method name, the second the `Field` variant it accepts
// and the third the type handed back to the caller.
impl ListAccessor for List {
    // Primitive values are returned by value.
    list_primitive_accessor!(get_bool, Bool, bool);

    list_primitive_accessor!(get_byte, Byte, i8);

    list_primitive_accessor!(get_short, Short, i16);

    list_primitive_accessor!(get_int, Int, i32);

    list_primitive_accessor!(get_long, Long, i64);

    list_primitive_accessor!(get_ubyte, UByte, u8);

    list_primitive_accessor!(get_ushort, UShort, u16);

    list_primitive_accessor!(get_uint, UInt, u32);

    list_primitive_accessor!(get_ulong, ULong, u64);

    list_primitive_accessor!(get_float, Float, f32);

    list_primitive_accessor!(get_double, Double, f64);

    list_primitive_accessor!(get_timestamp_millis, TimestampMillis, u64);

    list_primitive_accessor!(get_timestamp_micros, TimestampMicros, u64);

    // Remaining values are returned by reference.
    list_complex_accessor!(get_decimal, Decimal, Decimal);

    list_complex_accessor!(get_string, Str, String);

    list_complex_accessor!(get_bytes, Bytes, ByteArray);

    list_complex_accessor!(get_group, Group, Row);

    list_complex_accessor!(get_list, ListInternal, List);

    list_complex_accessor!(get_map, MapInternal, Map);
}
344 
/// `Map` represents a map which contains a list of key->value pairs.
#[derive(Clone, Debug, PartialEq)]
pub struct Map {
    // `(key, value)` pairs, kept in the order in which they were supplied.
    entries: Vec<(Field, Field)>,
}
350 
351 impl Map {
352     /// Get the number of fields in this row
len(&self) -> usize353     pub fn len(&self) -> usize {
354         self.entries.len()
355     }
356 }
357 
/// Constructs a `Map` from the list of `entries` and returns it.
///
/// Each entry is a `(key, value)` pair; the vector is stored as given.
#[inline]
pub fn make_map(entries: Vec<(Field, Field)>) -> Map {
    Map { entries }
}
363 
364 /// Trait for type-safe access of an index for a `Map`
365 pub trait MapAccessor {
get_keys<'a>(&'a self) -> Box<ListAccessor + 'a>366     fn get_keys<'a>(&'a self) -> Box<ListAccessor + 'a>;
get_values<'a>(&'a self) -> Box<ListAccessor + 'a>367     fn get_values<'a>(&'a self) -> Box<ListAccessor + 'a>;
368 }
369 
/// List view over borrowed fields; used to expose either the keys or the
/// values of a `Map` through `ListAccessor`.
struct MapList<'a> {
    // References to the selected half (keys or values) of each map entry.
    elements: Vec<&'a Field>,
}
373 
/// Generates a by-value `MapList` getter for a primitive `Field` variant,
/// e.g. `get_bool`, `get_short`.
///
/// Differs from `list_primitive_accessor!` only in that `elements` holds
/// `&Field` references, so the matched value is copied out through the
/// reference.
macro_rules! map_list_primitive_accessor {
    ($METHOD:ident, $VARIANT:ident, $TY:ty) => {
        fn $METHOD(&self, i: usize) -> Result<$TY> {
            // No bounds check: an out-of-range `i` panics on indexing.
            let element: &Field = self.elements[i];
            if let Field::$VARIANT(value) = element {
                Ok(*value)
            } else {
                Err(general_err!(
                    "Cannot access {} as {}",
                    element.get_type_name(),
                    stringify!($VARIANT)
                ))
            }
        }
    };
}
390 
// Getters for the key/value view of a map. By-value getters come from
// `map_list_primitive_accessor!`; by-reference getters reuse
// `list_complex_accessor!`, the same macro used for `List`.
impl<'a> ListAccessor for MapList<'a> {
    // Primitive values are returned by value.
    map_list_primitive_accessor!(get_bool, Bool, bool);

    map_list_primitive_accessor!(get_byte, Byte, i8);

    map_list_primitive_accessor!(get_short, Short, i16);

    map_list_primitive_accessor!(get_int, Int, i32);

    map_list_primitive_accessor!(get_long, Long, i64);

    map_list_primitive_accessor!(get_ubyte, UByte, u8);

    map_list_primitive_accessor!(get_ushort, UShort, u16);

    map_list_primitive_accessor!(get_uint, UInt, u32);

    map_list_primitive_accessor!(get_ulong, ULong, u64);

    map_list_primitive_accessor!(get_float, Float, f32);

    map_list_primitive_accessor!(get_double, Double, f64);

    map_list_primitive_accessor!(get_timestamp_millis, TimestampMillis, u64);

    map_list_primitive_accessor!(get_timestamp_micros, TimestampMicros, u64);

    // Remaining values are returned by reference.
    list_complex_accessor!(get_decimal, Decimal, Decimal);

    list_complex_accessor!(get_string, Str, String);

    list_complex_accessor!(get_bytes, Bytes, ByteArray);

    list_complex_accessor!(get_group, Group, Row);

    list_complex_accessor!(get_list, ListInternal, List);

    list_complex_accessor!(get_map, MapInternal, Map);
}
430 
431 impl MapAccessor for Map {
get_keys<'a>(&'a self) -> Box<ListAccessor + 'a>432     fn get_keys<'a>(&'a self) -> Box<ListAccessor + 'a> {
433         let map_list = MapList {
434             elements: self.entries.iter().map(|v| &v.0).collect(),
435         };
436         Box::new(map_list)
437     }
438 
get_values<'a>(&'a self) -> Box<ListAccessor + 'a>439     fn get_values<'a>(&'a self) -> Box<ListAccessor + 'a> {
440         let map_list = MapList {
441             elements: self.entries.iter().map(|v| &v.1).collect(),
442         };
443         Box::new(map_list)
444     }
445 }
446 
/// API to represent a single field in a `Row`.
#[derive(Clone, Debug, PartialEq)]
pub enum Field {
    // Primitive types
    /// Null value.
    Null,
    /// Boolean value (`true`, `false`).
    Bool(bool),
    /// Signed integer INT_8.
    Byte(i8),
    /// Signed integer INT_16.
    Short(i16),
    /// Signed integer INT_32.
    Int(i32),
    /// Signed integer INT_64.
    Long(i64),
    /// Unsigned integer UINT_8.
    UByte(u8),
    /// Unsigned integer UINT_16.
    UShort(u16),
    /// Unsigned integer UINT_32.
    UInt(u32),
    /// Unsigned integer UINT_64.
    ULong(u64),
    /// IEEE 32-bit floating point value.
    Float(f32),
    /// IEEE 64-bit floating point value.
    Double(f64),
    /// Decimal value.
    Decimal(Decimal),
    /// UTF-8 encoded character string.
    Str(String),
    /// General binary value.
    Bytes(ByteArray),
    /// Date without a time of day, stores the number of days from the
    /// Unix epoch, 1 January 1970.
    Date(u32),
    /// Milliseconds from the Unix epoch, 1 January 1970.
    TimestampMillis(u64),
    /// Microseconds from the Unix epoch, 1 January 1970.
    TimestampMicros(u64),

    // ----------------------------------------------------------------------
    // Complex types
    /// Struct, child elements are tuples of field-value pairs.
    Group(Row),
    /// List of elements.
    ListInternal(List),
    /// List of key-value pairs.
    MapInternal(Map),
}
498 
499 impl Field {
500     /// Get the type name.
get_type_name(&self) -> &'static str501     fn get_type_name(&self) -> &'static str {
502         match *self {
503             Field::Null => "Null",
504             Field::Bool(_) => "Bool",
505             Field::Byte(_) => "Byte",
506             Field::Short(_) => "Short",
507             Field::Int(_) => "Int",
508             Field::Long(_) => "Long",
509             Field::UByte(_) => "UByte",
510             Field::UShort(_) => "UShort",
511             Field::UInt(_) => "UInt",
512             Field::ULong(_) => "ULong",
513             Field::Float(_) => "Float",
514             Field::Double(_) => "Double",
515             Field::Decimal(_) => "Decimal",
516             Field::Date(_) => "Date",
517             Field::Str(_) => "Str",
518             Field::Bytes(_) => "Bytes",
519             Field::TimestampMillis(_) => "TimestampMillis",
520             Field::TimestampMicros(_) => "TimestampMicros",
521             Field::Group(_) => "Group",
522             Field::ListInternal(_) => "ListInternal",
523             Field::MapInternal(_) => "MapInternal",
524         }
525     }
526 
527     /// Determines if this Row represents a primitive value.
is_primitive(&self) -> bool528     pub fn is_primitive(&self) -> bool {
529         match *self {
530             Field::Group(_) => false,
531             Field::ListInternal(_) => false,
532             Field::MapInternal(_) => false,
533             _ => true,
534         }
535     }
536 
537     /// Converts Parquet BOOLEAN type with logical type into `bool` value.
538     #[inline]
convert_bool(_descr: &ColumnDescPtr, value: bool) -> Self539     pub fn convert_bool(_descr: &ColumnDescPtr, value: bool) -> Self {
540         Field::Bool(value)
541     }
542 
543     /// Converts Parquet INT32 type with logical type into `i32` value.
544     #[inline]
convert_int32(descr: &ColumnDescPtr, value: i32) -> Self545     pub fn convert_int32(descr: &ColumnDescPtr, value: i32) -> Self {
546         match descr.logical_type() {
547             LogicalType::INT_8 => Field::Byte(value as i8),
548             LogicalType::INT_16 => Field::Short(value as i16),
549             LogicalType::INT_32 | LogicalType::NONE => Field::Int(value),
550             LogicalType::UINT_8 => Field::UByte(value as u8),
551             LogicalType::UINT_16 => Field::UShort(value as u16),
552             LogicalType::UINT_32 => Field::UInt(value as u32),
553             LogicalType::DATE => Field::Date(value as u32),
554             LogicalType::DECIMAL => Field::Decimal(Decimal::from_i32(
555                 value,
556                 descr.type_precision(),
557                 descr.type_scale(),
558             )),
559             _ => nyi!(descr, value),
560         }
561     }
562 
563     /// Converts Parquet INT64 type with logical type into `i64` value.
564     #[inline]
convert_int64(descr: &ColumnDescPtr, value: i64) -> Self565     pub fn convert_int64(descr: &ColumnDescPtr, value: i64) -> Self {
566         match descr.logical_type() {
567             LogicalType::INT_64 | LogicalType::NONE => Field::Long(value),
568             LogicalType::UINT_64 => Field::ULong(value as u64),
569             LogicalType::TIMESTAMP_MILLIS => Field::TimestampMillis(value as u64),
570             LogicalType::TIMESTAMP_MICROS => Field::TimestampMicros(value as u64),
571             LogicalType::DECIMAL => Field::Decimal(Decimal::from_i64(
572                 value,
573                 descr.type_precision(),
574                 descr.type_scale(),
575             )),
576             _ => nyi!(descr, value),
577         }
578     }
579 
580     /// Converts Parquet INT96 (nanosecond timestamps) type and logical type into
581     /// `Timestamp` value.
582     #[inline]
convert_int96(_descr: &ColumnDescPtr, value: Int96) -> Self583     pub fn convert_int96(_descr: &ColumnDescPtr, value: Int96) -> Self {
584         Field::TimestampMillis(value.to_i64() as u64)
585     }
586 
587     /// Converts Parquet FLOAT type with logical type into `f32` value.
588     #[inline]
convert_float(_descr: &ColumnDescPtr, value: f32) -> Self589     pub fn convert_float(_descr: &ColumnDescPtr, value: f32) -> Self {
590         Field::Float(value)
591     }
592 
593     /// Converts Parquet DOUBLE type with logical type into `f64` value.
594     #[inline]
convert_double(_descr: &ColumnDescPtr, value: f64) -> Self595     pub fn convert_double(_descr: &ColumnDescPtr, value: f64) -> Self {
596         Field::Double(value)
597     }
598 
599     /// Converts Parquet BYTE_ARRAY type with logical type into either UTF8 string or
600     /// array of bytes.
601     #[inline]
convert_byte_array(descr: &ColumnDescPtr, value: ByteArray) -> Self602     pub fn convert_byte_array(descr: &ColumnDescPtr, value: ByteArray) -> Self {
603         match descr.physical_type() {
604             PhysicalType::BYTE_ARRAY => match descr.logical_type() {
605                 LogicalType::UTF8 | LogicalType::ENUM | LogicalType::JSON => {
606                     let value = String::from_utf8(value.data().to_vec()).unwrap();
607                     Field::Str(value)
608                 }
609                 LogicalType::BSON | LogicalType::NONE => Field::Bytes(value),
610                 LogicalType::DECIMAL => Field::Decimal(Decimal::from_bytes(
611                     value,
612                     descr.type_precision(),
613                     descr.type_scale(),
614                 )),
615                 _ => nyi!(descr, value),
616             },
617             PhysicalType::FIXED_LEN_BYTE_ARRAY => match descr.logical_type() {
618                 LogicalType::DECIMAL => Field::Decimal(Decimal::from_bytes(
619                     value,
620                     descr.type_precision(),
621                     descr.type_scale(),
622                 )),
623                 LogicalType::NONE => Field::Bytes(value),
624                 _ => nyi!(descr, value),
625             },
626             _ => nyi!(descr, value),
627         }
628     }
629 }
630 
631 impl fmt::Display for Field {
fmt(&self, f: &mut fmt::Formatter) -> fmt::Result632     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
633         match *self {
634             Field::Null => write!(f, "null"),
635             Field::Bool(value) => write!(f, "{}", value),
636             Field::Byte(value) => write!(f, "{}", value),
637             Field::Short(value) => write!(f, "{}", value),
638             Field::Int(value) => write!(f, "{}", value),
639             Field::Long(value) => write!(f, "{}", value),
640             Field::UByte(value) => write!(f, "{}", value),
641             Field::UShort(value) => write!(f, "{}", value),
642             Field::UInt(value) => write!(f, "{}", value),
643             Field::ULong(value) => write!(f, "{}", value),
644             Field::Float(value) => {
645                 if value > 1e19 || value < 1e-15 {
646                     write!(f, "{:E}", value)
647                 } else {
648                     write!(f, "{:?}", value)
649                 }
650             }
651             Field::Double(value) => {
652                 if value > 1e19 || value < 1e-15 {
653                     write!(f, "{:E}", value)
654                 } else {
655                     write!(f, "{:?}", value)
656                 }
657             }
658             Field::Decimal(ref value) => {
659                 write!(f, "{}", convert_decimal_to_string(value))
660             }
661             Field::Str(ref value) => write!(f, "\"{}\"", value),
662             Field::Bytes(ref value) => write!(f, "{:?}", value.data()),
663             Field::Date(value) => write!(f, "{}", convert_date_to_string(value)),
664             Field::TimestampMillis(value) => {
665                 write!(f, "{}", convert_timestamp_millis_to_string(value))
666             }
667             Field::TimestampMicros(value) => {
668                 write!(f, "{}", convert_timestamp_micros_to_string(value))
669             }
670             Field::Group(ref fields) => write!(f, "{}", fields),
671             Field::ListInternal(ref list) => {
672                 let elems = &list.elements;
673                 write!(f, "[")?;
674                 for (i, field) in elems.iter().enumerate() {
675                     field.fmt(f)?;
676                     if i < elems.len() - 1 {
677                         write!(f, ", ")?;
678                     }
679                 }
680                 write!(f, "]")
681             }
682             Field::MapInternal(ref map) => {
683                 let entries = &map.entries;
684                 write!(f, "{{")?;
685                 for (i, &(ref key, ref value)) in entries.iter().enumerate() {
686                     key.fmt(f)?;
687                     write!(f, " -> ")?;
688                     value.fmt(f)?;
689                     if i < entries.len() - 1 {
690                         write!(f, ", ")?;
691                     }
692                 }
693                 write!(f, "}}")
694             }
695         }
696     }
697 }
698 
699 /// Helper method to convert Parquet date into a string.
700 /// Input `value` is a number of days since the epoch in UTC.
701 /// Date is displayed in local timezone.
702 #[inline]
convert_date_to_string(value: u32) -> String703 fn convert_date_to_string(value: u32) -> String {
704     static NUM_SECONDS_IN_DAY: i64 = 60 * 60 * 24;
705     let dt = Local.timestamp(value as i64 * NUM_SECONDS_IN_DAY, 0).date();
706     format!("{}", dt.format("%Y-%m-%d %:z"))
707 }
708 
709 /// Helper method to convert Parquet timestamp into a string.
710 /// Input `value` is a number of milliseconds since the epoch in UTC.
711 /// Datetime is displayed in local timezone.
712 #[inline]
convert_timestamp_millis_to_string(value: u64) -> String713 fn convert_timestamp_millis_to_string(value: u64) -> String {
714     let dt = Local.timestamp((value / 1000) as i64, 0);
715     format!("{}", dt.format("%Y-%m-%d %H:%M:%S %:z"))
716 }
717 
718 /// Helper method to convert Parquet timestamp into a string.
719 /// Input `value` is a number of microseconds since the epoch in UTC.
720 /// Datetime is displayed in local timezone.
721 #[inline]
convert_timestamp_micros_to_string(value: u64) -> String722 fn convert_timestamp_micros_to_string(value: u64) -> String {
723     convert_timestamp_millis_to_string(value / 1000)
724 }
725 
726 /// Helper method to convert Parquet decimal into a string.
727 /// We assert that `scale >= 0` and `precision > scale`, but this will be enforced
728 /// when constructing Parquet schema.
729 #[inline]
convert_decimal_to_string(decimal: &Decimal) -> String730 fn convert_decimal_to_string(decimal: &Decimal) -> String {
731     assert!(decimal.scale() >= 0 && decimal.precision() > decimal.scale());
732 
733     // Specify as signed bytes to resolve sign as part of conversion.
734     let num = BigInt::from_signed_bytes_be(decimal.data());
735 
736     // Offset of the first digit in a string.
737     let negative = if num.sign() == Sign::Minus { 1 } else { 0 };
738     let mut num_str = num.to_string();
739     let mut point = num_str.len() as i32 - decimal.scale() - negative;
740 
741     // Convert to string form without scientific notation.
742     if point <= 0 {
743         // Zeros need to be prepended to the unscaled value.
744         while point < 0 {
745             num_str.insert(negative as usize, '0');
746             point += 1;
747         }
748         num_str.insert_str(negative as usize, "0.");
749     } else {
750         // No zeroes need to be prepended to the unscaled value, simply insert decimal
751         // point.
752         num_str.insert((point + negative) as usize, '.');
753     }
754 
755     num_str
756 }
757 
758 #[cfg(test)]
759 mod tests {
760     use super::*;
761 
762     use chrono;
763     use std::rc::Rc;
764 
765     use crate::schema::types::{ColumnDescriptor, ColumnPath, PrimitiveTypeBuilder};
766 
767     /// Creates test column descriptor based on provided type parameters.
768     macro_rules! make_column_descr {
769         ($physical_type:expr, $logical_type:expr) => {{
770             let tpe = PrimitiveTypeBuilder::new("col", $physical_type)
771                 .with_logical_type($logical_type)
772                 .build()
773                 .unwrap();
774             Rc::new(ColumnDescriptor::new(
775                 Rc::new(tpe),
776                 None,
777                 0,
778                 0,
779                 ColumnPath::from("col"),
780             ))
781         }};
782         ($physical_type:expr, $logical_type:expr, $len:expr, $prec:expr, $scale:expr) => {{
783             let tpe = PrimitiveTypeBuilder::new("col", $physical_type)
784                 .with_logical_type($logical_type)
785                 .with_length($len)
786                 .with_precision($prec)
787                 .with_scale($scale)
788                 .build()
789                 .unwrap();
790             Rc::new(ColumnDescriptor::new(
791                 Rc::new(tpe),
792                 None,
793                 0,
794                 0,
795                 ColumnPath::from("col"),
796             ))
797         }};
798     }
799 
800     #[test]
test_row_convert_bool()801     fn test_row_convert_bool() {
802         // BOOLEAN value does not depend on logical type
803         let descr = make_column_descr![PhysicalType::BOOLEAN, LogicalType::NONE];
804 
805         let row = Field::convert_bool(&descr, true);
806         assert_eq!(row, Field::Bool(true));
807 
808         let row = Field::convert_bool(&descr, false);
809         assert_eq!(row, Field::Bool(false));
810     }
811 
812     #[test]
test_row_convert_int32()813     fn test_row_convert_int32() {
814         let descr = make_column_descr![PhysicalType::INT32, LogicalType::INT_8];
815         let row = Field::convert_int32(&descr, 111);
816         assert_eq!(row, Field::Byte(111));
817 
818         let descr = make_column_descr![PhysicalType::INT32, LogicalType::INT_16];
819         let row = Field::convert_int32(&descr, 222);
820         assert_eq!(row, Field::Short(222));
821 
822         let descr = make_column_descr![PhysicalType::INT32, LogicalType::INT_32];
823         let row = Field::convert_int32(&descr, 333);
824         assert_eq!(row, Field::Int(333));
825 
826         let descr = make_column_descr![PhysicalType::INT32, LogicalType::UINT_8];
827         let row = Field::convert_int32(&descr, -1);
828         assert_eq!(row, Field::UByte(255));
829 
830         let descr = make_column_descr![PhysicalType::INT32, LogicalType::UINT_16];
831         let row = Field::convert_int32(&descr, 256);
832         assert_eq!(row, Field::UShort(256));
833 
834         let descr = make_column_descr![PhysicalType::INT32, LogicalType::UINT_32];
835         let row = Field::convert_int32(&descr, 1234);
836         assert_eq!(row, Field::UInt(1234));
837 
838         let descr = make_column_descr![PhysicalType::INT32, LogicalType::NONE];
839         let row = Field::convert_int32(&descr, 444);
840         assert_eq!(row, Field::Int(444));
841 
842         let descr = make_column_descr![PhysicalType::INT32, LogicalType::DATE];
843         let row = Field::convert_int32(&descr, 14611);
844         assert_eq!(row, Field::Date(14611));
845 
846         let descr =
847             make_column_descr![PhysicalType::INT32, LogicalType::DECIMAL, 0, 8, 2];
848         let row = Field::convert_int32(&descr, 444);
849         assert_eq!(row, Field::Decimal(Decimal::from_i32(444, 8, 2)));
850     }
851 
852     #[test]
test_row_convert_int64()853     fn test_row_convert_int64() {
854         let descr = make_column_descr![PhysicalType::INT64, LogicalType::INT_64];
855         let row = Field::convert_int64(&descr, 1111);
856         assert_eq!(row, Field::Long(1111));
857 
858         let descr = make_column_descr![PhysicalType::INT64, LogicalType::UINT_64];
859         let row = Field::convert_int64(&descr, 78239823);
860         assert_eq!(row, Field::ULong(78239823));
861 
862         let descr =
863             make_column_descr![PhysicalType::INT64, LogicalType::TIMESTAMP_MILLIS];
864         let row = Field::convert_int64(&descr, 1541186529153);
865         assert_eq!(row, Field::TimestampMillis(1541186529153));
866 
867         let descr =
868             make_column_descr![PhysicalType::INT64, LogicalType::TIMESTAMP_MICROS];
869         let row = Field::convert_int64(&descr, 1541186529153123);
870         assert_eq!(row, Field::TimestampMicros(1541186529153123));
871 
872         let descr = make_column_descr![PhysicalType::INT64, LogicalType::NONE];
873         let row = Field::convert_int64(&descr, 2222);
874         assert_eq!(row, Field::Long(2222));
875 
876         let descr =
877             make_column_descr![PhysicalType::INT64, LogicalType::DECIMAL, 0, 8, 2];
878         let row = Field::convert_int64(&descr, 3333);
879         assert_eq!(row, Field::Decimal(Decimal::from_i64(3333, 8, 2)));
880     }
881 
882     #[test]
test_row_convert_int96()883     fn test_row_convert_int96() {
884         // INT96 value does not depend on logical type
885         let descr = make_column_descr![PhysicalType::INT96, LogicalType::NONE];
886 
887         let value = Int96::from(vec![0, 0, 2454923]);
888         let row = Field::convert_int96(&descr, value);
889         assert_eq!(row, Field::TimestampMillis(1238544000000));
890 
891         let value = Int96::from(vec![4165425152, 13, 2454923]);
892         let row = Field::convert_int96(&descr, value);
893         assert_eq!(row, Field::TimestampMillis(1238544060000));
894     }
895 
896     #[test]
897     #[should_panic(expected = "Expected non-negative milliseconds when converting Int96")]
test_row_convert_int96_invalid()898     fn test_row_convert_int96_invalid() {
899         // INT96 value does not depend on logical type
900         let descr = make_column_descr![PhysicalType::INT96, LogicalType::NONE];
901 
902         let value = Int96::from(vec![0, 0, 0]);
903         Field::convert_int96(&descr, value);
904     }
905 
906     #[test]
test_row_convert_float()907     fn test_row_convert_float() {
908         // FLOAT value does not depend on logical type
909         let descr = make_column_descr![PhysicalType::FLOAT, LogicalType::NONE];
910         let row = Field::convert_float(&descr, 2.31);
911         assert_eq!(row, Field::Float(2.31));
912     }
913 
914     #[test]
test_row_convert_double()915     fn test_row_convert_double() {
916         // DOUBLE value does not depend on logical type
917         let descr = make_column_descr![PhysicalType::DOUBLE, LogicalType::NONE];
918         let row = Field::convert_double(&descr, 1.56);
919         assert_eq!(row, Field::Double(1.56));
920     }
921 
922     #[test]
test_row_convert_byte_array()923     fn test_row_convert_byte_array() {
924         // UTF8
925         let descr = make_column_descr![PhysicalType::BYTE_ARRAY, LogicalType::UTF8];
926         let value = ByteArray::from(vec![b'A', b'B', b'C', b'D']);
927         let row = Field::convert_byte_array(&descr, value);
928         assert_eq!(row, Field::Str("ABCD".to_string()));
929 
930         // ENUM
931         let descr = make_column_descr![PhysicalType::BYTE_ARRAY, LogicalType::ENUM];
932         let value = ByteArray::from(vec![b'1', b'2', b'3']);
933         let row = Field::convert_byte_array(&descr, value);
934         assert_eq!(row, Field::Str("123".to_string()));
935 
936         // JSON
937         let descr = make_column_descr![PhysicalType::BYTE_ARRAY, LogicalType::JSON];
938         let value = ByteArray::from(vec![b'{', b'"', b'a', b'"', b':', b'1', b'}']);
939         let row = Field::convert_byte_array(&descr, value);
940         assert_eq!(row, Field::Str("{\"a\":1}".to_string()));
941 
942         // NONE
943         let descr = make_column_descr![PhysicalType::BYTE_ARRAY, LogicalType::NONE];
944         let value = ByteArray::from(vec![1, 2, 3, 4, 5]);
945         let row = Field::convert_byte_array(&descr, value.clone());
946         assert_eq!(row, Field::Bytes(value));
947 
948         // BSON
949         let descr = make_column_descr![PhysicalType::BYTE_ARRAY, LogicalType::BSON];
950         let value = ByteArray::from(vec![1, 2, 3, 4, 5]);
951         let row = Field::convert_byte_array(&descr, value.clone());
952         assert_eq!(row, Field::Bytes(value));
953 
954         // DECIMAL
955         let descr =
956             make_column_descr![PhysicalType::BYTE_ARRAY, LogicalType::DECIMAL, 0, 8, 2];
957         let value = ByteArray::from(vec![207, 200]);
958         let row = Field::convert_byte_array(&descr, value.clone());
959         assert_eq!(row, Field::Decimal(Decimal::from_bytes(value, 8, 2)));
960 
961         // DECIMAL (FIXED_LEN_BYTE_ARRAY)
962         let descr = make_column_descr![
963             PhysicalType::FIXED_LEN_BYTE_ARRAY,
964             LogicalType::DECIMAL,
965             8,
966             17,
967             5
968         ];
969         let value = ByteArray::from(vec![0, 0, 0, 0, 0, 4, 147, 224]);
970         let row = Field::convert_byte_array(&descr, value.clone());
971         assert_eq!(row, Field::Decimal(Decimal::from_bytes(value, 17, 5)));
972 
973         // NONE (FIXED_LEN_BYTE_ARRAY)
974         let descr = make_column_descr![
975             PhysicalType::FIXED_LEN_BYTE_ARRAY,
976             LogicalType::NONE,
977             6,
978             0,
979             0
980         ];
981         let value = ByteArray::from(vec![1, 2, 3, 4, 5, 6]);
982         let row = Field::convert_byte_array(&descr, value.clone());
983         assert_eq!(row, Field::Bytes(value));
984     }
985 
986     #[test]
test_convert_date_to_string()987     fn test_convert_date_to_string() {
988         fn check_date_conversion(y: u32, m: u32, d: u32) {
989             let datetime = chrono::NaiveDate::from_ymd(y as i32, m, d).and_hms(0, 0, 0);
990             let dt = Local.from_utc_datetime(&datetime);
991             let res = convert_date_to_string((dt.timestamp() / 60 / 60 / 24) as u32);
992             let exp = format!("{}", dt.format("%Y-%m-%d %:z"));
993             assert_eq!(res, exp);
994         }
995 
996         check_date_conversion(2010, 01, 02);
997         check_date_conversion(2014, 05, 01);
998         check_date_conversion(2016, 02, 29);
999         check_date_conversion(2017, 09, 12);
1000         check_date_conversion(2018, 03, 31);
1001     }
1002 
1003     #[test]
test_convert_timestamp_to_string()1004     fn test_convert_timestamp_to_string() {
1005         fn check_datetime_conversion(y: u32, m: u32, d: u32, h: u32, mi: u32, s: u32) {
1006             let datetime = chrono::NaiveDate::from_ymd(y as i32, m, d).and_hms(h, mi, s);
1007             let dt = Local.from_utc_datetime(&datetime);
1008             let res = convert_timestamp_millis_to_string(dt.timestamp_millis() as u64);
1009             let exp = format!("{}", dt.format("%Y-%m-%d %H:%M:%S %:z"));
1010             assert_eq!(res, exp);
1011         }
1012 
1013         check_datetime_conversion(2010, 01, 02, 13, 12, 54);
1014         check_datetime_conversion(2011, 01, 03, 08, 23, 01);
1015         check_datetime_conversion(2012, 04, 05, 11, 06, 32);
1016         check_datetime_conversion(2013, 05, 12, 16, 38, 00);
1017         check_datetime_conversion(2014, 11, 28, 21, 15, 12);
1018     }
1019 
1020     #[test]
test_convert_float_to_string()1021     fn test_convert_float_to_string() {
1022         assert_eq!(format!("{}", Field::Float(1.0)), "1.0");
1023         assert_eq!(format!("{}", Field::Float(9.63)), "9.63");
1024         assert_eq!(format!("{}", Field::Float(1e-15)), "0.000000000000001");
1025         assert_eq!(format!("{}", Field::Float(1e-16)), "1E-16");
1026         assert_eq!(format!("{}", Field::Float(1e19)), "10000000000000000000.0");
1027         assert_eq!(format!("{}", Field::Float(1e20)), "1E20");
1028         assert_eq!(format!("{}", Field::Float(1.7976931E30)), "1.7976931E30");
1029         assert_eq!(format!("{}", Field::Float(-1.7976931E30)), "-1.7976931E30");
1030     }
1031 
1032     #[test]
test_convert_double_to_string()1033     fn test_convert_double_to_string() {
1034         assert_eq!(format!("{}", Field::Double(1.0)), "1.0");
1035         assert_eq!(format!("{}", Field::Double(9.63)), "9.63");
1036         assert_eq!(format!("{}", Field::Double(1e-15)), "0.000000000000001");
1037         assert_eq!(format!("{}", Field::Double(1e-16)), "1E-16");
1038         assert_eq!(format!("{}", Field::Double(1e19)), "10000000000000000000.0");
1039         assert_eq!(format!("{}", Field::Double(1e20)), "1E20");
1040         assert_eq!(
1041             format!("{}", Field::Double(1.79769313486E308)),
1042             "1.79769313486E308"
1043         );
1044         assert_eq!(
1045             format!("{}", Field::Double(-1.79769313486E308)),
1046             "-1.79769313486E308"
1047         );
1048     }
1049 
1050     #[test]
test_convert_decimal_to_string()1051     fn test_convert_decimal_to_string() {
1052         // Helper method to compare decimal
1053         fn check_decimal(bytes: Vec<u8>, precision: i32, scale: i32, res: &str) {
1054             let decimal = Decimal::from_bytes(ByteArray::from(bytes), precision, scale);
1055             assert_eq!(convert_decimal_to_string(&decimal), res);
1056         }
1057 
1058         // This example previously used to fail in some engines
1059         check_decimal(
1060             vec![0, 0, 0, 0, 0, 0, 0, 0, 13, 224, 182, 179, 167, 100, 0, 0],
1061             38,
1062             18,
1063             "1.000000000000000000",
1064         );
1065         check_decimal(
1066             vec![
1067                 249, 233, 247, 16, 185, 192, 202, 223, 215, 165, 192, 166, 67, 72,
1068             ],
1069             36,
1070             28,
1071             "-12344.0242342304923409234234293432",
1072         );
1073         check_decimal(vec![0, 0, 0, 0, 0, 4, 147, 224], 17, 5, "3.00000");
1074         check_decimal(vec![0, 0, 0, 0, 1, 201, 195, 140], 18, 2, "300000.12");
1075         check_decimal(vec![207, 200], 10, 2, "-123.44");
1076         check_decimal(vec![207, 200], 10, 8, "-0.00012344");
1077     }
1078 
1079     #[test]
test_row_display()1080     fn test_row_display() {
1081         // Primitive types
1082         assert_eq!(format!("{}", Field::Null), "null");
1083         assert_eq!(format!("{}", Field::Bool(true)), "true");
1084         assert_eq!(format!("{}", Field::Bool(false)), "false");
1085         assert_eq!(format!("{}", Field::Byte(1)), "1");
1086         assert_eq!(format!("{}", Field::Short(2)), "2");
1087         assert_eq!(format!("{}", Field::Int(3)), "3");
1088         assert_eq!(format!("{}", Field::Long(4)), "4");
1089         assert_eq!(format!("{}", Field::UByte(1)), "1");
1090         assert_eq!(format!("{}", Field::UShort(2)), "2");
1091         assert_eq!(format!("{}", Field::UInt(3)), "3");
1092         assert_eq!(format!("{}", Field::ULong(4)), "4");
1093         assert_eq!(format!("{}", Field::Float(5.0)), "5.0");
1094         assert_eq!(format!("{}", Field::Float(5.1234)), "5.1234");
1095         assert_eq!(format!("{}", Field::Double(6.0)), "6.0");
1096         assert_eq!(format!("{}", Field::Double(6.1234)), "6.1234");
1097         assert_eq!(format!("{}", Field::Str("abc".to_string())), "\"abc\"");
1098         assert_eq!(
1099             format!("{}", Field::Bytes(ByteArray::from(vec![1, 2, 3]))),
1100             "[1, 2, 3]"
1101         );
1102         assert_eq!(
1103             format!("{}", Field::Date(14611)),
1104             convert_date_to_string(14611)
1105         );
1106         assert_eq!(
1107             format!("{}", Field::TimestampMillis(1262391174000)),
1108             convert_timestamp_millis_to_string(1262391174000)
1109         );
1110         assert_eq!(
1111             format!("{}", Field::TimestampMicros(1262391174000000)),
1112             convert_timestamp_micros_to_string(1262391174000000)
1113         );
1114         assert_eq!(
1115             format!("{}", Field::Decimal(Decimal::from_i32(4, 8, 2))),
1116             convert_decimal_to_string(&Decimal::from_i32(4, 8, 2))
1117         );
1118 
1119         // Complex types
1120         let fields = vec![
1121             ("x".to_string(), Field::Null),
1122             ("Y".to_string(), Field::Int(2)),
1123             ("z".to_string(), Field::Float(3.1)),
1124             ("a".to_string(), Field::Str("abc".to_string())),
1125         ];
1126         let row = Field::Group(make_row(fields));
1127         assert_eq!(format!("{}", row), "{x: null, Y: 2, z: 3.1, a: \"abc\"}");
1128 
1129         let row = Field::ListInternal(make_list(vec![
1130             Field::Int(2),
1131             Field::Int(1),
1132             Field::Null,
1133             Field::Int(12),
1134         ]));
1135         assert_eq!(format!("{}", row), "[2, 1, null, 12]");
1136 
1137         let row = Field::MapInternal(make_map(vec![
1138             (Field::Int(1), Field::Float(1.2)),
1139             (Field::Int(2), Field::Float(4.5)),
1140             (Field::Int(3), Field::Float(2.3)),
1141         ]));
1142         assert_eq!(format!("{}", row), "{1 -> 1.2, 2 -> 4.5, 3 -> 2.3}");
1143     }
1144 
1145     #[test]
test_is_primitive()1146     fn test_is_primitive() {
1147         // primitives
1148         assert!(Field::Null.is_primitive());
1149         assert!(Field::Bool(true).is_primitive());
1150         assert!(Field::Bool(false).is_primitive());
1151         assert!(Field::Byte(1).is_primitive());
1152         assert!(Field::Short(2).is_primitive());
1153         assert!(Field::Int(3).is_primitive());
1154         assert!(Field::Long(4).is_primitive());
1155         assert!(Field::UByte(1).is_primitive());
1156         assert!(Field::UShort(2).is_primitive());
1157         assert!(Field::UInt(3).is_primitive());
1158         assert!(Field::ULong(4).is_primitive());
1159         assert!(Field::Float(5.0).is_primitive());
1160         assert!(Field::Float(5.1234).is_primitive());
1161         assert!(Field::Double(6.0).is_primitive());
1162         assert!(Field::Double(6.1234).is_primitive());
1163         assert!(Field::Str("abc".to_string()).is_primitive());
1164         assert!(Field::Bytes(ByteArray::from(vec![1, 2, 3])).is_primitive());
1165         assert!(Field::TimestampMillis(12345678).is_primitive());
1166         assert!(Field::TimestampMicros(12345678901).is_primitive());
1167         assert!(Field::Decimal(Decimal::from_i32(4, 8, 2)).is_primitive());
1168 
1169         // complex types
1170         assert_eq!(
1171             false,
1172             Field::Group(make_row(vec![
1173                 ("x".to_string(), Field::Null),
1174                 ("Y".to_string(), Field::Int(2)),
1175                 ("z".to_string(), Field::Float(3.1)),
1176                 ("a".to_string(), Field::Str("abc".to_string()))
1177             ]))
1178             .is_primitive()
1179         );
1180 
1181         assert_eq!(
1182             false,
1183             Field::ListInternal(make_list(vec![
1184                 Field::Int(2),
1185                 Field::Int(1),
1186                 Field::Null,
1187                 Field::Int(12)
1188             ]))
1189             .is_primitive()
1190         );
1191 
1192         assert_eq!(
1193             false,
1194             Field::MapInternal(make_map(vec![
1195                 (Field::Int(1), Field::Float(1.2)),
1196                 (Field::Int(2), Field::Float(4.5)),
1197                 (Field::Int(3), Field::Float(2.3))
1198             ]))
1199             .is_primitive()
1200         );
1201     }
1202 
1203     #[test]
test_row_primitive_field_fmt()1204     fn test_row_primitive_field_fmt() {
1205         // Primitives types
1206         let row = make_row(vec![
1207             ("00".to_string(), Field::Null),
1208             ("01".to_string(), Field::Bool(false)),
1209             ("02".to_string(), Field::Byte(3)),
1210             ("03".to_string(), Field::Short(4)),
1211             ("04".to_string(), Field::Int(5)),
1212             ("05".to_string(), Field::Long(6)),
1213             ("06".to_string(), Field::UByte(7)),
1214             ("07".to_string(), Field::UShort(8)),
1215             ("08".to_string(), Field::UInt(9)),
1216             ("09".to_string(), Field::ULong(10)),
1217             ("10".to_string(), Field::Float(11.1)),
1218             ("11".to_string(), Field::Double(12.1)),
1219             ("12".to_string(), Field::Str("abc".to_string())),
1220             (
1221                 "13".to_string(),
1222                 Field::Bytes(ByteArray::from(vec![1, 2, 3, 4, 5])),
1223             ),
1224             ("14".to_string(), Field::Date(14611)),
1225             ("15".to_string(), Field::TimestampMillis(1262391174000)),
1226             ("16".to_string(), Field::TimestampMicros(1262391174000000)),
1227             ("17".to_string(), Field::Decimal(Decimal::from_i32(4, 7, 2))),
1228         ]);
1229 
1230         assert_eq!("null", format!("{}", row.fmt(0)));
1231         assert_eq!("false", format!("{}", row.fmt(1)));
1232         assert_eq!("3", format!("{}", row.fmt(2)));
1233         assert_eq!("4", format!("{}", row.fmt(3)));
1234         assert_eq!("5", format!("{}", row.fmt(4)));
1235         assert_eq!("6", format!("{}", row.fmt(5)));
1236         assert_eq!("7", format!("{}", row.fmt(6)));
1237         assert_eq!("8", format!("{}", row.fmt(7)));
1238         assert_eq!("9", format!("{}", row.fmt(8)));
1239         assert_eq!("10", format!("{}", row.fmt(9)));
1240         assert_eq!("11.1", format!("{}", row.fmt(10)));
1241         assert_eq!("12.1", format!("{}", row.fmt(11)));
1242         assert_eq!("\"abc\"", format!("{}", row.fmt(12)));
1243         assert_eq!("[1, 2, 3, 4, 5]", format!("{}", row.fmt(13)));
1244         assert_eq!(convert_date_to_string(14611), format!("{}", row.fmt(14)));
1245         assert_eq!(
1246             convert_timestamp_millis_to_string(1262391174000),
1247             format!("{}", row.fmt(15))
1248         );
1249         assert_eq!(
1250             convert_timestamp_micros_to_string(1262391174000000),
1251             format!("{}", row.fmt(16))
1252         );
1253         assert_eq!("0.04", format!("{}", row.fmt(17)));
1254     }
1255 
1256     #[test]
test_row_complex_field_fmt()1257     fn test_row_complex_field_fmt() {
1258         // Complex types
1259         let row = make_row(vec![
1260             (
1261                 "00".to_string(),
1262                 Field::Group(make_row(vec![
1263                     ("x".to_string(), Field::Null),
1264                     ("Y".to_string(), Field::Int(2)),
1265                 ])),
1266             ),
1267             (
1268                 "01".to_string(),
1269                 Field::ListInternal(make_list(vec![
1270                     Field::Int(2),
1271                     Field::Int(1),
1272                     Field::Null,
1273                     Field::Int(12),
1274                 ])),
1275             ),
1276             (
1277                 "02".to_string(),
1278                 Field::MapInternal(make_map(vec![
1279                     (Field::Int(1), Field::Float(1.2)),
1280                     (Field::Int(2), Field::Float(4.5)),
1281                     (Field::Int(3), Field::Float(2.3)),
1282                 ])),
1283             ),
1284         ]);
1285 
1286         assert_eq!("{x: null, Y: 2}", format!("{}", row.fmt(0)));
1287         assert_eq!("[2, 1, null, 12]", format!("{}", row.fmt(1)));
1288         assert_eq!("{1 -> 1.2, 2 -> 4.5, 3 -> 2.3}", format!("{}", row.fmt(2)));
1289     }
1290 
1291     #[test]
test_row_primitive_accessors()1292     fn test_row_primitive_accessors() {
1293         // primitives
1294         let row = make_row(vec![
1295             ("a".to_string(), Field::Null),
1296             ("b".to_string(), Field::Bool(false)),
1297             ("c".to_string(), Field::Byte(3)),
1298             ("d".to_string(), Field::Short(4)),
1299             ("e".to_string(), Field::Int(5)),
1300             ("f".to_string(), Field::Long(6)),
1301             ("g".to_string(), Field::UByte(3)),
1302             ("h".to_string(), Field::UShort(4)),
1303             ("i".to_string(), Field::UInt(5)),
1304             ("j".to_string(), Field::ULong(6)),
1305             ("k".to_string(), Field::Float(7.1)),
1306             ("l".to_string(), Field::Double(8.1)),
1307             ("m".to_string(), Field::Str("abc".to_string())),
1308             (
1309                 "n".to_string(),
1310                 Field::Bytes(ByteArray::from(vec![1, 2, 3, 4, 5])),
1311             ),
1312             ("o".to_string(), Field::Decimal(Decimal::from_i32(4, 7, 2))),
1313         ]);
1314 
1315         assert_eq!(false, row.get_bool(1).unwrap());
1316         assert_eq!(3, row.get_byte(2).unwrap());
1317         assert_eq!(4, row.get_short(3).unwrap());
1318         assert_eq!(5, row.get_int(4).unwrap());
1319         assert_eq!(6, row.get_long(5).unwrap());
1320         assert_eq!(3, row.get_ubyte(6).unwrap());
1321         assert_eq!(4, row.get_ushort(7).unwrap());
1322         assert_eq!(5, row.get_uint(8).unwrap());
1323         assert_eq!(6, row.get_ulong(9).unwrap());
1324         assert_eq!(7.1, row.get_float(10).unwrap());
1325         assert_eq!(8.1, row.get_double(11).unwrap());
1326         assert_eq!("abc", row.get_string(12).unwrap());
1327         assert_eq!(5, row.get_bytes(13).unwrap().len());
1328         assert_eq!(7, row.get_decimal(14).unwrap().precision());
1329     }
1330 
1331     #[test]
test_row_primitive_invalid_accessors()1332     fn test_row_primitive_invalid_accessors() {
1333         // primitives
1334         let row = make_row(vec![
1335             ("a".to_string(), Field::Null),
1336             ("b".to_string(), Field::Bool(false)),
1337             ("c".to_string(), Field::Byte(3)),
1338             ("d".to_string(), Field::Short(4)),
1339             ("e".to_string(), Field::Int(5)),
1340             ("f".to_string(), Field::Long(6)),
1341             ("g".to_string(), Field::UByte(3)),
1342             ("h".to_string(), Field::UShort(4)),
1343             ("i".to_string(), Field::UInt(5)),
1344             ("j".to_string(), Field::ULong(6)),
1345             ("k".to_string(), Field::Float(7.1)),
1346             ("l".to_string(), Field::Double(8.1)),
1347             ("m".to_string(), Field::Str("abc".to_string())),
1348             (
1349                 "n".to_string(),
1350                 Field::Bytes(ByteArray::from(vec![1, 2, 3, 4, 5])),
1351             ),
1352             ("o".to_string(), Field::Decimal(Decimal::from_i32(4, 7, 2))),
1353         ]);
1354 
1355         for i in 0..row.len() {
1356             assert!(row.get_group(i).is_err());
1357         }
1358     }
1359 
1360     #[test]
test_row_complex_accessors()1361     fn test_row_complex_accessors() {
1362         let row = make_row(vec![
1363             (
1364                 "a".to_string(),
1365                 Field::Group(make_row(vec![
1366                     ("x".to_string(), Field::Null),
1367                     ("Y".to_string(), Field::Int(2)),
1368                 ])),
1369             ),
1370             (
1371                 "b".to_string(),
1372                 Field::ListInternal(make_list(vec![
1373                     Field::Int(2),
1374                     Field::Int(1),
1375                     Field::Null,
1376                     Field::Int(12),
1377                 ])),
1378             ),
1379             (
1380                 "c".to_string(),
1381                 Field::MapInternal(make_map(vec![
1382                     (Field::Int(1), Field::Float(1.2)),
1383                     (Field::Int(2), Field::Float(4.5)),
1384                     (Field::Int(3), Field::Float(2.3)),
1385                 ])),
1386             ),
1387         ]);
1388 
1389         assert_eq!(2, row.get_group(0).unwrap().len());
1390         assert_eq!(4, row.get_list(1).unwrap().len());
1391         assert_eq!(3, row.get_map(2).unwrap().len());
1392     }
1393 
1394     #[test]
test_row_complex_invalid_accessors()1395     fn test_row_complex_invalid_accessors() {
1396         let row = make_row(vec![
1397             (
1398                 "a".to_string(),
1399                 Field::Group(make_row(vec![
1400                     ("x".to_string(), Field::Null),
1401                     ("Y".to_string(), Field::Int(2)),
1402                 ])),
1403             ),
1404             (
1405                 "b".to_string(),
1406                 Field::ListInternal(make_list(vec![
1407                     Field::Int(2),
1408                     Field::Int(1),
1409                     Field::Null,
1410                     Field::Int(12),
1411                 ])),
1412             ),
1413             (
1414                 "c".to_string(),
1415                 Field::MapInternal(make_map(vec![
1416                     (Field::Int(1), Field::Float(1.2)),
1417                     (Field::Int(2), Field::Float(4.5)),
1418                     (Field::Int(3), Field::Float(2.3)),
1419                 ])),
1420             ),
1421         ]);
1422 
1423         assert_eq!(
1424             ParquetError::General("Cannot access Group as Float".to_string()),
1425             row.get_float(0).unwrap_err()
1426         );
1427         assert_eq!(
1428             ParquetError::General("Cannot access ListInternal as Float".to_string()),
1429             row.get_float(1).unwrap_err()
1430         );
1431         assert_eq!(
1432             ParquetError::General("Cannot access MapInternal as Float".to_string()),
1433             row.get_float(2).unwrap_err()
1434         );
1435     }
1436 
1437     #[test]
test_list_primitive_accessors()1438     fn test_list_primitive_accessors() {
1439         // primitives
1440         let list = make_list(vec![Field::Bool(false)]);
1441         assert_eq!(false, list.get_bool(0).unwrap());
1442 
1443         let list = make_list(vec![Field::Byte(3), Field::Byte(4)]);
1444         assert_eq!(4, list.get_byte(1).unwrap());
1445 
1446         let list = make_list(vec![Field::Short(4), Field::Short(5), Field::Short(6)]);
1447         assert_eq!(6, list.get_short(2).unwrap());
1448 
1449         let list = make_list(vec![Field::Int(5)]);
1450         assert_eq!(5, list.get_int(0).unwrap());
1451 
1452         let list = make_list(vec![Field::Long(6), Field::Long(7)]);
1453         assert_eq!(7, list.get_long(1).unwrap());
1454 
1455         let list = make_list(vec![Field::UByte(3), Field::UByte(4)]);
1456         assert_eq!(4, list.get_ubyte(1).unwrap());
1457 
1458         let list = make_list(vec![Field::UShort(4), Field::UShort(5), Field::UShort(6)]);
1459         assert_eq!(6, list.get_ushort(2).unwrap());
1460 
1461         let list = make_list(vec![Field::UInt(5)]);
1462         assert_eq!(5, list.get_uint(0).unwrap());
1463 
1464         let list = make_list(vec![Field::ULong(6), Field::ULong(7)]);
1465         assert_eq!(7, list.get_ulong(1).unwrap());
1466 
1467         let list = make_list(vec![
1468             Field::Float(8.1),
1469             Field::Float(9.2),
1470             Field::Float(10.3),
1471         ]);
1472         assert_eq!(10.3, list.get_float(2).unwrap());
1473 
1474         let list = make_list(vec![Field::Double(3.1415)]);
1475         assert_eq!(3.1415, list.get_double(0).unwrap());
1476 
1477         let list = make_list(vec![Field::Str("abc".to_string())]);
1478         assert_eq!(&"abc".to_string(), list.get_string(0).unwrap());
1479 
1480         let list = make_list(vec![Field::Bytes(ByteArray::from(vec![1, 2, 3, 4, 5]))]);
1481         assert_eq!(&[1, 2, 3, 4, 5], list.get_bytes(0).unwrap().data());
1482 
1483         let list = make_list(vec![Field::Decimal(Decimal::from_i32(4, 5, 2))]);
1484         assert_eq!(&[0, 0, 0, 4], list.get_decimal(0).unwrap().data());
1485     }
1486 
#[test]
fn test_list_primitive_invalid_accessors() {
    // Each list holds elements of a single primitive variant and is read back
    // through an accessor for a different type; every such read must report
    // an error.

    // Boolean and signed integer elements.
    assert!(make_list(vec![Field::Bool(false)]).get_byte(0).is_err());
    assert!(
        make_list(vec![Field::Byte(3), Field::Byte(4)])
            .get_short(1)
            .is_err()
    );
    assert!(
        make_list(vec![Field::Short(4), Field::Short(5), Field::Short(6)])
            .get_int(2)
            .is_err()
    );
    assert!(make_list(vec![Field::Int(5)]).get_long(0).is_err());
    assert!(
        make_list(vec![Field::Long(6), Field::Long(7)])
            .get_float(1)
            .is_err()
    );

    // Unsigned integer elements read through signed / floating accessors.
    assert!(
        make_list(vec![Field::UByte(3), Field::UByte(4)])
            .get_short(1)
            .is_err()
    );
    assert!(
        make_list(vec![Field::UShort(4), Field::UShort(5), Field::UShort(6)])
            .get_int(2)
            .is_err()
    );
    assert!(make_list(vec![Field::UInt(5)]).get_long(0).is_err());
    assert!(
        make_list(vec![Field::ULong(6), Field::ULong(7)])
            .get_float(1)
            .is_err()
    );

    // Floating point elements.
    assert!(
        make_list(vec![
            Field::Float(8.1),
            Field::Float(9.2),
            Field::Float(10.3),
        ])
        .get_double(2)
        .is_err()
    );
    assert!(make_list(vec![Field::Double(3.1415)]).get_string(0).is_err());

    // String, byte array and decimal elements.
    assert!(
        make_list(vec![Field::Str("abc".to_string())])
            .get_bytes(0)
            .is_err()
    );
    assert!(
        make_list(vec![Field::Bytes(ByteArray::from(vec![1, 2, 3, 4, 5]))])
            .get_bool(0)
            .is_err()
    );
    assert!(
        make_list(vec![Field::Decimal(Decimal::from_i32(4, 5, 2))])
            .get_bool(0)
            .is_err()
    );
}
1536 
#[test]
fn test_list_complex_accessors() {
    // A list whose single element is a group with two named fields.
    let inner_row = make_row(vec![
        (String::from("x"), Field::Null),
        (String::from("Y"), Field::Int(2)),
    ]);
    let groups = make_list(vec![Field::Group(inner_row)]);
    let group_len = groups.get_group(0).unwrap().len();
    assert_eq!(2, group_len);

    // A list whose single element is itself a list of four entries
    // (one of them null).
    let inner_list = make_list(vec![
        Field::Int(2),
        Field::Int(1),
        Field::Null,
        Field::Int(12),
    ]);
    let lists = make_list(vec![Field::ListInternal(inner_list)]);
    let list_len = lists.get_list(0).unwrap().len();
    assert_eq!(4, list_len);

    // A list whose single element is a map with three key/value pairs.
    let inner_map = make_map(vec![
        (Field::Int(1), Field::Float(1.2)),
        (Field::Int(2), Field::Float(4.5)),
        (Field::Int(3), Field::Float(2.3)),
    ]);
    let maps = make_list(vec![Field::MapInternal(inner_map)]);
    let map_len = maps.get_map(0).unwrap().len();
    assert_eq!(3, map_len);
}
1560 
#[test]
fn test_list_complex_invalid_accessors() {
    // Reading a complex element (group, list or map) through the float
    // accessor must fail with an error that names the actual variant.

    // Group element.
    let inner_row = make_row(vec![
        (String::from("x"), Field::Null),
        (String::from("Y"), Field::Int(2)),
    ]);
    let groups = make_list(vec![Field::Group(inner_row)]);
    let group_err = groups.get_float(0).unwrap_err();
    assert_eq!(
        general_err!("Cannot access Group as Float".to_string()),
        group_err
    );

    // Nested list element.
    let inner_list = make_list(vec![
        Field::Int(2),
        Field::Int(1),
        Field::Null,
        Field::Int(12),
    ]);
    let lists = make_list(vec![Field::ListInternal(inner_list)]);
    let list_err = lists.get_float(0).unwrap_err();
    assert_eq!(
        general_err!("Cannot access ListInternal as Float".to_string()),
        list_err
    );

    // Map element.
    let inner_map = make_map(vec![
        (Field::Int(1), Field::Float(1.2)),
        (Field::Int(2), Field::Float(4.5)),
        (Field::Int(3), Field::Float(2.3)),
    ]);
    let maps = make_list(vec![Field::MapInternal(inner_map)]);
    let map_err = maps.get_float(0).unwrap_err();
    assert_eq!(
        general_err!("Cannot access MapInternal as Float".to_string()),
        map_err
    );
}
1593 
#[test]
fn test_map_accessors() {
    // Map from the integers 1..=5 to the single-letter strings "a"..="e".
    let entries = vec![
        (Field::Int(1), Field::Str("a".to_string())),
        (Field::Int(2), Field::Str("b".to_string())),
        (Field::Int(3), Field::Str("c".to_string())),
        (Field::Int(4), Field::Str("d".to_string())),
        (Field::Int(5), Field::Str("e".to_string())),
    ];
    let map = make_map(entries);

    assert_eq!(5, map.len());

    // Walk the five positions; the key at position `idx` is `idx + 1` and the
    // value is the letter whose ASCII code is `byte`.
    for (idx, byte) in (b'a'..=b'e').enumerate() {
        let expected_key = idx as i32 + 1;
        assert_eq!(expected_key, map.get_keys().get_int(idx).unwrap());

        let expected_value = char::from(byte).to_string();
        assert_eq!(&expected_value, map.get_values().get_string(idx).unwrap());
    }
}
1614 }
1615