1 // Licensed to the Apache Software Foundation (ASF) under one
2 // or more contributor license agreements.  See the NOTICE file
3 // distributed with this work for additional information
4 // regarding copyright ownership.  The ASF licenses this file
5 // to you under the Apache License, Version 2.0 (the
6 // "License"); you may not use this file except in compliance
7 // with the License.  You may obtain a copy of the License at
8 //
9 //   http://www.apache.org/licenses/LICENSE-2.0
10 //
11 // Unless required by applicable law or agreed to in writing,
12 // software distributed under the License is distributed on an
13 // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 // KIND, either express or implied.  See the License for the
15 // specific language governing permissions and limitations
16 // under the License.
17 
18 //! Defines the logical data types of Arrow arrays.
19 //!
20 //! The most important things you might be looking for are:
21 //!  * [`Schema`](crate::datatypes::Schema) to describe a schema.
22 //!  * [`Field`](crate::datatypes::Field) to describe one field within a schema.
23 //!  * [`DataType`](crate::datatypes::DataType) to describe the type of a field.
24 
25 use std::sync::Arc;
26 
27 mod native;
28 pub use native::*;
29 mod field;
30 pub use field::*;
31 mod schema;
32 pub use schema::*;
33 mod numeric;
34 pub use numeric::*;
35 mod types;
36 pub use types::*;
37 mod datatype;
38 pub use datatype::*;
39 
/// A reference-counted reference to a [`Schema`](crate::datatypes::Schema).
///
/// This is an alias for `Arc<Schema>`: cloning a `SchemaRef` clones the
/// [`Arc`] pointer (bumping its reference count) rather than copying the
/// underlying schema.
pub type SchemaRef = Arc<Schema>;
42 
43 #[cfg(test)]
44 mod tests {
45     use super::*;
46     use crate::error::Result;
47     use serde_json::Value::{Bool, Number as VNumber};
48     use serde_json::{Number, Value};
49     use std::{
50         collections::{BTreeMap, HashMap},
51         f32::NAN,
52     };
53 
54     #[test]
test_list_datatype_equality()55     fn test_list_datatype_equality() {
56         // tests that list type equality is checked while ignoring list names
57         let list_a = DataType::List(Box::new(Field::new("item", DataType::Int32, true)));
58         let list_b = DataType::List(Box::new(Field::new("array", DataType::Int32, true)));
59         let list_c = DataType::List(Box::new(Field::new("item", DataType::Int32, false)));
60         let list_d = DataType::List(Box::new(Field::new("item", DataType::UInt32, true)));
61         assert!(list_a.equals_datatype(&list_b));
62         assert!(!list_a.equals_datatype(&list_c));
63         assert!(!list_b.equals_datatype(&list_c));
64         assert!(!list_a.equals_datatype(&list_d));
65 
66         let list_e =
67             DataType::FixedSizeList(Box::new(Field::new("item", list_a, false)), 3);
68         let list_f =
69             DataType::FixedSizeList(Box::new(Field::new("array", list_b, false)), 3);
70         let list_g = DataType::FixedSizeList(
71             Box::new(Field::new("item", DataType::FixedSizeBinary(3), true)),
72             3,
73         );
74         assert!(list_e.equals_datatype(&list_f));
75         assert!(!list_e.equals_datatype(&list_g));
76         assert!(!list_f.equals_datatype(&list_g));
77 
78         let list_h = DataType::Struct(vec![Field::new("f1", list_e, true)]);
79         let list_i = DataType::Struct(vec![Field::new("f1", list_f.clone(), true)]);
80         let list_j = DataType::Struct(vec![Field::new("f1", list_f.clone(), false)]);
81         let list_k = DataType::Struct(vec![
82             Field::new("f1", list_f.clone(), false),
83             Field::new("f2", list_g.clone(), false),
84             Field::new("f3", DataType::Utf8, true),
85         ]);
86         let list_l = DataType::Struct(vec![
87             Field::new("ff1", list_f.clone(), false),
88             Field::new("ff2", list_g.clone(), false),
89             Field::new("ff3", DataType::LargeUtf8, true),
90         ]);
91         let list_m = DataType::Struct(vec![
92             Field::new("ff1", list_f, false),
93             Field::new("ff2", list_g, false),
94             Field::new("ff3", DataType::Utf8, true),
95         ]);
96         assert!(list_h.equals_datatype(&list_i));
97         assert!(!list_h.equals_datatype(&list_j));
98         assert!(!list_k.equals_datatype(&list_l));
99         assert!(list_k.equals_datatype(&list_m));
100     }
101 
102     #[test]
create_struct_type()103     fn create_struct_type() {
104         let _person = DataType::Struct(vec![
105             Field::new("first_name", DataType::Utf8, false),
106             Field::new("last_name", DataType::Utf8, false),
107             Field::new(
108                 "address",
109                 DataType::Struct(vec![
110                     Field::new("street", DataType::Utf8, false),
111                     Field::new("zip", DataType::UInt16, false),
112                 ]),
113                 false,
114             ),
115         ]);
116     }
117 
118     #[test]
serde_struct_type()119     fn serde_struct_type() {
120         let kv_array = [("k".to_string(), "v".to_string())];
121         let field_metadata: BTreeMap<String, String> = kv_array.iter().cloned().collect();
122 
123         // Non-empty map: should be converted as JSON obj { ... }
124         let mut first_name = Field::new("first_name", DataType::Utf8, false);
125         first_name.set_metadata(Some(field_metadata));
126 
127         // Empty map: should be omitted.
128         let mut last_name = Field::new("last_name", DataType::Utf8, false);
129         last_name.set_metadata(Some(BTreeMap::default()));
130 
131         let person = DataType::Struct(vec![
132             first_name,
133             last_name,
134             Field::new(
135                 "address",
136                 DataType::Struct(vec![
137                     Field::new("street", DataType::Utf8, false),
138                     Field::new("zip", DataType::UInt16, false),
139                 ]),
140                 false,
141             ),
142         ]);
143 
144         let serialized = serde_json::to_string(&person).unwrap();
145 
146         // NOTE that this is testing the default (derived) serialization format, not the
147         // JSON format specified in metadata.md
148 
149         assert_eq!(
150             "{\"Struct\":[\
151              {\"name\":\"first_name\",\"data_type\":\"Utf8\",\"nullable\":false,\"dict_id\":0,\"dict_is_ordered\":false,\"metadata\":{\"k\":\"v\"}},\
152              {\"name\":\"last_name\",\"data_type\":\"Utf8\",\"nullable\":false,\"dict_id\":0,\"dict_is_ordered\":false},\
153              {\"name\":\"address\",\"data_type\":{\"Struct\":\
154              [{\"name\":\"street\",\"data_type\":\"Utf8\",\"nullable\":false,\"dict_id\":0,\"dict_is_ordered\":false},\
155              {\"name\":\"zip\",\"data_type\":\"UInt16\",\"nullable\":false,\"dict_id\":0,\"dict_is_ordered\":false}\
156              ]},\"nullable\":false,\"dict_id\":0,\"dict_is_ordered\":false}]}",
157             serialized
158         );
159 
160         let deserialized = serde_json::from_str(&serialized).unwrap();
161 
162         assert_eq!(person, deserialized);
163     }
164 
165     #[test]
struct_field_to_json()166     fn struct_field_to_json() {
167         let f = Field::new(
168             "address",
169             DataType::Struct(vec![
170                 Field::new("street", DataType::Utf8, false),
171                 Field::new("zip", DataType::UInt16, false),
172             ]),
173             false,
174         );
175         let value: Value = serde_json::from_str(
176             r#"{
177                 "name": "address",
178                 "nullable": false,
179                 "type": {
180                     "name": "struct"
181                 },
182                 "children": [
183                     {
184                         "name": "street",
185                         "nullable": false,
186                         "type": {
187                             "name": "utf8"
188                         },
189                         "children": []
190                     },
191                     {
192                         "name": "zip",
193                         "nullable": false,
194                         "type": {
195                             "name": "int",
196                             "bitWidth": 16,
197                             "isSigned": false
198                         },
199                         "children": []
200                     }
201                 ]
202             }"#,
203         )
204         .unwrap();
205         assert_eq!(value, f.to_json());
206     }
207 
208     #[test]
primitive_field_to_json()209     fn primitive_field_to_json() {
210         let f = Field::new("first_name", DataType::Utf8, false);
211         let value: Value = serde_json::from_str(
212             r#"{
213                 "name": "first_name",
214                 "nullable": false,
215                 "type": {
216                     "name": "utf8"
217                 },
218                 "children": []
219             }"#,
220         )
221         .unwrap();
222         assert_eq!(value, f.to_json());
223     }
224     #[test]
parse_struct_from_json()225     fn parse_struct_from_json() {
226         let json = r#"
227         {
228             "name": "address",
229             "type": {
230                 "name": "struct"
231             },
232             "nullable": false,
233             "children": [
234                 {
235                     "name": "street",
236                     "type": {
237                     "name": "utf8"
238                     },
239                     "nullable": false,
240                     "children": []
241                 },
242                 {
243                     "name": "zip",
244                     "type": {
245                     "name": "int",
246                     "isSigned": false,
247                     "bitWidth": 16
248                     },
249                     "nullable": false,
250                     "children": []
251                 }
252             ]
253         }
254         "#;
255         let value: Value = serde_json::from_str(json).unwrap();
256         let dt = Field::from(&value).unwrap();
257 
258         let expected = Field::new(
259             "address",
260             DataType::Struct(vec![
261                 Field::new("street", DataType::Utf8, false),
262                 Field::new("zip", DataType::UInt16, false),
263             ]),
264             false,
265         );
266 
267         assert_eq!(expected, dt);
268     }
269 
270     #[test]
parse_utf8_from_json()271     fn parse_utf8_from_json() {
272         let json = "{\"name\":\"utf8\"}";
273         let value: Value = serde_json::from_str(json).unwrap();
274         let dt = DataType::from(&value).unwrap();
275         assert_eq!(DataType::Utf8, dt);
276     }
277 
278     #[test]
parse_int32_from_json()279     fn parse_int32_from_json() {
280         let json = "{\"name\": \"int\", \"isSigned\": true, \"bitWidth\": 32}";
281         let value: Value = serde_json::from_str(json).unwrap();
282         let dt = DataType::from(&value).unwrap();
283         assert_eq!(DataType::Int32, dt);
284     }
285 
286     #[test]
schema_json()287     fn schema_json() {
288         // Add some custom metadata
289         let metadata: HashMap<String, String> =
290             [("Key".to_string(), "Value".to_string())]
291                 .iter()
292                 .cloned()
293                 .collect();
294 
295         let schema = Schema::new_with_metadata(
296             vec![
297                 Field::new("c1", DataType::Utf8, false),
298                 Field::new("c2", DataType::Binary, false),
299                 Field::new("c3", DataType::FixedSizeBinary(3), false),
300                 Field::new("c4", DataType::Boolean, false),
301                 Field::new("c5", DataType::Date32, false),
302                 Field::new("c6", DataType::Date64, false),
303                 Field::new("c7", DataType::Time32(TimeUnit::Second), false),
304                 Field::new("c8", DataType::Time32(TimeUnit::Millisecond), false),
305                 Field::new("c9", DataType::Time32(TimeUnit::Microsecond), false),
306                 Field::new("c10", DataType::Time32(TimeUnit::Nanosecond), false),
307                 Field::new("c11", DataType::Time64(TimeUnit::Second), false),
308                 Field::new("c12", DataType::Time64(TimeUnit::Millisecond), false),
309                 Field::new("c13", DataType::Time64(TimeUnit::Microsecond), false),
310                 Field::new("c14", DataType::Time64(TimeUnit::Nanosecond), false),
311                 Field::new("c15", DataType::Timestamp(TimeUnit::Second, None), false),
312                 Field::new(
313                     "c16",
314                     DataType::Timestamp(TimeUnit::Millisecond, Some("UTC".to_string())),
315                     false,
316                 ),
317                 Field::new(
318                     "c17",
319                     DataType::Timestamp(
320                         TimeUnit::Microsecond,
321                         Some("Africa/Johannesburg".to_string()),
322                     ),
323                     false,
324                 ),
325                 Field::new(
326                     "c18",
327                     DataType::Timestamp(TimeUnit::Nanosecond, None),
328                     false,
329                 ),
330                 Field::new("c19", DataType::Interval(IntervalUnit::DayTime), false),
331                 Field::new("c20", DataType::Interval(IntervalUnit::YearMonth), false),
332                 Field::new(
333                     "c21",
334                     DataType::List(Box::new(Field::new("item", DataType::Boolean, true))),
335                     false,
336                 ),
337                 Field::new(
338                     "c22",
339                     DataType::FixedSizeList(
340                         Box::new(Field::new("bools", DataType::Boolean, false)),
341                         5,
342                     ),
343                     false,
344                 ),
345                 Field::new(
346                     "c23",
347                     DataType::List(Box::new(Field::new(
348                         "inner_list",
349                         DataType::List(Box::new(Field::new(
350                             "struct",
351                             DataType::Struct(vec![]),
352                             true,
353                         ))),
354                         false,
355                     ))),
356                     true,
357                 ),
358                 Field::new(
359                     "c24",
360                     DataType::Struct(vec![
361                         Field::new("a", DataType::Utf8, false),
362                         Field::new("b", DataType::UInt16, false),
363                     ]),
364                     false,
365                 ),
366                 Field::new("c25", DataType::Interval(IntervalUnit::YearMonth), true),
367                 Field::new("c26", DataType::Interval(IntervalUnit::DayTime), true),
368                 Field::new("c27", DataType::Duration(TimeUnit::Second), false),
369                 Field::new("c28", DataType::Duration(TimeUnit::Millisecond), false),
370                 Field::new("c29", DataType::Duration(TimeUnit::Microsecond), false),
371                 Field::new("c30", DataType::Duration(TimeUnit::Nanosecond), false),
372                 Field::new_dict(
373                     "c31",
374                     DataType::Dictionary(
375                         Box::new(DataType::Int32),
376                         Box::new(DataType::Utf8),
377                     ),
378                     true,
379                     123,
380                     true,
381                 ),
382                 Field::new("c32", DataType::LargeBinary, true),
383                 Field::new("c33", DataType::LargeUtf8, true),
384                 Field::new(
385                     "c34",
386                     DataType::LargeList(Box::new(Field::new(
387                         "inner_large_list",
388                         DataType::LargeList(Box::new(Field::new(
389                             "struct",
390                             DataType::Struct(vec![]),
391                             false,
392                         ))),
393                         true,
394                     ))),
395                     true,
396                 ),
397             ],
398             metadata,
399         );
400 
401         let expected = schema.to_json();
402         let json = r#"{
403                 "fields": [
404                     {
405                         "name": "c1",
406                         "nullable": false,
407                         "type": {
408                             "name": "utf8"
409                         },
410                         "children": []
411                     },
412                     {
413                         "name": "c2",
414                         "nullable": false,
415                         "type": {
416                             "name": "binary"
417                         },
418                         "children": []
419                     },
420                     {
421                         "name": "c3",
422                         "nullable": false,
423                         "type": {
424                             "name": "fixedsizebinary",
425                             "byteWidth": 3
426                         },
427                         "children": []
428                     },
429                     {
430                         "name": "c4",
431                         "nullable": false,
432                         "type": {
433                             "name": "bool"
434                         },
435                         "children": []
436                     },
437                     {
438                         "name": "c5",
439                         "nullable": false,
440                         "type": {
441                             "name": "date",
442                             "unit": "DAY"
443                         },
444                         "children": []
445                     },
446                     {
447                         "name": "c6",
448                         "nullable": false,
449                         "type": {
450                             "name": "date",
451                             "unit": "MILLISECOND"
452                         },
453                         "children": []
454                     },
455                     {
456                         "name": "c7",
457                         "nullable": false,
458                         "type": {
459                             "name": "time",
460                             "bitWidth": 32,
461                             "unit": "SECOND"
462                         },
463                         "children": []
464                     },
465                     {
466                         "name": "c8",
467                         "nullable": false,
468                         "type": {
469                             "name": "time",
470                             "bitWidth": 32,
471                             "unit": "MILLISECOND"
472                         },
473                         "children": []
474                     },
475                     {
476                         "name": "c9",
477                         "nullable": false,
478                         "type": {
479                             "name": "time",
480                             "bitWidth": 32,
481                             "unit": "MICROSECOND"
482                         },
483                         "children": []
484                     },
485                     {
486                         "name": "c10",
487                         "nullable": false,
488                         "type": {
489                             "name": "time",
490                             "bitWidth": 32,
491                             "unit": "NANOSECOND"
492                         },
493                         "children": []
494                     },
495                     {
496                         "name": "c11",
497                         "nullable": false,
498                         "type": {
499                             "name": "time",
500                             "bitWidth": 64,
501                             "unit": "SECOND"
502                         },
503                         "children": []
504                     },
505                     {
506                         "name": "c12",
507                         "nullable": false,
508                         "type": {
509                             "name": "time",
510                             "bitWidth": 64,
511                             "unit": "MILLISECOND"
512                         },
513                         "children": []
514                     },
515                     {
516                         "name": "c13",
517                         "nullable": false,
518                         "type": {
519                             "name": "time",
520                             "bitWidth": 64,
521                             "unit": "MICROSECOND"
522                         },
523                         "children": []
524                     },
525                     {
526                         "name": "c14",
527                         "nullable": false,
528                         "type": {
529                             "name": "time",
530                             "bitWidth": 64,
531                             "unit": "NANOSECOND"
532                         },
533                         "children": []
534                     },
535                     {
536                         "name": "c15",
537                         "nullable": false,
538                         "type": {
539                             "name": "timestamp",
540                             "unit": "SECOND"
541                         },
542                         "children": []
543                     },
544                     {
545                         "name": "c16",
546                         "nullable": false,
547                         "type": {
548                             "name": "timestamp",
549                             "unit": "MILLISECOND",
550                             "timezone": "UTC"
551                         },
552                         "children": []
553                     },
554                     {
555                         "name": "c17",
556                         "nullable": false,
557                         "type": {
558                             "name": "timestamp",
559                             "unit": "MICROSECOND",
560                             "timezone": "Africa/Johannesburg"
561                         },
562                         "children": []
563                     },
564                     {
565                         "name": "c18",
566                         "nullable": false,
567                         "type": {
568                             "name": "timestamp",
569                             "unit": "NANOSECOND"
570                         },
571                         "children": []
572                     },
573                     {
574                         "name": "c19",
575                         "nullable": false,
576                         "type": {
577                             "name": "interval",
578                             "unit": "DAY_TIME"
579                         },
580                         "children": []
581                     },
582                     {
583                         "name": "c20",
584                         "nullable": false,
585                         "type": {
586                             "name": "interval",
587                             "unit": "YEAR_MONTH"
588                         },
589                         "children": []
590                     },
591                     {
592                         "name": "c21",
593                         "nullable": false,
594                         "type": {
595                             "name": "list"
596                         },
597                         "children": [
598                             {
599                                 "name": "item",
600                                 "nullable": true,
601                                 "type": {
602                                     "name": "bool"
603                                 },
604                                 "children": []
605                             }
606                         ]
607                     },
608                     {
609                         "name": "c22",
610                         "nullable": false,
611                         "type": {
612                             "name": "fixedsizelist",
613                             "listSize": 5
614                         },
615                         "children": [
616                             {
617                                 "name": "bools",
618                                 "nullable": false,
619                                 "type": {
620                                     "name": "bool"
621                                 },
622                                 "children": []
623                             }
624                         ]
625                     },
626                     {
627                         "name": "c23",
628                         "nullable": true,
629                         "type": {
630                             "name": "list"
631                         },
632                         "children": [
633                             {
634                                 "name": "inner_list",
635                                 "nullable": false,
636                                 "type": {
637                                     "name": "list"
638                                 },
639                                 "children": [
640                                     {
641                                         "name": "struct",
642                                         "nullable": true,
643                                         "type": {
644                                             "name": "struct"
645                                         },
646                                         "children": []
647                                     }
648                                 ]
649                             }
650                         ]
651                     },
652                     {
653                         "name": "c24",
654                         "nullable": false,
655                         "type": {
656                             "name": "struct"
657                         },
658                         "children": [
659                             {
660                                 "name": "a",
661                                 "nullable": false,
662                                 "type": {
663                                     "name": "utf8"
664                                 },
665                                 "children": []
666                             },
667                             {
668                                 "name": "b",
669                                 "nullable": false,
670                                 "type": {
671                                     "name": "int",
672                                     "bitWidth": 16,
673                                     "isSigned": false
674                                 },
675                                 "children": []
676                             }
677                         ]
678                     },
679                     {
680                         "name": "c25",
681                         "nullable": true,
682                         "type": {
683                             "name": "interval",
684                             "unit": "YEAR_MONTH"
685                         },
686                         "children": []
687                     },
688                     {
689                         "name": "c26",
690                         "nullable": true,
691                         "type": {
692                             "name": "interval",
693                             "unit": "DAY_TIME"
694                         },
695                         "children": []
696                     },
697                     {
698                         "name": "c27",
699                         "nullable": false,
700                         "type": {
701                             "name": "duration",
702                             "unit": "SECOND"
703                         },
704                         "children": []
705                     },
706                     {
707                         "name": "c28",
708                         "nullable": false,
709                         "type": {
710                             "name": "duration",
711                             "unit": "MILLISECOND"
712                         },
713                         "children": []
714                     },
715                     {
716                         "name": "c29",
717                         "nullable": false,
718                         "type": {
719                             "name": "duration",
720                             "unit": "MICROSECOND"
721                         },
722                         "children": []
723                     },
724                     {
725                         "name": "c30",
726                         "nullable": false,
727                         "type": {
728                             "name": "duration",
729                             "unit": "NANOSECOND"
730                         },
731                         "children": []
732                     },
733                     {
734                         "name": "c31",
735                         "nullable": true,
736                         "children": [],
737                         "type": {
738                           "name": "utf8"
739                         },
740                         "dictionary": {
741                           "id": 123,
742                           "indexType": {
743                             "name": "int",
744                             "bitWidth": 32,
745                             "isSigned": true
746                           },
747                           "isOrdered": true
748                         }
749                     },
750                     {
751                         "name": "c32",
752                         "nullable": true,
753                         "type": {
754                           "name": "largebinary"
755                         },
756                         "children": []
757                     },
758                     {
759                         "name": "c33",
760                         "nullable": true,
761                         "type": {
762                           "name": "largeutf8"
763                         },
764                         "children": []
765                     },
766                     {
767                         "name": "c34",
768                         "nullable": true,
769                         "type": {
770                           "name": "largelist"
771                         },
772                         "children": [
773                             {
774                                 "name": "inner_large_list",
775                                 "nullable": true,
776                                 "type": {
777                                     "name": "largelist"
778                                 },
779                                 "children": [
780                                     {
781                                         "name": "struct",
782                                         "nullable": false,
783                                         "type": {
784                                             "name": "struct"
785                                         },
786                                         "children": []
787                                     }
788                                 ]
789                             }
790                         ]
791                     }
792                 ],
793                 "metadata" : {
794                     "Key": "Value"
795                 }
796             }"#;
797         let value: Value = serde_json::from_str(&json).unwrap();
798         assert_eq!(expected, value);
799 
800         // convert back to a schema
801         let value: Value = serde_json::from_str(&json).unwrap();
802         let schema2 = Schema::from(&value).unwrap();
803 
804         assert_eq!(schema, schema2);
805 
806         // Check that empty metadata produces empty value in JSON and can be parsed
807         let json = r#"{
808                 "fields": [
809                     {
810                         "name": "c1",
811                         "nullable": false,
812                         "type": {
813                             "name": "utf8"
814                         },
815                         "children": []
816                     }
817                 ],
818                 "metadata": {}
819             }"#;
820         let value: Value = serde_json::from_str(&json).unwrap();
821         let schema = Schema::from(&value).unwrap();
822         assert!(schema.metadata.is_empty());
823 
824         // Check that metadata field is not required in the JSON.
825         let json = r#"{
826                 "fields": [
827                     {
828                         "name": "c1",
829                         "nullable": false,
830                         "type": {
831                             "name": "utf8"
832                         },
833                         "children": []
834                     }
835                 ]
836             }"#;
837         let value: Value = serde_json::from_str(&json).unwrap();
838         let schema = Schema::from(&value).unwrap();
839         assert!(schema.metadata.is_empty());
840     }
841 
842     #[test]
create_schema_string()843     fn create_schema_string() {
844         let schema = person_schema();
845         assert_eq!(schema.to_string(),
846         "Field { name: \"first_name\", data_type: Utf8, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: Some({\"k\": \"v\"}) }, \
847         Field { name: \"last_name\", data_type: Utf8, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: None }, \
848         Field { name: \"address\", data_type: Struct([\
849             Field { name: \"street\", data_type: Utf8, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: None }, \
850             Field { name: \"zip\", data_type: UInt16, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: None }\
851         ]), nullable: false, dict_id: 0, dict_is_ordered: false, metadata: None }, \
852         Field { name: \"interests\", data_type: Dictionary(Int32, Utf8), nullable: true, dict_id: 123, dict_is_ordered: true, metadata: None }")
853     }
854 
855     #[test]
schema_field_accessors()856     fn schema_field_accessors() {
857         let schema = person_schema();
858 
859         // test schema accessors
860         assert_eq!(schema.fields().len(), 4);
861 
862         // test field accessors
863         let first_name = &schema.fields()[0];
864         assert_eq!(first_name.name(), "first_name");
865         assert_eq!(first_name.data_type(), &DataType::Utf8);
866         assert_eq!(first_name.is_nullable(), false);
867         assert_eq!(first_name.dict_id(), None);
868         assert_eq!(first_name.dict_is_ordered(), None);
869 
870         let metadata = first_name.metadata();
871         assert!(metadata.is_some());
872         let md = metadata.as_ref().unwrap();
873         assert_eq!(md.len(), 1);
874         let key = md.get("k");
875         assert!(key.is_some());
876         assert_eq!(key.unwrap(), "v");
877 
878         let interests = &schema.fields()[3];
879         assert_eq!(interests.name(), "interests");
880         assert_eq!(
881             interests.data_type(),
882             &DataType::Dictionary(Box::new(DataType::Int32), Box::new(DataType::Utf8))
883         );
884         assert_eq!(interests.dict_id(), Some(123));
885         assert_eq!(interests.dict_is_ordered(), Some(true));
886     }
887 
888     #[test]
889     #[should_panic(
890         expected = "Unable to get field named \\\"nickname\\\". Valid fields: [\\\"first_name\\\", \\\"last_name\\\", \\\"address\\\", \\\"interests\\\"]"
891     )]
schema_index_of()892     fn schema_index_of() {
893         let schema = person_schema();
894         assert_eq!(schema.index_of("first_name").unwrap(), 0);
895         assert_eq!(schema.index_of("last_name").unwrap(), 1);
896         schema.index_of("nickname").unwrap();
897     }
898 
899     #[test]
900     #[should_panic(
901         expected = "Unable to get field named \\\"nickname\\\". Valid fields: [\\\"first_name\\\", \\\"last_name\\\", \\\"address\\\", \\\"interests\\\"]"
902     )]
schema_field_with_name()903     fn schema_field_with_name() {
904         let schema = person_schema();
905         assert_eq!(
906             schema.field_with_name("first_name").unwrap().name(),
907             "first_name"
908         );
909         assert_eq!(
910             schema.field_with_name("last_name").unwrap().name(),
911             "last_name"
912         );
913         schema.field_with_name("nickname").unwrap();
914     }
915 
916     #[test]
schema_field_with_dict_id()917     fn schema_field_with_dict_id() {
918         let schema = person_schema();
919 
920         let fields_dict_123: Vec<_> = schema
921             .fields_with_dict_id(123)
922             .iter()
923             .map(|f| f.name())
924             .collect();
925         assert_eq!(fields_dict_123, vec!["interests"]);
926 
927         assert!(schema.fields_with_dict_id(456).is_empty());
928     }
929 
930     #[test]
schema_equality()931     fn schema_equality() {
932         let schema1 = Schema::new(vec![
933             Field::new("c1", DataType::Utf8, false),
934             Field::new("c2", DataType::Float64, true),
935             Field::new("c3", DataType::LargeBinary, true),
936         ]);
937         let schema2 = Schema::new(vec![
938             Field::new("c1", DataType::Utf8, false),
939             Field::new("c2", DataType::Float64, true),
940             Field::new("c3", DataType::LargeBinary, true),
941         ]);
942 
943         assert_eq!(schema1, schema2);
944 
945         let schema3 = Schema::new(vec![
946             Field::new("c1", DataType::Utf8, false),
947             Field::new("c2", DataType::Float32, true),
948         ]);
949         let schema4 = Schema::new(vec![
950             Field::new("C1", DataType::Utf8, false),
951             Field::new("C2", DataType::Float64, true),
952         ]);
953 
954         assert!(schema1 != schema3);
955         assert!(schema1 != schema4);
956         assert!(schema2 != schema3);
957         assert!(schema2 != schema4);
958         assert!(schema3 != schema4);
959 
960         let mut f = Field::new("c1", DataType::Utf8, false);
961         f.set_metadata(Some(
962             [("foo".to_string(), "bar".to_string())]
963                 .iter()
964                 .cloned()
965                 .collect(),
966         ));
967         let schema5 = Schema::new(vec![
968             f,
969             Field::new("c2", DataType::Float64, true),
970             Field::new("c3", DataType::LargeBinary, true),
971         ]);
972         assert!(schema1 != schema5);
973     }
974 
975     #[test]
test_arrow_native_type_to_json()976     fn test_arrow_native_type_to_json() {
977         assert_eq!(Some(Bool(true)), true.into_json_value());
978         assert_eq!(Some(VNumber(Number::from(1))), 1i8.into_json_value());
979         assert_eq!(Some(VNumber(Number::from(1))), 1i16.into_json_value());
980         assert_eq!(Some(VNumber(Number::from(1))), 1i32.into_json_value());
981         assert_eq!(Some(VNumber(Number::from(1))), 1i64.into_json_value());
982         assert_eq!(Some(VNumber(Number::from(1))), 1u8.into_json_value());
983         assert_eq!(Some(VNumber(Number::from(1))), 1u16.into_json_value());
984         assert_eq!(Some(VNumber(Number::from(1))), 1u32.into_json_value());
985         assert_eq!(Some(VNumber(Number::from(1))), 1u64.into_json_value());
986         assert_eq!(
987             Some(VNumber(Number::from_f64(0.01f64).unwrap())),
988             0.01.into_json_value()
989         );
990         assert_eq!(
991             Some(VNumber(Number::from_f64(0.01f64).unwrap())),
992             0.01f64.into_json_value()
993         );
994         assert_eq!(None, NAN.into_json_value());
995     }
996 
person_schema() -> Schema997     fn person_schema() -> Schema {
998         let kv_array = [("k".to_string(), "v".to_string())];
999         let field_metadata: BTreeMap<String, String> = kv_array.iter().cloned().collect();
1000         let mut first_name = Field::new("first_name", DataType::Utf8, false);
1001         first_name.set_metadata(Some(field_metadata));
1002 
1003         Schema::new(vec![
1004             first_name,
1005             Field::new("last_name", DataType::Utf8, false),
1006             Field::new(
1007                 "address",
1008                 DataType::Struct(vec![
1009                     Field::new("street", DataType::Utf8, false),
1010                     Field::new("zip", DataType::UInt16, false),
1011                 ]),
1012                 false,
1013             ),
1014             Field::new_dict(
1015                 "interests",
1016                 DataType::Dictionary(Box::new(DataType::Int32), Box::new(DataType::Utf8)),
1017                 true,
1018                 123,
1019                 true,
1020             ),
1021         ])
1022     }
1023 
1024     #[test]
test_try_merge_field_with_metadata()1025     fn test_try_merge_field_with_metadata() {
1026         // 1. Different values for the same key should cause error.
1027         let metadata1: BTreeMap<String, String> =
1028             [("foo".to_string(), "bar".to_string())]
1029                 .iter()
1030                 .cloned()
1031                 .collect();
1032         let mut f1 = Field::new("first_name", DataType::Utf8, false);
1033         f1.set_metadata(Some(metadata1));
1034 
1035         let metadata2: BTreeMap<String, String> =
1036             [("foo".to_string(), "baz".to_string())]
1037                 .iter()
1038                 .cloned()
1039                 .collect();
1040         let mut f2 = Field::new("first_name", DataType::Utf8, false);
1041         f2.set_metadata(Some(metadata2));
1042 
1043         assert!(
1044             Schema::try_merge(vec![Schema::new(vec![f1]), Schema::new(vec![f2])])
1045                 .is_err()
1046         );
1047 
1048         // 2. None + Some
1049         let mut f1 = Field::new("first_name", DataType::Utf8, false);
1050         let metadata2: BTreeMap<String, String> =
1051             [("missing".to_string(), "value".to_string())]
1052                 .iter()
1053                 .cloned()
1054                 .collect();
1055         let mut f2 = Field::new("first_name", DataType::Utf8, false);
1056         f2.set_metadata(Some(metadata2));
1057 
1058         assert!(f1.try_merge(&f2).is_ok());
1059         assert!(f1.metadata().is_some());
1060         assert_eq!(
1061             f1.metadata().as_ref().unwrap(),
1062             f2.metadata().as_ref().unwrap()
1063         );
1064 
1065         // 3. Some + Some
1066         let mut f1 = Field::new("first_name", DataType::Utf8, false);
1067         f1.set_metadata(Some(
1068             [("foo".to_string(), "bar".to_string())]
1069                 .iter()
1070                 .cloned()
1071                 .collect(),
1072         ));
1073         let mut f2 = Field::new("first_name", DataType::Utf8, false);
1074         f2.set_metadata(Some(
1075             [("foo2".to_string(), "bar2".to_string())]
1076                 .iter()
1077                 .cloned()
1078                 .collect(),
1079         ));
1080 
1081         assert!(f1.try_merge(&f2).is_ok());
1082         assert!(f1.metadata().is_some());
1083         assert_eq!(
1084             f1.metadata().clone().unwrap(),
1085             [
1086                 ("foo".to_string(), "bar".to_string()),
1087                 ("foo2".to_string(), "bar2".to_string())
1088             ]
1089             .iter()
1090             .cloned()
1091             .collect()
1092         );
1093 
1094         // 4. Some + None.
1095         let mut f1 = Field::new("first_name", DataType::Utf8, false);
1096         f1.set_metadata(Some(
1097             [("foo".to_string(), "bar".to_string())]
1098                 .iter()
1099                 .cloned()
1100                 .collect(),
1101         ));
1102         let f2 = Field::new("first_name", DataType::Utf8, false);
1103         assert!(f1.try_merge(&f2).is_ok());
1104         assert!(f1.metadata().is_some());
1105         assert_eq!(
1106             f1.metadata().clone().unwrap(),
1107             [("foo".to_string(), "bar".to_string())]
1108                 .iter()
1109                 .cloned()
1110                 .collect()
1111         );
1112 
1113         // 5. None + None.
1114         let mut f1 = Field::new("first_name", DataType::Utf8, false);
1115         let f2 = Field::new("first_name", DataType::Utf8, false);
1116         assert!(f1.try_merge(&f2).is_ok());
1117         assert!(f1.metadata().is_none());
1118     }
1119 
1120     #[test]
test_schema_merge() -> Result<()>1121     fn test_schema_merge() -> Result<()> {
1122         let merged = Schema::try_merge(vec![
1123             Schema::new(vec![
1124                 Field::new("first_name", DataType::Utf8, false),
1125                 Field::new("last_name", DataType::Utf8, false),
1126                 Field::new(
1127                     "address",
1128                     DataType::Struct(vec![Field::new("zip", DataType::UInt16, false)]),
1129                     false,
1130                 ),
1131             ]),
1132             Schema::new_with_metadata(
1133                 vec![
1134                     // nullable merge
1135                     Field::new("last_name", DataType::Utf8, true),
1136                     Field::new(
1137                         "address",
1138                         DataType::Struct(vec![
1139                             // add new nested field
1140                             Field::new("street", DataType::Utf8, false),
1141                             // nullable merge on nested field
1142                             Field::new("zip", DataType::UInt16, true),
1143                         ]),
1144                         false,
1145                     ),
1146                     // new field
1147                     Field::new("number", DataType::Utf8, true),
1148                 ],
1149                 [("foo".to_string(), "bar".to_string())]
1150                     .iter()
1151                     .cloned()
1152                     .collect::<HashMap<String, String>>(),
1153             ),
1154         ])?;
1155 
1156         assert_eq!(
1157             merged,
1158             Schema::new_with_metadata(
1159                 vec![
1160                     Field::new("first_name", DataType::Utf8, false),
1161                     Field::new("last_name", DataType::Utf8, true),
1162                     Field::new(
1163                         "address",
1164                         DataType::Struct(vec![
1165                             Field::new("zip", DataType::UInt16, true),
1166                             Field::new("street", DataType::Utf8, false),
1167                         ]),
1168                         false,
1169                     ),
1170                     Field::new("number", DataType::Utf8, true),
1171                 ],
1172                 [("foo".to_string(), "bar".to_string())]
1173                     .iter()
1174                     .cloned()
1175                     .collect::<HashMap<String, String>>()
1176             )
1177         );
1178 
1179         // support merge union fields
1180         assert_eq!(
1181             Schema::try_merge(vec![
1182                 Schema::new(vec![Field::new(
1183                     "c1",
1184                     DataType::Union(vec![
1185                         Field::new("c11", DataType::Utf8, true),
1186                         Field::new("c12", DataType::Utf8, true),
1187                     ]),
1188                     false
1189                 ),]),
1190                 Schema::new(vec![Field::new(
1191                     "c1",
1192                     DataType::Union(vec![
1193                         Field::new("c12", DataType::Utf8, true),
1194                         Field::new("c13", DataType::Time64(TimeUnit::Second), true),
1195                     ]),
1196                     false
1197                 ),])
1198             ])?,
1199             Schema::new(vec![Field::new(
1200                 "c1",
1201                 DataType::Union(vec![
1202                     Field::new("c11", DataType::Utf8, true),
1203                     Field::new("c12", DataType::Utf8, true),
1204                     Field::new("c13", DataType::Time64(TimeUnit::Second), true),
1205                 ]),
1206                 false
1207             ),]),
1208         );
1209 
1210         // incompatible field should throw error
1211         assert!(Schema::try_merge(vec![
1212             Schema::new(vec![
1213                 Field::new("first_name", DataType::Utf8, false),
1214                 Field::new("last_name", DataType::Utf8, false),
1215             ]),
1216             Schema::new(vec![Field::new("last_name", DataType::Int64, false),])
1217         ])
1218         .is_err());
1219 
1220         // incompatible metadata should throw error
1221         assert!(Schema::try_merge(vec![
1222             Schema::new_with_metadata(
1223                 vec![Field::new("first_name", DataType::Utf8, false)],
1224                 [("foo".to_string(), "bar".to_string()),]
1225                     .iter()
1226                     .cloned()
1227                     .collect::<HashMap<String, String>>()
1228             ),
1229             Schema::new_with_metadata(
1230                 vec![Field::new("last_name", DataType::Utf8, false)],
1231                 [("foo".to_string(), "baz".to_string()),]
1232                     .iter()
1233                     .cloned()
1234                     .collect::<HashMap<String, String>>()
1235             )
1236         ])
1237         .is_err());
1238 
1239         Ok(())
1240     }
1241 }
1242