1 // Licensed to the Apache Software Foundation (ASF) under one 2 // or more contributor license agreements. See the NOTICE file 3 // distributed with this work for additional information 4 // regarding copyright ownership. The ASF licenses this file 5 // to you under the Apache License, Version 2.0 (the 6 // "License"); you may not use this file except in compliance 7 // with the License. You may obtain a copy of the License at 8 // 9 // http://www.apache.org/licenses/LICENSE-2.0 10 // 11 // Unless required by applicable law or agreed to in writing, 12 // software distributed under the License is distributed on an 13 // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 // KIND, either express or implied. See the License for the 15 // specific language governing permissions and limitations 16 // under the License. 17 18 //! Defines the logical data types of Arrow arrays. 19 //! 20 //! The most important things you might be looking for are: 21 //! * [`Schema`](crate::datatypes::Schema) to describe a schema. 22 //! * [`Field`](crate::datatypes::Field) to describe one field within a schema. 23 //! * [`DataType`](crate::datatypes::DataType) to describe the type of a field. 24 25 use std::sync::Arc; 26 27 mod native; 28 pub use native::*; 29 mod field; 30 pub use field::*; 31 mod schema; 32 pub use schema::*; 33 mod numeric; 34 pub use numeric::*; 35 mod types; 36 pub use types::*; 37 mod datatype; 38 pub use datatype::*; 39 40 /// A reference-counted reference to a [`Schema`](crate::datatypes::Schema). 41 pub type SchemaRef = Arc<Schema>; 42 43 #[cfg(test)] 44 mod tests { 45 use super::*; 46 use crate::error::Result; 47 use serde_json::Value::{Bool, Number as VNumber}; 48 use serde_json::{Number, Value}; 49 use std::{ 50 collections::{BTreeMap, HashMap}, 51 f32::NAN, 52 }; 53 54 #[test] test_list_datatype_equality()55 fn test_list_datatype_equality() { 56 // tests that list type equality is checked while ignoring list names 57 let list_a = DataType::List(Box::new(Field::new("item", DataType::Int32, true))); 58 let list_b = DataType::List(Box::new(Field::new("array", DataType::Int32, true))); 59 let list_c = DataType::List(Box::new(Field::new("item", DataType::Int32, false))); 60 let list_d = DataType::List(Box::new(Field::new("item", DataType::UInt32, true))); 61 assert!(list_a.equals_datatype(&list_b)); 62 assert!(!list_a.equals_datatype(&list_c)); 63 assert!(!list_b.equals_datatype(&list_c)); 64 assert!(!list_a.equals_datatype(&list_d)); 65 66 let list_e = 67 DataType::FixedSizeList(Box::new(Field::new("item", list_a, false)), 3); 68 let list_f = 69 DataType::FixedSizeList(Box::new(Field::new("array", list_b, false)), 3); 70 let list_g = DataType::FixedSizeList( 71 Box::new(Field::new("item", DataType::FixedSizeBinary(3), true)), 72 3, 73 ); 74 assert!(list_e.equals_datatype(&list_f)); 75 assert!(!list_e.equals_datatype(&list_g)); 76 assert!(!list_f.equals_datatype(&list_g)); 77 78 let list_h = DataType::Struct(vec![Field::new("f1", list_e, true)]); 79 let list_i = DataType::Struct(vec![Field::new("f1", list_f.clone(), true)]); 80 let list_j = DataType::Struct(vec![Field::new("f1", list_f.clone(), false)]); 81 let list_k = DataType::Struct(vec![ 82 Field::new("f1", list_f.clone(), false), 83 Field::new("f2", list_g.clone(), false), 84 Field::new("f3", DataType::Utf8, true), 85 ]); 86 let list_l = DataType::Struct(vec![ 87 Field::new("ff1", list_f.clone(), false), 88 Field::new("ff2", list_g.clone(), false), 89 Field::new("ff3", DataType::LargeUtf8, true), 90 ]); 91 let list_m = DataType::Struct(vec![ 92 Field::new("ff1", list_f, false), 93 Field::new("ff2", list_g, false), 94 Field::new("ff3", DataType::Utf8, true), 95 ]); 96 assert!(list_h.equals_datatype(&list_i)); 97 assert!(!list_h.equals_datatype(&list_j)); 98 assert!(!list_k.equals_datatype(&list_l)); 99 assert!(list_k.equals_datatype(&list_m)); 100 } 101 102 #[test] create_struct_type()103 fn create_struct_type() { 104 let _person = DataType::Struct(vec![ 105 Field::new("first_name", DataType::Utf8, false), 106 Field::new("last_name", DataType::Utf8, false), 107 Field::new( 108 "address", 109 DataType::Struct(vec![ 110 Field::new("street", DataType::Utf8, false), 111 Field::new("zip", DataType::UInt16, false), 112 ]), 113 false, 114 ), 115 ]); 116 } 117 118 #[test] serde_struct_type()119 fn serde_struct_type() { 120 let kv_array = [("k".to_string(), "v".to_string())]; 121 let field_metadata: BTreeMap<String, String> = kv_array.iter().cloned().collect(); 122 123 // Non-empty map: should be converted as JSON obj { ... } 124 let mut first_name = Field::new("first_name", DataType::Utf8, false); 125 first_name.set_metadata(Some(field_metadata)); 126 127 // Empty map: should be omitted. 128 let mut last_name = Field::new("last_name", DataType::Utf8, false); 129 last_name.set_metadata(Some(BTreeMap::default())); 130 131 let person = DataType::Struct(vec![ 132 first_name, 133 last_name, 134 Field::new( 135 "address", 136 DataType::Struct(vec![ 137 Field::new("street", DataType::Utf8, false), 138 Field::new("zip", DataType::UInt16, false), 139 ]), 140 false, 141 ), 142 ]); 143 144 let serialized = serde_json::to_string(&person).unwrap(); 145 146 // NOTE that this is testing the default (derived) serialization format, not the 147 // JSON format specified in metadata.md 148 149 assert_eq!( 150 "{\"Struct\":[\ 151 {\"name\":\"first_name\",\"data_type\":\"Utf8\",\"nullable\":false,\"dict_id\":0,\"dict_is_ordered\":false,\"metadata\":{\"k\":\"v\"}},\ 152 {\"name\":\"last_name\",\"data_type\":\"Utf8\",\"nullable\":false,\"dict_id\":0,\"dict_is_ordered\":false},\ 153 {\"name\":\"address\",\"data_type\":{\"Struct\":\ 154 [{\"name\":\"street\",\"data_type\":\"Utf8\",\"nullable\":false,\"dict_id\":0,\"dict_is_ordered\":false},\ 155 {\"name\":\"zip\",\"data_type\":\"UInt16\",\"nullable\":false,\"dict_id\":0,\"dict_is_ordered\":false}\ 156 ]},\"nullable\":false,\"dict_id\":0,\"dict_is_ordered\":false}]}", 157 serialized 158 ); 159 160 let deserialized = serde_json::from_str(&serialized).unwrap(); 161 162 assert_eq!(person, deserialized); 163 } 164 165 #[test] struct_field_to_json()166 fn struct_field_to_json() { 167 let f = Field::new( 168 "address", 169 DataType::Struct(vec![ 170 Field::new("street", DataType::Utf8, false), 171 Field::new("zip", DataType::UInt16, false), 172 ]), 173 false, 174 ); 175 let value: Value = serde_json::from_str( 176 r#"{ 177 "name": "address", 178 "nullable": false, 179 "type": { 180 "name": "struct" 181 }, 182 "children": [ 183 { 184 "name": "street", 185 "nullable": false, 186 "type": { 187 "name": "utf8" 188 }, 189 "children": [] 190 }, 191 { 192 "name": "zip", 193 "nullable": false, 194 "type": { 195 "name": "int", 196 "bitWidth": 16, 197 "isSigned": false 198 }, 199 "children": [] 200 } 201 ] 202 }"#, 203 ) 204 .unwrap(); 205 assert_eq!(value, f.to_json()); 206 } 207 208 #[test] primitive_field_to_json()209 fn primitive_field_to_json() { 210 let f = Field::new("first_name", DataType::Utf8, false); 211 let value: Value = serde_json::from_str( 212 r#"{ 213 "name": "first_name", 214 "nullable": false, 215 "type": { 216 "name": "utf8" 217 }, 218 "children": [] 219 }"#, 220 ) 221 .unwrap(); 222 assert_eq!(value, f.to_json()); 223 } 224 #[test] parse_struct_from_json()225 fn parse_struct_from_json() { 226 let json = r#" 227 { 228 "name": "address", 229 "type": { 230 "name": "struct" 231 }, 232 "nullable": false, 233 "children": [ 234 { 235 "name": "street", 236 "type": { 237 "name": "utf8" 238 }, 239 "nullable": false, 240 "children": [] 241 }, 242 { 243 "name": "zip", 244 "type": { 245 "name": "int", 246 "isSigned": false, 247 "bitWidth": 16 248 }, 249 "nullable": false, 250 "children": [] 251 } 252 ] 253 } 254 "#; 255 let value: Value = serde_json::from_str(json).unwrap(); 256 let dt = Field::from(&value).unwrap(); 257 258 let expected = Field::new( 259 "address", 260 DataType::Struct(vec![ 261 Field::new("street", DataType::Utf8, false), 262 Field::new("zip", DataType::UInt16, false), 263 ]), 264 false, 265 ); 266 267 assert_eq!(expected, dt); 268 } 269 270 #[test] parse_utf8_from_json()271 fn parse_utf8_from_json() { 272 let json = "{\"name\":\"utf8\"}"; 273 let value: Value = serde_json::from_str(json).unwrap(); 274 let dt = DataType::from(&value).unwrap(); 275 assert_eq!(DataType::Utf8, dt); 276 } 277 278 #[test] parse_int32_from_json()279 fn parse_int32_from_json() { 280 let json = "{\"name\": \"int\", \"isSigned\": true, \"bitWidth\": 32}"; 281 let value: Value = serde_json::from_str(json).unwrap(); 282 let dt = DataType::from(&value).unwrap(); 283 assert_eq!(DataType::Int32, dt); 284 } 285 286 #[test] schema_json()287 fn schema_json() { 288 // Add some custom metadata 289 let metadata: HashMap<String, String> = 290 [("Key".to_string(), "Value".to_string())] 291 .iter() 292 .cloned() 293 .collect(); 294 295 let schema = Schema::new_with_metadata( 296 vec![ 297 Field::new("c1", DataType::Utf8, false), 298 Field::new("c2", DataType::Binary, false), 299 Field::new("c3", DataType::FixedSizeBinary(3), false), 300 Field::new("c4", DataType::Boolean, false), 301 Field::new("c5", DataType::Date32, false), 302 Field::new("c6", DataType::Date64, false), 303 Field::new("c7", DataType::Time32(TimeUnit::Second), false), 304 Field::new("c8", DataType::Time32(TimeUnit::Millisecond), false), 305 Field::new("c9", DataType::Time32(TimeUnit::Microsecond), false), 306 Field::new("c10", DataType::Time32(TimeUnit::Nanosecond), false), 307 Field::new("c11", DataType::Time64(TimeUnit::Second), false), 308 Field::new("c12", DataType::Time64(TimeUnit::Millisecond), false), 309 Field::new("c13", DataType::Time64(TimeUnit::Microsecond), false), 310 Field::new("c14", DataType::Time64(TimeUnit::Nanosecond), false), 311 Field::new("c15", DataType::Timestamp(TimeUnit::Second, None), false), 312 Field::new( 313 "c16", 314 DataType::Timestamp(TimeUnit::Millisecond, Some("UTC".to_string())), 315 false, 316 ), 317 Field::new( 318 "c17", 319 DataType::Timestamp( 320 TimeUnit::Microsecond, 321 Some("Africa/Johannesburg".to_string()), 322 ), 323 false, 324 ), 325 Field::new( 326 "c18", 327 DataType::Timestamp(TimeUnit::Nanosecond, None), 328 false, 329 ), 330 Field::new("c19", DataType::Interval(IntervalUnit::DayTime), false), 331 Field::new("c20", DataType::Interval(IntervalUnit::YearMonth), false), 332 Field::new( 333 "c21", 334 DataType::List(Box::new(Field::new("item", DataType::Boolean, true))), 335 false, 336 ), 337 Field::new( 338 "c22", 339 DataType::FixedSizeList( 340 Box::new(Field::new("bools", DataType::Boolean, false)), 341 5, 342 ), 343 false, 344 ), 345 Field::new( 346 "c23", 347 DataType::List(Box::new(Field::new( 348 "inner_list", 349 DataType::List(Box::new(Field::new( 350 "struct", 351 DataType::Struct(vec![]), 352 true, 353 ))), 354 false, 355 ))), 356 true, 357 ), 358 Field::new( 359 "c24", 360 DataType::Struct(vec![ 361 Field::new("a", DataType::Utf8, false), 362 Field::new("b", DataType::UInt16, false), 363 ]), 364 false, 365 ), 366 Field::new("c25", DataType::Interval(IntervalUnit::YearMonth), true), 367 Field::new("c26", DataType::Interval(IntervalUnit::DayTime), true), 368 Field::new("c27", DataType::Duration(TimeUnit::Second), false), 369 Field::new("c28", DataType::Duration(TimeUnit::Millisecond), false), 370 Field::new("c29", DataType::Duration(TimeUnit::Microsecond), false), 371 Field::new("c30", DataType::Duration(TimeUnit::Nanosecond), false), 372 Field::new_dict( 373 "c31", 374 DataType::Dictionary( 375 Box::new(DataType::Int32), 376 Box::new(DataType::Utf8), 377 ), 378 true, 379 123, 380 true, 381 ), 382 Field::new("c32", DataType::LargeBinary, true), 383 Field::new("c33", DataType::LargeUtf8, true), 384 Field::new( 385 "c34", 386 DataType::LargeList(Box::new(Field::new( 387 "inner_large_list", 388 DataType::LargeList(Box::new(Field::new( 389 "struct", 390 DataType::Struct(vec![]), 391 false, 392 ))), 393 true, 394 ))), 395 true, 396 ), 397 ], 398 metadata, 399 ); 400 401 let expected = schema.to_json(); 402 let json = r#"{ 403 "fields": [ 404 { 405 "name": "c1", 406 "nullable": false, 407 "type": { 408 "name": "utf8" 409 }, 410 "children": [] 411 }, 412 { 413 "name": "c2", 414 "nullable": false, 415 "type": { 416 "name": "binary" 417 }, 418 "children": [] 419 }, 420 { 421 "name": "c3", 422 "nullable": false, 423 "type": { 424 "name": "fixedsizebinary", 425 "byteWidth": 3 426 }, 427 "children": [] 428 }, 429 { 430 "name": "c4", 431 "nullable": false, 432 "type": { 433 "name": "bool" 434 }, 435 "children": [] 436 }, 437 { 438 "name": "c5", 439 "nullable": false, 440 "type": { 441 "name": "date", 442 "unit": "DAY" 443 }, 444 "children": [] 445 }, 446 { 447 "name": "c6", 448 "nullable": false, 449 "type": { 450 "name": "date", 451 "unit": "MILLISECOND" 452 }, 453 "children": [] 454 }, 455 { 456 "name": "c7", 457 "nullable": false, 458 "type": { 459 "name": "time", 460 "bitWidth": 32, 461 "unit": "SECOND" 462 }, 463 "children": [] 464 }, 465 { 466 "name": "c8", 467 "nullable": false, 468 "type": { 469 "name": "time", 470 "bitWidth": 32, 471 "unit": "MILLISECOND" 472 }, 473 "children": [] 474 }, 475 { 476 "name": "c9", 477 "nullable": false, 478 "type": { 479 "name": "time", 480 "bitWidth": 32, 481 "unit": "MICROSECOND" 482 }, 483 "children": [] 484 }, 485 { 486 "name": "c10", 487 "nullable": false, 488 "type": { 489 "name": "time", 490 "bitWidth": 32, 491 "unit": "NANOSECOND" 492 }, 493 "children": [] 494 }, 495 { 496 "name": "c11", 497 "nullable": false, 498 "type": { 499 "name": "time", 500 "bitWidth": 64, 501 "unit": "SECOND" 502 }, 503 "children": [] 504 }, 505 { 506 "name": "c12", 507 "nullable": false, 508 "type": { 509 "name": "time", 510 "bitWidth": 64, 511 "unit": "MILLISECOND" 512 }, 513 "children": [] 514 }, 515 { 516 "name": "c13", 517 "nullable": false, 518 "type": { 519 "name": "time", 520 "bitWidth": 64, 521 "unit": "MICROSECOND" 522 }, 523 "children": [] 524 }, 525 { 526 "name": "c14", 527 "nullable": false, 528 "type": { 529 "name": "time", 530 "bitWidth": 64, 531 "unit": "NANOSECOND" 532 }, 533 "children": [] 534 }, 535 { 536 "name": "c15", 537 "nullable": false, 538 "type": { 539 "name": "timestamp", 540 "unit": "SECOND" 541 }, 542 "children": [] 543 }, 544 { 545 "name": "c16", 546 "nullable": false, 547 "type": { 548 "name": "timestamp", 549 "unit": "MILLISECOND", 550 "timezone": "UTC" 551 }, 552 "children": [] 553 }, 554 { 555 "name": "c17", 556 "nullable": false, 557 "type": { 558 "name": "timestamp", 559 "unit": "MICROSECOND", 560 "timezone": "Africa/Johannesburg" 561 }, 562 "children": [] 563 }, 564 { 565 "name": "c18", 566 "nullable": false, 567 "type": { 568 "name": "timestamp", 569 "unit": "NANOSECOND" 570 }, 571 "children": [] 572 }, 573 { 574 "name": "c19", 575 "nullable": false, 576 "type": { 577 "name": "interval", 578 "unit": "DAY_TIME" 579 }, 580 "children": [] 581 }, 582 { 583 "name": "c20", 584 "nullable": false, 585 "type": { 586 "name": "interval", 587 "unit": "YEAR_MONTH" 588 }, 589 "children": [] 590 }, 591 { 592 "name": "c21", 593 "nullable": false, 594 "type": { 595 "name": "list" 596 }, 597 "children": [ 598 { 599 "name": "item", 600 "nullable": true, 601 "type": { 602 "name": "bool" 603 }, 604 "children": [] 605 } 606 ] 607 }, 608 { 609 "name": "c22", 610 "nullable": false, 611 "type": { 612 "name": "fixedsizelist", 613 "listSize": 5 614 }, 615 "children": [ 616 { 617 "name": "bools", 618 "nullable": false, 619 "type": { 620 "name": "bool" 621 }, 622 "children": [] 623 } 624 ] 625 }, 626 { 627 "name": "c23", 628 "nullable": true, 629 "type": { 630 "name": "list" 631 }, 632 "children": [ 633 { 634 "name": "inner_list", 635 "nullable": false, 636 "type": { 637 "name": "list" 638 }, 639 "children": [ 640 { 641 "name": "struct", 642 "nullable": true, 643 "type": { 644 "name": "struct" 645 }, 646 "children": [] 647 } 648 ] 649 } 650 ] 651 }, 652 { 653 "name": "c24", 654 "nullable": false, 655 "type": { 656 "name": "struct" 657 }, 658 "children": [ 659 { 660 "name": "a", 661 "nullable": false, 662 "type": { 663 "name": "utf8" 664 }, 665 "children": [] 666 }, 667 { 668 "name": "b", 669 "nullable": false, 670 "type": { 671 "name": "int", 672 "bitWidth": 16, 673 "isSigned": false 674 }, 675 "children": [] 676 } 677 ] 678 }, 679 { 680 "name": "c25", 681 "nullable": true, 682 "type": { 683 "name": "interval", 684 "unit": "YEAR_MONTH" 685 }, 686 "children": [] 687 }, 688 { 689 "name": "c26", 690 "nullable": true, 691 "type": { 692 "name": "interval", 693 "unit": "DAY_TIME" 694 }, 695 "children": [] 696 }, 697 { 698 "name": "c27", 699 "nullable": false, 700 "type": { 701 "name": "duration", 702 "unit": "SECOND" 703 }, 704 "children": [] 705 }, 706 { 707 "name": "c28", 708 "nullable": false, 709 "type": { 710 "name": "duration", 711 "unit": "MILLISECOND" 712 }, 713 "children": [] 714 }, 715 { 716 "name": "c29", 717 "nullable": false, 718 "type": { 719 "name": "duration", 720 "unit": "MICROSECOND" 721 }, 722 "children": [] 723 }, 724 { 725 "name": "c30", 726 "nullable": false, 727 "type": { 728 "name": "duration", 729 "unit": "NANOSECOND" 730 }, 731 "children": [] 732 }, 733 { 734 "name": "c31", 735 "nullable": true, 736 "children": [], 737 "type": { 738 "name": "utf8" 739 }, 740 "dictionary": { 741 "id": 123, 742 "indexType": { 743 "name": "int", 744 "bitWidth": 32, 745 "isSigned": true 746 }, 747 "isOrdered": true 748 } 749 }, 750 { 751 "name": "c32", 752 "nullable": true, 753 "type": { 754 "name": "largebinary" 755 }, 756 "children": [] 757 }, 758 { 759 "name": "c33", 760 "nullable": true, 761 "type": { 762 "name": "largeutf8" 763 }, 764 "children": [] 765 }, 766 { 767 "name": "c34", 768 "nullable": true, 769 "type": { 770 "name": "largelist" 771 }, 772 "children": [ 773 { 774 "name": "inner_large_list", 775 "nullable": true, 776 "type": { 777 "name": "largelist" 778 }, 779 "children": [ 780 { 781 "name": "struct", 782 "nullable": false, 783 "type": { 784 "name": "struct" 785 }, 786 "children": [] 787 } 788 ] 789 } 790 ] 791 } 792 ], 793 "metadata" : { 794 "Key": "Value" 795 } 796 }"#; 797 let value: Value = serde_json::from_str(&json).unwrap(); 798 assert_eq!(expected, value); 799 800 // convert back to a schema 801 let value: Value = serde_json::from_str(&json).unwrap(); 802 let schema2 = Schema::from(&value).unwrap(); 803 804 assert_eq!(schema, schema2); 805 806 // Check that empty metadata produces empty value in JSON and can be parsed 807 let json = r#"{ 808 "fields": [ 809 { 810 "name": "c1", 811 "nullable": false, 812 "type": { 813 "name": "utf8" 814 }, 815 "children": [] 816 } 817 ], 818 "metadata": {} 819 }"#; 820 let value: Value = serde_json::from_str(&json).unwrap(); 821 let schema = Schema::from(&value).unwrap(); 822 assert!(schema.metadata.is_empty()); 823 824 // Check that metadata field is not required in the JSON. 825 let json = r#"{ 826 "fields": [ 827 { 828 "name": "c1", 829 "nullable": false, 830 "type": { 831 "name": "utf8" 832 }, 833 "children": [] 834 } 835 ] 836 }"#; 837 let value: Value = serde_json::from_str(&json).unwrap(); 838 let schema = Schema::from(&value).unwrap(); 839 assert!(schema.metadata.is_empty()); 840 } 841 842 #[test] create_schema_string()843 fn create_schema_string() { 844 let schema = person_schema(); 845 assert_eq!(schema.to_string(), 846 "Field { name: \"first_name\", data_type: Utf8, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: Some({\"k\": \"v\"}) }, \ 847 Field { name: \"last_name\", data_type: Utf8, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: None }, \ 848 Field { name: \"address\", data_type: Struct([\ 849 Field { name: \"street\", data_type: Utf8, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: None }, \ 850 Field { name: \"zip\", data_type: UInt16, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: None }\ 851 ]), nullable: false, dict_id: 0, dict_is_ordered: false, metadata: None }, \ 852 Field { name: \"interests\", data_type: Dictionary(Int32, Utf8), nullable: true, dict_id: 123, dict_is_ordered: true, metadata: None }") 853 } 854 855 #[test] schema_field_accessors()856 fn schema_field_accessors() { 857 let schema = person_schema(); 858 859 // test schema accessors 860 assert_eq!(schema.fields().len(), 4); 861 862 // test field accessors 863 let first_name = &schema.fields()[0]; 864 assert_eq!(first_name.name(), "first_name"); 865 assert_eq!(first_name.data_type(), &DataType::Utf8); 866 assert_eq!(first_name.is_nullable(), false); 867 assert_eq!(first_name.dict_id(), None); 868 assert_eq!(first_name.dict_is_ordered(), None); 869 870 let metadata = first_name.metadata(); 871 assert!(metadata.is_some()); 872 let md = metadata.as_ref().unwrap(); 873 assert_eq!(md.len(), 1); 874 let key = md.get("k"); 875 assert!(key.is_some()); 876 assert_eq!(key.unwrap(), "v"); 877 878 let interests = &schema.fields()[3]; 879 assert_eq!(interests.name(), "interests"); 880 assert_eq!( 881 interests.data_type(), 882 &DataType::Dictionary(Box::new(DataType::Int32), Box::new(DataType::Utf8)) 883 ); 884 assert_eq!(interests.dict_id(), Some(123)); 885 assert_eq!(interests.dict_is_ordered(), Some(true)); 886 } 887 888 #[test] 889 #[should_panic( 890 expected = "Unable to get field named \\\"nickname\\\". Valid fields: [\\\"first_name\\\", \\\"last_name\\\", \\\"address\\\", \\\"interests\\\"]" 891 )] schema_index_of()892 fn schema_index_of() { 893 let schema = person_schema(); 894 assert_eq!(schema.index_of("first_name").unwrap(), 0); 895 assert_eq!(schema.index_of("last_name").unwrap(), 1); 896 schema.index_of("nickname").unwrap(); 897 } 898 899 #[test] 900 #[should_panic( 901 expected = "Unable to get field named \\\"nickname\\\". Valid fields: [\\\"first_name\\\", \\\"last_name\\\", \\\"address\\\", \\\"interests\\\"]" 902 )] schema_field_with_name()903 fn schema_field_with_name() { 904 let schema = person_schema(); 905 assert_eq!( 906 schema.field_with_name("first_name").unwrap().name(), 907 "first_name" 908 ); 909 assert_eq!( 910 schema.field_with_name("last_name").unwrap().name(), 911 "last_name" 912 ); 913 schema.field_with_name("nickname").unwrap(); 914 } 915 916 #[test] schema_field_with_dict_id()917 fn schema_field_with_dict_id() { 918 let schema = person_schema(); 919 920 let fields_dict_123: Vec<_> = schema 921 .fields_with_dict_id(123) 922 .iter() 923 .map(|f| f.name()) 924 .collect(); 925 assert_eq!(fields_dict_123, vec!["interests"]); 926 927 assert!(schema.fields_with_dict_id(456).is_empty()); 928 } 929 930 #[test] schema_equality()931 fn schema_equality() { 932 let schema1 = Schema::new(vec![ 933 Field::new("c1", DataType::Utf8, false), 934 Field::new("c2", DataType::Float64, true), 935 Field::new("c3", DataType::LargeBinary, true), 936 ]); 937 let schema2 = Schema::new(vec![ 938 Field::new("c1", DataType::Utf8, false), 939 Field::new("c2", DataType::Float64, true), 940 Field::new("c3", DataType::LargeBinary, true), 941 ]); 942 943 assert_eq!(schema1, schema2); 944 945 let schema3 = Schema::new(vec![ 946 Field::new("c1", DataType::Utf8, false), 947 Field::new("c2", DataType::Float32, true), 948 ]); 949 let schema4 = Schema::new(vec![ 950 Field::new("C1", DataType::Utf8, false), 951 Field::new("C2", DataType::Float64, true), 952 ]); 953 954 assert!(schema1 != schema3); 955 assert!(schema1 != schema4); 956 assert!(schema2 != schema3); 957 assert!(schema2 != schema4); 958 assert!(schema3 != schema4); 959 960 let mut f = Field::new("c1", DataType::Utf8, false); 961 f.set_metadata(Some( 962 [("foo".to_string(), "bar".to_string())] 963 .iter() 964 .cloned() 965 .collect(), 966 )); 967 let schema5 = Schema::new(vec![ 968 f, 969 Field::new("c2", DataType::Float64, true), 970 Field::new("c3", DataType::LargeBinary, true), 971 ]); 972 assert!(schema1 != schema5); 973 } 974 975 #[test] test_arrow_native_type_to_json()976 fn test_arrow_native_type_to_json() { 977 assert_eq!(Some(Bool(true)), true.into_json_value()); 978 assert_eq!(Some(VNumber(Number::from(1))), 1i8.into_json_value()); 979 assert_eq!(Some(VNumber(Number::from(1))), 1i16.into_json_value()); 980 assert_eq!(Some(VNumber(Number::from(1))), 1i32.into_json_value()); 981 assert_eq!(Some(VNumber(Number::from(1))), 1i64.into_json_value()); 982 assert_eq!(Some(VNumber(Number::from(1))), 1u8.into_json_value()); 983 assert_eq!(Some(VNumber(Number::from(1))), 1u16.into_json_value()); 984 assert_eq!(Some(VNumber(Number::from(1))), 1u32.into_json_value()); 985 assert_eq!(Some(VNumber(Number::from(1))), 1u64.into_json_value()); 986 assert_eq!( 987 Some(VNumber(Number::from_f64(0.01f64).unwrap())), 988 0.01.into_json_value() 989 ); 990 assert_eq!( 991 Some(VNumber(Number::from_f64(0.01f64).unwrap())), 992 0.01f64.into_json_value() 993 ); 994 assert_eq!(None, NAN.into_json_value()); 995 } 996 person_schema() -> Schema997 fn person_schema() -> Schema { 998 let kv_array = [("k".to_string(), "v".to_string())]; 999 let field_metadata: BTreeMap<String, String> = kv_array.iter().cloned().collect(); 1000 let mut first_name = Field::new("first_name", DataType::Utf8, false); 1001 first_name.set_metadata(Some(field_metadata)); 1002 1003 Schema::new(vec![ 1004 first_name, 1005 Field::new("last_name", DataType::Utf8, false), 1006 Field::new( 1007 "address", 1008 DataType::Struct(vec![ 1009 Field::new("street", DataType::Utf8, false), 1010 Field::new("zip", DataType::UInt16, false), 1011 ]), 1012 false, 1013 ), 1014 Field::new_dict( 1015 "interests", 1016 DataType::Dictionary(Box::new(DataType::Int32), Box::new(DataType::Utf8)), 1017 true, 1018 123, 1019 true, 1020 ), 1021 ]) 1022 } 1023 1024 #[test] test_try_merge_field_with_metadata()1025 fn test_try_merge_field_with_metadata() { 1026 // 1. Different values for the same key should cause error. 1027 let metadata1: BTreeMap<String, String> = 1028 [("foo".to_string(), "bar".to_string())] 1029 .iter() 1030 .cloned() 1031 .collect(); 1032 let mut f1 = Field::new("first_name", DataType::Utf8, false); 1033 f1.set_metadata(Some(metadata1)); 1034 1035 let metadata2: BTreeMap<String, String> = 1036 [("foo".to_string(), "baz".to_string())] 1037 .iter() 1038 .cloned() 1039 .collect(); 1040 let mut f2 = Field::new("first_name", DataType::Utf8, false); 1041 f2.set_metadata(Some(metadata2)); 1042 1043 assert!( 1044 Schema::try_merge(vec![Schema::new(vec![f1]), Schema::new(vec![f2])]) 1045 .is_err() 1046 ); 1047 1048 // 2. None + Some 1049 let mut f1 = Field::new("first_name", DataType::Utf8, false); 1050 let metadata2: BTreeMap<String, String> = 1051 [("missing".to_string(), "value".to_string())] 1052 .iter() 1053 .cloned() 1054 .collect(); 1055 let mut f2 = Field::new("first_name", DataType::Utf8, false); 1056 f2.set_metadata(Some(metadata2)); 1057 1058 assert!(f1.try_merge(&f2).is_ok()); 1059 assert!(f1.metadata().is_some()); 1060 assert_eq!( 1061 f1.metadata().as_ref().unwrap(), 1062 f2.metadata().as_ref().unwrap() 1063 ); 1064 1065 // 3. Some + Some 1066 let mut f1 = Field::new("first_name", DataType::Utf8, false); 1067 f1.set_metadata(Some( 1068 [("foo".to_string(), "bar".to_string())] 1069 .iter() 1070 .cloned() 1071 .collect(), 1072 )); 1073 let mut f2 = Field::new("first_name", DataType::Utf8, false); 1074 f2.set_metadata(Some( 1075 [("foo2".to_string(), "bar2".to_string())] 1076 .iter() 1077 .cloned() 1078 .collect(), 1079 )); 1080 1081 assert!(f1.try_merge(&f2).is_ok()); 1082 assert!(f1.metadata().is_some()); 1083 assert_eq!( 1084 f1.metadata().clone().unwrap(), 1085 [ 1086 ("foo".to_string(), "bar".to_string()), 1087 ("foo2".to_string(), "bar2".to_string()) 1088 ] 1089 .iter() 1090 .cloned() 1091 .collect() 1092 ); 1093 1094 // 4. Some + None. 1095 let mut f1 = Field::new("first_name", DataType::Utf8, false); 1096 f1.set_metadata(Some( 1097 [("foo".to_string(), "bar".to_string())] 1098 .iter() 1099 .cloned() 1100 .collect(), 1101 )); 1102 let f2 = Field::new("first_name", DataType::Utf8, false); 1103 assert!(f1.try_merge(&f2).is_ok()); 1104 assert!(f1.metadata().is_some()); 1105 assert_eq!( 1106 f1.metadata().clone().unwrap(), 1107 [("foo".to_string(), "bar".to_string())] 1108 .iter() 1109 .cloned() 1110 .collect() 1111 ); 1112 1113 // 5. None + None. 1114 let mut f1 = Field::new("first_name", DataType::Utf8, false); 1115 let f2 = Field::new("first_name", DataType::Utf8, false); 1116 assert!(f1.try_merge(&f2).is_ok()); 1117 assert!(f1.metadata().is_none()); 1118 } 1119 1120 #[test] test_schema_merge() -> Result<()>1121 fn test_schema_merge() -> Result<()> { 1122 let merged = Schema::try_merge(vec![ 1123 Schema::new(vec![ 1124 Field::new("first_name", DataType::Utf8, false), 1125 Field::new("last_name", DataType::Utf8, false), 1126 Field::new( 1127 "address", 1128 DataType::Struct(vec![Field::new("zip", DataType::UInt16, false)]), 1129 false, 1130 ), 1131 ]), 1132 Schema::new_with_metadata( 1133 vec![ 1134 // nullable merge 1135 Field::new("last_name", DataType::Utf8, true), 1136 Field::new( 1137 "address", 1138 DataType::Struct(vec![ 1139 // add new nested field 1140 Field::new("street", DataType::Utf8, false), 1141 // nullable merge on nested field 1142 Field::new("zip", DataType::UInt16, true), 1143 ]), 1144 false, 1145 ), 1146 // new field 1147 Field::new("number", DataType::Utf8, true), 1148 ], 1149 [("foo".to_string(), "bar".to_string())] 1150 .iter() 1151 .cloned() 1152 .collect::<HashMap<String, String>>(), 1153 ), 1154 ])?; 1155 1156 assert_eq!( 1157 merged, 1158 Schema::new_with_metadata( 1159 vec![ 1160 Field::new("first_name", DataType::Utf8, false), 1161 Field::new("last_name", DataType::Utf8, true), 1162 Field::new( 1163 "address", 1164 DataType::Struct(vec![ 1165 Field::new("zip", DataType::UInt16, true), 1166 Field::new("street", DataType::Utf8, false), 1167 ]), 1168 false, 1169 ), 1170 Field::new("number", DataType::Utf8, true), 1171 ], 1172 [("foo".to_string(), "bar".to_string())] 1173 .iter() 1174 .cloned() 1175 .collect::<HashMap<String, String>>() 1176 ) 1177 ); 1178 1179 // support merge union fields 1180 assert_eq!( 1181 Schema::try_merge(vec![ 1182 Schema::new(vec![Field::new( 1183 "c1", 1184 DataType::Union(vec![ 1185 Field::new("c11", DataType::Utf8, true), 1186 Field::new("c12", DataType::Utf8, true), 1187 ]), 1188 false 1189 ),]), 1190 Schema::new(vec![Field::new( 1191 "c1", 1192 DataType::Union(vec![ 1193 Field::new("c12", DataType::Utf8, true), 1194 Field::new("c13", DataType::Time64(TimeUnit::Second), true), 1195 ]), 1196 false 1197 ),]) 1198 ])?, 1199 Schema::new(vec![Field::new( 1200 "c1", 1201 DataType::Union(vec![ 1202 Field::new("c11", DataType::Utf8, true), 1203 Field::new("c12", DataType::Utf8, true), 1204 Field::new("c13", DataType::Time64(TimeUnit::Second), true), 1205 ]), 1206 false 1207 ),]), 1208 ); 1209 1210 // incompatible field should throw error 1211 assert!(Schema::try_merge(vec![ 1212 Schema::new(vec![ 1213 Field::new("first_name", DataType::Utf8, false), 1214 Field::new("last_name", DataType::Utf8, false), 1215 ]), 1216 Schema::new(vec![Field::new("last_name", DataType::Int64, false),]) 1217 ]) 1218 .is_err()); 1219 1220 // incompatible metadata should throw error 1221 assert!(Schema::try_merge(vec![ 1222 Schema::new_with_metadata( 1223 vec![Field::new("first_name", DataType::Utf8, false)], 1224 [("foo".to_string(), "bar".to_string()),] 1225 .iter() 1226 .cloned() 1227 .collect::<HashMap<String, String>>() 1228 ), 1229 Schema::new_with_metadata( 1230 vec![Field::new("last_name", DataType::Utf8, false)], 1231 [("foo".to_string(), "baz".to_string()),] 1232 .iter() 1233 .cloned() 1234 .collect::<HashMap<String, String>>() 1235 ) 1236 ]) 1237 .is_err()); 1238 1239 Ok(()) 1240 } 1241 } 1242