1// Licensed to the Apache Software Foundation (ASF) under one 2// or more contributor license agreements. See the NOTICE file 3// distributed with this work for additional information 4// regarding copyright ownership. The ASF licenses this file 5// to you under the Apache License, Version 2.0 (the 6// "License"); you may not use this file except in compliance 7// with the License. You may obtain a copy of the License at 8// 9// http://www.apache.org/licenses/LICENSE-2.0 10// 11// Unless required by applicable law or agreed to in writing, software 12// distributed under the License is distributed on an "AS IS" BASIS, 13// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14// See the License for the specific language governing permissions and 15// limitations under the License. 16 17package schema 18 19import ( 20 "encoding/json" 21 "fmt" 22 "math" 23 24 "github.com/apache/arrow/go/v6/parquet" 25 "github.com/apache/arrow/go/v6/parquet/internal/debug" 26 format "github.com/apache/arrow/go/v6/parquet/internal/gen-go/parquet" 27) 28 29// DecimalMetadata is a struct for managing scale and precision information between 30// converted and logical types. 31type DecimalMetadata struct { 32 IsSet bool 33 Scale int32 34 Precision int32 35} 36 37func getLogicalType(l *format.LogicalType) LogicalType { 38 switch { 39 case l.IsSetSTRING(): 40 return StringLogicalType{} 41 case l.IsSetMAP(): 42 return MapLogicalType{} 43 case l.IsSetLIST(): 44 return ListLogicalType{} 45 case l.IsSetENUM(): 46 return EnumLogicalType{} 47 case l.IsSetDECIMAL(): 48 return &DecimalLogicalType{typ: l.DECIMAL} 49 case l.IsSetDATE(): 50 return DateLogicalType{} 51 case l.IsSetTIME(): 52 if timeUnitFromThrift(l.TIME.Unit) == TimeUnitUnknown { 53 panic("parquet: TimeUnit must be one of MILLIS, MICROS, or NANOS for Time logical type") 54 } 55 return &TimeLogicalType{typ: l.TIME} 56 case l.IsSetTIMESTAMP(): 57 if timeUnitFromThrift(l.TIMESTAMP.Unit) == TimeUnitUnknown { 58 panic("parquet: TimeUnit must be one of MILLIS, MICROS, or NANOS for Timestamp logical type") 59 } 60 return &TimestampLogicalType{typ: l.TIMESTAMP} 61 case l.IsSetINTEGER(): 62 return &IntLogicalType{typ: l.INTEGER} 63 case l.IsSetUNKNOWN(): 64 return NullLogicalType{} 65 case l.IsSetJSON(): 66 return JSONLogicalType{} 67 case l.IsSetBSON(): 68 return BSONLogicalType{} 69 case l.IsSetUUID(): 70 return UUIDLogicalType{} 71 case l == nil: 72 return NoLogicalType{} 73 default: 74 panic("invalid logical type") 75 } 76} 77 78// TimeUnitType is an enum for denoting whether a time based logical type 79// is using milliseconds, microseconds or nanoseconds. 80type TimeUnitType int 81 82// Constants for the TimeUnitType 83const ( 84 TimeUnitMillis TimeUnitType = iota 85 TimeUnitMicros 86 TimeUnitNanos 87 TimeUnitUnknown 88) 89 90// LogicalType is the descriptor that defines the usage of a physical primitive 91// type in the schema, such as an Interval, Date, etc. 92type LogicalType interface { 93 // Returns true if a nested type like List or Map 94 IsNested() bool 95 // Returns true if this type can be serialized, ie: not Unknown/NoType/Interval 96 IsSerialized() bool 97 // Returns true if not NoLogicalType 98 IsValid() bool 99 // Returns true if it is NoType 100 IsNone() bool 101 // returns a string representation of the Logical Type 102 String() string 103 toThrift() *format.LogicalType 104 // Return the equivalent ConvertedType for legacy Parquet systems 105 ToConvertedType() (ConvertedType, DecimalMetadata) 106 // Returns true if the specified ConvertedType is compatible with this 107 // logical type 108 IsCompatible(ConvertedType, DecimalMetadata) bool 109 // Returns true if this logical type can be used with the provided physical type 110 IsApplicable(t parquet.Type, tlen int32) bool 111 // Returns true if the logical types are the same 112 Equals(LogicalType) bool 113 // Returns the default stat sort order for this logical type 114 SortOrder() SortOrder 115} 116 117// TemporalLogicalType is a smaller interface for Time based logical types 118// like Time / Timestamp 119type TemporalLogicalType interface { 120 LogicalType 121 IsAdjustedToUTC() bool 122 TimeUnit() TimeUnitType 123} 124 125// SortOrder mirrors the parquet.thrift sort order type 126type SortOrder int8 127 128// Constants for the Stat sort order definitions 129const ( 130 SortSIGNED SortOrder = iota 131 SortUNSIGNED 132 SortUNKNOWN 133) 134 135// DefaultSortOrder returns the default stat sort order for the given physical type 136func DefaultSortOrder(primitive format.Type) SortOrder { 137 switch primitive { 138 case format.Type_BOOLEAN, format.Type_INT32, format.Type_INT64, format.Type_FLOAT, format.Type_DOUBLE: 139 return SortSIGNED 140 case format.Type_BYTE_ARRAY, format.Type_FIXED_LEN_BYTE_ARRAY: 141 return SortUNSIGNED 142 case format.Type_INT96: 143 fallthrough 144 default: 145 return SortUNKNOWN 146 } 147} 148 149// GetLogicalSortOrder returns the default sort order for this logical type 150// or falls back to the default sort order for the physical type if not valid 151func GetLogicalSortOrder(logical LogicalType, primitive format.Type) SortOrder { 152 switch { 153 case logical == nil || !logical.IsValid(): 154 return SortUNKNOWN 155 case logical.Equals(NoLogicalType{}): 156 return DefaultSortOrder(primitive) 157 default: 158 return logical.SortOrder() 159 } 160} 161 162type baseLogicalType struct{} 163 164func (baseLogicalType) IsSerialized() bool { 165 return true 166} 167 168func (baseLogicalType) IsValid() bool { 169 return true 170} 171 172func (baseLogicalType) IsNested() bool { 173 return false 174} 175 176func (baseLogicalType) IsNone() bool { return false } 177 178// StringLogicalType is a UTF8 string, only usable with ByteArray and FixedLenByteArray 179type StringLogicalType struct{ baseLogicalType } 180 181func (StringLogicalType) SortOrder() SortOrder { 182 return SortUNSIGNED 183} 184 185func (StringLogicalType) MarshalJSON() ([]byte, error) { 186 return json.Marshal(map[string]string{"Type": StringLogicalType{}.String()}) 187} 188 189func (StringLogicalType) String() string { 190 return "String" 191} 192 193func (StringLogicalType) ToConvertedType() (ConvertedType, DecimalMetadata) { 194 return ConvertedTypes.UTF8, DecimalMetadata{} 195} 196 197func (StringLogicalType) IsCompatible(t ConvertedType, dec DecimalMetadata) bool { 198 return t == ConvertedTypes.UTF8 && !dec.IsSet 199} 200 201func (StringLogicalType) IsApplicable(t parquet.Type, _ int32) bool { 202 return t == parquet.Types.ByteArray 203} 204 205func (StringLogicalType) toThrift() *format.LogicalType { 206 return &format.LogicalType{STRING: format.NewStringType()} 207} 208 209func (StringLogicalType) Equals(rhs LogicalType) bool { 210 _, ok := rhs.(StringLogicalType) 211 return ok 212} 213 214// MapLogicalType represents a mapped type 215type MapLogicalType struct{ baseLogicalType } 216 217func (MapLogicalType) SortOrder() SortOrder { 218 return SortUNKNOWN 219} 220 221func (MapLogicalType) MarshalJSON() ([]byte, error) { 222 return json.Marshal(map[string]string{"Type": MapLogicalType{}.String()}) 223} 224 225func (MapLogicalType) String() string { 226 return "Map" 227} 228 229func (MapLogicalType) IsNested() bool { 230 return true 231} 232 233func (MapLogicalType) ToConvertedType() (ConvertedType, DecimalMetadata) { 234 return ConvertedTypes.Map, DecimalMetadata{} 235} 236 237func (MapLogicalType) IsCompatible(t ConvertedType, dec DecimalMetadata) bool { 238 return (t == ConvertedTypes.Map || t == ConvertedTypes.MapKeyValue) && !dec.IsSet 239} 240 241func (MapLogicalType) IsApplicable(parquet.Type, int32) bool { 242 return false 243} 244 245func (MapLogicalType) toThrift() *format.LogicalType { 246 return &format.LogicalType{MAP: format.NewMapType()} 247} 248 249func (MapLogicalType) Equals(rhs LogicalType) bool { 250 _, ok := rhs.(MapLogicalType) 251 return ok 252} 253 254func NewListLogicalType() LogicalType { 255 return ListLogicalType{} 256} 257 258// ListLogicalType is used for columns which are themselves nested lists 259type ListLogicalType struct{ baseLogicalType } 260 261func (ListLogicalType) SortOrder() SortOrder { 262 return SortUNKNOWN 263} 264 265func (ListLogicalType) MarshalJSON() ([]byte, error) { 266 return json.Marshal(map[string]string{"Type": ListLogicalType{}.String()}) 267} 268 269func (ListLogicalType) String() string { 270 return "List" 271} 272 273func (ListLogicalType) IsNested() bool { 274 return true 275} 276 277func (ListLogicalType) ToConvertedType() (ConvertedType, DecimalMetadata) { 278 return ConvertedTypes.List, DecimalMetadata{} 279} 280 281func (ListLogicalType) IsCompatible(t ConvertedType, dec DecimalMetadata) bool { 282 return t == ConvertedTypes.List && !dec.IsSet 283} 284 285func (ListLogicalType) IsApplicable(parquet.Type, int32) bool { 286 return false 287} 288 289func (ListLogicalType) toThrift() *format.LogicalType { 290 return &format.LogicalType{LIST: format.NewListType()} 291} 292 293func (ListLogicalType) Equals(rhs LogicalType) bool { 294 _, ok := rhs.(ListLogicalType) 295 return ok 296} 297 298// EnumLogicalType is for representing an enum, which should be a byte array type 299type EnumLogicalType struct{ baseLogicalType } 300 301func (EnumLogicalType) SortOrder() SortOrder { 302 return SortUNSIGNED 303} 304 305func (EnumLogicalType) MarshalJSON() ([]byte, error) { 306 return json.Marshal(map[string]string{"Type": EnumLogicalType{}.String()}) 307} 308 309func (EnumLogicalType) String() string { 310 return "Enum" 311} 312 313func (EnumLogicalType) ToConvertedType() (ConvertedType, DecimalMetadata) { 314 return ConvertedTypes.Enum, DecimalMetadata{} 315} 316 317func (EnumLogicalType) IsCompatible(t ConvertedType, dec DecimalMetadata) bool { 318 return t == ConvertedTypes.Enum && !dec.IsSet 319} 320 321func (EnumLogicalType) IsApplicable(t parquet.Type, _ int32) bool { 322 return t == parquet.Types.ByteArray 323} 324 325func (EnumLogicalType) toThrift() *format.LogicalType { 326 return &format.LogicalType{ENUM: format.NewEnumType()} 327} 328 329func (EnumLogicalType) Equals(rhs LogicalType) bool { 330 _, ok := rhs.(EnumLogicalType) 331 return ok 332} 333 334// NewDecimalLogicalType returns a Decimal logical type with the given 335// precision and scale. 336// 337// Panics if precision < 1 or scale is not in the range (0, precision) 338func NewDecimalLogicalType(precision int32, scale int32) LogicalType { 339 if precision < 1 { 340 panic("parquet: precision must be greater than or equal to 1 for decimal logical type") 341 } 342 if scale < 0 || scale > precision { 343 panic("parquet: scale must be a non-negative integer that does not exceed precision for decimal logical type") 344 } 345 return &DecimalLogicalType{typ: &format.DecimalType{Precision: precision, Scale: scale}} 346} 347 348// DecimalLogicalType is used to represent a decimal value of a given 349// precision and scale 350type DecimalLogicalType struct { 351 baseLogicalType 352 typ *format.DecimalType 353} 354 355func (t DecimalLogicalType) Precision() int32 { 356 return t.typ.Precision 357} 358 359func (t DecimalLogicalType) Scale() int32 { 360 return t.typ.Scale 361} 362 363func (DecimalLogicalType) SortOrder() SortOrder { 364 return SortSIGNED 365} 366 367func (t DecimalLogicalType) MarshalJSON() ([]byte, error) { 368 return json.Marshal(map[string]interface{}{"Type": "Decimal", "precision": t.typ.Precision, "scale": t.typ.Scale}) 369} 370 371func (t DecimalLogicalType) String() string { 372 return fmt.Sprintf("Decimal(precision=%d, scale=%d)", t.typ.Precision, t.typ.Scale) 373} 374 375func (t DecimalLogicalType) ToConvertedType() (ConvertedType, DecimalMetadata) { 376 return ConvertedTypes.Decimal, DecimalMetadata{IsSet: true, Scale: t.typ.GetScale(), Precision: t.typ.GetPrecision()} 377} 378 379func (t DecimalLogicalType) IsCompatible(c ConvertedType, dec DecimalMetadata) bool { 380 return c == ConvertedTypes.Decimal && 381 dec.IsSet && dec.Scale == t.typ.Scale && dec.Precision == t.typ.Precision 382} 383 384func (t DecimalLogicalType) IsApplicable(typ parquet.Type, tlen int32) bool { 385 switch typ { 386 case parquet.Types.Int32: 387 return 1 <= t.typ.Precision && t.typ.Precision <= 9 388 case parquet.Types.Int64: 389 if t.typ.Precision < 10 { 390 debug.Log("int64 used for decimal logical, precision is small enough to use int32") 391 } 392 return 1 <= t.typ.Precision && t.typ.Precision <= 18 393 case parquet.Types.FixedLenByteArray: 394 return t.typ.Precision <= int32(math.Floor(math.Log10(math.Pow(2.0, (8.0*float64(tlen)-1.0))))) 395 case parquet.Types.ByteArray: 396 return true 397 } 398 return false 399} 400 401func (t DecimalLogicalType) toThrift() *format.LogicalType { 402 return &format.LogicalType{DECIMAL: t.typ} 403} 404 405func (t DecimalLogicalType) Equals(rhs LogicalType) bool { 406 other, ok := rhs.(*DecimalLogicalType) 407 if !ok { 408 return false 409 } 410 return t.typ.Precision == other.typ.Precision && t.typ.Scale == other.typ.Scale 411} 412 413// DateLogicalType is an int32 representing the number of days since the Unix Epoch 414// 1 January 1970 415type DateLogicalType struct{ baseLogicalType } 416 417func (DateLogicalType) SortOrder() SortOrder { 418 return SortSIGNED 419} 420 421func (DateLogicalType) MarshalJSON() ([]byte, error) { 422 return json.Marshal(map[string]string{"Type": DateLogicalType{}.String()}) 423} 424 425func (DateLogicalType) String() string { 426 return "Date" 427} 428 429func (DateLogicalType) ToConvertedType() (ConvertedType, DecimalMetadata) { 430 return ConvertedTypes.Date, DecimalMetadata{} 431} 432 433func (DateLogicalType) IsCompatible(t ConvertedType, dec DecimalMetadata) bool { 434 return t == ConvertedTypes.Date && !dec.IsSet 435} 436 437func (DateLogicalType) IsApplicable(t parquet.Type, _ int32) bool { 438 return t == parquet.Types.Int32 439} 440 441func (DateLogicalType) toThrift() *format.LogicalType { 442 return &format.LogicalType{DATE: format.NewDateType()} 443} 444 445func (DateLogicalType) Equals(rhs LogicalType) bool { 446 _, ok := rhs.(DateLogicalType) 447 return ok 448} 449 450func timeUnitFromThrift(unit *format.TimeUnit) TimeUnitType { 451 switch { 452 case unit == nil: 453 return TimeUnitUnknown 454 case unit.IsSetMILLIS(): 455 return TimeUnitMillis 456 case unit.IsSetMICROS(): 457 return TimeUnitMicros 458 case unit.IsSetNANOS(): 459 return TimeUnitNanos 460 default: 461 return TimeUnitUnknown 462 } 463} 464 465func timeUnitToString(unit *format.TimeUnit) string { 466 switch { 467 case unit == nil: 468 return "unknown" 469 case unit.IsSetMILLIS(): 470 return "milliseconds" 471 case unit.IsSetMICROS(): 472 return "microseconds" 473 case unit.IsSetNANOS(): 474 return "nanoseconds" 475 default: 476 return "unknown" 477 } 478} 479 480func timeUnitFromString(v string) TimeUnitType { 481 switch v { 482 case "millis": 483 return TimeUnitMillis 484 case "micros": 485 return TimeUnitMicros 486 case "nanos": 487 return TimeUnitNanos 488 default: 489 return TimeUnitUnknown 490 } 491} 492 493func createTimeUnit(unit TimeUnitType) *format.TimeUnit { 494 tunit := format.NewTimeUnit() 495 switch unit { 496 case TimeUnitMicros: 497 tunit.MICROS = format.NewMicroSeconds() 498 case TimeUnitMillis: 499 tunit.MILLIS = format.NewMilliSeconds() 500 case TimeUnitNanos: 501 tunit.NANOS = format.NewNanoSeconds() 502 default: 503 panic("parquet: time unit must be one of MILLIS, MICROS, or NANOS for Time logical type") 504 } 505 return tunit 506} 507 508// NewTimeLogicalType returns a time type of the given unit. 509func NewTimeLogicalType(isAdjustedToUTC bool, unit TimeUnitType) LogicalType { 510 return &TimeLogicalType{typ: &format.TimeType{ 511 IsAdjustedToUTC: isAdjustedToUTC, 512 Unit: createTimeUnit(unit), 513 }} 514} 515 516// TimeLogicalType is a time type without a date and must be an 517// int32 for milliseconds, or an int64 for micro or nano seconds. 518type TimeLogicalType struct { 519 baseLogicalType 520 typ *format.TimeType 521} 522 523func (t TimeLogicalType) IsAdjustedToUTC() bool { 524 return t.typ.IsAdjustedToUTC 525} 526 527func (t TimeLogicalType) TimeUnit() TimeUnitType { 528 return timeUnitFromThrift(t.typ.Unit) 529} 530 531func (TimeLogicalType) SortOrder() SortOrder { 532 return SortSIGNED 533} 534 535func (t TimeLogicalType) MarshalJSON() ([]byte, error) { 536 return json.Marshal(map[string]interface{}{ 537 "Type": "Time", "isAdjustedToUTC": t.typ.IsAdjustedToUTC, "timeUnit": timeUnitToString(t.typ.GetUnit())}) 538} 539 540func (t TimeLogicalType) String() string { 541 return fmt.Sprintf("Time(isAdjustedToUTC=%t, timeUnit=%s)", t.typ.GetIsAdjustedToUTC(), timeUnitToString(t.typ.GetUnit())) 542} 543 544func (t TimeLogicalType) ToConvertedType() (ConvertedType, DecimalMetadata) { 545 unit := timeUnitFromThrift(t.typ.Unit) 546 if t.typ.IsAdjustedToUTC { 547 switch unit { 548 case TimeUnitMillis: 549 return ConvertedTypes.TimeMillis, DecimalMetadata{} 550 case TimeUnitMicros: 551 return ConvertedTypes.TimeMicros, DecimalMetadata{} 552 } 553 } 554 return ConvertedTypes.None, DecimalMetadata{} 555} 556 557func (t TimeLogicalType) IsCompatible(c ConvertedType, dec DecimalMetadata) bool { 558 if dec.IsSet { 559 return false 560 } 561 unit := timeUnitFromThrift(t.typ.Unit) 562 if t.typ.IsAdjustedToUTC { 563 switch unit { 564 case TimeUnitMillis: 565 return c == ConvertedTypes.TimeMillis 566 case TimeUnitMicros: 567 return c == ConvertedTypes.TimeMicros 568 } 569 } 570 571 return c == ConvertedTypes.None || c == ConvertedTypes.NA 572} 573 574func (t TimeLogicalType) IsApplicable(typ parquet.Type, _ int32) bool { 575 return (typ == parquet.Types.Int32 && t.typ.GetUnit().IsSetMILLIS()) || 576 (typ == parquet.Types.Int64 && 577 (t.typ.GetUnit().IsSetMICROS() || t.typ.GetUnit().IsSetNANOS())) 578} 579 580func (t TimeLogicalType) toThrift() *format.LogicalType { 581 return &format.LogicalType{TIME: t.typ} 582} 583 584func (t TimeLogicalType) Equals(rhs LogicalType) bool { 585 other, ok := rhs.(*TimeLogicalType) 586 if !ok { 587 return false 588 } 589 return t.typ.IsAdjustedToUTC == other.typ.IsAdjustedToUTC && 590 timeUnitFromThrift(t.typ.Unit) == timeUnitFromThrift(other.typ.Unit) 591} 592 593// NewTimestampLogicalType returns a logical timestamp type with "forceConverted" 594// set to false 595func NewTimestampLogicalType(isAdjustedToUTC bool, unit TimeUnitType) LogicalType { 596 return &TimestampLogicalType{ 597 typ: &format.TimestampType{ 598 IsAdjustedToUTC: isAdjustedToUTC, 599 Unit: createTimeUnit(unit), 600 }, 601 forceConverted: false, 602 fromConverted: false, 603 } 604} 605 606// NewTimestampLogicalTypeForce returns a timestamp logical type with 607// "forceConverted" set to true 608func NewTimestampLogicalTypeForce(isAdjustedToUTC bool, unit TimeUnitType) LogicalType { 609 return &TimestampLogicalType{ 610 typ: &format.TimestampType{ 611 IsAdjustedToUTC: isAdjustedToUTC, 612 Unit: createTimeUnit(unit), 613 }, 614 forceConverted: true, 615 fromConverted: false, 616 } 617} 618 619// TimestampLogicalType represents an int64 number that can be decoded 620// into a year, month, day, hour, minute, second, and subsecond 621type TimestampLogicalType struct { 622 baseLogicalType 623 typ *format.TimestampType 624 // forceConverted denotes whether or not the resulting serialized 625 // type when writing to parquet will be written as the legacy 626 // ConvertedType TIMESTAMP_MICROS/TIMESTAMP_MILLIS (true) 627 // or if it will write the proper current Logical Types (false, default) 628 forceConverted bool 629 // fromConverted denotes if the timestamp type was created by 630 // translating a legacy converted type of TIMESTAMP_MILLIS or 631 // TIMESTAMP_MICROS rather than by using the current logical 632 // types. Default is false. 633 fromConverted bool 634} 635 636func (t TimestampLogicalType) IsFromConvertedType() bool { 637 return t.fromConverted 638} 639 640func (t TimestampLogicalType) IsAdjustedToUTC() bool { 641 return t.typ.IsAdjustedToUTC 642} 643 644func (t TimestampLogicalType) TimeUnit() TimeUnitType { 645 return timeUnitFromThrift(t.typ.Unit) 646} 647 648func (TimestampLogicalType) SortOrder() SortOrder { 649 return SortSIGNED 650} 651 652func (t TimestampLogicalType) MarshalJSON() ([]byte, error) { 653 return json.Marshal(map[string]interface{}{ 654 "Type": "Timestamp", 655 "isAdjustedToUTC": t.typ.IsAdjustedToUTC, 656 "timeUnit": timeUnitToString(t.typ.GetUnit()), 657 "is_from_converted_type": t.fromConverted, 658 "force_set_converted_type": t.forceConverted, 659 }) 660} 661 662func (t TimestampLogicalType) IsSerialized() bool { 663 return !t.fromConverted 664} 665 666func (t TimestampLogicalType) String() string { 667 return fmt.Sprintf("Timestamp(isAdjustedToUTC=%t, timeUnit=%s, is_from_converted_type=%t, force_set_converted_type=%t)", 668 t.typ.GetIsAdjustedToUTC(), timeUnitToString(t.typ.GetUnit()), t.fromConverted, t.forceConverted) 669} 670 671func (t TimestampLogicalType) ToConvertedType() (ConvertedType, DecimalMetadata) { 672 unit := timeUnitFromThrift(t.typ.Unit) 673 if t.typ.IsAdjustedToUTC || t.forceConverted { 674 switch unit { 675 case TimeUnitMillis: 676 return ConvertedTypes.TimestampMillis, DecimalMetadata{} 677 case TimeUnitMicros: 678 return ConvertedTypes.TimestampMicros, DecimalMetadata{} 679 } 680 } 681 return ConvertedTypes.None, DecimalMetadata{} 682} 683 684func (t TimestampLogicalType) IsCompatible(c ConvertedType, dec DecimalMetadata) bool { 685 if dec.IsSet { 686 return false 687 } 688 689 switch timeUnitFromThrift(t.typ.Unit) { 690 case TimeUnitMillis: 691 if t.typ.GetIsAdjustedToUTC() || t.forceConverted { 692 return c == ConvertedTypes.TimestampMillis 693 } 694 case TimeUnitMicros: 695 if t.typ.GetIsAdjustedToUTC() || t.forceConverted { 696 return c == ConvertedTypes.TimestampMicros 697 } 698 } 699 700 return c == ConvertedTypes.None || c == ConvertedTypes.NA 701} 702 703func (TimestampLogicalType) IsApplicable(t parquet.Type, _ int32) bool { 704 return t == parquet.Types.Int64 705} 706 707func (t TimestampLogicalType) toThrift() *format.LogicalType { 708 return &format.LogicalType{TIMESTAMP: t.typ} 709} 710 711func (t TimestampLogicalType) Equals(rhs LogicalType) bool { 712 other, ok := rhs.(*TimestampLogicalType) 713 if !ok { 714 return false 715 } 716 return t.typ.IsAdjustedToUTC == other.typ.IsAdjustedToUTC && 717 timeUnitFromThrift(t.typ.Unit) == timeUnitFromThrift(other.typ.Unit) 718} 719 720// NewIntLogicalType creates an integer logical type of the desired bitwidth 721// and whether it is signed or not. 722// 723// Bit width must be exactly 8, 16, 32 or 64 for an integer logical type 724func NewIntLogicalType(bitWidth int8, signed bool) LogicalType { 725 switch bitWidth { 726 case 8, 16, 32, 64: 727 default: 728 panic("parquet: bit width must be exactly 8, 16, 32, or 64 for Int logical type") 729 } 730 return &IntLogicalType{ 731 typ: &format.IntType{ 732 BitWidth: bitWidth, 733 IsSigned: signed, 734 }, 735 } 736} 737 738// IntLogicalType represents an integer type of a specific bit width and 739// is either signed or unsigned. 740type IntLogicalType struct { 741 baseLogicalType 742 typ *format.IntType 743} 744 745func (t IntLogicalType) BitWidth() int8 { 746 return t.typ.BitWidth 747} 748 749func (t IntLogicalType) IsSigned() bool { 750 return t.typ.IsSigned 751} 752 753func (t IntLogicalType) SortOrder() SortOrder { 754 if t.typ.IsSigned { 755 return SortSIGNED 756 } 757 return SortUNSIGNED 758} 759 760func (t IntLogicalType) MarshalJSON() ([]byte, error) { 761 return json.Marshal(map[string]interface{}{ 762 "Type": "Int", "bitWidth": t.typ.BitWidth, "isSigned": t.typ.IsSigned, 763 }) 764} 765 766func (t IntLogicalType) String() string { 767 return fmt.Sprintf("Int(bitWidth=%d, isSigned=%t)", t.typ.GetBitWidth(), t.typ.GetIsSigned()) 768} 769 770func (t IntLogicalType) ToConvertedType() (ConvertedType, DecimalMetadata) { 771 var d DecimalMetadata 772 if t.typ.IsSigned { 773 switch t.typ.BitWidth { 774 case 8: 775 return ConvertedTypes.Int8, d 776 case 16: 777 return ConvertedTypes.Int16, d 778 case 32: 779 return ConvertedTypes.Int32, d 780 case 64: 781 return ConvertedTypes.Int64, d 782 } 783 } else { 784 switch t.typ.BitWidth { 785 case 8: 786 return ConvertedTypes.Uint8, d 787 case 16: 788 return ConvertedTypes.Uint16, d 789 case 32: 790 return ConvertedTypes.Uint32, d 791 case 64: 792 return ConvertedTypes.Uint64, d 793 } 794 } 795 return ConvertedTypes.None, d 796} 797 798func (t IntLogicalType) IsCompatible(c ConvertedType, dec DecimalMetadata) bool { 799 if dec.IsSet { 800 return false 801 } 802 v, _ := t.ToConvertedType() 803 return c == v 804} 805 806func (t IntLogicalType) IsApplicable(typ parquet.Type, _ int32) bool { 807 return (typ == parquet.Types.Int32 && t.typ.GetBitWidth() <= 32) || 808 (typ == parquet.Types.Int64 && t.typ.GetBitWidth() == 64) 809} 810 811func (t IntLogicalType) toThrift() *format.LogicalType { 812 return &format.LogicalType{INTEGER: t.typ} 813} 814 815func (t IntLogicalType) Equals(rhs LogicalType) bool { 816 other, ok := rhs.(*IntLogicalType) 817 if !ok { 818 return false 819 } 820 821 return t.typ.GetIsSigned() == other.typ.GetIsSigned() && 822 t.typ.GetBitWidth() == other.typ.GetBitWidth() 823} 824 825// UnknownLogicalType is a type that is essentially a placeholder for when 826// we don't know the type. 827type UnknownLogicalType struct{ baseLogicalType } 828 829func (UnknownLogicalType) SortOrder() SortOrder { 830 return SortUNKNOWN 831} 832 833func (UnknownLogicalType) MarshalJSON() ([]byte, error) { 834 return json.Marshal(map[string]string{"Type": UnknownLogicalType{}.String()}) 835} 836 837func (UnknownLogicalType) IsValid() bool { return false } 838 839func (UnknownLogicalType) IsSerialized() bool { return false } 840 841func (UnknownLogicalType) String() string { 842 return "Unknown" 843} 844 845func (UnknownLogicalType) ToConvertedType() (ConvertedType, DecimalMetadata) { 846 return ConvertedTypes.NA, DecimalMetadata{} 847} 848 849func (UnknownLogicalType) IsCompatible(c ConvertedType, dec DecimalMetadata) bool { 850 return c == ConvertedTypes.NA && !dec.IsSet 851} 852 853func (UnknownLogicalType) IsApplicable(parquet.Type, int32) bool { return true } 854 855func (UnknownLogicalType) toThrift() *format.LogicalType { 856 return &format.LogicalType{UNKNOWN: format.NewNullType()} 857} 858 859func (UnknownLogicalType) Equals(rhs LogicalType) bool { 860 _, ok := rhs.(UnknownLogicalType) 861 return ok 862} 863 864// JSONLogicalType represents a byte array column which is to be interpreted 865// as a JSON string. 866type JSONLogicalType struct{ baseLogicalType } 867 868func (JSONLogicalType) SortOrder() SortOrder { 869 return SortUNSIGNED 870} 871 872func (JSONLogicalType) MarshalJSON() ([]byte, error) { 873 return json.Marshal(map[string]string{"Type": JSONLogicalType{}.String()}) 874} 875 876func (JSONLogicalType) String() string { 877 return "JSON" 878} 879 880func (JSONLogicalType) ToConvertedType() (ConvertedType, DecimalMetadata) { 881 return ConvertedTypes.JSON, DecimalMetadata{} 882} 883 884func (JSONLogicalType) IsCompatible(c ConvertedType, dec DecimalMetadata) bool { 885 return c == ConvertedTypes.JSON && !dec.IsSet 886} 887 888func (JSONLogicalType) IsApplicable(t parquet.Type, _ int32) bool { 889 return t == parquet.Types.ByteArray 890} 891 892func (JSONLogicalType) toThrift() *format.LogicalType { 893 return &format.LogicalType{JSON: format.NewJsonType()} 894} 895 896func (JSONLogicalType) Equals(rhs LogicalType) bool { 897 _, ok := rhs.(JSONLogicalType) 898 return ok 899} 900 901// BSONLogicalType represents a binary JSON string in the byte array 902type BSONLogicalType struct{ baseLogicalType } 903 904func (BSONLogicalType) SortOrder() SortOrder { 905 return SortUNSIGNED 906} 907 908func (BSONLogicalType) MarshalJSON() ([]byte, error) { 909 return json.Marshal(map[string]string{"Type": BSONLogicalType{}.String()}) 910} 911 912func (BSONLogicalType) String() string { 913 return "BSON" 914} 915 916func (BSONLogicalType) ToConvertedType() (ConvertedType, DecimalMetadata) { 917 return ConvertedTypes.BSON, DecimalMetadata{} 918} 919 920func (BSONLogicalType) IsCompatible(c ConvertedType, dec DecimalMetadata) bool { 921 return c == ConvertedTypes.BSON && !dec.IsSet 922} 923 924func (BSONLogicalType) IsApplicable(t parquet.Type, _ int32) bool { 925 return t == parquet.Types.ByteArray 926} 927 928func (BSONLogicalType) toThrift() *format.LogicalType { 929 return &format.LogicalType{BSON: format.NewBsonType()} 930} 931 932func (BSONLogicalType) Equals(rhs LogicalType) bool { 933 _, ok := rhs.(BSONLogicalType) 934 return ok 935} 936 937// UUIDLogicalType can only be used with a FixedLength byte array column 938// that is exactly 16 bytes long 939type UUIDLogicalType struct{ baseLogicalType } 940 941func (UUIDLogicalType) SortOrder() SortOrder { 942 return SortUNSIGNED 943} 944 945func (UUIDLogicalType) MarshalJSON() ([]byte, error) { 946 return json.Marshal(map[string]string{"Type": UUIDLogicalType{}.String()}) 947} 948 949func (UUIDLogicalType) String() string { 950 return "UUID" 951} 952 953func (UUIDLogicalType) ToConvertedType() (ConvertedType, DecimalMetadata) { 954 return ConvertedTypes.None, DecimalMetadata{} 955} 956 957func (UUIDLogicalType) IsCompatible(c ConvertedType, dec DecimalMetadata) bool { 958 if dec.IsSet { 959 return false 960 } 961 switch c { 962 case ConvertedTypes.None, ConvertedTypes.NA: 963 return true 964 } 965 return false 966} 967 968func (UUIDLogicalType) IsApplicable(t parquet.Type, tlen int32) bool { 969 return t == parquet.Types.FixedLenByteArray && tlen == 16 970} 971 972func (UUIDLogicalType) toThrift() *format.LogicalType { 973 return &format.LogicalType{UUID: format.NewUUIDType()} 974} 975 976func (UUIDLogicalType) Equals(rhs LogicalType) bool { 977 _, ok := rhs.(UUIDLogicalType) 978 return ok 979} 980 981// IntervalLogicalType is not yet in the thrift spec, but represents 982// an interval time and needs to be a fixed length byte array of 12 bytes 983type IntervalLogicalType struct{ baseLogicalType } 984 985func (IntervalLogicalType) SortOrder() SortOrder { 986 return SortUNKNOWN 987} 988 989func (IntervalLogicalType) MarshalJSON() ([]byte, error) { 990 return json.Marshal(map[string]string{"Type": IntervalLogicalType{}.String()}) 991} 992 993func (IntervalLogicalType) String() string { 994 return "Interval" 995} 996 997func (IntervalLogicalType) ToConvertedType() (ConvertedType, DecimalMetadata) { 998 return ConvertedTypes.Interval, DecimalMetadata{} 999} 1000 1001func (IntervalLogicalType) IsCompatible(c ConvertedType, dec DecimalMetadata) bool { 1002 return c == ConvertedTypes.Interval && !dec.IsSet 1003} 1004 1005func (IntervalLogicalType) IsApplicable(t parquet.Type, tlen int32) bool { 1006 return t == parquet.Types.FixedLenByteArray && tlen == 12 1007} 1008 1009func (IntervalLogicalType) toThrift() *format.LogicalType { 1010 panic("no parquet IntervalLogicalType yet implemented") 1011} 1012 1013func (IntervalLogicalType) Equals(rhs LogicalType) bool { 1014 _, ok := rhs.(IntervalLogicalType) 1015 return ok 1016} 1017 1018type NullLogicalType struct{ baseLogicalType } 1019 1020func (NullLogicalType) SortOrder() SortOrder { 1021 return SortUNKNOWN 1022} 1023 1024func (NullLogicalType) MarshalJSON() ([]byte, error) { 1025 return json.Marshal(map[string]string{"Type": NullLogicalType{}.String()}) 1026} 1027 1028func (NullLogicalType) String() string { 1029 return "Null" 1030} 1031 1032func (NullLogicalType) ToConvertedType() (ConvertedType, DecimalMetadata) { 1033 return ConvertedTypes.None, DecimalMetadata{} 1034} 1035 1036func (NullLogicalType) IsCompatible(c ConvertedType, dec DecimalMetadata) bool { 1037 if dec.IsSet { 1038 return false 1039 } 1040 switch c { 1041 case ConvertedTypes.None, ConvertedTypes.NA: 1042 return true 1043 } 1044 return false 1045} 1046 1047func (NullLogicalType) IsApplicable(parquet.Type, int32) bool { 1048 return true 1049} 1050 1051func (NullLogicalType) toThrift() *format.LogicalType { 1052 return &format.LogicalType{UNKNOWN: format.NewNullType()} 1053} 1054 1055func (NullLogicalType) Equals(rhs LogicalType) bool { 1056 _, ok := rhs.(NullLogicalType) 1057 return ok 1058} 1059 1060type NoLogicalType struct{ baseLogicalType } 1061 1062func (NoLogicalType) SortOrder() SortOrder { 1063 return SortUNKNOWN 1064} 1065 1066func (NoLogicalType) MarshalJSON() ([]byte, error) { 1067 return json.Marshal(map[string]string{"Type": NoLogicalType{}.String()}) 1068} 1069 1070func (NoLogicalType) IsSerialized() bool { return false } 1071 1072func (NoLogicalType) String() string { 1073 return "None" 1074} 1075 1076func (NoLogicalType) ToConvertedType() (ConvertedType, DecimalMetadata) { 1077 return ConvertedTypes.None, DecimalMetadata{} 1078} 1079 1080func (NoLogicalType) IsCompatible(c ConvertedType, dec DecimalMetadata) bool { 1081 return c == ConvertedTypes.None && !dec.IsSet 1082} 1083 1084func (NoLogicalType) IsApplicable(parquet.Type, int32) bool { 1085 return true 1086} 1087 1088func (NoLogicalType) toThrift() *format.LogicalType { 1089 panic("cannot convert NoLogicalType to thrift") 1090} 1091 1092func (NoLogicalType) Equals(rhs LogicalType) bool { 1093 _, ok := rhs.(NoLogicalType) 1094 return ok 1095} 1096 1097func (NoLogicalType) IsNone() bool { return true } 1098