1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9// http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing, software
12// distributed under the License is distributed on an "AS IS" BASIS,
13// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14// See the License for the specific language governing permissions and
15// limitations under the License.
16
17package schema
18
19import (
20	"encoding/json"
21	"fmt"
22	"math"
23
24	"github.com/apache/arrow/go/v6/parquet"
25	"github.com/apache/arrow/go/v6/parquet/internal/debug"
26	format "github.com/apache/arrow/go/v6/parquet/internal/gen-go/parquet"
27)
28
// DecimalMetadata is a struct for managing scale and precision information between
// converted and logical types.
//
// IsSet marks whether Scale and Precision carry meaningful values. Within this
// file only DecimalLogicalType.ToConvertedType produces a value with IsSet true;
// every other logical type returns the zero value, and their IsCompatible
// methods reject any metadata that has IsSet true.
type DecimalMetadata struct {
	IsSet     bool
	Scale     int32
	Precision int32
}
36
37func getLogicalType(l *format.LogicalType) LogicalType {
38	switch {
39	case l.IsSetSTRING():
40		return StringLogicalType{}
41	case l.IsSetMAP():
42		return MapLogicalType{}
43	case l.IsSetLIST():
44		return ListLogicalType{}
45	case l.IsSetENUM():
46		return EnumLogicalType{}
47	case l.IsSetDECIMAL():
48		return &DecimalLogicalType{typ: l.DECIMAL}
49	case l.IsSetDATE():
50		return DateLogicalType{}
51	case l.IsSetTIME():
52		if timeUnitFromThrift(l.TIME.Unit) == TimeUnitUnknown {
53			panic("parquet: TimeUnit must be one of MILLIS, MICROS, or NANOS for Time logical type")
54		}
55		return &TimeLogicalType{typ: l.TIME}
56	case l.IsSetTIMESTAMP():
57		if timeUnitFromThrift(l.TIMESTAMP.Unit) == TimeUnitUnknown {
58			panic("parquet: TimeUnit must be one of MILLIS, MICROS, or NANOS for Timestamp logical type")
59		}
60		return &TimestampLogicalType{typ: l.TIMESTAMP}
61	case l.IsSetINTEGER():
62		return &IntLogicalType{typ: l.INTEGER}
63	case l.IsSetUNKNOWN():
64		return NullLogicalType{}
65	case l.IsSetJSON():
66		return JSONLogicalType{}
67	case l.IsSetBSON():
68		return BSONLogicalType{}
69	case l.IsSetUUID():
70		return UUIDLogicalType{}
71	case l == nil:
72		return NoLogicalType{}
73	default:
74		panic("invalid logical type")
75	}
76}
77
// TimeUnitType is an enum for denoting whether a time based logical type
// is using milliseconds, microseconds or nanoseconds.
type TimeUnitType int

// Constants for the TimeUnitType. TimeUnitUnknown is the value the conversion
// helpers in this file return when a unit is missing or not recognized; it is
// rejected by createTimeUnit.
const (
	TimeUnitMillis TimeUnitType = iota
	TimeUnitMicros
	TimeUnitNanos
	TimeUnitUnknown
)
89
// LogicalType is the descriptor that defines the usage of a physical primitive
// type in the schema, such as an Interval, Date, etc.
//
// The interface contains the unexported toThrift method, so it can only be
// implemented by types declared in this package.
type LogicalType interface {
	// Returns true if a nested type like List or Map
	IsNested() bool
	// Returns true if this type can be serialized. Among the types in this
	// file it is false for UnknownLogicalType, NoLogicalType and for a
	// TimestampLogicalType that was derived from a legacy converted type.
	IsSerialized() bool
	// Returns true for every type in this file except UnknownLogicalType
	// (NoLogicalType is considered valid).
	IsValid() bool
	// Returns true if it is NoType
	IsNone() bool
	// returns a string representation of the Logical Type
	String() string
	// toThrift returns the thrift representation of this logical type.
	// IntervalLogicalType and NoLogicalType panic instead of returning.
	toThrift() *format.LogicalType
	// Return the equivalent ConvertedType for legacy Parquet systems
	ToConvertedType() (ConvertedType, DecimalMetadata)
	// Returns true if the specified ConvertedType is compatible with this
	// logical type
	IsCompatible(ConvertedType, DecimalMetadata) bool
	// Returns true if this logical type can be used with the provided physical type
	IsApplicable(t parquet.Type, tlen int32) bool
	// Returns true if the logical types are the same
	Equals(LogicalType) bool
	// Returns the default stat sort order for this logical type
	SortOrder() SortOrder
}
116
// TemporalLogicalType is a smaller interface for Time based logical types
// like Time / Timestamp. In this file it is satisfied by TimeLogicalType and
// TimestampLogicalType.
type TemporalLogicalType interface {
	LogicalType
	// IsAdjustedToUTC returns the isAdjustedToUTC flag stored on the type.
	IsAdjustedToUTC() bool
	// TimeUnit returns the unit of the type, or TimeUnitUnknown when the
	// underlying thrift unit is missing or unrecognized.
	TimeUnit() TimeUnitType
}
124
// SortOrder mirrors the parquet.thrift sort order type
type SortOrder int8

// Constants for the Stat sort order definitions. SortUNKNOWN is returned by
// the helpers in this file when no ordering is defined for a type.
const (
	SortSIGNED SortOrder = iota
	SortUNSIGNED
	SortUNKNOWN
)
134
135// DefaultSortOrder returns the default stat sort order for the given physical type
136func DefaultSortOrder(primitive format.Type) SortOrder {
137	switch primitive {
138	case format.Type_BOOLEAN, format.Type_INT32, format.Type_INT64, format.Type_FLOAT, format.Type_DOUBLE:
139		return SortSIGNED
140	case format.Type_BYTE_ARRAY, format.Type_FIXED_LEN_BYTE_ARRAY:
141		return SortUNSIGNED
142	case format.Type_INT96:
143		fallthrough
144	default:
145		return SortUNKNOWN
146	}
147}
148
149// GetLogicalSortOrder returns the default sort order for this logical type
150// or falls back to the default sort order for the physical type if not valid
151func GetLogicalSortOrder(logical LogicalType, primitive format.Type) SortOrder {
152	switch {
153	case logical == nil || !logical.IsValid():
154		return SortUNKNOWN
155	case logical.Equals(NoLogicalType{}):
156		return DefaultSortOrder(primitive)
157	default:
158		return logical.SortOrder()
159	}
160}
161
// baseLogicalType is embedded by the concrete logical types in this file and
// supplies the default answers for the simple predicate methods; individual
// types override the ones that differ.
type baseLogicalType struct{}

// IsSerialized reports true by default.
func (baseLogicalType) IsSerialized() bool { return true }

// IsValid reports true by default.
func (baseLogicalType) IsValid() bool { return true }

// IsNested reports false by default.
func (baseLogicalType) IsNested() bool { return false }

// IsNone reports false by default.
func (baseLogicalType) IsNone() bool {
	return false
}
177
178// StringLogicalType is a UTF8 string, only usable with ByteArray and FixedLenByteArray
179type StringLogicalType struct{ baseLogicalType }
180
181func (StringLogicalType) SortOrder() SortOrder {
182	return SortUNSIGNED
183}
184
185func (StringLogicalType) MarshalJSON() ([]byte, error) {
186	return json.Marshal(map[string]string{"Type": StringLogicalType{}.String()})
187}
188
189func (StringLogicalType) String() string {
190	return "String"
191}
192
193func (StringLogicalType) ToConvertedType() (ConvertedType, DecimalMetadata) {
194	return ConvertedTypes.UTF8, DecimalMetadata{}
195}
196
197func (StringLogicalType) IsCompatible(t ConvertedType, dec DecimalMetadata) bool {
198	return t == ConvertedTypes.UTF8 && !dec.IsSet
199}
200
201func (StringLogicalType) IsApplicable(t parquet.Type, _ int32) bool {
202	return t == parquet.Types.ByteArray
203}
204
205func (StringLogicalType) toThrift() *format.LogicalType {
206	return &format.LogicalType{STRING: format.NewStringType()}
207}
208
209func (StringLogicalType) Equals(rhs LogicalType) bool {
210	_, ok := rhs.(StringLogicalType)
211	return ok
212}
213
214// MapLogicalType represents a mapped type
215type MapLogicalType struct{ baseLogicalType }
216
217func (MapLogicalType) SortOrder() SortOrder {
218	return SortUNKNOWN
219}
220
221func (MapLogicalType) MarshalJSON() ([]byte, error) {
222	return json.Marshal(map[string]string{"Type": MapLogicalType{}.String()})
223}
224
225func (MapLogicalType) String() string {
226	return "Map"
227}
228
229func (MapLogicalType) IsNested() bool {
230	return true
231}
232
233func (MapLogicalType) ToConvertedType() (ConvertedType, DecimalMetadata) {
234	return ConvertedTypes.Map, DecimalMetadata{}
235}
236
237func (MapLogicalType) IsCompatible(t ConvertedType, dec DecimalMetadata) bool {
238	return (t == ConvertedTypes.Map || t == ConvertedTypes.MapKeyValue) && !dec.IsSet
239}
240
241func (MapLogicalType) IsApplicable(parquet.Type, int32) bool {
242	return false
243}
244
245func (MapLogicalType) toThrift() *format.LogicalType {
246	return &format.LogicalType{MAP: format.NewMapType()}
247}
248
249func (MapLogicalType) Equals(rhs LogicalType) bool {
250	_, ok := rhs.(MapLogicalType)
251	return ok
252}
253
254func NewListLogicalType() LogicalType {
255	return ListLogicalType{}
256}
257
258// ListLogicalType is used for columns which are themselves nested lists
259type ListLogicalType struct{ baseLogicalType }
260
261func (ListLogicalType) SortOrder() SortOrder {
262	return SortUNKNOWN
263}
264
265func (ListLogicalType) MarshalJSON() ([]byte, error) {
266	return json.Marshal(map[string]string{"Type": ListLogicalType{}.String()})
267}
268
269func (ListLogicalType) String() string {
270	return "List"
271}
272
273func (ListLogicalType) IsNested() bool {
274	return true
275}
276
277func (ListLogicalType) ToConvertedType() (ConvertedType, DecimalMetadata) {
278	return ConvertedTypes.List, DecimalMetadata{}
279}
280
281func (ListLogicalType) IsCompatible(t ConvertedType, dec DecimalMetadata) bool {
282	return t == ConvertedTypes.List && !dec.IsSet
283}
284
285func (ListLogicalType) IsApplicable(parquet.Type, int32) bool {
286	return false
287}
288
289func (ListLogicalType) toThrift() *format.LogicalType {
290	return &format.LogicalType{LIST: format.NewListType()}
291}
292
293func (ListLogicalType) Equals(rhs LogicalType) bool {
294	_, ok := rhs.(ListLogicalType)
295	return ok
296}
297
298// EnumLogicalType is for representing an enum, which should be a byte array type
299type EnumLogicalType struct{ baseLogicalType }
300
301func (EnumLogicalType) SortOrder() SortOrder {
302	return SortUNSIGNED
303}
304
305func (EnumLogicalType) MarshalJSON() ([]byte, error) {
306	return json.Marshal(map[string]string{"Type": EnumLogicalType{}.String()})
307}
308
309func (EnumLogicalType) String() string {
310	return "Enum"
311}
312
313func (EnumLogicalType) ToConvertedType() (ConvertedType, DecimalMetadata) {
314	return ConvertedTypes.Enum, DecimalMetadata{}
315}
316
317func (EnumLogicalType) IsCompatible(t ConvertedType, dec DecimalMetadata) bool {
318	return t == ConvertedTypes.Enum && !dec.IsSet
319}
320
321func (EnumLogicalType) IsApplicable(t parquet.Type, _ int32) bool {
322	return t == parquet.Types.ByteArray
323}
324
325func (EnumLogicalType) toThrift() *format.LogicalType {
326	return &format.LogicalType{ENUM: format.NewEnumType()}
327}
328
329func (EnumLogicalType) Equals(rhs LogicalType) bool {
330	_, ok := rhs.(EnumLogicalType)
331	return ok
332}
333
334// NewDecimalLogicalType returns a Decimal logical type with the given
335// precision and scale.
336//
337// Panics if precision < 1 or scale is not in the range (0, precision)
338func NewDecimalLogicalType(precision int32, scale int32) LogicalType {
339	if precision < 1 {
340		panic("parquet: precision must be greater than or equal to 1 for decimal logical type")
341	}
342	if scale < 0 || scale > precision {
343		panic("parquet: scale must be a non-negative integer that does not exceed precision for decimal logical type")
344	}
345	return &DecimalLogicalType{typ: &format.DecimalType{Precision: precision, Scale: scale}}
346}
347
348// DecimalLogicalType is used to represent a decimal value of a given
349// precision and scale
350type DecimalLogicalType struct {
351	baseLogicalType
352	typ *format.DecimalType
353}
354
355func (t DecimalLogicalType) Precision() int32 {
356	return t.typ.Precision
357}
358
359func (t DecimalLogicalType) Scale() int32 {
360	return t.typ.Scale
361}
362
363func (DecimalLogicalType) SortOrder() SortOrder {
364	return SortSIGNED
365}
366
367func (t DecimalLogicalType) MarshalJSON() ([]byte, error) {
368	return json.Marshal(map[string]interface{}{"Type": "Decimal", "precision": t.typ.Precision, "scale": t.typ.Scale})
369}
370
371func (t DecimalLogicalType) String() string {
372	return fmt.Sprintf("Decimal(precision=%d, scale=%d)", t.typ.Precision, t.typ.Scale)
373}
374
375func (t DecimalLogicalType) ToConvertedType() (ConvertedType, DecimalMetadata) {
376	return ConvertedTypes.Decimal, DecimalMetadata{IsSet: true, Scale: t.typ.GetScale(), Precision: t.typ.GetPrecision()}
377}
378
379func (t DecimalLogicalType) IsCompatible(c ConvertedType, dec DecimalMetadata) bool {
380	return c == ConvertedTypes.Decimal &&
381		dec.IsSet && dec.Scale == t.typ.Scale && dec.Precision == t.typ.Precision
382}
383
384func (t DecimalLogicalType) IsApplicable(typ parquet.Type, tlen int32) bool {
385	switch typ {
386	case parquet.Types.Int32:
387		return 1 <= t.typ.Precision && t.typ.Precision <= 9
388	case parquet.Types.Int64:
389		if t.typ.Precision < 10 {
390			debug.Log("int64 used for decimal logical, precision is small enough to use int32")
391		}
392		return 1 <= t.typ.Precision && t.typ.Precision <= 18
393	case parquet.Types.FixedLenByteArray:
394		return t.typ.Precision <= int32(math.Floor(math.Log10(math.Pow(2.0, (8.0*float64(tlen)-1.0)))))
395	case parquet.Types.ByteArray:
396		return true
397	}
398	return false
399}
400
401func (t DecimalLogicalType) toThrift() *format.LogicalType {
402	return &format.LogicalType{DECIMAL: t.typ}
403}
404
405func (t DecimalLogicalType) Equals(rhs LogicalType) bool {
406	other, ok := rhs.(*DecimalLogicalType)
407	if !ok {
408		return false
409	}
410	return t.typ.Precision == other.typ.Precision && t.typ.Scale == other.typ.Scale
411}
412
413// DateLogicalType is an int32 representing the number of days since the Unix Epoch
414// 1 January 1970
415type DateLogicalType struct{ baseLogicalType }
416
417func (DateLogicalType) SortOrder() SortOrder {
418	return SortSIGNED
419}
420
421func (DateLogicalType) MarshalJSON() ([]byte, error) {
422	return json.Marshal(map[string]string{"Type": DateLogicalType{}.String()})
423}
424
425func (DateLogicalType) String() string {
426	return "Date"
427}
428
429func (DateLogicalType) ToConvertedType() (ConvertedType, DecimalMetadata) {
430	return ConvertedTypes.Date, DecimalMetadata{}
431}
432
433func (DateLogicalType) IsCompatible(t ConvertedType, dec DecimalMetadata) bool {
434	return t == ConvertedTypes.Date && !dec.IsSet
435}
436
437func (DateLogicalType) IsApplicable(t parquet.Type, _ int32) bool {
438	return t == parquet.Types.Int32
439}
440
441func (DateLogicalType) toThrift() *format.LogicalType {
442	return &format.LogicalType{DATE: format.NewDateType()}
443}
444
445func (DateLogicalType) Equals(rhs LogicalType) bool {
446	_, ok := rhs.(DateLogicalType)
447	return ok
448}
449
450func timeUnitFromThrift(unit *format.TimeUnit) TimeUnitType {
451	switch {
452	case unit == nil:
453		return TimeUnitUnknown
454	case unit.IsSetMILLIS():
455		return TimeUnitMillis
456	case unit.IsSetMICROS():
457		return TimeUnitMicros
458	case unit.IsSetNANOS():
459		return TimeUnitNanos
460	default:
461		return TimeUnitUnknown
462	}
463}
464
465func timeUnitToString(unit *format.TimeUnit) string {
466	switch {
467	case unit == nil:
468		return "unknown"
469	case unit.IsSetMILLIS():
470		return "milliseconds"
471	case unit.IsSetMICROS():
472		return "microseconds"
473	case unit.IsSetNANOS():
474		return "nanoseconds"
475	default:
476		return "unknown"
477	}
478}
479
480func timeUnitFromString(v string) TimeUnitType {
481	switch v {
482	case "millis":
483		return TimeUnitMillis
484	case "micros":
485		return TimeUnitMicros
486	case "nanos":
487		return TimeUnitNanos
488	default:
489		return TimeUnitUnknown
490	}
491}
492
493func createTimeUnit(unit TimeUnitType) *format.TimeUnit {
494	tunit := format.NewTimeUnit()
495	switch unit {
496	case TimeUnitMicros:
497		tunit.MICROS = format.NewMicroSeconds()
498	case TimeUnitMillis:
499		tunit.MILLIS = format.NewMilliSeconds()
500	case TimeUnitNanos:
501		tunit.NANOS = format.NewNanoSeconds()
502	default:
503		panic("parquet: time unit must be one of MILLIS, MICROS, or NANOS for Time logical type")
504	}
505	return tunit
506}
507
508// NewTimeLogicalType returns a time type of the given unit.
509func NewTimeLogicalType(isAdjustedToUTC bool, unit TimeUnitType) LogicalType {
510	return &TimeLogicalType{typ: &format.TimeType{
511		IsAdjustedToUTC: isAdjustedToUTC,
512		Unit:            createTimeUnit(unit),
513	}}
514}
515
516// TimeLogicalType is a time type without a date and must be an
517// int32 for milliseconds, or an int64 for micro or nano seconds.
518type TimeLogicalType struct {
519	baseLogicalType
520	typ *format.TimeType
521}
522
523func (t TimeLogicalType) IsAdjustedToUTC() bool {
524	return t.typ.IsAdjustedToUTC
525}
526
527func (t TimeLogicalType) TimeUnit() TimeUnitType {
528	return timeUnitFromThrift(t.typ.Unit)
529}
530
531func (TimeLogicalType) SortOrder() SortOrder {
532	return SortSIGNED
533}
534
535func (t TimeLogicalType) MarshalJSON() ([]byte, error) {
536	return json.Marshal(map[string]interface{}{
537		"Type": "Time", "isAdjustedToUTC": t.typ.IsAdjustedToUTC, "timeUnit": timeUnitToString(t.typ.GetUnit())})
538}
539
540func (t TimeLogicalType) String() string {
541	return fmt.Sprintf("Time(isAdjustedToUTC=%t, timeUnit=%s)", t.typ.GetIsAdjustedToUTC(), timeUnitToString(t.typ.GetUnit()))
542}
543
544func (t TimeLogicalType) ToConvertedType() (ConvertedType, DecimalMetadata) {
545	unit := timeUnitFromThrift(t.typ.Unit)
546	if t.typ.IsAdjustedToUTC {
547		switch unit {
548		case TimeUnitMillis:
549			return ConvertedTypes.TimeMillis, DecimalMetadata{}
550		case TimeUnitMicros:
551			return ConvertedTypes.TimeMicros, DecimalMetadata{}
552		}
553	}
554	return ConvertedTypes.None, DecimalMetadata{}
555}
556
557func (t TimeLogicalType) IsCompatible(c ConvertedType, dec DecimalMetadata) bool {
558	if dec.IsSet {
559		return false
560	}
561	unit := timeUnitFromThrift(t.typ.Unit)
562	if t.typ.IsAdjustedToUTC {
563		switch unit {
564		case TimeUnitMillis:
565			return c == ConvertedTypes.TimeMillis
566		case TimeUnitMicros:
567			return c == ConvertedTypes.TimeMicros
568		}
569	}
570
571	return c == ConvertedTypes.None || c == ConvertedTypes.NA
572}
573
574func (t TimeLogicalType) IsApplicable(typ parquet.Type, _ int32) bool {
575	return (typ == parquet.Types.Int32 && t.typ.GetUnit().IsSetMILLIS()) ||
576		(typ == parquet.Types.Int64 &&
577			(t.typ.GetUnit().IsSetMICROS() || t.typ.GetUnit().IsSetNANOS()))
578}
579
580func (t TimeLogicalType) toThrift() *format.LogicalType {
581	return &format.LogicalType{TIME: t.typ}
582}
583
584func (t TimeLogicalType) Equals(rhs LogicalType) bool {
585	other, ok := rhs.(*TimeLogicalType)
586	if !ok {
587		return false
588	}
589	return t.typ.IsAdjustedToUTC == other.typ.IsAdjustedToUTC &&
590		timeUnitFromThrift(t.typ.Unit) == timeUnitFromThrift(other.typ.Unit)
591}
592
593// NewTimestampLogicalType returns a logical timestamp type with "forceConverted"
594// set to false
595func NewTimestampLogicalType(isAdjustedToUTC bool, unit TimeUnitType) LogicalType {
596	return &TimestampLogicalType{
597		typ: &format.TimestampType{
598			IsAdjustedToUTC: isAdjustedToUTC,
599			Unit:            createTimeUnit(unit),
600		},
601		forceConverted: false,
602		fromConverted:  false,
603	}
604}
605
606// NewTimestampLogicalTypeForce returns a timestamp logical type with
607// "forceConverted" set to true
608func NewTimestampLogicalTypeForce(isAdjustedToUTC bool, unit TimeUnitType) LogicalType {
609	return &TimestampLogicalType{
610		typ: &format.TimestampType{
611			IsAdjustedToUTC: isAdjustedToUTC,
612			Unit:            createTimeUnit(unit),
613		},
614		forceConverted: true,
615		fromConverted:  false,
616	}
617}
618
619// TimestampLogicalType represents an int64 number that can be decoded
620// into a year, month, day, hour, minute, second, and subsecond
621type TimestampLogicalType struct {
622	baseLogicalType
623	typ *format.TimestampType
624	// forceConverted denotes whether or not the resulting serialized
625	// type when writing to parquet will be written as the legacy
626	// ConvertedType TIMESTAMP_MICROS/TIMESTAMP_MILLIS (true)
627	// or if it will write the proper current Logical Types (false, default)
628	forceConverted bool
629	// fromConverted denotes if the timestamp type was created by
630	// translating a legacy converted type of TIMESTAMP_MILLIS or
631	// TIMESTAMP_MICROS rather than by using the current logical
632	// types. Default is false.
633	fromConverted bool
634}
635
636func (t TimestampLogicalType) IsFromConvertedType() bool {
637	return t.fromConverted
638}
639
640func (t TimestampLogicalType) IsAdjustedToUTC() bool {
641	return t.typ.IsAdjustedToUTC
642}
643
644func (t TimestampLogicalType) TimeUnit() TimeUnitType {
645	return timeUnitFromThrift(t.typ.Unit)
646}
647
648func (TimestampLogicalType) SortOrder() SortOrder {
649	return SortSIGNED
650}
651
652func (t TimestampLogicalType) MarshalJSON() ([]byte, error) {
653	return json.Marshal(map[string]interface{}{
654		"Type":                     "Timestamp",
655		"isAdjustedToUTC":          t.typ.IsAdjustedToUTC,
656		"timeUnit":                 timeUnitToString(t.typ.GetUnit()),
657		"is_from_converted_type":   t.fromConverted,
658		"force_set_converted_type": t.forceConverted,
659	})
660}
661
662func (t TimestampLogicalType) IsSerialized() bool {
663	return !t.fromConverted
664}
665
666func (t TimestampLogicalType) String() string {
667	return fmt.Sprintf("Timestamp(isAdjustedToUTC=%t, timeUnit=%s, is_from_converted_type=%t, force_set_converted_type=%t)",
668		t.typ.GetIsAdjustedToUTC(), timeUnitToString(t.typ.GetUnit()), t.fromConverted, t.forceConverted)
669}
670
671func (t TimestampLogicalType) ToConvertedType() (ConvertedType, DecimalMetadata) {
672	unit := timeUnitFromThrift(t.typ.Unit)
673	if t.typ.IsAdjustedToUTC || t.forceConverted {
674		switch unit {
675		case TimeUnitMillis:
676			return ConvertedTypes.TimestampMillis, DecimalMetadata{}
677		case TimeUnitMicros:
678			return ConvertedTypes.TimestampMicros, DecimalMetadata{}
679		}
680	}
681	return ConvertedTypes.None, DecimalMetadata{}
682}
683
684func (t TimestampLogicalType) IsCompatible(c ConvertedType, dec DecimalMetadata) bool {
685	if dec.IsSet {
686		return false
687	}
688
689	switch timeUnitFromThrift(t.typ.Unit) {
690	case TimeUnitMillis:
691		if t.typ.GetIsAdjustedToUTC() || t.forceConverted {
692			return c == ConvertedTypes.TimestampMillis
693		}
694	case TimeUnitMicros:
695		if t.typ.GetIsAdjustedToUTC() || t.forceConverted {
696			return c == ConvertedTypes.TimestampMicros
697		}
698	}
699
700	return c == ConvertedTypes.None || c == ConvertedTypes.NA
701}
702
703func (TimestampLogicalType) IsApplicable(t parquet.Type, _ int32) bool {
704	return t == parquet.Types.Int64
705}
706
707func (t TimestampLogicalType) toThrift() *format.LogicalType {
708	return &format.LogicalType{TIMESTAMP: t.typ}
709}
710
711func (t TimestampLogicalType) Equals(rhs LogicalType) bool {
712	other, ok := rhs.(*TimestampLogicalType)
713	if !ok {
714		return false
715	}
716	return t.typ.IsAdjustedToUTC == other.typ.IsAdjustedToUTC &&
717		timeUnitFromThrift(t.typ.Unit) == timeUnitFromThrift(other.typ.Unit)
718}
719
720// NewIntLogicalType creates an integer logical type of the desired bitwidth
721// and whether it is signed or not.
722//
723// Bit width must be exactly 8, 16, 32 or 64 for an integer logical type
724func NewIntLogicalType(bitWidth int8, signed bool) LogicalType {
725	switch bitWidth {
726	case 8, 16, 32, 64:
727	default:
728		panic("parquet: bit width must be exactly 8, 16, 32, or 64 for Int logical type")
729	}
730	return &IntLogicalType{
731		typ: &format.IntType{
732			BitWidth: bitWidth,
733			IsSigned: signed,
734		},
735	}
736}
737
738// IntLogicalType represents an integer type of a specific bit width and
739// is either signed or unsigned.
740type IntLogicalType struct {
741	baseLogicalType
742	typ *format.IntType
743}
744
745func (t IntLogicalType) BitWidth() int8 {
746	return t.typ.BitWidth
747}
748
749func (t IntLogicalType) IsSigned() bool {
750	return t.typ.IsSigned
751}
752
753func (t IntLogicalType) SortOrder() SortOrder {
754	if t.typ.IsSigned {
755		return SortSIGNED
756	}
757	return SortUNSIGNED
758}
759
760func (t IntLogicalType) MarshalJSON() ([]byte, error) {
761	return json.Marshal(map[string]interface{}{
762		"Type": "Int", "bitWidth": t.typ.BitWidth, "isSigned": t.typ.IsSigned,
763	})
764}
765
766func (t IntLogicalType) String() string {
767	return fmt.Sprintf("Int(bitWidth=%d, isSigned=%t)", t.typ.GetBitWidth(), t.typ.GetIsSigned())
768}
769
770func (t IntLogicalType) ToConvertedType() (ConvertedType, DecimalMetadata) {
771	var d DecimalMetadata
772	if t.typ.IsSigned {
773		switch t.typ.BitWidth {
774		case 8:
775			return ConvertedTypes.Int8, d
776		case 16:
777			return ConvertedTypes.Int16, d
778		case 32:
779			return ConvertedTypes.Int32, d
780		case 64:
781			return ConvertedTypes.Int64, d
782		}
783	} else {
784		switch t.typ.BitWidth {
785		case 8:
786			return ConvertedTypes.Uint8, d
787		case 16:
788			return ConvertedTypes.Uint16, d
789		case 32:
790			return ConvertedTypes.Uint32, d
791		case 64:
792			return ConvertedTypes.Uint64, d
793		}
794	}
795	return ConvertedTypes.None, d
796}
797
798func (t IntLogicalType) IsCompatible(c ConvertedType, dec DecimalMetadata) bool {
799	if dec.IsSet {
800		return false
801	}
802	v, _ := t.ToConvertedType()
803	return c == v
804}
805
806func (t IntLogicalType) IsApplicable(typ parquet.Type, _ int32) bool {
807	return (typ == parquet.Types.Int32 && t.typ.GetBitWidth() <= 32) ||
808		(typ == parquet.Types.Int64 && t.typ.GetBitWidth() == 64)
809}
810
811func (t IntLogicalType) toThrift() *format.LogicalType {
812	return &format.LogicalType{INTEGER: t.typ}
813}
814
815func (t IntLogicalType) Equals(rhs LogicalType) bool {
816	other, ok := rhs.(*IntLogicalType)
817	if !ok {
818		return false
819	}
820
821	return t.typ.GetIsSigned() == other.typ.GetIsSigned() &&
822		t.typ.GetBitWidth() == other.typ.GetBitWidth()
823}
824
825// UnknownLogicalType is a type that is essentially a placeholder for when
826// we don't know the type.
827type UnknownLogicalType struct{ baseLogicalType }
828
829func (UnknownLogicalType) SortOrder() SortOrder {
830	return SortUNKNOWN
831}
832
833func (UnknownLogicalType) MarshalJSON() ([]byte, error) {
834	return json.Marshal(map[string]string{"Type": UnknownLogicalType{}.String()})
835}
836
837func (UnknownLogicalType) IsValid() bool { return false }
838
839func (UnknownLogicalType) IsSerialized() bool { return false }
840
841func (UnknownLogicalType) String() string {
842	return "Unknown"
843}
844
845func (UnknownLogicalType) ToConvertedType() (ConvertedType, DecimalMetadata) {
846	return ConvertedTypes.NA, DecimalMetadata{}
847}
848
849func (UnknownLogicalType) IsCompatible(c ConvertedType, dec DecimalMetadata) bool {
850	return c == ConvertedTypes.NA && !dec.IsSet
851}
852
853func (UnknownLogicalType) IsApplicable(parquet.Type, int32) bool { return true }
854
855func (UnknownLogicalType) toThrift() *format.LogicalType {
856	return &format.LogicalType{UNKNOWN: format.NewNullType()}
857}
858
859func (UnknownLogicalType) Equals(rhs LogicalType) bool {
860	_, ok := rhs.(UnknownLogicalType)
861	return ok
862}
863
864// JSONLogicalType represents a byte array column which is to be interpreted
865// as a JSON string.
866type JSONLogicalType struct{ baseLogicalType }
867
868func (JSONLogicalType) SortOrder() SortOrder {
869	return SortUNSIGNED
870}
871
872func (JSONLogicalType) MarshalJSON() ([]byte, error) {
873	return json.Marshal(map[string]string{"Type": JSONLogicalType{}.String()})
874}
875
876func (JSONLogicalType) String() string {
877	return "JSON"
878}
879
880func (JSONLogicalType) ToConvertedType() (ConvertedType, DecimalMetadata) {
881	return ConvertedTypes.JSON, DecimalMetadata{}
882}
883
884func (JSONLogicalType) IsCompatible(c ConvertedType, dec DecimalMetadata) bool {
885	return c == ConvertedTypes.JSON && !dec.IsSet
886}
887
888func (JSONLogicalType) IsApplicable(t parquet.Type, _ int32) bool {
889	return t == parquet.Types.ByteArray
890}
891
892func (JSONLogicalType) toThrift() *format.LogicalType {
893	return &format.LogicalType{JSON: format.NewJsonType()}
894}
895
896func (JSONLogicalType) Equals(rhs LogicalType) bool {
897	_, ok := rhs.(JSONLogicalType)
898	return ok
899}
900
901// BSONLogicalType represents a binary JSON string in the byte array
902type BSONLogicalType struct{ baseLogicalType }
903
904func (BSONLogicalType) SortOrder() SortOrder {
905	return SortUNSIGNED
906}
907
908func (BSONLogicalType) MarshalJSON() ([]byte, error) {
909	return json.Marshal(map[string]string{"Type": BSONLogicalType{}.String()})
910}
911
912func (BSONLogicalType) String() string {
913	return "BSON"
914}
915
916func (BSONLogicalType) ToConvertedType() (ConvertedType, DecimalMetadata) {
917	return ConvertedTypes.BSON, DecimalMetadata{}
918}
919
920func (BSONLogicalType) IsCompatible(c ConvertedType, dec DecimalMetadata) bool {
921	return c == ConvertedTypes.BSON && !dec.IsSet
922}
923
924func (BSONLogicalType) IsApplicable(t parquet.Type, _ int32) bool {
925	return t == parquet.Types.ByteArray
926}
927
928func (BSONLogicalType) toThrift() *format.LogicalType {
929	return &format.LogicalType{BSON: format.NewBsonType()}
930}
931
932func (BSONLogicalType) Equals(rhs LogicalType) bool {
933	_, ok := rhs.(BSONLogicalType)
934	return ok
935}
936
937// UUIDLogicalType can only be used with a FixedLength byte array column
938// that is exactly 16 bytes long
939type UUIDLogicalType struct{ baseLogicalType }
940
941func (UUIDLogicalType) SortOrder() SortOrder {
942	return SortUNSIGNED
943}
944
945func (UUIDLogicalType) MarshalJSON() ([]byte, error) {
946	return json.Marshal(map[string]string{"Type": UUIDLogicalType{}.String()})
947}
948
949func (UUIDLogicalType) String() string {
950	return "UUID"
951}
952
953func (UUIDLogicalType) ToConvertedType() (ConvertedType, DecimalMetadata) {
954	return ConvertedTypes.None, DecimalMetadata{}
955}
956
957func (UUIDLogicalType) IsCompatible(c ConvertedType, dec DecimalMetadata) bool {
958	if dec.IsSet {
959		return false
960	}
961	switch c {
962	case ConvertedTypes.None, ConvertedTypes.NA:
963		return true
964	}
965	return false
966}
967
968func (UUIDLogicalType) IsApplicable(t parquet.Type, tlen int32) bool {
969	return t == parquet.Types.FixedLenByteArray && tlen == 16
970}
971
972func (UUIDLogicalType) toThrift() *format.LogicalType {
973	return &format.LogicalType{UUID: format.NewUUIDType()}
974}
975
976func (UUIDLogicalType) Equals(rhs LogicalType) bool {
977	_, ok := rhs.(UUIDLogicalType)
978	return ok
979}
980
981// IntervalLogicalType is not yet in the thrift spec, but represents
982// an interval time and needs to be a fixed length byte array of 12 bytes
983type IntervalLogicalType struct{ baseLogicalType }
984
985func (IntervalLogicalType) SortOrder() SortOrder {
986	return SortUNKNOWN
987}
988
989func (IntervalLogicalType) MarshalJSON() ([]byte, error) {
990	return json.Marshal(map[string]string{"Type": IntervalLogicalType{}.String()})
991}
992
993func (IntervalLogicalType) String() string {
994	return "Interval"
995}
996
997func (IntervalLogicalType) ToConvertedType() (ConvertedType, DecimalMetadata) {
998	return ConvertedTypes.Interval, DecimalMetadata{}
999}
1000
1001func (IntervalLogicalType) IsCompatible(c ConvertedType, dec DecimalMetadata) bool {
1002	return c == ConvertedTypes.Interval && !dec.IsSet
1003}
1004
1005func (IntervalLogicalType) IsApplicable(t parquet.Type, tlen int32) bool {
1006	return t == parquet.Types.FixedLenByteArray && tlen == 12
1007}
1008
1009func (IntervalLogicalType) toThrift() *format.LogicalType {
1010	panic("no parquet IntervalLogicalType yet implemented")
1011}
1012
1013func (IntervalLogicalType) Equals(rhs LogicalType) bool {
1014	_, ok := rhs.(IntervalLogicalType)
1015	return ok
1016}
1017
1018type NullLogicalType struct{ baseLogicalType }
1019
1020func (NullLogicalType) SortOrder() SortOrder {
1021	return SortUNKNOWN
1022}
1023
1024func (NullLogicalType) MarshalJSON() ([]byte, error) {
1025	return json.Marshal(map[string]string{"Type": NullLogicalType{}.String()})
1026}
1027
1028func (NullLogicalType) String() string {
1029	return "Null"
1030}
1031
1032func (NullLogicalType) ToConvertedType() (ConvertedType, DecimalMetadata) {
1033	return ConvertedTypes.None, DecimalMetadata{}
1034}
1035
1036func (NullLogicalType) IsCompatible(c ConvertedType, dec DecimalMetadata) bool {
1037	if dec.IsSet {
1038		return false
1039	}
1040	switch c {
1041	case ConvertedTypes.None, ConvertedTypes.NA:
1042		return true
1043	}
1044	return false
1045}
1046
1047func (NullLogicalType) IsApplicable(parquet.Type, int32) bool {
1048	return true
1049}
1050
1051func (NullLogicalType) toThrift() *format.LogicalType {
1052	return &format.LogicalType{UNKNOWN: format.NewNullType()}
1053}
1054
1055func (NullLogicalType) Equals(rhs LogicalType) bool {
1056	_, ok := rhs.(NullLogicalType)
1057	return ok
1058}
1059
1060type NoLogicalType struct{ baseLogicalType }
1061
1062func (NoLogicalType) SortOrder() SortOrder {
1063	return SortUNKNOWN
1064}
1065
1066func (NoLogicalType) MarshalJSON() ([]byte, error) {
1067	return json.Marshal(map[string]string{"Type": NoLogicalType{}.String()})
1068}
1069
1070func (NoLogicalType) IsSerialized() bool { return false }
1071
1072func (NoLogicalType) String() string {
1073	return "None"
1074}
1075
1076func (NoLogicalType) ToConvertedType() (ConvertedType, DecimalMetadata) {
1077	return ConvertedTypes.None, DecimalMetadata{}
1078}
1079
1080func (NoLogicalType) IsCompatible(c ConvertedType, dec DecimalMetadata) bool {
1081	return c == ConvertedTypes.None && !dec.IsSet
1082}
1083
1084func (NoLogicalType) IsApplicable(parquet.Type, int32) bool {
1085	return true
1086}
1087
1088func (NoLogicalType) toThrift() *format.LogicalType {
1089	panic("cannot convert NoLogicalType to thrift")
1090}
1091
1092func (NoLogicalType) Equals(rhs LogicalType) bool {
1093	_, ok := rhs.(NoLogicalType)
1094	return ok
1095}
1096
1097func (NoLogicalType) IsNone() bool { return true }
1098