1// Code generated by statistics_types.gen.go.tmpl. DO NOT EDIT.
2
3// Licensed to the Apache Software Foundation (ASF) under one
4// or more contributor license agreements.  See the NOTICE file
5// distributed with this work for additional information
6// regarding copyright ownership.  The ASF licenses this file
7// to you under the Apache License, Version 2.0 (the
8// "License"); you may not use this file except in compliance
9// with the License.  You may obtain a copy of the License at
10//
11// http://www.apache.org/licenses/LICENSE-2.0
12//
13// Unless required by applicable law or agreed to in writing, software
14// distributed under the License is distributed on an "AS IS" BASIS,
15// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16// See the License for the specific language governing permissions and
17// limitations under the License.
18
19package metadata
20
21import (
22	"math"
23
24	"github.com/apache/arrow/go/v6/arrow"
25	"github.com/apache/arrow/go/v6/arrow/memory"
26	"github.com/apache/arrow/go/v6/parquet"
27	"github.com/apache/arrow/go/v6/parquet/internal/encoding"
28	"github.com/apache/arrow/go/v6/parquet/internal/utils"
29	"github.com/apache/arrow/go/v6/parquet/schema"
30	"golang.org/x/xerrors"
31)
32
33type minmaxPairInt32 [2]int32
34
35// Int32Statistics is the typed interface for managing stats for a column
36// of Int32 type.
37type Int32Statistics struct {
38	statistics
39	min int32
40	max int32
41
42	bitSetReader utils.SetBitRunReader
43}
44
45// NewInt32Statistics constructs an appropriate stat object type using the
46// given column descriptor and allocator.
47//
48// Panics if the physical type of descr is not parquet.Type.Int32
49func NewInt32Statistics(descr *schema.Column, mem memory.Allocator) *Int32Statistics {
50	if descr.PhysicalType() != parquet.Types.Int32 {
51		panic(xerrors.Errorf("parquet: invalid type %s for constructing a Int32 stat object", descr.PhysicalType()))
52	}
53
54	return &Int32Statistics{
55		statistics: statistics{
56			descr:            descr,
57			hasNullCount:     true,
58			hasDistinctCount: true,
59			order:            descr.SortOrder(),
60			encoder:          encoding.NewEncoder(descr.PhysicalType(), parquet.Encodings.Plain, false, descr, mem),
61			mem:              mem,
62		},
63	}
64}
65
66// NewInt32StatisticsFromEncoded will construct a propertly typed statistics object
67// initializing it with the provided information.
68func NewInt32StatisticsFromEncoded(descr *schema.Column, mem memory.Allocator, nvalues int64, encoded StatProvider) *Int32Statistics {
69	ret := NewInt32Statistics(descr, mem)
70	ret.nvalues += nvalues
71	if encoded.IsSetNullCount() {
72		ret.incNulls(encoded.GetNullCount())
73	}
74	if encoded.IsSetDistinctCount() {
75		ret.incDistinct(encoded.GetDistinctCount())
76	}
77
78	encodedMin := encoded.GetMin()
79	if encodedMin != nil && len(encodedMin) > 0 {
80		ret.min = ret.plainDecode(encodedMin)
81	}
82	encodedMax := encoded.GetMax()
83	if encodedMax != nil && len(encodedMax) > 0 {
84		ret.max = ret.plainDecode(encodedMax)
85	}
86	ret.hasMinMax = encoded.IsSetMax() || encoded.IsSetMin()
87	return ret
88}
89
90func (s *Int32Statistics) plainEncode(src int32) []byte {
91	s.encoder.(encoding.Int32Encoder).Put([]int32{src})
92	buf, err := s.encoder.FlushValues()
93	if err != nil {
94		panic(err) // recovered by Encode
95	}
96	defer buf.Release()
97
98	out := make([]byte, buf.Len())
99	copy(out, buf.Bytes())
100	return out
101}
102
103func (s *Int32Statistics) plainDecode(src []byte) int32 {
104	var buf [1]int32
105
106	decoder := encoding.NewDecoder(s.descr.PhysicalType(), parquet.Encodings.Plain, s.descr, s.mem)
107	decoder.SetData(1, src)
108	decoder.(encoding.Int32Decoder).Decode(buf[:])
109	return buf[0]
110}
111
112func (s *Int32Statistics) minval(a, b int32) int32 {
113	if s.less(a, b) {
114		return a
115	}
116	return b
117}
118
119func (s *Int32Statistics) maxval(a, b int32) int32 {
120	if s.less(a, b) {
121		return b
122	}
123	return a
124}
125
126// MinMaxEqual returns true if both stat objects have the same Min and Max values
127func (s *Int32Statistics) MinMaxEqual(rhs *Int32Statistics) bool {
128	return s.equal(s.min, rhs.min) && s.equal(s.max, rhs.max)
129}
130
131// Equals returns true only if both objects are the same type, have the same min and
132// max values, null count, distinct count and number of values.
133func (s *Int32Statistics) Equals(other TypedStatistics) bool {
134	if s.Type() != other.Type() {
135		return false
136	}
137	rhs, ok := other.(*Int32Statistics)
138	if !ok {
139		return false
140	}
141
142	if s.HasMinMax() != rhs.HasMinMax() {
143		return false
144	}
145	return (s.hasMinMax && s.MinMaxEqual(rhs)) &&
146		s.NullCount() == rhs.NullCount() &&
147		s.DistinctCount() == rhs.DistinctCount() &&
148		s.NumValues() == rhs.NumValues()
149}
150
151func (s *Int32Statistics) getMinMax(values []int32) (min, max int32) {
152	if s.order == schema.SortSIGNED {
153		min, max = utils.GetMinMaxInt32(values)
154	} else {
155		umin, umax := utils.GetMinMaxUint32(arrow.Uint32Traits.CastFromBytes(arrow.Int32Traits.CastToBytes(values)))
156		min, max = int32(umin), int32(umax)
157	}
158	return
159}
160
161func (s *Int32Statistics) getMinMaxSpaced(values []int32, validBits []byte, validBitsOffset int64) (min, max int32) {
162	min = s.defaultMin()
163	max = s.defaultMax()
164	var fn func([]int32) (int32, int32)
165	if s.order == schema.SortSIGNED {
166		fn = utils.GetMinMaxInt32
167	} else {
168		fn = func(v []int32) (int32, int32) {
169			umin, umax := utils.GetMinMaxUint32(arrow.Uint32Traits.CastFromBytes(arrow.Int32Traits.CastToBytes(values)))
170			return int32(umin), int32(umax)
171		}
172	}
173
174	if s.bitSetReader == nil {
175		s.bitSetReader = utils.NewSetBitRunReader(validBits, validBitsOffset, int64(len(values)))
176	} else {
177		s.bitSetReader.Reset(validBits, validBitsOffset, int64(len(values)))
178	}
179
180	for {
181		run := s.bitSetReader.NextRun()
182		if run.Length == 0 {
183			break
184		}
185		localMin, localMax := fn(values[int(run.Pos):int(run.Pos+run.Length)])
186		if min > localMin {
187			min = localMin
188		}
189		if max < localMax {
190			max = localMax
191		}
192	}
193	return
194}
195
196func (s *Int32Statistics) Min() int32 { return s.min }
197func (s *Int32Statistics) Max() int32 { return s.max }
198
199// Merge merges the stats from other into this stat object, updating
200// the null count, distinct count, number of values and the min/max if
201// appropriate.
202func (s *Int32Statistics) Merge(other TypedStatistics) {
203	rhs, ok := other.(*Int32Statistics)
204	if !ok {
205		panic("incompatible stat type merge")
206	}
207
208	s.statistics.merge(rhs)
209	if rhs.HasMinMax() {
210		s.SetMinMax(rhs.Min(), rhs.Max())
211	}
212}
213
214// Update is used to add more values to the current stat object, finding the
215// min and max values etc.
216func (s *Int32Statistics) Update(values []int32, numNull int64) {
217	s.incNulls(numNull)
218	s.nvalues += int64(len(values))
219
220	if len(values) == 0 {
221		return
222	}
223
224	s.SetMinMax(s.getMinMax(values))
225}
226
227// UpdateSpaced is just like Update, but for spaced values using validBits to determine
228// and skip null values.
229func (s *Int32Statistics) UpdateSpaced(values []int32, validBits []byte, validBitsOffset, numNull int64) {
230	s.incNulls(numNull)
231	notnull := int64(len(values)) - numNull
232	s.nvalues += notnull
233
234	if notnull == 0 {
235		return
236	}
237
238	s.SetMinMax(s.getMinMaxSpaced(values, validBits, validBitsOffset))
239}
240
241// SetMinMax updates the min and max values only if they are not currently set
242// or if argMin is less than the current min / argMax is greater than the current max
243func (s *Int32Statistics) SetMinMax(argMin, argMax int32) {
244	maybeMinMax := s.cleanStat([2]int32{argMin, argMax})
245	if maybeMinMax == nil {
246		return
247	}
248
249	min := (*maybeMinMax)[0]
250	max := (*maybeMinMax)[1]
251
252	if !s.hasMinMax {
253		s.hasMinMax = true
254		s.min = min
255		s.max = max
256	} else {
257		if !s.less(s.min, min) {
258			s.min = min
259		}
260		if s.less(s.max, max) {
261			s.max = max
262		}
263	}
264}
265
266// EncodeMin returns the encoded min value with plain encoding.
267//
268// ByteArray stats do not include the length in the encoding.
269func (s *Int32Statistics) EncodeMin() []byte {
270	if s.HasMinMax() {
271		return s.plainEncode(s.min)
272	}
273	return nil
274}
275
276// EncodeMax returns the current encoded max value with plain encoding
277//
278// ByteArray stats do not include the length in the encoding
279func (s *Int32Statistics) EncodeMax() []byte {
280	if s.HasMinMax() {
281		return s.plainEncode(s.max)
282	}
283	return nil
284}
285
286// Encode returns a populated EncodedStatistics object
287func (s *Int32Statistics) Encode() (enc EncodedStatistics, err error) {
288	defer func() {
289		if r := recover(); r != nil {
290			switch r := r.(type) {
291			case error:
292				err = r
293			case string:
294				err = xerrors.New(r)
295			default:
296				err = xerrors.Errorf("unknown error type thrown from panic: %v", r)
297			}
298		}
299	}()
300	if s.HasMinMax() {
301		enc.SetMax(s.EncodeMax())
302		enc.SetMin(s.EncodeMin())
303	}
304	if s.HasNullCount() {
305		enc.SetNullCount(s.NullCount())
306	}
307	if s.HasDistinctCount() {
308		enc.SetDistinctCount(s.DistinctCount())
309	}
310	return
311}
312
313type minmaxPairInt64 [2]int64
314
315// Int64Statistics is the typed interface for managing stats for a column
316// of Int64 type.
317type Int64Statistics struct {
318	statistics
319	min int64
320	max int64
321
322	bitSetReader utils.SetBitRunReader
323}
324
325// NewInt64Statistics constructs an appropriate stat object type using the
326// given column descriptor and allocator.
327//
328// Panics if the physical type of descr is not parquet.Type.Int64
329func NewInt64Statistics(descr *schema.Column, mem memory.Allocator) *Int64Statistics {
330	if descr.PhysicalType() != parquet.Types.Int64 {
331		panic(xerrors.Errorf("parquet: invalid type %s for constructing a Int64 stat object", descr.PhysicalType()))
332	}
333
334	return &Int64Statistics{
335		statistics: statistics{
336			descr:            descr,
337			hasNullCount:     true,
338			hasDistinctCount: true,
339			order:            descr.SortOrder(),
340			encoder:          encoding.NewEncoder(descr.PhysicalType(), parquet.Encodings.Plain, false, descr, mem),
341			mem:              mem,
342		},
343	}
344}
345
346// NewInt64StatisticsFromEncoded will construct a propertly typed statistics object
347// initializing it with the provided information.
348func NewInt64StatisticsFromEncoded(descr *schema.Column, mem memory.Allocator, nvalues int64, encoded StatProvider) *Int64Statistics {
349	ret := NewInt64Statistics(descr, mem)
350	ret.nvalues += nvalues
351	if encoded.IsSetNullCount() {
352		ret.incNulls(encoded.GetNullCount())
353	}
354	if encoded.IsSetDistinctCount() {
355		ret.incDistinct(encoded.GetDistinctCount())
356	}
357
358	encodedMin := encoded.GetMin()
359	if encodedMin != nil && len(encodedMin) > 0 {
360		ret.min = ret.plainDecode(encodedMin)
361	}
362	encodedMax := encoded.GetMax()
363	if encodedMax != nil && len(encodedMax) > 0 {
364		ret.max = ret.plainDecode(encodedMax)
365	}
366	ret.hasMinMax = encoded.IsSetMax() || encoded.IsSetMin()
367	return ret
368}
369
370func (s *Int64Statistics) plainEncode(src int64) []byte {
371	s.encoder.(encoding.Int64Encoder).Put([]int64{src})
372	buf, err := s.encoder.FlushValues()
373	if err != nil {
374		panic(err) // recovered by Encode
375	}
376	defer buf.Release()
377
378	out := make([]byte, buf.Len())
379	copy(out, buf.Bytes())
380	return out
381}
382
383func (s *Int64Statistics) plainDecode(src []byte) int64 {
384	var buf [1]int64
385
386	decoder := encoding.NewDecoder(s.descr.PhysicalType(), parquet.Encodings.Plain, s.descr, s.mem)
387	decoder.SetData(1, src)
388	decoder.(encoding.Int64Decoder).Decode(buf[:])
389	return buf[0]
390}
391
392func (s *Int64Statistics) minval(a, b int64) int64 {
393	if s.less(a, b) {
394		return a
395	}
396	return b
397}
398
399func (s *Int64Statistics) maxval(a, b int64) int64 {
400	if s.less(a, b) {
401		return b
402	}
403	return a
404}
405
406// MinMaxEqual returns true if both stat objects have the same Min and Max values
407func (s *Int64Statistics) MinMaxEqual(rhs *Int64Statistics) bool {
408	return s.equal(s.min, rhs.min) && s.equal(s.max, rhs.max)
409}
410
411// Equals returns true only if both objects are the same type, have the same min and
412// max values, null count, distinct count and number of values.
413func (s *Int64Statistics) Equals(other TypedStatistics) bool {
414	if s.Type() != other.Type() {
415		return false
416	}
417	rhs, ok := other.(*Int64Statistics)
418	if !ok {
419		return false
420	}
421
422	if s.HasMinMax() != rhs.HasMinMax() {
423		return false
424	}
425	return (s.hasMinMax && s.MinMaxEqual(rhs)) &&
426		s.NullCount() == rhs.NullCount() &&
427		s.DistinctCount() == rhs.DistinctCount() &&
428		s.NumValues() == rhs.NumValues()
429}
430
431func (s *Int64Statistics) getMinMax(values []int64) (min, max int64) {
432	if s.order == schema.SortSIGNED {
433		min, max = utils.GetMinMaxInt64(values)
434	} else {
435		umin, umax := utils.GetMinMaxUint64(arrow.Uint64Traits.CastFromBytes(arrow.Int64Traits.CastToBytes(values)))
436		min, max = int64(umin), int64(umax)
437	}
438	return
439}
440
441func (s *Int64Statistics) getMinMaxSpaced(values []int64, validBits []byte, validBitsOffset int64) (min, max int64) {
442	min = s.defaultMin()
443	max = s.defaultMax()
444	var fn func([]int64) (int64, int64)
445	if s.order == schema.SortSIGNED {
446		fn = utils.GetMinMaxInt64
447	} else {
448		fn = func(v []int64) (int64, int64) {
449			umin, umax := utils.GetMinMaxUint64(arrow.Uint64Traits.CastFromBytes(arrow.Int64Traits.CastToBytes(values)))
450			return int64(umin), int64(umax)
451		}
452	}
453
454	if s.bitSetReader == nil {
455		s.bitSetReader = utils.NewSetBitRunReader(validBits, validBitsOffset, int64(len(values)))
456	} else {
457		s.bitSetReader.Reset(validBits, validBitsOffset, int64(len(values)))
458	}
459
460	for {
461		run := s.bitSetReader.NextRun()
462		if run.Length == 0 {
463			break
464		}
465		localMin, localMax := fn(values[int(run.Pos):int(run.Pos+run.Length)])
466		if min > localMin {
467			min = localMin
468		}
469		if max < localMax {
470			max = localMax
471		}
472	}
473	return
474}
475
476func (s *Int64Statistics) Min() int64 { return s.min }
477func (s *Int64Statistics) Max() int64 { return s.max }
478
479// Merge merges the stats from other into this stat object, updating
480// the null count, distinct count, number of values and the min/max if
481// appropriate.
482func (s *Int64Statistics) Merge(other TypedStatistics) {
483	rhs, ok := other.(*Int64Statistics)
484	if !ok {
485		panic("incompatible stat type merge")
486	}
487
488	s.statistics.merge(rhs)
489	if rhs.HasMinMax() {
490		s.SetMinMax(rhs.Min(), rhs.Max())
491	}
492}
493
494// Update is used to add more values to the current stat object, finding the
495// min and max values etc.
496func (s *Int64Statistics) Update(values []int64, numNull int64) {
497	s.incNulls(numNull)
498	s.nvalues += int64(len(values))
499
500	if len(values) == 0 {
501		return
502	}
503
504	s.SetMinMax(s.getMinMax(values))
505}
506
507// UpdateSpaced is just like Update, but for spaced values using validBits to determine
508// and skip null values.
509func (s *Int64Statistics) UpdateSpaced(values []int64, validBits []byte, validBitsOffset, numNull int64) {
510	s.incNulls(numNull)
511	notnull := int64(len(values)) - numNull
512	s.nvalues += notnull
513
514	if notnull == 0 {
515		return
516	}
517
518	s.SetMinMax(s.getMinMaxSpaced(values, validBits, validBitsOffset))
519}
520
521// SetMinMax updates the min and max values only if they are not currently set
522// or if argMin is less than the current min / argMax is greater than the current max
523func (s *Int64Statistics) SetMinMax(argMin, argMax int64) {
524	maybeMinMax := s.cleanStat([2]int64{argMin, argMax})
525	if maybeMinMax == nil {
526		return
527	}
528
529	min := (*maybeMinMax)[0]
530	max := (*maybeMinMax)[1]
531
532	if !s.hasMinMax {
533		s.hasMinMax = true
534		s.min = min
535		s.max = max
536	} else {
537		if !s.less(s.min, min) {
538			s.min = min
539		}
540		if s.less(s.max, max) {
541			s.max = max
542		}
543	}
544}
545
546// EncodeMin returns the encoded min value with plain encoding.
547//
548// ByteArray stats do not include the length in the encoding.
549func (s *Int64Statistics) EncodeMin() []byte {
550	if s.HasMinMax() {
551		return s.plainEncode(s.min)
552	}
553	return nil
554}
555
556// EncodeMax returns the current encoded max value with plain encoding
557//
558// ByteArray stats do not include the length in the encoding
559func (s *Int64Statistics) EncodeMax() []byte {
560	if s.HasMinMax() {
561		return s.plainEncode(s.max)
562	}
563	return nil
564}
565
566// Encode returns a populated EncodedStatistics object
567func (s *Int64Statistics) Encode() (enc EncodedStatistics, err error) {
568	defer func() {
569		if r := recover(); r != nil {
570			switch r := r.(type) {
571			case error:
572				err = r
573			case string:
574				err = xerrors.New(r)
575			default:
576				err = xerrors.Errorf("unknown error type thrown from panic: %v", r)
577			}
578		}
579	}()
580	if s.HasMinMax() {
581		enc.SetMax(s.EncodeMax())
582		enc.SetMin(s.EncodeMin())
583	}
584	if s.HasNullCount() {
585		enc.SetNullCount(s.NullCount())
586	}
587	if s.HasDistinctCount() {
588		enc.SetDistinctCount(s.DistinctCount())
589	}
590	return
591}
592
593type minmaxPairInt96 [2]parquet.Int96
594
595// Int96Statistics is the typed interface for managing stats for a column
596// of Int96 type.
597type Int96Statistics struct {
598	statistics
599	min parquet.Int96
600	max parquet.Int96
601
602	bitSetReader utils.SetBitRunReader
603}
604
605// NewInt96Statistics constructs an appropriate stat object type using the
606// given column descriptor and allocator.
607//
608// Panics if the physical type of descr is not parquet.Type.Int96
609func NewInt96Statistics(descr *schema.Column, mem memory.Allocator) *Int96Statistics {
610	if descr.PhysicalType() != parquet.Types.Int96 {
611		panic(xerrors.Errorf("parquet: invalid type %s for constructing a Int96 stat object", descr.PhysicalType()))
612	}
613
614	return &Int96Statistics{
615		statistics: statistics{
616			descr:            descr,
617			hasNullCount:     true,
618			hasDistinctCount: true,
619			order:            descr.SortOrder(),
620			encoder:          encoding.NewEncoder(descr.PhysicalType(), parquet.Encodings.Plain, false, descr, mem),
621			mem:              mem,
622		},
623	}
624}
625
626// NewInt96StatisticsFromEncoded will construct a propertly typed statistics object
627// initializing it with the provided information.
628func NewInt96StatisticsFromEncoded(descr *schema.Column, mem memory.Allocator, nvalues int64, encoded StatProvider) *Int96Statistics {
629	ret := NewInt96Statistics(descr, mem)
630	ret.nvalues += nvalues
631	if encoded.IsSetNullCount() {
632		ret.incNulls(encoded.GetNullCount())
633	}
634	if encoded.IsSetDistinctCount() {
635		ret.incDistinct(encoded.GetDistinctCount())
636	}
637
638	encodedMin := encoded.GetMin()
639	if encodedMin != nil && len(encodedMin) > 0 {
640		ret.min = ret.plainDecode(encodedMin)
641	}
642	encodedMax := encoded.GetMax()
643	if encodedMax != nil && len(encodedMax) > 0 {
644		ret.max = ret.plainDecode(encodedMax)
645	}
646	ret.hasMinMax = encoded.IsSetMax() || encoded.IsSetMin()
647	return ret
648}
649
650func (s *Int96Statistics) plainEncode(src parquet.Int96) []byte {
651	s.encoder.(encoding.Int96Encoder).Put([]parquet.Int96{src})
652	buf, err := s.encoder.FlushValues()
653	if err != nil {
654		panic(err) // recovered by Encode
655	}
656	defer buf.Release()
657
658	out := make([]byte, buf.Len())
659	copy(out, buf.Bytes())
660	return out
661}
662
663func (s *Int96Statistics) plainDecode(src []byte) parquet.Int96 {
664	var buf [1]parquet.Int96
665
666	decoder := encoding.NewDecoder(s.descr.PhysicalType(), parquet.Encodings.Plain, s.descr, s.mem)
667	decoder.SetData(1, src)
668	decoder.(encoding.Int96Decoder).Decode(buf[:])
669	return buf[0]
670}
671
672func (s *Int96Statistics) minval(a, b parquet.Int96) parquet.Int96 {
673	if s.less(a, b) {
674		return a
675	}
676	return b
677}
678
679func (s *Int96Statistics) maxval(a, b parquet.Int96) parquet.Int96 {
680	if s.less(a, b) {
681		return b
682	}
683	return a
684}
685
686// MinMaxEqual returns true if both stat objects have the same Min and Max values
687func (s *Int96Statistics) MinMaxEqual(rhs *Int96Statistics) bool {
688	return s.equal(s.min, rhs.min) && s.equal(s.max, rhs.max)
689}
690
691// Equals returns true only if both objects are the same type, have the same min and
692// max values, null count, distinct count and number of values.
693func (s *Int96Statistics) Equals(other TypedStatistics) bool {
694	if s.Type() != other.Type() {
695		return false
696	}
697	rhs, ok := other.(*Int96Statistics)
698	if !ok {
699		return false
700	}
701
702	if s.HasMinMax() != rhs.HasMinMax() {
703		return false
704	}
705	return (s.hasMinMax && s.MinMaxEqual(rhs)) &&
706		s.NullCount() == rhs.NullCount() &&
707		s.DistinctCount() == rhs.DistinctCount() &&
708		s.NumValues() == rhs.NumValues()
709}
710
711func (s *Int96Statistics) getMinMax(values []parquet.Int96) (min, max parquet.Int96) {
712	defMin := s.defaultMin()
713	defMax := s.defaultMax()
714
715	min = defMin
716	max = defMax
717
718	for _, v := range values {
719		min = s.minval(min, v)
720		max = s.maxval(max, v)
721	}
722	return
723}
724
725func (s *Int96Statistics) getMinMaxSpaced(values []parquet.Int96, validBits []byte, validBitsOffset int64) (min, max parquet.Int96) {
726	min = s.defaultMin()
727	max = s.defaultMax()
728
729	if s.bitSetReader == nil {
730		s.bitSetReader = utils.NewSetBitRunReader(validBits, validBitsOffset, int64(len(values)))
731	} else {
732		s.bitSetReader.Reset(validBits, validBitsOffset, int64(len(values)))
733	}
734
735	for {
736		run := s.bitSetReader.NextRun()
737		if run.Length == 0 {
738			break
739		}
740		for _, v := range values[int(run.Pos):int(run.Pos+run.Length)] {
741			min = s.minval(min, v)
742			max = s.maxval(max, v)
743		}
744	}
745	return
746}
747
748func (s *Int96Statistics) Min() parquet.Int96 { return s.min }
749func (s *Int96Statistics) Max() parquet.Int96 { return s.max }
750
751// Merge merges the stats from other into this stat object, updating
752// the null count, distinct count, number of values and the min/max if
753// appropriate.
754func (s *Int96Statistics) Merge(other TypedStatistics) {
755	rhs, ok := other.(*Int96Statistics)
756	if !ok {
757		panic("incompatible stat type merge")
758	}
759
760	s.statistics.merge(rhs)
761	if rhs.HasMinMax() {
762		s.SetMinMax(rhs.Min(), rhs.Max())
763	}
764}
765
766// Update is used to add more values to the current stat object, finding the
767// min and max values etc.
768func (s *Int96Statistics) Update(values []parquet.Int96, numNull int64) {
769	s.incNulls(numNull)
770	s.nvalues += int64(len(values))
771
772	if len(values) == 0 {
773		return
774	}
775
776	s.SetMinMax(s.getMinMax(values))
777}
778
779// UpdateSpaced is just like Update, but for spaced values using validBits to determine
780// and skip null values.
781func (s *Int96Statistics) UpdateSpaced(values []parquet.Int96, validBits []byte, validBitsOffset, numNull int64) {
782	s.incNulls(numNull)
783	notnull := int64(len(values)) - numNull
784	s.nvalues += notnull
785
786	if notnull == 0 {
787		return
788	}
789
790	s.SetMinMax(s.getMinMaxSpaced(values, validBits, validBitsOffset))
791}
792
793// SetMinMax updates the min and max values only if they are not currently set
794// or if argMin is less than the current min / argMax is greater than the current max
795func (s *Int96Statistics) SetMinMax(argMin, argMax parquet.Int96) {
796	maybeMinMax := s.cleanStat([2]parquet.Int96{argMin, argMax})
797	if maybeMinMax == nil {
798		return
799	}
800
801	min := (*maybeMinMax)[0]
802	max := (*maybeMinMax)[1]
803
804	if !s.hasMinMax {
805		s.hasMinMax = true
806		s.min = min
807		s.max = max
808	} else {
809		if !s.less(s.min, min) {
810			s.min = min
811		}
812		if s.less(s.max, max) {
813			s.max = max
814		}
815	}
816}
817
818// EncodeMin returns the encoded min value with plain encoding.
819//
820// ByteArray stats do not include the length in the encoding.
821func (s *Int96Statistics) EncodeMin() []byte {
822	if s.HasMinMax() {
823		return s.plainEncode(s.min)
824	}
825	return nil
826}
827
828// EncodeMax returns the current encoded max value with plain encoding
829//
830// ByteArray stats do not include the length in the encoding
831func (s *Int96Statistics) EncodeMax() []byte {
832	if s.HasMinMax() {
833		return s.plainEncode(s.max)
834	}
835	return nil
836}
837
838// Encode returns a populated EncodedStatistics object
839func (s *Int96Statistics) Encode() (enc EncodedStatistics, err error) {
840	defer func() {
841		if r := recover(); r != nil {
842			switch r := r.(type) {
843			case error:
844				err = r
845			case string:
846				err = xerrors.New(r)
847			default:
848				err = xerrors.Errorf("unknown error type thrown from panic: %v", r)
849			}
850		}
851	}()
852	if s.HasMinMax() {
853		enc.SetMax(s.EncodeMax())
854		enc.SetMin(s.EncodeMin())
855	}
856	if s.HasNullCount() {
857		enc.SetNullCount(s.NullCount())
858	}
859	if s.HasDistinctCount() {
860		enc.SetDistinctCount(s.DistinctCount())
861	}
862	return
863}
864
865type minmaxPairFloat32 [2]float32
866
867// Float32Statistics is the typed interface for managing stats for a column
868// of Float32 type.
869type Float32Statistics struct {
870	statistics
871	min float32
872	max float32
873
874	bitSetReader utils.SetBitRunReader
875}
876
877// NewFloat32Statistics constructs an appropriate stat object type using the
878// given column descriptor and allocator.
879//
880// Panics if the physical type of descr is not parquet.Type.Float
881func NewFloat32Statistics(descr *schema.Column, mem memory.Allocator) *Float32Statistics {
882	if descr.PhysicalType() != parquet.Types.Float {
883		panic(xerrors.Errorf("parquet: invalid type %s for constructing a Float32 stat object", descr.PhysicalType()))
884	}
885
886	return &Float32Statistics{
887		statistics: statistics{
888			descr:            descr,
889			hasNullCount:     true,
890			hasDistinctCount: true,
891			order:            descr.SortOrder(),
892			encoder:          encoding.NewEncoder(descr.PhysicalType(), parquet.Encodings.Plain, false, descr, mem),
893			mem:              mem,
894		},
895	}
896}
897
898// NewFloat32StatisticsFromEncoded will construct a propertly typed statistics object
899// initializing it with the provided information.
900func NewFloat32StatisticsFromEncoded(descr *schema.Column, mem memory.Allocator, nvalues int64, encoded StatProvider) *Float32Statistics {
901	ret := NewFloat32Statistics(descr, mem)
902	ret.nvalues += nvalues
903	if encoded.IsSetNullCount() {
904		ret.incNulls(encoded.GetNullCount())
905	}
906	if encoded.IsSetDistinctCount() {
907		ret.incDistinct(encoded.GetDistinctCount())
908	}
909
910	encodedMin := encoded.GetMin()
911	if encodedMin != nil && len(encodedMin) > 0 {
912		ret.min = ret.plainDecode(encodedMin)
913	}
914	encodedMax := encoded.GetMax()
915	if encodedMax != nil && len(encodedMax) > 0 {
916		ret.max = ret.plainDecode(encodedMax)
917	}
918	ret.hasMinMax = encoded.IsSetMax() || encoded.IsSetMin()
919	return ret
920}
921
922func (s *Float32Statistics) plainEncode(src float32) []byte {
923	s.encoder.(encoding.Float32Encoder).Put([]float32{src})
924	buf, err := s.encoder.FlushValues()
925	if err != nil {
926		panic(err) // recovered by Encode
927	}
928	defer buf.Release()
929
930	out := make([]byte, buf.Len())
931	copy(out, buf.Bytes())
932	return out
933}
934
935func (s *Float32Statistics) plainDecode(src []byte) float32 {
936	var buf [1]float32
937
938	decoder := encoding.NewDecoder(s.descr.PhysicalType(), parquet.Encodings.Plain, s.descr, s.mem)
939	decoder.SetData(1, src)
940	decoder.(encoding.Float32Decoder).Decode(buf[:])
941	return buf[0]
942}
943
944func (s *Float32Statistics) minval(a, b float32) float32 {
945	if s.less(a, b) {
946		return a
947	}
948	return b
949}
950
951func (s *Float32Statistics) maxval(a, b float32) float32 {
952	if s.less(a, b) {
953		return b
954	}
955	return a
956}
957
958// MinMaxEqual returns true if both stat objects have the same Min and Max values
959func (s *Float32Statistics) MinMaxEqual(rhs *Float32Statistics) bool {
960	return s.equal(s.min, rhs.min) && s.equal(s.max, rhs.max)
961}
962
963// Equals returns true only if both objects are the same type, have the same min and
964// max values, null count, distinct count and number of values.
965func (s *Float32Statistics) Equals(other TypedStatistics) bool {
966	if s.Type() != other.Type() {
967		return false
968	}
969	rhs, ok := other.(*Float32Statistics)
970	if !ok {
971		return false
972	}
973
974	if s.HasMinMax() != rhs.HasMinMax() {
975		return false
976	}
977	return (s.hasMinMax && s.MinMaxEqual(rhs)) &&
978		s.NullCount() == rhs.NullCount() &&
979		s.DistinctCount() == rhs.DistinctCount() &&
980		s.NumValues() == rhs.NumValues()
981}
982
983func (s *Float32Statistics) coalesce(val, fallback float32) float32 {
984	if math.IsNaN(float64(val)) {
985		return fallback
986	}
987	return val
988}
989
990func (s *Float32Statistics) getMinMax(values []float32) (min, max float32) {
991	defMin := s.defaultMin()
992	defMax := s.defaultMax()
993
994	min = defMin
995	max = defMax
996
997	for _, v := range values {
998		min = s.minval(min, s.coalesce(v, defMin))
999		max = s.maxval(max, s.coalesce(v, defMax))
1000	}
1001	return
1002}
1003
1004func (s *Float32Statistics) getMinMaxSpaced(values []float32, validBits []byte, validBitsOffset int64) (min, max float32) {
1005	min = s.defaultMin()
1006	max = s.defaultMax()
1007
1008	if s.bitSetReader == nil {
1009		s.bitSetReader = utils.NewSetBitRunReader(validBits, validBitsOffset, int64(len(values)))
1010	} else {
1011		s.bitSetReader.Reset(validBits, validBitsOffset, int64(len(values)))
1012	}
1013
1014	for {
1015		run := s.bitSetReader.NextRun()
1016		if run.Length == 0 {
1017			break
1018		}
1019		for _, v := range values[int(run.Pos):int(run.Pos+run.Length)] {
1020			min = s.minval(min, coalesce(v, s.defaultMin()).(float32))
1021			max = s.maxval(max, coalesce(v, s.defaultMax()).(float32))
1022		}
1023	}
1024	return
1025}
1026
1027func (s *Float32Statistics) Min() float32 { return s.min }
1028func (s *Float32Statistics) Max() float32 { return s.max }
1029
1030// Merge merges the stats from other into this stat object, updating
1031// the null count, distinct count, number of values and the min/max if
1032// appropriate.
1033func (s *Float32Statistics) Merge(other TypedStatistics) {
1034	rhs, ok := other.(*Float32Statistics)
1035	if !ok {
1036		panic("incompatible stat type merge")
1037	}
1038
1039	s.statistics.merge(rhs)
1040	if rhs.HasMinMax() {
1041		s.SetMinMax(rhs.Min(), rhs.Max())
1042	}
1043}
1044
1045// Update is used to add more values to the current stat object, finding the
1046// min and max values etc.
1047func (s *Float32Statistics) Update(values []float32, numNull int64) {
1048	s.incNulls(numNull)
1049	s.nvalues += int64(len(values))
1050
1051	if len(values) == 0 {
1052		return
1053	}
1054
1055	s.SetMinMax(s.getMinMax(values))
1056}
1057
1058// UpdateSpaced is just like Update, but for spaced values using validBits to determine
1059// and skip null values.
1060func (s *Float32Statistics) UpdateSpaced(values []float32, validBits []byte, validBitsOffset, numNull int64) {
1061	s.incNulls(numNull)
1062	notnull := int64(len(values)) - numNull
1063	s.nvalues += notnull
1064
1065	if notnull == 0 {
1066		return
1067	}
1068
1069	s.SetMinMax(s.getMinMaxSpaced(values, validBits, validBitsOffset))
1070}
1071
1072// SetMinMax updates the min and max values only if they are not currently set
1073// or if argMin is less than the current min / argMax is greater than the current max
1074func (s *Float32Statistics) SetMinMax(argMin, argMax float32) {
1075	maybeMinMax := s.cleanStat([2]float32{argMin, argMax})
1076	if maybeMinMax == nil {
1077		return
1078	}
1079
1080	min := (*maybeMinMax)[0]
1081	max := (*maybeMinMax)[1]
1082
1083	if !s.hasMinMax {
1084		s.hasMinMax = true
1085		s.min = min
1086		s.max = max
1087	} else {
1088		if !s.less(s.min, min) {
1089			s.min = min
1090		}
1091		if s.less(s.max, max) {
1092			s.max = max
1093		}
1094	}
1095}
1096
1097// EncodeMin returns the encoded min value with plain encoding.
1098//
1099// ByteArray stats do not include the length in the encoding.
1100func (s *Float32Statistics) EncodeMin() []byte {
1101	if s.HasMinMax() {
1102		return s.plainEncode(s.min)
1103	}
1104	return nil
1105}
1106
1107// EncodeMax returns the current encoded max value with plain encoding
1108//
1109// ByteArray stats do not include the length in the encoding
1110func (s *Float32Statistics) EncodeMax() []byte {
1111	if s.HasMinMax() {
1112		return s.plainEncode(s.max)
1113	}
1114	return nil
1115}
1116
1117// Encode returns a populated EncodedStatistics object
1118func (s *Float32Statistics) Encode() (enc EncodedStatistics, err error) {
1119	defer func() {
1120		if r := recover(); r != nil {
1121			switch r := r.(type) {
1122			case error:
1123				err = r
1124			case string:
1125				err = xerrors.New(r)
1126			default:
1127				err = xerrors.Errorf("unknown error type thrown from panic: %v", r)
1128			}
1129		}
1130	}()
1131	if s.HasMinMax() {
1132		enc.SetMax(s.EncodeMax())
1133		enc.SetMin(s.EncodeMin())
1134	}
1135	if s.HasNullCount() {
1136		enc.SetNullCount(s.NullCount())
1137	}
1138	if s.HasDistinctCount() {
1139		enc.SetDistinctCount(s.DistinctCount())
1140	}
1141	return
1142}
1143
// minmaxPairFloat64 is a two-element float64 array.
// NOTE(review): presumably index 0 is the min and index 1 the max, matching
// how SetMinMax reads the pair returned by cleanStat — confirm against
// cleanStat's definition.
type minmaxPairFloat64 [2]float64
1145
1146// Float64Statistics is the typed interface for managing stats for a column
1147// of Float64 type.
1148type Float64Statistics struct {
1149	statistics
1150	min float64
1151	max float64
1152
1153	bitSetReader utils.SetBitRunReader
1154}
1155
1156// NewFloat64Statistics constructs an appropriate stat object type using the
1157// given column descriptor and allocator.
1158//
1159// Panics if the physical type of descr is not parquet.Type.Double
1160func NewFloat64Statistics(descr *schema.Column, mem memory.Allocator) *Float64Statistics {
1161	if descr.PhysicalType() != parquet.Types.Double {
1162		panic(xerrors.Errorf("parquet: invalid type %s for constructing a Float64 stat object", descr.PhysicalType()))
1163	}
1164
1165	return &Float64Statistics{
1166		statistics: statistics{
1167			descr:            descr,
1168			hasNullCount:     true,
1169			hasDistinctCount: true,
1170			order:            descr.SortOrder(),
1171			encoder:          encoding.NewEncoder(descr.PhysicalType(), parquet.Encodings.Plain, false, descr, mem),
1172			mem:              mem,
1173		},
1174	}
1175}
1176
1177// NewFloat64StatisticsFromEncoded will construct a propertly typed statistics object
1178// initializing it with the provided information.
1179func NewFloat64StatisticsFromEncoded(descr *schema.Column, mem memory.Allocator, nvalues int64, encoded StatProvider) *Float64Statistics {
1180	ret := NewFloat64Statistics(descr, mem)
1181	ret.nvalues += nvalues
1182	if encoded.IsSetNullCount() {
1183		ret.incNulls(encoded.GetNullCount())
1184	}
1185	if encoded.IsSetDistinctCount() {
1186		ret.incDistinct(encoded.GetDistinctCount())
1187	}
1188
1189	encodedMin := encoded.GetMin()
1190	if encodedMin != nil && len(encodedMin) > 0 {
1191		ret.min = ret.plainDecode(encodedMin)
1192	}
1193	encodedMax := encoded.GetMax()
1194	if encodedMax != nil && len(encodedMax) > 0 {
1195		ret.max = ret.plainDecode(encodedMax)
1196	}
1197	ret.hasMinMax = encoded.IsSetMax() || encoded.IsSetMin()
1198	return ret
1199}
1200
1201func (s *Float64Statistics) plainEncode(src float64) []byte {
1202	s.encoder.(encoding.Float64Encoder).Put([]float64{src})
1203	buf, err := s.encoder.FlushValues()
1204	if err != nil {
1205		panic(err) // recovered by Encode
1206	}
1207	defer buf.Release()
1208
1209	out := make([]byte, buf.Len())
1210	copy(out, buf.Bytes())
1211	return out
1212}
1213
1214func (s *Float64Statistics) plainDecode(src []byte) float64 {
1215	var buf [1]float64
1216
1217	decoder := encoding.NewDecoder(s.descr.PhysicalType(), parquet.Encodings.Plain, s.descr, s.mem)
1218	decoder.SetData(1, src)
1219	decoder.(encoding.Float64Decoder).Decode(buf[:])
1220	return buf[0]
1221}
1222
1223func (s *Float64Statistics) minval(a, b float64) float64 {
1224	if s.less(a, b) {
1225		return a
1226	}
1227	return b
1228}
1229
1230func (s *Float64Statistics) maxval(a, b float64) float64 {
1231	if s.less(a, b) {
1232		return b
1233	}
1234	return a
1235}
1236
1237// MinMaxEqual returns true if both stat objects have the same Min and Max values
1238func (s *Float64Statistics) MinMaxEqual(rhs *Float64Statistics) bool {
1239	return s.equal(s.min, rhs.min) && s.equal(s.max, rhs.max)
1240}
1241
1242// Equals returns true only if both objects are the same type, have the same min and
1243// max values, null count, distinct count and number of values.
1244func (s *Float64Statistics) Equals(other TypedStatistics) bool {
1245	if s.Type() != other.Type() {
1246		return false
1247	}
1248	rhs, ok := other.(*Float64Statistics)
1249	if !ok {
1250		return false
1251	}
1252
1253	if s.HasMinMax() != rhs.HasMinMax() {
1254		return false
1255	}
1256	return (s.hasMinMax && s.MinMaxEqual(rhs)) &&
1257		s.NullCount() == rhs.NullCount() &&
1258		s.DistinctCount() == rhs.DistinctCount() &&
1259		s.NumValues() == rhs.NumValues()
1260}
1261
1262func (s *Float64Statistics) coalesce(val, fallback float64) float64 {
1263	if math.IsNaN(float64(val)) {
1264		return fallback
1265	}
1266	return val
1267}
1268
1269func (s *Float64Statistics) getMinMax(values []float64) (min, max float64) {
1270	defMin := s.defaultMin()
1271	defMax := s.defaultMax()
1272
1273	min = defMin
1274	max = defMax
1275
1276	for _, v := range values {
1277		min = s.minval(min, s.coalesce(v, defMin))
1278		max = s.maxval(max, s.coalesce(v, defMax))
1279	}
1280	return
1281}
1282
1283func (s *Float64Statistics) getMinMaxSpaced(values []float64, validBits []byte, validBitsOffset int64) (min, max float64) {
1284	min = s.defaultMin()
1285	max = s.defaultMax()
1286
1287	if s.bitSetReader == nil {
1288		s.bitSetReader = utils.NewSetBitRunReader(validBits, validBitsOffset, int64(len(values)))
1289	} else {
1290		s.bitSetReader.Reset(validBits, validBitsOffset, int64(len(values)))
1291	}
1292
1293	for {
1294		run := s.bitSetReader.NextRun()
1295		if run.Length == 0 {
1296			break
1297		}
1298		for _, v := range values[int(run.Pos):int(run.Pos+run.Length)] {
1299			min = s.minval(min, coalesce(v, s.defaultMin()).(float64))
1300			max = s.maxval(max, coalesce(v, s.defaultMax()).(float64))
1301		}
1302	}
1303	return
1304}
1305
1306func (s *Float64Statistics) Min() float64 { return s.min }
1307func (s *Float64Statistics) Max() float64 { return s.max }
1308
1309// Merge merges the stats from other into this stat object, updating
1310// the null count, distinct count, number of values and the min/max if
1311// appropriate.
1312func (s *Float64Statistics) Merge(other TypedStatistics) {
1313	rhs, ok := other.(*Float64Statistics)
1314	if !ok {
1315		panic("incompatible stat type merge")
1316	}
1317
1318	s.statistics.merge(rhs)
1319	if rhs.HasMinMax() {
1320		s.SetMinMax(rhs.Min(), rhs.Max())
1321	}
1322}
1323
1324// Update is used to add more values to the current stat object, finding the
1325// min and max values etc.
1326func (s *Float64Statistics) Update(values []float64, numNull int64) {
1327	s.incNulls(numNull)
1328	s.nvalues += int64(len(values))
1329
1330	if len(values) == 0 {
1331		return
1332	}
1333
1334	s.SetMinMax(s.getMinMax(values))
1335}
1336
1337// UpdateSpaced is just like Update, but for spaced values using validBits to determine
1338// and skip null values.
1339func (s *Float64Statistics) UpdateSpaced(values []float64, validBits []byte, validBitsOffset, numNull int64) {
1340	s.incNulls(numNull)
1341	notnull := int64(len(values)) - numNull
1342	s.nvalues += notnull
1343
1344	if notnull == 0 {
1345		return
1346	}
1347
1348	s.SetMinMax(s.getMinMaxSpaced(values, validBits, validBitsOffset))
1349}
1350
1351// SetMinMax updates the min and max values only if they are not currently set
1352// or if argMin is less than the current min / argMax is greater than the current max
1353func (s *Float64Statistics) SetMinMax(argMin, argMax float64) {
1354	maybeMinMax := s.cleanStat([2]float64{argMin, argMax})
1355	if maybeMinMax == nil {
1356		return
1357	}
1358
1359	min := (*maybeMinMax)[0]
1360	max := (*maybeMinMax)[1]
1361
1362	if !s.hasMinMax {
1363		s.hasMinMax = true
1364		s.min = min
1365		s.max = max
1366	} else {
1367		if !s.less(s.min, min) {
1368			s.min = min
1369		}
1370		if s.less(s.max, max) {
1371			s.max = max
1372		}
1373	}
1374}
1375
1376// EncodeMin returns the encoded min value with plain encoding.
1377//
1378// ByteArray stats do not include the length in the encoding.
1379func (s *Float64Statistics) EncodeMin() []byte {
1380	if s.HasMinMax() {
1381		return s.plainEncode(s.min)
1382	}
1383	return nil
1384}
1385
1386// EncodeMax returns the current encoded max value with plain encoding
1387//
1388// ByteArray stats do not include the length in the encoding
1389func (s *Float64Statistics) EncodeMax() []byte {
1390	if s.HasMinMax() {
1391		return s.plainEncode(s.max)
1392	}
1393	return nil
1394}
1395
1396// Encode returns a populated EncodedStatistics object
1397func (s *Float64Statistics) Encode() (enc EncodedStatistics, err error) {
1398	defer func() {
1399		if r := recover(); r != nil {
1400			switch r := r.(type) {
1401			case error:
1402				err = r
1403			case string:
1404				err = xerrors.New(r)
1405			default:
1406				err = xerrors.Errorf("unknown error type thrown from panic: %v", r)
1407			}
1408		}
1409	}()
1410	if s.HasMinMax() {
1411		enc.SetMax(s.EncodeMax())
1412		enc.SetMin(s.EncodeMin())
1413	}
1414	if s.HasNullCount() {
1415		enc.SetNullCount(s.NullCount())
1416	}
1417	if s.HasDistinctCount() {
1418		enc.SetDistinctCount(s.DistinctCount())
1419	}
1420	return
1421}
1422
// minmaxPairBoolean is a two-element bool array.
// NOTE(review): presumably index 0 is the min and index 1 the max, matching
// how SetMinMax reads the pair returned by cleanStat — confirm against
// cleanStat's definition.
type minmaxPairBoolean [2]bool
1424
1425// BooleanStatistics is the typed interface for managing stats for a column
1426// of Boolean type.
1427type BooleanStatistics struct {
1428	statistics
1429	min bool
1430	max bool
1431
1432	bitSetReader utils.SetBitRunReader
1433}
1434
1435// NewBooleanStatistics constructs an appropriate stat object type using the
1436// given column descriptor and allocator.
1437//
1438// Panics if the physical type of descr is not parquet.Type.Boolean
1439func NewBooleanStatistics(descr *schema.Column, mem memory.Allocator) *BooleanStatistics {
1440	if descr.PhysicalType() != parquet.Types.Boolean {
1441		panic(xerrors.Errorf("parquet: invalid type %s for constructing a Boolean stat object", descr.PhysicalType()))
1442	}
1443
1444	return &BooleanStatistics{
1445		statistics: statistics{
1446			descr:            descr,
1447			hasNullCount:     true,
1448			hasDistinctCount: true,
1449			order:            descr.SortOrder(),
1450			encoder:          encoding.NewEncoder(descr.PhysicalType(), parquet.Encodings.Plain, false, descr, mem),
1451			mem:              mem,
1452		},
1453	}
1454}
1455
1456// NewBooleanStatisticsFromEncoded will construct a propertly typed statistics object
1457// initializing it with the provided information.
1458func NewBooleanStatisticsFromEncoded(descr *schema.Column, mem memory.Allocator, nvalues int64, encoded StatProvider) *BooleanStatistics {
1459	ret := NewBooleanStatistics(descr, mem)
1460	ret.nvalues += nvalues
1461	if encoded.IsSetNullCount() {
1462		ret.incNulls(encoded.GetNullCount())
1463	}
1464	if encoded.IsSetDistinctCount() {
1465		ret.incDistinct(encoded.GetDistinctCount())
1466	}
1467
1468	encodedMin := encoded.GetMin()
1469	if encodedMin != nil && len(encodedMin) > 0 {
1470		ret.min = ret.plainDecode(encodedMin)
1471	}
1472	encodedMax := encoded.GetMax()
1473	if encodedMax != nil && len(encodedMax) > 0 {
1474		ret.max = ret.plainDecode(encodedMax)
1475	}
1476	ret.hasMinMax = encoded.IsSetMax() || encoded.IsSetMin()
1477	return ret
1478}
1479
1480func (s *BooleanStatistics) plainEncode(src bool) []byte {
1481	s.encoder.(encoding.BooleanEncoder).Put([]bool{src})
1482	buf, err := s.encoder.FlushValues()
1483	if err != nil {
1484		panic(err) // recovered by Encode
1485	}
1486	defer buf.Release()
1487
1488	out := make([]byte, buf.Len())
1489	copy(out, buf.Bytes())
1490	return out
1491}
1492
1493func (s *BooleanStatistics) plainDecode(src []byte) bool {
1494	var buf [1]bool
1495
1496	decoder := encoding.NewDecoder(s.descr.PhysicalType(), parquet.Encodings.Plain, s.descr, s.mem)
1497	decoder.SetData(1, src)
1498	decoder.(encoding.BooleanDecoder).Decode(buf[:])
1499	return buf[0]
1500}
1501
1502func (s *BooleanStatistics) minval(a, b bool) bool {
1503	if s.less(a, b) {
1504		return a
1505	}
1506	return b
1507}
1508
1509func (s *BooleanStatistics) maxval(a, b bool) bool {
1510	if s.less(a, b) {
1511		return b
1512	}
1513	return a
1514}
1515
1516// MinMaxEqual returns true if both stat objects have the same Min and Max values
1517func (s *BooleanStatistics) MinMaxEqual(rhs *BooleanStatistics) bool {
1518	return s.equal(s.min, rhs.min) && s.equal(s.max, rhs.max)
1519}
1520
1521// Equals returns true only if both objects are the same type, have the same min and
1522// max values, null count, distinct count and number of values.
1523func (s *BooleanStatistics) Equals(other TypedStatistics) bool {
1524	if s.Type() != other.Type() {
1525		return false
1526	}
1527	rhs, ok := other.(*BooleanStatistics)
1528	if !ok {
1529		return false
1530	}
1531
1532	if s.HasMinMax() != rhs.HasMinMax() {
1533		return false
1534	}
1535	return (s.hasMinMax && s.MinMaxEqual(rhs)) &&
1536		s.NullCount() == rhs.NullCount() &&
1537		s.DistinctCount() == rhs.DistinctCount() &&
1538		s.NumValues() == rhs.NumValues()
1539}
1540
1541func (s *BooleanStatistics) getMinMax(values []bool) (min, max bool) {
1542	defMin := s.defaultMin()
1543	defMax := s.defaultMax()
1544
1545	min = defMin
1546	max = defMax
1547
1548	for _, v := range values {
1549		min = s.minval(min, v)
1550		max = s.maxval(max, v)
1551	}
1552	return
1553}
1554
1555func (s *BooleanStatistics) getMinMaxSpaced(values []bool, validBits []byte, validBitsOffset int64) (min, max bool) {
1556	min = s.defaultMin()
1557	max = s.defaultMax()
1558
1559	if s.bitSetReader == nil {
1560		s.bitSetReader = utils.NewSetBitRunReader(validBits, validBitsOffset, int64(len(values)))
1561	} else {
1562		s.bitSetReader.Reset(validBits, validBitsOffset, int64(len(values)))
1563	}
1564
1565	for {
1566		run := s.bitSetReader.NextRun()
1567		if run.Length == 0 {
1568			break
1569		}
1570		for _, v := range values[int(run.Pos):int(run.Pos+run.Length)] {
1571			min = s.minval(min, v)
1572			max = s.maxval(max, v)
1573		}
1574	}
1575	return
1576}
1577
1578func (s *BooleanStatistics) Min() bool { return s.min }
1579func (s *BooleanStatistics) Max() bool { return s.max }
1580
1581// Merge merges the stats from other into this stat object, updating
1582// the null count, distinct count, number of values and the min/max if
1583// appropriate.
1584func (s *BooleanStatistics) Merge(other TypedStatistics) {
1585	rhs, ok := other.(*BooleanStatistics)
1586	if !ok {
1587		panic("incompatible stat type merge")
1588	}
1589
1590	s.statistics.merge(rhs)
1591	if rhs.HasMinMax() {
1592		s.SetMinMax(rhs.Min(), rhs.Max())
1593	}
1594}
1595
1596// Update is used to add more values to the current stat object, finding the
1597// min and max values etc.
1598func (s *BooleanStatistics) Update(values []bool, numNull int64) {
1599	s.incNulls(numNull)
1600	s.nvalues += int64(len(values))
1601
1602	if len(values) == 0 {
1603		return
1604	}
1605
1606	s.SetMinMax(s.getMinMax(values))
1607}
1608
1609// UpdateSpaced is just like Update, but for spaced values using validBits to determine
1610// and skip null values.
1611func (s *BooleanStatistics) UpdateSpaced(values []bool, validBits []byte, validBitsOffset, numNull int64) {
1612	s.incNulls(numNull)
1613	notnull := int64(len(values)) - numNull
1614	s.nvalues += notnull
1615
1616	if notnull == 0 {
1617		return
1618	}
1619
1620	s.SetMinMax(s.getMinMaxSpaced(values, validBits, validBitsOffset))
1621}
1622
1623// SetMinMax updates the min and max values only if they are not currently set
1624// or if argMin is less than the current min / argMax is greater than the current max
1625func (s *BooleanStatistics) SetMinMax(argMin, argMax bool) {
1626	maybeMinMax := s.cleanStat([2]bool{argMin, argMax})
1627	if maybeMinMax == nil {
1628		return
1629	}
1630
1631	min := (*maybeMinMax)[0]
1632	max := (*maybeMinMax)[1]
1633
1634	if !s.hasMinMax {
1635		s.hasMinMax = true
1636		s.min = min
1637		s.max = max
1638	} else {
1639		if !s.less(s.min, min) {
1640			s.min = min
1641		}
1642		if s.less(s.max, max) {
1643			s.max = max
1644		}
1645	}
1646}
1647
1648// EncodeMin returns the encoded min value with plain encoding.
1649//
1650// ByteArray stats do not include the length in the encoding.
1651func (s *BooleanStatistics) EncodeMin() []byte {
1652	if s.HasMinMax() {
1653		return s.plainEncode(s.min)
1654	}
1655	return nil
1656}
1657
1658// EncodeMax returns the current encoded max value with plain encoding
1659//
1660// ByteArray stats do not include the length in the encoding
1661func (s *BooleanStatistics) EncodeMax() []byte {
1662	if s.HasMinMax() {
1663		return s.plainEncode(s.max)
1664	}
1665	return nil
1666}
1667
1668// Encode returns a populated EncodedStatistics object
1669func (s *BooleanStatistics) Encode() (enc EncodedStatistics, err error) {
1670	defer func() {
1671		if r := recover(); r != nil {
1672			switch r := r.(type) {
1673			case error:
1674				err = r
1675			case string:
1676				err = xerrors.New(r)
1677			default:
1678				err = xerrors.Errorf("unknown error type thrown from panic: %v", r)
1679			}
1680		}
1681	}()
1682	if s.HasMinMax() {
1683		enc.SetMax(s.EncodeMax())
1684		enc.SetMin(s.EncodeMin())
1685	}
1686	if s.HasNullCount() {
1687		enc.SetNullCount(s.NullCount())
1688	}
1689	if s.HasDistinctCount() {
1690		enc.SetDistinctCount(s.DistinctCount())
1691	}
1692	return
1693}
1694
// minmaxPairByteArray is a two-element parquet.ByteArray array.
// NOTE(review): presumably index 0 is the min and index 1 the max, matching
// how SetMinMax reads the pair returned by cleanStat — confirm against
// cleanStat's definition.
type minmaxPairByteArray [2]parquet.ByteArray
1696
1697// ByteArrayStatistics is the typed interface for managing stats for a column
1698// of ByteArray type.
1699type ByteArrayStatistics struct {
1700	statistics
1701	min parquet.ByteArray
1702	max parquet.ByteArray
1703
1704	bitSetReader utils.SetBitRunReader
1705}
1706
1707// NewByteArrayStatistics constructs an appropriate stat object type using the
1708// given column descriptor and allocator.
1709//
1710// Panics if the physical type of descr is not parquet.Type.ByteArray
1711func NewByteArrayStatistics(descr *schema.Column, mem memory.Allocator) *ByteArrayStatistics {
1712	if descr.PhysicalType() != parquet.Types.ByteArray {
1713		panic(xerrors.Errorf("parquet: invalid type %s for constructing a ByteArray stat object", descr.PhysicalType()))
1714	}
1715
1716	return &ByteArrayStatistics{
1717		statistics: statistics{
1718			descr:            descr,
1719			hasNullCount:     true,
1720			hasDistinctCount: true,
1721			order:            descr.SortOrder(),
1722			encoder:          encoding.NewEncoder(descr.PhysicalType(), parquet.Encodings.Plain, false, descr, mem),
1723			mem:              mem,
1724		},
1725
1726		min: make([]byte, 0),
1727		max: make([]byte, 0),
1728	}
1729}
1730
1731// NewByteArrayStatisticsFromEncoded will construct a propertly typed statistics object
1732// initializing it with the provided information.
1733func NewByteArrayStatisticsFromEncoded(descr *schema.Column, mem memory.Allocator, nvalues int64, encoded StatProvider) *ByteArrayStatistics {
1734	ret := NewByteArrayStatistics(descr, mem)
1735	ret.nvalues += nvalues
1736	if encoded.IsSetNullCount() {
1737		ret.incNulls(encoded.GetNullCount())
1738	}
1739	if encoded.IsSetDistinctCount() {
1740		ret.incDistinct(encoded.GetDistinctCount())
1741	}
1742
1743	encodedMin := encoded.GetMin()
1744	if encodedMin != nil && len(encodedMin) > 0 {
1745		ret.min = ret.plainDecode(encodedMin)
1746	}
1747	encodedMax := encoded.GetMax()
1748	if encodedMax != nil && len(encodedMax) > 0 {
1749		ret.max = ret.plainDecode(encodedMax)
1750	}
1751	ret.hasMinMax = encoded.IsSetMax() || encoded.IsSetMin()
1752	return ret
1753}
1754
1755func (s *ByteArrayStatistics) plainEncode(src parquet.ByteArray) []byte {
1756	return src
1757}
1758
1759func (s *ByteArrayStatistics) plainDecode(src []byte) parquet.ByteArray {
1760	return src
1761}
1762
1763func (s *ByteArrayStatistics) minval(a, b parquet.ByteArray) parquet.ByteArray {
1764	switch {
1765	case a == nil:
1766		return b
1767	case b == nil:
1768		return a
1769	case s.less(a, b):
1770		return a
1771	default:
1772		return b
1773	}
1774}
1775
1776func (s *ByteArrayStatistics) maxval(a, b parquet.ByteArray) parquet.ByteArray {
1777	switch {
1778	case a == nil:
1779		return b
1780	case b == nil:
1781		return a
1782	case s.less(a, b):
1783		return b
1784	default:
1785		return a
1786	}
1787}
1788
1789// MinMaxEqual returns true if both stat objects have the same Min and Max values
1790func (s *ByteArrayStatistics) MinMaxEqual(rhs *ByteArrayStatistics) bool {
1791	return s.equal(s.min, rhs.min) && s.equal(s.max, rhs.max)
1792}
1793
1794// Equals returns true only if both objects are the same type, have the same min and
1795// max values, null count, distinct count and number of values.
1796func (s *ByteArrayStatistics) Equals(other TypedStatistics) bool {
1797	if s.Type() != other.Type() {
1798		return false
1799	}
1800	rhs, ok := other.(*ByteArrayStatistics)
1801	if !ok {
1802		return false
1803	}
1804
1805	if s.HasMinMax() != rhs.HasMinMax() {
1806		return false
1807	}
1808	return (s.hasMinMax && s.MinMaxEqual(rhs)) &&
1809		s.NullCount() == rhs.NullCount() &&
1810		s.DistinctCount() == rhs.DistinctCount() &&
1811		s.NumValues() == rhs.NumValues()
1812}
1813
1814func (s *ByteArrayStatistics) getMinMax(values []parquet.ByteArray) (min, max parquet.ByteArray) {
1815	defMin := s.defaultMin()
1816	defMax := s.defaultMax()
1817
1818	min = defMin
1819	max = defMax
1820
1821	for _, v := range values {
1822		min = s.minval(min, v)
1823		max = s.maxval(max, v)
1824	}
1825	return
1826}
1827
1828func (s *ByteArrayStatistics) getMinMaxSpaced(values []parquet.ByteArray, validBits []byte, validBitsOffset int64) (min, max parquet.ByteArray) {
1829	min = s.defaultMin()
1830	max = s.defaultMax()
1831
1832	if s.bitSetReader == nil {
1833		s.bitSetReader = utils.NewSetBitRunReader(validBits, validBitsOffset, int64(len(values)))
1834	} else {
1835		s.bitSetReader.Reset(validBits, validBitsOffset, int64(len(values)))
1836	}
1837
1838	for {
1839		run := s.bitSetReader.NextRun()
1840		if run.Length == 0 {
1841			break
1842		}
1843		for _, v := range values[int(run.Pos):int(run.Pos+run.Length)] {
1844			min = s.minval(min, v)
1845			max = s.maxval(max, v)
1846		}
1847	}
1848	return
1849}
1850
1851func (s *ByteArrayStatistics) Min() parquet.ByteArray { return s.min }
1852func (s *ByteArrayStatistics) Max() parquet.ByteArray { return s.max }
1853
1854// Merge merges the stats from other into this stat object, updating
1855// the null count, distinct count, number of values and the min/max if
1856// appropriate.
1857func (s *ByteArrayStatistics) Merge(other TypedStatistics) {
1858	rhs, ok := other.(*ByteArrayStatistics)
1859	if !ok {
1860		panic("incompatible stat type merge")
1861	}
1862
1863	s.statistics.merge(rhs)
1864	if rhs.HasMinMax() {
1865		s.SetMinMax(rhs.Min(), rhs.Max())
1866	}
1867}
1868
1869// Update is used to add more values to the current stat object, finding the
1870// min and max values etc.
1871func (s *ByteArrayStatistics) Update(values []parquet.ByteArray, numNull int64) {
1872	s.incNulls(numNull)
1873	s.nvalues += int64(len(values))
1874
1875	if len(values) == 0 {
1876		return
1877	}
1878
1879	s.SetMinMax(s.getMinMax(values))
1880}
1881
1882// UpdateSpaced is just like Update, but for spaced values using validBits to determine
1883// and skip null values.
1884func (s *ByteArrayStatistics) UpdateSpaced(values []parquet.ByteArray, validBits []byte, validBitsOffset, numNull int64) {
1885	s.incNulls(numNull)
1886	notnull := int64(len(values)) - numNull
1887	s.nvalues += notnull
1888
1889	if notnull == 0 {
1890		return
1891	}
1892
1893	s.SetMinMax(s.getMinMaxSpaced(values, validBits, validBitsOffset))
1894}
1895
1896// SetMinMax updates the min and max values only if they are not currently set
1897// or if argMin is less than the current min / argMax is greater than the current max
1898func (s *ByteArrayStatistics) SetMinMax(argMin, argMax parquet.ByteArray) {
1899	maybeMinMax := s.cleanStat([2]parquet.ByteArray{argMin, argMax})
1900	if maybeMinMax == nil {
1901		return
1902	}
1903
1904	min := (*maybeMinMax)[0]
1905	max := (*maybeMinMax)[1]
1906
1907	if !s.hasMinMax {
1908		s.hasMinMax = true
1909		s.min = min
1910		s.max = max
1911	} else {
1912		if !s.less(s.min, min) {
1913			s.min = min
1914		}
1915		if s.less(s.max, max) {
1916			s.max = max
1917		}
1918	}
1919}
1920
1921// EncodeMin returns the encoded min value with plain encoding.
1922//
1923// ByteArray stats do not include the length in the encoding.
1924func (s *ByteArrayStatistics) EncodeMin() []byte {
1925	if s.HasMinMax() {
1926		return s.plainEncode(s.min)
1927	}
1928	return nil
1929}
1930
1931// EncodeMax returns the current encoded max value with plain encoding
1932//
1933// ByteArray stats do not include the length in the encoding
1934func (s *ByteArrayStatistics) EncodeMax() []byte {
1935	if s.HasMinMax() {
1936		return s.plainEncode(s.max)
1937	}
1938	return nil
1939}
1940
1941// Encode returns a populated EncodedStatistics object
1942func (s *ByteArrayStatistics) Encode() (enc EncodedStatistics, err error) {
1943	defer func() {
1944		if r := recover(); r != nil {
1945			switch r := r.(type) {
1946			case error:
1947				err = r
1948			case string:
1949				err = xerrors.New(r)
1950			default:
1951				err = xerrors.Errorf("unknown error type thrown from panic: %v", r)
1952			}
1953		}
1954	}()
1955	if s.HasMinMax() {
1956		enc.SetMax(s.EncodeMax())
1957		enc.SetMin(s.EncodeMin())
1958	}
1959	if s.HasNullCount() {
1960		enc.SetNullCount(s.NullCount())
1961	}
1962	if s.HasDistinctCount() {
1963		enc.SetDistinctCount(s.DistinctCount())
1964	}
1965	return
1966}
1967
// minmaxPairFixedLenByteArray is a two-element parquet.FixedLenByteArray
// array.
// NOTE(review): presumably index 0 is the min and index 1 the max, matching
// how SetMinMax reads the pair returned by cleanStat — confirm against
// cleanStat's definition.
type minmaxPairFixedLenByteArray [2]parquet.FixedLenByteArray
1969
1970// FixedLenByteArrayStatistics is the typed interface for managing stats for a column
1971// of FixedLenByteArray type.
1972type FixedLenByteArrayStatistics struct {
1973	statistics
1974	min parquet.FixedLenByteArray
1975	max parquet.FixedLenByteArray
1976
1977	bitSetReader utils.SetBitRunReader
1978}
1979
1980// NewFixedLenByteArrayStatistics constructs an appropriate stat object type using the
1981// given column descriptor and allocator.
1982//
1983// Panics if the physical type of descr is not parquet.Type.FixedLenByteArray
1984func NewFixedLenByteArrayStatistics(descr *schema.Column, mem memory.Allocator) *FixedLenByteArrayStatistics {
1985	if descr.PhysicalType() != parquet.Types.FixedLenByteArray {
1986		panic(xerrors.Errorf("parquet: invalid type %s for constructing a FixedLenByteArray stat object", descr.PhysicalType()))
1987	}
1988
1989	return &FixedLenByteArrayStatistics{
1990		statistics: statistics{
1991			descr:            descr,
1992			hasNullCount:     true,
1993			hasDistinctCount: true,
1994			order:            descr.SortOrder(),
1995			encoder:          encoding.NewEncoder(descr.PhysicalType(), parquet.Encodings.Plain, false, descr, mem),
1996			mem:              mem,
1997		},
1998	}
1999}
2000
2001// NewFixedLenByteArrayStatisticsFromEncoded will construct a propertly typed statistics object
2002// initializing it with the provided information.
2003func NewFixedLenByteArrayStatisticsFromEncoded(descr *schema.Column, mem memory.Allocator, nvalues int64, encoded StatProvider) *FixedLenByteArrayStatistics {
2004	ret := NewFixedLenByteArrayStatistics(descr, mem)
2005	ret.nvalues += nvalues
2006	if encoded.IsSetNullCount() {
2007		ret.incNulls(encoded.GetNullCount())
2008	}
2009	if encoded.IsSetDistinctCount() {
2010		ret.incDistinct(encoded.GetDistinctCount())
2011	}
2012
2013	encodedMin := encoded.GetMin()
2014	if encodedMin != nil && len(encodedMin) > 0 {
2015		ret.min = ret.plainDecode(encodedMin)
2016	}
2017	encodedMax := encoded.GetMax()
2018	if encodedMax != nil && len(encodedMax) > 0 {
2019		ret.max = ret.plainDecode(encodedMax)
2020	}
2021	ret.hasMinMax = encoded.IsSetMax() || encoded.IsSetMin()
2022	return ret
2023}
2024
2025func (s *FixedLenByteArrayStatistics) plainEncode(src parquet.FixedLenByteArray) []byte {
2026	s.encoder.(encoding.FixedLenByteArrayEncoder).Put([]parquet.FixedLenByteArray{src})
2027	buf, err := s.encoder.FlushValues()
2028	if err != nil {
2029		panic(err) // recovered by Encode
2030	}
2031	defer buf.Release()
2032
2033	out := make([]byte, buf.Len())
2034	copy(out, buf.Bytes())
2035	return out
2036}
2037
2038func (s *FixedLenByteArrayStatistics) plainDecode(src []byte) parquet.FixedLenByteArray {
2039	var buf [1]parquet.FixedLenByteArray
2040
2041	decoder := encoding.NewDecoder(s.descr.PhysicalType(), parquet.Encodings.Plain, s.descr, s.mem)
2042	decoder.SetData(1, src)
2043	decoder.(encoding.FixedLenByteArrayDecoder).Decode(buf[:])
2044	return buf[0]
2045}
2046
2047func (s *FixedLenByteArrayStatistics) minval(a, b parquet.FixedLenByteArray) parquet.FixedLenByteArray {
2048	switch {
2049	case a == nil:
2050		return b
2051	case b == nil:
2052		return a
2053	case s.less(a, b):
2054		return a
2055	default:
2056		return b
2057	}
2058}
2059
2060func (s *FixedLenByteArrayStatistics) maxval(a, b parquet.FixedLenByteArray) parquet.FixedLenByteArray {
2061	switch {
2062	case a == nil:
2063		return b
2064	case b == nil:
2065		return a
2066	case s.less(a, b):
2067		return b
2068	default:
2069		return a
2070	}
2071}
2072
2073// MinMaxEqual returns true if both stat objects have the same Min and Max values
2074func (s *FixedLenByteArrayStatistics) MinMaxEqual(rhs *FixedLenByteArrayStatistics) bool {
2075	return s.equal(s.min, rhs.min) && s.equal(s.max, rhs.max)
2076}
2077
2078// Equals returns true only if both objects are the same type, have the same min and
2079// max values, null count, distinct count and number of values.
2080func (s *FixedLenByteArrayStatistics) Equals(other TypedStatistics) bool {
2081	if s.Type() != other.Type() {
2082		return false
2083	}
2084	rhs, ok := other.(*FixedLenByteArrayStatistics)
2085	if !ok {
2086		return false
2087	}
2088
2089	if s.HasMinMax() != rhs.HasMinMax() {
2090		return false
2091	}
2092	return (s.hasMinMax && s.MinMaxEqual(rhs)) &&
2093		s.NullCount() == rhs.NullCount() &&
2094		s.DistinctCount() == rhs.DistinctCount() &&
2095		s.NumValues() == rhs.NumValues()
2096}
2097
2098func (s *FixedLenByteArrayStatistics) getMinMax(values []parquet.FixedLenByteArray) (min, max parquet.FixedLenByteArray) {
2099	defMin := s.defaultMin()
2100	defMax := s.defaultMax()
2101
2102	min = defMin
2103	max = defMax
2104
2105	for _, v := range values {
2106		min = s.minval(min, v)
2107		max = s.maxval(max, v)
2108	}
2109	return
2110}
2111
2112func (s *FixedLenByteArrayStatistics) getMinMaxSpaced(values []parquet.FixedLenByteArray, validBits []byte, validBitsOffset int64) (min, max parquet.FixedLenByteArray) {
2113	min = s.defaultMin()
2114	max = s.defaultMax()
2115
2116	if s.bitSetReader == nil {
2117		s.bitSetReader = utils.NewSetBitRunReader(validBits, validBitsOffset, int64(len(values)))
2118	} else {
2119		s.bitSetReader.Reset(validBits, validBitsOffset, int64(len(values)))
2120	}
2121
2122	for {
2123		run := s.bitSetReader.NextRun()
2124		if run.Length == 0 {
2125			break
2126		}
2127		for _, v := range values[int(run.Pos):int(run.Pos+run.Length)] {
2128			min = s.minval(min, v)
2129			max = s.maxval(max, v)
2130		}
2131	}
2132	return
2133}
2134
2135func (s *FixedLenByteArrayStatistics) Min() parquet.FixedLenByteArray { return s.min }
2136func (s *FixedLenByteArrayStatistics) Max() parquet.FixedLenByteArray { return s.max }
2137
2138// Merge merges the stats from other into this stat object, updating
2139// the null count, distinct count, number of values and the min/max if
2140// appropriate.
2141func (s *FixedLenByteArrayStatistics) Merge(other TypedStatistics) {
2142	rhs, ok := other.(*FixedLenByteArrayStatistics)
2143	if !ok {
2144		panic("incompatible stat type merge")
2145	}
2146
2147	s.statistics.merge(rhs)
2148	if rhs.HasMinMax() {
2149		s.SetMinMax(rhs.Min(), rhs.Max())
2150	}
2151}
2152
2153// Update is used to add more values to the current stat object, finding the
2154// min and max values etc.
2155func (s *FixedLenByteArrayStatistics) Update(values []parquet.FixedLenByteArray, numNull int64) {
2156	s.incNulls(numNull)
2157	s.nvalues += int64(len(values))
2158
2159	if len(values) == 0 {
2160		return
2161	}
2162
2163	s.SetMinMax(s.getMinMax(values))
2164}
2165
2166// UpdateSpaced is just like Update, but for spaced values using validBits to determine
2167// and skip null values.
2168func (s *FixedLenByteArrayStatistics) UpdateSpaced(values []parquet.FixedLenByteArray, validBits []byte, validBitsOffset, numNull int64) {
2169	s.incNulls(numNull)
2170	notnull := int64(len(values)) - numNull
2171	s.nvalues += notnull
2172
2173	if notnull == 0 {
2174		return
2175	}
2176
2177	s.SetMinMax(s.getMinMaxSpaced(values, validBits, validBitsOffset))
2178}
2179
2180// SetMinMax updates the min and max values only if they are not currently set
2181// or if argMin is less than the current min / argMax is greater than the current max
2182func (s *FixedLenByteArrayStatistics) SetMinMax(argMin, argMax parquet.FixedLenByteArray) {
2183	maybeMinMax := s.cleanStat([2]parquet.FixedLenByteArray{argMin, argMax})
2184	if maybeMinMax == nil {
2185		return
2186	}
2187
2188	min := (*maybeMinMax)[0]
2189	max := (*maybeMinMax)[1]
2190
2191	if !s.hasMinMax {
2192		s.hasMinMax = true
2193		s.min = min
2194		s.max = max
2195	} else {
2196		if !s.less(s.min, min) {
2197			s.min = min
2198		}
2199		if s.less(s.max, max) {
2200			s.max = max
2201		}
2202	}
2203}
2204
2205// EncodeMin returns the encoded min value with plain encoding.
2206//
2207// ByteArray stats do not include the length in the encoding.
2208func (s *FixedLenByteArrayStatistics) EncodeMin() []byte {
2209	if s.HasMinMax() {
2210		return s.plainEncode(s.min)
2211	}
2212	return nil
2213}
2214
2215// EncodeMax returns the current encoded max value with plain encoding
2216//
2217// ByteArray stats do not include the length in the encoding
2218func (s *FixedLenByteArrayStatistics) EncodeMax() []byte {
2219	if s.HasMinMax() {
2220		return s.plainEncode(s.max)
2221	}
2222	return nil
2223}
2224
2225// Encode returns a populated EncodedStatistics object
2226func (s *FixedLenByteArrayStatistics) Encode() (enc EncodedStatistics, err error) {
2227	defer func() {
2228		if r := recover(); r != nil {
2229			switch r := r.(type) {
2230			case error:
2231				err = r
2232			case string:
2233				err = xerrors.New(r)
2234			default:
2235				err = xerrors.Errorf("unknown error type thrown from panic: %v", r)
2236			}
2237		}
2238	}()
2239	if s.HasMinMax() {
2240		enc.SetMax(s.EncodeMax())
2241		enc.SetMin(s.EncodeMin())
2242	}
2243	if s.HasNullCount() {
2244		enc.SetNullCount(s.NullCount())
2245	}
2246	if s.HasDistinctCount() {
2247		enc.SetDistinctCount(s.DistinctCount())
2248	}
2249	return
2250}
2251
2252// NewStatistics uses the type in the column descriptor to construct the appropriate
2253// typed stats object. If mem is nil, then memory.DefaultAllocator will be used.
2254func NewStatistics(descr *schema.Column, mem memory.Allocator) TypedStatistics {
2255	if mem == nil {
2256		mem = memory.DefaultAllocator
2257	}
2258	switch descr.PhysicalType() {
2259	case parquet.Types.Int32:
2260		return NewInt32Statistics(descr, mem)
2261	case parquet.Types.Int64:
2262		return NewInt64Statistics(descr, mem)
2263	case parquet.Types.Int96:
2264		return NewInt96Statistics(descr, mem)
2265	case parquet.Types.Float:
2266		return NewFloat32Statistics(descr, mem)
2267	case parquet.Types.Double:
2268		return NewFloat64Statistics(descr, mem)
2269	case parquet.Types.Boolean:
2270		return NewBooleanStatistics(descr, mem)
2271	case parquet.Types.ByteArray:
2272		return NewByteArrayStatistics(descr, mem)
2273	case parquet.Types.FixedLenByteArray:
2274		return NewFixedLenByteArrayStatistics(descr, mem)
2275	default:
2276		panic("not implemented")
2277	}
2278}
2279
2280// NewStatisticsFromEncoded uses the provided information to initialize a typed stat object
2281// by checking the type of the provided column descriptor.
2282//
2283// If mem is nil, then memory.DefaultAllocator is used.
2284func NewStatisticsFromEncoded(descr *schema.Column, mem memory.Allocator, nvalues int64, encoded StatProvider) TypedStatistics {
2285	if mem == nil {
2286		mem = memory.DefaultAllocator
2287	}
2288	switch descr.PhysicalType() {
2289	case parquet.Types.Int32:
2290		return NewInt32StatisticsFromEncoded(descr, mem, nvalues, encoded)
2291	case parquet.Types.Int64:
2292		return NewInt64StatisticsFromEncoded(descr, mem, nvalues, encoded)
2293	case parquet.Types.Int96:
2294		return NewInt96StatisticsFromEncoded(descr, mem, nvalues, encoded)
2295	case parquet.Types.Float:
2296		return NewFloat32StatisticsFromEncoded(descr, mem, nvalues, encoded)
2297	case parquet.Types.Double:
2298		return NewFloat64StatisticsFromEncoded(descr, mem, nvalues, encoded)
2299	case parquet.Types.Boolean:
2300		return NewBooleanStatisticsFromEncoded(descr, mem, nvalues, encoded)
2301	case parquet.Types.ByteArray:
2302		return NewByteArrayStatisticsFromEncoded(descr, mem, nvalues, encoded)
2303	case parquet.Types.FixedLenByteArray:
2304		return NewFixedLenByteArrayStatisticsFromEncoded(descr, mem, nvalues, encoded)
2305	default:
2306		panic("not implemented")
2307	}
2308}
2309