// Code generated by statistics_types.gen.go.tmpl. DO NOT EDIT.

// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package metadata

import (
	"math"

	"github.com/apache/arrow/go/v6/arrow"
	"github.com/apache/arrow/go/v6/arrow/memory"
	"github.com/apache/arrow/go/v6/parquet"
	"github.com/apache/arrow/go/v6/parquet/internal/encoding"
	"github.com/apache/arrow/go/v6/parquet/internal/utils"
	"github.com/apache/arrow/go/v6/parquet/schema"
	"golang.org/x/xerrors"
)

// minmaxPairInt32 is a two-element int32 array.
// NOTE(review): presumably the (min, max) pair type used by cleanStat, which
// is declared outside this file section - confirm.
type minmaxPairInt32 [2]int32

// Int32Statistics is the typed interface for managing stats for a column
// of Int32 type.
type Int32Statistics struct {
	statistics
	// min and max hold the current bounds; they are only meaningful once
	// hasMinMax has been set (see SetMinMax).
	min int32
	max int32

	// bitSetReader is created on the first call to getMinMaxSpaced and then
	// Reset and reused on later calls.
	bitSetReader utils.SetBitRunReader
}

// NewInt32Statistics constructs an appropriate stat object type using the
// given column descriptor and allocator.
47// 48// Panics if the physical type of descr is not parquet.Type.Int32 49func NewInt32Statistics(descr *schema.Column, mem memory.Allocator) *Int32Statistics { 50 if descr.PhysicalType() != parquet.Types.Int32 { 51 panic(xerrors.Errorf("parquet: invalid type %s for constructing a Int32 stat object", descr.PhysicalType())) 52 } 53 54 return &Int32Statistics{ 55 statistics: statistics{ 56 descr: descr, 57 hasNullCount: true, 58 hasDistinctCount: true, 59 order: descr.SortOrder(), 60 encoder: encoding.NewEncoder(descr.PhysicalType(), parquet.Encodings.Plain, false, descr, mem), 61 mem: mem, 62 }, 63 } 64} 65 66// NewInt32StatisticsFromEncoded will construct a propertly typed statistics object 67// initializing it with the provided information. 68func NewInt32StatisticsFromEncoded(descr *schema.Column, mem memory.Allocator, nvalues int64, encoded StatProvider) *Int32Statistics { 69 ret := NewInt32Statistics(descr, mem) 70 ret.nvalues += nvalues 71 if encoded.IsSetNullCount() { 72 ret.incNulls(encoded.GetNullCount()) 73 } 74 if encoded.IsSetDistinctCount() { 75 ret.incDistinct(encoded.GetDistinctCount()) 76 } 77 78 encodedMin := encoded.GetMin() 79 if encodedMin != nil && len(encodedMin) > 0 { 80 ret.min = ret.plainDecode(encodedMin) 81 } 82 encodedMax := encoded.GetMax() 83 if encodedMax != nil && len(encodedMax) > 0 { 84 ret.max = ret.plainDecode(encodedMax) 85 } 86 ret.hasMinMax = encoded.IsSetMax() || encoded.IsSetMin() 87 return ret 88} 89 90func (s *Int32Statistics) plainEncode(src int32) []byte { 91 s.encoder.(encoding.Int32Encoder).Put([]int32{src}) 92 buf, err := s.encoder.FlushValues() 93 if err != nil { 94 panic(err) // recovered by Encode 95 } 96 defer buf.Release() 97 98 out := make([]byte, buf.Len()) 99 copy(out, buf.Bytes()) 100 return out 101} 102 103func (s *Int32Statistics) plainDecode(src []byte) int32 { 104 var buf [1]int32 105 106 decoder := encoding.NewDecoder(s.descr.PhysicalType(), parquet.Encodings.Plain, s.descr, s.mem) 107 decoder.SetData(1, 
src) 108 decoder.(encoding.Int32Decoder).Decode(buf[:]) 109 return buf[0] 110} 111 112func (s *Int32Statistics) minval(a, b int32) int32 { 113 if s.less(a, b) { 114 return a 115 } 116 return b 117} 118 119func (s *Int32Statistics) maxval(a, b int32) int32 { 120 if s.less(a, b) { 121 return b 122 } 123 return a 124} 125 126// MinMaxEqual returns true if both stat objects have the same Min and Max values 127func (s *Int32Statistics) MinMaxEqual(rhs *Int32Statistics) bool { 128 return s.equal(s.min, rhs.min) && s.equal(s.max, rhs.max) 129} 130 131// Equals returns true only if both objects are the same type, have the same min and 132// max values, null count, distinct count and number of values. 133func (s *Int32Statistics) Equals(other TypedStatistics) bool { 134 if s.Type() != other.Type() { 135 return false 136 } 137 rhs, ok := other.(*Int32Statistics) 138 if !ok { 139 return false 140 } 141 142 if s.HasMinMax() != rhs.HasMinMax() { 143 return false 144 } 145 return (s.hasMinMax && s.MinMaxEqual(rhs)) && 146 s.NullCount() == rhs.NullCount() && 147 s.DistinctCount() == rhs.DistinctCount() && 148 s.NumValues() == rhs.NumValues() 149} 150 151func (s *Int32Statistics) getMinMax(values []int32) (min, max int32) { 152 if s.order == schema.SortSIGNED { 153 min, max = utils.GetMinMaxInt32(values) 154 } else { 155 umin, umax := utils.GetMinMaxUint32(arrow.Uint32Traits.CastFromBytes(arrow.Int32Traits.CastToBytes(values))) 156 min, max = int32(umin), int32(umax) 157 } 158 return 159} 160 161func (s *Int32Statistics) getMinMaxSpaced(values []int32, validBits []byte, validBitsOffset int64) (min, max int32) { 162 min = s.defaultMin() 163 max = s.defaultMax() 164 var fn func([]int32) (int32, int32) 165 if s.order == schema.SortSIGNED { 166 fn = utils.GetMinMaxInt32 167 } else { 168 fn = func(v []int32) (int32, int32) { 169 umin, umax := utils.GetMinMaxUint32(arrow.Uint32Traits.CastFromBytes(arrow.Int32Traits.CastToBytes(values))) 170 return int32(umin), int32(umax) 171 } 172 } 
173 174 if s.bitSetReader == nil { 175 s.bitSetReader = utils.NewSetBitRunReader(validBits, validBitsOffset, int64(len(values))) 176 } else { 177 s.bitSetReader.Reset(validBits, validBitsOffset, int64(len(values))) 178 } 179 180 for { 181 run := s.bitSetReader.NextRun() 182 if run.Length == 0 { 183 break 184 } 185 localMin, localMax := fn(values[int(run.Pos):int(run.Pos+run.Length)]) 186 if min > localMin { 187 min = localMin 188 } 189 if max < localMax { 190 max = localMax 191 } 192 } 193 return 194} 195 196func (s *Int32Statistics) Min() int32 { return s.min } 197func (s *Int32Statistics) Max() int32 { return s.max } 198 199// Merge merges the stats from other into this stat object, updating 200// the null count, distinct count, number of values and the min/max if 201// appropriate. 202func (s *Int32Statistics) Merge(other TypedStatistics) { 203 rhs, ok := other.(*Int32Statistics) 204 if !ok { 205 panic("incompatible stat type merge") 206 } 207 208 s.statistics.merge(rhs) 209 if rhs.HasMinMax() { 210 s.SetMinMax(rhs.Min(), rhs.Max()) 211 } 212} 213 214// Update is used to add more values to the current stat object, finding the 215// min and max values etc. 216func (s *Int32Statistics) Update(values []int32, numNull int64) { 217 s.incNulls(numNull) 218 s.nvalues += int64(len(values)) 219 220 if len(values) == 0 { 221 return 222 } 223 224 s.SetMinMax(s.getMinMax(values)) 225} 226 227// UpdateSpaced is just like Update, but for spaced values using validBits to determine 228// and skip null values. 
229func (s *Int32Statistics) UpdateSpaced(values []int32, validBits []byte, validBitsOffset, numNull int64) { 230 s.incNulls(numNull) 231 notnull := int64(len(values)) - numNull 232 s.nvalues += notnull 233 234 if notnull == 0 { 235 return 236 } 237 238 s.SetMinMax(s.getMinMaxSpaced(values, validBits, validBitsOffset)) 239} 240 241// SetMinMax updates the min and max values only if they are not currently set 242// or if argMin is less than the current min / argMax is greater than the current max 243func (s *Int32Statistics) SetMinMax(argMin, argMax int32) { 244 maybeMinMax := s.cleanStat([2]int32{argMin, argMax}) 245 if maybeMinMax == nil { 246 return 247 } 248 249 min := (*maybeMinMax)[0] 250 max := (*maybeMinMax)[1] 251 252 if !s.hasMinMax { 253 s.hasMinMax = true 254 s.min = min 255 s.max = max 256 } else { 257 if !s.less(s.min, min) { 258 s.min = min 259 } 260 if s.less(s.max, max) { 261 s.max = max 262 } 263 } 264} 265 266// EncodeMin returns the encoded min value with plain encoding. 267// 268// ByteArray stats do not include the length in the encoding. 
269func (s *Int32Statistics) EncodeMin() []byte { 270 if s.HasMinMax() { 271 return s.plainEncode(s.min) 272 } 273 return nil 274} 275 276// EncodeMax returns the current encoded max value with plain encoding 277// 278// ByteArray stats do not include the length in the encoding 279func (s *Int32Statistics) EncodeMax() []byte { 280 if s.HasMinMax() { 281 return s.plainEncode(s.max) 282 } 283 return nil 284} 285 286// Encode returns a populated EncodedStatistics object 287func (s *Int32Statistics) Encode() (enc EncodedStatistics, err error) { 288 defer func() { 289 if r := recover(); r != nil { 290 switch r := r.(type) { 291 case error: 292 err = r 293 case string: 294 err = xerrors.New(r) 295 default: 296 err = xerrors.Errorf("unknown error type thrown from panic: %v", r) 297 } 298 } 299 }() 300 if s.HasMinMax() { 301 enc.SetMax(s.EncodeMax()) 302 enc.SetMin(s.EncodeMin()) 303 } 304 if s.HasNullCount() { 305 enc.SetNullCount(s.NullCount()) 306 } 307 if s.HasDistinctCount() { 308 enc.SetDistinctCount(s.DistinctCount()) 309 } 310 return 311} 312 313type minmaxPairInt64 [2]int64 314 315// Int64Statistics is the typed interface for managing stats for a column 316// of Int64 type. 317type Int64Statistics struct { 318 statistics 319 min int64 320 max int64 321 322 bitSetReader utils.SetBitRunReader 323} 324 325// NewInt64Statistics constructs an appropriate stat object type using the 326// given column descriptor and allocator. 
327// 328// Panics if the physical type of descr is not parquet.Type.Int64 329func NewInt64Statistics(descr *schema.Column, mem memory.Allocator) *Int64Statistics { 330 if descr.PhysicalType() != parquet.Types.Int64 { 331 panic(xerrors.Errorf("parquet: invalid type %s for constructing a Int64 stat object", descr.PhysicalType())) 332 } 333 334 return &Int64Statistics{ 335 statistics: statistics{ 336 descr: descr, 337 hasNullCount: true, 338 hasDistinctCount: true, 339 order: descr.SortOrder(), 340 encoder: encoding.NewEncoder(descr.PhysicalType(), parquet.Encodings.Plain, false, descr, mem), 341 mem: mem, 342 }, 343 } 344} 345 346// NewInt64StatisticsFromEncoded will construct a propertly typed statistics object 347// initializing it with the provided information. 348func NewInt64StatisticsFromEncoded(descr *schema.Column, mem memory.Allocator, nvalues int64, encoded StatProvider) *Int64Statistics { 349 ret := NewInt64Statistics(descr, mem) 350 ret.nvalues += nvalues 351 if encoded.IsSetNullCount() { 352 ret.incNulls(encoded.GetNullCount()) 353 } 354 if encoded.IsSetDistinctCount() { 355 ret.incDistinct(encoded.GetDistinctCount()) 356 } 357 358 encodedMin := encoded.GetMin() 359 if encodedMin != nil && len(encodedMin) > 0 { 360 ret.min = ret.plainDecode(encodedMin) 361 } 362 encodedMax := encoded.GetMax() 363 if encodedMax != nil && len(encodedMax) > 0 { 364 ret.max = ret.plainDecode(encodedMax) 365 } 366 ret.hasMinMax = encoded.IsSetMax() || encoded.IsSetMin() 367 return ret 368} 369 370func (s *Int64Statistics) plainEncode(src int64) []byte { 371 s.encoder.(encoding.Int64Encoder).Put([]int64{src}) 372 buf, err := s.encoder.FlushValues() 373 if err != nil { 374 panic(err) // recovered by Encode 375 } 376 defer buf.Release() 377 378 out := make([]byte, buf.Len()) 379 copy(out, buf.Bytes()) 380 return out 381} 382 383func (s *Int64Statistics) plainDecode(src []byte) int64 { 384 var buf [1]int64 385 386 decoder := encoding.NewDecoder(s.descr.PhysicalType(), 
parquet.Encodings.Plain, s.descr, s.mem) 387 decoder.SetData(1, src) 388 decoder.(encoding.Int64Decoder).Decode(buf[:]) 389 return buf[0] 390} 391 392func (s *Int64Statistics) minval(a, b int64) int64 { 393 if s.less(a, b) { 394 return a 395 } 396 return b 397} 398 399func (s *Int64Statistics) maxval(a, b int64) int64 { 400 if s.less(a, b) { 401 return b 402 } 403 return a 404} 405 406// MinMaxEqual returns true if both stat objects have the same Min and Max values 407func (s *Int64Statistics) MinMaxEqual(rhs *Int64Statistics) bool { 408 return s.equal(s.min, rhs.min) && s.equal(s.max, rhs.max) 409} 410 411// Equals returns true only if both objects are the same type, have the same min and 412// max values, null count, distinct count and number of values. 413func (s *Int64Statistics) Equals(other TypedStatistics) bool { 414 if s.Type() != other.Type() { 415 return false 416 } 417 rhs, ok := other.(*Int64Statistics) 418 if !ok { 419 return false 420 } 421 422 if s.HasMinMax() != rhs.HasMinMax() { 423 return false 424 } 425 return (s.hasMinMax && s.MinMaxEqual(rhs)) && 426 s.NullCount() == rhs.NullCount() && 427 s.DistinctCount() == rhs.DistinctCount() && 428 s.NumValues() == rhs.NumValues() 429} 430 431func (s *Int64Statistics) getMinMax(values []int64) (min, max int64) { 432 if s.order == schema.SortSIGNED { 433 min, max = utils.GetMinMaxInt64(values) 434 } else { 435 umin, umax := utils.GetMinMaxUint64(arrow.Uint64Traits.CastFromBytes(arrow.Int64Traits.CastToBytes(values))) 436 min, max = int64(umin), int64(umax) 437 } 438 return 439} 440 441func (s *Int64Statistics) getMinMaxSpaced(values []int64, validBits []byte, validBitsOffset int64) (min, max int64) { 442 min = s.defaultMin() 443 max = s.defaultMax() 444 var fn func([]int64) (int64, int64) 445 if s.order == schema.SortSIGNED { 446 fn = utils.GetMinMaxInt64 447 } else { 448 fn = func(v []int64) (int64, int64) { 449 umin, umax := 
utils.GetMinMaxUint64(arrow.Uint64Traits.CastFromBytes(arrow.Int64Traits.CastToBytes(values))) 450 return int64(umin), int64(umax) 451 } 452 } 453 454 if s.bitSetReader == nil { 455 s.bitSetReader = utils.NewSetBitRunReader(validBits, validBitsOffset, int64(len(values))) 456 } else { 457 s.bitSetReader.Reset(validBits, validBitsOffset, int64(len(values))) 458 } 459 460 for { 461 run := s.bitSetReader.NextRun() 462 if run.Length == 0 { 463 break 464 } 465 localMin, localMax := fn(values[int(run.Pos):int(run.Pos+run.Length)]) 466 if min > localMin { 467 min = localMin 468 } 469 if max < localMax { 470 max = localMax 471 } 472 } 473 return 474} 475 476func (s *Int64Statistics) Min() int64 { return s.min } 477func (s *Int64Statistics) Max() int64 { return s.max } 478 479// Merge merges the stats from other into this stat object, updating 480// the null count, distinct count, number of values and the min/max if 481// appropriate. 482func (s *Int64Statistics) Merge(other TypedStatistics) { 483 rhs, ok := other.(*Int64Statistics) 484 if !ok { 485 panic("incompatible stat type merge") 486 } 487 488 s.statistics.merge(rhs) 489 if rhs.HasMinMax() { 490 s.SetMinMax(rhs.Min(), rhs.Max()) 491 } 492} 493 494// Update is used to add more values to the current stat object, finding the 495// min and max values etc. 496func (s *Int64Statistics) Update(values []int64, numNull int64) { 497 s.incNulls(numNull) 498 s.nvalues += int64(len(values)) 499 500 if len(values) == 0 { 501 return 502 } 503 504 s.SetMinMax(s.getMinMax(values)) 505} 506 507// UpdateSpaced is just like Update, but for spaced values using validBits to determine 508// and skip null values. 
509func (s *Int64Statistics) UpdateSpaced(values []int64, validBits []byte, validBitsOffset, numNull int64) { 510 s.incNulls(numNull) 511 notnull := int64(len(values)) - numNull 512 s.nvalues += notnull 513 514 if notnull == 0 { 515 return 516 } 517 518 s.SetMinMax(s.getMinMaxSpaced(values, validBits, validBitsOffset)) 519} 520 521// SetMinMax updates the min and max values only if they are not currently set 522// or if argMin is less than the current min / argMax is greater than the current max 523func (s *Int64Statistics) SetMinMax(argMin, argMax int64) { 524 maybeMinMax := s.cleanStat([2]int64{argMin, argMax}) 525 if maybeMinMax == nil { 526 return 527 } 528 529 min := (*maybeMinMax)[0] 530 max := (*maybeMinMax)[1] 531 532 if !s.hasMinMax { 533 s.hasMinMax = true 534 s.min = min 535 s.max = max 536 } else { 537 if !s.less(s.min, min) { 538 s.min = min 539 } 540 if s.less(s.max, max) { 541 s.max = max 542 } 543 } 544} 545 546// EncodeMin returns the encoded min value with plain encoding. 547// 548// ByteArray stats do not include the length in the encoding. 
549func (s *Int64Statistics) EncodeMin() []byte { 550 if s.HasMinMax() { 551 return s.plainEncode(s.min) 552 } 553 return nil 554} 555 556// EncodeMax returns the current encoded max value with plain encoding 557// 558// ByteArray stats do not include the length in the encoding 559func (s *Int64Statistics) EncodeMax() []byte { 560 if s.HasMinMax() { 561 return s.plainEncode(s.max) 562 } 563 return nil 564} 565 566// Encode returns a populated EncodedStatistics object 567func (s *Int64Statistics) Encode() (enc EncodedStatistics, err error) { 568 defer func() { 569 if r := recover(); r != nil { 570 switch r := r.(type) { 571 case error: 572 err = r 573 case string: 574 err = xerrors.New(r) 575 default: 576 err = xerrors.Errorf("unknown error type thrown from panic: %v", r) 577 } 578 } 579 }() 580 if s.HasMinMax() { 581 enc.SetMax(s.EncodeMax()) 582 enc.SetMin(s.EncodeMin()) 583 } 584 if s.HasNullCount() { 585 enc.SetNullCount(s.NullCount()) 586 } 587 if s.HasDistinctCount() { 588 enc.SetDistinctCount(s.DistinctCount()) 589 } 590 return 591} 592 593type minmaxPairInt96 [2]parquet.Int96 594 595// Int96Statistics is the typed interface for managing stats for a column 596// of Int96 type. 597type Int96Statistics struct { 598 statistics 599 min parquet.Int96 600 max parquet.Int96 601 602 bitSetReader utils.SetBitRunReader 603} 604 605// NewInt96Statistics constructs an appropriate stat object type using the 606// given column descriptor and allocator. 
607// 608// Panics if the physical type of descr is not parquet.Type.Int96 609func NewInt96Statistics(descr *schema.Column, mem memory.Allocator) *Int96Statistics { 610 if descr.PhysicalType() != parquet.Types.Int96 { 611 panic(xerrors.Errorf("parquet: invalid type %s for constructing a Int96 stat object", descr.PhysicalType())) 612 } 613 614 return &Int96Statistics{ 615 statistics: statistics{ 616 descr: descr, 617 hasNullCount: true, 618 hasDistinctCount: true, 619 order: descr.SortOrder(), 620 encoder: encoding.NewEncoder(descr.PhysicalType(), parquet.Encodings.Plain, false, descr, mem), 621 mem: mem, 622 }, 623 } 624} 625 626// NewInt96StatisticsFromEncoded will construct a propertly typed statistics object 627// initializing it with the provided information. 628func NewInt96StatisticsFromEncoded(descr *schema.Column, mem memory.Allocator, nvalues int64, encoded StatProvider) *Int96Statistics { 629 ret := NewInt96Statistics(descr, mem) 630 ret.nvalues += nvalues 631 if encoded.IsSetNullCount() { 632 ret.incNulls(encoded.GetNullCount()) 633 } 634 if encoded.IsSetDistinctCount() { 635 ret.incDistinct(encoded.GetDistinctCount()) 636 } 637 638 encodedMin := encoded.GetMin() 639 if encodedMin != nil && len(encodedMin) > 0 { 640 ret.min = ret.plainDecode(encodedMin) 641 } 642 encodedMax := encoded.GetMax() 643 if encodedMax != nil && len(encodedMax) > 0 { 644 ret.max = ret.plainDecode(encodedMax) 645 } 646 ret.hasMinMax = encoded.IsSetMax() || encoded.IsSetMin() 647 return ret 648} 649 650func (s *Int96Statistics) plainEncode(src parquet.Int96) []byte { 651 s.encoder.(encoding.Int96Encoder).Put([]parquet.Int96{src}) 652 buf, err := s.encoder.FlushValues() 653 if err != nil { 654 panic(err) // recovered by Encode 655 } 656 defer buf.Release() 657 658 out := make([]byte, buf.Len()) 659 copy(out, buf.Bytes()) 660 return out 661} 662 663func (s *Int96Statistics) plainDecode(src []byte) parquet.Int96 { 664 var buf [1]parquet.Int96 665 666 decoder := 
encoding.NewDecoder(s.descr.PhysicalType(), parquet.Encodings.Plain, s.descr, s.mem) 667 decoder.SetData(1, src) 668 decoder.(encoding.Int96Decoder).Decode(buf[:]) 669 return buf[0] 670} 671 672func (s *Int96Statistics) minval(a, b parquet.Int96) parquet.Int96 { 673 if s.less(a, b) { 674 return a 675 } 676 return b 677} 678 679func (s *Int96Statistics) maxval(a, b parquet.Int96) parquet.Int96 { 680 if s.less(a, b) { 681 return b 682 } 683 return a 684} 685 686// MinMaxEqual returns true if both stat objects have the same Min and Max values 687func (s *Int96Statistics) MinMaxEqual(rhs *Int96Statistics) bool { 688 return s.equal(s.min, rhs.min) && s.equal(s.max, rhs.max) 689} 690 691// Equals returns true only if both objects are the same type, have the same min and 692// max values, null count, distinct count and number of values. 693func (s *Int96Statistics) Equals(other TypedStatistics) bool { 694 if s.Type() != other.Type() { 695 return false 696 } 697 rhs, ok := other.(*Int96Statistics) 698 if !ok { 699 return false 700 } 701 702 if s.HasMinMax() != rhs.HasMinMax() { 703 return false 704 } 705 return (s.hasMinMax && s.MinMaxEqual(rhs)) && 706 s.NullCount() == rhs.NullCount() && 707 s.DistinctCount() == rhs.DistinctCount() && 708 s.NumValues() == rhs.NumValues() 709} 710 711func (s *Int96Statistics) getMinMax(values []parquet.Int96) (min, max parquet.Int96) { 712 defMin := s.defaultMin() 713 defMax := s.defaultMax() 714 715 min = defMin 716 max = defMax 717 718 for _, v := range values { 719 min = s.minval(min, v) 720 max = s.maxval(max, v) 721 } 722 return 723} 724 725func (s *Int96Statistics) getMinMaxSpaced(values []parquet.Int96, validBits []byte, validBitsOffset int64) (min, max parquet.Int96) { 726 min = s.defaultMin() 727 max = s.defaultMax() 728 729 if s.bitSetReader == nil { 730 s.bitSetReader = utils.NewSetBitRunReader(validBits, validBitsOffset, int64(len(values))) 731 } else { 732 s.bitSetReader.Reset(validBits, validBitsOffset, int64(len(values))) 
733 } 734 735 for { 736 run := s.bitSetReader.NextRun() 737 if run.Length == 0 { 738 break 739 } 740 for _, v := range values[int(run.Pos):int(run.Pos+run.Length)] { 741 min = s.minval(min, v) 742 max = s.maxval(max, v) 743 } 744 } 745 return 746} 747 748func (s *Int96Statistics) Min() parquet.Int96 { return s.min } 749func (s *Int96Statistics) Max() parquet.Int96 { return s.max } 750 751// Merge merges the stats from other into this stat object, updating 752// the null count, distinct count, number of values and the min/max if 753// appropriate. 754func (s *Int96Statistics) Merge(other TypedStatistics) { 755 rhs, ok := other.(*Int96Statistics) 756 if !ok { 757 panic("incompatible stat type merge") 758 } 759 760 s.statistics.merge(rhs) 761 if rhs.HasMinMax() { 762 s.SetMinMax(rhs.Min(), rhs.Max()) 763 } 764} 765 766// Update is used to add more values to the current stat object, finding the 767// min and max values etc. 768func (s *Int96Statistics) Update(values []parquet.Int96, numNull int64) { 769 s.incNulls(numNull) 770 s.nvalues += int64(len(values)) 771 772 if len(values) == 0 { 773 return 774 } 775 776 s.SetMinMax(s.getMinMax(values)) 777} 778 779// UpdateSpaced is just like Update, but for spaced values using validBits to determine 780// and skip null values. 
781func (s *Int96Statistics) UpdateSpaced(values []parquet.Int96, validBits []byte, validBitsOffset, numNull int64) { 782 s.incNulls(numNull) 783 notnull := int64(len(values)) - numNull 784 s.nvalues += notnull 785 786 if notnull == 0 { 787 return 788 } 789 790 s.SetMinMax(s.getMinMaxSpaced(values, validBits, validBitsOffset)) 791} 792 793// SetMinMax updates the min and max values only if they are not currently set 794// or if argMin is less than the current min / argMax is greater than the current max 795func (s *Int96Statistics) SetMinMax(argMin, argMax parquet.Int96) { 796 maybeMinMax := s.cleanStat([2]parquet.Int96{argMin, argMax}) 797 if maybeMinMax == nil { 798 return 799 } 800 801 min := (*maybeMinMax)[0] 802 max := (*maybeMinMax)[1] 803 804 if !s.hasMinMax { 805 s.hasMinMax = true 806 s.min = min 807 s.max = max 808 } else { 809 if !s.less(s.min, min) { 810 s.min = min 811 } 812 if s.less(s.max, max) { 813 s.max = max 814 } 815 } 816} 817 818// EncodeMin returns the encoded min value with plain encoding. 819// 820// ByteArray stats do not include the length in the encoding. 
821func (s *Int96Statistics) EncodeMin() []byte { 822 if s.HasMinMax() { 823 return s.plainEncode(s.min) 824 } 825 return nil 826} 827 828// EncodeMax returns the current encoded max value with plain encoding 829// 830// ByteArray stats do not include the length in the encoding 831func (s *Int96Statistics) EncodeMax() []byte { 832 if s.HasMinMax() { 833 return s.plainEncode(s.max) 834 } 835 return nil 836} 837 838// Encode returns a populated EncodedStatistics object 839func (s *Int96Statistics) Encode() (enc EncodedStatistics, err error) { 840 defer func() { 841 if r := recover(); r != nil { 842 switch r := r.(type) { 843 case error: 844 err = r 845 case string: 846 err = xerrors.New(r) 847 default: 848 err = xerrors.Errorf("unknown error type thrown from panic: %v", r) 849 } 850 } 851 }() 852 if s.HasMinMax() { 853 enc.SetMax(s.EncodeMax()) 854 enc.SetMin(s.EncodeMin()) 855 } 856 if s.HasNullCount() { 857 enc.SetNullCount(s.NullCount()) 858 } 859 if s.HasDistinctCount() { 860 enc.SetDistinctCount(s.DistinctCount()) 861 } 862 return 863} 864 865type minmaxPairFloat32 [2]float32 866 867// Float32Statistics is the typed interface for managing stats for a column 868// of Float32 type. 869type Float32Statistics struct { 870 statistics 871 min float32 872 max float32 873 874 bitSetReader utils.SetBitRunReader 875} 876 877// NewFloat32Statistics constructs an appropriate stat object type using the 878// given column descriptor and allocator. 
879// 880// Panics if the physical type of descr is not parquet.Type.Float 881func NewFloat32Statistics(descr *schema.Column, mem memory.Allocator) *Float32Statistics { 882 if descr.PhysicalType() != parquet.Types.Float { 883 panic(xerrors.Errorf("parquet: invalid type %s for constructing a Float32 stat object", descr.PhysicalType())) 884 } 885 886 return &Float32Statistics{ 887 statistics: statistics{ 888 descr: descr, 889 hasNullCount: true, 890 hasDistinctCount: true, 891 order: descr.SortOrder(), 892 encoder: encoding.NewEncoder(descr.PhysicalType(), parquet.Encodings.Plain, false, descr, mem), 893 mem: mem, 894 }, 895 } 896} 897 898// NewFloat32StatisticsFromEncoded will construct a propertly typed statistics object 899// initializing it with the provided information. 900func NewFloat32StatisticsFromEncoded(descr *schema.Column, mem memory.Allocator, nvalues int64, encoded StatProvider) *Float32Statistics { 901 ret := NewFloat32Statistics(descr, mem) 902 ret.nvalues += nvalues 903 if encoded.IsSetNullCount() { 904 ret.incNulls(encoded.GetNullCount()) 905 } 906 if encoded.IsSetDistinctCount() { 907 ret.incDistinct(encoded.GetDistinctCount()) 908 } 909 910 encodedMin := encoded.GetMin() 911 if encodedMin != nil && len(encodedMin) > 0 { 912 ret.min = ret.plainDecode(encodedMin) 913 } 914 encodedMax := encoded.GetMax() 915 if encodedMax != nil && len(encodedMax) > 0 { 916 ret.max = ret.plainDecode(encodedMax) 917 } 918 ret.hasMinMax = encoded.IsSetMax() || encoded.IsSetMin() 919 return ret 920} 921 922func (s *Float32Statistics) plainEncode(src float32) []byte { 923 s.encoder.(encoding.Float32Encoder).Put([]float32{src}) 924 buf, err := s.encoder.FlushValues() 925 if err != nil { 926 panic(err) // recovered by Encode 927 } 928 defer buf.Release() 929 930 out := make([]byte, buf.Len()) 931 copy(out, buf.Bytes()) 932 return out 933} 934 935func (s *Float32Statistics) plainDecode(src []byte) float32 { 936 var buf [1]float32 937 938 decoder := 
encoding.NewDecoder(s.descr.PhysicalType(), parquet.Encodings.Plain, s.descr, s.mem) 939 decoder.SetData(1, src) 940 decoder.(encoding.Float32Decoder).Decode(buf[:]) 941 return buf[0] 942} 943 944func (s *Float32Statistics) minval(a, b float32) float32 { 945 if s.less(a, b) { 946 return a 947 } 948 return b 949} 950 951func (s *Float32Statistics) maxval(a, b float32) float32 { 952 if s.less(a, b) { 953 return b 954 } 955 return a 956} 957 958// MinMaxEqual returns true if both stat objects have the same Min and Max values 959func (s *Float32Statistics) MinMaxEqual(rhs *Float32Statistics) bool { 960 return s.equal(s.min, rhs.min) && s.equal(s.max, rhs.max) 961} 962 963// Equals returns true only if both objects are the same type, have the same min and 964// max values, null count, distinct count and number of values. 965func (s *Float32Statistics) Equals(other TypedStatistics) bool { 966 if s.Type() != other.Type() { 967 return false 968 } 969 rhs, ok := other.(*Float32Statistics) 970 if !ok { 971 return false 972 } 973 974 if s.HasMinMax() != rhs.HasMinMax() { 975 return false 976 } 977 return (s.hasMinMax && s.MinMaxEqual(rhs)) && 978 s.NullCount() == rhs.NullCount() && 979 s.DistinctCount() == rhs.DistinctCount() && 980 s.NumValues() == rhs.NumValues() 981} 982 983func (s *Float32Statistics) coalesce(val, fallback float32) float32 { 984 if math.IsNaN(float64(val)) { 985 return fallback 986 } 987 return val 988} 989 990func (s *Float32Statistics) getMinMax(values []float32) (min, max float32) { 991 defMin := s.defaultMin() 992 defMax := s.defaultMax() 993 994 min = defMin 995 max = defMax 996 997 for _, v := range values { 998 min = s.minval(min, s.coalesce(v, defMin)) 999 max = s.maxval(max, s.coalesce(v, defMax)) 1000 } 1001 return 1002} 1003 1004func (s *Float32Statistics) getMinMaxSpaced(values []float32, validBits []byte, validBitsOffset int64) (min, max float32) { 1005 min = s.defaultMin() 1006 max = s.defaultMax() 1007 1008 if s.bitSetReader == nil { 1009 
s.bitSetReader = utils.NewSetBitRunReader(validBits, validBitsOffset, int64(len(values))) 1010 } else { 1011 s.bitSetReader.Reset(validBits, validBitsOffset, int64(len(values))) 1012 } 1013 1014 for { 1015 run := s.bitSetReader.NextRun() 1016 if run.Length == 0 { 1017 break 1018 } 1019 for _, v := range values[int(run.Pos):int(run.Pos+run.Length)] { 1020 min = s.minval(min, coalesce(v, s.defaultMin()).(float32)) 1021 max = s.maxval(max, coalesce(v, s.defaultMax()).(float32)) 1022 } 1023 } 1024 return 1025} 1026 1027func (s *Float32Statistics) Min() float32 { return s.min } 1028func (s *Float32Statistics) Max() float32 { return s.max } 1029 1030// Merge merges the stats from other into this stat object, updating 1031// the null count, distinct count, number of values and the min/max if 1032// appropriate. 1033func (s *Float32Statistics) Merge(other TypedStatistics) { 1034 rhs, ok := other.(*Float32Statistics) 1035 if !ok { 1036 panic("incompatible stat type merge") 1037 } 1038 1039 s.statistics.merge(rhs) 1040 if rhs.HasMinMax() { 1041 s.SetMinMax(rhs.Min(), rhs.Max()) 1042 } 1043} 1044 1045// Update is used to add more values to the current stat object, finding the 1046// min and max values etc. 1047func (s *Float32Statistics) Update(values []float32, numNull int64) { 1048 s.incNulls(numNull) 1049 s.nvalues += int64(len(values)) 1050 1051 if len(values) == 0 { 1052 return 1053 } 1054 1055 s.SetMinMax(s.getMinMax(values)) 1056} 1057 1058// UpdateSpaced is just like Update, but for spaced values using validBits to determine 1059// and skip null values. 
1060func (s *Float32Statistics) UpdateSpaced(values []float32, validBits []byte, validBitsOffset, numNull int64) { 1061 s.incNulls(numNull) 1062 notnull := int64(len(values)) - numNull 1063 s.nvalues += notnull 1064 1065 if notnull == 0 { 1066 return 1067 } 1068 1069 s.SetMinMax(s.getMinMaxSpaced(values, validBits, validBitsOffset)) 1070} 1071 1072// SetMinMax updates the min and max values only if they are not currently set 1073// or if argMin is less than the current min / argMax is greater than the current max 1074func (s *Float32Statistics) SetMinMax(argMin, argMax float32) { 1075 maybeMinMax := s.cleanStat([2]float32{argMin, argMax}) 1076 if maybeMinMax == nil { 1077 return 1078 } 1079 1080 min := (*maybeMinMax)[0] 1081 max := (*maybeMinMax)[1] 1082 1083 if !s.hasMinMax { 1084 s.hasMinMax = true 1085 s.min = min 1086 s.max = max 1087 } else { 1088 if !s.less(s.min, min) { 1089 s.min = min 1090 } 1091 if s.less(s.max, max) { 1092 s.max = max 1093 } 1094 } 1095} 1096 1097// EncodeMin returns the encoded min value with plain encoding. 1098// 1099// ByteArray stats do not include the length in the encoding. 
1100func (s *Float32Statistics) EncodeMin() []byte { 1101 if s.HasMinMax() { 1102 return s.plainEncode(s.min) 1103 } 1104 return nil 1105} 1106 1107// EncodeMax returns the current encoded max value with plain encoding 1108// 1109// ByteArray stats do not include the length in the encoding 1110func (s *Float32Statistics) EncodeMax() []byte { 1111 if s.HasMinMax() { 1112 return s.plainEncode(s.max) 1113 } 1114 return nil 1115} 1116 1117// Encode returns a populated EncodedStatistics object 1118func (s *Float32Statistics) Encode() (enc EncodedStatistics, err error) { 1119 defer func() { 1120 if r := recover(); r != nil { 1121 switch r := r.(type) { 1122 case error: 1123 err = r 1124 case string: 1125 err = xerrors.New(r) 1126 default: 1127 err = xerrors.Errorf("unknown error type thrown from panic: %v", r) 1128 } 1129 } 1130 }() 1131 if s.HasMinMax() { 1132 enc.SetMax(s.EncodeMax()) 1133 enc.SetMin(s.EncodeMin()) 1134 } 1135 if s.HasNullCount() { 1136 enc.SetNullCount(s.NullCount()) 1137 } 1138 if s.HasDistinctCount() { 1139 enc.SetDistinctCount(s.DistinctCount()) 1140 } 1141 return 1142} 1143 1144type minmaxPairFloat64 [2]float64 1145 1146// Float64Statistics is the typed interface for managing stats for a column 1147// of Float64 type. 1148type Float64Statistics struct { 1149 statistics 1150 min float64 1151 max float64 1152 1153 bitSetReader utils.SetBitRunReader 1154} 1155 1156// NewFloat64Statistics constructs an appropriate stat object type using the 1157// given column descriptor and allocator. 
1158// 1159// Panics if the physical type of descr is not parquet.Type.Double 1160func NewFloat64Statistics(descr *schema.Column, mem memory.Allocator) *Float64Statistics { 1161 if descr.PhysicalType() != parquet.Types.Double { 1162 panic(xerrors.Errorf("parquet: invalid type %s for constructing a Float64 stat object", descr.PhysicalType())) 1163 } 1164 1165 return &Float64Statistics{ 1166 statistics: statistics{ 1167 descr: descr, 1168 hasNullCount: true, 1169 hasDistinctCount: true, 1170 order: descr.SortOrder(), 1171 encoder: encoding.NewEncoder(descr.PhysicalType(), parquet.Encodings.Plain, false, descr, mem), 1172 mem: mem, 1173 }, 1174 } 1175} 1176 1177// NewFloat64StatisticsFromEncoded will construct a propertly typed statistics object 1178// initializing it with the provided information. 1179func NewFloat64StatisticsFromEncoded(descr *schema.Column, mem memory.Allocator, nvalues int64, encoded StatProvider) *Float64Statistics { 1180 ret := NewFloat64Statistics(descr, mem) 1181 ret.nvalues += nvalues 1182 if encoded.IsSetNullCount() { 1183 ret.incNulls(encoded.GetNullCount()) 1184 } 1185 if encoded.IsSetDistinctCount() { 1186 ret.incDistinct(encoded.GetDistinctCount()) 1187 } 1188 1189 encodedMin := encoded.GetMin() 1190 if encodedMin != nil && len(encodedMin) > 0 { 1191 ret.min = ret.plainDecode(encodedMin) 1192 } 1193 encodedMax := encoded.GetMax() 1194 if encodedMax != nil && len(encodedMax) > 0 { 1195 ret.max = ret.plainDecode(encodedMax) 1196 } 1197 ret.hasMinMax = encoded.IsSetMax() || encoded.IsSetMin() 1198 return ret 1199} 1200 1201func (s *Float64Statistics) plainEncode(src float64) []byte { 1202 s.encoder.(encoding.Float64Encoder).Put([]float64{src}) 1203 buf, err := s.encoder.FlushValues() 1204 if err != nil { 1205 panic(err) // recovered by Encode 1206 } 1207 defer buf.Release() 1208 1209 out := make([]byte, buf.Len()) 1210 copy(out, buf.Bytes()) 1211 return out 1212} 1213 1214func (s *Float64Statistics) plainDecode(src []byte) float64 { 1215 
var buf [1]float64 1216 1217 decoder := encoding.NewDecoder(s.descr.PhysicalType(), parquet.Encodings.Plain, s.descr, s.mem) 1218 decoder.SetData(1, src) 1219 decoder.(encoding.Float64Decoder).Decode(buf[:]) 1220 return buf[0] 1221} 1222 1223func (s *Float64Statistics) minval(a, b float64) float64 { 1224 if s.less(a, b) { 1225 return a 1226 } 1227 return b 1228} 1229 1230func (s *Float64Statistics) maxval(a, b float64) float64 { 1231 if s.less(a, b) { 1232 return b 1233 } 1234 return a 1235} 1236 1237// MinMaxEqual returns true if both stat objects have the same Min and Max values 1238func (s *Float64Statistics) MinMaxEqual(rhs *Float64Statistics) bool { 1239 return s.equal(s.min, rhs.min) && s.equal(s.max, rhs.max) 1240} 1241 1242// Equals returns true only if both objects are the same type, have the same min and 1243// max values, null count, distinct count and number of values. 1244func (s *Float64Statistics) Equals(other TypedStatistics) bool { 1245 if s.Type() != other.Type() { 1246 return false 1247 } 1248 rhs, ok := other.(*Float64Statistics) 1249 if !ok { 1250 return false 1251 } 1252 1253 if s.HasMinMax() != rhs.HasMinMax() { 1254 return false 1255 } 1256 return (s.hasMinMax && s.MinMaxEqual(rhs)) && 1257 s.NullCount() == rhs.NullCount() && 1258 s.DistinctCount() == rhs.DistinctCount() && 1259 s.NumValues() == rhs.NumValues() 1260} 1261 1262func (s *Float64Statistics) coalesce(val, fallback float64) float64 { 1263 if math.IsNaN(float64(val)) { 1264 return fallback 1265 } 1266 return val 1267} 1268 1269func (s *Float64Statistics) getMinMax(values []float64) (min, max float64) { 1270 defMin := s.defaultMin() 1271 defMax := s.defaultMax() 1272 1273 min = defMin 1274 max = defMax 1275 1276 for _, v := range values { 1277 min = s.minval(min, s.coalesce(v, defMin)) 1278 max = s.maxval(max, s.coalesce(v, defMax)) 1279 } 1280 return 1281} 1282 1283func (s *Float64Statistics) getMinMaxSpaced(values []float64, validBits []byte, validBitsOffset int64) (min, max 
float64) { 1284 min = s.defaultMin() 1285 max = s.defaultMax() 1286 1287 if s.bitSetReader == nil { 1288 s.bitSetReader = utils.NewSetBitRunReader(validBits, validBitsOffset, int64(len(values))) 1289 } else { 1290 s.bitSetReader.Reset(validBits, validBitsOffset, int64(len(values))) 1291 } 1292 1293 for { 1294 run := s.bitSetReader.NextRun() 1295 if run.Length == 0 { 1296 break 1297 } 1298 for _, v := range values[int(run.Pos):int(run.Pos+run.Length)] { 1299 min = s.minval(min, coalesce(v, s.defaultMin()).(float64)) 1300 max = s.maxval(max, coalesce(v, s.defaultMax()).(float64)) 1301 } 1302 } 1303 return 1304} 1305 1306func (s *Float64Statistics) Min() float64 { return s.min } 1307func (s *Float64Statistics) Max() float64 { return s.max } 1308 1309// Merge merges the stats from other into this stat object, updating 1310// the null count, distinct count, number of values and the min/max if 1311// appropriate. 1312func (s *Float64Statistics) Merge(other TypedStatistics) { 1313 rhs, ok := other.(*Float64Statistics) 1314 if !ok { 1315 panic("incompatible stat type merge") 1316 } 1317 1318 s.statistics.merge(rhs) 1319 if rhs.HasMinMax() { 1320 s.SetMinMax(rhs.Min(), rhs.Max()) 1321 } 1322} 1323 1324// Update is used to add more values to the current stat object, finding the 1325// min and max values etc. 1326func (s *Float64Statistics) Update(values []float64, numNull int64) { 1327 s.incNulls(numNull) 1328 s.nvalues += int64(len(values)) 1329 1330 if len(values) == 0 { 1331 return 1332 } 1333 1334 s.SetMinMax(s.getMinMax(values)) 1335} 1336 1337// UpdateSpaced is just like Update, but for spaced values using validBits to determine 1338// and skip null values. 
1339func (s *Float64Statistics) UpdateSpaced(values []float64, validBits []byte, validBitsOffset, numNull int64) { 1340 s.incNulls(numNull) 1341 notnull := int64(len(values)) - numNull 1342 s.nvalues += notnull 1343 1344 if notnull == 0 { 1345 return 1346 } 1347 1348 s.SetMinMax(s.getMinMaxSpaced(values, validBits, validBitsOffset)) 1349} 1350 1351// SetMinMax updates the min and max values only if they are not currently set 1352// or if argMin is less than the current min / argMax is greater than the current max 1353func (s *Float64Statistics) SetMinMax(argMin, argMax float64) { 1354 maybeMinMax := s.cleanStat([2]float64{argMin, argMax}) 1355 if maybeMinMax == nil { 1356 return 1357 } 1358 1359 min := (*maybeMinMax)[0] 1360 max := (*maybeMinMax)[1] 1361 1362 if !s.hasMinMax { 1363 s.hasMinMax = true 1364 s.min = min 1365 s.max = max 1366 } else { 1367 if !s.less(s.min, min) { 1368 s.min = min 1369 } 1370 if s.less(s.max, max) { 1371 s.max = max 1372 } 1373 } 1374} 1375 1376// EncodeMin returns the encoded min value with plain encoding. 1377// 1378// ByteArray stats do not include the length in the encoding. 
1379func (s *Float64Statistics) EncodeMin() []byte { 1380 if s.HasMinMax() { 1381 return s.plainEncode(s.min) 1382 } 1383 return nil 1384} 1385 1386// EncodeMax returns the current encoded max value with plain encoding 1387// 1388// ByteArray stats do not include the length in the encoding 1389func (s *Float64Statistics) EncodeMax() []byte { 1390 if s.HasMinMax() { 1391 return s.plainEncode(s.max) 1392 } 1393 return nil 1394} 1395 1396// Encode returns a populated EncodedStatistics object 1397func (s *Float64Statistics) Encode() (enc EncodedStatistics, err error) { 1398 defer func() { 1399 if r := recover(); r != nil { 1400 switch r := r.(type) { 1401 case error: 1402 err = r 1403 case string: 1404 err = xerrors.New(r) 1405 default: 1406 err = xerrors.Errorf("unknown error type thrown from panic: %v", r) 1407 } 1408 } 1409 }() 1410 if s.HasMinMax() { 1411 enc.SetMax(s.EncodeMax()) 1412 enc.SetMin(s.EncodeMin()) 1413 } 1414 if s.HasNullCount() { 1415 enc.SetNullCount(s.NullCount()) 1416 } 1417 if s.HasDistinctCount() { 1418 enc.SetDistinctCount(s.DistinctCount()) 1419 } 1420 return 1421} 1422 1423type minmaxPairBoolean [2]bool 1424 1425// BooleanStatistics is the typed interface for managing stats for a column 1426// of Boolean type. 1427type BooleanStatistics struct { 1428 statistics 1429 min bool 1430 max bool 1431 1432 bitSetReader utils.SetBitRunReader 1433} 1434 1435// NewBooleanStatistics constructs an appropriate stat object type using the 1436// given column descriptor and allocator. 
1437// 1438// Panics if the physical type of descr is not parquet.Type.Boolean 1439func NewBooleanStatistics(descr *schema.Column, mem memory.Allocator) *BooleanStatistics { 1440 if descr.PhysicalType() != parquet.Types.Boolean { 1441 panic(xerrors.Errorf("parquet: invalid type %s for constructing a Boolean stat object", descr.PhysicalType())) 1442 } 1443 1444 return &BooleanStatistics{ 1445 statistics: statistics{ 1446 descr: descr, 1447 hasNullCount: true, 1448 hasDistinctCount: true, 1449 order: descr.SortOrder(), 1450 encoder: encoding.NewEncoder(descr.PhysicalType(), parquet.Encodings.Plain, false, descr, mem), 1451 mem: mem, 1452 }, 1453 } 1454} 1455 1456// NewBooleanStatisticsFromEncoded will construct a propertly typed statistics object 1457// initializing it with the provided information. 1458func NewBooleanStatisticsFromEncoded(descr *schema.Column, mem memory.Allocator, nvalues int64, encoded StatProvider) *BooleanStatistics { 1459 ret := NewBooleanStatistics(descr, mem) 1460 ret.nvalues += nvalues 1461 if encoded.IsSetNullCount() { 1462 ret.incNulls(encoded.GetNullCount()) 1463 } 1464 if encoded.IsSetDistinctCount() { 1465 ret.incDistinct(encoded.GetDistinctCount()) 1466 } 1467 1468 encodedMin := encoded.GetMin() 1469 if encodedMin != nil && len(encodedMin) > 0 { 1470 ret.min = ret.plainDecode(encodedMin) 1471 } 1472 encodedMax := encoded.GetMax() 1473 if encodedMax != nil && len(encodedMax) > 0 { 1474 ret.max = ret.plainDecode(encodedMax) 1475 } 1476 ret.hasMinMax = encoded.IsSetMax() || encoded.IsSetMin() 1477 return ret 1478} 1479 1480func (s *BooleanStatistics) plainEncode(src bool) []byte { 1481 s.encoder.(encoding.BooleanEncoder).Put([]bool{src}) 1482 buf, err := s.encoder.FlushValues() 1483 if err != nil { 1484 panic(err) // recovered by Encode 1485 } 1486 defer buf.Release() 1487 1488 out := make([]byte, buf.Len()) 1489 copy(out, buf.Bytes()) 1490 return out 1491} 1492 1493func (s *BooleanStatistics) plainDecode(src []byte) bool { 1494 var buf 
[1]bool 1495 1496 decoder := encoding.NewDecoder(s.descr.PhysicalType(), parquet.Encodings.Plain, s.descr, s.mem) 1497 decoder.SetData(1, src) 1498 decoder.(encoding.BooleanDecoder).Decode(buf[:]) 1499 return buf[0] 1500} 1501 1502func (s *BooleanStatistics) minval(a, b bool) bool { 1503 if s.less(a, b) { 1504 return a 1505 } 1506 return b 1507} 1508 1509func (s *BooleanStatistics) maxval(a, b bool) bool { 1510 if s.less(a, b) { 1511 return b 1512 } 1513 return a 1514} 1515 1516// MinMaxEqual returns true if both stat objects have the same Min and Max values 1517func (s *BooleanStatistics) MinMaxEqual(rhs *BooleanStatistics) bool { 1518 return s.equal(s.min, rhs.min) && s.equal(s.max, rhs.max) 1519} 1520 1521// Equals returns true only if both objects are the same type, have the same min and 1522// max values, null count, distinct count and number of values. 1523func (s *BooleanStatistics) Equals(other TypedStatistics) bool { 1524 if s.Type() != other.Type() { 1525 return false 1526 } 1527 rhs, ok := other.(*BooleanStatistics) 1528 if !ok { 1529 return false 1530 } 1531 1532 if s.HasMinMax() != rhs.HasMinMax() { 1533 return false 1534 } 1535 return (s.hasMinMax && s.MinMaxEqual(rhs)) && 1536 s.NullCount() == rhs.NullCount() && 1537 s.DistinctCount() == rhs.DistinctCount() && 1538 s.NumValues() == rhs.NumValues() 1539} 1540 1541func (s *BooleanStatistics) getMinMax(values []bool) (min, max bool) { 1542 defMin := s.defaultMin() 1543 defMax := s.defaultMax() 1544 1545 min = defMin 1546 max = defMax 1547 1548 for _, v := range values { 1549 min = s.minval(min, v) 1550 max = s.maxval(max, v) 1551 } 1552 return 1553} 1554 1555func (s *BooleanStatistics) getMinMaxSpaced(values []bool, validBits []byte, validBitsOffset int64) (min, max bool) { 1556 min = s.defaultMin() 1557 max = s.defaultMax() 1558 1559 if s.bitSetReader == nil { 1560 s.bitSetReader = utils.NewSetBitRunReader(validBits, validBitsOffset, int64(len(values))) 1561 } else { 1562 
s.bitSetReader.Reset(validBits, validBitsOffset, int64(len(values))) 1563 } 1564 1565 for { 1566 run := s.bitSetReader.NextRun() 1567 if run.Length == 0 { 1568 break 1569 } 1570 for _, v := range values[int(run.Pos):int(run.Pos+run.Length)] { 1571 min = s.minval(min, v) 1572 max = s.maxval(max, v) 1573 } 1574 } 1575 return 1576} 1577 1578func (s *BooleanStatistics) Min() bool { return s.min } 1579func (s *BooleanStatistics) Max() bool { return s.max } 1580 1581// Merge merges the stats from other into this stat object, updating 1582// the null count, distinct count, number of values and the min/max if 1583// appropriate. 1584func (s *BooleanStatistics) Merge(other TypedStatistics) { 1585 rhs, ok := other.(*BooleanStatistics) 1586 if !ok { 1587 panic("incompatible stat type merge") 1588 } 1589 1590 s.statistics.merge(rhs) 1591 if rhs.HasMinMax() { 1592 s.SetMinMax(rhs.Min(), rhs.Max()) 1593 } 1594} 1595 1596// Update is used to add more values to the current stat object, finding the 1597// min and max values etc. 1598func (s *BooleanStatistics) Update(values []bool, numNull int64) { 1599 s.incNulls(numNull) 1600 s.nvalues += int64(len(values)) 1601 1602 if len(values) == 0 { 1603 return 1604 } 1605 1606 s.SetMinMax(s.getMinMax(values)) 1607} 1608 1609// UpdateSpaced is just like Update, but for spaced values using validBits to determine 1610// and skip null values. 
1611func (s *BooleanStatistics) UpdateSpaced(values []bool, validBits []byte, validBitsOffset, numNull int64) { 1612 s.incNulls(numNull) 1613 notnull := int64(len(values)) - numNull 1614 s.nvalues += notnull 1615 1616 if notnull == 0 { 1617 return 1618 } 1619 1620 s.SetMinMax(s.getMinMaxSpaced(values, validBits, validBitsOffset)) 1621} 1622 1623// SetMinMax updates the min and max values only if they are not currently set 1624// or if argMin is less than the current min / argMax is greater than the current max 1625func (s *BooleanStatistics) SetMinMax(argMin, argMax bool) { 1626 maybeMinMax := s.cleanStat([2]bool{argMin, argMax}) 1627 if maybeMinMax == nil { 1628 return 1629 } 1630 1631 min := (*maybeMinMax)[0] 1632 max := (*maybeMinMax)[1] 1633 1634 if !s.hasMinMax { 1635 s.hasMinMax = true 1636 s.min = min 1637 s.max = max 1638 } else { 1639 if !s.less(s.min, min) { 1640 s.min = min 1641 } 1642 if s.less(s.max, max) { 1643 s.max = max 1644 } 1645 } 1646} 1647 1648// EncodeMin returns the encoded min value with plain encoding. 1649// 1650// ByteArray stats do not include the length in the encoding. 
1651func (s *BooleanStatistics) EncodeMin() []byte { 1652 if s.HasMinMax() { 1653 return s.plainEncode(s.min) 1654 } 1655 return nil 1656} 1657 1658// EncodeMax returns the current encoded max value with plain encoding 1659// 1660// ByteArray stats do not include the length in the encoding 1661func (s *BooleanStatistics) EncodeMax() []byte { 1662 if s.HasMinMax() { 1663 return s.plainEncode(s.max) 1664 } 1665 return nil 1666} 1667 1668// Encode returns a populated EncodedStatistics object 1669func (s *BooleanStatistics) Encode() (enc EncodedStatistics, err error) { 1670 defer func() { 1671 if r := recover(); r != nil { 1672 switch r := r.(type) { 1673 case error: 1674 err = r 1675 case string: 1676 err = xerrors.New(r) 1677 default: 1678 err = xerrors.Errorf("unknown error type thrown from panic: %v", r) 1679 } 1680 } 1681 }() 1682 if s.HasMinMax() { 1683 enc.SetMax(s.EncodeMax()) 1684 enc.SetMin(s.EncodeMin()) 1685 } 1686 if s.HasNullCount() { 1687 enc.SetNullCount(s.NullCount()) 1688 } 1689 if s.HasDistinctCount() { 1690 enc.SetDistinctCount(s.DistinctCount()) 1691 } 1692 return 1693} 1694 1695type minmaxPairByteArray [2]parquet.ByteArray 1696 1697// ByteArrayStatistics is the typed interface for managing stats for a column 1698// of ByteArray type. 1699type ByteArrayStatistics struct { 1700 statistics 1701 min parquet.ByteArray 1702 max parquet.ByteArray 1703 1704 bitSetReader utils.SetBitRunReader 1705} 1706 1707// NewByteArrayStatistics constructs an appropriate stat object type using the 1708// given column descriptor and allocator. 
1709// 1710// Panics if the physical type of descr is not parquet.Type.ByteArray 1711func NewByteArrayStatistics(descr *schema.Column, mem memory.Allocator) *ByteArrayStatistics { 1712 if descr.PhysicalType() != parquet.Types.ByteArray { 1713 panic(xerrors.Errorf("parquet: invalid type %s for constructing a ByteArray stat object", descr.PhysicalType())) 1714 } 1715 1716 return &ByteArrayStatistics{ 1717 statistics: statistics{ 1718 descr: descr, 1719 hasNullCount: true, 1720 hasDistinctCount: true, 1721 order: descr.SortOrder(), 1722 encoder: encoding.NewEncoder(descr.PhysicalType(), parquet.Encodings.Plain, false, descr, mem), 1723 mem: mem, 1724 }, 1725 1726 min: make([]byte, 0), 1727 max: make([]byte, 0), 1728 } 1729} 1730 1731// NewByteArrayStatisticsFromEncoded will construct a propertly typed statistics object 1732// initializing it with the provided information. 1733func NewByteArrayStatisticsFromEncoded(descr *schema.Column, mem memory.Allocator, nvalues int64, encoded StatProvider) *ByteArrayStatistics { 1734 ret := NewByteArrayStatistics(descr, mem) 1735 ret.nvalues += nvalues 1736 if encoded.IsSetNullCount() { 1737 ret.incNulls(encoded.GetNullCount()) 1738 } 1739 if encoded.IsSetDistinctCount() { 1740 ret.incDistinct(encoded.GetDistinctCount()) 1741 } 1742 1743 encodedMin := encoded.GetMin() 1744 if encodedMin != nil && len(encodedMin) > 0 { 1745 ret.min = ret.plainDecode(encodedMin) 1746 } 1747 encodedMax := encoded.GetMax() 1748 if encodedMax != nil && len(encodedMax) > 0 { 1749 ret.max = ret.plainDecode(encodedMax) 1750 } 1751 ret.hasMinMax = encoded.IsSetMax() || encoded.IsSetMin() 1752 return ret 1753} 1754 1755func (s *ByteArrayStatistics) plainEncode(src parquet.ByteArray) []byte { 1756 return src 1757} 1758 1759func (s *ByteArrayStatistics) plainDecode(src []byte) parquet.ByteArray { 1760 return src 1761} 1762 1763func (s *ByteArrayStatistics) minval(a, b parquet.ByteArray) parquet.ByteArray { 1764 switch { 1765 case a == nil: 1766 return b 1767 
case b == nil: 1768 return a 1769 case s.less(a, b): 1770 return a 1771 default: 1772 return b 1773 } 1774} 1775 1776func (s *ByteArrayStatistics) maxval(a, b parquet.ByteArray) parquet.ByteArray { 1777 switch { 1778 case a == nil: 1779 return b 1780 case b == nil: 1781 return a 1782 case s.less(a, b): 1783 return b 1784 default: 1785 return a 1786 } 1787} 1788 1789// MinMaxEqual returns true if both stat objects have the same Min and Max values 1790func (s *ByteArrayStatistics) MinMaxEqual(rhs *ByteArrayStatistics) bool { 1791 return s.equal(s.min, rhs.min) && s.equal(s.max, rhs.max) 1792} 1793 1794// Equals returns true only if both objects are the same type, have the same min and 1795// max values, null count, distinct count and number of values. 1796func (s *ByteArrayStatistics) Equals(other TypedStatistics) bool { 1797 if s.Type() != other.Type() { 1798 return false 1799 } 1800 rhs, ok := other.(*ByteArrayStatistics) 1801 if !ok { 1802 return false 1803 } 1804 1805 if s.HasMinMax() != rhs.HasMinMax() { 1806 return false 1807 } 1808 return (s.hasMinMax && s.MinMaxEqual(rhs)) && 1809 s.NullCount() == rhs.NullCount() && 1810 s.DistinctCount() == rhs.DistinctCount() && 1811 s.NumValues() == rhs.NumValues() 1812} 1813 1814func (s *ByteArrayStatistics) getMinMax(values []parquet.ByteArray) (min, max parquet.ByteArray) { 1815 defMin := s.defaultMin() 1816 defMax := s.defaultMax() 1817 1818 min = defMin 1819 max = defMax 1820 1821 for _, v := range values { 1822 min = s.minval(min, v) 1823 max = s.maxval(max, v) 1824 } 1825 return 1826} 1827 1828func (s *ByteArrayStatistics) getMinMaxSpaced(values []parquet.ByteArray, validBits []byte, validBitsOffset int64) (min, max parquet.ByteArray) { 1829 min = s.defaultMin() 1830 max = s.defaultMax() 1831 1832 if s.bitSetReader == nil { 1833 s.bitSetReader = utils.NewSetBitRunReader(validBits, validBitsOffset, int64(len(values))) 1834 } else { 1835 s.bitSetReader.Reset(validBits, validBitsOffset, int64(len(values))) 1836 } 1837 
1838 for { 1839 run := s.bitSetReader.NextRun() 1840 if run.Length == 0 { 1841 break 1842 } 1843 for _, v := range values[int(run.Pos):int(run.Pos+run.Length)] { 1844 min = s.minval(min, v) 1845 max = s.maxval(max, v) 1846 } 1847 } 1848 return 1849} 1850 1851func (s *ByteArrayStatistics) Min() parquet.ByteArray { return s.min } 1852func (s *ByteArrayStatistics) Max() parquet.ByteArray { return s.max } 1853 1854// Merge merges the stats from other into this stat object, updating 1855// the null count, distinct count, number of values and the min/max if 1856// appropriate. 1857func (s *ByteArrayStatistics) Merge(other TypedStatistics) { 1858 rhs, ok := other.(*ByteArrayStatistics) 1859 if !ok { 1860 panic("incompatible stat type merge") 1861 } 1862 1863 s.statistics.merge(rhs) 1864 if rhs.HasMinMax() { 1865 s.SetMinMax(rhs.Min(), rhs.Max()) 1866 } 1867} 1868 1869// Update is used to add more values to the current stat object, finding the 1870// min and max values etc. 1871func (s *ByteArrayStatistics) Update(values []parquet.ByteArray, numNull int64) { 1872 s.incNulls(numNull) 1873 s.nvalues += int64(len(values)) 1874 1875 if len(values) == 0 { 1876 return 1877 } 1878 1879 s.SetMinMax(s.getMinMax(values)) 1880} 1881 1882// UpdateSpaced is just like Update, but for spaced values using validBits to determine 1883// and skip null values. 
1884func (s *ByteArrayStatistics) UpdateSpaced(values []parquet.ByteArray, validBits []byte, validBitsOffset, numNull int64) { 1885 s.incNulls(numNull) 1886 notnull := int64(len(values)) - numNull 1887 s.nvalues += notnull 1888 1889 if notnull == 0 { 1890 return 1891 } 1892 1893 s.SetMinMax(s.getMinMaxSpaced(values, validBits, validBitsOffset)) 1894} 1895 1896// SetMinMax updates the min and max values only if they are not currently set 1897// or if argMin is less than the current min / argMax is greater than the current max 1898func (s *ByteArrayStatistics) SetMinMax(argMin, argMax parquet.ByteArray) { 1899 maybeMinMax := s.cleanStat([2]parquet.ByteArray{argMin, argMax}) 1900 if maybeMinMax == nil { 1901 return 1902 } 1903 1904 min := (*maybeMinMax)[0] 1905 max := (*maybeMinMax)[1] 1906 1907 if !s.hasMinMax { 1908 s.hasMinMax = true 1909 s.min = min 1910 s.max = max 1911 } else { 1912 if !s.less(s.min, min) { 1913 s.min = min 1914 } 1915 if s.less(s.max, max) { 1916 s.max = max 1917 } 1918 } 1919} 1920 1921// EncodeMin returns the encoded min value with plain encoding. 1922// 1923// ByteArray stats do not include the length in the encoding. 
1924func (s *ByteArrayStatistics) EncodeMin() []byte { 1925 if s.HasMinMax() { 1926 return s.plainEncode(s.min) 1927 } 1928 return nil 1929} 1930 1931// EncodeMax returns the current encoded max value with plain encoding 1932// 1933// ByteArray stats do not include the length in the encoding 1934func (s *ByteArrayStatistics) EncodeMax() []byte { 1935 if s.HasMinMax() { 1936 return s.plainEncode(s.max) 1937 } 1938 return nil 1939} 1940 1941// Encode returns a populated EncodedStatistics object 1942func (s *ByteArrayStatistics) Encode() (enc EncodedStatistics, err error) { 1943 defer func() { 1944 if r := recover(); r != nil { 1945 switch r := r.(type) { 1946 case error: 1947 err = r 1948 case string: 1949 err = xerrors.New(r) 1950 default: 1951 err = xerrors.Errorf("unknown error type thrown from panic: %v", r) 1952 } 1953 } 1954 }() 1955 if s.HasMinMax() { 1956 enc.SetMax(s.EncodeMax()) 1957 enc.SetMin(s.EncodeMin()) 1958 } 1959 if s.HasNullCount() { 1960 enc.SetNullCount(s.NullCount()) 1961 } 1962 if s.HasDistinctCount() { 1963 enc.SetDistinctCount(s.DistinctCount()) 1964 } 1965 return 1966} 1967 1968type minmaxPairFixedLenByteArray [2]parquet.FixedLenByteArray 1969 1970// FixedLenByteArrayStatistics is the typed interface for managing stats for a column 1971// of FixedLenByteArray type. 1972type FixedLenByteArrayStatistics struct { 1973 statistics 1974 min parquet.FixedLenByteArray 1975 max parquet.FixedLenByteArray 1976 1977 bitSetReader utils.SetBitRunReader 1978} 1979 1980// NewFixedLenByteArrayStatistics constructs an appropriate stat object type using the 1981// given column descriptor and allocator. 
1982// 1983// Panics if the physical type of descr is not parquet.Type.FixedLenByteArray 1984func NewFixedLenByteArrayStatistics(descr *schema.Column, mem memory.Allocator) *FixedLenByteArrayStatistics { 1985 if descr.PhysicalType() != parquet.Types.FixedLenByteArray { 1986 panic(xerrors.Errorf("parquet: invalid type %s for constructing a FixedLenByteArray stat object", descr.PhysicalType())) 1987 } 1988 1989 return &FixedLenByteArrayStatistics{ 1990 statistics: statistics{ 1991 descr: descr, 1992 hasNullCount: true, 1993 hasDistinctCount: true, 1994 order: descr.SortOrder(), 1995 encoder: encoding.NewEncoder(descr.PhysicalType(), parquet.Encodings.Plain, false, descr, mem), 1996 mem: mem, 1997 }, 1998 } 1999} 2000 2001// NewFixedLenByteArrayStatisticsFromEncoded will construct a propertly typed statistics object 2002// initializing it with the provided information. 2003func NewFixedLenByteArrayStatisticsFromEncoded(descr *schema.Column, mem memory.Allocator, nvalues int64, encoded StatProvider) *FixedLenByteArrayStatistics { 2004 ret := NewFixedLenByteArrayStatistics(descr, mem) 2005 ret.nvalues += nvalues 2006 if encoded.IsSetNullCount() { 2007 ret.incNulls(encoded.GetNullCount()) 2008 } 2009 if encoded.IsSetDistinctCount() { 2010 ret.incDistinct(encoded.GetDistinctCount()) 2011 } 2012 2013 encodedMin := encoded.GetMin() 2014 if encodedMin != nil && len(encodedMin) > 0 { 2015 ret.min = ret.plainDecode(encodedMin) 2016 } 2017 encodedMax := encoded.GetMax() 2018 if encodedMax != nil && len(encodedMax) > 0 { 2019 ret.max = ret.plainDecode(encodedMax) 2020 } 2021 ret.hasMinMax = encoded.IsSetMax() || encoded.IsSetMin() 2022 return ret 2023} 2024 2025func (s *FixedLenByteArrayStatistics) plainEncode(src parquet.FixedLenByteArray) []byte { 2026 s.encoder.(encoding.FixedLenByteArrayEncoder).Put([]parquet.FixedLenByteArray{src}) 2027 buf, err := s.encoder.FlushValues() 2028 if err != nil { 2029 panic(err) // recovered by Encode 2030 } 2031 defer buf.Release() 2032 2033 
out := make([]byte, buf.Len()) 2034 copy(out, buf.Bytes()) 2035 return out 2036} 2037 2038func (s *FixedLenByteArrayStatistics) plainDecode(src []byte) parquet.FixedLenByteArray { 2039 var buf [1]parquet.FixedLenByteArray 2040 2041 decoder := encoding.NewDecoder(s.descr.PhysicalType(), parquet.Encodings.Plain, s.descr, s.mem) 2042 decoder.SetData(1, src) 2043 decoder.(encoding.FixedLenByteArrayDecoder).Decode(buf[:]) 2044 return buf[0] 2045} 2046 2047func (s *FixedLenByteArrayStatistics) minval(a, b parquet.FixedLenByteArray) parquet.FixedLenByteArray { 2048 switch { 2049 case a == nil: 2050 return b 2051 case b == nil: 2052 return a 2053 case s.less(a, b): 2054 return a 2055 default: 2056 return b 2057 } 2058} 2059 2060func (s *FixedLenByteArrayStatistics) maxval(a, b parquet.FixedLenByteArray) parquet.FixedLenByteArray { 2061 switch { 2062 case a == nil: 2063 return b 2064 case b == nil: 2065 return a 2066 case s.less(a, b): 2067 return b 2068 default: 2069 return a 2070 } 2071} 2072 2073// MinMaxEqual returns true if both stat objects have the same Min and Max values 2074func (s *FixedLenByteArrayStatistics) MinMaxEqual(rhs *FixedLenByteArrayStatistics) bool { 2075 return s.equal(s.min, rhs.min) && s.equal(s.max, rhs.max) 2076} 2077 2078// Equals returns true only if both objects are the same type, have the same min and 2079// max values, null count, distinct count and number of values. 
2080func (s *FixedLenByteArrayStatistics) Equals(other TypedStatistics) bool { 2081 if s.Type() != other.Type() { 2082 return false 2083 } 2084 rhs, ok := other.(*FixedLenByteArrayStatistics) 2085 if !ok { 2086 return false 2087 } 2088 2089 if s.HasMinMax() != rhs.HasMinMax() { 2090 return false 2091 } 2092 return (s.hasMinMax && s.MinMaxEqual(rhs)) && 2093 s.NullCount() == rhs.NullCount() && 2094 s.DistinctCount() == rhs.DistinctCount() && 2095 s.NumValues() == rhs.NumValues() 2096} 2097 2098func (s *FixedLenByteArrayStatistics) getMinMax(values []parquet.FixedLenByteArray) (min, max parquet.FixedLenByteArray) { 2099 defMin := s.defaultMin() 2100 defMax := s.defaultMax() 2101 2102 min = defMin 2103 max = defMax 2104 2105 for _, v := range values { 2106 min = s.minval(min, v) 2107 max = s.maxval(max, v) 2108 } 2109 return 2110} 2111 2112func (s *FixedLenByteArrayStatistics) getMinMaxSpaced(values []parquet.FixedLenByteArray, validBits []byte, validBitsOffset int64) (min, max parquet.FixedLenByteArray) { 2113 min = s.defaultMin() 2114 max = s.defaultMax() 2115 2116 if s.bitSetReader == nil { 2117 s.bitSetReader = utils.NewSetBitRunReader(validBits, validBitsOffset, int64(len(values))) 2118 } else { 2119 s.bitSetReader.Reset(validBits, validBitsOffset, int64(len(values))) 2120 } 2121 2122 for { 2123 run := s.bitSetReader.NextRun() 2124 if run.Length == 0 { 2125 break 2126 } 2127 for _, v := range values[int(run.Pos):int(run.Pos+run.Length)] { 2128 min = s.minval(min, v) 2129 max = s.maxval(max, v) 2130 } 2131 } 2132 return 2133} 2134 2135func (s *FixedLenByteArrayStatistics) Min() parquet.FixedLenByteArray { return s.min } 2136func (s *FixedLenByteArrayStatistics) Max() parquet.FixedLenByteArray { return s.max } 2137 2138// Merge merges the stats from other into this stat object, updating 2139// the null count, distinct count, number of values and the min/max if 2140// appropriate. 
2141func (s *FixedLenByteArrayStatistics) Merge(other TypedStatistics) { 2142 rhs, ok := other.(*FixedLenByteArrayStatistics) 2143 if !ok { 2144 panic("incompatible stat type merge") 2145 } 2146 2147 s.statistics.merge(rhs) 2148 if rhs.HasMinMax() { 2149 s.SetMinMax(rhs.Min(), rhs.Max()) 2150 } 2151} 2152 2153// Update is used to add more values to the current stat object, finding the 2154// min and max values etc. 2155func (s *FixedLenByteArrayStatistics) Update(values []parquet.FixedLenByteArray, numNull int64) { 2156 s.incNulls(numNull) 2157 s.nvalues += int64(len(values)) 2158 2159 if len(values) == 0 { 2160 return 2161 } 2162 2163 s.SetMinMax(s.getMinMax(values)) 2164} 2165 2166// UpdateSpaced is just like Update, but for spaced values using validBits to determine 2167// and skip null values. 2168func (s *FixedLenByteArrayStatistics) UpdateSpaced(values []parquet.FixedLenByteArray, validBits []byte, validBitsOffset, numNull int64) { 2169 s.incNulls(numNull) 2170 notnull := int64(len(values)) - numNull 2171 s.nvalues += notnull 2172 2173 if notnull == 0 { 2174 return 2175 } 2176 2177 s.SetMinMax(s.getMinMaxSpaced(values, validBits, validBitsOffset)) 2178} 2179 2180// SetMinMax updates the min and max values only if they are not currently set 2181// or if argMin is less than the current min / argMax is greater than the current max 2182func (s *FixedLenByteArrayStatistics) SetMinMax(argMin, argMax parquet.FixedLenByteArray) { 2183 maybeMinMax := s.cleanStat([2]parquet.FixedLenByteArray{argMin, argMax}) 2184 if maybeMinMax == nil { 2185 return 2186 } 2187 2188 min := (*maybeMinMax)[0] 2189 max := (*maybeMinMax)[1] 2190 2191 if !s.hasMinMax { 2192 s.hasMinMax = true 2193 s.min = min 2194 s.max = max 2195 } else { 2196 if !s.less(s.min, min) { 2197 s.min = min 2198 } 2199 if s.less(s.max, max) { 2200 s.max = max 2201 } 2202 } 2203} 2204 2205// EncodeMin returns the encoded min value with plain encoding. 
2206// 2207// ByteArray stats do not include the length in the encoding. 2208func (s *FixedLenByteArrayStatistics) EncodeMin() []byte { 2209 if s.HasMinMax() { 2210 return s.plainEncode(s.min) 2211 } 2212 return nil 2213} 2214 2215// EncodeMax returns the current encoded max value with plain encoding 2216// 2217// ByteArray stats do not include the length in the encoding 2218func (s *FixedLenByteArrayStatistics) EncodeMax() []byte { 2219 if s.HasMinMax() { 2220 return s.plainEncode(s.max) 2221 } 2222 return nil 2223} 2224 2225// Encode returns a populated EncodedStatistics object 2226func (s *FixedLenByteArrayStatistics) Encode() (enc EncodedStatistics, err error) { 2227 defer func() { 2228 if r := recover(); r != nil { 2229 switch r := r.(type) { 2230 case error: 2231 err = r 2232 case string: 2233 err = xerrors.New(r) 2234 default: 2235 err = xerrors.Errorf("unknown error type thrown from panic: %v", r) 2236 } 2237 } 2238 }() 2239 if s.HasMinMax() { 2240 enc.SetMax(s.EncodeMax()) 2241 enc.SetMin(s.EncodeMin()) 2242 } 2243 if s.HasNullCount() { 2244 enc.SetNullCount(s.NullCount()) 2245 } 2246 if s.HasDistinctCount() { 2247 enc.SetDistinctCount(s.DistinctCount()) 2248 } 2249 return 2250} 2251 2252// NewStatistics uses the type in the column descriptor to construct the appropriate 2253// typed stats object. If mem is nil, then memory.DefaultAllocator will be used. 
2254func NewStatistics(descr *schema.Column, mem memory.Allocator) TypedStatistics { 2255 if mem == nil { 2256 mem = memory.DefaultAllocator 2257 } 2258 switch descr.PhysicalType() { 2259 case parquet.Types.Int32: 2260 return NewInt32Statistics(descr, mem) 2261 case parquet.Types.Int64: 2262 return NewInt64Statistics(descr, mem) 2263 case parquet.Types.Int96: 2264 return NewInt96Statistics(descr, mem) 2265 case parquet.Types.Float: 2266 return NewFloat32Statistics(descr, mem) 2267 case parquet.Types.Double: 2268 return NewFloat64Statistics(descr, mem) 2269 case parquet.Types.Boolean: 2270 return NewBooleanStatistics(descr, mem) 2271 case parquet.Types.ByteArray: 2272 return NewByteArrayStatistics(descr, mem) 2273 case parquet.Types.FixedLenByteArray: 2274 return NewFixedLenByteArrayStatistics(descr, mem) 2275 default: 2276 panic("not implemented") 2277 } 2278} 2279 2280// NewStatisticsFromEncoded uses the provided information to initialize a typed stat object 2281// by checking the type of the provided column descriptor. 2282// 2283// If mem is nil, then memory.DefaultAllocator is used. 
2284func NewStatisticsFromEncoded(descr *schema.Column, mem memory.Allocator, nvalues int64, encoded StatProvider) TypedStatistics { 2285 if mem == nil { 2286 mem = memory.DefaultAllocator 2287 } 2288 switch descr.PhysicalType() { 2289 case parquet.Types.Int32: 2290 return NewInt32StatisticsFromEncoded(descr, mem, nvalues, encoded) 2291 case parquet.Types.Int64: 2292 return NewInt64StatisticsFromEncoded(descr, mem, nvalues, encoded) 2293 case parquet.Types.Int96: 2294 return NewInt96StatisticsFromEncoded(descr, mem, nvalues, encoded) 2295 case parquet.Types.Float: 2296 return NewFloat32StatisticsFromEncoded(descr, mem, nvalues, encoded) 2297 case parquet.Types.Double: 2298 return NewFloat64StatisticsFromEncoded(descr, mem, nvalues, encoded) 2299 case parquet.Types.Boolean: 2300 return NewBooleanStatisticsFromEncoded(descr, mem, nvalues, encoded) 2301 case parquet.Types.ByteArray: 2302 return NewByteArrayStatisticsFromEncoded(descr, mem, nvalues, encoded) 2303 case parquet.Types.FixedLenByteArray: 2304 return NewFixedLenByteArrayStatisticsFromEncoded(descr, mem, nvalues, encoded) 2305 default: 2306 panic("not implemented") 2307 } 2308} 2309