// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements.  See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership.  The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License.  You may obtain a copy of the License at
//
//   http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package encoding

import (
	"github.com/apache/arrow/go/v6/arrow/bitutil"
	"github.com/apache/arrow/go/v6/parquet"
	"github.com/apache/arrow/go/v6/parquet/internal/utils"
)

const (
	// boolBufSize is the size, in bytes, of the scratch buffer that Put
	// allocates for bit-packing bools.
	boolBufSize = 1024
	// boolsInBuf is the number of bools that fit in the scratch buffer at
	// one bit per value; it is the length given to the bitmap writer.
	boolsInBuf = boolBufSize * 8
)

// PlainBooleanEncoder encodes bools as a bitmap as per the Plain Encoding.
type PlainBooleanEncoder struct {
	// encoder is the embedded base encoder, declared elsewhere in this
	// package; the methods here use its sink field and append method.
	encoder
	// bitsBuffer is the scratch buffer bools are packed into. It is nil
	// until the first call to Put, which allocates boolBufSize bytes.
	bitsBuffer []byte
	// wr writes bits into bitsBuffer. It is nil until the first call to Put.
	wr utils.BitmapWriter
}

// Type for the PlainBooleanEncoder is parquet.Types.Boolean
func (PlainBooleanEncoder) Type() parquet.Type {
	return parquet.Types.Boolean
}

// Put encodes the contents of in into the underlying data buffer.
43func (enc *PlainBooleanEncoder) Put(in []bool) { 44 if enc.bitsBuffer == nil { 45 enc.bitsBuffer = make([]byte, boolBufSize) 46 } 47 if enc.wr == nil { 48 enc.wr = utils.NewBitmapWriter(enc.bitsBuffer, 0, boolsInBuf) 49 } 50 51 n := enc.wr.AppendBools(in) 52 for n < len(in) { 53 enc.wr.Finish() 54 enc.append(enc.bitsBuffer) 55 enc.wr.Reset(0, boolsInBuf) 56 in = in[n:] 57 n = enc.wr.AppendBools(in) 58 } 59} 60 61// PutSpaced will use the validBits bitmap to determine which values are nulls 62// and can be left out from the slice, and the encoded without those nulls. 63func (enc *PlainBooleanEncoder) PutSpaced(in []bool, validBits []byte, validBitsOffset int64) { 64 bufferOut := make([]bool, len(in)) 65 nvalid := spacedCompress(in, bufferOut, validBits, validBitsOffset) 66 enc.Put(bufferOut[:nvalid]) 67} 68 69// EstimatedDataEncodedSize returns the current number of bytes that have 70// been buffered so far 71func (enc *PlainBooleanEncoder) EstimatedDataEncodedSize() int64 { 72 return int64(enc.sink.Len() + int(bitutil.BytesForBits(int64(enc.wr.Pos())))) 73} 74 75// FlushValues returns the buffered data, the responsibility is on the caller 76// to release the buffer memory 77func (enc *PlainBooleanEncoder) FlushValues() (Buffer, error) { 78 if enc.wr.Pos() > 0 { 79 toFlush := int(enc.wr.Pos()) 80 enc.append(enc.bitsBuffer[:bitutil.BytesForBits(int64(toFlush))]) 81 } 82 83 return enc.sink.Finish(), nil 84} 85