1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9// http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing, software
12// distributed under the License is distributed on an "AS IS" BASIS,
13// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14// See the License for the specific language governing permissions and
15// limitations under the License.
16
17package encoding
18
19import (
20	"github.com/apache/arrow/go/v6/arrow/bitutil"
21	"github.com/apache/arrow/go/v6/parquet"
22	"github.com/apache/arrow/go/v6/parquet/internal/utils"
23)
24
// Sizing of the scratch bitmap that PlainBooleanEncoder.Put stages bits in:
// boolBufSize is the length in bytes of the buffer Put allocates, and
// boolsInBuf is the number of bool values (one bit each) that the buffer can
// hold before it has to be emitted.
const (
	boolBufSize = 1024
	boolsInBuf  = boolBufSize * 8
)
29
// PlainBooleanEncoder encodes bools as a bitmap as per the Plain Encoding.
//
// Values are staged in bitsBuffer through the bitmap writer wr; both fields
// are created lazily by Put and are nil until Put has been called once.
// The sink and append members used by the methods are not declared on this
// struct, so they presumably come from the embedded encoder.
type PlainBooleanEncoder struct {
	encoder
	bitsBuffer []byte
	wr         utils.BitmapWriter
}
36
// Type reports the parquet physical type this encoder handles, which is
// always parquet.Types.Boolean.
func (PlainBooleanEncoder) Type() parquet.Type {
	return parquet.Types.Boolean
}
41
42// Put encodes the contents of in into the underlying data buffer.
43func (enc *PlainBooleanEncoder) Put(in []bool) {
44	if enc.bitsBuffer == nil {
45		enc.bitsBuffer = make([]byte, boolBufSize)
46	}
47	if enc.wr == nil {
48		enc.wr = utils.NewBitmapWriter(enc.bitsBuffer, 0, boolsInBuf)
49	}
50
51	n := enc.wr.AppendBools(in)
52	for n < len(in) {
53		enc.wr.Finish()
54		enc.append(enc.bitsBuffer)
55		enc.wr.Reset(0, boolsInBuf)
56		in = in[n:]
57		n = enc.wr.AppendBools(in)
58	}
59}
60
61// PutSpaced will use the validBits bitmap to determine which values are nulls
62// and can be left out from the slice, and the encoded without those nulls.
63func (enc *PlainBooleanEncoder) PutSpaced(in []bool, validBits []byte, validBitsOffset int64) {
64	bufferOut := make([]bool, len(in))
65	nvalid := spacedCompress(in, bufferOut, validBits, validBitsOffset)
66	enc.Put(bufferOut[:nvalid])
67}
68
69// EstimatedDataEncodedSize returns the current number of bytes that have
70// been buffered so far
71func (enc *PlainBooleanEncoder) EstimatedDataEncodedSize() int64 {
72	return int64(enc.sink.Len() + int(bitutil.BytesForBits(int64(enc.wr.Pos()))))
73}
74
75// FlushValues returns the buffered data, the responsibility is on the caller
76// to release the buffer memory
77func (enc *PlainBooleanEncoder) FlushValues() (Buffer, error) {
78	if enc.wr.Pos() > 0 {
79		toFlush := int(enc.wr.Pos())
80		enc.append(enc.bitsBuffer[:bitutil.BytesForBits(int64(toFlush))])
81	}
82
83	return enc.sink.Finish(), nil
84}
85