1// Licensed to the Apache Software Foundation (ASF) under one 2// or more contributor license agreements. See the NOTICE file 3// distributed with this work for additional information 4// regarding copyright ownership. The ASF licenses this file 5// to you under the Apache License, Version 2.0 (the 6// "License"); you may not use this file except in compliance 7// with the License. You may obtain a copy of the License at 8// 9// http://www.apache.org/licenses/LICENSE-2.0 10// 11// Unless required by applicable law or agreed to in writing, software 12// distributed under the License is distributed on an "AS IS" BASIS, 13// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14// See the License for the specific language governing permissions and 15// limitations under the License. 16 17package array 18 19import ( 20 "math" 21 "sync/atomic" 22 23 "github.com/apache/arrow/go/v6/arrow" 24 "github.com/apache/arrow/go/v6/arrow/internal/debug" 25 "github.com/apache/arrow/go/v6/arrow/memory" 26) 27 28const ( 29 binaryArrayMaximumCapacity = math.MaxInt32 30) 31 32// A BinaryBuilder is used to build a Binary array using the Append methods. 33type BinaryBuilder struct { 34 builder 35 36 dtype arrow.BinaryDataType 37 offsets *int32BufferBuilder 38 values *byteBufferBuilder 39} 40 41func NewBinaryBuilder(mem memory.Allocator, dtype arrow.BinaryDataType) *BinaryBuilder { 42 b := &BinaryBuilder{ 43 builder: builder{refCount: 1, mem: mem}, 44 dtype: dtype, 45 offsets: newInt32BufferBuilder(mem), 46 values: newByteBufferBuilder(mem), 47 } 48 return b 49} 50 51// Release decreases the reference count by 1. 52// When the reference count goes to zero, the memory is freed. 53// Release may be called simultaneously from multiple goroutines. 54func (b *BinaryBuilder) Release() { 55 debug.Assert(atomic.LoadInt64(&b.refCount) > 0, "too many releases") 56 57 if atomic.AddInt64(&b.refCount, -1) == 0 { 58 if b.nullBitmap != nil { 59 b.nullBitmap.Release() 60 b.nullBitmap = nil 61 } 62 if b.offsets != nil { 63 b.offsets.Release() 64 b.offsets = nil 65 } 66 if b.values != nil { 67 b.values.Release() 68 b.values = nil 69 } 70 } 71} 72 73func (b *BinaryBuilder) Append(v []byte) { 74 b.Reserve(1) 75 b.appendNextOffset() 76 b.values.Append(v) 77 b.UnsafeAppendBoolToBitmap(true) 78} 79 80func (b *BinaryBuilder) AppendString(v string) { 81 b.Append([]byte(v)) 82} 83 84func (b *BinaryBuilder) AppendNull() { 85 b.Reserve(1) 86 b.appendNextOffset() 87 b.UnsafeAppendBoolToBitmap(false) 88} 89 90// AppendValues will append the values in the v slice. The valid slice determines which values 91// in v are valid (not null). The valid slice must either be empty or be equal in length to v. If empty, 92// all values in v are appended and considered valid. 93func (b *BinaryBuilder) AppendValues(v [][]byte, valid []bool) { 94 if len(v) != len(valid) && len(valid) != 0 { 95 panic("len(v) != len(valid) && len(valid) != 0") 96 } 97 98 if len(v) == 0 { 99 return 100 } 101 102 b.Reserve(len(v)) 103 for _, vv := range v { 104 b.appendNextOffset() 105 b.values.Append(vv) 106 } 107 108 b.builder.unsafeAppendBoolsToBitmap(valid, len(v)) 109} 110 111// AppendStringValues will append the values in the v slice. The valid slice determines which values 112// in v are valid (not null). The valid slice must either be empty or be equal in length to v. If empty, 113// all values in v are appended and considered valid. 114func (b *BinaryBuilder) AppendStringValues(v []string, valid []bool) { 115 if len(v) != len(valid) && len(valid) != 0 { 116 panic("len(v) != len(valid) && len(valid) != 0") 117 } 118 119 if len(v) == 0 { 120 return 121 } 122 123 b.Reserve(len(v)) 124 for _, vv := range v { 125 b.appendNextOffset() 126 b.values.Append([]byte(vv)) 127 } 128 129 b.builder.unsafeAppendBoolsToBitmap(valid, len(v)) 130} 131 132func (b *BinaryBuilder) Value(i int) []byte { 133 offsets := b.offsets.Values() 134 start := int(offsets[i]) 135 var end int 136 if i == (b.length - 1) { 137 end = b.values.Len() 138 } else { 139 end = int(offsets[i+1]) 140 } 141 return b.values.Bytes()[start:end] 142} 143 144func (b *BinaryBuilder) init(capacity int) { 145 b.builder.init(capacity) 146 b.offsets.resize((capacity + 1) * arrow.Int32SizeBytes) 147} 148 149// DataLen returns the number of bytes in the data array. 150func (b *BinaryBuilder) DataLen() int { return b.values.length } 151 152// DataCap returns the total number of bytes that can be stored 153// without allocating additional memory. 154func (b *BinaryBuilder) DataCap() int { return b.values.capacity } 155 156// Reserve ensures there is enough space for appending n elements 157// by checking the capacity and calling Resize if necessary. 158func (b *BinaryBuilder) Reserve(n int) { 159 b.builder.reserve(n, b.Resize) 160} 161 162// ReserveData ensures there is enough space for appending n bytes 163// by checking the capacity and resizing the data buffer if necessary. 164func (b *BinaryBuilder) ReserveData(n int) { 165 if b.values.capacity < b.values.length+n { 166 b.values.resize(b.values.Len() + n) 167 } 168} 169 170// Resize adjusts the space allocated by b to n elements. If n is greater than b.Cap(), 171// additional memory will be allocated. If n is smaller, the allocated memory may be reduced. 172func (b *BinaryBuilder) Resize(n int) { 173 b.offsets.resize((n + 1) * arrow.Int32SizeBytes) 174 b.builder.resize(n, b.init) 175} 176 177// NewArray creates a Binary array from the memory buffers used by the builder and resets the BinaryBuilder 178// so it can be used to build a new array. 179func (b *BinaryBuilder) NewArray() Interface { 180 return b.NewBinaryArray() 181} 182 183// NewBinaryArray creates a Binary array from the memory buffers used by the builder and resets the BinaryBuilder 184// so it can be used to build a new array. 185func (b *BinaryBuilder) NewBinaryArray() (a *Binary) { 186 data := b.newData() 187 a = NewBinaryData(data) 188 data.Release() 189 return 190} 191 192func (b *BinaryBuilder) newData() (data *Data) { 193 b.appendNextOffset() 194 offsets, values := b.offsets.Finish(), b.values.Finish() 195 data = NewData(b.dtype, b.length, []*memory.Buffer{b.nullBitmap, offsets, values}, nil, b.nulls, 0) 196 if offsets != nil { 197 offsets.Release() 198 } 199 200 if values != nil { 201 values.Release() 202 } 203 204 b.builder.reset() 205 206 return 207} 208 209func (b *BinaryBuilder) appendNextOffset() { 210 numBytes := b.values.Len() 211 // TODO(sgc): check binaryArrayMaximumCapacity? 212 b.offsets.AppendValue(int32(numBytes)) 213} 214 215var ( 216 _ Builder = (*BinaryBuilder)(nil) 217) 218