1 /* 2 * Licensed to the Apache Software Foundation (ASF) under one or more 3 * contributor license agreements. See the NOTICE file distributed with 4 * this work for additional information regarding copyright ownership. 5 * The ASF licenses this file to You under the Apache License, Version 2.0 6 * (the "License"); you may not use this file except in compliance with 7 * the License. You may obtain a copy of the License at 8 * 9 * http://www.apache.org/licenses/LICENSE-2.0 10 * 11 * Unless required by applicable law or agreed to in writing, software 12 * distributed under the License is distributed on an "AS IS" BASIS, 13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 * See the License for the specific language governing permissions and 15 * limitations under the License. 16 */ 17 18 import org.apache.arrow.memory.ArrowBuf; 19 import org.apache.arrow.memory.BufferAllocator; 20 import org.apache.arrow.memory.ReferenceManager; 21 import org.apache.arrow.memory.util.CommonUtil; 22 import org.apache.arrow.util.Preconditions; 23 import org.apache.arrow.vector.BaseValueVector; 24 import org.apache.arrow.vector.BitVectorHelper; 25 import org.apache.arrow.vector.FieldVector; 26 import org.apache.arrow.vector.ValueVector; 27 import org.apache.arrow.vector.complex.AbstractStructVector; 28 import org.apache.arrow.vector.complex.ListVector; 29 import org.apache.arrow.vector.complex.NonNullableStructVector; 30 import org.apache.arrow.vector.complex.StructVector; 31 import org.apache.arrow.vector.compare.VectorVisitor; 32 import org.apache.arrow.vector.types.Types; 33 import org.apache.arrow.vector.types.UnionMode; 34 import org.apache.arrow.vector.compare.RangeEqualsVisitor; 35 import org.apache.arrow.vector.types.pojo.ArrowType; 36 import org.apache.arrow.vector.types.pojo.Field; 37 import org.apache.arrow.vector.types.pojo.FieldType; 38 import org.apache.arrow.vector.util.CallBack; 39 import org.apache.arrow.vector.util.DataSizeRoundingUtil; 40 import org.apache.arrow.vector.util.TransferPair; 41 42 import java.util.Arrays; 43 import java.util.stream.Collectors; 44 45 <@pp.dropOutputFile /> 46 <@pp.changeOutputFile name="/org/apache/arrow/vector/complex/DenseUnionVector.java" /> 47 48 49 <#include "/@includes/license.ftl" /> 50 51 package org.apache.arrow.vector.complex; 52 53 <#include "/@includes/vv_imports.ftl" /> 54 import java.util.ArrayList; 55 import java.util.Collections; 56 import java.util.Iterator; 57 import org.apache.arrow.memory.util.CommonUtil; 58 import org.apache.arrow.memory.util.hash.ArrowBufHasher; 59 import org.apache.arrow.memory.util.hash.SimpleHasher; 60 import org.apache.arrow.vector.compare.VectorVisitor; 61 import org.apache.arrow.vector.complex.impl.ComplexCopier; 62 import org.apache.arrow.vector.util.CallBack; 63 import org.apache.arrow.vector.ipc.message.ArrowFieldNode; 64 import org.apache.arrow.vector.BaseValueVector; 65 import org.apache.arrow.vector.util.OversizedAllocationException; 66 import org.apache.arrow.util.Preconditions; 67 68 import static org.apache.arrow.vector.types.UnionMode.Dense; 69 70 71 72 /* 73 * This class is generated using freemarker and the ${.template_name} template. 74 */ 75 @SuppressWarnings("unused") 76 77 78 /** 79 * A vector which can hold values of different types. It does so by using a StructVector which contains a vector for each 80 * primitive type that is stored. StructVector is used in order to take advantage of its serialization/deserialization methods, 81 * as well as the addOrGet method. 82 * 83 * For performance reasons, DenseUnionVector stores a cached reference to each subtype vector, to avoid having to do the struct lookup 84 * each time the vector is accessed. 85 * Source code generated using FreeMarker template ${.template_name} 86 */ 87 public class DenseUnionVector extends AbstractContainerVector implements FieldVector { 88 int valueCount; 89 90 NonNullableStructVector internalStruct; 91 private ArrowBuf typeBuffer; 92 private ArrowBuf offsetBuffer; 93 94 /** 95 * The key is type Id, and the value is vector. 96 */ 97 private ValueVector[] childVectors = new ValueVector[Byte.MAX_VALUE + 1]; 98 99 /** 100 * The index is the type id, and the value is the type field. 101 */ 102 private Field[] typeFields = new Field[Byte.MAX_VALUE + 1]; 103 /** 104 * The index is the index into the typeFields array, and the value is the logical field id. 105 */ 106 private byte[] typeMapFields = new byte[Byte.MAX_VALUE + 1]; 107 108 /** 109 * The next type id to allocate. 110 */ 111 private byte nextTypeId = 0; 112 113 private FieldReader reader; 114 115 private long typeBufferAllocationSizeInBytes; 116 private long offsetBufferAllocationSizeInBytes; 117 118 private final FieldType fieldType; 119 120 public static final byte TYPE_WIDTH = 1; 121 public static final byte OFFSET_WIDTH = 4; 122 123 private static final FieldType INTERNAL_STRUCT_TYPE = new FieldType(/*nullable*/ false, 124 ArrowType.Struct.INSTANCE, /*dictionary*/ null, /*metadata*/ null); 125 empty(String name, BufferAllocator allocator)126 public static DenseUnionVector empty(String name, BufferAllocator allocator) { 127 FieldType fieldType = FieldType.nullable(new ArrowType.Union( 128 UnionMode.Dense, null)); 129 return new DenseUnionVector(name, allocator, fieldType, null); 130 } 131 DenseUnionVector(String name, BufferAllocator allocator, FieldType fieldType, CallBack callBack)132 public DenseUnionVector(String name, BufferAllocator allocator, FieldType fieldType, CallBack callBack) { 133 super(name, allocator, callBack); 134 this.fieldType = fieldType; 135 this.internalStruct = new NonNullableStructVector( 136 "internal", 137 allocator, 138 INTERNAL_STRUCT_TYPE, 139 callBack, 140 AbstractStructVector.ConflictPolicy.CONFLICT_REPLACE, 141 false); 142 this.typeBuffer = allocator.getEmpty(); 143 this.typeBufferAllocationSizeInBytes = BaseValueVector.INITIAL_VALUE_ALLOCATION * TYPE_WIDTH; 144 this.offsetBuffer = allocator.getEmpty(); 145 this.offsetBufferAllocationSizeInBytes = BaseValueVector.INITIAL_VALUE_ALLOCATION * OFFSET_WIDTH; 146 } 147 getAllocator()148 public BufferAllocator getAllocator() { 149 return allocator; 150 } 151 152 @Override getMinorType()153 public MinorType getMinorType() { 154 return MinorType.DENSEUNION; 155 } 156 157 @Override initializeChildrenFromFields(List<Field> children)158 public void initializeChildrenFromFields(List<Field> children) { 159 for (Field field : children) { 160 byte typeId = registerNewTypeId(field); 161 FieldVector vector = (FieldVector) internalStruct.add(field.getName(), field.getFieldType()); 162 vector.initializeChildrenFromFields(field.getChildren()); 163 childVectors[typeId] = vector; 164 } 165 } 166 167 @Override getChildrenFromFields()168 public List<FieldVector> getChildrenFromFields() { 169 return internalStruct.getChildrenFromFields(); 170 } 171 172 @Override loadFieldBuffers(ArrowFieldNode fieldNode, List<ArrowBuf> ownBuffers)173 public void loadFieldBuffers(ArrowFieldNode fieldNode, List<ArrowBuf> ownBuffers) { 174 if (ownBuffers.size() != 2) { 175 throw new IllegalArgumentException("Illegal buffer count for dense union with type " + getField().getFieldType() + 176 ", expected " + 2 + ", got: " + ownBuffers.size()); 177 } 178 179 ArrowBuf buffer = ownBuffers.get(0); 180 typeBuffer.getReferenceManager().release(); 181 typeBuffer = buffer.getReferenceManager().retain(buffer, allocator); 182 typeBufferAllocationSizeInBytes = typeBuffer.capacity(); 183 184 buffer = ownBuffers.get(1); 185 offsetBuffer.getReferenceManager().release(); 186 offsetBuffer = buffer.getReferenceManager().retain(buffer, allocator); 187 offsetBufferAllocationSizeInBytes = offsetBuffer.capacity(); 188 189 this.valueCount = fieldNode.getLength(); 190 } 191 192 @Override getFieldBuffers()193 public List<ArrowBuf> getFieldBuffers() { 194 List<ArrowBuf> result = new ArrayList<>(2); 195 setReaderAndWriterIndex(); 196 result.add(typeBuffer); 197 result.add(offsetBuffer); 198 199 return result; 200 } 201 setReaderAndWriterIndex()202 private void setReaderAndWriterIndex() { 203 typeBuffer.readerIndex(0); 204 typeBuffer.writerIndex(valueCount * TYPE_WIDTH); 205 206 offsetBuffer.readerIndex(0); 207 offsetBuffer.writerIndex((long) valueCount * OFFSET_WIDTH); 208 } 209 210 /** 211 * Get the inner vectors. 212 * 213 * @deprecated This API will be removed as the current implementations no longer support inner vectors. 214 * 215 * @return the inner vectors for this field as defined by the TypeLayout 216 */ 217 @Override 218 @Deprecated getFieldInnerVectors()219 public List<BufferBacked> getFieldInnerVectors() { 220 throw new UnsupportedOperationException("There are no inner vectors. Use geFieldBuffers"); 221 } 222 fieldName(byte typeId, MinorType type)223 private String fieldName(byte typeId, MinorType type) { 224 return type.name().toLowerCase() + typeId; 225 } 226 fieldType(MinorType type)227 private FieldType fieldType(MinorType type) { 228 return FieldType.nullable(type.getType()); 229 } 230 registerNewTypeId(Field field)231 public synchronized byte registerNewTypeId(Field field) { 232 if (nextTypeId == typeFields.length) { 233 throw new IllegalStateException("Dense union vector support at most " + 234 typeFields.length + " relative types. Please use union of union instead"); 235 } 236 byte typeId = nextTypeId; 237 if (fieldType != null) { 238 int[] typeIds = ((ArrowType.Union) fieldType.getType()).getTypeIds(); 239 if (typeIds != null) { 240 int thisTypeId = typeIds[nextTypeId]; 241 if (thisTypeId > Byte.MAX_VALUE) { 242 throw new IllegalStateException("Dense union vector types must be bytes. " + thisTypeId + " is too large"); 243 } 244 typeId = (byte) thisTypeId; 245 } 246 } 247 typeFields[typeId] = field; 248 typeMapFields[nextTypeId] = typeId; 249 this.nextTypeId += 1; 250 return typeId; 251 } 252 addOrGet(byte typeId, MinorType minorType, Class<T> c)253 private <T extends FieldVector> T addOrGet(byte typeId, MinorType minorType, Class<T> c) { 254 return internalStruct.addOrGet(fieldName(typeId, minorType), fieldType(minorType), c); 255 } 256 addOrGet(byte typeId, MinorType minorType, ArrowType arrowType, Class<T> c)257 private <T extends FieldVector> T addOrGet(byte typeId, MinorType minorType, ArrowType arrowType, Class<T> c) { 258 return internalStruct.addOrGet(fieldName(typeId, minorType), FieldType.nullable(arrowType), c); 259 } 260 261 @Override getOffsetBufferAddress()262 public long getOffsetBufferAddress() { 263 return offsetBuffer.memoryAddress(); 264 } 265 266 @Override getDataBufferAddress()267 public long getDataBufferAddress() { 268 throw new UnsupportedOperationException(); 269 } 270 271 @Override getValidityBufferAddress()272 public long getValidityBufferAddress() { 273 throw new UnsupportedOperationException(); 274 } 275 276 @Override getValidityBuffer()277 public ArrowBuf getValidityBuffer() { throw new UnsupportedOperationException(); } 278 279 @Override getOffsetBuffer()280 public ArrowBuf getOffsetBuffer() { return offsetBuffer; } 281 getTypeBuffer()282 public ArrowBuf getTypeBuffer() { return typeBuffer; } 283 284 @Override getDataBuffer()285 public ArrowBuf getDataBuffer() { throw new UnsupportedOperationException(); } 286 getStruct(byte typeId)287 public StructVector getStruct(byte typeId) { 288 StructVector structVector = typeId < 0 ? null : (StructVector) childVectors[typeId]; 289 if (structVector == null) { 290 int vectorCount = internalStruct.size(); 291 structVector = addOrGet(typeId, MinorType.STRUCT, StructVector.class); 292 if (internalStruct.size() > vectorCount) { 293 structVector.allocateNew(); 294 childVectors[typeId] = structVector; 295 if (callBack != null) { 296 callBack.doWork(); 297 } 298 } 299 } 300 return structVector; 301 } 302 303 <#list vv.types as type> 304 <#list type.minor as minor> 305 <#assign name = minor.class?cap_first /> 306 <#assign fields = minor.fields!type.fields /> 307 <#assign uncappedName = name?uncap_first/> 308 <#assign lowerCaseName = name?lower_case/> 309 <#if !minor.typeParams?? || minor.class?starts_with("Decimal")> 310 Vector(byte typeId<#if minor.class?starts_with(R)>, ArrowType arrowType</#if>)311 public ${name}Vector get${name}Vector(byte typeId<#if minor.class?starts_with("Decimal")>, ArrowType arrowType</#if>) { 312 ValueVector vector = typeId < 0 ? null : childVectors[typeId]; 313 if (vector == null) { 314 int vectorCount = internalStruct.size(); 315 vector = addOrGet(typeId, MinorType.${name?upper_case}<#if minor.class?starts_with("Decimal")>, arrowType</#if>, ${name}Vector.class); 316 childVectors[typeId] = vector; 317 if (internalStruct.size() > vectorCount) { 318 vector.allocateNew(); 319 if (callBack != null) { 320 callBack.doWork(); 321 } 322 } 323 } 324 return (${name}Vector) vector; 325 } 326 </#if> 327 </#list> 328 </#list> 329 getList(byte typeId)330 public ListVector getList(byte typeId) { 331 ListVector listVector = typeId < 0 ? null : (ListVector) childVectors[typeId]; 332 if (listVector == null) { 333 int vectorCount = internalStruct.size(); 334 listVector = addOrGet(typeId, MinorType.LIST, ListVector.class); 335 if (internalStruct.size() > vectorCount) { 336 listVector.allocateNew(); 337 childVectors[typeId] = listVector; 338 if (callBack != null) { 339 callBack.doWork(); 340 } 341 } 342 } 343 return listVector; 344 } 345 getMap(byte typeId)346 public MapVector getMap(byte typeId) { 347 MapVector mapVector = typeId < 0 ? null : (MapVector) childVectors[typeId]; 348 if (mapVector == null) { 349 int vectorCount = internalStruct.size(); 350 mapVector = addOrGet(typeId, MinorType.MAP, MapVector.class); 351 if (internalStruct.size() > vectorCount) { 352 mapVector.allocateNew(); 353 childVectors[typeId] = mapVector; 354 if (callBack != null) { 355 callBack.doWork(); 356 } 357 } 358 } 359 return mapVector; 360 } 361 getTypeId(int index)362 public byte getTypeId(int index) { 363 return typeBuffer.getByte(index * TYPE_WIDTH); 364 } 365 getVectorByType(byte typeId)366 public ValueVector getVectorByType(byte typeId) { 367 return typeId < 0 ? null : childVectors[typeId]; 368 } 369 370 @Override allocateNew()371 public void allocateNew() throws OutOfMemoryException { 372 /* new allocation -- clear the current buffers */ 373 clear(); 374 internalStruct.allocateNew(); 375 try { 376 allocateTypeBuffer(); 377 allocateOffsetBuffer(); 378 } catch (Exception e) { 379 clear(); 380 throw e; 381 } 382 } 383 384 @Override allocateNewSafe()385 public boolean allocateNewSafe() { 386 /* new allocation -- clear the current buffers */ 387 clear(); 388 boolean safe = internalStruct.allocateNewSafe(); 389 if (!safe) { return false; } 390 try { 391 allocateTypeBuffer(); 392 allocateOffsetBuffer(); 393 } catch (Exception e) { 394 clear(); 395 return false; 396 } 397 398 return true; 399 } 400 allocateTypeBuffer()401 private void allocateTypeBuffer() { 402 typeBuffer = allocator.buffer(typeBufferAllocationSizeInBytes); 403 typeBuffer.readerIndex(0); 404 setNegative(0, typeBuffer.capacity()); 405 } 406 allocateOffsetBuffer()407 private void allocateOffsetBuffer() { 408 offsetBuffer = allocator.buffer(offsetBufferAllocationSizeInBytes); 409 offsetBuffer.readerIndex(0); 410 offsetBuffer.setZero(0, offsetBuffer.capacity()); 411 } 412 413 414 @Override reAlloc()415 public void reAlloc() { 416 internalStruct.reAlloc(); 417 reallocTypeBuffer(); 418 reallocOffsetBuffer(); 419 } 420 getOffset(int index)421 public int getOffset(int index) { 422 return offsetBuffer.getInt((long) index * OFFSET_WIDTH); 423 } 424 reallocTypeBuffer()425 private void reallocTypeBuffer() { 426 final long currentBufferCapacity = typeBuffer.capacity(); 427 long newAllocationSize = currentBufferCapacity * 2; 428 if (newAllocationSize == 0) { 429 if (typeBufferAllocationSizeInBytes > 0) { 430 newAllocationSize = typeBufferAllocationSizeInBytes; 431 } else { 432 newAllocationSize = BaseValueVector.INITIAL_VALUE_ALLOCATION * TYPE_WIDTH * 2; 433 } 434 } 435 436 newAllocationSize = CommonUtil.nextPowerOfTwo(newAllocationSize); 437 assert newAllocationSize >= 1; 438 439 if (newAllocationSize > BaseValueVector.MAX_ALLOCATION_SIZE) { 440 throw new OversizedAllocationException("Unable to expand the buffer"); 441 } 442 443 final ArrowBuf newBuf = allocator.buffer((int)newAllocationSize); 444 newBuf.setBytes(0, typeBuffer, 0, currentBufferCapacity); 445 typeBuffer.getReferenceManager().release(1); 446 typeBuffer = newBuf; 447 typeBufferAllocationSizeInBytes = (int)newAllocationSize; 448 setNegative(currentBufferCapacity, newBuf.capacity() - currentBufferCapacity); 449 } 450 reallocOffsetBuffer()451 private void reallocOffsetBuffer() { 452 final long currentBufferCapacity = offsetBuffer.capacity(); 453 long newAllocationSize = currentBufferCapacity * 2; 454 if (newAllocationSize == 0) { 455 if (offsetBufferAllocationSizeInBytes > 0) { 456 newAllocationSize = offsetBufferAllocationSizeInBytes; 457 } else { 458 newAllocationSize = BaseValueVector.INITIAL_VALUE_ALLOCATION * OFFSET_WIDTH * 2; 459 } 460 } 461 462 newAllocationSize = CommonUtil.nextPowerOfTwo(newAllocationSize); 463 assert newAllocationSize >= 1; 464 465 if (newAllocationSize > BaseValueVector.MAX_ALLOCATION_SIZE) { 466 throw new OversizedAllocationException("Unable to expand the buffer"); 467 } 468 469 final ArrowBuf newBuf = allocator.buffer((int) newAllocationSize); 470 newBuf.setBytes(0, offsetBuffer, 0, currentBufferCapacity); 471 newBuf.setZero(currentBufferCapacity, newBuf.capacity() - currentBufferCapacity); 472 offsetBuffer.getReferenceManager().release(1); 473 offsetBuffer = newBuf; 474 offsetBufferAllocationSizeInBytes = (int) newAllocationSize; 475 } 476 477 @Override setInitialCapacity(int numRecords)478 public void setInitialCapacity(int numRecords) { } 479 480 @Override getValueCapacity()481 public int getValueCapacity() { 482 long capacity = getTypeBufferValueCapacity(); 483 long offsetCapacity = getOffsetBufferValueCapacity(); 484 if (offsetCapacity < capacity) { 485 capacity = offsetCapacity; 486 } 487 long structCapacity = internalStruct.getValueCapacity(); 488 if (structCapacity < capacity) { 489 structCapacity = capacity; 490 } 491 return (int) capacity; 492 } 493 494 @Override close()495 public void close() { 496 clear(); 497 } 498 499 @Override clear()500 public void clear() { 501 valueCount = 0; 502 typeBuffer.getReferenceManager().release(); 503 typeBuffer = allocator.getEmpty(); 504 offsetBuffer.getReferenceManager().release(); 505 offsetBuffer = allocator.getEmpty(); 506 internalStruct.clear(); 507 } 508 509 @Override reset()510 public void reset() { 511 valueCount = 0; 512 setNegative(0, typeBuffer.capacity()); 513 offsetBuffer.setZero(0, offsetBuffer.capacity()); 514 internalStruct.reset(); 515 } 516 517 @Override getField()518 public Field getField() { 519 int childCount = (int) Arrays.stream(typeFields).filter(field -> field != null).count(); 520 List<org.apache.arrow.vector.types.pojo.Field> childFields = new ArrayList<>(childCount); 521 int[] typeIds = new int[childCount]; 522 for (int i = 0; i < typeFields.length; i++) { 523 if (typeFields[i] != null) { 524 int curIdx = childFields.size(); 525 typeIds[curIdx] = i; 526 childFields.add(typeFields[i]); 527 } 528 } 529 530 FieldType fieldType; 531 if (this.fieldType == null) { 532 fieldType = FieldType.nullable(new ArrowType.Union(Dense, typeIds)); 533 } else { 534 final UnionMode mode = UnionMode.Dense; 535 fieldType = new FieldType(this.fieldType.isNullable(), new ArrowType.Union(mode, typeIds), 536 this.fieldType.getDictionary(), this.fieldType.getMetadata()); 537 } 538 539 return new Field(name, fieldType, childFields); 540 } 541 542 @Override getTransferPair(BufferAllocator allocator)543 public TransferPair getTransferPair(BufferAllocator allocator) { 544 return getTransferPair(name, allocator); 545 } 546 547 @Override getTransferPair(String ref, BufferAllocator allocator)548 public TransferPair getTransferPair(String ref, BufferAllocator allocator) { 549 return getTransferPair(ref, allocator, null); 550 } 551 552 @Override getTransferPair(String ref, BufferAllocator allocator, CallBack callBack)553 public TransferPair getTransferPair(String ref, BufferAllocator allocator, CallBack callBack) { 554 return new org.apache.arrow.vector.complex.DenseUnionVector.TransferImpl(ref, allocator, callBack); 555 } 556 557 @Override makeTransferPair(ValueVector target)558 public TransferPair makeTransferPair(ValueVector target) { 559 return new TransferImpl((DenseUnionVector) target); 560 } 561 562 @Override copyFrom(int inIndex, int outIndex, ValueVector from)563 public void copyFrom(int inIndex, int outIndex, ValueVector from) { 564 Preconditions.checkArgument(this.getMinorType() == from.getMinorType()); 565 DenseUnionVector fromCast = (DenseUnionVector) from; 566 int inOffset = fromCast.offsetBuffer.getInt((long) inIndex * OFFSET_WIDTH); 567 fromCast.getReader().setPosition(inOffset); 568 int outOffset = offsetBuffer.getInt((long) outIndex * OFFSET_WIDTH); 569 getWriter().setPosition(outOffset); 570 ComplexCopier.copy(fromCast.reader, writer); 571 } 572 573 @Override copyFromSafe(int inIndex, int outIndex, ValueVector from)574 public void copyFromSafe(int inIndex, int outIndex, ValueVector from) { 575 copyFrom(inIndex, outIndex, from); 576 } 577 addVector(byte typeId, FieldVector v)578 public FieldVector addVector(byte typeId, FieldVector v) { 579 final String name = v.getName().isEmpty() ? fieldName(typeId, v.getMinorType()) : v.getName(); 580 Preconditions.checkState(internalStruct.getChild(name) == null, String.format("%s vector already exists", name)); 581 final FieldVector newVector = internalStruct.addOrGet(name, v.getField().getFieldType(), v.getClass()); 582 v.makeTransferPair(newVector).transfer(); 583 internalStruct.putChild(name, newVector); 584 childVectors[typeId] = newVector; 585 if (callBack != null) { 586 callBack.doWork(); 587 } 588 return newVector; 589 } 590 591 private class TransferImpl implements TransferPair { 592 private final TransferPair[] internalTransferPairs = new TransferPair[nextTypeId]; 593 private final DenseUnionVector to; 594 TransferImpl(String name, BufferAllocator allocator, CallBack callBack)595 public TransferImpl(String name, BufferAllocator allocator, CallBack callBack) { 596 to = new DenseUnionVector(name, allocator, null, callBack); 597 internalStruct.makeTransferPair(to.internalStruct); 598 createTransferPairs(); 599 } 600 TransferImpl(DenseUnionVector to)601 public TransferImpl(DenseUnionVector to) { 602 this.to = to; 603 internalStruct.makeTransferPair(to.internalStruct); 604 createTransferPairs(); 605 } 606 createTransferPairs()607 private void createTransferPairs() { 608 for (int i = 0; i < nextTypeId; i++) { 609 ValueVector srcVec = internalStruct.getVectorById(i); 610 ValueVector dstVec = to.internalStruct.getVectorById(i); 611 to.typeFields[i] = typeFields[i]; 612 to.typeMapFields[i] = typeMapFields[i]; 613 to.childVectors[i] = dstVec; 614 internalTransferPairs[i] = srcVec.makeTransferPair(dstVec); 615 } 616 } 617 618 @Override transfer()619 public void transfer() { 620 to.clear(); 621 622 ReferenceManager refManager = typeBuffer.getReferenceManager(); 623 to.typeBuffer = refManager.transferOwnership(typeBuffer, to.allocator).getTransferredBuffer(); 624 625 refManager = offsetBuffer.getReferenceManager(); 626 to.offsetBuffer = refManager.transferOwnership(offsetBuffer, to.allocator).getTransferredBuffer(); 627 628 for (int i = 0; i < nextTypeId; i++) { 629 if (internalTransferPairs[i] != null) { 630 internalTransferPairs[i].transfer(); 631 to.childVectors[i] = internalTransferPairs[i].getTo(); 632 } 633 } 634 to.valueCount = valueCount; 635 clear(); 636 } 637 638 @Override splitAndTransfer(int startIndex, int length)639 public void splitAndTransfer(int startIndex, int length) { 640 to.clear(); 641 642 // transfer type buffer 643 int startPoint = startIndex * TYPE_WIDTH; 644 int sliceLength = length * TYPE_WIDTH; 645 ArrowBuf slicedBuffer = typeBuffer.slice(startPoint, sliceLength); 646 ReferenceManager refManager = slicedBuffer.getReferenceManager(); 647 to.typeBuffer = refManager.transferOwnership(slicedBuffer, to.allocator).getTransferredBuffer(); 648 649 // transfer offset byffer 650 while (to.offsetBuffer.capacity() < (long) length * OFFSET_WIDTH) { 651 to.reallocOffsetBuffer(); 652 } 653 654 int [] typeCounts = new int[nextTypeId]; 655 int [] typeStarts = new int[nextTypeId]; 656 for (int i = 0; i < typeCounts.length; i++) { 657 typeCounts[i] = 0; 658 typeStarts[i] = -1; 659 } 660 661 for (int i = startIndex; i < startIndex + length; i++) { 662 byte typeId = typeBuffer.getByte(i); 663 to.offsetBuffer.setInt((long) (i - startIndex) * OFFSET_WIDTH, typeCounts[typeId]); 664 typeCounts[typeId] += 1; 665 if (typeStarts[typeId] == -1) { 666 typeStarts[typeId] = offsetBuffer.getInt((long) i * OFFSET_WIDTH); 667 } 668 } 669 670 // transfer vector values 671 for (int i = 0; i < nextTypeId; i++) { 672 if (typeCounts[i] > 0 && typeStarts[i] != -1) { 673 internalTransferPairs[i].splitAndTransfer(typeStarts[i], typeCounts[i]); 674 to.childVectors[i] = internalTransferPairs[i].getTo(); 675 } 676 } 677 678 to.setValueCount(length); 679 } 680 681 @Override getTo()682 public ValueVector getTo() { 683 return to; 684 } 685 686 @Override copyValueSafe(int from, int to)687 public void copyValueSafe(int from, int to) { 688 this.to.copyFrom(from, to, DenseUnionVector.this); 689 } 690 } 691 692 @Override getReader()693 public FieldReader getReader() { 694 if (reader == null) { 695 reader = new DenseUnionReader(this); 696 } 697 return reader; 698 } 699 getWriter()700 public FieldWriter getWriter() { 701 if (writer == null) { 702 writer = new DenseUnionWriter(this); 703 } 704 return writer; 705 } 706 707 @Override getBufferSize()708 public int getBufferSize() { 709 return this.getBufferSizeFor(this.valueCount); 710 } 711 712 @Override getBufferSizeFor(final int count)713 public int getBufferSizeFor(final int count) { 714 if (count == 0) { 715 return 0; 716 } 717 return (int) (count * TYPE_WIDTH + (long) count * OFFSET_WIDTH 718 + DataSizeRoundingUtil.divideBy8Ceil(count) + internalStruct.getBufferSizeFor(count)); 719 } 720 721 @Override getBuffers(boolean clear)722 public ArrowBuf[] getBuffers(boolean clear) { 723 List<ArrowBuf> list = new java.util.ArrayList<>(); 724 setReaderAndWriterIndex(); 725 if (getBufferSize() != 0) { 726 list.add(typeBuffer); 727 list.add(offsetBuffer); 728 list.addAll(java.util.Arrays.asList(internalStruct.getBuffers(clear))); 729 } 730 if (clear) { 731 valueCount = 0; 732 typeBuffer.getReferenceManager().retain(); 733 typeBuffer.close(); 734 typeBuffer = allocator.getEmpty(); 735 offsetBuffer.getReferenceManager().retain(); 736 offsetBuffer.close(); 737 offsetBuffer = allocator.getEmpty(); 738 } 739 return list.toArray(new ArrowBuf[list.size()]); 740 } 741 742 @Override iterator()743 public Iterator<ValueVector> iterator() { 744 return internalStruct.iterator(); 745 } 746 getVector(int index)747 private ValueVector getVector(int index) { 748 byte typeId = typeBuffer.getByte(index * TYPE_WIDTH); 749 return getVectorByType(typeId); 750 } 751 getObject(int index)752 public Object getObject(int index) { 753 ValueVector vector = getVector(index); 754 if (vector != null) { 755 int offset = offsetBuffer.getInt((long) index * OFFSET_WIDTH); 756 return vector.isNull(offset) ? null : vector.getObject(offset); 757 } 758 return null; 759 } 760 get(int index, DenseUnionHolder holder)761 public void get(int index, DenseUnionHolder holder) { 762 FieldReader reader = new DenseUnionReader(DenseUnionVector.this); 763 reader.setPosition(index); 764 holder.reader = reader; 765 } 766 getValueCount()767 public int getValueCount() { 768 return valueCount; 769 } 770 771 /** 772 * IMPORTANT: Union types always return non null as there is no validity buffer. 773 * 774 * To check validity correctly you must check the underlying vector. 775 */ isNull(int index)776 public boolean isNull(int index) { 777 return false; 778 } 779 780 @Override getNullCount()781 public int getNullCount() { 782 return 0; 783 } 784 isSet(int index)785 public int isSet(int index) { 786 return isNull(index) ? 0 : 1; 787 } 788 789 DenseUnionWriter writer; 790 setValueCount(int valueCount)791 public void setValueCount(int valueCount) { 792 this.valueCount = valueCount; 793 while (valueCount > getTypeBufferValueCapacity()) { 794 reallocTypeBuffer(); 795 reallocOffsetBuffer(); 796 } 797 setChildVectorValueCounts(); 798 } 799 setChildVectorValueCounts()800 private void setChildVectorValueCounts() { 801 int [] counts = new int[Byte.MAX_VALUE + 1]; 802 for (int i = 0; i < this.valueCount; i++) { 803 byte typeId = getTypeId(i); 804 if (typeId != -1) { 805 counts[typeId] += 1; 806 } 807 } 808 for (int i = 0; i < nextTypeId; i++) { 809 childVectors[typeMapFields[i]].setValueCount(counts[typeMapFields[i]]); 810 } 811 } 812 setSafe(int index, DenseUnionHolder holder)813 public void setSafe(int index, DenseUnionHolder holder) { 814 FieldReader reader = holder.reader; 815 if (writer == null) { 816 writer = new DenseUnionWriter(DenseUnionVector.this); 817 } 818 int offset = offsetBuffer.getInt((long) index * OFFSET_WIDTH); 819 MinorType type = reader.getMinorType(); 820 writer.setPosition(offset); 821 byte typeId = holder.typeId; 822 switch (type) { 823 <#list vv.types as type> 824 <#list type.minor as minor> 825 <#assign name = minor.class?cap_first /> 826 <#assign fields = minor.fields!type.fields /> 827 <#assign uncappedName = name?uncap_first/> 828 <#if !minor.typeParams?? || minor.class?starts_with("Decimal")> 829 case ${name?upper_case}: 830 Nullable${name}Holder ${uncappedName}Holder = new Nullable${name}Holder(); 831 reader.read(${uncappedName}Holder); 832 setSafe(index, ${uncappedName}Holder); 833 break; 834 </#if> 835 </#list> 836 </#list> 837 case STRUCT: 838 case LIST: { 839 setTypeId(index, typeId); 840 ComplexCopier.copy(reader, writer); 841 break; 842 } 843 default: 844 throw new UnsupportedOperationException(); 845 } 846 } 847 <#list vv.types as type> 848 <#list type.minor as minor> 849 <#assign name = minor.class?cap_first /> 850 <#assign fields = minor.fields!type.fields /> 851 <#assign uncappedName = name?uncap_first/> 852 <#if !minor.typeParams?? || minor.class?starts_with("Decimal")> setSafe(int index, Nullable${name}Holder holder)853 public void setSafe(int index, Nullable${name}Holder holder) { 854 while (index >= getOffsetBufferValueCapacity()) { 855 reallocOffsetBuffer(); 856 } 857 byte typeId = getTypeId(index); 858 ${name}Vector vector = get${name}Vector(typeId<#if minor.class?starts_with("Decimal")>, new ArrowType.Decimal(holder.precision, holder.scale, holder.WIDTH * 8)</#if>); 859 int offset = vector.getValueCount(); 860 vector.setValueCount(offset + 1); 861 vector.setSafe(offset, holder); 862 offsetBuffer.setInt((long) index * OFFSET_WIDTH, offset); 863 } 864 </#if> 865 </#list> 866 </#list> 867 setTypeId(int index, byte typeId)868 public void setTypeId(int index, byte typeId) { 869 while (index >= getTypeBufferValueCapacity()) { 870 reallocTypeBuffer(); 871 } 872 typeBuffer.setByte(index * TYPE_WIDTH , typeId); 873 } 874 getTypeBufferValueCapacity()875 private int getTypeBufferValueCapacity() { 876 return (int) typeBuffer.capacity() / TYPE_WIDTH; 877 } 878 getOffsetBufferValueCapacity()879 private long getOffsetBufferValueCapacity() { 880 return offsetBuffer.capacity() / OFFSET_WIDTH; 881 } 882 883 @Override hashCode(int index, ArrowBufHasher hasher)884 public int hashCode(int index, ArrowBufHasher hasher) { 885 if (isNull(index)) { 886 return 0; 887 } 888 int offset = offsetBuffer.getInt((long) index * OFFSET_WIDTH); 889 return getVector(index).hashCode(offset, hasher); 890 } 891 892 @Override hashCode(int index)893 public int hashCode(int index) { 894 return hashCode(index, SimpleHasher.INSTANCE); 895 } 896 897 @Override accept(VectorVisitor<OUT, IN> visitor, IN value)898 public <OUT, IN> OUT accept(VectorVisitor<OUT, IN> visitor, IN value) { 899 return visitor.visit(this, value); 900 } 901 902 @Override getName()903 public String getName() { 904 return name; 905 } 906 setNegative(long start, long end)907 private void setNegative(long start, long end) { 908 for (long i = start;i < end; i++) { 909 typeBuffer.setByte(i, -1); 910 } 911 } 912 913 @Override addOrGet(String name, FieldType fieldType, Class<T> clazz)914 public <T extends FieldVector> T addOrGet(String name, FieldType fieldType, Class<T> clazz) { 915 return internalStruct.addOrGet(name, fieldType, clazz); 916 } 917 918 @Override getChild(String name, Class<T> clazz)919 public <T extends FieldVector> T getChild(String name, Class<T> clazz) { 920 return internalStruct.getChild(name, clazz); 921 } 922 923 @Override getChildVectorWithOrdinal(String name)924 public VectorWithOrdinal getChildVectorWithOrdinal(String name) { 925 return internalStruct.getChildVectorWithOrdinal(name); 926 } 927 928 @Override size()929 public int size() { 930 return internalStruct.size(); 931 } 932 933 @Override setInitialCapacity(int valueCount, double density)934 public void setInitialCapacity(int valueCount, double density) { 935 for (final ValueVector vector : internalStruct) { 936 if (vector instanceof DensityAwareVector) { 937 ((DensityAwareVector) vector).setInitialCapacity(valueCount, density); 938 } else { 939 vector.setInitialCapacity(valueCount); 940 } 941 } 942 } 943 } 944