1// Licensed to the Apache Software Foundation (ASF) under one 2// or more contributor license agreements. See the NOTICE file 3// distributed with this work for additional information 4// regarding copyright ownership. The ASF licenses this file 5// to you under the Apache License, Version 2.0 (the 6// "License"); you may not use this file except in compliance 7// with the License. You may obtain a copy of the License at 8// 9// http://www.apache.org/licenses/LICENSE-2.0 10// 11// Unless required by applicable law or agreed to in writing, 12// software distributed under the License is distributed on an 13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14// KIND, either express or implied. See the License for the 15// specific language governing permissions and limitations 16// under the License. 17 18import { Vector } from './vector'; 19import { truncateBitmap } from './util/bit'; 20import { popcnt_bit_range } from './util/bit'; 21import { BufferType, UnionMode, Type } from './enum'; 22import { DataType, SparseUnion, DenseUnion, strideForType } from './type'; 23import { toArrayBufferView, toUint8Array, toInt32Array } from './util/buffer'; 24import { 25 Dictionary, 26 Null, Int, Float, 27 Binary, Bool, Utf8, Decimal, 28 Date_, Time, Timestamp, Interval, 29 List, Struct, Union, FixedSizeBinary, FixedSizeList, Map_, 30} from './type'; 31 32// When slicing, we do not know the null count of the sliced range without 33// doing some computation. To avoid doing this eagerly, we set the null count 34// to -1 (any negative number will do). When Vector.nullCount is called the 35// first time, the null count will be computed. 
// See ARROW-33
/** @ignore */ export type kUnknownNullCount = -1;
/** @ignore */ export const kUnknownNullCount = -1;

// Loosely-typed inputs accepted by the static `Data.<Type>()` factories below.
/** @ignore */ export type NullBuffer = Uint8Array | null | undefined;
/** @ignore */ export type TypeIdsBuffer = Int8Array | ArrayLike<number> | Iterable<number> | undefined;
/** @ignore */ export type ValueOffsetsBuffer = Int32Array | ArrayLike<number> | Iterable<number> | undefined;
/** @ignore */ export type DataBuffer<T extends DataType> = T['TArray'] | ArrayLike<number> | Iterable<number> | undefined;

/**
 * The (up to) four buffers a `Data` instance can hold, keyed by `BufferType`.
 * @ignore
 */
export interface Buffers<T extends DataType> {
    [BufferType.OFFSET]: Int32Array;
    [BufferType.DATA]: T['TArray'];
    [BufferType.VALIDITY]: Uint8Array;
    [BufferType.TYPE]: T['TArray'];
}

/**
 * Type-level members merged into the `Data` class so the associated types of
 * `T` can be referenced through a `Data<T>`.
 * @ignore
 */
export interface Data<T extends DataType = DataType> {
    readonly TType: T['TType'];
    readonly TArray: T['TArray'];
    readonly TValue: T['TValue'];
}

/**
 * Container for the buffers, child data and bookkeeping (offset, length,
 * stride, null count) that back a single value of `DataType` `T`.
 * @ignore
 */
export class Data<T extends DataType = DataType> {

    public readonly type: T;
    public readonly length: number;
    public readonly offset: number;
    public readonly stride: number;
    public readonly childData: Data[];

    /**
     * The dictionary for this Vector, if any. Only used for Dictionary type.
     */
    public dictionary?: Vector;

    // The four buffer fields are only assigned when the corresponding buffer
    // was supplied, hence the `@ts-ignore`s for definite assignment.
    // @ts-ignore
    public readonly values: Buffers<T>[BufferType.DATA];
    // @ts-ignore
    public readonly typeIds: Buffers<T>[BufferType.TYPE];
    // @ts-ignore
    public readonly nullBitmap: Buffers<T>[BufferType.VALIDITY];
    // @ts-ignore
    public readonly valueOffsets: Buffers<T>[BufferType.OFFSET];

    /** The `typeId` of this instance's `type`. */
    public get typeId(): T['TType'] { return this.type.typeId; }

    /** The `ArrayType` of this instance's `type`. */
    public get ArrayType(): T['ArrayType'] { return this.type.ArrayType; }

    /**
     * A freshly-allocated array `[valueOffsets, values, nullBitmap, typeIds]`.
     * A new array is built on every access, so callers may mutate the result
     * without affecting this instance.
     */
    public get buffers() {
        return [this.valueOffsets, this.values, this.nullBitmap, this.typeIds] as Buffers<T>;
    }

    /**
     * Sum of the `byteLength` of every buffer present on this instance plus
     * the `byteLength` of every entry in `childData`.
     */
    public get byteLength(): number {
        const { valueOffsets, values, nullBitmap, typeIds } = this;
        let ownBytes = 0;
        if (valueOffsets) { ownBytes += valueOffsets.byteLength; }
        if (values) { ownBytes += values.byteLength; }
        if (nullBitmap) { ownBytes += nullBitmap.byteLength; }
        if (typeIds) { ownBytes += typeIds.byteLength; }
        return this.childData.reduce((total, child) => total + child.byteLength, ownBytes);
    }

    protected _nullCount: number | kUnknownNullCount;

    /**
     * The number of null slots in `[offset, offset + length)`.
     *
     * When the stored count is the "unknown" sentinel (any value <= -1) it is
     * resolved on first access and cached:
     *  - with a validity bitmap, as `length` minus the number of set bits in
     *    the bitmap range;
     *  - without a validity bitmap, as `0`. Previously the negative sentinel
     *    itself was returned in this case, which leaked an internal marker to
     *    callers and skewed arithmetic such as the null-count adjustment in
     *    `_changeLengthAndBackfillNullBitmap`.
     */
    public get nullCount() {
        let nullCount = this._nullCount;
        if (nullCount <= kUnknownNullCount) {
            const nullBitmap: Uint8Array | undefined = this.nullBitmap;
            nullCount = nullBitmap
                ? this.length - popcnt_bit_range(nullBitmap, this.offset, this.offset + this.length)
                : 0;
            this._nullCount = nullCount;
        }
        return nullCount;
    }

    /**
     * @param type The DataType described by this instance.
     * @param offset Starting slot; falsy or negative values become 0, fractions are floored.
     * @param length Number of slots; falsy or negative values become 0, fractions are floored.
     * @param nullCount Known null count; falsy values become 0 and anything below -1 is clamped to -1 (unknown).
     * @param buffers Either another `Data` whose stride and buffers are shared as-is,
     *                or an object/array indexed 0..3 in the order
     *                `[valueOffsets, values, nullBitmap, typeIds]`.
     * @param childData Child `Data` instances, or Vectors (whose `.data` is used).
     * @param dictionary The dictionary Vector, if any.
     */
    constructor(type: T, offset: number, length: number, nullCount?: number, buffers?: Partial<Buffers<T>> | Data<T>, childData?: (Data | Vector)[], dictionary?: Vector) {
        this.type = type;
        this.dictionary = dictionary;
        this.offset = Math.floor(Math.max(offset || 0, 0));
        this.length = Math.floor(Math.max(length || 0, 0));
        this._nullCount = Math.floor(Math.max(nullCount || 0, -1));
        this.childData = (childData || []).map((x) => x instanceof Data ? x : x.data) as Data[];
        let buffer: Buffers<T>[keyof Buffers<T>];
        if (buffers instanceof Data) {
            // Share the other instance's stride and buffers verbatim
            this.stride = buffers.stride;
            this.values = buffers.values;
            this.typeIds = buffers.typeIds;
            this.nullBitmap = buffers.nullBitmap;
            this.valueOffsets = buffers.valueOffsets;
        } else {
            this.stride = strideForType(type);
            if (buffers) {
                // Only assign the buffers that were actually provided
                (buffer = (buffers as Buffers<T>)[0]) && (this.valueOffsets = buffer);
                (buffer = (buffers as Buffers<T>)[1]) && (this.values = buffer);
                (buffer = (buffers as Buffers<T>)[2]) && (this.nullBitmap = buffer);
                (buffer = (buffers as Buffers<T>)[3]) && (this.typeIds = buffer);
            }
        }
    }

    /**
     * Create a new `Data` of the given `type`. Every other argument defaults
     * to this instance's current value (the stored, possibly-unknown null
     * count is forwarded as-is), and this instance's dictionary is carried over.
     */
    public clone<R extends DataType>(type: R, offset = this.offset, length = this.length, nullCount = this._nullCount, buffers: Buffers<R> = <any> this, childData: (Data | Vector)[] = this.childData) {
        return new Data(type, offset, length, nullCount, buffers, childData, this.dictionary);
    }

    /**
     * Return a new `Data` covering `length` slots starting `offset` slots
     * after this instance's own offset.
     */
    public slice(offset: number, length: number): Data<T> {
        const { stride, typeId, childData } = this;
        // +true === 1, +false === 0, so this means
        // we keep nullCount at 0 if it's already 0,
        // otherwise set to the invalidated flag -1
        const nullCount = +(this._nullCount === 0) - 1;
        const childStride = typeId === 16 /* FixedSizeList */ ? stride : 1;
        const buffers = this._sliceBuffers(offset, length, stride, typeId);
        // Don't slice children if we have value offsets (the variable-width types)
        const children = (!childData.length || this.valueOffsets)
            ? childData
            : this._sliceChildren(childData, childStride * offset, childStride * length);
        return this.clone<T>(this.type, this.offset + offset, length, nullCount, buffers, children);
    }

    /**
     * Return a clone at offset 0 with `newLength` slots and a freshly
     * allocated validity bitmap in which slots `[length, newLength)` are null.
     * For the Null type the instance is simply re-cloned with the new length.
     */
    public _changeLengthAndBackfillNullBitmap(newLength: number): Data<T> {
        if (this.typeId === Type.Null) {
            return this.clone(this.type, 0, newLength, 0);
        }
        const { length, nullCount } = this;
        // start initialized with 0s (nulls), then fill from 0 to length with 1s (not null)
        const bitmap = new Uint8Array(((newLength + 63) & ~63) >> 3).fill(255, 0, length >> 3);
        // set all the bits in the last byte (up to bit `length - length % 8`) to 1 (not null)
        bitmap[length >> 3] = (1 << (length - (length & ~7))) - 1;
        // if we have a nullBitmap, truncate + slice and set it over the pre-filled 1s
        if (nullCount > 0) {
            bitmap.set(truncateBitmap(this.offset, length, this.nullBitmap), 0);
        }
        const buffers = this.buffers;
        buffers[BufferType.VALIDITY] = bitmap;
        return this.clone(this.type, 0, newLength, nullCount + (newLength - length), buffers);
    }

    /**
     * Build the buffer set for a slice of `length` slots starting at `offset`.
     * Operates on the fresh array returned by the `buffers` getter, so this
     * instance's own buffers are not reassigned.
     */
    protected _sliceBuffers(offset: number, length: number, stride: number, typeId: T['TType']): Buffers<T> {
        const buffers = this.buffers;
        let arr: any;
        // If typeIds exist, slice the typeIds buffer
        if ((arr = buffers[BufferType.TYPE])) {
            buffers[BufferType.TYPE] = arr.subarray(offset, offset + length);
        }
        if ((arr = buffers[BufferType.OFFSET])) {
            // If offsets exist, only slice the offsets buffer
            buffers[BufferType.OFFSET] = arr.subarray(offset, offset + length + 1);
        } else if ((arr = buffers[BufferType.DATA])) {
            // Otherwise if no offsets, slice the data buffer. Don't slice the data
            // vector for Booleans, since the offset goes by bits not bytes
            buffers[BufferType.DATA] = typeId === 6 ? arr : arr.subarray(stride * offset, stride * (offset + length));
        }
        return buffers;
    }

    /** Slice every child with the same `offset` and `length`. */
    protected _sliceChildren(childData: Data[], offset: number, length: number): Data[] {
        return childData.map((child) => child.slice(offset, length));
    }

    //
    // Convenience methods for creating Data instances for each of the Arrow Vector types
    //

    /**
     * Dispatch on `type.typeId` to the matching static factory below.
     * `buffers` may be another `Data` (its `buffers` array is used) or omitted
     * (treated as empty). A missing `nullCount` is passed on as 0.
     * @throws Error when `type.typeId` matches none of the handled cases.
     * @nocollapse
     */
    public static new<T extends DataType>(type: T, offset: number, length: number, nullCount?: number, buffers?: Partial<Buffers<T>> | Data<T>, childData?: (Data | Vector)[], dictionary?: Vector): Data<T> {
        if (buffers instanceof Data) { buffers = buffers.buffers; } else if (!buffers) { buffers = [] as Partial<Buffers<T>>; }
        switch (type.typeId) {
            case Type.Null:            return <unknown> Data.Null(            <unknown> type as Null,            offset, length) as Data<T>;
            case Type.Int:             return <unknown> Data.Int(             <unknown> type as Int,             offset, length, nullCount || 0, buffers[BufferType.VALIDITY], buffers[BufferType.DATA] || []) as Data<T>;
            case Type.Dictionary:      return <unknown> Data.Dictionary(      <unknown> type as Dictionary,      offset, length, nullCount || 0, buffers[BufferType.VALIDITY], buffers[BufferType.DATA] || [], dictionary!) as Data<T>;
            case Type.Float:           return <unknown> Data.Float(           <unknown> type as Float,           offset, length, nullCount || 0, buffers[BufferType.VALIDITY], buffers[BufferType.DATA] || []) as Data<T>;
            case Type.Bool:            return <unknown> Data.Bool(            <unknown> type as Bool,            offset, length, nullCount || 0, buffers[BufferType.VALIDITY], buffers[BufferType.DATA] || []) as Data<T>;
            case Type.Decimal:         return <unknown> Data.Decimal(         <unknown> type as Decimal,         offset, length, nullCount || 0, buffers[BufferType.VALIDITY], buffers[BufferType.DATA] || []) as Data<T>;
            case Type.Date:            return <unknown> Data.Date(            <unknown> type as Date_,           offset, length, nullCount || 0, buffers[BufferType.VALIDITY], buffers[BufferType.DATA] || []) as Data<T>;
            case Type.Time:            return <unknown> Data.Time(            <unknown> type as Time,            offset, length, nullCount || 0, buffers[BufferType.VALIDITY], buffers[BufferType.DATA] || []) as Data<T>;
            case Type.Timestamp:       return <unknown> Data.Timestamp(       <unknown> type as Timestamp,       offset, length, nullCount || 0, buffers[BufferType.VALIDITY], buffers[BufferType.DATA] || []) as Data<T>;
            case Type.Interval:        return <unknown> Data.Interval(        <unknown> type as Interval,        offset, length, nullCount || 0, buffers[BufferType.VALIDITY], buffers[BufferType.DATA] || []) as Data<T>;
            case Type.FixedSizeBinary: return <unknown> Data.FixedSizeBinary( <unknown> type as FixedSizeBinary, offset, length, nullCount || 0, buffers[BufferType.VALIDITY], buffers[BufferType.DATA] || []) as Data<T>;
            case Type.Binary:          return <unknown> Data.Binary(          <unknown> type as Binary,          offset, length, nullCount || 0, buffers[BufferType.VALIDITY], buffers[BufferType.OFFSET] || [], buffers[BufferType.DATA] || []) as Data<T>;
            case Type.Utf8:            return <unknown> Data.Utf8(            <unknown> type as Utf8,            offset, length, nullCount || 0, buffers[BufferType.VALIDITY], buffers[BufferType.OFFSET] || [], buffers[BufferType.DATA] || []) as Data<T>;
            case Type.List:            return <unknown> Data.List(            <unknown> type as List,            offset, length, nullCount || 0, buffers[BufferType.VALIDITY], buffers[BufferType.OFFSET] || [], (childData || [])[0]) as Data<T>;
            case Type.FixedSizeList:   return <unknown> Data.FixedSizeList(   <unknown> type as FixedSizeList,   offset, length, nullCount || 0, buffers[BufferType.VALIDITY], (childData || [])[0]) as Data<T>;
            case Type.Struct:          return <unknown> Data.Struct(          <unknown> type as Struct,          offset, length, nullCount || 0, buffers[BufferType.VALIDITY], childData || []) as Data<T>;
            case Type.Map:             return <unknown> Data.Map(             <unknown> type as Map_,            offset, length, nullCount || 0, buffers[BufferType.VALIDITY], buffers[BufferType.OFFSET] || [], (childData || [])[0]) as Data<T>;
            case Type.Union:           return <unknown> Data.Union(           <unknown> type as Union,           offset, length, nullCount || 0, buffers[BufferType.VALIDITY], buffers[BufferType.TYPE] || [], buffers[BufferType.OFFSET] || childData, childData) as Data<T>;
        }
        throw new Error(`Unrecognized typeId ${type.typeId}`);
    }

    // Each factory below passes its raw buffer arguments through the
    // `toArrayBufferView` / `toUint8Array` / `toInt32Array` helpers and places
    // the results in the `[valueOffsets, values, nullBitmap, typeIds]` slots.

    /** Null data carries no buffers and a null count of 0. @nocollapse */
    public static Null<T extends Null>(type: T, offset: number, length: number) {
        return new Data(type, offset, length, 0);
    }
    /** Int data: a values buffer of `type.ArrayType` plus a validity bitmap. @nocollapse */
    public static Int<T extends Int>(type: T, offset: number, length: number, nullCount: number, nullBitmap: NullBuffer, data: DataBuffer<T>) {
        return new Data(type, offset, length, nullCount, [undefined, toArrayBufferView(type.ArrayType, data), toUint8Array(nullBitmap)]);
    }
    /** Dictionary data: a values buffer of `type.indices.ArrayType`, a validity bitmap and the dictionary Vector. @nocollapse */
    public static Dictionary<T extends Dictionary>(type: T, offset: number, length: number, nullCount: number, nullBitmap: NullBuffer, data: DataBuffer<T>, dictionary: Vector<T['dictionary']>) {
        return new Data(type, offset, length, nullCount, [undefined, toArrayBufferView<T['TArray']>(type.indices.ArrayType, data), toUint8Array(nullBitmap)], [], dictionary);
    }
    /** Float data: a values buffer of `type.ArrayType` plus a validity bitmap. @nocollapse */
    public static Float<T extends Float>(type: T, offset: number, length: number, nullCount: number, nullBitmap: NullBuffer, data: DataBuffer<T>) {
        return new Data(type, offset, length, nullCount, [undefined, toArrayBufferView(type.ArrayType, data), toUint8Array(nullBitmap)]);
    }
    /** Bool data: a values buffer of `type.ArrayType` plus a validity bitmap. @nocollapse */
    public static Bool<T extends Bool>(type: T, offset: number, length: number, nullCount: number, nullBitmap: NullBuffer, data: DataBuffer<T>) {
        return new Data(type, offset, length, nullCount, [undefined, toArrayBufferView(type.ArrayType, data), toUint8Array(nullBitmap)]);
    }
    /** Decimal data: a values buffer of `type.ArrayType` plus a validity bitmap. @nocollapse */
    public static Decimal<T extends Decimal>(type: T, offset: number, length: number, nullCount: number, nullBitmap: NullBuffer, data: DataBuffer<T>) {
        return new Data(type, offset, length, nullCount, [undefined, toArrayBufferView(type.ArrayType, data), toUint8Array(nullBitmap)]);
    }
    /** Date data: a values buffer of `type.ArrayType` plus a validity bitmap. @nocollapse */
    public static Date<T extends Date_>(type: T, offset: number, length: number, nullCount: number, nullBitmap: NullBuffer, data: DataBuffer<T>) {
        return new Data(type, offset, length, nullCount, [undefined, toArrayBufferView(type.ArrayType, data), toUint8Array(nullBitmap)]);
    }
    /** Time data: a values buffer of `type.ArrayType` plus a validity bitmap. @nocollapse */
    public static Time<T extends Time>(type: T, offset: number, length: number, nullCount: number, nullBitmap: NullBuffer, data: DataBuffer<T>) {
        return new Data(type, offset, length, nullCount, [undefined, toArrayBufferView(type.ArrayType, data), toUint8Array(nullBitmap)]);
    }
    /** Timestamp data: a values buffer of `type.ArrayType` plus a validity bitmap. @nocollapse */
    public static Timestamp<T extends Timestamp>(type: T, offset: number, length: number, nullCount: number, nullBitmap: NullBuffer, data: DataBuffer<T>) {
        return new Data(type, offset, length, nullCount, [undefined, toArrayBufferView(type.ArrayType, data), toUint8Array(nullBitmap)]);
    }
    /** Interval data: a values buffer of `type.ArrayType` plus a validity bitmap. @nocollapse */
    public static Interval<T extends Interval>(type: T, offset: number, length: number, nullCount: number, nullBitmap: NullBuffer, data: DataBuffer<T>) {
        return new Data(type, offset, length, nullCount, [undefined, toArrayBufferView(type.ArrayType, data), toUint8Array(nullBitmap)]);
    }
    /** FixedSizeBinary data: a values buffer of `type.ArrayType` plus a validity bitmap. @nocollapse */
    public static FixedSizeBinary<T extends FixedSizeBinary>(type: T, offset: number, length: number, nullCount: number, nullBitmap: NullBuffer, data: DataBuffer<T>) {
        return new Data(type, offset, length, nullCount, [undefined, toArrayBufferView(type.ArrayType, data), toUint8Array(nullBitmap)]);
    }
    /** Binary data: value offsets, a Uint8Array values buffer and a validity bitmap. @nocollapse */
    public static Binary<T extends Binary>(type: T, offset: number, length: number, nullCount: number, nullBitmap: NullBuffer, valueOffsets: ValueOffsetsBuffer, data: DataBuffer<T>) {
        return new Data(type, offset, length, nullCount, [toInt32Array(valueOffsets), toUint8Array(data), toUint8Array(nullBitmap)]);
    }
    /** Utf8 data: value offsets, a Uint8Array values buffer and a validity bitmap. @nocollapse */
    public static Utf8<T extends Utf8>(type: T, offset: number, length: number, nullCount: number, nullBitmap: NullBuffer, valueOffsets: ValueOffsetsBuffer, data: DataBuffer<T>) {
        return new Data(type, offset, length, nullCount, [toInt32Array(valueOffsets), toUint8Array(data), toUint8Array(nullBitmap)]);
    }
    /** List data: value offsets, a validity bitmap and at most one child (none when `child` is falsy). @nocollapse */
    public static List<T extends List>(type: T, offset: number, length: number, nullCount: number, nullBitmap: NullBuffer, valueOffsets: ValueOffsetsBuffer, child: Data<T['valueType']> | Vector<T['valueType']>) {
        return new Data(type, offset, length, nullCount, [toInt32Array(valueOffsets), undefined, toUint8Array(nullBitmap)], child ? [child] : []);
    }
    /** FixedSizeList data: a validity bitmap and at most one child (none when `child` is falsy). @nocollapse */
    public static FixedSizeList<T extends FixedSizeList>(type: T, offset: number, length: number, nullCount: number, nullBitmap: NullBuffer, child: Data<T['valueType']> | Vector<T['valueType']>) {
        return new Data(type, offset, length, nullCount, [undefined, undefined, toUint8Array(nullBitmap)], child ? [child] : []);
    }
    /** Struct data: a validity bitmap and the given children. @nocollapse */
    public static Struct<T extends Struct>(type: T, offset: number, length: number, nullCount: number, nullBitmap: NullBuffer, children: (Data | Vector)[]) {
        return new Data(type, offset, length, nullCount, [undefined, undefined, toUint8Array(nullBitmap)], children);
    }
    /** Map data: value offsets, a validity bitmap and at most one child (none when `child` is falsy). @nocollapse */
    public static Map<T extends Map_>(type: T, offset: number, length: number, nullCount: number, nullBitmap: NullBuffer, valueOffsets: ValueOffsetsBuffer, child: (Data | Vector)) {
        return new Data(type, offset, length, nullCount, [toInt32Array(valueOffsets), undefined, toUint8Array(nullBitmap)], child ? [child] : []);
    }
    public static Union<T extends SparseUnion>(type: T, offset: number, length: number, nullCount: number, nullBitmap: NullBuffer, typeIds: TypeIdsBuffer, children: (Data | Vector)[], _?: any): Data<T>;
    public static Union<T extends DenseUnion>(type: T, offset: number, length: number, nullCount: number, nullBitmap: NullBuffer, typeIds: TypeIdsBuffer, valueOffsets: ValueOffsetsBuffer, children: (Data | Vector)[]): Data<T>;
    public static Union<T extends Union>(type: T, offset: number, length: number, nullCount: number, nullBitmap: NullBuffer, typeIds: TypeIdsBuffer, valueOffsetsOrChildren: ValueOffsetsBuffer | (Data | Vector)[], children?: (Data | Vector)[]): Data<T>;
    /**
     * Union data. When `type.mode` is `UnionMode.Sparse` the seventh argument
     * is used as the children and no offsets buffer is set; otherwise it is
     * used as the value offsets and `children` supplies the children.
     * @nocollapse
     */
    public static Union<T extends Union>(type: T, offset: number, length: number, nullCount: number, nullBitmap: NullBuffer, typeIds: TypeIdsBuffer, valueOffsetsOrChildren: ValueOffsetsBuffer | (Data | Vector)[], children?: (Data | Vector)[]) {
        const buffers = <unknown> [
            undefined, undefined,
            toUint8Array(nullBitmap),
            toArrayBufferView(type.ArrayType, typeIds)
        ] as Partial<Buffers<T>>;
        if (type.mode === UnionMode.Sparse) {
            return new Data(type, offset, length, nullCount, buffers, valueOffsetsOrChildren as (Data | Vector)[]);
        }
        buffers[BufferType.OFFSET] = toInt32Array(<ValueOffsetsBuffer> valueOffsetsOrChildren);
        return new Data(type, offset, length, nullCount, buffers, children);
    }
}

// Shared frozen default so `childData` is an (empty) array even on objects
// whose constructor has not assigned one.
(Data.prototype as any).childData = Object.freeze([]);