1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18import { Vector } from './vector';
19import { truncateBitmap } from './util/bit';
20import { popcnt_bit_range } from './util/bit';
21import { BufferType, UnionMode, Type } from './enum';
22import { DataType, SparseUnion, DenseUnion, strideForType } from './type';
23import { toArrayBufferView, toUint8Array, toInt32Array } from './util/buffer';
24import {
25    Dictionary,
26    Null, Int, Float,
27    Binary, Bool, Utf8, Decimal,
28    Date_, Time, Timestamp, Interval,
29    List, Struct, Union, FixedSizeBinary, FixedSizeList, Map_,
30} from './type';
31
// A slice cannot know how many nulls fall inside its range without scanning
// the validity bitmap. Rather than paying for that scan eagerly, the null
// count is recorded as -1 (any negative value works as the marker) and the
// real count is computed the first time Data.nullCount is read. See ARROW-33
/** @ignore */
export const kUnknownNullCount: -1 = -1;
/** @ignore */
export type kUnknownNullCount = typeof kUnknownNullCount;
38
/**
 * Input accepted for a validity (null) bitmap by the static `Data` factories.
 * @ignore
 */
export type NullBuffer =
    | Uint8Array
    | null
    | undefined;

/**
 * Input accepted for a Union's type-ids buffer by `Data.Union`.
 * @ignore
 */
export type TypeIdsBuffer =
    | Int8Array
    | ArrayLike<number>
    | Iterable<number>
    | undefined;

/**
 * Input accepted for a value-offsets buffer by the static `Data` factories.
 * @ignore
 */
export type ValueOffsetsBuffer =
    | Int32Array
    | ArrayLike<number>
    | Iterable<number>
    | undefined;

/**
 * Input accepted for the values buffer of a type `T` by the static `Data` factories.
 * @ignore
 */
export type DataBuffer<T extends DataType> =
    | T['TArray']
    | ArrayLike<number>
    | Iterable<number>
    | undefined;
43
/**
 * The set of buffers a `Data` instance can carry, keyed by `BufferType`.
 * @ignore
 */
export interface Buffers<T extends DataType> {
    /** Value offsets, stored as an Int32Array. */
    [BufferType.OFFSET]: Int32Array;
    /** Values, stored in the array type declared by `T`. */
    [BufferType.DATA]: T['TArray'];
    /** Validity (null) bitmap, stored as a Uint8Array. */
    [BufferType.VALIDITY]: Uint8Array;
    /** Type ids, typed with the array type declared by `T`. */
    [BufferType.TYPE]: T['TArray'];
}
51
/**
 * Interface half of `Data`; it merges with the `Data` class declared in this file.
 * Exposes the `TType`, `TArray` and `TValue` members of `T` as readonly members.
 * NOTE(review): presumably type-level only; nothing in the class body assigns them.
 * @ignore
 */
export interface Data<T extends DataType = DataType> {
    /** Mirrors `T['TType']`. */
    readonly TType: T['TType'];
    /** Mirrors `T['TArray']`. */
    readonly TArray: T['TArray'];
    /** Mirrors `T['TValue']`. */
    readonly TValue: T['TValue'];
}
58
/**
 * Pairs a DataType with the buffers, child Data and bookkeeping values
 * (offset, length, null count, stride) that describe one run of elements.
 * @ignore
 */
export class Data<T extends DataType = DataType> {

    /** The DataType passed to the constructor. */
    public readonly type: T;
    /** Element count; the constructor clamps it to a non-negative integer. */
    public readonly length: number;
    /** Element offset; the constructor clamps it to a non-negative integer. */
    public readonly offset: number;
    /** Copied from the source Data, or computed with `strideForType(type)`. */
    public readonly stride: number;
    /** Child Data; Vectors handed to the constructor are replaced by their `.data`. */
    public readonly childData: Data[];

    /**
     * The dictionary for this Vector, if any. Only used for Dictionary type.
     */
    public dictionary?: Vector;

    // The four buffer slots below are only assigned when a buffer is supplied,
    // which is why each declaration is preceded by a ts-ignore directive.
    // @ts-ignore
    public readonly values: Buffers<T>[BufferType.DATA];
    // @ts-ignore
    public readonly typeIds: Buffers<T>[BufferType.TYPE];
    // @ts-ignore
    public readonly nullBitmap: Buffers<T>[BufferType.VALIDITY];
    // @ts-ignore
    public readonly valueOffsets: Buffers<T>[BufferType.OFFSET];

    /** The `typeId` of `type`. */
    public get typeId(): T['TType'] {
        return this.type.typeId;
    }

    /** The `ArrayType` of `type`. */
    public get ArrayType(): T['ArrayType'] {
        return this.type.ArrayType;
    }

    /** A freshly allocated `[valueOffsets, values, nullBitmap, typeIds]` array on every access. */
    public get buffers() {
        const { valueOffsets, values, nullBitmap, typeIds } = this;
        return [valueOffsets, values, nullBitmap, typeIds] as Buffers<T>;
    }

    /** Sum of `byteLength` over the buffers that are present plus the `byteLength` of every child. */
    public get byteLength(): number {
        const { valueOffsets, values, nullBitmap, typeIds } = this;
        let ownBytes = 0;
        if (valueOffsets) { ownBytes += valueOffsets.byteLength; }
        if (values) { ownBytes += values.byteLength; }
        if (nullBitmap) { ownBytes += nullBitmap.byteLength; }
        if (typeIds) { ownBytes += typeIds.byteLength; }
        return this.childData.reduce((total, child) => total + child.byteLength, ownBytes);
    }

    /** Cached null count; a value at or below `kUnknownNullCount` means "not computed yet". */
    protected _nullCount: number | kUnknownNullCount;

    /**
     * The number of nulls in this Data. When the cached value is still the
     * "unknown" marker and a validity bitmap exists, the count is derived from
     * the set bits in `[offset, offset + length)` and cached. Without a bitmap
     * the cached value is returned unchanged.
     */
    public get nullCount() {
        const cached = this._nullCount;
        if (cached <= kUnknownNullCount) {
            const validity: Uint8Array | undefined = this.nullBitmap;
            if (validity) {
                const counted = this.length - popcnt_bit_range(validity, this.offset, this.offset + this.length);
                this._nullCount = counted;
                return counted;
            }
        }
        return cached;
    }

    /**
     * @param type The DataType for this Data.
     * @param offset Element offset; falsy or negative values become 0, fractions are floored.
     * @param length Element count; falsy or negative values become 0, fractions are floored.
     * @param nullCount Null count; falsy values become 0 and anything below -1 becomes -1.
     * @param buffers Either another Data, whose stride and four buffer slots are copied
     *                as-is, or an indexable set of buffers (0: value offsets, 1: values,
     *                2: validity bitmap, 3: type ids) of which only truthy entries are assigned.
     * @param childData Children; Vector entries are unwrapped to their `.data`.
     * @param dictionary Stored on `dictionary` unchanged.
     */
    constructor(
        type: T,
        offset: number,
        length: number,
        nullCount?: number,
        buffers?: Partial<Buffers<T>> | Data<T>,
        childData?: (Data | Vector)[],
        dictionary?: Vector
    ) {
        this.type = type;
        this.dictionary = dictionary;
        this.offset = Math.floor(Math.max(offset || 0, 0));
        this.length = Math.floor(Math.max(length || 0, 0));
        this._nullCount = Math.floor(Math.max(nullCount || 0, -1));
        this.childData = (childData || []).map((x) => x instanceof Data ? x : x.data) as Data[];
        if (buffers instanceof Data) {
            // Share everything with the source Data, including slots that are unset there.
            this.stride = buffers.stride;
            this.values = buffers.values;
            this.typeIds = buffers.typeIds;
            this.nullBitmap = buffers.nullBitmap;
            this.valueOffsets = buffers.valueOffsets;
        } else {
            this.stride = strideForType(type);
            if (buffers) {
                const given = buffers as Buffers<T>;
                // Only truthy entries are assigned; a missing buffer leaves its slot untouched.
                const valueOffsets = given[0];
                if (valueOffsets) { this.valueOffsets = valueOffsets; }
                const values = given[1];
                if (values) { this.values = values; }
                const nullBitmap = given[2];
                if (nullBitmap) { this.nullBitmap = nullBitmap; }
                const typeIds = given[3];
                if (typeIds) { this.typeIds = typeIds; }
            }
        }
    }

    /**
     * Creates a new Data of the given type. Every other argument defaults to
     * this instance's own value (`buffers` defaults to this instance itself),
     * and this instance's dictionary is always carried over.
     */
    public clone<R extends DataType>(
        type: R,
        offset = this.offset,
        length = this.length,
        nullCount = this._nullCount,
        buffers: Buffers<R> = <any> this,
        childData: (Data | Vector)[] = this.childData
    ) {
        return new Data(type, offset, length, nullCount, buffers, childData, this.dictionary);
    }

    /**
     * Returns a Data covering `length` elements starting `offset` elements into this one.
     * @param offset Start of the slice, relative to this Data.
     * @param length Number of elements in the slice.
     */
    public slice(offset: number, length: number): Data<T> {
        const { stride, typeId, childData } = this;
        // A known-zero null count stays zero for any sub-range; anything else
        // has to be recounted lazily, so it is reset to the "unknown" marker.
        const nullCount = this._nullCount === 0 ? 0 : kUnknownNullCount;
        const childStride = typeId === 16 /* FixedSizeList */ ? stride : 1;
        const buffers = this._sliceBuffers(offset, length, stride, typeId);
        // Don't slice children if we have value offsets (the variable-width types)
        let children = childData;
        if (childData.length && !this.valueOffsets) {
            children = this._sliceChildren(childData, childStride * offset, childStride * length);
        }
        return this.clone<T>(this.type, this.offset + offset, length, nullCount, buffers, children);
    }

    /**
     * Returns a clone at offset 0 with `newLength` elements and a freshly built
     * validity bitmap. Slots `[0, length)` start out valid and slots from
     * `length` onwards are null. For the Null type the clone is made without
     * touching any buffers.
     * @param newLength Element count of the returned Data.
     */
    public _changeLengthAndBackfillNullBitmap(newLength: number): Data<T> {
        if (this.typeId === Type.Null) {
            return this.clone(this.type, 0, newLength, 0);
        }
        const { length, nullCount } = this;
        const wholeBytes = length >> 3;
        // Zero-initialised (all null), sized for newLength rounded up to a multiple of 64 bits.
        const bitmap = new Uint8Array(((newLength + 63) & ~63) >> 3);
        // Mark every complete byte below `length` as valid...
        bitmap.fill(255, 0, wholeBytes);
        // ...then set the low `length % 8` bits of the following, partially used byte.
        bitmap[wholeBytes] = (1 << (length - (length & ~7))) - 1;
        // With existing nulls, lay the current bitmap over the pre-filled 1s.
        // NOTE(review): presumably truncateBitmap re-aligns the bitmap to start at bit 0 - confirm.
        if (nullCount > 0) {
            bitmap.set(truncateBitmap(this.offset, length, this.nullBitmap), 0);
        }
        const buffers = this.buffers;
        buffers[BufferType.VALIDITY] = bitmap;
        return this.clone(this.type, 0, newLength, nullCount + (newLength - length), buffers);
    }

    /**
     * Builds the buffer set for a slice. The type-ids buffer is sliced whenever
     * present. If a value-offsets buffer exists only it is sliced (keeping one
     * extra trailing offset); otherwise the values buffer is sliced by `stride`.
     */
    protected _sliceBuffers(offset: number, length: number, stride: number, typeId: T['TType']): Buffers<T> {
        const sliced = this.buffers;
        const end = offset + length;
        const typeIds: any = sliced[BufferType.TYPE];
        if (typeIds) {
            sliced[BufferType.TYPE] = typeIds.subarray(offset, end);
        }
        const valueOffsets: any = sliced[BufferType.OFFSET];
        if (valueOffsets) {
            sliced[BufferType.OFFSET] = valueOffsets.subarray(offset, end + 1);
        } else {
            const values: any = sliced[BufferType.DATA];
            if (values) {
                // Booleans keep the whole values buffer, since the offset goes by bits not bytes.
                sliced[BufferType.DATA] = typeId === 6 ? values : values.subarray(stride * offset, stride * end);
            }
        }
        return sliced;
    }

    /** Applies `slice(offset, length)` to every child. */
    protected _sliceChildren(childData: Data[], offset: number, length: number): Data[] {
        const sliceChild = (child: Data) => child.slice(offset, length);
        return childData.map(sliceChild);
    }

    //
    // Convenience methods for creating Data instances for each of the Arrow Vector types
    //
    /**
     * Dispatches on `type.typeId` to the matching static factory below.
     * A Data passed as `buffers` is replaced by its `buffers` array, a missing
     * `nullCount` is treated as 0, and missing buffers default to empty arrays.
     * @throws Error when `type.typeId` matches none of the handled types.
     * @nocollapse
     */
    public static new<T extends DataType>(type: T, offset: number, length: number, nullCount?: number, buffers?: Partial<Buffers<T>> | Data<T>, childData?: (Data | Vector)[], dictionary?: Vector): Data<T> {
        const bufs: Partial<Buffers<T>> = buffers instanceof Data
            ? buffers.buffers
            : (buffers || ([] as Partial<Buffers<T>>));
        const nulls = nullCount || 0;
        const validity = bufs[BufferType.VALIDITY];
        switch (type.typeId) {
            case Type.Null:
                return Data.Null(type as unknown as Null, offset, length) as unknown as Data<T>;
            case Type.Int:
                return Data.Int(type as unknown as Int, offset, length, nulls, validity, bufs[BufferType.DATA] || []) as unknown as Data<T>;
            case Type.Dictionary:
                return Data.Dictionary(type as unknown as Dictionary, offset, length, nulls, validity, bufs[BufferType.DATA] || [], dictionary!) as unknown as Data<T>;
            case Type.Float:
                return Data.Float(type as unknown as Float, offset, length, nulls, validity, bufs[BufferType.DATA] || []) as unknown as Data<T>;
            case Type.Bool:
                return Data.Bool(type as unknown as Bool, offset, length, nulls, validity, bufs[BufferType.DATA] || []) as unknown as Data<T>;
            case Type.Decimal:
                return Data.Decimal(type as unknown as Decimal, offset, length, nulls, validity, bufs[BufferType.DATA] || []) as unknown as Data<T>;
            case Type.Date:
                return Data.Date(type as unknown as Date_, offset, length, nulls, validity, bufs[BufferType.DATA] || []) as unknown as Data<T>;
            case Type.Time:
                return Data.Time(type as unknown as Time, offset, length, nulls, validity, bufs[BufferType.DATA] || []) as unknown as Data<T>;
            case Type.Timestamp:
                return Data.Timestamp(type as unknown as Timestamp, offset, length, nulls, validity, bufs[BufferType.DATA] || []) as unknown as Data<T>;
            case Type.Interval:
                return Data.Interval(type as unknown as Interval, offset, length, nulls, validity, bufs[BufferType.DATA] || []) as unknown as Data<T>;
            case Type.FixedSizeBinary:
                return Data.FixedSizeBinary(type as unknown as FixedSizeBinary, offset, length, nulls, validity, bufs[BufferType.DATA] || []) as unknown as Data<T>;
            case Type.Binary:
                return Data.Binary(type as unknown as Binary, offset, length, nulls, validity, bufs[BufferType.OFFSET] || [], bufs[BufferType.DATA] || []) as unknown as Data<T>;
            case Type.Utf8:
                return Data.Utf8(type as unknown as Utf8, offset, length, nulls, validity, bufs[BufferType.OFFSET] || [], bufs[BufferType.DATA] || []) as unknown as Data<T>;
            case Type.List:
                return Data.List(type as unknown as List, offset, length, nulls, validity, bufs[BufferType.OFFSET] || [], (childData || [])[0]) as unknown as Data<T>;
            case Type.FixedSizeList:
                return Data.FixedSizeList(type as unknown as FixedSizeList, offset, length, nulls, validity, (childData || [])[0]) as unknown as Data<T>;
            case Type.Struct:
                return Data.Struct(type as unknown as Struct, offset, length, nulls, validity, childData || []) as unknown as Data<T>;
            case Type.Map:
                return Data.Map(type as unknown as Map_, offset, length, nulls, validity, bufs[BufferType.OFFSET] || [], (childData || [])[0]) as unknown as Data<T>;
            case Type.Union:
                return Data.Union(type as unknown as Union, offset, length, nulls, validity, bufs[BufferType.TYPE] || [], bufs[BufferType.OFFSET] || childData, childData) as unknown as Data<T>;
        }
        throw new Error(`Unrecognized typeId ${type.typeId}`);
    }

    /**
     * Data for the Null type: no buffers, and a null count of 0.
     * @nocollapse
     */
    public static Null<T extends Null>(type: T, offset: number, length: number) {
        return new Data(type, offset, length, 0);
    }
    /**
     * Data for an Int type: values converted to `type.ArrayType`, plus a validity bitmap.
     * @nocollapse
     */
    public static Int<T extends Int>(type: T, offset: number, length: number, nullCount: number, nullBitmap: NullBuffer, data: DataBuffer<T>) {
        return new Data(type, offset, length, nullCount, [
            undefined,
            toArrayBufferView(type.ArrayType, data),
            toUint8Array(nullBitmap)
        ]);
    }
    /**
     * Data for a Dictionary type: values converted to `type.indices.ArrayType`,
     * a validity bitmap, no children, and the given dictionary Vector.
     * @nocollapse
     */
    public static Dictionary<T extends Dictionary>(type: T, offset: number, length: number, nullCount: number, nullBitmap: NullBuffer, data: DataBuffer<T>, dictionary: Vector<T['dictionary']>) {
        return new Data(type, offset, length, nullCount, [
            undefined,
            toArrayBufferView<T['TArray']>(type.indices.ArrayType, data),
            toUint8Array(nullBitmap)
        ], [], dictionary);
    }
    /**
     * Data for a Float type: values converted to `type.ArrayType`, plus a validity bitmap.
     * @nocollapse
     */
    public static Float<T extends Float>(type: T, offset: number, length: number, nullCount: number, nullBitmap: NullBuffer, data: DataBuffer<T>) {
        return new Data(type, offset, length, nullCount, [
            undefined,
            toArrayBufferView(type.ArrayType, data),
            toUint8Array(nullBitmap)
        ]);
    }
    /**
     * Data for the Bool type: values converted to `type.ArrayType`, plus a validity bitmap.
     * @nocollapse
     */
    public static Bool<T extends Bool>(type: T, offset: number, length: number, nullCount: number, nullBitmap: NullBuffer, data: DataBuffer<T>) {
        return new Data(type, offset, length, nullCount, [
            undefined,
            toArrayBufferView(type.ArrayType, data),
            toUint8Array(nullBitmap)
        ]);
    }
    /**
     * Data for the Decimal type: values converted to `type.ArrayType`, plus a validity bitmap.
     * @nocollapse
     */
    public static Decimal<T extends Decimal>(type: T, offset: number, length: number, nullCount: number, nullBitmap: NullBuffer, data: DataBuffer<T>) {
        return new Data(type, offset, length, nullCount, [
            undefined,
            toArrayBufferView(type.ArrayType, data),
            toUint8Array(nullBitmap)
        ]);
    }
    /**
     * Data for a Date type: values converted to `type.ArrayType`, plus a validity bitmap.
     * @nocollapse
     */
    public static Date<T extends Date_>(type: T, offset: number, length: number, nullCount: number, nullBitmap: NullBuffer, data: DataBuffer<T>) {
        return new Data(type, offset, length, nullCount, [
            undefined,
            toArrayBufferView(type.ArrayType, data),
            toUint8Array(nullBitmap)
        ]);
    }
    /**
     * Data for a Time type: values converted to `type.ArrayType`, plus a validity bitmap.
     * @nocollapse
     */
    public static Time<T extends Time>(type: T, offset: number, length: number, nullCount: number, nullBitmap: NullBuffer, data: DataBuffer<T>) {
        return new Data(type, offset, length, nullCount, [
            undefined,
            toArrayBufferView(type.ArrayType, data),
            toUint8Array(nullBitmap)
        ]);
    }
    /**
     * Data for a Timestamp type: values converted to `type.ArrayType`, plus a validity bitmap.
     * @nocollapse
     */
    public static Timestamp<T extends Timestamp>(type: T, offset: number, length: number, nullCount: number, nullBitmap: NullBuffer, data: DataBuffer<T>) {
        return new Data(type, offset, length, nullCount, [
            undefined,
            toArrayBufferView(type.ArrayType, data),
            toUint8Array(nullBitmap)
        ]);
    }
    /**
     * Data for an Interval type: values converted to `type.ArrayType`, plus a validity bitmap.
     * @nocollapse
     */
    public static Interval<T extends Interval>(type: T, offset: number, length: number, nullCount: number, nullBitmap: NullBuffer, data: DataBuffer<T>) {
        return new Data(type, offset, length, nullCount, [
            undefined,
            toArrayBufferView(type.ArrayType, data),
            toUint8Array(nullBitmap)
        ]);
    }
    /**
     * Data for the FixedSizeBinary type: values converted to `type.ArrayType`, plus a validity bitmap.
     * @nocollapse
     */
    public static FixedSizeBinary<T extends FixedSizeBinary>(type: T, offset: number, length: number, nullCount: number, nullBitmap: NullBuffer, data: DataBuffer<T>) {
        return new Data(type, offset, length, nullCount, [
            undefined,
            toArrayBufferView(type.ArrayType, data),
            toUint8Array(nullBitmap)
        ]);
    }
    /**
     * Data for the Binary type: Int32 value offsets, Uint8 values and a validity bitmap.
     * @nocollapse
     */
    public static Binary<T extends Binary>(type: T, offset: number, length: number, nullCount: number, nullBitmap: NullBuffer, valueOffsets: ValueOffsetsBuffer, data: DataBuffer<T>) {
        return new Data(type, offset, length, nullCount, [
            toInt32Array(valueOffsets),
            toUint8Array(data),
            toUint8Array(nullBitmap)
        ]);
    }
    /**
     * Data for the Utf8 type: Int32 value offsets, Uint8 values and a validity bitmap.
     * @nocollapse
     */
    public static Utf8<T extends Utf8>(type: T, offset: number, length: number, nullCount: number, nullBitmap: NullBuffer, valueOffsets: ValueOffsetsBuffer, data: DataBuffer<T>) {
        return new Data(type, offset, length, nullCount, [
            toInt32Array(valueOffsets),
            toUint8Array(data),
            toUint8Array(nullBitmap)
        ]);
    }
    /**
     * Data for the List type: Int32 value offsets, a validity bitmap and at most one child.
     * @nocollapse
     */
    public static List<T extends List>(type: T, offset: number, length: number, nullCount: number, nullBitmap: NullBuffer, valueOffsets: ValueOffsetsBuffer, child: Data<T['valueType']> | Vector<T['valueType']>) {
        return new Data(type, offset, length, nullCount, [
            toInt32Array(valueOffsets),
            undefined,
            toUint8Array(nullBitmap)
        ], child ? [child] : []);
    }
    /**
     * Data for the FixedSizeList type: a validity bitmap and at most one child.
     * @nocollapse
     */
    public static FixedSizeList<T extends FixedSizeList>(type: T, offset: number, length: number, nullCount: number, nullBitmap: NullBuffer, child: Data<T['valueType']> | Vector<T['valueType']>) {
        return new Data(type, offset, length, nullCount, [
            undefined,
            undefined,
            toUint8Array(nullBitmap)
        ], child ? [child] : []);
    }
    /**
     * Data for the Struct type: a validity bitmap and the given children.
     * @nocollapse
     */
    public static Struct<T extends Struct>(type: T, offset: number, length: number, nullCount: number, nullBitmap: NullBuffer, children: (Data | Vector)[]) {
        return new Data(type, offset, length, nullCount, [
            undefined,
            undefined,
            toUint8Array(nullBitmap)
        ], children);
    }
    /**
     * Data for the Map type: Int32 value offsets, a validity bitmap and at most one child.
     * @nocollapse
     */
    public static Map<T extends Map_>(type: T, offset: number, length: number, nullCount: number, nullBitmap: NullBuffer, valueOffsets: ValueOffsetsBuffer, child: (Data | Vector)) {
        return new Data(type, offset, length, nullCount, [
            toInt32Array(valueOffsets),
            undefined,
            toUint8Array(nullBitmap)
        ], child ? [child] : []);
    }
    public static Union<T extends SparseUnion>(type: T, offset: number, length: number, nullCount: number, nullBitmap: NullBuffer, typeIds: TypeIdsBuffer, children: (Data | Vector)[], _?: any): Data<T>;
    public static Union<T extends DenseUnion>(type: T, offset: number, length: number, nullCount: number, nullBitmap: NullBuffer, typeIds: TypeIdsBuffer, valueOffsets: ValueOffsetsBuffer, children: (Data | Vector)[]): Data<T>;
    public static Union<T extends Union>(type: T, offset: number, length: number, nullCount: number, nullBitmap: NullBuffer, typeIds: TypeIdsBuffer, valueOffsetsOrChildren: ValueOffsetsBuffer | (Data | Vector)[], children?: (Data | Vector)[]): Data<T>;
    /**
     * Data for a Union type: a validity bitmap and type ids converted to `type.ArrayType`.
     * In Sparse mode the seventh argument is the children array. In any other
     * mode it is the value offsets and the children come from the eighth argument.
     * @nocollapse
     */
    public static Union<T extends Union>(type: T, offset: number, length: number, nullCount: number, nullBitmap: NullBuffer, typeIds: TypeIdsBuffer, valueOffsetsOrChildren: ValueOffsetsBuffer | (Data | Vector)[], children?: (Data | Vector)[]) {
        const unionBuffers = [
            undefined,
            undefined,
            toUint8Array(nullBitmap),
            toArrayBufferView(type.ArrayType, typeIds)
        ] as unknown as Partial<Buffers<T>>;
        if (type.mode !== UnionMode.Sparse) {
            unionBuffers[BufferType.OFFSET] = toInt32Array(valueOffsetsOrChildren as ValueOffsetsBuffer);
            return new Data(type, offset, length, nullCount, unionBuffers, children);
        }
        return new Data(type, offset, length, nullCount, unionBuffers, valueOffsetsOrChildren as (Data | Vector)[]);
    }
}
297
// Give Data.prototype a frozen, empty `childData` array. The constructor
// assigns an own `childData` to every instance it builds, which shadows
// this prototype-level default.
(<any> Data.prototype).childData = Object.freeze([]);
299