1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18import randomatic from 'randomatic';
19import { VectorType as V } from 'apache-arrow/interfaces';
20
21import {
22    Data, Vector, Visitor, DataType,
23    Table, Schema, Field, RecordBatch,
24    Null,
25    Bool,
26    Int, Int8, Int16, Int32, Int64, Uint8, Uint16, Uint32, Uint64,
27    Float, Float16, Float32, Float64,
28    Utf8,
29    Binary,
30    FixedSizeBinary,
31    Date_, DateDay, DateMillisecond,
32    Timestamp, TimestampSecond, TimestampMillisecond, TimestampMicrosecond, TimestampNanosecond,
33    Time, TimeSecond, TimeMillisecond, TimeMicrosecond, TimeNanosecond,
34    Decimal,
35    List,
36    Struct,
37    Union, DenseUnion, SparseUnion,
38    Dictionary,
39    Interval, IntervalDayTime, IntervalYearMonth,
40    FixedSizeList,
41    Map_,
42    DateUnit, TimeUnit, UnionMode,
43    util
44} from './Arrow';
45
/** Integer types accepted as the key (index) type parameter of the `dictionary` factory. */
type TKeys =
    | Int8 | Int16 | Int32
    | Uint8 | Uint16 | Uint32;
47
/**
 * Typing for the test-data visitor.
 *
 * Each `visit` overload maps a DataType to the `GeneratedVector` wrapping the
 * matching vector type. Nested types additionally accept pre-built children
 * (`child`, `children` or `dictionary`). The `visit*` members are typed after
 * the `generate*` functions of this file.
 */
interface TestDataVectorGenerator extends Visitor {

    // Null is the only overload without a null count.
    visit<T extends Null>(type: T, length?: number): GeneratedVector<V<T>>;

    // Flat types.
    visit<T extends Bool>(type: T, length?: number, nullCount?: number): GeneratedVector<V<T>>;
    visit<T extends Int>(type: T, length?: number, nullCount?: number): GeneratedVector<V<T>>;
    visit<T extends Float>(type: T, length?: number, nullCount?: number): GeneratedVector<V<T>>;
    visit<T extends Utf8>(type: T, length?: number, nullCount?: number): GeneratedVector<V<T>>;
    visit<T extends Binary>(type: T, length?: number, nullCount?: number): GeneratedVector<V<T>>;
    visit<T extends FixedSizeBinary>(type: T, length?: number, nullCount?: number): GeneratedVector<V<T>>;
    visit<T extends Date_>(type: T, length?: number, nullCount?: number): GeneratedVector<V<T>>;
    visit<T extends Timestamp>(type: T, length?: number, nullCount?: number): GeneratedVector<V<T>>;
    visit<T extends Time>(type: T, length?: number, nullCount?: number): GeneratedVector<V<T>>;
    visit<T extends Decimal>(type: T, length?: number, nullCount?: number): GeneratedVector<V<T>>;
    visit<T extends Interval>(type: T, length?: number, nullCount?: number): GeneratedVector<V<T>>;

    // Nested types, optionally built from caller-supplied children.
    visit<T extends List>(type: T, length?: number, nullCount?: number, child?: Vector): GeneratedVector<V<T>>;
    visit<T extends FixedSizeList>(type: T, length?: number, nullCount?: number, child?: Vector): GeneratedVector<V<T>>;
    visit<T extends Dictionary>(type: T, length?: number, nullCount?: number, dictionary?: Vector): GeneratedVector<V<T>>;
    visit<T extends Union>(type: T, length?: number, nullCount?: number, children?: Vector[]): GeneratedVector<V<T>>;
    visit<T extends Struct>(type: T, length?: number, nullCount?: number, children?: Vector[]): GeneratedVector<V<T>>;
    visit<T extends Map_>(type: T, length?: number, nullCount?: number, child?: Vector): GeneratedVector<V<T>>;

    // Catch-all for any other DataType.
    visit<T extends DataType>(type: T, length?: number, ...args: any[]): GeneratedVector<V<T>>;

    // Per-type handlers, one per generate* function.
    visitNull: typeof generateNull;
    visitBool: typeof generateBool;
    visitInt: typeof generateInt;
    visitFloat: typeof generateFloat;
    visitUtf8: typeof generateUtf8;
    visitBinary: typeof generateBinary;
    visitFixedSizeBinary: typeof generateFixedSizeBinary;
    visitDate: typeof generateDate;
    visitTimestamp: typeof generateTimestamp;
    visitTime: typeof generateTime;
    visitDecimal: typeof generateDecimal;
    visitList: typeof generateList;
    visitStruct: typeof generateStruct;
    visitUnion: typeof generateUnion;
    visitDictionary: typeof generateDictionary;
    visitInterval: typeof generateInterval;
    visitFixedSizeList: typeof generateFixedSizeList;
    visitMap: typeof generateMap;
}

/** Class half of the interface/class declaration merge; it adds no members of its own. */
class TestDataVectorGenerator extends Visitor {}
91
// Attach each generate* function to the visitor prototype under its visit* name.
// The block scope keeps the `proto` alias out of the module namespace.
{
    const proto = TestDataVectorGenerator.prototype;
    proto.visitNull = generateNull;
    proto.visitBool = generateBool;
    proto.visitInt = generateInt;
    proto.visitFloat = generateFloat;
    proto.visitUtf8 = generateUtf8;
    proto.visitBinary = generateBinary;
    proto.visitFixedSizeBinary = generateFixedSizeBinary;
    proto.visitDate = generateDate;
    proto.visitTimestamp = generateTimestamp;
    proto.visitTime = generateTime;
    proto.visitDecimal = generateDecimal;
    proto.visitList = generateList;
    proto.visitStruct = generateStruct;
    proto.visitUnion = generateUnion;
    proto.visitDictionary = generateDictionary;
    proto.visitInterval = generateInterval;
    proto.visitFixedSizeList = generateFixedSizeList;
    proto.visitMap = generateMap;
}

/** Single generator instance shared by `recordBatch` and the per-type factories. */
const vectorGenerator = new TestDataVectorGenerator();
112
/**
 * Default child field of the `list` and `fixedSizeList` factories.
 * This is one shared Field instance, not a factory.
 */
const defaultListChild = new Field(
    'list[Int32]',
    new Int32()
);
114
/** Builds a fresh set of default schema fields: an Int32, a Float32 and a Utf8 dictionary keyed by Int32. */
const defaultRecordBatchChildren = () => {
    const i32 = new Field('i32', new Int32());
    const f32 = new Field('f32', new Float32());
    const dict = new Field('dict', new Dictionary(new Utf8(), new Int32()));
    return [i32, f32, dict];
};
120
/** Builds a fresh set of default struct children: an Int32, a Utf8 and a List of DateDay. */
const defaultStructChildren = () => {
    const first = new Field('struct[0]', new Int32());
    const second = new Field('struct[1]', new Utf8());
    const third = new Field('struct[2]', new List(new Field('list[DateDay]', new DateDay())));
    return [first, second, third];
};
126
/** Builds the default map entry field: an unnamed struct of a Utf8 `key` and a Float32 `value`. */
const defaultMapChild = () => {
    const entryType = new Struct<{ key: Utf8; value: Float32 }>([
        new Field('key', new Utf8()),
        new Field('value', new Float32())
    ]);
    return new Field('', entryType);
};
133
/** Builds a fresh set of default union children: a Float64, a Uint32 dictionary keyed by Int32, and a Map. */
const defaultUnionChildren = () => {
    const float64Child = new Field('union[0]', new Float64());
    const dictionaryChild = new Field('union[1]', new Dictionary(new Uint32(), new Int32()));
    const mapChild = new Field('union[2]', new Map_(defaultMapChild()));
    return [float64Child, dictionaryChild, mapChild];
};
139
/**
 * A generated Table plus accessors for the expected data it was built from.
 * `rows`, `cols` and `keys` cover the whole table; the `*Batches` arrays hold
 * the matching accessor of each record batch.
 */
export interface GeneratedTable {
    table: Table;
    rows: () => any[][];
    cols: () => any[][];
    keys: () => number[][];
    rowBatches: Array<() => any[][]>;
    colBatches: Array<() => any[][]>;
    keyBatches: Array<() => number[][]>;
}
149
/** A generated RecordBatch plus accessors for its expected rows, columns and dictionary keys. */
export interface GeneratedRecordBatch {
    recordBatch: RecordBatch;
    rows: () => any[][];
    cols: () => any[][];
    keys: () => number[][];
}
156
/**
 * A generated vector plus an accessor for its expected values (null in null slots).
 * `keys` is only set by generators that produce dictionary indices.
 */
export type GeneratedVector<TVec extends Vector = Vector> = {
    vector: TVec;
    keys?: number[];
    values: () => Array<TVec['TValue'] | null>;
};
162
/**
 * Generates a Table with one record batch per entry of `lengths`, all sharing `schema`.
 *
 * @param lengths Row count of each record batch.
 * @param schema Schema of every batch; defaults to the default record-batch fields
 *               with `foo => bar` metadata.
 * @returns The table, per-batch accessors, and accessors for the concatenated rows,
 *          columns and dictionary keys.
 */
export const table = (lengths = [100], schema: Schema = new Schema(defaultRecordBatchChildren(), new Map([['foo', 'bar']]))): GeneratedTable => {
    const batches = lengths.map((length) => recordBatch(length, schema));
    const rowBatches = batches.map((batch) => batch.rows);
    const colBatches = batches.map((batch) => batch.cols);
    const keyBatches = batches.map((batch) => batch.keys);

    // Rows of all batches, one after another.
    const rows = memoize(() => {
        const allRows: any[][] = [];
        for (const batchRows of rowBatches) {
            for (const row of batchRows()) {
                allRows.push(row);
            }
        }
        return allRows;
    });

    // Per-column key lists, appended batch by batch. The first non-empty batch
    // result seeds the accumulator; missing key lists are treated as empty.
    const keys = memoize(() => {
        let merged: any[][] = [];
        for (const batchKeys of keyBatches) {
            merged = merged.length === 0
                ? batchKeys()
                : merged.map((idxs, i) => [...(idxs || []), ...(batchKeys()[i] || [])]);
        }
        return merged;
    });

    // Per-column value lists, appended batch by batch.
    const cols = memoize(() => {
        let merged: any[][] = [];
        for (const batchCols of colBatches) {
            merged = merged.length === 0
                ? batchCols()
                : merged.map((vals, i) => [...vals, ...batchCols()[i]]);
        }
        return merged;
    });

    const arrowTable = new Table(schema, batches.map(({ recordBatch }) => recordBatch));
    return { rows, cols, keys, rowBatches, colBatches, keyBatches, table: arrowTable };
};
178
/**
 * Generates a RecordBatch with one generated vector of `length` rows per schema field.
 *
 * @param length Number of rows.
 * @param schema Schema to generate for; defaults to the default record-batch fields.
 * @returns The record batch plus accessors for its expected rows, columns and
 *          per-column keys (undefined for columns whose generator sets none).
 */
export const recordBatch = (length = 100, schema: Schema = new Schema(defaultRecordBatchChildren())): GeneratedRecordBatch => {

    const columns = schema.fields.map((field) => vectorGenerator.visit(field.type, length));
    const vectors = columns.map((column) => column.vector);

    const keys = memoize(() => columns.map((column) => column.keys));
    const cols = memoize(() => columns.map((column) => column.values()));
    // Transposes the column values into rows; both parameters fall back to their defaults.
    const rows = memoize((out: any[][] = [], colValues: any[][] = cols()) => {
        for (let i = 0; i < length; ++i) {
            out[i] = colValues.map((vals) => vals[i]);
        }
        return out;
    });

    return { rows, cols, keys, recordBatch: new RecordBatch(schema, length, vectors) };
};
193
// Per-type factories. Each builds the DataType and hands it to the shared
// generator. Unless overridden: 100 rows, with 20% of them (truncated) null.

export const null_ = (length = 100) =>
    vectorGenerator.visit(new Null(), length);

export const bool = (length = 100, nullCount = length * 0.2 | 0) =>
    vectorGenerator.visit(new Bool(), length, nullCount);

// Signed and unsigned integers.
export const int8 = (length = 100, nullCount = length * 0.2 | 0) =>
    vectorGenerator.visit(new Int8(), length, nullCount);
export const int16 = (length = 100, nullCount = length * 0.2 | 0) =>
    vectorGenerator.visit(new Int16(), length, nullCount);
export const int32 = (length = 100, nullCount = length * 0.2 | 0) =>
    vectorGenerator.visit(new Int32(), length, nullCount);
export const int64 = (length = 100, nullCount = length * 0.2 | 0) =>
    vectorGenerator.visit(new Int64(), length, nullCount);
export const uint8 = (length = 100, nullCount = length * 0.2 | 0) =>
    vectorGenerator.visit(new Uint8(), length, nullCount);
export const uint16 = (length = 100, nullCount = length * 0.2 | 0) =>
    vectorGenerator.visit(new Uint16(), length, nullCount);
export const uint32 = (length = 100, nullCount = length * 0.2 | 0) =>
    vectorGenerator.visit(new Uint32(), length, nullCount);
export const uint64 = (length = 100, nullCount = length * 0.2 | 0) =>
    vectorGenerator.visit(new Uint64(), length, nullCount);

// Floating point.
export const float16 = (length = 100, nullCount = length * 0.2 | 0) =>
    vectorGenerator.visit(new Float16(), length, nullCount);
export const float32 = (length = 100, nullCount = length * 0.2 | 0) =>
    vectorGenerator.visit(new Float32(), length, nullCount);
export const float64 = (length = 100, nullCount = length * 0.2 | 0) =>
    vectorGenerator.visit(new Float64(), length, nullCount);

// Strings and bytes.
export const utf8 = (length = 100, nullCount = length * 0.2 | 0) =>
    vectorGenerator.visit(new Utf8(), length, nullCount);
export const binary = (length = 100, nullCount = length * 0.2 | 0) =>
    vectorGenerator.visit(new Binary(), length, nullCount);
export const fixedSizeBinary = (length = 100, nullCount = length * 0.2 | 0, byteWidth = 8) =>
    vectorGenerator.visit(new FixedSizeBinary(byteWidth), length, nullCount);

// Dates, timestamps and times.
export const dateDay = (length = 100, nullCount = length * 0.2 | 0) =>
    vectorGenerator.visit(new DateDay(), length, nullCount);
export const dateMillisecond = (length = 100, nullCount = length * 0.2 | 0) =>
    vectorGenerator.visit(new DateMillisecond(), length, nullCount);
export const timestampSecond = (length = 100, nullCount = length * 0.2 | 0) =>
    vectorGenerator.visit(new TimestampSecond(), length, nullCount);
export const timestampMillisecond = (length = 100, nullCount = length * 0.2 | 0) =>
    vectorGenerator.visit(new TimestampMillisecond(), length, nullCount);
export const timestampMicrosecond = (length = 100, nullCount = length * 0.2 | 0) =>
    vectorGenerator.visit(new TimestampMicrosecond(), length, nullCount);
export const timestampNanosecond = (length = 100, nullCount = length * 0.2 | 0) =>
    vectorGenerator.visit(new TimestampNanosecond(), length, nullCount);
export const timeSecond = (length = 100, nullCount = length * 0.2 | 0) =>
    vectorGenerator.visit(new TimeSecond(), length, nullCount);
export const timeMillisecond = (length = 100, nullCount = length * 0.2 | 0) =>
    vectorGenerator.visit(new TimeMillisecond(), length, nullCount);
export const timeMicrosecond = (length = 100, nullCount = length * 0.2 | 0) =>
    vectorGenerator.visit(new TimeMicrosecond(), length, nullCount);
export const timeNanosecond = (length = 100, nullCount = length * 0.2 | 0) =>
    vectorGenerator.visit(new TimeNanosecond(), length, nullCount);

export const decimal = (length = 100, nullCount = length * 0.2 | 0, scale = 2, precision = 9) =>
    vectorGenerator.visit(new Decimal(scale, precision), length, nullCount);

// Nested types; children default to the module-level default fields.
export const list = (length = 100, nullCount = length * 0.2 | 0, child = defaultListChild) =>
    vectorGenerator.visit(new List(child), length, nullCount);

export const struct = <T extends { [key: string]: DataType } = any>(
    length = 100,
    nullCount = length * 0.2 | 0,
    children: Field<T[keyof T]>[] = defaultStructChildren() as any
) => vectorGenerator.visit(new Struct<T>(children), length, nullCount);

// Unions use each child's own typeId as its union type id.
export const denseUnion = (length = 100, nullCount = length * 0.2 | 0, children: Field[] = defaultUnionChildren()) =>
    vectorGenerator.visit(new DenseUnion(children.map((f) => f.typeId), children), length, nullCount);
export const sparseUnion = (length = 100, nullCount = length * 0.2 | 0, children: Field[] = defaultUnionChildren()) =>
    vectorGenerator.visit(new SparseUnion(children.map((f) => f.typeId), children), length, nullCount);

export const dictionary = <T extends DataType = Utf8, TKey extends TKeys = Int32>(
    length = 100,
    nullCount = length * 0.2 | 0,
    dict: T = new Utf8() as any,
    keys: TKey = new Int32() as any
) => vectorGenerator.visit(new Dictionary(dict, keys), length, nullCount);

export const intervalDayTime = (length = 100, nullCount = length * 0.2 | 0) =>
    vectorGenerator.visit(new IntervalDayTime(), length, nullCount);
export const intervalYearMonth = (length = 100, nullCount = length * 0.2 | 0) =>
    vectorGenerator.visit(new IntervalYearMonth(), length, nullCount);

export const fixedSizeList = (length = 100, nullCount = length * 0.2 | 0, listSize = 2, child = defaultListChild) =>
    vectorGenerator.visit(new FixedSizeList(listSize, child), length, nullCount);

export const map = <TKey extends DataType = any, TValue extends DataType = any>(
    length = 100,
    nullCount = length * 0.2 | 0,
    child: Field<Struct<{ key: TKey; value: TValue }>> = defaultMapChild() as any
) => vectorGenerator.visit(new Map_<TKey, TValue>(child), length, nullCount);
230
/** All per-type factories keyed by name, loosely typed so they can be looked up by string. */
export const vecs = {
    null_,
    bool,
    int8, int16, int32, int64,
    uint8, uint16, uint32, uint64,
    float16, float32, float64,
    utf8,
    binary,
    fixedSizeBinary,
    dateDay, dateMillisecond,
    timestampSecond, timestampMillisecond, timestampMicrosecond, timestampNanosecond,
    timeSecond, timeMillisecond, timeMicrosecond, timeNanosecond,
    decimal,
    list,
    struct,
    denseUnion, sparseUnion,
    dictionary,
    intervalDayTime, intervalYearMonth,
    fixedSizeList,
    map
} as Record<string, (...args: any[]) => any>;
234
/**
 * Generates a Null vector of `length` rows.
 *
 * @returns The vector, and a `values` accessor that builds a new array of
 *          `length` nulls on every call.
 */
function generateNull<T extends Null>(this: TestDataVectorGenerator, type: T, length = 100): GeneratedVector<V<T>> {
    const values = () => Array.from({ length }, () => null);
    const vector = Vector.new(Data.Null(type, 0, length));
    return { values, vector };
}
238
/**
 * Generates a Bool vector of `length` rows with `nullCount` nulls.
 *
 * The data bitmap is requested with `length / 2` (truncated) as its count
 * argument, and the data bit of every null slot is cleared afterwards.
 *
 * @returns The vector and an accessor for the expected values (null in null slots).
 */
function generateBool<T extends Bool>(this: TestDataVectorGenerator, type: T, length = 100, nullCount = length * 0.2 | 0): GeneratedVector<V<T>> {
    const bits = createBitmap(length, length / 2 | 0);
    const validity = createBitmap(length, nullCount);
    const values = memoize(() => {
        const expected: (boolean | null)[] = [];
        iterateBitmap(length, validity, (i, valid) => {
            expected[i] = valid ? isValid(bits, i) : null;
        });
        return expected;
    });
    // Clear the data bit of each null slot.
    iterateBitmap(length, validity, (i, valid) => {
        if (!valid) {
            bits[i >> 3] &= ~(1 << (i % 8));
        }
    });

    const vector = Vector.new(Data.Bool(type, 0, length, nullCount, validity, bits));
    return { values, vector };
}
251
/**
 * Generates an Int vector of `length` rows with `nullCount` nulls.
 *
 * Types wider than 32 bits use two array elements per value, and their
 * expected values are two-element subarrays of the data buffer rather than
 * numbers. Null slots are zeroed in the data buffer.
 *
 * @returns The vector and an accessor for the expected values (null in null slots).
 */
function generateInt<T extends Int>(this: TestDataVectorGenerator, type: T, length = 100, nullCount = length * 0.2 | 0): GeneratedVector<V<T>> {
    const elementsPerValue = type.bitWidth > 32 ? 2 : 1;
    const validity = createBitmap(length, nullCount);
    const data = fillRandom(type.ArrayType as any, length * elementsPerValue);
    const values = memoize(() => {
        const expected = [] as (number | null)[];
        iterateBitmap(length, validity, (i, valid) => {
            if (!valid) {
                expected[i] = null;
            } else if (elementsPerValue === 1) {
                expected[i] = data[i];
            } else {
                expected[i] = data.subarray(i * elementsPerValue, (i + 1) * elementsPerValue);
            }
        });
        return expected;
    });
    // Zero every element that belongs to a null slot.
    iterateBitmap(length, validity, (i, valid) => {
        if (!valid) {
            data.set(new Uint8Array(elementsPerValue), i * elementsPerValue);
        }
    });
    const vector = Vector.new(Data.Int(type, 0, length, nullCount, validity, data));
    return { values, vector };
}
269
/**
 * Generates a Float vector of `length` rows with `nullCount` nulls.
 *
 * After the random fill, every valid element is multiplied by `Math.random()`
 * and every null slot is set to 0. When `type.precision` is not above 0, the
 * expected values are decoded with `util.uint16ToFloat64`.
 *
 * @returns The vector and an accessor for the expected values (null in null slots).
 */
function generateFloat<T extends Float>(this: TestDataVectorGenerator, type: T, length = 100, nullCount = length * 0.2 | 0): GeneratedVector<V<T>> {
    const data = fillRandom(type.ArrayType as any, length);
    const validity = createBitmap(length, nullCount);
    // Elements need decoding from their uint16 form only when precision is not above 0.
    const needsDecoding = !(type.precision > 0);
    const values = memoize(() => {
        const expected = [] as (number | null)[];
        iterateBitmap(length, validity, (i, valid) => {
            if (!valid) {
                expected[i] = null;
            } else {
                expected[i] = needsDecoding ? util.uint16ToFloat64(data[i]) : data[i];
            }
        });
        return expected;
    });
    iterateBitmap(length, validity, (i, valid) => {
        data[i] = valid ? data[i] * Math.random() : 0;
    });
    const vector = Vector.new(Data.Float(type, 0, length, nullCount, validity, data));
    return { values, vector };
}
285
/**
 * Generates a Utf8 vector of `length` rows with `nullCount` nulls.
 *
 * Each valid slot gets a random string sized by its offsets span. Non-empty
 * strings are redrawn until they differ from every non-empty string generated
 * so far; spans that are not positive give an empty string.
 *
 * @returns The vector and an accessor for the expected strings (null in null slots).
 */
function generateUtf8<T extends Utf8>(this: TestDataVectorGenerator, type: T, length = 100, nullCount = length * 0.2 | 0): GeneratedVector<V<T>> {
    const validity = createBitmap(length, nullCount);
    const offsets = createVariableWidthOffsets(length, validity, undefined, undefined, nullCount !== 0);
    const values: string[] = new Array(offsets.length - 1).fill(null);
    const seen = new Set<string>();
    for (let i = 0; i < values.length; ++i) {
        if (!isValid(validity, i)) {
            continue; // null slots keep their null placeholder
        }
        const span = offsets[i + 1] - offsets[i];
        if (span > 0) {
            let candidate: string;
            do {
                candidate = randomString(span);
            } while (seen.has(candidate));
            seen.add(candidate);
            values[i] = candidate;
        } else {
            values[i] = '';
        }
    }
    const data = createVariableWidthBytes(length, validity, offsets, (i) => encodeUtf8(values[i]));
    const vector = Vector.new(Data.Utf8(type, 0, length, nullCount, validity, offsets, data));
    return { values: () => values, vector };
}
307
/**
 * Generates a Binary vector of `length` rows with `nullCount` nulls.
 * Each valid slot gets random bytes sized by its offsets span.
 *
 * @returns The vector and an accessor for the expected byte values (null in null slots).
 */
function generateBinary<T extends Binary>(this: TestDataVectorGenerator, type: T, length = 100, nullCount = length * 0.2 | 0): GeneratedVector<V<T>> {
    const validity = createBitmap(length, nullCount);
    const offsets = createVariableWidthOffsets(length, validity, undefined, undefined, nullCount !== 0);
    const values = Array.from({ length: offsets.length - 1 }, (_, i) => (
        isValid(validity, i) ? randomBytes(offsets[i + 1] - offsets[i]) : null
    ));
    const data = createVariableWidthBytes(length, validity, offsets, (i) => values[i]!);
    const vector = Vector.new(Data.Binary(type, 0, length, nullCount, validity, offsets, data));
    return { values: () => values, vector };
}
317
/**
 * Generates a FixedSizeBinary vector of `length` rows with `nullCount` nulls.
 *
 * Expected values are `byteWidth`-long subarrays that share memory with the
 * data buffer. Null slots are zeroed in the buffer.
 *
 * @returns The vector and an accessor for the expected values (null in null slots).
 */
function generateFixedSizeBinary<T extends FixedSizeBinary>(this: TestDataVectorGenerator, type: T, length = 100, nullCount = length * 0.2 | 0): GeneratedVector<V<T>> {
    const width = type.byteWidth;
    const validity = createBitmap(length, nullCount);
    const data = fillRandom(Uint8Array, length * width);
    const values = memoize(() => {
        const expected = [] as (Uint8Array | null)[];
        iterateBitmap(length, validity, (i, valid) => {
            expected[i] = valid ? data.subarray(i * width, (i + 1) * width) : null;
        });
        return expected;
    });
    iterateBitmap(length, validity, (i, valid) => {
        if (!valid) {
            data.set(new Uint8Array(width), i * width);
        }
    });
    const vector = Vector.new(Data.FixedSizeBinary(type, 0, length, nullCount, validity, data));
    return { values, vector };
}
331
/**
 * Generates a Date vector of `length` rows with `nullCount` nulls, using
 * `createDate32` for day units and `createDate64` otherwise.
 *
 * @returns The vector, and an accessor that converts the collected numbers
 *          to `Date` objects (null stays null) on every call.
 */
function generateDate<T extends Date_>(this: TestDataVectorGenerator, type: T, length = 100, nullCount = length * 0.2 | 0): GeneratedVector<V<T>> {
    // Handed to the create helper, which presumably fills it with one number per row.
    const epochs = [] as (number | null)[];
    const validity = createBitmap(length, nullCount);
    const data = type.unit === DateUnit.DAY
        ? createDate32(length, validity, epochs)
        : createDate64(length, validity, epochs);
    const values = () => epochs.map((epoch) => epoch == null ? null : new Date(epoch));
    const vector = Vector.new(Data.Date(type, 0, length, nullCount, validity, data));
    return { values, vector };
}
343
/**
 * Generates a Timestamp vector of `length` rows with `nullCount` nulls.
 *
 * `createTimestamp` receives a multiple chosen by the type's unit: 1 by
 * default, 1000 for milliseconds, 1000000 for microseconds and 1000000000
 * for nanoseconds.
 *
 * @returns The vector and an accessor for the array handed to `createTimestamp`.
 */
function generateTimestamp<T extends Timestamp>(this: TestDataVectorGenerator, type: T, length = 100, nullCount = length * 0.2 | 0): GeneratedVector<V<T>> {
    const values = [] as (number | null)[];
    const validity = createBitmap(length, nullCount);
    let multiple: number;
    switch (type.unit) {
        case TimeUnit.NANOSECOND: multiple = 1000000000; break;
        case TimeUnit.MICROSECOND: multiple = 1000000; break;
        case TimeUnit.MILLISECOND: multiple = 1000; break;
        default: multiple = 1; break;
    }
    const data = createTimestamp(length, validity, multiple, values);
    const vector = Vector.new(Data.Timestamp(type, 0, length, nullCount, validity, data));
    return { values: () => values, vector };
}
353
/**
 * Generates a Time vector of `length` rows with `nullCount` nulls.
 *
 * 32-bit types are built with `createTime32`, all others with `createTime64`.
 * Both receive a multiple chosen by the type's unit: 1 by default, 1000 for
 * milliseconds, 1000000 for microseconds and 1000000000 for nanoseconds.
 *
 * @returns The vector and an accessor for the array handed to the create helper.
 */
function generateTime<T extends Time>(this: TestDataVectorGenerator, type: T, length = 100, nullCount = length * 0.2 | 0): GeneratedVector<V<T>> {
    const values = [] as (Int32Array | number | null)[];
    const validity = createBitmap(length, nullCount);

    let multiple = 1;
    if (type.unit === TimeUnit.NANOSECOND) {
        multiple = 1000000000;
    } else if (type.unit === TimeUnit.MICROSECOND) {
        multiple = 1000000;
    } else if (type.unit === TimeUnit.MILLISECOND) {
        multiple = 1000;
    }

    const data = type.bitWidth === 32
        ? createTime32(length, validity, multiple, values as (number | null)[])
        : createTime64(length, validity, multiple, values as (Int32Array | null)[]);
    const vector = Vector.new(Data.Time(type, 0, length, nullCount, validity, data));
    return { values: () => values, vector };
}
365
/**
 * Generates a Decimal vector of `length` rows with `nullCount` nulls.
 *
 * Each value occupies four consecutive 32-bit words (16 bytes) of the data
 * buffer. Expected values are four-word subarrays that share memory with that
 * buffer, and the four words of every null slot are zeroed.
 *
 * @param type Decimal type to generate for.
 * @param length Number of rows.
 * @param nullCount Number of null rows; defaults to 20% of `length`, truncated.
 * @returns The vector and an accessor for the expected values (null in null slots).
 */
function generateDecimal<T extends Decimal>(this: TestDataVectorGenerator, type: T, length = 100, nullCount = length * 0.2 | 0): GeneratedVector<V<T>> {
    const wordsPerValue = 4;
    const data = fillRandom(Uint32Array, length * wordsPerValue);
    const nullBitmap = createBitmap(length, nullCount);
    const values = memoize(() => {
        const values = [] as (Uint32Array | null)[];
        iterateBitmap(length, nullBitmap, (i, valid) => {
            values[i] = !valid ? null : data.subarray(i * wordsPerValue, (i + 1) * wordsPerValue);
        });
        return values;
    });
    iterateBitmap(length, nullBitmap, (i, valid) => {
        if (!valid) {
            // Zero exactly the words of slot i. The previous code wrote at byte
            // offsets 4 * i and 4 * (i + 1), which is not where slot i lives
            // (byte 16 * i), so it overwrote other slots and left null slots set.
            data.fill(0, i * wordsPerValue, (i + 1) * wordsPerValue);
        }
    });
    return { values, vector: Vector.new(Data.Decimal(type, 0, length, nullCount, nullBitmap, data)) };
}
385
/**
 * Generates an Interval vector of `length` rows with `nullCount` nulls.
 *
 * The number of Int32 elements per value is `1 + type.unit`. With two
 * elements, the expected value is a view over that pair in the data buffer.
 * Otherwise it is a new pair `[element / 12, element % 12]`, both truncated.
 * Null slots are zeroed in the buffer.
 *
 * @returns The vector and an accessor for the expected values (null in null slots).
 */
function generateInterval<T extends Interval>(this: TestDataVectorGenerator, type: T, length = 100, nullCount = length * 0.2 | 0): GeneratedVector<V<T>> {
    const stride = (1 + type.unit);
    const validity = createBitmap(length, nullCount);
    const data = fillRandom(Int32Array, length * stride);
    const values = memoize(() => {
        const expected = [] as (Int32Array | null)[];
        iterateBitmap(length, validity, (i: number, valid: boolean) => {
            if (!valid) {
                expected[i] = null;
            } else if (stride === 2) {
                expected[i] = new Int32Array(data.buffer, 4 * i * stride, stride);
            } else {
                expected[i] = new Int32Array([data[i] / 12 | 0, data[i] % 12 | 0]);
            }
        });
        return expected;
    });
    iterateBitmap(length, validity, (i: number, valid: boolean) => {
        if (!valid) {
            data.set(new Int32Array(stride), i * stride);
        }
    });
    const vector = Vector.new(Data.Interval(type, 0, length, nullCount, validity, data));
    return { values, vector };
}
404
/**
 * Generates a List vector of `length` rows with `nullCount` nulls.
 *
 * @param child Generated child vector; by default three times as long as the
 *              list, with three times its null count.
 * @returns The vector, and an accessor giving each valid slot the slice of
 *          child values between its two offsets (null in null slots).
 */
function generateList<T extends List>(this: TestDataVectorGenerator, type: T, length = 100, nullCount = length * 0.2 | 0, child = this.visit(type.children[0].type, length * 3, nullCount * 3)): GeneratedVector<V<T>> {
    const childVector = child.vector;
    const validity = createBitmap(length, nullCount);
    // Child elements available per non-null list.
    const stride = childVector.length / (length - nullCount);
    const offsets = createVariableWidthOffsets(length, validity, childVector.length, stride);
    const values = memoize(() => {
        const childValues = child.values();
        const lists: (T['valueType'] | null)[] = [];
        for (let i = 0; i < offsets.length - 1; ++i) {
            lists[i] = isValid(validity, i) ? childValues.slice(offsets[i], offsets[i + 1]) : null;
        }
        return lists;
    });
    const vector = Vector.new(Data.List(type, 0, length, nullCount, validity, offsets, childVector));
    return { values, vector };
}
419
/**
 * Generates a FixedSizeList vector of `length` rows with `nullCount` nulls.
 *
 * @param child Generated child vector; by default `length * listSize` long
 *              with `nullCount * listSize` nulls.
 * @returns The vector, and an accessor giving each valid slot its
 *          `listSize`-long slice of child values (null in null slots).
 */
function generateFixedSizeList<T extends FixedSizeList>(this: TestDataVectorGenerator, type: T, length = 100, nullCount = length * 0.2 | 0, child = this.visit(type.children[0].type, length * type.listSize, nullCount * type.listSize)): GeneratedVector<V<T>> {
    const validity = createBitmap(length, nullCount);
    const values = memoize(() => {
        const childValues = child.values();
        const size = type.listSize;
        const lists = [] as (T['valueType'] | null)[];
        for (let i = 0; i < length; ++i) {
            if (isValid(validity, i)) {
                lists[i] = childValues.slice(i * size, (i + 1) * size);
            } else {
                lists[i] = null;
            }
        }
        return lists;
    });
    const vector = Vector.new(Data.FixedSizeList(type, 0, length, nullCount, validity, child.vector));
    return { values, vector };
}
432
/**
 * Generates a Dictionary vector of `length` rows with `nullCount` nulls.
 *
 * The accumulated dictionary is stored on the type object itself as
 * `dictionaryVector` and `dictionaryValues`. If the type already carries a
 * non-empty dictionary, the new entries are appended to it. Keys of valid
 * slots are `rand() * (dictionary length - 1)`, truncated; null slots get 0.
 *
 * @param dictionary Generated dictionary entries; by default `length` entries
 *                   of the value type with no nulls.
 * @returns The vector, its keys, and an accessor resolving each key against
 *          the dictionary values (null in null slots).
 */
function generateDictionary<T extends Dictionary>(this: TestDataVectorGenerator, type: T, length = 100, nullCount = length * 0.2 | 0, dictionary = this.visit(type.dictionary, length, 0)): GeneratedVector<V<T>> {

    const dictType = <any> type;
    const previousValues = dictType.dictionaryValues;
    const previousVector = dictType.dictionaryVector;
    const extendsExisting = previousVector && previousVector.length > 0;

    let dict: any;
    let vals: () => any[];
    if (extendsExisting) {
        dict = previousVector.concat(dictionary.vector);
        vals = () => [...previousValues(), ...dictionary.values()];
    } else {
        dict = dictionary.vector;
        vals = dictionary.values;
    }

    const maxIdx = dict.length - 1;
    const keys = new dictType.indices.ArrayType(length);
    const validity = createBitmap(length, nullCount);

    const values = memoize(() => {
        const entries = vals();
        const expected = [] as (T['TValue'] | null)[];
        iterateBitmap(length, validity, (i, valid) => {
            expected[i] = valid ? entries[keys[i]] : null;
        });
        return expected;
    });

    iterateBitmap(length, validity, (i, valid) => {
        keys[i] = valid ? rand() * maxIdx | 0 : 0;
    });

    // Keep the accumulated dictionary on the type for later calls with the same type.
    dictType.dictionaryVector = dict;
    dictType.dictionaryValues = vals;

    const vector = Vector.new(Data.Dictionary(type, 0, length, nullCount, validity, keys, dict));
    return { values, keys, vector };
}
463
/**
 * Generates a sparse or dense Union vector of `length` rows with `nullCount` nulls.
 *
 * When no children are given: sparse unions get children of the full `length`
 * and `nullCount`. Dense unions get children of `ceil(length / numChildren)`
 * rows with `nullCount / childLength` (truncated) nulls.
 *
 * Valid slots pick a random child type id. Sparse slots read the child at
 * the slot's own index; dense slots read it at offset `i / numChildren`
 * (truncated). Null slots get type id 0, and offset 0 for dense unions.
 *
 * @param children Optional pre-generated children, one per union child.
 * @returns The vector and an accessor for the expected values (null in null slots).
 */
function generateUnion<T extends Union>(this: TestDataVectorGenerator, type: T, length = 100, nullCount = length * 0.2 | 0, children?: GeneratedVector<any>[]): GeneratedVector<V<T>> {

    const numChildren = type.children.length;
    const isSparse = type.mode === UnionMode.Sparse;

    if (!children) {
        const childLength = isSparse ? length : Math.ceil(length / numChildren);
        const childNullCount = isSparse ? nullCount : (nullCount / childLength) | 0;
        children = type.children.map((f) => this.visit(f.type, childLength, childNullCount));
    }

    const typeIds = type.typeIds;
    const typeIdsBuffer = new Int8Array(length);
    const vecs = children.map((child) => child.vector);
    const cols = children.map((child) => child.values);
    const validity = createBitmap(length, nullCount);

    // Maps a union type id back to the index of its child.
    const childIndexByTypeId = Object.create(null) as { [key: number]: number };
    for (let idx = 0; idx < typeIds.length; ++idx) {
        childIndexByTypeId[typeIds[idx]] = idx;
    }

    if (isSparse) {
        const values = memoize(() => {
            const expected = [] as any[];
            const childValues = cols.map((col) => col());
            iterateBitmap(length, validity, (i, valid) => {
                expected[i] = valid ? childValues[childIndexByTypeId[typeIdsBuffer[i]]][i] : null;
            });
            return expected;
        });
        iterateBitmap(length, validity, (i, valid) => {
            typeIdsBuffer[i] = valid ? typeIds[rand() * numChildren | 0] : 0;
        });
        const vector = Vector.new(Data.Union(type as SparseUnion, 0, length, nullCount, validity, typeIdsBuffer, vecs));
        return { values, vector } as GeneratedVector<V<T>>;
    }

    const offsets = new Int32Array(length);
    const values = memoize(() => {
        const expected = [] as any[];
        const childValues = cols.map((col) => col());
        iterateBitmap(length, validity, (i, valid) => {
            expected[i] = valid ? childValues[childIndexByTypeId[typeIdsBuffer[i]]][offsets[i]] : null;
        });
        return expected;
    });
    iterateBitmap(length, validity, (i, valid) => {
        if (valid) {
            const colIdx = rand() * numChildren | 0;
            offsets[i] = i / numChildren | 0;
            typeIdsBuffer[i] = typeIds[colIdx];
        } else {
            offsets[i] = 0;
            typeIdsBuffer[i] = 0;
        }
    });
    const vector = Vector.new(Data.Union(type as DenseUnion, 0, length, nullCount, validity, typeIdsBuffer, offsets, vecs));
    return { values, vector } as GeneratedVector<V<T>>;
}
523
/**
 * Generates a Struct vector of `length` rows with `nullCount` nulls.
 *
 * @param children Generated child vectors; by default one per struct field,
 *                 each with the struct's own `length` and `nullCount`.
 * @returns The vector, and an accessor giving each valid slot an object keyed
 *          by field name with that row's child values (null in null slots).
 */
function generateStruct<T extends Struct>(this: TestDataVectorGenerator, type: T, length = 100, nullCount = length * 0.2 | 0, children = type.children.map((f) => this.visit(f.type, length, nullCount))): GeneratedVector<V<T>> {
    const vecs = children.map((child) => child.vector);
    const cols = children.map((child) => child.values);
    const validity = createBitmap(length, nullCount);
    const values = memoize(() => {
        const rows = [] as any[];
        const childValues = cols.map((col) => col());
        const names = type.children.map((f) => f.name);
        iterateBitmap(length, validity, (i, valid) => {
            if (!valid) {
                rows[i] = null;
                return;
            }
            // Build the row one child column at a time, keyed by field name.
            let row = {};
            childValues.forEach((col, j) => {
                row = { ...row, [names[j]]: col[i] };
            });
            rows[i] = row;
        });
        return rows;
    });
    const vector = Vector.new(Data.Struct(type, 0, length, nullCount, validity, vecs));
    return { values, vector };
}
541
/**
 * Generates a random Map vector together with a lazily computed array of the
 * `Map` instances (or `null`s) that vector is expected to contain.
 *
 * @param type The Map type to generate data for.
 * @param length Number of rows to generate.
 * @param nullCount Number of rows to mark as null (defaults to 20% of `length`, truncated).
 * @param child Pre-generated key/value entries column. By default it holds
 * `length * 3` entries whose keys contain no nulls.
 * @returns The generated vector and a memoized `values` thunk.
 */
function generateMap<T extends Map_>(this: TestDataVectorGenerator,
                                     type: T, length = 100, nullCount = length * 0.2 | 0,
                                     child = this.visit(type.children[0].type, length * 3, 0, [
                                         this.visit(type.children[0].type.children[0].type, length * 3, 0),
                                         this.visit(type.children[0].type.children[1].type, length * 3, nullCount * 3)
                                     ])): GeneratedVector<V<T>> {

    type K = T['keyType']['TValue'];
    type V = T['valueType']['TValue'];

    const entriesVec = child.vector;
    const nullBitmap = createBitmap(length, nullCount);
    // Exclusive upper bound on the number of entries a single valid row may take.
    const stride = entriesVec.length / (length - nullCount);
    const offsets = createVariableWidthOffsets(length, nullBitmap, entriesVec.length, stride);
    const values = memoize(() => {
        const entries = child.values() as { key: K; value: V }[];
        const rows: (T['TValue'] | null)[] = [];
        // offsets has one more element than there are rows.
        for (let row = 0, rowCount = offsets.length - 1; row < rowCount; ++row) {
            if (!isValid(nullBitmap, row)) {
                rows.push(null);
                continue;
            }
            const pairs = entries
                .slice(offsets[row], offsets[row + 1])
                .map(({ key, value }) => [key, value]);
            rows.push(new Map<K, V>(pairs as any as (readonly [K, V])[]));
        }
        return rows;
    });
    return { values, vector: Vector.new(Data.Map(type, 0, length, nullCount, nullBitmap, offsets, entriesVec)) };
}
569
/** Constructors of the numeric typed arrays accepted by `fillRandom`. */
type TypedArrayConstructor =
    | typeof Int8Array
    | typeof Int16Array
    | typeof Int32Array
    | typeof Uint8Array
    | typeof Uint16Array
    | typeof Uint32Array
    | typeof Float32Array
    | typeof Float64Array;
579
580
// Shared random-number source for the generators in this file.
const rand = Math.random.bind(Math);
// Returns a Uint8Array of `length` randomly filled bytes.
const randomBytes = (length: number) => {
    return fillRandom(Uint8Array, length);
};
// Returns a random string of `length` characters drawn from the custom
// alphabet below (lowercase letters, digits and underscore).
const randomString = (length: number) => {
    const chars = `abcdefghijklmnopqrstuvwxyz0123456789_`;
    return randomatic('?', length, { chars });
};
584
/**
 * Wraps `fn` so that it runs at most once: the first call of the returned
 * thunk invokes `fn` and caches its result, every later call returns the
 * cached result.
 *
 * A dedicated flag (rather than the truthiness of the cached value) records
 * that the computation happened, so falsy results such as `0`, `''`, `null`
 * or `false` are cached too instead of re-running `fn` on every call.
 * If `fn` throws, nothing is cached and the next call retries.
 *
 * @param fn The computation to run lazily.
 * @returns A thunk returning the (cached) result of `fn`.
 */
const memoize = (fn: () => any) => {
    let cached: any;
    let computed = false;
    return (): any => {
        if (!computed) {
            cached = fn();
            computed = true;
        }
        return cached;
    };
};
586
/** Encodes a string as UTF-8 bytes using a single shared `TextEncoder`. */
const encodeUtf8 = (() => {
    const encoder = new TextEncoder();
    const encode = encoder.encode.bind(encoder);
    return encode as (input?: string, options?: { stream?: boolean }) => Uint8Array;
})();
590
/**
 * Creates a typed array of the requested kind and fills it with random values.
 *
 * Each element is assigned a random magnitude in `[0, 2^(8 * BYTES_PER_ELEMENT) - 1)`
 * with a random sign; storing it converts it to the array's element type.
 *
 * @param ArrayType Typed-array constructor to instantiate.
 * @param length Number of elements to create.
 * @returns The populated typed array.
 */
function fillRandom<T extends TypedArrayConstructor>(ArrayType: T, length: number) {
    const bitsPerElement = 8 * ArrayType.BYTES_PER_ELEMENT;
    const result = new ArrayType(length);
    const maxMagnitude = (2 ** bitsPerElement) - 1;
    for (let i = 0; i < length; ++i) {
        // Magnitude is drawn before the sign.
        const magnitude = rand() * maxMagnitude;
        const sign = rand() > 0.5 ? -1 : 1;
        result[i] = magnitude * sign;
    }
    return result as InstanceType<T>;
}
598
/**
 * Tests whether bit `i` of `bitmap` is set (least-significant bit first
 * within each byte).
 *
 * @param bitmap The validity bitmap.
 * @param i Zero-based bit index.
 * @returns `true` when the bit is set, `false` otherwise.
 */
function isValid(bitmap: Uint8Array, i: number) {
    const byte = bitmap[i >> 3];
    const mask = 1 << (i % 8);
    return (byte & mask) !== 0;
}
602
/**
 * Calls `fn` once for each of the first `length` bits of `bitmap`, in order,
 * passing the bit's index and whether it is set. Bits are read
 * least-significant first, and each byte is fetched once per group of eight
 * bits.
 *
 * @param length Number of bits to visit.
 * @param bitmap The bitmap to read.
 * @param fn Callback receiving `(index, valid)`.
 */
function iterateBitmap(length: number, bitmap: Uint8Array, fn: (index: number, valid: boolean) => any) {
    let index = 0;
    while (index < length) {
        const byte = bitmap[index >> 3];
        const stop = Math.min(length, index + 8);
        for (let bit = 0; index < stop; ++bit) {
            fn(index++, (byte & (1 << bit)) !== 0);
        }
    }
}
612
/**
 * Creates a validity bitmap for `length` slots in which every bit starts set
 * (valid) and `nullCount` distinct, randomly chosen slots are cleared (null).
 *
 * The buffer is sized by rounding the *bit* count up to a multiple of 64, so
 * its byte length is a multiple of 8 (minimum 8) and always large enough to
 * hold every slot. Rounding the floored byte count instead would drop the
 * trailing bits for lengths such as 65..71.
 *
 * @param length Number of slots the bitmap must cover.
 * @param nullCount Number of slots to mark null; capped at `length`, since no
 * more than `length` distinct slots exist.
 * @returns The bitmap bytes.
 */
function createBitmap(length: number, nullCount: number) {
    const byteLength = (Math.ceil(length / 64) * 8) || 8;
    const bytes = new Uint8Array(byteLength).fill(255);
    // Without this cap the rejection sampling below could never finish.
    const nullsToPlace = Math.min(nullCount, length);
    for (let placed = 0; placed < nullsToPlace; ++placed) {
        let i: number;
        // Redraw until we hit a slot that is still valid (bit still set).
        do {
            i = (rand() * length) | 0;
        } while ((bytes[i >> 3] & (1 << (i % 8))) === 0);
        bytes[i >> 3] &= ~(1 << (i % 8)); // false
    }
    return bytes;
}
623
/**
 * Builds a monotonically non-decreasing offsets buffer (`length + 1` entries,
 * starting at 0) for a variable-width column.
 *
 * Null slots repeat the previous offset. Each valid slot advances by a random
 * integer step in `[0, stride)`, capped at `max`.
 *
 * @param length Number of slots.
 * @param nullBitmap Validity bitmap for the slots.
 * @param max Largest offset that may be produced.
 * @param stride Exclusive upper bound of the random step per valid slot.
 * @param allowEmpty When `false`, a valid slot is redrawn until its end offset
 * differs from its start offset.
 * @returns The offsets as an Int32Array.
 */
function createVariableWidthOffsets(length: number, nullBitmap: Uint8Array, max = Infinity, stride = 20, allowEmpty = true) {
    const offsets = new Int32Array(length + 1);
    iterateBitmap(length, nullBitmap, (index, valid) => {
        const next = index + 1;
        if (!valid) {
            offsets[next] = offsets[index];
            return;
        }
        do {
            const step = rand() * stride | 0;
            offsets[next] = Math.min(max, offsets[index] + step);
        } while (!allowEmpty && offsets[next] === offsets[index]);
    });
    return offsets;
}
637
/**
 * Assembles the data buffer of a variable-width column by copying the bytes
 * of every valid slot to that slot's start offset.
 *
 * @param length Number of slots.
 * @param nullBitmap Validity bitmap for the slots.
 * @param offsets Offsets buffer; `offsets[length]` is the total byte length.
 * @param getBytes Supplies the bytes for a valid slot; it is not called for
 * null slots.
 * @returns The concatenated bytes.
 */
function createVariableWidthBytes(length: number, nullBitmap: Uint8Array, offsets: Int32Array, getBytes: (index: number) => Uint8Array) {
    const totalBytes = offsets[length];
    const bytes = new Uint8Array(totalBytes);
    iterateBitmap(length, nullBitmap, (index, valid) => {
        if (valid) {
            bytes.set(getBytes(index), offsets[index]);
        }
    });
    return bytes;
}
645
/**
 * Generates random 32-bit day counts centred on today's date.
 *
 * Valid slots receive today's day number (days since the Unix epoch) shifted
 * by a random amount of up to 10000 days in either direction; null slots are
 * stored as 0.
 *
 * @param length Number of slots.
 * @param nullBitmap Validity bitmap for the slots.
 * @param values Output array of expected values: the stored day count
 * multiplied by 86400000 for valid slots, `null` for null slots.
 * @returns The day counts as an Int32Array.
 */
function createDate32(length: number, nullBitmap: Uint8Array, values: (number | null)[] = []) {
    const today = Date.now() / 86400000 | 0;
    const data = new Int32Array(length).fill(today);
    iterateBitmap(length, nullBitmap, (index, valid) => {
        if (valid) {
            // Magnitude is drawn before the sign.
            const magnitude = rand() * 10000;
            const sign = rand() > 0.5 ? -1 : 1;
            data[index] = (data[index] + magnitude * sign) | 0;
            values[index] = data[index] * 86400000;
        } else {
            data[index] = 0;
            values[index] = null;
        }
    });
    return data;
}
659
/**
 * Generates random 64-bit millisecond dates, stored as pairs of 32-bit words
 * (`[lo, hi]` per slot).
 *
 * Day counts come from `createDate32` and are multiplied by 86400000. Null
 * slots are left as zero words.
 *
 * @param length Number of slots.
 * @param nullBitmap Validity bitmap for the slots.
 * @param values Output array of expected values: the millisecond value for
 * valid slots, `null` for null slots (the nulls are written by `createDate32`).
 * @returns An Int32Array of `length * 2` words.
 */
function createDate64(length: number, nullBitmap: Uint8Array, values: (number | null)[] = []) {
    // Typed arrays are zero-initialized, so null slots need no explicit fill.
    const data = new Int32Array(length * 2);
    const data32 = createDate32(length, nullBitmap, values);
    iterateBitmap(length, nullBitmap, (i, valid) => {
        if (valid) {
            const value = data32[i] * 86400000;
            // The high word must be the *floor* of value / 2^32 so negative
            // (pre-epoch) values get a correct two's-complement encoding;
            // truncating toward zero would be off by one for them.
            const hi = Math.floor(value / 4294967296);
            const lo = (value - 4294967296 * hi) | 0;
            values[i] = value;
            data[i * 2 + 0] = lo;
            data[i * 2 + 1] = hi;
        }
    });
    return data;
}
675
/**
 * Generates random 64-bit timestamps, stored as pairs of 32-bit words
 * (`[lo, hi]` per slot).
 *
 * Day counts come from `createDate32`; each stored timestamp is
 * `days * 86400 * multiple`. Null slots are left as zero words.
 *
 * @param length Number of slots.
 * @param nullBitmap Validity bitmap for the slots.
 * @param multiple Scale applied on top of the seconds-per-day factor (86400).
 * @param values Output array of expected values. It is filled by
 * `createDate32` (day count times 86400000, or `null`) and is not rescaled by
 * `multiple` here.
 * @returns An Int32Array of `length * 2` words.
 */
function createTimestamp(length: number, nullBitmap: Uint8Array, multiple: number, values: (number | null)[] = []) {
    const mult = 86400 * multiple;
    // Typed arrays are zero-initialized, so null slots need no explicit fill.
    const data = new Int32Array(length * 2);
    const data32 = createDate32(length, nullBitmap, values);
    iterateBitmap(length, nullBitmap, (i, valid) => {
        if (valid) {
            const value = data32[i] * mult;
            // The high word must be the *floor* of value / 2^32 so negative
            // (pre-epoch) values get a correct two's-complement encoding;
            // truncating toward zero would be off by one for them.
            const hi = Math.floor(value / 4294967296);
            const lo = (value - 4294967296 * hi) | 0;
            data[i * 2 + 0] = lo;
            data[i * 2 + 1] = hi;
        }
    });
    return data;
}
691
/**
 * Generates random 32-bit time values.
 *
 * Each valid slot gets a random integer in `[0, 1000)` scaled by `multiple`,
 * with a random sign; null slots are stored as 0.
 *
 * @param length Number of slots.
 * @param nullBitmap Validity bitmap for the slots.
 * @param multiple Factor by which the random base value is scaled.
 * @param values Output array of expected values: the stored number for valid
 * slots, `null` for null slots.
 * @returns The time values as an Int32Array.
 */
function createTime32(length: number, nullBitmap: Uint8Array, multiple: number, values: (number | null)[] = []) {
    const data = new Int32Array(length);
    iterateBitmap(length, nullBitmap, (i, valid) => {
        if (!valid) {
            data[i] = 0;
            values[i] = null;
        } else {
            // Parenthesize the truncation: `x | 0 * multiple` parses as
            // `x | (0 * multiple)`, which silently ignores `multiple`.
            const magnitude = ((1000 * rand()) | 0) * multiple;
            const sign = rand() > 0.5 ? -1 : 1;
            data[i] = magnitude * sign;
            // Read the value back so the expectation is exactly what the
            // Int32Array holds (e.g. -0 is stored as 0).
            values[i] = data[i];
        }
    });
    return data;
}
704
/**
 * Generates random 64-bit time values, stored as pairs of 32-bit words
 * (`[lo, hi]` per slot).
 *
 * Each valid slot gets a random integer in `[0, 1000)` scaled by `multiple`;
 * null slots are left as zero words.
 *
 * @param length Number of slots.
 * @param nullBitmap Validity bitmap for the slots.
 * @param multiple Factor by which the random base value is scaled.
 * @param values Output array of expected values: for valid slots a two-word
 * view into the returned buffer, `null` for null slots.
 * @returns An Int32Array of `length * 2` words.
 */
function createTime64(length: number, nullBitmap: Uint8Array, multiple: number, values: (Int32Array | null)[] = []) {
    // Typed arrays are zero-initialized, so null slots need no explicit fill.
    const data = new Int32Array(length * 2);
    iterateBitmap(length, nullBitmap, (i, valid) => {
        if (!valid) {
            values[i] = null;
        } else {
            // Parenthesize the truncation: `x | 0 * multiple` parses as
            // `x | (0 * multiple)`, which silently ignores `multiple`.
            const value = ((1000 * rand()) | 0) * multiple;
            const hi = Math.floor(value / 4294967296);
            const lo = (value - 4294967296 * hi) | 0;
            data[i * 2 + 0] = lo;
            data[i * 2 + 1] = hi;
            values[i] = data.subarray(i * 2, (i + 1) * 2);
        }
    });
    return data;
}
721