1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18import { Data } from '../data';
19import { Field } from '../schema';
20import { clampRange } from '../util/vector';
21import { DataType, Dictionary } from '../type';
22import { selectChunkArgs } from '../util/args';
23import { DictionaryVector } from './dictionary';
24import { AbstractVector, Vector } from '../vector';
25import { Clonable, Sliceable, Applicative } from '../vector';
26
27/** @ignore */
28type ChunkedDict<T extends DataType> = T extends Dictionary ? Vector<T['dictionary']> : null | never;
29/** @ignore */
30type ChunkedKeys<T extends DataType> = T extends Dictionary ? Vector<T['indices']> | Chunked<T['indices']> : null | never;
31
32/** @ignore */
33export type SearchContinuation<T extends Chunked> = (column: T, chunkIndex: number, valueIndex: number) => any;
34
35/** @ignore */
36class ChunkedIterator<T extends DataType> implements IterableIterator<T['TValue'] | null> {
37    private chunkIndex = 0;
38    private chunkIterator: IterableIterator<T['TValue'] | null>;
39
40    constructor(
41        private chunks: Vector<T>[],
42    ) {
43        this.chunkIterator = this.getChunkIterator();
44    }
45
46    next(): IteratorResult<T['TValue'] | null> {
47        while (this.chunkIndex < this.chunks.length) {
48            const next = this.chunkIterator.next();
49
50            if (!next.done) {
51                return next;
52            }
53
54            if (++this.chunkIndex < this.chunks.length) {
55                this.chunkIterator = this.getChunkIterator();
56            }
57        }
58
59        return {done: true, value: null};
60    }
61
62    getChunkIterator() {
63        return this.chunks[this.chunkIndex][Symbol.iterator]();
64    }
65
66    [Symbol.iterator]() {
67        return this;
68    }
69}
70
71/** @ignore */
72export class Chunked<T extends DataType = any>
73    extends AbstractVector<T>
74    implements Clonable<Chunked<T>>,
75               Sliceable<Chunked<T>>,
76               Applicative<T, Chunked<T>> {
77
78    /** @nocollapse */
79    public static flatten<T extends DataType>(...vectors: (Vector<T> | Vector<T>[])[]) {
80        return selectChunkArgs<Vector<T>>(Vector, vectors);
81    }
82
83    /** @nocollapse */
84    public static concat<T extends DataType>(...vectors: (Vector<T> | Vector<T>[])[]) {
85        const chunks = Chunked.flatten<T>(...vectors);
86        return new Chunked<T>(chunks[0].type, chunks);
87    }
88
89    protected _type: T;
90    protected _length: number;
91    protected _chunks: Vector<T>[];
92    protected _numChildren: number;
93    protected _children?: Chunked[];
94    protected _nullCount = -1;
95    protected _chunkOffsets: Uint32Array;
96
97    constructor(type: T, chunks: Vector<T>[] = [], offsets = calculateOffsets(chunks)) {
98        super();
99        this._type = type;
100        this._chunks = chunks;
101        this._chunkOffsets = offsets;
102        this._length = offsets[offsets.length - 1];
103        this._numChildren = (this._type.children || []).length;
104    }
105
106    public get type() { return this._type; }
107    public get length() { return this._length; }
108    public get chunks() { return this._chunks; }
109    public get typeId(): T['TType'] { return this._type.typeId; }
110    public get VectorName() { return `Chunked<${this._type}>`; }
111    public get data(): Data<T> {
112        return this._chunks[0] ? this._chunks[0].data : <any> null;
113    }
114
115    public get ArrayType() { return this._type.ArrayType; }
116    public get numChildren() { return this._numChildren; }
117    public get stride() { return this._chunks[0] ? this._chunks[0].stride : 1; }
118    public get byteLength(): number {
119        return this._chunks.reduce((byteLength, chunk) => byteLength + chunk.byteLength, 0);
120    }
121    public get nullCount() {
122        let nullCount = this._nullCount;
123        if (nullCount < 0) {
124            this._nullCount = nullCount = this._chunks.reduce((x, { nullCount }) => x + nullCount, 0);
125        }
126        return nullCount;
127    }
128
129    protected _indices?: ChunkedKeys<T>;
130    public get indices(): ChunkedKeys<T> | null {
131        if (DataType.isDictionary(this._type)) {
132            if (!this._indices) {
133                const chunks = (<any> this._chunks) as DictionaryVector<T, any>[];
134                this._indices = (chunks.length === 1
135                    ? chunks[0].indices
136                    : Chunked.concat(...chunks.map((x) => x.indices))) as ChunkedKeys<T>;
137            }
138            return this._indices;
139        }
140        return null;
141    }
142    public get dictionary(): ChunkedDict<T> | null {
143        if (DataType.isDictionary(this._type)) {
144            return this._chunks[this._chunks.length - 1].data.dictionary as ChunkedDict<T>;
145        }
146        return null;
147    }
148
149    public [Symbol.iterator](): IterableIterator<T['TValue'] | null> {
150        return new ChunkedIterator(this._chunks);
151    }
152
153    public clone(chunks = this._chunks): Chunked<T> {
154        return new Chunked(this._type, chunks);
155    }
156
157    public concat(...others: Vector<T>[]): Chunked<T> {
158        return this.clone(Chunked.flatten(this, ...others));
159    }
160
161    public slice(begin?: number, end?: number): Chunked<T> {
162        return clampRange(this, begin, end, this._sliceInternal);
163    }
164
165    public getChildAt<R extends DataType = any>(index: number): Chunked<R> | null {
166
167        if (index < 0 || index >= this._numChildren) { return null; }
168
169        const columns = this._children || (this._children = []);
170        let child: Chunked<R>, field: Field<R>, chunks: Vector<R>[];
171
172        if (child = columns[index]) { return child; }
173        if (field = ((this._type.children || [])[index] as Field<R>)) {
174            chunks = this._chunks
175                .map((vector) => vector.getChildAt<R>(index))
176                .filter((vec): vec is Vector<R> => vec != null);
177            if (chunks.length > 0) {
178                return (columns[index] = new Chunked<R>(field.type, chunks));
179            }
180        }
181
182        return null;
183    }
184
185    public search(index: number): [number, number] | null;
186    public search<N extends SearchContinuation<Chunked<T>>>(index: number, then?: N): ReturnType<N>;
187    public search<N extends SearchContinuation<Chunked<T>>>(index: number, then?: N) {
188        const idx = index;
189        // binary search to find the child vector and value indices
190        const offsets = this._chunkOffsets;
191        let rhs = offsets.length - 1;
192        // return early if out of bounds, or if there's just one child
193        if (idx < 0            ) { return null; }
194        if (idx >= offsets[rhs]) { return null; }
195        if (rhs <= 1           ) { return then ? then(this, 0, idx) : [0, idx]; }
196        let lhs = 0, pos = 0, mid = 0;
197        do {
198            if (lhs + 1 === rhs) {
199                return then ? then(this, lhs, idx - pos) : [lhs, idx - pos];
200            }
201            mid = lhs + ((rhs - lhs) / 2) | 0;
202            idx >= offsets[mid] ? (lhs = mid) : (rhs = mid);
203        } while (idx < offsets[rhs] && idx >= (pos = offsets[lhs]));
204        return null;
205    }
206
207    public isValid(index: number): boolean {
208        return !!this.search(index, this.isValidInternal);
209    }
210
211    public get(index: number): T['TValue'] | null {
212        return this.search(index, this.getInternal);
213    }
214
215    public set(index: number, value: T['TValue'] | null): void {
216        this.search(index, ({ chunks }, i, j) => chunks[i].set(j, value));
217    }
218
219    public indexOf(element: T['TValue'], offset?: number): number {
220        if (offset && typeof offset === 'number') {
221            return this.search(offset, (self, i, j) => this.indexOfInternal(self, i, j, element))!;
222        }
223        return this.indexOfInternal(this, 0, Math.max(0, offset || 0), element);
224    }
225
226    public toArray(): T['TArray'] {
227        const { chunks } = this;
228        const n = chunks.length;
229        let ArrayType: any = this._type.ArrayType;
230        if (n <= 0) { return new ArrayType(0); }
231        if (n <= 1) { return chunks[0].toArray(); }
232        let len = 0;
233        const src = new Array(n);
234        for (let i = -1; ++i < n;) {
235            len += (src[i] = chunks[i].toArray()).length;
236        }
237        if (ArrayType !== src[0].constructor) {
238            ArrayType = src[0].constructor;
239        }
240        const dst = new ArrayType(len);
241        const set: any = ArrayType === Array ? arraySet : typedSet;
242        for (let i = -1, idx = 0; ++i < n;) {
243            idx = set(src[i], dst, idx);
244        }
245        return dst;
246    }
247
248    protected getInternal({ _chunks }: Chunked<T>, i: number, j: number) { return _chunks[i].get(j); }
249    protected isValidInternal({ _chunks }: Chunked<T>, i: number, j: number) { return _chunks[i].isValid(j); }
250    protected indexOfInternal({ _chunks }: Chunked<T>, chunkIndex: number, fromIndex: number, element: T['TValue']) {
251        let i = chunkIndex - 1;
252        const n = _chunks.length;
253        let start = fromIndex, offset = 0, found = -1;
254        while (++i < n) {
255            if (~(found = _chunks[i].indexOf(element, start))) {
256                return offset + found;
257            }
258            start = 0;
259            offset += _chunks[i].length;
260        }
261        return -1;
262    }
263
264    protected _sliceInternal(self: Chunked<T>, begin: number, end: number) {
265        const slices: Vector<T>[] = [];
266        const { chunks, _chunkOffsets: chunkOffsets } = self;
267        for (let i = -1, n = chunks.length; ++i < n;) {
268            const chunk = chunks[i];
269            const chunkLength = chunk.length;
270            const chunkOffset = chunkOffsets[i];
271            // If the child is to the right of the slice boundary, we can stop
272            if (chunkOffset >= end) { break; }
273            // If the child is to the left of of the slice boundary, exclude
274            if (begin >= chunkOffset + chunkLength) { continue; }
275            // If the child is between both left and right boundaries, include w/o slicing
276            if (chunkOffset >= begin && (chunkOffset + chunkLength) <= end) {
277                slices.push(chunk);
278                continue;
279            }
280            // If the child overlaps one of the slice boundaries, include that slice
281            const from = Math.max(0, begin - chunkOffset);
282            const to = Math.min(end - chunkOffset, chunkLength);
283            slices.push(chunk.slice(from, to) as Vector<T>);
284        }
285        return self.clone(slices);
286    }
287}
288
289/** @ignore */
290function calculateOffsets<T extends DataType>(vectors: Vector<T>[]) {
291    const offsets = new Uint32Array((vectors || []).length + 1);
292    let offset = offsets[0] = 0;
293    const length = offsets.length;
294    for (let index = 0; ++index < length;) {
295        offsets[index] = (offset += vectors[index - 1].length);
296    }
297    return offsets;
298}
299
300/** @ignore */
301const typedSet = (src: TypedArray, dst: TypedArray, offset: number) => {
302    dst.set(src, offset);
303    return (offset + src.length);
304};
305
306/** @ignore */
307const arraySet = (src: any[], dst: any[], offset: number) => {
308    let idx = offset;
309    for (let i = -1, n = src.length; ++i < n;) {
310        dst[idx++] = src[i];
311    }
312    return idx;
313};
314
315/** @ignore */
316interface TypedArray extends ArrayBufferView {
317    readonly length: number;
318    readonly [n: number]: number;
319    set(array: ArrayLike<number>, offset?: number): void;
320}
321