1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18import { Data } from '../data';
19import { Field } from '../schema';
20import { clampRange } from '../util/vector';
21import { DataType, Dictionary } from '../type';
22import { selectChunkArgs } from '../util/args';
23import { DictionaryVector } from './dictionary';
24import { AbstractVector, Vector } from '../vector';
25import { Clonable, Sliceable, Applicative } from '../vector';
26
27/** @ignore */
28type ChunkedDict<T extends DataType> = T extends Dictionary ? Vector<T['dictionary']> : null | never;
29/** @ignore */
30type ChunkedKeys<T extends DataType> = T extends Dictionary ? Vector<T['indices']> | Chunked<T['indices']> : null | never;
31
32/** @ignore */
33export type SearchContinuation<T extends Chunked> = (column: T, chunkIndex: number, valueIndex: number) => any;
34
35/** @ignore */
36export class Chunked<T extends DataType = any>
37    extends AbstractVector<T>
38    implements Clonable<Chunked<T>>,
39               Sliceable<Chunked<T>>,
40               Applicative<T, Chunked<T>> {
41
42    /** @nocollapse */
43    public static flatten<T extends DataType>(...vectors: (Vector<T> | Vector<T>[])[]) {
44        return selectChunkArgs<Vector<T>>(Vector, vectors);
45    }
46
47    /** @nocollapse */
48    public static concat<T extends DataType>(...vectors: (Vector<T> | Vector<T>[])[]) {
49        const chunks = Chunked.flatten<T>(...vectors);
50        return new Chunked<T>(chunks[0].type, chunks);
51    }
52
53    protected _type: T;
54    protected _length: number;
55    protected _chunks: Vector<T>[];
56    protected _numChildren: number;
57    protected _children?: Chunked[];
58    protected _nullCount: number = -1;
59    protected _chunkOffsets: Uint32Array;
60
61    constructor(type: T, chunks: Vector<T>[] = [], offsets = calculateOffsets(chunks)) {
62        super();
63        this._type = type;
64        this._chunks = chunks;
65        this._chunkOffsets = offsets;
66        this._length = offsets[offsets.length - 1];
67        this._numChildren = (this._type.children || []).length;
68    }
69
70    public get type() { return this._type; }
71    public get length() { return this._length; }
72    public get chunks() { return this._chunks; }
73    public get typeId(): T['TType'] { return this._type.typeId; }
74    public get VectorName() { return `Chunked<${this._type}>`; }
75    public get data(): Data<T> {
76        return this._chunks[0] ? this._chunks[0].data : <any> null;
77    }
78
79    public get ArrayType() { return this._type.ArrayType; }
80    public get numChildren() { return this._numChildren; }
81    public get stride() { return this._chunks[0] ? this._chunks[0].stride : 1; }
82    public get byteLength(): number {
83        return this._chunks.reduce((byteLength, chunk) => byteLength + chunk.byteLength, 0);
84    }
85    public get nullCount() {
86        let nullCount = this._nullCount;
87        if (nullCount < 0) {
88            this._nullCount = nullCount = this._chunks.reduce((x, { nullCount }) => x + nullCount, 0);
89        }
90        return nullCount;
91    }
92
93    protected _indices?: ChunkedKeys<T>;
94    public get indices(): ChunkedKeys<T> | null {
95        if (DataType.isDictionary(this._type)) {
96            if (!this._indices) {
97                const chunks = (<any> this._chunks) as DictionaryVector<T, any>[];
98                this._indices = (chunks.length === 1
99                    ? chunks[0].indices
100                    : Chunked.concat(...chunks.map((x) => x.indices))) as ChunkedKeys<T>;
101            }
102            return this._indices;
103        }
104        return null;
105    }
106    public get dictionary(): ChunkedDict<T> | null {
107        if (DataType.isDictionary(this._type)) {
108            return this._chunks[this._chunks.length - 1].data.dictionary as ChunkedDict<T>;
109        }
110        return null;
111    }
112
113    public *[Symbol.iterator](): IterableIterator<T['TValue'] | null> {
114        for (const chunk of this._chunks) {
115            yield* chunk;
116        }
117    }
118
119    public clone(chunks = this._chunks): Chunked<T> {
120        return new Chunked(this._type, chunks);
121    }
122
123    public concat(...others: Vector<T>[]): Chunked<T> {
124        return this.clone(Chunked.flatten(this, ...others));
125    }
126
127    public slice(begin?: number, end?: number): Chunked<T> {
128        return clampRange(this, begin, end, this._sliceInternal);
129    }
130
131    public getChildAt<R extends DataType = any>(index: number): Chunked<R> | null {
132
133        if (index < 0 || index >= this._numChildren) { return null; }
134
135        let columns = this._children || (this._children = []);
136        let child: Chunked<R>, field: Field<R>, chunks: Vector<R>[];
137
138        if (child = columns[index]) { return child; }
139        if (field = ((this._type.children || [])[index] as Field<R>)) {
140            chunks = this._chunks
141                .map((vector) => vector.getChildAt<R>(index))
142                .filter((vec): vec is Vector<R> => vec != null);
143            if (chunks.length > 0) {
144                return (columns[index] = new Chunked<R>(field.type, chunks));
145            }
146        }
147
148        return null;
149    }
150
151    public search(index: number): [number, number] | null;
152    public search<N extends SearchContinuation<Chunked<T>>>(index: number, then?: N): ReturnType<N>;
153    public search<N extends SearchContinuation<Chunked<T>>>(index: number, then?: N) {
154        let idx = index;
155        // binary search to find the child vector and value indices
156        let offsets = this._chunkOffsets, rhs = offsets.length - 1;
157        // return early if out of bounds, or if there's just one child
158        if (idx < 0            ) { return null; }
159        if (idx >= offsets[rhs]) { return null; }
160        if (rhs <= 1           ) { return then ? then(this, 0, idx) : [0, idx]; }
161        let lhs = 0, pos = 0, mid = 0;
162        do {
163            if (lhs + 1 === rhs) {
164                return then ? then(this, lhs, idx - pos) : [lhs, idx - pos];
165            }
166            mid = lhs + ((rhs - lhs) / 2) | 0;
167            idx >= offsets[mid] ? (lhs = mid) : (rhs = mid);
168        } while (idx < offsets[rhs] && idx >= (pos = offsets[lhs]));
169        return null;
170    }
171
172    public isValid(index: number): boolean {
173        return !!this.search(index, this.isValidInternal);
174    }
175
176    public get(index: number): T['TValue'] | null {
177        return this.search(index, this.getInternal);
178    }
179
180    public set(index: number, value: T['TValue'] | null): void {
181        this.search(index, ({ chunks }, i, j) => chunks[i].set(j, value));
182    }
183
184    public indexOf(element: T['TValue'], offset?: number): number {
185        if (offset && typeof offset === 'number') {
186            return this.search(offset, (self, i, j) => this.indexOfInternal(self, i, j, element))!;
187        }
188        return this.indexOfInternal(this, 0, Math.max(0, offset || 0), element);
189    }
190
191    public toArray(): T['TArray'] {
192        const { chunks } = this;
193        const n = chunks.length;
194        let ArrayType: any = this._type.ArrayType;
195        if (n <= 0) { return new ArrayType(0); }
196        if (n <= 1) { return chunks[0].toArray(); }
197        let len = 0, src = new Array(n);
198        for (let i = -1; ++i < n;) {
199            len += (src[i] = chunks[i].toArray()).length;
200        }
201        if (ArrayType !== src[0].constructor) {
202            ArrayType = src[0].constructor;
203        }
204        let dst = new ArrayType(len);
205        let set: any = ArrayType === Array ? arraySet : typedSet;
206        for (let i = -1, idx = 0; ++i < n;) {
207            idx = set(src[i], dst, idx);
208        }
209        return dst;
210    }
211
212    protected getInternal({ _chunks }: Chunked<T>, i: number, j: number) { return _chunks[i].get(j); }
213    protected isValidInternal({ _chunks }: Chunked<T>, i: number, j: number) { return _chunks[i].isValid(j); }
214    protected indexOfInternal({ _chunks }: Chunked<T>, chunkIndex: number, fromIndex: number, element: T['TValue']) {
215        let i = chunkIndex - 1, n = _chunks.length;
216        let start = fromIndex, offset = 0, found = -1;
217        while (++i < n) {
218            if (~(found = _chunks[i].indexOf(element, start))) {
219                return offset + found;
220            }
221            start = 0;
222            offset += _chunks[i].length;
223        }
224        return -1;
225    }
226
227    protected _sliceInternal(self: Chunked<T>, begin: number, end: number) {
228        const slices: Vector<T>[] = [];
229        const { chunks, _chunkOffsets: chunkOffsets } = self;
230        for (let i = -1, n = chunks.length; ++i < n;) {
231            const chunk = chunks[i];
232            const chunkLength = chunk.length;
233            const chunkOffset = chunkOffsets[i];
234            // If the child is to the right of the slice boundary, we can stop
235            if (chunkOffset >= end) { break; }
236            // If the child is to the left of of the slice boundary, exclude
237            if (begin >= chunkOffset + chunkLength) { continue; }
238            // If the child is between both left and right boundaries, include w/o slicing
239            if (chunkOffset >= begin && (chunkOffset + chunkLength) <= end) {
240                slices.push(chunk);
241                continue;
242            }
243            // If the child overlaps one of the slice boundaries, include that slice
244            const from = Math.max(0, begin - chunkOffset);
245            const to = Math.min(end - chunkOffset, chunkLength);
246            slices.push(chunk.slice(from, to) as Vector<T>);
247        }
248        return self.clone(slices);
249    }
250}
251
252/** @ignore */
253function calculateOffsets<T extends DataType>(vectors: Vector<T>[]) {
254    let offsets = new Uint32Array((vectors || []).length + 1);
255    let offset = offsets[0] = 0, length = offsets.length;
256    for (let index = 0; ++index < length;) {
257        offsets[index] = (offset += vectors[index - 1].length);
258    }
259    return offsets;
260}
261
262/** @ignore */
263const typedSet = (src: TypedArray, dst: TypedArray, offset: number) => {
264    dst.set(src, offset);
265    return (offset + src.length);
266};
267
268/** @ignore */
269const arraySet = (src: any[], dst: any[], offset: number) => {
270    let idx = offset;
271    for (let i = -1, n = src.length; ++i < n;) {
272        dst[idx++] = src[i];
273    }
274    return idx;
275};
276
277/** @ignore */
278interface TypedArray extends ArrayBufferView {
279    readonly length: number;
280    readonly [n: number]: number;
281    set(array: ArrayLike<number>, offset?: number): void;
282}
283