1// Licensed to the Apache Software Foundation (ASF) under one 2// or more contributor license agreements. See the NOTICE file 3// distributed with this work for additional information 4// regarding copyright ownership. The ASF licenses this file 5// to you under the Apache License, Version 2.0 (the 6// "License"); you may not use this file except in compliance 7// with the License. You may obtain a copy of the License at 8// 9// http://www.apache.org/licenses/LICENSE-2.0 10// 11// Unless required by applicable law or agreed to in writing, 12// software distributed under the License is distributed on an 13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14// KIND, either express or implied. See the License for the 15// specific language governing permissions and limitations 16// under the License. 17 18import { Data } from '../data'; 19import { Field } from '../schema'; 20import { clampRange } from '../util/vector'; 21import { DataType, Dictionary } from '../type'; 22import { selectChunkArgs } from '../util/args'; 23import { DictionaryVector } from './dictionary'; 24import { AbstractVector, Vector } from '../vector'; 25import { Clonable, Sliceable, Applicative } from '../vector'; 26 27/** @ignore */ 28type ChunkedDict<T extends DataType> = T extends Dictionary ? Vector<T['dictionary']> : null | never; 29/** @ignore */ 30type ChunkedKeys<T extends DataType> = T extends Dictionary ? Vector<T['indices']> | Chunked<T['indices']> : null | never; 31 32/** @ignore */ 33export type SearchContinuation<T extends Chunked> = (column: T, chunkIndex: number, valueIndex: number) => any; 34 35/** @ignore */ 36export class Chunked<T extends DataType = any> 37 extends AbstractVector<T> 38 implements Clonable<Chunked<T>>, 39 Sliceable<Chunked<T>>, 40 Applicative<T, Chunked<T>> { 41 42 /** @nocollapse */ 43 public static flatten<T extends DataType>(...vectors: (Vector<T> | Vector<T>[])[]) { 44 return selectChunkArgs<Vector<T>>(Vector, vectors); 45 } 46 47 /** @nocollapse */ 48 public static concat<T extends DataType>(...vectors: (Vector<T> | Vector<T>[])[]) { 49 const chunks = Chunked.flatten<T>(...vectors); 50 return new Chunked<T>(chunks[0].type, chunks); 51 } 52 53 protected _type: T; 54 protected _length: number; 55 protected _chunks: Vector<T>[]; 56 protected _numChildren: number; 57 protected _children?: Chunked[]; 58 protected _nullCount: number = -1; 59 protected _chunkOffsets: Uint32Array; 60 61 constructor(type: T, chunks: Vector<T>[] = [], offsets = calculateOffsets(chunks)) { 62 super(); 63 this._type = type; 64 this._chunks = chunks; 65 this._chunkOffsets = offsets; 66 this._length = offsets[offsets.length - 1]; 67 this._numChildren = (this._type.children || []).length; 68 } 69 70 public get type() { return this._type; } 71 public get length() { return this._length; } 72 public get chunks() { return this._chunks; } 73 public get typeId(): T['TType'] { return this._type.typeId; } 74 public get VectorName() { return `Chunked<${this._type}>`; } 75 public get data(): Data<T> { 76 return this._chunks[0] ? this._chunks[0].data : <any> null; 77 } 78 79 public get ArrayType() { return this._type.ArrayType; } 80 public get numChildren() { return this._numChildren; } 81 public get stride() { return this._chunks[0] ? this._chunks[0].stride : 1; } 82 public get byteLength(): number { 83 return this._chunks.reduce((byteLength, chunk) => byteLength + chunk.byteLength, 0); 84 } 85 public get nullCount() { 86 let nullCount = this._nullCount; 87 if (nullCount < 0) { 88 this._nullCount = nullCount = this._chunks.reduce((x, { nullCount }) => x + nullCount, 0); 89 } 90 return nullCount; 91 } 92 93 protected _indices?: ChunkedKeys<T>; 94 public get indices(): ChunkedKeys<T> | null { 95 if (DataType.isDictionary(this._type)) { 96 if (!this._indices) { 97 const chunks = (<any> this._chunks) as DictionaryVector<T, any>[]; 98 this._indices = (chunks.length === 1 99 ? chunks[0].indices 100 : Chunked.concat(...chunks.map((x) => x.indices))) as ChunkedKeys<T>; 101 } 102 return this._indices; 103 } 104 return null; 105 } 106 public get dictionary(): ChunkedDict<T> | null { 107 if (DataType.isDictionary(this._type)) { 108 return this._chunks[this._chunks.length - 1].data.dictionary as ChunkedDict<T>; 109 } 110 return null; 111 } 112 113 public *[Symbol.iterator](): IterableIterator<T['TValue'] | null> { 114 for (const chunk of this._chunks) { 115 yield* chunk; 116 } 117 } 118 119 public clone(chunks = this._chunks): Chunked<T> { 120 return new Chunked(this._type, chunks); 121 } 122 123 public concat(...others: Vector<T>[]): Chunked<T> { 124 return this.clone(Chunked.flatten(this, ...others)); 125 } 126 127 public slice(begin?: number, end?: number): Chunked<T> { 128 return clampRange(this, begin, end, this._sliceInternal); 129 } 130 131 public getChildAt<R extends DataType = any>(index: number): Chunked<R> | null { 132 133 if (index < 0 || index >= this._numChildren) { return null; } 134 135 let columns = this._children || (this._children = []); 136 let child: Chunked<R>, field: Field<R>, chunks: Vector<R>[]; 137 138 if (child = columns[index]) { return child; } 139 if (field = ((this._type.children || [])[index] as Field<R>)) { 140 chunks = this._chunks 141 .map((vector) => vector.getChildAt<R>(index)) 142 .filter((vec): vec is Vector<R> => vec != null); 143 if (chunks.length > 0) { 144 return (columns[index] = new Chunked<R>(field.type, chunks)); 145 } 146 } 147 148 return null; 149 } 150 151 public search(index: number): [number, number] | null; 152 public search<N extends SearchContinuation<Chunked<T>>>(index: number, then?: N): ReturnType<N>; 153 public search<N extends SearchContinuation<Chunked<T>>>(index: number, then?: N) { 154 let idx = index; 155 // binary search to find the child vector and value indices 156 let offsets = this._chunkOffsets, rhs = offsets.length - 1; 157 // return early if out of bounds, or if there's just one child 158 if (idx < 0 ) { return null; } 159 if (idx >= offsets[rhs]) { return null; } 160 if (rhs <= 1 ) { return then ? then(this, 0, idx) : [0, idx]; } 161 let lhs = 0, pos = 0, mid = 0; 162 do { 163 if (lhs + 1 === rhs) { 164 return then ? then(this, lhs, idx - pos) : [lhs, idx - pos]; 165 } 166 mid = lhs + ((rhs - lhs) / 2) | 0; 167 idx >= offsets[mid] ? (lhs = mid) : (rhs = mid); 168 } while (idx < offsets[rhs] && idx >= (pos = offsets[lhs])); 169 return null; 170 } 171 172 public isValid(index: number): boolean { 173 return !!this.search(index, this.isValidInternal); 174 } 175 176 public get(index: number): T['TValue'] | null { 177 return this.search(index, this.getInternal); 178 } 179 180 public set(index: number, value: T['TValue'] | null): void { 181 this.search(index, ({ chunks }, i, j) => chunks[i].set(j, value)); 182 } 183 184 public indexOf(element: T['TValue'], offset?: number): number { 185 if (offset && typeof offset === 'number') { 186 return this.search(offset, (self, i, j) => this.indexOfInternal(self, i, j, element))!; 187 } 188 return this.indexOfInternal(this, 0, Math.max(0, offset || 0), element); 189 } 190 191 public toArray(): T['TArray'] { 192 const { chunks } = this; 193 const n = chunks.length; 194 let ArrayType: any = this._type.ArrayType; 195 if (n <= 0) { return new ArrayType(0); } 196 if (n <= 1) { return chunks[0].toArray(); } 197 let len = 0, src = new Array(n); 198 for (let i = -1; ++i < n;) { 199 len += (src[i] = chunks[i].toArray()).length; 200 } 201 if (ArrayType !== src[0].constructor) { 202 ArrayType = src[0].constructor; 203 } 204 let dst = new ArrayType(len); 205 let set: any = ArrayType === Array ? arraySet : typedSet; 206 for (let i = -1, idx = 0; ++i < n;) { 207 idx = set(src[i], dst, idx); 208 } 209 return dst; 210 } 211 212 protected getInternal({ _chunks }: Chunked<T>, i: number, j: number) { return _chunks[i].get(j); } 213 protected isValidInternal({ _chunks }: Chunked<T>, i: number, j: number) { return _chunks[i].isValid(j); } 214 protected indexOfInternal({ _chunks }: Chunked<T>, chunkIndex: number, fromIndex: number, element: T['TValue']) { 215 let i = chunkIndex - 1, n = _chunks.length; 216 let start = fromIndex, offset = 0, found = -1; 217 while (++i < n) { 218 if (~(found = _chunks[i].indexOf(element, start))) { 219 return offset + found; 220 } 221 start = 0; 222 offset += _chunks[i].length; 223 } 224 return -1; 225 } 226 227 protected _sliceInternal(self: Chunked<T>, begin: number, end: number) { 228 const slices: Vector<T>[] = []; 229 const { chunks, _chunkOffsets: chunkOffsets } = self; 230 for (let i = -1, n = chunks.length; ++i < n;) { 231 const chunk = chunks[i]; 232 const chunkLength = chunk.length; 233 const chunkOffset = chunkOffsets[i]; 234 // If the child is to the right of the slice boundary, we can stop 235 if (chunkOffset >= end) { break; } 236 // If the child is to the left of of the slice boundary, exclude 237 if (begin >= chunkOffset + chunkLength) { continue; } 238 // If the child is between both left and right boundaries, include w/o slicing 239 if (chunkOffset >= begin && (chunkOffset + chunkLength) <= end) { 240 slices.push(chunk); 241 continue; 242 } 243 // If the child overlaps one of the slice boundaries, include that slice 244 const from = Math.max(0, begin - chunkOffset); 245 const to = Math.min(end - chunkOffset, chunkLength); 246 slices.push(chunk.slice(from, to) as Vector<T>); 247 } 248 return self.clone(slices); 249 } 250} 251 252/** @ignore */ 253function calculateOffsets<T extends DataType>(vectors: Vector<T>[]) { 254 let offsets = new Uint32Array((vectors || []).length + 1); 255 let offset = offsets[0] = 0, length = offsets.length; 256 for (let index = 0; ++index < length;) { 257 offsets[index] = (offset += vectors[index - 1].length); 258 } 259 return offsets; 260} 261 262/** @ignore */ 263const typedSet = (src: TypedArray, dst: TypedArray, offset: number) => { 264 dst.set(src, offset); 265 return (offset + src.length); 266}; 267 268/** @ignore */ 269const arraySet = (src: any[], dst: any[], offset: number) => { 270 let idx = offset; 271 for (let i = -1, n = src.length; ++i < n;) { 272 dst[idx++] = src[i]; 273 } 274 return idx; 275}; 276 277/** @ignore */ 278interface TypedArray extends ArrayBufferView { 279 readonly length: number; 280 readonly [n: number]: number; 281 set(array: ArrayLike<number>, offset?: number): void; 282} 283