1// Licensed to the Apache Software Foundation (ASF) under one 2// or more contributor license agreements. See the NOTICE file 3// distributed with this work for additional information 4// regarding copyright ownership. The ASF licenses this file 5// to you under the Apache License, Version 2.0 (the 6// "License"); you may not use this file except in compliance 7// with the License. You may obtain a copy of the License at 8// 9// http://www.apache.org/licenses/LICENSE-2.0 10// 11// Unless required by applicable law or agreed to in writing, 12// software distributed under the License is distributed on an 13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14// KIND, either express or implied. See the License for the 15// specific language governing permissions and limitations 16// under the License. 17 18import { Data } from '../data'; 19import { Field } from '../schema'; 20import { clampRange } from '../util/vector'; 21import { DataType, Dictionary } from '../type'; 22import { selectChunkArgs } from '../util/args'; 23import { DictionaryVector } from './dictionary'; 24import { AbstractVector, Vector } from '../vector'; 25import { Clonable, Sliceable, Applicative } from '../vector'; 26 27/** @ignore */ 28type ChunkedDict<T extends DataType> = T extends Dictionary ? Vector<T['dictionary']> : null | never; 29/** @ignore */ 30type ChunkedKeys<T extends DataType> = T extends Dictionary ? Vector<T['indices']> | Chunked<T['indices']> : null | never; 31 32/** @ignore */ 33export type SearchContinuation<T extends Chunked> = (column: T, chunkIndex: number, valueIndex: number) => any; 34 35/** @ignore */ 36class ChunkedIterator<T extends DataType> implements IterableIterator<T['TValue'] | null> { 37 private chunkIndex = 0; 38 private chunkIterator: IterableIterator<T['TValue'] | null>; 39 40 constructor( 41 private chunks: Vector<T>[], 42 ) { 43 this.chunkIterator = this.getChunkIterator(); 44 } 45 46 next(): IteratorResult<T['TValue'] | null> { 47 while (this.chunkIndex < this.chunks.length) { 48 const next = this.chunkIterator.next(); 49 50 if (!next.done) { 51 return next; 52 } 53 54 if (++this.chunkIndex < this.chunks.length) { 55 this.chunkIterator = this.getChunkIterator(); 56 } 57 } 58 59 return {done: true, value: null}; 60 } 61 62 getChunkIterator() { 63 return this.chunks[this.chunkIndex][Symbol.iterator](); 64 } 65 66 [Symbol.iterator]() { 67 return this; 68 } 69} 70 71/** @ignore */ 72export class Chunked<T extends DataType = any> 73 extends AbstractVector<T> 74 implements Clonable<Chunked<T>>, 75 Sliceable<Chunked<T>>, 76 Applicative<T, Chunked<T>> { 77 78 /** @nocollapse */ 79 public static flatten<T extends DataType>(...vectors: (Vector<T> | Vector<T>[])[]) { 80 return selectChunkArgs<Vector<T>>(Vector, vectors); 81 } 82 83 /** @nocollapse */ 84 public static concat<T extends DataType>(...vectors: (Vector<T> | Vector<T>[])[]) { 85 const chunks = Chunked.flatten<T>(...vectors); 86 return new Chunked<T>(chunks[0].type, chunks); 87 } 88 89 protected _type: T; 90 protected _length: number; 91 protected _chunks: Vector<T>[]; 92 protected _numChildren: number; 93 protected _children?: Chunked[]; 94 protected _nullCount = -1; 95 protected _chunkOffsets: Uint32Array; 96 97 constructor(type: T, chunks: Vector<T>[] = [], offsets = calculateOffsets(chunks)) { 98 super(); 99 this._type = type; 100 this._chunks = chunks; 101 this._chunkOffsets = offsets; 102 this._length = offsets[offsets.length - 1]; 103 this._numChildren = (this._type.children || []).length; 104 } 105 106 public get type() { return this._type; } 107 public get length() { return this._length; } 108 public get chunks() { return this._chunks; } 109 public get typeId(): T['TType'] { return this._type.typeId; } 110 public get VectorName() { return `Chunked<${this._type}>`; } 111 public get data(): Data<T> { 112 return this._chunks[0] ? this._chunks[0].data : <any> null; 113 } 114 115 public get ArrayType() { return this._type.ArrayType; } 116 public get numChildren() { return this._numChildren; } 117 public get stride() { return this._chunks[0] ? this._chunks[0].stride : 1; } 118 public get byteLength(): number { 119 return this._chunks.reduce((byteLength, chunk) => byteLength + chunk.byteLength, 0); 120 } 121 public get nullCount() { 122 let nullCount = this._nullCount; 123 if (nullCount < 0) { 124 this._nullCount = nullCount = this._chunks.reduce((x, { nullCount }) => x + nullCount, 0); 125 } 126 return nullCount; 127 } 128 129 protected _indices?: ChunkedKeys<T>; 130 public get indices(): ChunkedKeys<T> | null { 131 if (DataType.isDictionary(this._type)) { 132 if (!this._indices) { 133 const chunks = (<any> this._chunks) as DictionaryVector<T, any>[]; 134 this._indices = (chunks.length === 1 135 ? chunks[0].indices 136 : Chunked.concat(...chunks.map((x) => x.indices))) as ChunkedKeys<T>; 137 } 138 return this._indices; 139 } 140 return null; 141 } 142 public get dictionary(): ChunkedDict<T> | null { 143 if (DataType.isDictionary(this._type)) { 144 return this._chunks[this._chunks.length - 1].data.dictionary as ChunkedDict<T>; 145 } 146 return null; 147 } 148 149 public [Symbol.iterator](): IterableIterator<T['TValue'] | null> { 150 return new ChunkedIterator(this._chunks); 151 } 152 153 public clone(chunks = this._chunks): Chunked<T> { 154 return new Chunked(this._type, chunks); 155 } 156 157 public concat(...others: Vector<T>[]): Chunked<T> { 158 return this.clone(Chunked.flatten(this, ...others)); 159 } 160 161 public slice(begin?: number, end?: number): Chunked<T> { 162 return clampRange(this, begin, end, this._sliceInternal); 163 } 164 165 public getChildAt<R extends DataType = any>(index: number): Chunked<R> | null { 166 167 if (index < 0 || index >= this._numChildren) { return null; } 168 169 const columns = this._children || (this._children = []); 170 let child: Chunked<R>, field: Field<R>, chunks: Vector<R>[]; 171 172 if (child = columns[index]) { return child; } 173 if (field = ((this._type.children || [])[index] as Field<R>)) { 174 chunks = this._chunks 175 .map((vector) => vector.getChildAt<R>(index)) 176 .filter((vec): vec is Vector<R> => vec != null); 177 if (chunks.length > 0) { 178 return (columns[index] = new Chunked<R>(field.type, chunks)); 179 } 180 } 181 182 return null; 183 } 184 185 public search(index: number): [number, number] | null; 186 public search<N extends SearchContinuation<Chunked<T>>>(index: number, then?: N): ReturnType<N>; 187 public search<N extends SearchContinuation<Chunked<T>>>(index: number, then?: N) { 188 const idx = index; 189 // binary search to find the child vector and value indices 190 const offsets = this._chunkOffsets; 191 let rhs = offsets.length - 1; 192 // return early if out of bounds, or if there's just one child 193 if (idx < 0 ) { return null; } 194 if (idx >= offsets[rhs]) { return null; } 195 if (rhs <= 1 ) { return then ? then(this, 0, idx) : [0, idx]; } 196 let lhs = 0, pos = 0, mid = 0; 197 do { 198 if (lhs + 1 === rhs) { 199 return then ? then(this, lhs, idx - pos) : [lhs, idx - pos]; 200 } 201 mid = lhs + ((rhs - lhs) / 2) | 0; 202 idx >= offsets[mid] ? (lhs = mid) : (rhs = mid); 203 } while (idx < offsets[rhs] && idx >= (pos = offsets[lhs])); 204 return null; 205 } 206 207 public isValid(index: number): boolean { 208 return !!this.search(index, this.isValidInternal); 209 } 210 211 public get(index: number): T['TValue'] | null { 212 return this.search(index, this.getInternal); 213 } 214 215 public set(index: number, value: T['TValue'] | null): void { 216 this.search(index, ({ chunks }, i, j) => chunks[i].set(j, value)); 217 } 218 219 public indexOf(element: T['TValue'], offset?: number): number { 220 if (offset && typeof offset === 'number') { 221 return this.search(offset, (self, i, j) => this.indexOfInternal(self, i, j, element))!; 222 } 223 return this.indexOfInternal(this, 0, Math.max(0, offset || 0), element); 224 } 225 226 public toArray(): T['TArray'] { 227 const { chunks } = this; 228 const n = chunks.length; 229 let ArrayType: any = this._type.ArrayType; 230 if (n <= 0) { return new ArrayType(0); } 231 if (n <= 1) { return chunks[0].toArray(); } 232 let len = 0; 233 const src = new Array(n); 234 for (let i = -1; ++i < n;) { 235 len += (src[i] = chunks[i].toArray()).length; 236 } 237 if (ArrayType !== src[0].constructor) { 238 ArrayType = src[0].constructor; 239 } 240 const dst = new ArrayType(len); 241 const set: any = ArrayType === Array ? arraySet : typedSet; 242 for (let i = -1, idx = 0; ++i < n;) { 243 idx = set(src[i], dst, idx); 244 } 245 return dst; 246 } 247 248 protected getInternal({ _chunks }: Chunked<T>, i: number, j: number) { return _chunks[i].get(j); } 249 protected isValidInternal({ _chunks }: Chunked<T>, i: number, j: number) { return _chunks[i].isValid(j); } 250 protected indexOfInternal({ _chunks }: Chunked<T>, chunkIndex: number, fromIndex: number, element: T['TValue']) { 251 let i = chunkIndex - 1; 252 const n = _chunks.length; 253 let start = fromIndex, offset = 0, found = -1; 254 while (++i < n) { 255 if (~(found = _chunks[i].indexOf(element, start))) { 256 return offset + found; 257 } 258 start = 0; 259 offset += _chunks[i].length; 260 } 261 return -1; 262 } 263 264 protected _sliceInternal(self: Chunked<T>, begin: number, end: number) { 265 const slices: Vector<T>[] = []; 266 const { chunks, _chunkOffsets: chunkOffsets } = self; 267 for (let i = -1, n = chunks.length; ++i < n;) { 268 const chunk = chunks[i]; 269 const chunkLength = chunk.length; 270 const chunkOffset = chunkOffsets[i]; 271 // If the child is to the right of the slice boundary, we can stop 272 if (chunkOffset >= end) { break; } 273 // If the child is to the left of of the slice boundary, exclude 274 if (begin >= chunkOffset + chunkLength) { continue; } 275 // If the child is between both left and right boundaries, include w/o slicing 276 if (chunkOffset >= begin && (chunkOffset + chunkLength) <= end) { 277 slices.push(chunk); 278 continue; 279 } 280 // If the child overlaps one of the slice boundaries, include that slice 281 const from = Math.max(0, begin - chunkOffset); 282 const to = Math.min(end - chunkOffset, chunkLength); 283 slices.push(chunk.slice(from, to) as Vector<T>); 284 } 285 return self.clone(slices); 286 } 287} 288 289/** @ignore */ 290function calculateOffsets<T extends DataType>(vectors: Vector<T>[]) { 291 const offsets = new Uint32Array((vectors || []).length + 1); 292 let offset = offsets[0] = 0; 293 const length = offsets.length; 294 for (let index = 0; ++index < length;) { 295 offsets[index] = (offset += vectors[index - 1].length); 296 } 297 return offsets; 298} 299 300/** @ignore */ 301const typedSet = (src: TypedArray, dst: TypedArray, offset: number) => { 302 dst.set(src, offset); 303 return (offset + src.length); 304}; 305 306/** @ignore */ 307const arraySet = (src: any[], dst: any[], offset: number) => { 308 let idx = offset; 309 for (let i = -1, n = src.length; ++i < n;) { 310 dst[idx++] = src[i]; 311 } 312 return idx; 313}; 314 315/** @ignore */ 316interface TypedArray extends ArrayBufferView { 317 readonly length: number; 318 readonly [n: number]: number; 319 set(array: ArrayLike<number>, offset?: number): void; 320} 321