1 /** 2 * @file reader.h 3 * 4 * @section LICENSE 5 * 6 * The MIT License 7 * 8 * @copyright Copyright (c) 2017-2021 TileDB, Inc. 9 * 10 * Permission is hereby granted, free of charge, to any person obtaining a copy 11 * of this software and associated documentation files (the "Software"), to deal 12 * in the Software without restriction, including without limitation the rights 13 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 14 * copies of the Software, and to permit persons to whom the Software is 15 * furnished to do so, subject to the following conditions: 16 * 17 * The above copyright notice and this permission notice shall be included in 18 * all copies or substantial portions of the Software. 19 * 20 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 21 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 22 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 23 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 24 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 25 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 26 * THE SOFTWARE. 27 * 28 * @section DESCRIPTION 29 * 30 * This file defines class Reader. 31 */ 32 33 #ifndef TILEDB_READER_H 34 #define TILEDB_READER_H 35 36 #include <atomic> 37 38 #include "tiledb/common/logger_public.h" 39 #include "tiledb/common/status.h" 40 #include "tiledb/sm/array_schema/dimension.h" 41 #include "tiledb/sm/misc/types.h" 42 #include "tiledb/sm/query/iquery_strategy.h" 43 #include "tiledb/sm/query/query_buffer.h" 44 #include "tiledb/sm/query/query_condition.h" 45 #include "tiledb/sm/query/reader_base.h" 46 #include "tiledb/sm/query/result_cell_slab.h" 47 #include "tiledb/sm/query/result_coords.h" 48 #include "tiledb/sm/subarray/subarray_partitioner.h" 49 50 using namespace tiledb::common; 51 52 namespace tiledb { 53 namespace sm { 54 55 class Array; 56 class StorageManager; 57 class Tile; 58 59 /** Processes read queries. */ 60 class Reader : public ReaderBase, public IQueryStrategy { 61 public: 62 /* ********************************* */ 63 /* CONSTRUCTORS & DESTRUCTORS */ 64 /* ********************************* */ 65 66 /** Constructor. */ 67 Reader( 68 stats::Stats* stats, 69 tdb_shared_ptr<Logger> logger, 70 StorageManager* storage_manager, 71 Array* array, 72 Config& config, 73 std::unordered_map<std::string, QueryBuffer>& buffers, 74 Subarray& subarray, 75 Layout layout, 76 QueryCondition& condition); 77 78 /** Destructor. */ 79 ~Reader() = default; 80 81 DISABLE_COPY_AND_COPY_ASSIGN(Reader); 82 DISABLE_MOVE_AND_MOVE_ASSIGN(Reader); 83 84 /* ********************************* */ 85 /* API */ 86 /* ********************************* */ 87 88 /** Finalizes the reader. */ 89 Status finalize(); 90 91 /** 92 * Returns `true` if the query was incomplete, i.e., if all subarray 93 * partitions in the read state have not been processed or there 94 * was some buffer overflow. 95 */ 96 bool incomplete() const; 97 98 /** Initializes the reader. */ 99 Status init(); 100 101 /** Initialize the memory budget variables. */ 102 Status initialize_memory_budget(); 103 104 /** Returns the current read state. */ 105 const ReadState* read_state() const; 106 107 /** Returns the current read state. */ 108 ReadState* read_state(); 109 110 /** Performs a read query using its set members. */ 111 Status dowork(); 112 113 /** Resets the reader object. */ 114 void reset(); 115 116 /** 117 * Computes the result cell slabs for the input subarray, given the 118 * input result coordinates (retrieved from the sparse fragments). 119 * The function also computes and stores the results space tiles 120 * in `result_space_tiles`. This needs to be preserved throughout 121 * the cell copying operations, since this structure stores all 122 * the relevant result tiles for the dense fragments. 123 * 124 * @tparam T The domain datatype. 125 * @param subarray The input subarray. 126 * @param result_space_tiles The result space tiles computed by the 127 * function, which store the result tiles from the dense fragments. 128 * @param result_coords The result coordinates produced by the sparse 129 * fragments. 130 * @param result_tiles This will store pointers to the result tiles 131 * of both dense and sparse fragments. 132 * @param result_cell_slabs The returned result cell slabs. 133 * @return Status 134 */ 135 template <class T> 136 Status compute_result_cell_slabs( 137 const Subarray& subarray, 138 std::map<const T*, ResultSpaceTile<T>>* result_space_tiles, 139 std::vector<ResultCoords>* result_coords, 140 std::vector<ResultTile*>* result_tiles, 141 std::vector<ResultCellSlab>* result_cell_slabs) const; 142 143 /** 144 * Computes the result cell slabs for the input subarray, given the 145 * input result coordinates (retrieved from the sparse fragments). 146 * The function also computes and stores the results space tiles 147 * in `result_space_tiles`. This needs to be preserved throughout 148 * the cell copying operations, since this structure stores all 149 * the relevant result tiles for the dense fragments. Applicable 150 * only to row-/col-major subarray layouts. 151 * 152 * @tparam T The domain datatype. 153 * @param subarray The input subarray. 154 * @param result_coords The result coordinates produced by the sparse 155 * fragments. 156 * @param result_coords_pos The position in `result_coords` to be 157 * passed to the result cell slab iterator in the function. 158 * This practically keeps track of the sparse coordinate results 159 * already processed in successive calls of this function. 160 * The function updates this value with the current position 161 * returned by the iterator at the end of its process. 162 * @param result_space_tiles The result space tiles computed by the 163 * function, which store the result tiles from the dense fragments. 164 * @param result_tiles This will store pointers to the result tiles 165 * of both dense and sparse fragments. 166 * @param frag_tile_set Stores the unique pairs (frag_idx, tile_idx) 167 * for all result tiles. 168 * @param result_cell_slabs The returned result cell slabs. 169 * @return Status 170 */ 171 template <class T> 172 Status compute_result_cell_slabs_row_col( 173 const Subarray& subarray, 174 std::map<const T*, ResultSpaceTile<T>>* result_space_tiles, 175 std::vector<ResultCoords>* result_coords, 176 uint64_t* result_coords_pos, 177 std::vector<ResultTile*>* result_tiles, 178 std::set<std::pair<unsigned, uint64_t>>* frag_tile_set, 179 std::vector<ResultCellSlab>* result_cell_slabs) const; 180 181 /** 182 * Computes the result cell slabs for the input subarray, given the 183 * input result coordinates (retrieved from the sparse fragments). 184 * The function also computes and stores the results space tiles 185 * in `result_space_tiles`. This needs to be preserved throughout 186 * the cell copying operations, since this structure stores all 187 * the relevant result tiles for the dense fragments. Applicable 188 * only to global order subarray layouts. 189 * 190 * @tparam T The domain datatype. 191 * @param subarray The input subarray. 192 * @param result_coords The result coordinates produced by the sparse 193 * fragments. 194 * @param result_space_tiles The result space tiles computed by the 195 * function, which store the result tiles from the dense fragments. 196 * @param result_tiles This will store pointers to the result tiles 197 * of both dense and sparse fragments. 198 * @param result_cell_slabs The returned result cell slabs. 199 * @return Status 200 */ 201 template <class T> 202 Status compute_result_cell_slabs_global( 203 const Subarray& subarray, 204 std::map<const T*, ResultSpaceTile<T>>* result_space_tiles, 205 std::vector<ResultCoords>* result_coords, 206 std::vector<ResultTile*>* result_tiles, 207 std::vector<ResultCellSlab>* result_cell_slabs) const; 208 209 private: 210 /* ********************************* */ 211 /* PRIVATE ATTRIBUTES */ 212 /* ********************************* */ 213 214 /** UID of the logger instance */ 215 inline static std::atomic<uint64_t> logger_id_ = 0; 216 217 /** Read state. */ 218 ReadState read_state_; 219 220 /* ********************************* */ 221 /* PRIVATE METHODS */ 222 /* ********************************* */ 223 224 /** 225 * Compute the maximal cell slabs of contiguous sparse coordinates. 226 * 227 * @param coords The coordinates to compute the slabs from. 228 * @param result_cell_slabs The result cell slabs to compute. 229 * @return Status 230 */ 231 Status compute_result_cell_slabs( 232 const std::vector<ResultCoords>& result_coords, 233 std::vector<ResultCellSlab>* result_cell_slabs) const; 234 235 /** 236 * Retrieves the coordinates that overlap the input N-dimensional range 237 * from the input result tile. 238 * 239 * @param subarray The subarray to operate on. 240 * @param frag_idx The id of the fragment that the result tile belongs to. 241 * @param tile The result tile. 242 * @param range_idx The range id. 243 * @param result_coords The overlapping coordinates to retrieve. 244 * @return Status 245 */ 246 Status compute_range_result_coords( 247 Subarray* subarray, 248 unsigned frag_idx, 249 ResultTile* tile, 250 uint64_t range_idx, 251 std::vector<ResultCoords>* result_coords); 252 253 /** 254 * Computes the result coordinates for each range of the query 255 * subarray. 256 * 257 * @param subarray The subarray to operate on. 258 * @param single_fragment For each range, it indicates whether all 259 * result coordinates come from a single fragment. 260 * @param result_tile_map This is an auxialiary map that helps finding the 261 * result tiles of each range. 262 * @param result_tiles The result tiles to read the coordinates from. 263 * @param range_result_coords The result coordinates to be retrieved. 264 * It contains a vector for each range of the subarray. 265 * @return Status 266 */ 267 Status compute_range_result_coords( 268 Subarray* subarray, 269 const std::vector<bool>& single_fragment, 270 const std::map<std::pair<unsigned, uint64_t>, size_t>& result_tile_map, 271 std::vector<ResultTile>* result_tiles, 272 std::vector<std::vector<ResultCoords>>* range_result_coords); 273 274 /** 275 * Computes the result coordinates of a given range of the query 276 * subarray. 277 * 278 * @param subarray The subarray to operate on. 279 * @param range_idx The range to focus on. 280 * @param result_tile_map This is an auxialiary map that helps finding the 281 * result_tiles overlapping with each range. 282 * @param result_tiles The result tiles to read the coordinates from. 283 * @param range_result_coords The result coordinates to be retrieved. 284 * It contains a vector for each range of the subarray. 285 * @return Status 286 */ 287 Status compute_range_result_coords( 288 Subarray* subarray, 289 uint64_t range_idx, 290 const std::map<std::pair<unsigned, uint64_t>, size_t>& result_tile_map, 291 std::vector<ResultTile>* result_tiles, 292 std::vector<ResultCoords>* range_result_coords); 293 294 /** 295 * Computes the result coordinates of a given range of the query 296 * subarray. 297 * 298 * @param subarray The subarray to operate on. 299 * @param range_idx The range to focus on. 300 * @param fragment_idx The fragment to focus on. 301 * @param result_tile_map This is an auxialiary map that helps finding the 302 * result_tiles overlapping with each range. 303 * @param result_tiles The result tiles to read the coordinates from. 304 * @param range_result_coords The result coordinates to be retrieved. 305 * It contains a vector for each range of the subarray. 306 * @return Status 307 */ 308 Status compute_range_result_coords( 309 Subarray* subarray, 310 uint64_t range_idx, 311 uint32_t fragment_idx, 312 const std::map<std::pair<unsigned, uint64_t>, size_t>& result_tile_map, 313 std::vector<ResultTile>* result_tiles, 314 std::vector<ResultCoords>* range_result_coords); 315 316 /** 317 * Computes the final subarray result coordinates, which will be 318 * deduplicated and sorted on the specified subarray layout. 319 * 320 * @param range_result_coords The result coordinates for each subarray range. 321 * @param result_coords The final (subarray) result coordinates to be 322 * retrieved. 323 * @return Status 324 * 325 * @note the function will try to gradually clean up ``range_result_coords`` 326 * as it is done processing its elements to quickly reclaim memory. 327 */ 328 Status compute_subarray_coords( 329 std::vector<std::vector<ResultCoords>>* range_result_coords, 330 std::vector<ResultCoords>* result_coords); 331 332 /** 333 * Computes info about the sparse result tiles, such as which fragment they 334 * belong to, the tile index and the type of overlap. The tile vector 335 * contains unique info about the tiles. The function also computes 336 * a map from fragment index and tile id to a result tile to keep 337 * track of the unique result tile info for subarray ranges that overlap 338 * with common tiles. 339 * 340 * @param result_tiles The result tiles to be computed. 341 * @param result_tile_map The result tile map to be computed. 342 * @param single_fragment Each element corresponds to a range of the 343 * subarray and is set to ``true`` if all the overlapping 344 * tiles come from a single fragment for that range. 345 * @return Status 346 */ 347 Status compute_sparse_result_tiles( 348 std::vector<ResultTile>* result_tiles, 349 std::map<std::pair<unsigned, uint64_t>, size_t>* result_tile_map, 350 std::vector<bool>* single_fragment); 351 352 /** 353 * Computes the result coordinates from the sparse fragments. 354 * 355 * @param result_tiles This will store the unique result tiles. 356 * @param result_coords This will store the result coordinates. 357 */ 358 Status compute_result_coords( 359 std::vector<ResultTile>* result_tiles, 360 std::vector<ResultCoords>* result_coords); 361 362 /** 363 * Deduplicates the input result coordinates, breaking ties giving preference 364 * to the largest fragment index (i.e., it prefers more recent fragments). 365 * 366 * @param result_coords The result coordinates to dedup. 367 * @return Status 368 */ 369 Status dedup_result_coords(std::vector<ResultCoords>* result_coords) const; 370 371 /** Performs a read on a dense array. */ 372 Status dense_read(); 373 374 /** 375 * Performs a read on a dense array. 376 * 377 * @tparam The domain type. 378 * @return Status 379 */ 380 template <class T> 381 Status dense_read(); 382 383 /** 384 * Gets all the result coordinates of the input tile into `result_coords`. 385 * 386 * @param result_tile The result tile to read the coordinates from. 387 * @param result_coords The result coordinates to copy into. 388 * @return Status 389 */ 390 Status get_all_result_coords( 391 ResultTile* tile, std::vector<ResultCoords>* result_coords) const; 392 393 /** 394 * Returns `true` if a coordinate buffer for a separate dimension 395 * has been set. 396 */ 397 bool has_separate_coords() const; 398 399 /** Initializes the read state. */ 400 Status init_read_state(); 401 402 /** 403 * Sorts the input result coordinates according to the subarray layout. 404 * 405 * @param iter_begin The start position of the coordinates to sort. 406 * @param iter_end The end position of the coordinates to sort. 407 * @param coords_num The number of coordinates to be sorted. 408 * @param layout The layout to sort into. 409 * @return Status 410 */ 411 Status sort_result_coords( 412 std::vector<ResultCoords>::iterator iter_begin, 413 std::vector<ResultCoords>::iterator iter_end, 414 size_t coords_num, 415 Layout layout) const; 416 417 /** Performs a read on a sparse array. */ 418 Status sparse_read(); 419 420 /** 421 * Adds an extra offset in the end of the offsets buffer indicating the 422 * returned data size if an attribute is var-sized. 423 */ 424 Status add_extra_offset(); 425 426 /** 427 * Returns true if the input tile's MBR of the input fragment is fully 428 * covered by the non-empty domain of a more recent fragment. 429 */ 430 bool sparse_tile_overwritten(unsigned frag_idx, uint64_t tile_idx) const; 431 432 /** 433 * Erases the coordinate tiles (zipped or separate) from the input result 434 * tiles. 435 */ 436 void erase_coord_tiles(std::vector<ResultTile>* result_tiles) const; 437 438 /** Gets statistics about the result cells. */ 439 void get_result_cell_stats( 440 const std::vector<ResultCellSlab>& result_cell_slabs) const; 441 442 /** Gets statistics about the result tiles. */ 443 void get_result_tile_stats( 444 const std::vector<ResultTile*>& result_tiles) const; 445 446 /** 447 * Calculates the hilbert values of the result coordinates between 448 * `iter_begin` and `iter_begin + hilbert_values.size()`. 449 * The hilbert values are stored 450 * in `hilbert_values`, where the first pair value is the hilbert value 451 * and the second is the position of the result coords after the 452 * input iterator. 453 */ 454 Status calculate_hilbert_values( 455 std::vector<ResultCoords>::iterator iter_begin, 456 std::vector<std::pair<uint64_t, uint64_t>>* hilbert_values) const; 457 458 /** 459 * It reorganizes the result coords given the iterator offsets in 460 * `hilbert_values` (second values in the pair). This essentially 461 * sorts the result coordinates starting at `iter_begin` based 462 * on the already sorted hilbert values. 463 * 464 * The algorithm is in-place, operates with O(1) memory and 465 * in O(coords_num) time, but modifies the offsets/positions in 466 * `hilbert_values`. 467 */ 468 Status reorganize_result_coords( 469 std::vector<ResultCoords>::iterator iter_begin, 470 std::vector<std::pair<uint64_t, uint64_t>>* hilbert_values) const; 471 472 /** 473 * Returns true if the result coordinates between the two iterators 474 * belong to the same fragment. 475 */ 476 bool belong_to_single_fragment( 477 std::vector<ResultCoords>::iterator iter_begin, 478 std::vector<ResultCoords>::iterator iter_end) const; 479 480 /** Perform necessary checks before exiting a read loop */ 481 Status complete_read_loop(); 482 }; 483 484 } // namespace sm 485 } // namespace tiledb 486 487 #endif // TILEDB_READER_H 488