1 /** 2 * @file domain.h 3 * 4 * @section LICENSE 5 * 6 * The MIT License 7 * 8 * @copyright Copyright (c) 2017-2021 TileDB, Inc. 9 * 10 * Permission is hereby granted, free of charge, to any person obtaining a copy 11 * of this software and associated documentation files (the "Software"), to deal 12 * in the Software without restriction, including without limitation the rights 13 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 14 * copies of the Software, and to permit persons to whom the Software is 15 * furnished to do so, subject to the following conditions: 16 * 17 * The above copyright notice and this permission notice shall be included in 18 * all copies or substantial portions of the Software. 19 * 20 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 21 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 22 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 23 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 24 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 25 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 26 * THE SOFTWARE. 27 * 28 * @section DESCRIPTION 29 * 30 * This file defines class Domain. 31 */ 32 33 #ifndef TILEDB_DOMAIN_H 34 #define TILEDB_DOMAIN_H 35 36 #include "tiledb/common/macros.h" 37 #include "tiledb/common/status.h" 38 #include "tiledb/sm/misc/types.h" 39 #include "tiledb/sm/query/query_buffer.h" 40 #include "tiledb/sm/query/result_coords.h" 41 42 #include <vector> 43 44 using namespace tiledb::common; 45 46 namespace tiledb { 47 namespace sm { 48 49 class Buffer; 50 class ConstBuffer; 51 class Dimension; 52 53 enum class Datatype : uint8_t; 54 enum class Layout : uint8_t; 55 56 /** Defines an array domain, which consists of dimensions. */ 57 class Domain { 58 public: 59 /* ********************************* */ 60 /* CONSTRUCTORS & DESTRUCTORS */ 61 /* ********************************* */ 62 63 /** Empty constructor. */ 64 Domain(); 65 66 /** 67 * Constructor that clones the input domain. 68 * 69 * @param domain The object to clone. 70 */ 71 explicit Domain(const Domain* domain); 72 73 /** Copy constructor. */ 74 DISABLE_COPY(Domain); 75 76 /** Move constructor. */ 77 Domain(Domain&& rhs); 78 79 /** Destructor. */ 80 ~Domain() = default; 81 82 /* ********************************* */ 83 /* OPERATORS */ 84 /* ********************************* */ 85 86 /** Copy-assignment operator. */ 87 DISABLE_COPY_ASSIGN(Domain); 88 89 /** Move-assignment operator. */ 90 Domain& operator=(Domain&& rhs); 91 92 /* ********************************* */ 93 /* API */ 94 /* ********************************* */ 95 96 /** 97 * Adds a dimension to the domain. 98 * 99 * @param dim The dimension to be added. 100 * @return Status 101 */ 102 Status add_dimension(const Dimension* dim); 103 104 /** Returns true if all dimensions have fixed-sized domain datatypes. */ 105 bool all_dims_fixed() const; 106 107 /** Returns true if all dimensions have integer domain types. */ 108 bool all_dims_int() const; 109 110 /** Returns true if all dimensions have string domain types. */ 111 bool all_dims_string() const; 112 113 /** Returns true if all dimensions have real domain types. */ 114 bool all_dims_real() const; 115 116 /** Returns true if all dimensions have the same type. */ 117 bool all_dims_same_type() const; 118 119 /** Returns the number of cells per tile (only for the dense case). */ 120 uint64_t cell_num_per_tile() const; 121 122 /** 123 * Checks the cell order of the input coordinates. Note that, in the presence 124 * of a regular tile grid, this function assumes that the cells are in the 125 * same regular tile. 126 * 127 * @tparam T The coordinates type. 128 * @param dim_idx The dimension to compare the coordinates on. 129 * @param a The first input coordinates. 130 * @param b The second input coordinates. 131 * @return One of the following: 132 * - -1 if the first coordinate precedes the second 133 * - 0 if the two coordinates are identical 134 * - +1 if the first coordinate succeeds the second 135 */ 136 int cell_order_cmp( 137 unsigned dim_idx, const ResultCoords& a, const ResultCoords& b) const; 138 139 /** 140 * Checks the cell order of the input coordinates. Since the coordinates 141 * are given for a single dimension, this function simply checks which 142 * coordinate is larger. 143 * 144 * @param coord_a The first coordinate. 145 * @param coord_b The second coordinate. 146 * @return One of the following: 147 * - -1 if the first coordinate is smaller than the second 148 * - 0 if the two coordinates have the same cell order 149 * - +1 if the first coordinate is larger than the second 150 */ 151 template <class T> 152 static int cell_order_cmp_2(const void* coord_a, const void* coord_b); 153 154 /** 155 * Checks the cell order of the input coordinates. Since the coordinates 156 * are given for a single dimension, this function simply checks which 157 * coordinate is larger. 158 * 159 * @param dim The dimension to check the coordinates on. 160 * @param buff The buffer that stores all coordinates. 161 * @param a The position of the first coordinate in the buffer to check. 162 * @param b The position of the second coordinate in the buffer to check. 163 * @return One of the following: 164 * - -1 if the first coordinate is smaller than the second 165 * - 0 if the two coordinates have the same cell order 166 * - +1 if the first coordinate is larger than the second 167 */ 168 template <class T> 169 static int cell_order_cmp( 170 const Dimension* dim, const QueryBuffer* buff, uint64_t a, uint64_t b); 171 172 /** 173 * Checks the cell order of the input coordinates. 174 * 175 * @param coord_buffs The input coordinates, given n separate buffers, 176 * one per dimension. The buffers are sorted in the same order of the 177 * dimensions as defined in the array schema. 178 * @param a The position of the first coordinate tuple across all buffers. 179 * @param b The position of the second coordinate tuple across all buffers. 180 * @return One of the following: 181 * - -1 if the first coordinates precede the second on the cell order 182 * - 0 if the two coordinates have the same cell order 183 * - +1 if the first coordinates succeed the second on the cell order 184 */ 185 int cell_order_cmp( 186 const std::vector<const QueryBuffer*>& coord_buffs, 187 uint64_t a, 188 uint64_t b) const; 189 190 /** 191 * Populates the object members from the data in the input binary buffer. 192 * 193 * @param buff The buffer to deserialize from. 194 * @param version The array schema version. 195 * @return Status 196 */ 197 Status deserialize(ConstBuffer* buff, uint32_t version); 198 199 /** Returns the cell order. */ 200 Layout cell_order() const; 201 202 /** 203 * Crops the input ND range such that it does not exceed the array domain. 204 */ 205 void crop_ndrange(NDRange* ndrange) const; 206 207 /** Returns the tile order. */ 208 Layout tile_order() const; 209 210 /** Returns the number of dimensions. */ 211 unsigned int dim_num() const; 212 213 /** Returns the domain along the i-th dimension. */ 214 const Range& domain(unsigned i) const; 215 216 /** Returns the domain as a N-dimensional range object. */ 217 NDRange domain() const; 218 219 /** Returns the i-th dimensions (nullptr upon error). */ 220 const Dimension* dimension(unsigned int i) const; 221 222 /** Returns the dimension given a name (nullptr upon error). */ 223 const Dimension* dimension(const std::string& name) const; 224 225 /** Dumps the domain in ASCII format in the selected output. */ 226 void dump(FILE* out) const; 227 228 /** Expands ND range `r2` using ND range `r1`. */ 229 void expand_ndrange(const NDRange& r1, NDRange* r2) const; 230 231 /** 232 * Expands the input domain such that it coincides with the boundaries of 233 * the array's regular tiles (i.e., it maps it on the regular tile grid). 234 * If the array has no regular tile grid or real domain, the function 235 * does not do anything. 236 */ 237 void expand_to_tiles(NDRange* ndrange) const; 238 239 /** 240 * Retrieves the tile coordinates of the input cell coordinates. 241 * 242 * @tparam T The domain type. 243 * @param coords The cell coordinates. 244 * @param tile_coords The tile coordinates of the cell coordinates to 245 * be retrieved. 246 */ 247 template <class T> 248 void get_tile_coords(const T* coords, T* tile_coords) const; 249 250 /** 251 * Retrieves the end of a cell slab starting at the `start` input 252 * coordinates. The cell slab is determined based on the domain 253 * tile/cell order and the input query `layout`. Essentially a 254 * cell slab is a contiguous (in the global cell order) range of 255 * cells that follow the query layout. 256 * 257 * @tparam T The domain type. 258 * @param subarray The subarray in which the end of the cell slab must 259 * be contained. 260 * @param start The start coordinates. 261 * @param layout The query layout. 262 * @param end The cell slab end coordinates to be retrieved. 263 */ 264 template <class T> 265 void get_end_of_cell_slab(T* subarray, T* start, Layout layout, T* end) const; 266 267 /** 268 * Retrieves the next tile coordinates along the array tile order within a 269 * given tile domain. Applicable only to **dense** arrays. 270 * 271 * @tparam T The coordinates type. 272 * @param domain The targeted domain. 273 * @param tile_coords The input tile coordinates, which the function modifies 274 * to store the next tile coordinates at termination. 275 * @return void 276 */ 277 template <class T> 278 void get_next_tile_coords(const T* domain, T* tile_coords) const; 279 280 /** 281 * Retrieves the next tile coordinates along the array tile order within a 282 * given tile domain. Applicable only to **dense** arrays. 283 * 284 * @tparam T The coordinates type. 285 * @param domain The targeted domain. 286 * @param tile_coords The input tile coordinates, which the function modifies 287 * to store the next tile coordinates at termination. 288 * @param in Set to `true` if the retrieve coords are inside the domain. 289 * @return void 290 */ 291 template <class T> 292 void get_next_tile_coords(const T* domain, T* tile_coords, bool* in) const; 293 294 /** 295 * Gets the tile domain of the input cell `subarray`. 296 * 297 * @tparam T The domain type. 298 * @param subarray The input (cell) subarray. 299 * @param tile_subarray The tile subarray in the tile domain to be retrieved. 300 * @return void 301 */ 302 template <class T> 303 void get_tile_domain(const T* subarray, T* tile_subarray) const; 304 305 /** 306 * Returns the tile position along the array tile order within the input 307 * domain. Applicable only to **dense** arrays. 308 * 309 * @tparam T The domain type. 310 * @param domain The input domain, which is a cell domain partitioned into 311 * regular tiles in the same manner as that of the array domain (however 312 * *domain* may be a sub-domain of the array domain). 313 * @param tile_coords The tile coordinates, normalized inside the tile 314 * domain of cell `domain`. 315 * @return The tile position of *tile_coords* along the tile order of the 316 * array inside the input domain. 317 */ 318 template <class T> 319 uint64_t get_tile_pos(const T* domain, const T* tile_coords) const; 320 321 /** 322 * Gets the tile subarray for the input tile coordinates. 323 * 324 * @tparam T The coordinates type. 325 * @param tile_coords The input tile coordinates. 326 * @param tile_subarray The output tile subarray. 327 * @return void. 328 */ 329 template <class T> 330 void get_tile_subarray(const T* tile_coords, T* tile_subarray) const; 331 332 /** 333 * Gets the tile subarray for the input tile coordinates. The tile 334 * coordinates are with respect to the input `domain`. 335 * 336 * @tparam T The coordinates type. 337 * @param domain The domain `tile_coords` are in reference to. 338 * @param tile_coords The input tile coordinates. 339 * @param tile_subarray The output tile subarray. 340 * @return void. 341 */ 342 template <class T> 343 void get_tile_subarray( 344 const T* domain, const T* tile_coords, T* tile_subarray) const; 345 346 /** 347 * Checks if the domain has a dimension of the given name. 348 * 349 * @param name Name of dimension to check for 350 * @param has_dim Set to true if the domain has a dimension of the given name. 351 * @return Status 352 */ 353 Status has_dimension(const std::string& name, bool* has_dim) const; 354 355 /** 356 * Gets the index in the domain of a given dimension name 357 * 358 * @param name Name of dimension to check for 359 * @param dim_idx The index of this dimension in the domain 360 * @return Status 361 */ 362 Status get_dimension_index(const std::string& name, unsigned* dim_idx) const; 363 364 /** 365 * Initializes the domain. 366 * 367 * @param cell_order The cell order of the array the domain belongs to. 368 * @param tile_order The cell order of the array the domain belongs to. 369 * @return Status 370 */ 371 Status init(Layout cell_order, Layout tile_order); 372 373 /** Returns true if at least one dimension has null tile extent. */ 374 bool null_tile_extents() const; 375 376 /** 377 * Serializes the object members into a binary buffer. 378 * 379 * @param buff The buffer to serialize the data into. 380 * @param version The array schema version. 381 * @return Status 382 */ 383 Status serialize(Buffer* buff, uint32_t version); 384 385 /** 386 * For every dimension that has a null tile extent, it sets 387 * the tile extent to that dimension domain range. 388 */ 389 Status set_null_tile_extents_to_range(); 390 391 /** 392 * Based on the input subarray layout and the domain's cell layout 393 * and tile extents, this returns the number of cells that each 394 * pair of adjacent cells in a result cell slab (produced during the 395 * read algorithm for that subarray) are apart. If it is 396 * `UINT64_MAX`, then the cells in the result cell slabs are contiguous. 397 */ 398 template <class T> 399 uint64_t stride(Layout subarray_layout) const; 400 401 /** Returns the tile extent along the i-th dimension. */ 402 const ByteVecValue& tile_extent(unsigned i) const; 403 404 /** Returns the tile extents. */ 405 std::vector<ByteVecValue> tile_extents() const; 406 407 /** 408 * Returns the number of tiles intersecting the input ND range. 409 * Returns 0 if even a single dimension has non-integral type. 410 */ 411 uint64_t tile_num(const NDRange& ndrange) const; 412 413 /** 414 * Returns the number of cells in the input range. 415 * If there is an overflow, then the function returns MAX_UINT64. 416 * If at least one dimension had a non-integer domain, the 417 * function returns MAX_UINT64. 418 */ 419 uint64_t cell_num(const NDRange& ndrange) const; 420 421 /** Returns true if r1 is fully covered by r2. */ 422 bool covered(const NDRange& r1, const NDRange& r2) const; 423 424 /** Returns true if the two ND ranges overlap. */ 425 bool overlap(const NDRange& r1, const NDRange& r2) const; 426 427 /** 428 * Return ratio of the overalp of the two input ND ranges over 429 * the volume of `r2`. 430 */ 431 double overlap_ratio(const NDRange& r1, const NDRange& r2) const; 432 433 /** 434 * Checks the tile order of the input coordinates on the given dimension. 435 * 436 * @param The dimension to compare on. 437 * @param coord_a The first coordinate. 438 * @param coord_b The second coordinate. 439 * @return One of the following: 440 * - -1 if the first coordinate precedes the second on the tile order 441 * - 0 if the two coordinates have the same tile order 442 * - +1 if the first coordinate succeeds the second on the tile order 443 */ 444 template <class T> 445 static int tile_order_cmp( 446 const Dimension* dim, const void* coord_a, const void* coord_b); 447 448 /** 449 * Checks the tile order of the input coordinates. 450 * 451 * @param coord_buffs The input coordinates, given n separate buffers, 452 * one per dimension. The buffers are sorted in the same order of the 453 * dimensions as defined in the array schema. 454 * @param a The position of the first coordinate tuple across all buffers. 455 * @param b The position of the second coordinate tuple across all buffers. 456 * @return One of the following: 457 * - -1 if the first coordinates precede the second on the tile order 458 * - 0 if the two coordinates have the same tile order 459 * - +1 if the first coordinates succeed the second on the tile order 460 */ 461 int tile_order_cmp( 462 const std::vector<const QueryBuffer*>& coord_buffs, 463 uint64_t a, 464 uint64_t b) const; 465 466 /** 467 * Checks the tile order of the input coordinates for a given dimension. 468 * 469 * @param dim_idx The index of the dimension to focus on. 470 * @param coord_a The first coordinate on the given dimension. 471 * @param coord_b The second coordinate on the given dimension. 472 * @return One of the following: 473 * - -1 if the first coordinate precedes the second on the tile order 474 * - 0 if the two coordinates have the same tile order 475 * - +1 if the first coordinate succeeds the second on the tile order 476 */ 477 int tile_order_cmp( 478 unsigned dim_idx, const void* coord_a, const void* coord_b) const; 479 480 private: 481 /* ********************************* */ 482 /* PRIVATE ATTRIBUTES */ 483 /* ********************************* */ 484 485 /** The number of cells per tile. Meaningful only for the **dense** case. */ 486 uint64_t cell_num_per_tile_; 487 488 /** The cell order of the array the domain belongs to. */ 489 Layout cell_order_; 490 491 /** The domain dimensions. */ 492 std::vector<tdb_unique_ptr<Dimension>> dimensions_; 493 494 /** The number of dimensions. */ 495 unsigned dim_num_; 496 497 /** The tile order of the array the domain belongs to. */ 498 Layout tile_order_; 499 500 /** 501 * Vector of functions, one per dimension, for comparing the cell order of 502 * two coordinates. The inputs to the function are: 503 * 504 * - dim: The dimension to check the coordinates on. 505 * - buff: The buffer that stores all coorinates; 506 * - a, b: The positions of the two coordinates in the buffer to compare. 507 */ 508 std::vector<int (*)( 509 const Dimension* dim, const QueryBuffer* buff, uint64_t a, uint64_t b)> 510 cell_order_cmp_func_; 511 512 /** 513 * Vector of functions, one per dimension, for comparing the cell order of 514 * two coordinates. The inputs to the function are: 515 * 516 * - coord_a, coord_b: The two coordinates to compare. 517 */ 518 std::vector<int (*)(const void* coord_a, const void* coord_b)> 519 cell_order_cmp_func_2_; 520 521 /** 522 * Vector of functions, one per dimension, for comparing the tile order of 523 * two coordinates. The inputs to the function are: 524 * 525 * - dim: The dimension to compare on. 526 * - coord_a, coord_b: The two coordinates to compare. 527 */ 528 std::vector<int (*)( 529 const Dimension* dim, const void* coord_a, const void* coord_b)> 530 tile_order_cmp_func_; 531 532 /* ********************************* */ 533 /* PRIVATE METHODS */ 534 /* ********************************* */ 535 536 /** Compute the number of cells per tile. */ 537 void compute_cell_num_per_tile(); 538 539 /** 540 * Compute the number of cells per tile. 541 * 542 * @tparam T The coordinates type. 543 * @return void 544 */ 545 template <class T> 546 void compute_cell_num_per_tile(); 547 548 /** Prepares the comparator functions for each dimension. */ 549 void set_tile_cell_order_cmp_funcs(); 550 551 /** 552 * Retrieves the next tile coordinates along the array tile order within a 553 * given tile domain. Applicable only to **dense** arrays, and focusing on 554 * the **column-major** tile order. 555 * 556 * @tparam T The coordinates type. 557 * @param domain The targeted domain. 558 * @param tile_coords The input tile coordinates, which the function modifies 559 * to store the next tile coordinates at termination. 560 * @return void 561 */ 562 template <class T> 563 void get_next_tile_coords_col(const T* domain, T* tile_coords) const; 564 565 /** 566 * Retrieves the next tile coordinates along the array tile order within a 567 * given tile domain. Applicable only to **dense** arrays, and focusing on 568 * the **column-major** tile order. 569 * 570 * @tparam T The coordinates type. 571 * @param domain The targeted domain. 572 * @param tile_coords The input tile coordinates, which the function modifies 573 * to store the next tile coordinates at termination. 574 * @param in Set to `true` if the retrieved coords are inside the domain. 575 * @return void 576 */ 577 template <class T> 578 void get_next_tile_coords_col( 579 const T* domain, T* tile_coords, bool* in) const; 580 581 /** 582 * Retrieves the next tile coordinates along the array tile order within a 583 * given tile domain. Applicable only to **dense** arrays, and focusing on 584 * the **row-major** tile order. 585 * 586 * @tparam T The coordinates type. 587 * @param domain The targeted domain. 588 * @param tile_coords The input tile coordinates, which the function modifies 589 * to store the next tile coordinates at termination. 590 * @return void 591 */ 592 template <class T> 593 void get_next_tile_coords_row(const T* domain, T* tile_coords) const; 594 595 /** 596 * Retrieves the next tile coordinates along the array tile order within a 597 * given tile domain. Applicable only to **dense** arrays, and focusing on 598 * the **row-major** tile order. 599 * 600 * @tparam T The coordinates type. 601 * @param domain The targeted domain. 602 * @param tile_coords The input tile coordinates, which the function modifies 603 * to store the next tile coordinates at termination. 604 * @param in Set to `true` if the retrieved coords are inside the domain. 605 * @return void 606 */ 607 template <class T> 608 void get_next_tile_coords_row( 609 const T* domain, T* tile_coords, bool* in) const; 610 611 /** 612 * Returns the tile position along the array tile order within the input 613 * domain. Applicable only to **dense** arrays, and focusing on the 614 * **column-major** tile order. 615 * 616 * @tparam T The domain type. 617 * @param domain The input domain, which is a cell domain partitioned into 618 * regular tiles in the same manner as that of the array domain 619 * (however *domain* may be a sub-domain of the array domain). 620 * @param tile_coords The tile coordinates. 621 * @return The tile position of *tile_coords* along the tile order of the 622 * array inside the input domain. 623 */ 624 template <class T> 625 uint64_t get_tile_pos_col(const T* domain, const T* tile_coords) const; 626 627 /** 628 * Returns the tile position along the array tile order within the input 629 * domain. Applicable only to **dense** arrays, and focusing on the 630 * **row-major** tile order. 631 * 632 * @tparam T The domain type. 633 * @param domain The input domain, which is a cell domain partitioned into 634 * regular tiles in the same manner as that of the array domain (however 635 * *domain* may be a sub-domain of the array domain). 636 * @param tile_coords The tile coordinates. 637 * @return The tile position of *tile_coords* along the tile order of the 638 * array inside the input domain. 639 */ 640 template <class T> 641 uint64_t get_tile_pos_row(const T* domain, const T* tile_coords) const; 642 }; 643 644 } // namespace sm 645 } // namespace tiledb 646 647 #endif // TILEDB_DOMAIN_H 648