1 /** 2 * @file fragment_metadata.h 3 * 4 * @section LICENSE 5 * 6 * The MIT License 7 * 8 * @copyright Copyright (c) 2017-2021 TileDB, Inc. 9 * @copyright Copyright (c) 2016 MIT and Intel Corporation 10 * 11 * Permission is hereby granted, free of charge, to any person obtaining a copy 12 * of this software and associated documentation files (the "Software"), to deal 13 * in the Software without restriction, including without limitation the rights 14 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 15 * copies of the Software, and to permit persons to whom the Software is 16 * furnished to do so, subject to the following conditions: 17 * 18 * The above copyright notice and this permission notice shall be included in 19 * all copies or substantial portions of the Software. 20 * 21 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 22 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 23 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 24 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 25 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 26 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 27 * THE SOFTWARE. 28 * 29 * @section DESCRIPTION 30 * 31 * This file defines class FragmentMetadata. 32 */ 33 34 #ifndef TILEDB_FRAGMENT_METADATA_H 35 #define TILEDB_FRAGMENT_METADATA_H 36 37 #include <mutex> 38 #include <unordered_map> 39 #include <vector> 40 41 #include "tiledb/common/status.h" 42 #include "tiledb/sm/misc/types.h" 43 #include "tiledb/sm/misc/uri.h" 44 #include "tiledb/sm/rtree/rtree.h" 45 46 using namespace tiledb::common; 47 48 namespace tiledb { 49 namespace sm { 50 51 class ArraySchema; 52 class Buffer; 53 class EncryptionKey; 54 class OpenArrayMemoryTracker; 55 class StorageManager; 56 57 /** Stores the metadata structures of a fragment. 
*/ 58 class FragmentMetadata { 59 public: 60 /* ********************************* */ 61 /* CONSTRUCTORS & DESTRUCTORS */ 62 /* ********************************* */ 63 64 /** 65 * Default constructor. 66 */ 67 FragmentMetadata() = default; 68 69 /** 70 * Constructor. 71 * 72 * @param storage_manager A storage manager instance. 73 * @param array_schema The schema of the array the fragment belongs to. 74 * @param fragment_uri The fragment URI. 75 * @param timestamp_range The timestamp range of the fragment. 76 * In TileDB, timestamps are in ms elapsed since 77 * 1970-01-01 00:00:00 +0000 (UTC). 78 * @param dense Indicates whether the fragment is dense or sparse. 79 */ 80 FragmentMetadata( 81 StorageManager* storage_manager, 82 const ArraySchema* array_schema, 83 const URI& fragment_uri, 84 const std::pair<uint64_t, uint64_t>& timestamp_range, 85 bool dense = true); 86 87 /** Destructor. */ 88 ~FragmentMetadata(); 89 90 // Copy initialization 91 FragmentMetadata(const FragmentMetadata& other); 92 93 FragmentMetadata& operator=(const FragmentMetadata& other); 94 95 /* ********************************* */ 96 /* API */ 97 /* ********************************* */ 98 99 /** Returns the number of cells in the fragment. */ 100 uint64_t cell_num() const; 101 102 /** Returns the number of cells in the tile at the input position. */ 103 uint64_t cell_num(uint64_t tile_pos) const; 104 105 /** 106 * Computes an upper bound on the buffer sizes needed when reading a subarray 107 * from the fragment, for a given set of attributes. Note that these upper 108 * bounds is added to those in `buffer_sizes`. 109 * 110 * @param encryption_key The encryption key the array was opened with. 111 * @param subarray The targeted subarray. 112 * @param buffer_sizes The upper bounds will be added to this map. The latter 113 * maps an attribute to a buffer size pair. For fix-sized attributes, only 114 * the first size is useful. 
For var-sized attributes, the first is the 115 * offsets size, whereas the second is the data size. 116 * @return Status 117 */ 118 Status add_max_buffer_sizes( 119 const EncryptionKey& encryption_key, 120 const void* subarray, 121 std::unordered_map<std::string, std::pair<uint64_t, uint64_t>>* 122 buffer_sizes); 123 124 /** 125 * Computes an upper bound on the buffer sizes needed when reading a subarray 126 * from the fragment, for a given set of attributes. Note that these upper 127 * bounds is added to those in `buffer_sizes`. Applicable only to the dense 128 * case. 129 * 130 * @param subarray The targeted subarray. 131 * @param buffer_sizes The upper bounds will be added to this map. The latter 132 * maps an attribute to a buffer size pair. For fix-sized attributes, only 133 * the first size is useful. For var-sized attributes, the first is the 134 * offsets size, whereas the second is the data size. 135 * @return Status 136 */ 137 Status add_max_buffer_sizes_dense( 138 const void* subarray, 139 std::unordered_map<std::string, std::pair<uint64_t, uint64_t>>* 140 buffer_sizes); 141 142 /** 143 * Computes an upper bound on the buffer sizes needed when reading a subarray 144 * from the fragment, for a given set of attributes. Note that these upper 145 * bounds is added to those in `buffer_sizes`. Applicable only to the dense 146 * case. 147 * 148 * @tparam T The coordinates type. 149 * @param subarray The targeted subarray. 150 * @param buffer_sizes The upper bounds will be added to this map. The latter 151 * maps an attribute to a buffer size pair. For fix-sized attributes, only 152 * the first size is useful. For var-sized attributes, the first is the 153 * offsets size, whereas the second is the data size. 
154 * @return Status 155 */ 156 template <class T> 157 Status add_max_buffer_sizes_dense( 158 const T* subarray, 159 std::unordered_map<std::string, std::pair<uint64_t, uint64_t>>* 160 buffer_sizes); 161 162 /** 163 * Computes an upper bound on the buffer sizes needed when reading a subarray 164 * from the fragment, for a given set of attributes. Note that these upper 165 * bounds are added to those in `buffer_sizes`. Applicable only to the sparse 166 * case. 167 * 168 * @param encryption_key The encryption key the array was opened with. 169 * @param subarray The targeted subarray. 170 * @param buffer_sizes The upper bounds will be added to this map. The latter 171 * maps an attribute to a buffer size pair. For fixed-sized attributes, only 172 * the first size is useful. For var-sized attributes, the first is the 173 * offsets size, whereas the second is the data size. 174 * @return Status 175 */ 176 Status add_max_buffer_sizes_sparse( 177 const EncryptionKey& encryption_key, 178 const NDRange& subarray, 179 std::unordered_map<std::string, std::pair<uint64_t, uint64_t>>* 180 buffer_sizes); 181 182 /** 183 * Returns the ids (positions) of the tiles overlapping `subarray`, along with 184 * the coverage of the overlap. Each returned pair holds a tile id first and its coverage second. 185 */ 186 template <class T> 187 std::vector<std::pair<uint64_t, double>> compute_overlapping_tile_ids_cov( 188 const T* subarray) const; 189 190 /** 191 * Returns true if the corresponding fragment is dense, and false if it 192 * is sparse. 193 */ 194 bool dense() const; 195 196 /** Returns the (expanded) domain in which the fragment is constrained. */ 197 const NDRange& domain() const; 198 199 /** Returns the format version of this fragment. */ 200 uint32_t format_version() const; 201 202 /** Retrieves the fragment size. */ 203 Status fragment_size(uint64_t* size) const; 204 205 /** Returns the fragment URI. */ 206 const URI& fragment_uri() const; 207 208 /** Returns true if the metadata footer is consolidated. 
*/ 209 bool has_consolidated_footer() const; 210 211 /** 212 * Returns true if the input range overlaps the non-empty 213 * domain of the fragment. 214 */ 215 bool overlaps_non_empty_domain(const NDRange& range) const; 216 217 /** 218 * Retrieves the overlap of all MBRs with the input ND range and stores it 219 * in `tile_overlap`. NOTE(review): the previous comment said an encryption key is needed to load metadata on-the-fly, but this signature takes none; presumably the required metadata (e.g. the R-tree, see `load_rtree`) must already be loaded -- confirm against the definition. 220 */ 221 Status get_tile_overlap(const NDRange& range, TileOverlap* tile_overlap); 222 223 /** 224 * Computes the tile bitmap for the current fragment/range/dimension. The result is written to `tile_bitmap`. 225 */ 226 void compute_tile_bitmap( 227 const Range& range, unsigned d, std::vector<uint8_t>* tile_bitmap); 228 229 /** 230 * Initializes the fragment metadata structures. 231 * 232 * @param non_empty_domain The non-empty domain in which the array read/write 233 * will be constrained. 234 * @return Status 235 */ 236 Status init(const NDRange& non_empty_domain); 237 238 /** Returns the number of cells in the last tile. */ 239 uint64_t last_tile_cell_num() const; 240 241 /** 242 * Loads the basic metadata from storage or `f_buff` for later 243 * versions if it is not `nullptr`. 244 */ 245 Status load( 246 const EncryptionKey& encryption_key, 247 Buffer* f_buff, 248 uint64_t offset, 249 std::unordered_map<std::string, tiledb_shared_ptr<ArraySchema>> 250 array_schemas); 251 252 /** Stores all the metadata to storage. */ 253 Status store(const EncryptionKey& encryption_key); 254 255 /** Returns the non-empty domain in which the fragment is constrained. */ 256 const NDRange& non_empty_domain(); 257 258 /** 259 * Simply sets the number of cells for the last tile. 260 * 261 * @param cell_num The number of cells for the last tile. 262 * @return void 263 */ 264 void set_last_tile_cell_num(uint64_t cell_num); 265 266 /** 267 * Sets the input tile's MBR in the fragment metadata. It also expands the 268 * non-empty domain of the fragment. 269 * 270 * @param tile The tile index whose MBR will be set. 
271 * @param mbr The MBR to be set. 272 * @return Status 273 */ 274 Status set_mbr(uint64_t tile, const NDRange& mbr); 275 276 /** 277 * Resizes the per-tile metadata vectors for the given number of tiles. This 278 * is not serialized, and is only used during writes. 279 * 280 * @param num_tiles Number of tiles 281 * @return Status 282 */ 283 Status set_num_tiles(uint64_t num_tiles); 284 285 /** 286 * Sets the tile "index base" which is added to the tile index in the set_*() 287 * functions. Only used during global order writes/appends. 288 * 289 * Ex: if the first global order write adds 2 tiles (indices 0 and 1) to the 290 * metadata, then tile index 0 in the second global order write should be tile 291 * index 2 in the metadata, since there are already 2 tiles in the metadata. 292 * 293 * @param tile_base New tile index base 294 */ 295 void set_tile_index_base(uint64_t tile_base); 296 297 /** 298 * Sets a tile offset for the input attribute or dimension. 299 * 300 * @param name The attribute/dimension for which the offset is set. 301 * @param tid The index of the tile for which the offset is set. 302 * @param step This is essentially the step by which the previous 303 * offset will be expanded. It is practically the last tile size. 304 * @return void 305 */ 306 void set_tile_offset(const std::string& name, uint64_t tid, uint64_t step); 307 308 /** 309 * Sets a variable tile offset for the input attribute or dimension. 310 * 311 * @param name The attribute/dimension for which the offset is set. 312 * @param tid The index of the tile for which the offset is set. 313 * @param step This is essentially the step by which the previous 314 * offset will be expanded. It is practically the last variable tile size. 315 * @return void 316 */ 317 void set_tile_var_offset( 318 const std::string& name, uint64_t tid, uint64_t step); 319 320 /** 321 * Sets a variable tile size for the input attribute or dimension. 
322 * 323 * @param name The attribute/dimension for which the size is set. 324 * @param tid The index of the tile for which the offset is set. 325 * @param size The size to be appended. 326 * @return void 327 */ 328 void set_tile_var_size(const std::string& name, uint64_t tid, uint64_t size); 329 330 /** 331 * Sets a validity tile offset for the input attribute. 332 * 333 * @param name The attribute for which the offset is set. 334 * @param tid The index of the tile for which the offset is set. 335 * @param step This is essentially the step by which the previous 336 * offset will be expanded. It is practically the last tile size. 337 * @return void 338 */ 339 void set_tile_validity_offset( 340 const std::string& name, uint64_t tid, uint64_t step); 341 342 /** 343 * Sets array schema pointer. 344 * 345 * @param array_schema The schema pointer. 346 * @return void 347 */ 348 void set_array_schema(ArraySchema* array_schema); 349 350 /** Returns the tile index base value. */ 351 uint64_t tile_index_base() const; 352 353 /** Returns the number of tiles in the fragment. */ 354 uint64_t tile_num() const; 355 356 /** Returns the URI of the input attribute/dimension. */ 357 URI uri(const std::string& name) const; 358 359 /** Returns the URI of the input variable-sized attribute/dimension. */ 360 URI var_uri(const std::string& name) const; 361 362 /** Returns the validity URI of the input nullable attribute. */ 363 URI validity_uri(const std::string& name) const; 364 365 /** Return the array schema name. */ 366 const std::string& array_schema_name(); 367 368 /** 369 * Retrieves the starting offset of the input tile of the input attribute 370 * or dimension in the file. If the attribute/dimension is var-sized, it 371 * returns the starting offset of the offsets tile. 372 * 373 * @param name The input attribute/dimension. 374 * @param tile_idx The index of the tile in the metadata. 375 * @param offset The file offset to be retrieved. 
376 * @return Status 377 */ 378 Status file_offset( 379 const std::string& name, uint64_t tile_idx, uint64_t* offset); 380 381 /** 382 * Retrieves the starting offset of the input tile of the input attribute or 383 * dimension in the file. The attribute/dimension must be var-sized. 384 * 385 * @param name The input attribute/dimension. 386 * @param tile_idx The index of the tile in the metadata. 387 * @param offset The file offset to be retrieved. 388 * @return Status 389 */ 390 Status file_var_offset( 391 const std::string& name, uint64_t tile_idx, uint64_t* offset); 392 393 /** 394 * Retrieves the starting offset of the input validity tile of the 395 * input attribute in the file. 396 * 397 * @param name The input attribute. 398 * @param tile_idx The index of the tile in the metadata. 399 * @param offset The file offset to be retrieved. 400 * @return Status 401 */ 402 Status file_validity_offset( 403 const std::string& name, uint64_t tile_idx, uint64_t* offset); 404 405 /** 406 * Retrieves the size of the fragment metadata footer 407 * (which contains the generic tile offsets) for the input `version`. * * @param version The version to compute the footer size for (presumably the fragment format version -- confirm against the definition). * @param size The footer size to be retrieved. * @return Status 408 */ 409 Status get_footer_size(uint32_t version, uint64_t* size) const; 410 /** Returns the size of the fragment metadata footer (presumably the stored `footer_size_` -- confirm against the definition). */ 411 uint64_t footer_size() const; 412 413 /** Returns the MBR of the input tile. */ 414 const NDRange& mbr(uint64_t tile_idx) const; 415 416 /** Returns all the MBRs of all tiles in the fragment. */ 417 const std::vector<NDRange>& mbrs() const; 418 419 /** 420 * Retrieves the size of the tile when it is persisted (e.g. the size of the 421 * compressed tile on disk) for a given attribute or dimension and tile index. 422 * If the attribute/dimension is var-sized, this will return the persisted 423 * size of the offsets tile. 424 * 425 * @param name The input attribute/dimension. 426 * @param tile_idx The index of the tile in the metadata. 427 * @param tile_size The tile size to be retrieved. 
428 * @return Status 429 */ 430 Status persisted_tile_size( 431 const std::string& name, uint64_t tile_idx, uint64_t* tile_size); 432 433 /** 434 * Retrieves the size of the tile when it is persisted (e.g. the size of the 435 * compressed tile on disk) for a given var-sized attribute or dimension 436 * and tile index. 437 * 438 * @param name The input attribute/dimension. 439 * @param tile_idx The index of the tile in the metadata. 440 * @param tile_size The tile size to be retrieved. 441 * @return Status 442 */ 443 Status persisted_tile_var_size( 444 const std::string& name, uint64_t tile_idx, uint64_t* tile_size); 445 446 /** 447 * Retrieves the size of the validity tile when it is persisted (e.g. the size 448 * of the compressed tile on disk) for a given attribute. 449 * 450 * @param name The input attribute. 451 * @param tile_idx The index of the tile in the metadata. 452 * @param tile_size The tile size to be retrieved. 453 * @return Status 454 */ 455 Status persisted_tile_validity_size( 456 const std::string& name, uint64_t tile_idx, uint64_t* tile_size); 457 458 /** 459 * Returns the (uncompressed) tile size for a given attribute or dimension 460 * and tile index. If the attribute/dimension is var-sized, this will return 461 * the size of the offsets tile. 462 * 463 * @param name The input attribute/dimension. 464 * @param tile_idx The index of the tile in the metadata. 465 * @return The tile size. 466 */ 467 uint64_t tile_size(const std::string& name, uint64_t tile_idx) const; 468 469 /** 470 * Retrieves the (uncompressed) tile size for a given var-sized attribute or 471 * dimension and tile index. 472 * 473 * @param name The input attribute/dimension. 474 * @param tile_idx The index of the tile in the metadata. 475 * @param tile_size The tile size to be retrieved. 
476 * @return Status 477 */ 478 Status tile_var_size( 479 const std::string& name, uint64_t tile_idx, uint64_t* tile_size); 480 481 /** Returns the first timestamp of the fragment timestamp range. */ 482 uint64_t first_timestamp() const; 483 484 /** Returns the fragment timestamp range. */ 485 const std::pair<uint64_t, uint64_t>& timestamp_range() const; 486 487 /** 488 * Returns `true` if the timestamp of the first operand is smaller, 489 * breaking ties based on the URI string. 490 */ 491 bool operator<(const FragmentMetadata& metadata) const; 492 493 /** Serializes the fragment metadata footer into the input buffer. */ 494 Status write_footer(Buffer* buff) const; 495 496 /** Loads the R-tree from storage. */ 497 Status load_rtree(const EncryptionKey& encryption_key); 498 499 /** Frees the memory associated with the rtree. */ 500 void free_rtree(); 501 502 /** 503 * Loads the variable tile sizes for the input attribute or dimension idx 504 * from storage. 505 * */ 506 Status load_tile_var_sizes( 507 const EncryptionKey& encryption_key, const std::string& name); 508 509 /** 510 * Loads tile offsets for the attribute/dimension names. 511 * 512 * @param encryption_key The key the array got opened with. 513 * @param names The attribute/dimension names. 514 * @return Status 515 */ 516 Status load_tile_offsets( 517 const EncryptionKey& encryption_key, std::vector<std::string>&& names); 518 519 /** 520 * Loads validity tile offsets for the attribute names. 521 * 522 * @param encryption_key The key the array got opened with. 523 * @param names The attribute names. 
524 * @return Status 525 */ 526 Status load_tile_validity_offsets( 527 const EncryptionKey& encryption_key, std::vector<std::string>&& names); 528 529 /** 530 * Returns ArraySchema 531 * 532 * @return 533 */ 534 const ArraySchema* array_schema() const; 535 536 private: 537 /* ********************************* */ 538 /* TYPE DEFINITIONS */ 539 /* ********************************* */ 540 541 /** 542 * Stores the start offsets of the generic tiles stored in the 543 * metadata file, each separately storing the various metadata 544 * (e.g., R-Tree, tile offsets, etc). 545 */ 546 struct GenericTileOffsets { 547 uint64_t rtree_ = 0; 548 std::vector<uint64_t> tile_offsets_; 549 std::vector<uint64_t> tile_var_offsets_; 550 std::vector<uint64_t> tile_var_sizes_; 551 std::vector<uint64_t> tile_validity_offsets_; 552 }; 553 554 /** Keeps track of which metadata is loaded. */ 555 struct LoadedMetadata { 556 bool footer_ = false; 557 bool rtree_ = false; 558 std::vector<bool> tile_offsets_; 559 std::vector<bool> tile_var_offsets_; 560 std::vector<bool> tile_var_sizes_; 561 std::vector<bool> tile_validity_offsets_; 562 }; 563 564 /* ********************************* */ 565 /* PRIVATE ATTRIBUTES */ 566 /* ********************************* */ 567 568 /** The storage manager. */ 569 StorageManager* storage_manager_; 570 571 /** The array schema */ 572 const ArraySchema* array_schema_; 573 574 /** The array schema name */ 575 std::string array_schema_name_; 576 577 /** 578 * Maps an attribute or dimension to an index used in the various vector 579 * class members. Attributes are first, then TILEDB_COORDS, then the 580 * dimensions. 581 */ 582 std::unordered_map<std::string, unsigned> idx_map_; 583 584 /** A vector storing the first and last coordinates of each tile. */ 585 std::vector<std::vector<uint8_t>> bounding_coords_; 586 587 /** True if the fragment is dense, and false if it is sparse. 
*/ 588 bool dense_; 589 590 /** 591 * The (expanded) domain in which the fragment is constrained. "Expanded" 592 * means that the domain is enlarged minimally to coincide with tile 593 * boundaries (if there is a tile grid imposed by tile extents). Note that the 594 * type of the domain must be the same as the type of the array coordinates. 595 */ 596 NDRange domain_; 597 598 /** Stores the size of each attribute file. */ 599 std::vector<uint64_t> file_sizes_; 600 601 /** Stores the size of each variable attribute file. */ 602 std::vector<uint64_t> file_var_sizes_; 603 604 /** Stores the size of each validity attribute file. */ 605 std::vector<uint64_t> file_validity_sizes_; 606 607 /** Size of the fragment metadata footer. */ 608 uint64_t footer_size_; 609 610 /** Offset of the fragment metadata footer. */ 611 uint64_t footer_offset_; 612 613 /** The uri of the fragment the metadata belongs to. */ 614 URI fragment_uri_; 615 616 /** True if the fragment metadata footer appears in a consolidated file. */ 617 bool has_consolidated_footer_; 618 619 /** Number of cells in the last tile (meaningful only in the sparse case). */ 620 uint64_t last_tile_cell_num_; 621 622 /** Number of sparse tiles. */ 623 uint64_t sparse_tile_num_; 624 625 /** Keeps track of which metadata has been loaded. */ 626 LoadedMetadata loaded_metadata_; 627 628 /** The size of the fragment metadata file. */ 629 uint64_t meta_file_size_; 630 631 /** Local mutex for thread-safety. */ 632 std::mutex mtx_; 633 634 /** Mutex per tile offset loading. */ 635 std::deque<std::mutex> tile_offsets_mtx_; 636 637 /** Mutex per tile var offset loading. */ 638 std::deque<std::mutex> tile_var_offsets_mtx_; 639 640 /** The non-empty domain of the fragment. */ 641 NDRange non_empty_domain_; 642 643 /** An RTree for the MBRs. */ 644 RTree rtree_; 645 646 /** 647 * The tile index base which is added to tile indices in setter functions. 648 * Only used in global order writes. 
649 */ 650 uint64_t tile_index_base_; 651 652 /** 653 * The tile offsets in their corresponding attribute files. Meaningful only 654 * when there is compression. 655 */ 656 std::vector<std::vector<uint64_t>> tile_offsets_; 657 658 /** 659 * The variable tile offsets in their corresponding attribute files. 660 * Meaningful only for variable-sized tiles. 661 */ 662 std::vector<std::vector<uint64_t>> tile_var_offsets_; 663 664 /** 665 * The sizes of the uncompressed variable tiles. 666 * Meaningful only when there is compression for variable tiles. 667 */ 668 std::vector<std::vector<uint64_t>> tile_var_sizes_; 669 670 /** 671 * The validity tile offsets in their corresponding attribute files. 672 * Meaningful only when there is compression. 673 */ 674 std::vector<std::vector<uint64_t>> tile_validity_offsets_; 675 676 /** The format version of this metadata. */ 677 uint32_t version_; 678 679 /** The timestamp range of the fragment. */ 680 std::pair<uint64_t, uint64_t> timestamp_range_; 681 682 /** Stores the generic tile offsets, facilitating loading. */ 683 GenericTileOffsets gt_offsets_; 684 685 /** The uri of the array the metadata belongs to. */ 686 URI array_uri_; 687 688 /* ********************************* */ 689 /* PRIVATE METHODS */ 690 /* ********************************* */ 691 692 /** 693 * Retrieves the offset in the fragment metadata file of the footer 694 * (which contains the generic tile offsets) along with its size. 695 */ 696 Status get_footer_offset_and_size(uint64_t* offset, uint64_t* size) const; 697 698 /** 699 * Returns the size of the fragment metadata footer 700 * (which contains the generic tile offsets) along with its size. 701 * 702 * Applicable to format versions 3 and 4. 703 */ 704 uint64_t footer_size_v3_v4() const; 705 706 /** 707 * Returns the size of the fragment metadata footer 708 * (which contains the generic tile offsets) along with its size. 709 * 710 * Applicable to format versions 5 and 6. 
711 */ 712 uint64_t footer_size_v5_v6() const; 713 714 /** 715 * Returns the size of the fragment metadata footer 716 * (which contains the generic tile offsets) along with its size. 717 * 718 * Applicable to format version 7 or higher. 719 */ 720 uint64_t footer_size_v7_or_higher() const; 721 722 /** 723 * Returns the ids (positions) of the tiles overlapping `subarray`. 724 * Applicable only to dense arrays. 725 */ 726 template <class T> 727 std::vector<uint64_t> compute_overlapping_tile_ids(const T* subarray) const; 728 729 /** 730 * Retrieves the tile domain for the input `subarray` based on the expanded 731 * `domain_`. 732 * 733 * @tparam T The domain type. 734 * @param subarray The targeted subarray. 735 * @param subarray_tile_domain The tile domain to be retrieved. 736 */ 737 template <class T> 738 void get_subarray_tile_domain( 739 const T* subarray, T* subarray_tile_domain) const; 740 741 /** 742 * Expands the non-empty domain using the input MBR. 743 */ 744 Status expand_non_empty_domain(const NDRange& mbr); 745 746 /** 747 * Loads the tile offsets for the input attribute or dimension idx 748 * from storage. 749 */ 750 Status load_tile_offsets(const EncryptionKey& encryption_key, unsigned idx); 751 752 /** 753 * Loads the variable tile offsets for the input attribute or dimension idx 754 * from storage. 755 */ 756 Status load_tile_var_offsets( 757 const EncryptionKey& encryption_key, unsigned idx); 758 759 /** 760 * Loads the variable tile sizes for the input attribute or dimension idx 761 * from storage. 762 * */ 763 Status load_tile_var_sizes(const EncryptionKey& encryption_key, unsigned idx); 764 765 /** 766 * Loads the validity tile offsets for the input attribute idx 767 * from storage. 768 */ 769 Status load_tile_validity_offsets( 770 const EncryptionKey& encryption_key, unsigned idx); 771 772 /** Loads the generic tile offsets from the buffer. 
*/ 773 Status load_generic_tile_offsets(ConstBuffer* buff); 774 775 /** 776 * Loads the generic tile offsets from the buffer. Applicable to 777 * format versions 3 and 4. 778 */ 779 Status load_generic_tile_offsets_v3_v4(ConstBuffer* buff); 780 781 /** 782 * Loads the generic tile offsets from the buffer. Applicable to 783 * format versions 5 and 6. 784 */ 785 Status load_generic_tile_offsets_v5_v6(ConstBuffer* buff); 786 787 /** 788 * Loads the generic tile offsets from the buffer. Applicable to 789 * format version 7 or higher. 790 */ 791 Status load_generic_tile_offsets_v7_or_higher(ConstBuffer* buff); 792 793 /** 794 * Loads the array schema name from the buffer. 795 */ 796 Status load_array_schema_name(ConstBuffer* buff); 797 798 /** 799 * Loads the bounding coordinates from the fragment metadata buffer. 800 * 801 * @param buff Metadata buffer. 802 * @return Status 803 */ 804 Status load_bounding_coords(ConstBuffer* buff); 805 806 /** Loads the sizes of each attribute or dimension file from the buffer. */ 807 Status load_file_sizes(ConstBuffer* buff); 808 809 /** 810 * Loads the sizes of each attribute or dimension file from the buffer. 811 * Applicable to format versions 1 to 4. 812 */ 813 Status load_file_sizes_v1_v4(ConstBuffer* buff); 814 815 /** 816 * Loads the sizes of each attribute or dimension file from the buffer. 817 * Applicable to format version 5 or higher. 818 */ 819 Status load_file_sizes_v5_or_higher(ConstBuffer* buff); 820 821 /** 822 * Loads the sizes of each variable attribute or dimension file from the 823 * buffer. 824 */ 825 Status load_file_var_sizes(ConstBuffer* buff); 826 827 /** 828 * Loads the sizes of each variable attribute or dimension file from the 829 * buffer. Applicable to format versions 1 to 4. 830 */ 831 Status load_file_var_sizes_v1_v4(ConstBuffer* buff); 832 833 /** 834 * Loads the sizes of each variable attribute or dimension file from the 835 * buffer. Applicable to version 5 or higher. 
836 */ 837 Status load_file_var_sizes_v5_or_higher(ConstBuffer* buff); 838 839 /** Loads the sizes of each attribute validity file from the buffer. */ 840 Status load_file_validity_sizes(ConstBuffer* buff); 841 842 /** 843 * Loads the cell number of the last tile from the fragment metadata buffer. 844 * 845 * @param buff Metadata buffer. 846 * @return Status 847 */ 848 Status load_last_tile_cell_num(ConstBuffer* buff); 849 850 /** 851 * Loads the MBRs from the fragment metadata buffer. 852 * 853 * @param buff Metadata buffer. 854 * @return Status 855 */ 856 Status load_mbrs(ConstBuffer* buff); 857 858 /** Loads the non-empty domain from the input buffer. */ 859 Status load_non_empty_domain(ConstBuffer* buff); 860 861 /** 862 * Loads the non-empty domain from the input buffer, 863 * for format versions <= 2. 864 */ 865 Status load_non_empty_domain_v1_v2(ConstBuffer* buff); 866 867 /** 868 * Loads the non-empty domain from the input buffer, 869 * for format versions 3 and 4. 870 */ 871 Status load_non_empty_domain_v3_v4(ConstBuffer* buff); 872 873 /** 874 * Loads the non-empty domain from the input buffer, 875 * for format versions >= 5. 876 */ 877 Status load_non_empty_domain_v5_or_higher(ConstBuffer* buff); 878 879 /** 880 * Loads the tile offsets for the input attribute from the input buffer. 881 * Applicable to versions 1 and 2 882 */ 883 Status load_tile_offsets(ConstBuffer* buff); 884 885 /** 886 * Loads the tile offsets for the input attribute or dimension from the 887 * input buffer. 888 */ 889 Status load_tile_offsets(unsigned idx, ConstBuffer* buff); 890 891 /** 892 * Loads the variable tile offsets from the input buffer. 893 * Applicable to versions 1 and 2 894 */ 895 Status load_tile_var_offsets(ConstBuffer* buff); 896 897 /** 898 * Loads the variable tile offsets for the input attribute or dimension from 899 * the input buffer. 
900 */ 901 Status load_tile_var_offsets(unsigned idx, ConstBuffer* buff); 902 903 /** Loads the variable tile sizes from the input buffer. */ 904 Status load_tile_var_sizes(ConstBuffer* buff); 905 906 /** 907 * Loads the variable tile sizes for the input attribute or dimension 908 * from the input buffer. 909 */ 910 Status load_tile_var_sizes(unsigned idx, ConstBuffer* buff); 911 912 /** 913 * Loads the validity tile offsets for the input attribute from the 914 * input buffer. 915 */ 916 Status load_tile_validity_offsets(unsigned idx, ConstBuffer* buff); 917 918 /** Loads the format version from the buffer. */ 919 Status load_version(ConstBuffer* buff); 920 921 /** Loads the `dense_` field from the buffer. */ 922 Status load_dense(ConstBuffer* buff); 923 924 /** Loads the number of sparse tiles from the buffer. */ 925 Status load_sparse_tile_num(ConstBuffer* buff); 926 927 /** Loads the basic metadata from storage (format version 2 or before). */ 928 Status load_v1_v2( 929 const EncryptionKey& encryption_key, 930 const std::unordered_map<std::string, tiledb_shared_ptr<ArraySchema>>& 931 array_schemas); 932 933 /** 934 * Loads the basic metadata from storage or the input `f_buff` if 935 * it is not `nullptr` (format version 3 or after). 936 */ 937 Status load_v3_or_higher( 938 const EncryptionKey& encryption_key, 939 Buffer* f_buff, 940 uint64_t offset, 941 std::unordered_map<std::string, tiledb_shared_ptr<ArraySchema>> 942 array_schemas); 943 944 /** 945 * Loads the footer of the metadata file, which contains 946 * only some basic info. If `f_buff` is `nullptr`, then 947 * the footer will be loaded from the file; otherwise it 948 * will be loaded from `f_buff`. 949 */ 950 Status load_footer( 951 const EncryptionKey& encryption_key, 952 Buffer* f_buff, 953 uint64_t offset, 954 std::unordered_map<std::string, tiledb_shared_ptr<ArraySchema>> 955 array_schemas); 956 957 /** Writes the sizes of each attribute file to the buffer. 
*/ 958 Status write_file_sizes(Buffer* buff) const; 959 960 /** Writes the sizes of each variable attribute file to the buffer. */ 961 Status write_file_var_sizes(Buffer* buff) const; 962 963 /** Writes the sizes of each attribute validity file to the buffer. */ 964 Status write_file_validity_sizes(Buffer* buff) const; 965 966 /** Writes the generic tile offsets to the buffer. */ 967 Status write_generic_tile_offsets(Buffer* buff) const; 968 969 /** Writes the array schema name to the buffer. */ 970 Status write_array_schema_name(Buffer* buff) const; 971 972 /** 973 * Writes the cell number of the last tile to the fragment metadata buffer. 974 */ 975 Status write_last_tile_cell_num(Buffer* buff) const; 976 977 /** 978 * Writes the R-tree to storage. 979 * 980 * @param encryption_key The encryption key. 981 * @param nbytes The total number of bytes written for the R-tree. 982 * @return Status 983 */ 984 Status store_rtree(const EncryptionKey& encryption_key, uint64_t* nbytes); 985 986 /** Stores a footer with the basic information. * * @param encryption_key The encryption key. * @return Status */ 987 Status store_footer(const EncryptionKey& encryption_key); 988 989 /** Writes the R-tree to the input buffer. Unlike most of the other `write_*` declarations here, this one is not declared `const`. */ 990 Status write_rtree(Buffer* buff); 991 992 /** Writes the non-empty domain to the input buffer. */ 993 Status write_non_empty_domain(Buffer* buff) const; 994 995 /** 996 * Writes the tile offsets of the input attribute or dimension to storage. 997 * 998 * @param idx The index of the attribute or dimension. 999 * @param encryption_key The encryption key. 1000 * @param nbytes The total number of bytes written for the tile offsets. 1001 * @return Status 1002 */ 1003 Status store_tile_offsets( 1004 unsigned idx, const EncryptionKey& encryption_key, uint64_t* nbytes); 1005 1006 /** 1007 * Writes the tile offsets of the input attribute or dimension idx to the 1008 * input buffer.
1009 */ 1010 Status write_tile_offsets(unsigned idx, Buffer* buff); 1011 1012 /** 1013 * Writes the variable tile offsets of the input attribute or dimension 1014 * to storage. 1015 * 1016 * @param idx The index of the attribute or dimension. 1017 * @param encryption_key The encryption key. 1018 * @param nbytes The total number of bytes written for the tile var offsets. 1019 * @return Status 1020 */ 1021 Status store_tile_var_offsets( 1022 unsigned idx, const EncryptionKey& encryption_key, uint64_t* nbytes); 1023 1024 /** 1025 * Writes the variable tile offsets of the input attribute or dimension idx 1026 * to the input buffer. * * @param idx The index of the attribute or dimension. * @param buff The buffer to write to. * @return Status 1027 */ 1028 Status write_tile_var_offsets(unsigned idx, Buffer* buff); 1029 1030 /** 1031 * Writes the variable tile sizes for the input attribute or dimension to 1032 * storage. 1033 * 1034 * @param idx The index of the attribute or dimension. 1035 * @param encryption_key The encryption key. 1036 * @param nbytes The total number of bytes written for the tile var sizes. 1037 * @return Status 1038 */ 1039 Status store_tile_var_sizes( 1040 unsigned idx, const EncryptionKey& encryption_key, uint64_t* nbytes); 1041 1042 /** 1043 * Writes the variable tile sizes for the input attribute or dimension idx 1044 * to the input buffer. * * @param idx The index of the attribute or dimension. * @param buff The buffer to write to. * @return Status 1045 */ 1046 Status write_tile_var_sizes(unsigned idx, Buffer* buff); 1047 1048 /** 1049 * Writes the validity tile offsets of the input attribute to storage. 1050 * 1051 * @param idx The index of the attribute. 1052 * @param encryption_key The encryption key. 1053 * @param nbytes The total number of bytes written for the validity tile 1054 * offsets. 1055 * @return Status 1056 */ 1057 Status store_tile_validity_offsets( 1058 unsigned idx, const EncryptionKey& encryption_key, uint64_t* nbytes); 1059 1060 /** 1061 * Writes the validity tile offsets of the input attribute idx to the 1062 * input buffer. * * @param idx The index of the attribute. * @param buff The buffer to write to. * @return Status 1063 */ 1064 Status write_tile_validity_offsets(unsigned idx, Buffer* buff); 1065 1066 /** Writes the format version to the buffer.
*/ 1067 Status write_version(Buffer* buff) const; 1068 1069 /** Writes the `dense_` field to the buffer. */ 1070 Status write_dense(Buffer* buff) const; 1071 1072 /** Writes the number of sparse tiles to the buffer. */ 1073 Status write_sparse_tile_num(Buffer* buff) const; 1074 1075 /** 1076 * Reads the contents of a generic tile starting at the input offset, 1077 * and stores them into buffer `buff`. * * @param encryption_key The encryption key. * @param offset The offset at which the generic tile starts. * @param buff The buffer that receives the tile contents. * @return Status 1078 */ 1079 Status read_generic_tile_from_file( 1080 const EncryptionKey& encryption_key, uint64_t offset, Buffer* buff) const; 1081 1082 /** 1083 * Reads the fragment metadata file footer (which contains the generic tile 1084 * offsets) into the input buffer. * * NOTE(review): `footer_offset` and `footer_size` are non-const pointers and look like output parameters -- confirm in the implementation. 1085 */ 1086 Status read_file_footer( 1087 Buffer* buff, uint64_t* footer_offset, uint64_t* footer_size) const; 1088 1089 /** 1090 * Writes the contents of the input buffer as a separate 1091 * generic tile to the metadata file. 1092 * 1093 * @param encryption_key The encryption key. 1094 * @param buff The buffer whose contents the function will write. It is taken by rvalue reference, so callers must pass a temporary or `std::move` it. 1095 * @param nbytes The total number of bytes written to the file. 1096 * @return Status 1097 */ 1098 Status write_generic_tile_to_file( 1099 const EncryptionKey& encryption_key, 1100 Buffer&& buff, 1101 uint64_t* nbytes) const; 1102 1103 /** 1104 * Writes the contents of the input buffer at the end of the fragment 1105 * metadata file, without applying any filters. This helps its quick 1106 * retrieval upon reading (as its size is predictable based on the 1107 * number of attributes). 1108 */ 1109 Status write_footer_to_file(Buffer* buff) const; 1110 1111 /** 1112 * Simple clean up function called in the case of error. It removes the 1113 * fragment metadata file and unlocks the array. 1114 */ 1115 void clean_up(); 1116 1117 /** 1118 * Encodes a dimension/attribute name to use in a file name. The 1119 * motivation is to encode illegal/reserved file name characters. 1120 * 1121 * @param name The dimension/attribute name.
1122 * @return std::string The encoded dimension/attribute name. 1123 */ 1124 std::string encode_name(const std::string& name) const; 1125 1126 /** 1127 * This builds the index mapping for attribute/dimension name to id. 1128 */ 1129 void build_idx_map(); 1130 }; 1131 1132 } // namespace sm 1133 } // namespace tiledb 1134 1135 #endif // TILEDB_FRAGMENT_METADATA_H 1136