1 /** 2 * @file storage_manager.h 3 * 4 * @section LICENSE 5 * 6 * The MIT License 7 * 8 * @copyright Copyright (c) 2017-2021 TileDB, Inc. 9 * @copyright Copyright (c) 2016 MIT and Intel Corporation 10 * 11 * Permission is hereby granted, free of charge, to any person obtaining a copy 12 * of this software and associated documentation files (the "Software"), to deal 13 * in the Software without restriction, including without limitation the rights 14 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 15 * copies of the Software, and to permit persons to whom the Software is 16 * furnished to do so, subject to the following conditions: 17 * 18 * The above copyright notice and this permission notice shall be included in 19 * all copies or substantial portions of the Software. 20 * 21 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 22 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 23 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 24 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 25 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 26 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 27 * THE SOFTWARE. 28 * 29 * @section DESCRIPTION 30 * 31 * This file defines class StorageManager. 
32 */ 33 34 #ifndef TILEDB_STORAGE_MANAGER_H 35 #define TILEDB_STORAGE_MANAGER_H 36 37 #include <atomic> 38 #include <condition_variable> 39 #include <functional> 40 #include <list> 41 #include <map> 42 #include <mutex> 43 #include <queue> 44 #include <string> 45 #include <thread> 46 #include <unordered_map> 47 48 #include "tiledb/common/heap_memory.h" 49 #include "tiledb/common/logger_public.h" 50 #include "tiledb/common/status.h" 51 #include "tiledb/common/thread_pool.h" 52 #include "tiledb/sm/config/config.h" 53 #include "tiledb/sm/enums/walk_order.h" 54 #include "tiledb/sm/filesystem/filelock.h" 55 #include "tiledb/sm/fragment/single_fragment_info.h" 56 #include "tiledb/sm/misc/cancelable_tasks.h" 57 #include "tiledb/sm/misc/uri.h" 58 #include "tiledb/sm/stats/global_stats.h" 59 60 using namespace tiledb::common; 61 62 namespace tiledb { 63 namespace sm { 64 65 class Array; 66 class ArraySchema; 67 class ArraySchemaEvolution; 68 class Buffer; 69 class BufferLRUCache; 70 class Consolidator; 71 class EncryptionKey; 72 class FragmentMetadata; 73 class FragmentInfo; 74 class Metadata; 75 class OpenArray; 76 class OpenArrayMemoryTracker; 77 class Query; 78 class RestClient; 79 class VFS; 80 81 enum class EncryptionType : uint8_t; 82 enum class ObjectType : uint8_t; 83 84 /** The storage manager that manages pretty much everything in TileDB. */ 85 class StorageManager { 86 public: 87 /* ********************************* */ 88 /* TYPE DEFINITIONS */ 89 /* ********************************* */ 90 91 /** Enables iteration over TileDB objects in a path. */ 92 class ObjectIter { 93 public: 94 /** 95 * There is a one-to-one correspondence between `expanded_` and `objs_`. 96 * An `expanded_` value is `true` if the corresponding `objs_` path 97 * has been expanded to the paths it contains in a postorder traversal. 98 * This is not used in a preorder traversal. 99 */ 100 std::list<bool> expanded_; 101 /** The next URI in string format.
*/ 102 std::string next_; 103 /** The next objects to be visited. */ 104 std::list<URI> objs_; 105 /** The traversal order of the iterator. */ 106 WalkOrder order_; 107 /** `True` if the iterator will recursively visit the directory tree. */ 108 bool recursive_; 109 }; 110 111 /* ********************************* */ 112 /* CONSTRUCTORS & DESTRUCTORS */ 113 /* ********************************* */ 114 115 /** Constructor. */ 116 StorageManager( 117 ThreadPool* compute_tp, 118 ThreadPool* io_tp, 119 stats::Stats* parent_stats, 120 tdb_shared_ptr<Logger> logger); 121 122 /** Destructor. */ 123 ~StorageManager(); 124 125 DISABLE_COPY_AND_COPY_ASSIGN(StorageManager); 126 DISABLE_MOVE_AND_MOVE_ASSIGN(StorageManager); 127 128 /* ********************************* */ 129 /* API */ 130 /* ********************************* */ 131 132 /** 133 * Closes an array opened for reads. 134 * 135 * @param array_uri The array URI 136 * @return Status 137 */ 138 Status array_close_for_reads(const URI& array_uri); 139 140 /** 141 * Closes an array opened for writes. 142 * 143 * @param array_uri The array URI 144 * @param encryption_key The array encryption key. 145 * @param array_metadata The array metadata, which the function 146 * flushes to persistent storage. 147 * @return Status 148 */ 149 Status array_close_for_writes( 150 const URI& array_uri, 151 const EncryptionKey& encryption_key, 152 Metadata* array_metadata); 153 154 /** 155 * Opens an array for reads at a timestamp. All the metadata of the 156 * fragments created before or at the input timestamp are retrieved. 157 * 158 * If a timestamp_start is provided, this API will open the array between 159 * `timestamp_start` and `timestamp_end`. 160 * 161 * @param array_uri The array URI. 162 * @param enc_key The encryption key to use. 163 * @param timestamp_start The (optional) starting timestamp to open the array 164 * between, starting at this timestamp and ending at `timestamp_end`.
165 * @param timestamp_end The timestamp at which the array will be opened. 166 * In TileDB, timestamps are in ms elapsed since 167 * 1970-01-01 00:00:00 +0000 (UTC). 168 * @return tuple of Status, latest ArraySchema, map of all array schemas and 169 * vector of FragmentMetadata 170 * Status Ok on success, else error 171 * ArraySchema The array schema to be retrieved after the 172 * array is opened. 173 * ArraySchemaMap Map of all array schemas found keyed by name 174 * fragment_metadata The fragment metadata to be retrieved 175 * after the array is opened. 176 */ 177 std::tuple< 178 Status, 179 std::optional<ArraySchema*>, 180 std::optional< 181 std::unordered_map<std::string, tdb_shared_ptr<ArraySchema>>>, 182 std::optional<std::vector<tdb_shared_ptr<FragmentMetadata>>>> 183 array_open_for_reads( 184 const URI& array_uri, 185 const EncryptionKey& enc_key, 186 uint64_t timestamp_start, 187 uint64_t timestamp_end); 188 189 /** 190 * Opens an array for reads without fragments. 191 * 192 * @param array_uri The array URI. 193 * @param enc_key The encryption key to use. 194 * @return tuple of Status, latest ArraySchema and map of all array schemas 195 * Status Ok on success, else error 196 * ArraySchema The array schema to be retrieved after the 197 * array is opened. 198 * ArraySchemaMap Map of all array schemas found keyed by name 199 */ 200 std::tuple< 201 Status, 202 std::optional<ArraySchema*>, 203 std::optional< 204 std::unordered_map<std::string, tdb_shared_ptr<ArraySchema>>>> 205 array_open_for_reads_without_fragments( 206 const URI& array_uri, const EncryptionKey& enc_key); 207 208 /** Opens an array for writes. 209 * 210 * @param array_uri The array URI. 211 * @param enc_key The encryption key. 212 * @return tuple of Status, latest ArraySchema and map of all array schemas 213 * Status Ok on success, else error 214 * ArraySchema The array schema to be retrieved after the 215 * array is opened. 
216 * ArraySchemaMap Map of all array schemas found keyed by name 217 */ 218 std::tuple< 219 Status, 220 std::optional<ArraySchema*>, 221 std::optional< 222 std::unordered_map<std::string, tdb_shared_ptr<ArraySchema>>>> 223 array_open_for_writes(const URI& array_uri, const EncryptionKey& enc_key); 224 225 /** 226 * Load fragments for an already open array. 227 * 228 * @param array_uri The array URI. 229 * @param enc_key The encryption key to use. 230 * @param fragment_metadata The fragment metadata to be retrieved 231 * after the array is opened. 232 * @param fragment_info The list of fragment info. 233 * @return Status 234 */ 235 Status array_load_fragments( 236 const URI& array_uri, 237 const EncryptionKey& enc_key, 238 std::vector<tdb_shared_ptr<FragmentMetadata>>* fragment_metadata, 239 const std::vector<TimestampedURI>& fragment_info); 240 241 /** 242 * Reopens an already open array at a potentially new timestamp, 243 * retrieving the fragment metadata of any new fragments written 244 * in the array. 245 * 246 * @param array_uri The array URI. 247 * @param enc_key The encryption key to use. 248 * @param timestamp_start The optional first timestamp between which the 249 * array will be opened. 250 * @param timestamp_end The timestamp at which the array will be opened. 251 * In TileDB, timestamps are in ms elapsed since 252 * 1970-01-01 00:00:00 +0000 (UTC). 253 * @return tuple of Status, latest ArraySchema, map of all array schemas and 254 * vector of FragmentMetadata 255 * Status Ok on success, else error 256 * ArraySchema The array schema to be retrieved after the 257 * array is opened. 258 * ArraySchemaMap Map of all array schemas found keyed by name 259 * FragmentMetadata The fragment metadata to be retrieved 260 * after the array is opened. 
261 */ 262 std::tuple< 263 Status, 264 std::optional<ArraySchema*>, 265 std::optional< 266 std::unordered_map<std::string, tdb_shared_ptr<ArraySchema>>>, 267 std::optional<std::vector<tdb_shared_ptr<FragmentMetadata>>>> 268 array_reopen( 269 const URI& array_uri, 270 const EncryptionKey& enc_key, 271 uint64_t timestamp_start, 272 uint64_t timestamp_end); 273 274 /** 275 * Consolidates the fragments of an array into a single one. 276 * 277 * @param array_name The name of the array to be consolidated. 278 * @param encryption_type The encryption type of the array 279 * @param encryption_key If the array is encrypted, the private encryption 280 * key. For unencrypted arrays, pass `nullptr`. 281 * @param key_length The length in bytes of the encryption key. 282 * @param config Configuration parameters for the consolidation 283 * (`nullptr` means default, which will use the config associated with 284 * this instance). 285 * @return Status 286 */ 287 Status array_consolidate( 288 const char* array_name, 289 EncryptionType encryption_type, 290 const void* encryption_key, 291 uint32_t key_length, 292 const Config* config); 293 294 /** 295 * Cleans up the array, such as its consolidated fragments and array 296 * metadata. Note that this will coarsen the granularity of time traveling 297 * (see docs for more information). 298 * 299 * @param array_name The name of the array to be vacuumed. 300 * @param config Configuration parameters for vacuuming. 301 * @return Status 302 */ 303 Status array_vacuum(const char* array_name, const Config* config); 304 305 /** 306 * Cleans up fragments that took part in consolidation. Note that this 307 * will coarsen the granularity of time traveling (see docs for more 308 * information). 309 * 310 * @param array_name The name of the array to be vacuumed. 311 * @param timestamp_start The timestamp to start vacuuming at. 312 * @param timestamp_end The timestamp to end vacuuming at. 
313 * @return Status 314 */ 315 Status array_vacuum_fragments( 316 const char* array_name, uint64_t timestamp_start, uint64_t timestamp_end); 317 318 /** 319 * Cleans up consolidated fragment metadata (all except the last one). 320 * 321 * @param array_name The name of the array to be consolidated. 322 * @return Status 323 */ 324 Status array_vacuum_fragment_meta(const char* array_name); 325 326 /** 327 * Cleans up consolidated array metadata. 328 * 329 * @param array_name The name of the array to be consolidated. 330 * @param timestamp_start The timestamp to start vacuuming at. 331 * @param timestamp_end The timestamp to end vacuuming at. 332 * @return Status 333 */ 334 Status array_vacuum_array_meta( 335 const char* array_name, uint64_t timestamp_start, uint64_t timestamp_end); 336 337 /** 338 * Consolidates the metadata of an array into a single file. 339 * 340 * @param array_name The name of the array whose metadata will be 341 * consolidated. 342 * @param encryption_type The encryption type of the array 343 * @param encryption_key If the array is encrypted, the private encryption 344 * key. For unencrypted arrays, pass `nullptr`. 345 * @param key_length The length in bytes of the encryption key. 346 * @param config Configuration parameters for the consolidation 347 * (`nullptr` means default, which will use the config associated with 348 * this instance). 349 * @return Status 350 */ 351 Status array_metadata_consolidate( 352 const char* array_name, 353 EncryptionType encryption_type, 354 const void* encryption_key, 355 uint32_t key_length, 356 const Config* config); 357 358 /** 359 * Creates a TileDB array storing its schema. 360 * 361 * @param array_uri The URI of the array to be created. 362 * @param array_schema The array schema. 363 * @param encryption_key The encryption key to use. 
364 * @return Status 365 */ 366 Status array_create( 367 const URI& array_uri, 368 ArraySchema* array_schema, 369 const EncryptionKey& encryption_key); 370 371 /** 372 * Evolve a TileDB array schema and store its new schema. 373 * 374 * @param array_uri The URI of the array to be evolved. 375 * @param array_schema The schema evolution. 376 * @param encryption_key The encryption key to use. 377 * @return Status 378 */ 379 Status array_evolve_schema( 380 const URI& array_uri, 381 ArraySchemaEvolution* array_schema, 382 const EncryptionKey& encryption_key); 383 384 /** 385 * Upgrade a TileDB array to latest format version. 386 * 387 * @param array_uri The URI of the array to be upgraded. 388 * @param config Configuration parameters for the upgrade 389 * (`nullptr` means default, which will use the config associated with 390 * this instance). 391 * @return Status 392 */ 393 Status array_upgrade_version(const URI& array_uri, const Config* config); 394 395 /** 396 * Gets the memory tracker for an open array. 397 * 398 * @param array_uri The array URI. 399 * @param top_level Specifies if the call is the top level call in recursion. 400 * @return The memory tracker. 401 */ 402 OpenArrayMemoryTracker* array_memory_tracker( 403 const URI& array_uri, bool top_level = true); 404 405 /** 406 * Retrieves the non-empty domain from an array. This is the union of the 407 * non-empty domains of the array fragments. 408 * 409 * @param array An open array object (must be already open). 410 * @param domain The domain to be retrieved. 411 * @param is_empty `true` if the non-empty domain is empty (the array 412 * is empty). 413 * @return Status 414 */ 415 Status array_get_non_empty_domain( 416 Array* array, NDRange* domain, bool* is_empty); 417 418 /** 419 * Retrieves the non-empty domain from an array. This is the union of the 420 * non-empty domains of the array fragments. 421 * 422 * @param array An open array object (must be already open).
423 * @param domain The domain to be retrieved. 424 * @param is_empty `true` if the non-empty domain is empty (the array 425 * is empty). 426 * @return Status 427 */ 428 Status array_get_non_empty_domain(Array* array, void* domain, bool* is_empty); 429 430 /** 431 * Retrieves the non-empty domain from an array on the given dimension. 432 * This is the union of the non-empty domains of the array fragments. 433 * 434 * @param array An open array object (must be already open). 435 * @param idx The dimension index. 436 * @param domain The domain to be retrieved. 437 * @param is_empty `true` if the non-empty domain is empty (the array 438 * is empty). 439 * @return Status 440 */ 441 Status array_get_non_empty_domain_from_index( 442 Array* array, unsigned idx, void* domain, bool* is_empty); 443 444 /** 445 * Retrieves the non-empty domain from an array on the given dimension. 446 * This is the union of the non-empty domains of the array fragments. 447 * 448 * @param array An open array object (must be already open). 449 * @param name The dimension name. 450 * @param domain The domain to be retrieved. 451 * @param is_empty `true` if the non-empty domain is empty (the array 452 * is empty). 453 * @return Status 454 */ 455 Status array_get_non_empty_domain_from_name( 456 Array* array, const char* name, void* domain, bool* is_empty); 457 458 /** 459 * Retrieves the non-empty domain size from an array on the given dimension. 460 * This is the union of the non-empty domains of the array fragments. 461 * Applicable only to var-sized dimensions. 462 * 463 * @param array An open array object (must be already open). 464 * @param idx The dimension index. 465 * @param start_size The size in bytes of the range start. 466 * @param end_size The size in bytes of the range end. 467 * @param is_empty `true` if the non-empty domain is empty (the array 468 * is empty).
469 * @return Status 470 */ 471 Status array_get_non_empty_domain_var_size_from_index( 472 Array* array, 473 unsigned idx, 474 uint64_t* start_size, 475 uint64_t* end_size, 476 bool* is_empty); 477 478 /** 479 * Retrieves the non-empty domain size from an array on the given dimension. 480 * This is the union of the non-empty domains of the array fragments. 481 * Applicable only to var-sized dimensions. 482 * 483 * @param array An open array object (must be already open). 484 * @param name The dimension name. 485 * @param start_size The size in bytes of the range start. 486 * @param end_size The size in bytes of the range end. 487 * @param is_empty `true` if the non-empty domain is empty (the array 488 * is empty). 489 * @return Status 490 */ 491 Status array_get_non_empty_domain_var_size_from_name( 492 Array* array, 493 const char* name, 494 uint64_t* start_size, 495 uint64_t* end_size, 496 bool* is_empty); 497 498 /** 499 * Retrieves the non-empty domain from an array on the given dimension. 500 * This is the union of the non-empty domains of the array fragments. 501 * Applicable only to var-sized dimensions. 502 * 503 * @param array An open array object (must be already open). 504 * @param idx The dimension index. 505 * @param start The domain range start to set. 506 * @param end The domain range end to set. 507 * @param is_empty `true` if the non-empty domain is empty (the array 508 * is empty). 509 * @return Status 510 */ 511 Status array_get_non_empty_domain_var_from_index( 512 Array* array, unsigned idx, void* start, void* end, bool* is_empty); 513 514 /** 515 * Retrieves the non-empty domain from an array on the given dimension. 516 * This is the union of the non-empty domains of the array fragments. 517 * Applicable only to var-sized dimensions. 518 * 519 * @param array An open array object (must be already open). 520 * @param name The dimension name. 521 * @param start The domain range start to set. 522 * @param end The domain range end to set.
523 * @param is_empty `true` if the non-empty domain is empty (the array 524 * is empty). 525 * @return Status 526 */ 527 Status array_get_non_empty_domain_var_from_name( 528 Array* array, const char* name, void* start, void* end, bool* is_empty); 529 530 /** 531 * Retrieves the encryption type from an array. 532 * 533 * @param array_uri URI of the array 534 * @param encryption_type Set to the encryption type of the array. 535 * @return Status 536 */ 537 Status array_get_encryption( 538 const std::string& array_uri, EncryptionType* encryption_type); 539 540 /** 541 * Exclusively locks an array preventing it from being opened in 542 * read mode. This function will wait on the array to 543 * be closed if it is already open (always in read mode). After an array 544 * is xlocked, any attempt to open an array in read mode will have to wait 545 * until the array is unlocked with `array_xunlock`. 546 * 547 * An array is exclusively locked only for a short time upon consolidation, 548 * during removing the directories of the old fragments that got consolidated. 549 * 550 * @note Arrays that are opened in write mode need not be xlocked. The 551 * reason is that the `OpenArray` objects created when opening 552 * in write mode do not store any fragment metadata and, hence, 553 * are not affected by a potentially concurrent consolidator deleting 554 * fragment directories. 555 */ 556 Status array_xlock(const URI& array_uri); 557 558 /** Releases an exclusive lock for the input array. */ 559 Status array_xunlock(const URI& array_uri); 560 561 /** 562 * Pushes an async query to the queue. 563 * 564 * @param query The async query. 565 * @return Status 566 */ 567 Status async_push_query(Query* query); 568 569 /** Cancels all background tasks. */ 570 Status cancel_all_tasks(); 571 572 /** Returns true while all tasks are being cancelled. */ 573 bool cancellation_in_progress(); 574 575 /** Returns the configuration parameters.
*/ 576 const Config& config() const; 577 578 /** Creates a directory with the input URI. */ 579 Status create_dir(const URI& uri); 580 581 /** Creates an empty file with the input URI. */ 582 Status touch(const URI& uri); 583 584 /** 585 * Gets the fragment information for a given array at a particular 586 * timestamp. 587 * 588 * @param array The array. 589 * @param timestamp_start The function will consider fragments created 590 * at or after this timestamp. 591 * @param timestamp_end The function will consider fragments created 592 * at or before this timestamp. 593 * @param fragment_info The fragment information to be retrieved. 594 * The fragments are sorted in chronological creation order. 595 * @param get_to_vacuum Whether or not to receive information about 596 * fragments to vacuum. 597 * @return Status 598 */ 599 Status get_fragment_info( 600 const Array& array, 601 uint64_t timestamp_start, 602 uint64_t timestamp_end, 603 FragmentInfo* fragment_info, 604 bool get_to_vacuum = false); 605 606 /** 607 * Gets the fragment info for a single fragment URI. 608 * 609 * @param array The array. 610 * @param fragment_uri The fragment URI. 611 * @param fragment_info The fragment info to retrieve. 612 * @return Status 613 */ 614 Status get_fragment_info( 615 const Array& array, 616 const URI& fragment_uri, 617 SingleFragmentInfo* fragment_info); 618 619 /** 620 * Retrieves all the fragment URIs of an array, along with the latest 621 * consolidated fragment metadata URI `meta_uri`. 622 */ 623 Status get_fragment_uris( 624 const URI& array_uri, 625 std::vector<URI>* fragment_uris, 626 URI* meta_uri) const; 627 628 /** Returns the current map of any set tags. */ 629 const std::unordered_map<std::string, std::string>& tags() const; 630 631 /** 632 * Creates a TileDB group. 633 * 634 * @param group The URI of the group to be created. 635 * @return Status 636 */ 637 Status group_create(const std::string& group); 638 639 /** 640 * Initializes the storage manager. 
641 * 642 * @param config The configuration parameters. 643 * @return Status 644 */ 645 Status init(const Config* config); 646 647 /** Returns the thread pool for compute-bound tasks. */ 648 ThreadPool* compute_tp(); 649 650 /** Returns the thread pool for io-bound tasks. */ 651 ThreadPool* io_tp(); 652 653 /** 654 * If the storage manager was configured with a REST server, return the 655 * client instance. Else, return nullptr. 656 */ 657 RestClient* rest_client() const; 658 659 /** 660 * Checks if the input URI represents an array. 661 * 662 * @param uri The URI to be checked. 663 * @param is_array Set to `true` if the URI is an array and `false` otherwise. 664 * @return Status 665 */ 666 Status is_array(const URI& uri, bool* is_array) const; 667 668 /** 669 * Checks if the input URI represents a directory. 670 * 671 * @param uri The URI to be checked. 672 * @param is_dir Set to `true` if the URI is a directory and `false` 673 * otherwise. 674 * @return Status 675 */ 676 Status is_dir(const URI& uri, bool* is_dir) const; 677 678 /** 679 * Checks if the input URI represents a fragment. The function takes into 680 * account the fragment version, which is retrieved directly from the URI. 681 * For versions >= 5, the function checks whether the URI is included 682 * in `ok_uris`. For versions < 5, `ok_uris` is empty so the function 683 * checks for the existence of the fragment metadata file in the fragment 684 * URI directory. Therefore, the function is more expensive for earlier 685 * fragment versions. 686 * 687 * @param uri The URI to be checked. 688 * @param ok_uris For checking URI existence of versions >= 5. 689 * @param is_fragment Set to `1` if the URI is a fragment and `0` 690 * otherwise. 691 * @return Status 692 */ 693 Status is_fragment( 694 const URI& uri, const std::set<URI>& ok_uris, int* is_fragment) const; 695 696 /** 697 * Checks if the input URI represents a group. 698 * 699 * @param uri The URI to be checked.
700 * @param is_group Set to `true` if the URI is a group and `false` 701 * otherwise. 702 * @return Status 703 */ 704 Status is_group(const URI& uri, bool* is_group) const; 705 706 /** 707 * Checks if the input URI represents a file. 708 * 709 * @param uri The URI to be checked. 710 * @param is_file Set to `true` if the URI is a file and `false` 711 * otherwise. 712 * @return Status 713 */ 714 Status is_file(const URI& uri, bool* is_file) const; 715 716 /** 717 * Check if a URI is a vacuum file or not based on the file suffix. 718 * @param uri The URI to be checked. 719 * @return true if the URI is a vacuum file, false otherwise 720 */ 721 bool is_vacuum_file(const URI& uri) const; 722 723 /** 724 * Retrieve all array schemas for an array uri under its __schema directory. 725 * 726 * @param array_uri The URI path of the array. 727 * @param schema_uris The vector of array schema URIs sorted from earliest to the 728 * latest. 729 * @return Status 730 */ 731 Status get_array_schema_uris( 732 const URI& array_uri, std::vector<URI>* schema_uris) const; 733 734 /** 735 * Get latest array schema for an array uri. 736 * 737 * @param array_uri The URI path of the array. 738 * @param schema_uri The latest array schema URI. 739 * @return Status 740 */ 741 Status get_latest_array_schema_uri( 742 const URI& array_uri, URI* schema_uri) const; 743 744 /** 745 * Loads the schema of a schema uri from persistent storage into memory. 746 * 747 * @param array_schema_uri The URI path of the array schema. 749 * @param encryption_key The encryption key to use. 750 * @param array_schema The array schema to be retrieved. 751 * @return Status 752 */ 753 Status load_array_schema_from_uri( 754 const URI& array_schema_uri, 755 const EncryptionKey& encryption_key, 756 ArraySchema** array_schema); 757 758 /** 759 * Loads the schema of an array from persistent storage into memory. 760 * 761 * @param array_uri The URI path of the array. 762 * @param encryption_key The encryption key to use.
763 * @param array_schema The array schema to be retrieved. 764 * @return Status 765 */ 766 Status load_array_schema( 767 const URI& array_uri, 768 const EncryptionKey& encryption_key, 769 ArraySchema** array_schema); 770 771 /** 772 * Loads all schemas of an array from persistent storage into memory. 773 * 774 * @param array_uri The URI path of the array. 775 * @param encryption_key The encryption key to use. 776 * @return tuple of Status and optional unordered map. If Status is an error 777 * the unordered_map will be nullopt 778 * Status Ok on success, else error 779 * ArraySchemaMap Map of all array schemas found keyed by name 780 */ 781 std::tuple< 782 Status, 783 std::optional< 784 std::unordered_map<std::string, tdb_shared_ptr<ArraySchema>>>> 785 load_all_array_schemas( 786 const URI& array_uri, const EncryptionKey& encryption_key); 787 788 /** 789 * Loads the array metadata from persistent storage that were created 790 * at or before `timestamp_end` and at or after `timestamp_start`. 791 */ 792 Status load_array_metadata( 793 const URI& array_uri, 794 const EncryptionKey& encryption_key, 795 uint64_t timestamp_start, 796 uint64_t timestamp_end, 797 Metadata* metadata); 798 799 /** Removes a TileDB object (group, array). */ 800 Status object_remove(const char* path) const; 801 802 /** 803 * Renames a TileDB object (group, array). If 804 * `new_path` exists, `new_path` will be overwritten. 805 */ 806 Status object_move(const char* old_path, const char* new_path) const; 807 808 /** 809 * Creates a new object iterator for the input path. The iteration 810 * in this case will be recursive in the entire directory tree rooted 811 * at `path`. 812 * 813 * @param obj_iter The object iterator to be created (memory is allocated for 814 * it by the function). 815 * @param path The path the iterator will target at. 816 * @param order The traversal order of the iterator. 
817 * @return Status 818 */ 819 Status object_iter_begin( 820 ObjectIter** obj_iter, const char* path, WalkOrder order); 821 822 /** 823 * Creates a new object iterator for the input path. The iteration will 824 * not be recursive, and only the children of `path` will be visited. 825 * 826 * @param obj_iter The object iterator to be created (memory is allocated for 827 * it by the function). 828 * @param path The path the iterator will target at. 829 * @return Status 830 */ 831 Status object_iter_begin(ObjectIter** obj_iter, const char* path); 832 833 /** Frees the object iterator. */ 834 void object_iter_free(ObjectIter* obj_iter); 835 836 /** 837 * Retrieves the next object path and type. 838 * 839 * @param obj_iter The object iterator. 840 * @param path The object path that is retrieved. 841 * @param type The object type that is retrieved. 842 * @param has_next True if an object path was retrieved and false otherwise. 843 * @return Status 844 */ 845 Status object_iter_next( 846 ObjectIter* obj_iter, 847 const char** path, 848 ObjectType* type, 849 bool* has_next); 850 851 /** 852 * Retrieves the next object in the post-order traversal. 853 * 854 * @param obj_iter The object iterator. 855 * @param path The object path that is retrieved. 856 * @param type The object type that is retrieved. 857 * @param has_next True if an object path was retrieved and false otherwise. 858 * @return Status 859 */ 860 Status object_iter_next_postorder( 861 ObjectIter* obj_iter, 862 const char** path, 863 ObjectType* type, 864 bool* has_next); 865 866 /** 867 * Retrieves the next object in the pre-order traversal. 868 * 869 * @param obj_iter The object iterator. 870 * @param path The object path that is retrieved. 871 * @param type The object type that is retrieved. 872 * @param has_next True if an object path was retrieved and false otherwise.
873 * @return Status 874 */ 875 Status object_iter_next_preorder( 876 ObjectIter* obj_iter, 877 const char** path, 878 ObjectType* type, 879 bool* has_next); 880 881 /** 882 * Returns the tiledb object type 883 * 884 * @param uri Path to TileDB object resource 885 * @param type The ObjectType to be retrieved. 886 * @return Status 887 */ 888 Status object_type(const URI& uri, ObjectType* type) const; 889 890 /** Submits a query for (sync) execution. */ 891 Status query_submit(Query* query); 892 893 /** 894 * Submits a query for async execution. 895 * 896 * @param query The query to submit. 897 * @return Status 898 */ 899 Status query_submit_async(Query* query); 900 901 /** 902 * Reads from the cache into the input buffer. `uri` and `offset` collectively 903 * form the key of the cached object to be read. Essentially, this is used 904 * to read potentially cached tiles. `uri` is the URI of the attribute the 905 * tile belongs to, and `offset` is the offset in the attribute file where 906 * the tile is located. Observe that the `uri`, `offset` pair is unique. 907 * 908 * @param uri The URI of the cached object. 909 * @param offset The offset of the cached object. 910 * @param buffer The buffer to write into. The function reallocates memory 911 * for the buffer, sets its size to *nbytes* and resets its offset. 912 * @param nbytes Number of bytes to be read. 913 * @param in_cache This is set to `true` if the object is in the cache, 914 * and `false` otherwise. 915 * @return Status. 916 */ 917 Status read_from_cache( 918 const URI& uri, 919 uint64_t offset, 920 Buffer* buffer, 921 uint64_t nbytes, 922 bool* in_cache) const; 923 924 /** 925 * Reads from a file into the input buffer. 926 * 927 * @param uri The URI file to read from. 928 * @param offset The offset in the file the read will start from. 929 * @param buffer The buffer to write into. The function reallocates memory 930 * for the buffer, sets its size to *nbytes* and resets its offset.
931 * @param nbytes The number of bytes to read. 932 * @return Status. 933 */ 934 Status read( 935 const URI& uri, uint64_t offset, Buffer* buffer, uint64_t nbytes) const; 936 937 /** 938 * Reads from a file into the raw input buffer. 939 * 940 * @param uri The URI file to read from. 941 * @param offset The offset in the file the read will start from. 942 * @param buffer The buffer to write into. 943 * @param nbytes The number of bytes to read. 944 * @return Status. 945 */ 946 Status read( 947 const URI& uri, uint64_t offset, void* buffer, uint64_t nbytes) const; 948 949 /** 950 * Sets a string/string KV "tag" on the storage manager instance. 951 * 952 * This is currently only meant for internal TileDB Inc. usage. 953 * 954 * @param key Tag key 955 * @param value Tag value 956 * @return Status 957 */ 958 Status set_tag(const std::string& key, const std::string& value); 959 960 /** 961 * Stores an array schema into persistent storage. 962 * 963 * @param array_schema The array metadata to be stored. 964 * @param encryption_key The encryption key to use. 965 * @return Status 966 */ 967 Status store_array_schema( 968 ArraySchema* array_schema, const EncryptionKey& encryption_key); 969 970 /** 971 * Stores the array metadata into persistent storage. 972 * 973 * @param array_uri The array URI. 974 * @param encryption_key The encryption key to use. 975 * @param array_metadata The array metadata. 976 * @return Status 977 */ 978 Status store_array_metadata( 979 const URI& array_uri, 980 const EncryptionKey& encryption_key, 981 Metadata* array_metadata); 982 983 /** Closes a file, flushing its contents to persistent storage. */ 984 Status close_file(const URI& uri); 985 986 /** Syncs a file or directory, flushing its contents to persistent storage. */ 987 Status sync(const URI& uri); 988 989 /** Returns the virtual filesystem object. */ 990 VFS* vfs() const; 991 992 /** 993 * Writes the contents of a buffer into the cache. 
`uri` and `offset` 994 * collectively form the key of the object to be cached. Essentially, this is 995 * used to cach tiles. `uri` is the URI of the attribute the 996 * tile belongs to, and `offset` is the offset in the attribute file where 997 * the tile is located. Observe that the `uri`, `offset` pair is unique. 998 * 999 * @param uri The URI of the cached object. 1000 * @param offset The offset of the cached object. 1001 * @param buffer The buffer whose contents will be cached. 1002 * @return Status. 1003 */ 1004 Status write_to_cache(const URI& uri, uint64_t offset, Buffer* buffer) const; 1005 1006 /** 1007 * Writes the contents of a buffer into a URI file. 1008 * 1009 * @param uri The file to write into. 1010 * @param buffer The buffer to write. 1011 * @return Status. 1012 */ 1013 Status write(const URI& uri, Buffer* buffer) const; 1014 1015 /** 1016 * Writes the input data into a URI file. 1017 * 1018 * @param uri The file to write into. 1019 * @param data The data to write. 1020 * @param size The data size in bytes. 1021 * @return Status. 1022 */ 1023 Status write(const URI& uri, void* data, uint64_t size) const; 1024 1025 /** Returns `stats_`. */ 1026 stats::Stats* stats(); 1027 1028 /** Returns the internal logger object. */ 1029 tdb_shared_ptr<Logger> logger() const; 1030 1031 private: 1032 /* ********************************* */ 1033 /* PRIVATE DATATYPES */ 1034 /* ********************************* */ 1035 1036 /** 1037 * Helper RAII struct that increments 'queries_in_progress' in the constructor 1038 * and decrements in the destructor, on the given StorageManager instance. 1039 * 1040 * This ensures that the counter is decremented even in the case of 1041 * exceptions. 1042 */ 1043 struct QueryInProgress { 1044 /** The StorageManager instance. */ 1045 StorageManager* sm; 1046 1047 /** Constructor. Calls increment_in_progress() on given StorageManager. 
*/ QueryInProgressQueryInProgress1048 QueryInProgress(StorageManager* sm) 1049 : sm(sm) { 1050 sm->increment_in_progress(); 1051 } 1052 1053 /** Destructor. Calls decrement_in_progress() on given StorageManager. */ ~QueryInProgressQueryInProgress1054 ~QueryInProgress() { 1055 sm->decrement_in_progress(); 1056 } 1057 }; 1058 1059 /* ********************************* */ 1060 /* PRIVATE ATTRIBUTES */ 1061 /* ********************************* */ 1062 1063 /** The class stats. */ 1064 stats::Stats* stats_; 1065 1066 /** The class logger. */ 1067 tdb_shared_ptr<Logger> logger_; 1068 1069 /** Set to true when tasks are being cancelled. */ 1070 bool cancellation_in_progress_; 1071 1072 /** Mutex protecting cancellation_in_progress_. */ 1073 std::mutex cancellation_in_progress_mtx_; 1074 1075 /** 1076 * The condition variable for exlcusively locking arrays. This is used 1077 * to wait for an array to be closed, before being exclusively locked 1078 * by `array_xlock`. 1079 */ 1080 std::condition_variable xlock_cv_; 1081 1082 /** Mutex for providing thread-safety upon creating TileDB objects. */ 1083 std::mutex object_create_mtx_; 1084 1085 /** Stores the TileDB configuration parameters. */ 1086 Config config_; 1087 1088 /** Stores exclusive filelocks for arrays. */ 1089 std::unordered_map<std::string, filelock_t> xfilelocks_; 1090 1091 /** Mutex for managing OpenArray objects for reads. */ 1092 std::mutex open_array_for_reads_mtx_; 1093 1094 /** Mutex for managing OpenArray objects for writes. */ 1095 std::mutex open_array_for_writes_mtx_; 1096 1097 /** Mutex for managing exclusive locks. */ 1098 std::mutex xlock_mtx_; 1099 1100 /** Stores the currently open arrays for reads. */ 1101 std::map<std::string, OpenArray*> open_arrays_for_reads_; 1102 1103 /** Stores the currently open arrays for writes. */ 1104 std::map<std::string, OpenArray*> open_arrays_for_writes_; 1105 1106 /** Count of the number of queries currently in progress. 
*/ 1107 uint64_t queries_in_progress_; 1108 1109 /** Guards queries_in_progress_ counter. */ 1110 std::mutex queries_in_progress_mtx_; 1111 1112 /** Guards queries_in_progress_ counter. */ 1113 std::condition_variable queries_in_progress_cv_; 1114 1115 /** The thread pool for compute-bound tasks. */ 1116 ThreadPool* const compute_tp_; 1117 1118 /** The thread pool for io-bound tasks. */ 1119 ThreadPool* const io_tp_; 1120 1121 /** Tracks all scheduled tasks that can be safely cancelled before execution. 1122 */ 1123 CancelableTasks cancelable_tasks_; 1124 1125 /** Tags for the context object. */ 1126 std::unordered_map<std::string, std::string> tags_; 1127 1128 /** A tile cache. */ 1129 tdb_unique_ptr<BufferLRUCache> tile_cache_; 1130 1131 /** 1132 * Virtual filesystem handler. It directs queries to the appropriate 1133 * filesystem backend. Note that this is stateful. 1134 */ 1135 VFS* vfs_; 1136 1137 /** The rest client (may be null if none was configured). */ 1138 tdb_unique_ptr<RestClient> rest_client_; 1139 1140 /* ********************************* */ 1141 /* PRIVATE METHODS */ 1142 /* ********************************* */ 1143 1144 /** 1145 * This is an auxiliary function to the other `array_open*` functions. 1146 * It opens the array, retrieves an `OpenArray` instance, acquires 1147 * its mutex and increases its counter. The array schema of the array 1148 * is loaded, but not any fragment metadata at this point. 1149 * 1150 * @param array_uri The array URI. 1151 * @param encryption_key The encryption key. 1152 * @param open_array The `OpenArray` instance retrieved after opening 1153 * the array. Note that its mutex will be locked and its counter 1154 * will have been incremented when the function returns. 1155 * @return Status 1156 */ 1157 Status array_open_without_fragments( 1158 const URI& array_uri, 1159 const EncryptionKey& encryption_key, 1160 OpenArray** open_array); 1161 1162 /** Decrement the count of in-progress queries. 
*/ 1163 void decrement_in_progress(); 1164 1165 /** Retrieves all the array metadata URI's of an array. */ 1166 Status get_array_metadata_uris( 1167 const URI& array_uri, std::vector<URI>* array_metadata_uris) const; 1168 1169 /** Increment the count of in-progress queries. */ 1170 void increment_in_progress(); 1171 1172 /** 1173 * Loads the array schema into an open array. 1174 * 1175 * @param array_uri The array URI. 1176 * @param open_array The open array object. 1177 * @param encryption_key The encryption key to use. 1178 * @return Status 1179 */ 1180 Status load_array_schema( 1181 const URI& array_uri, 1182 OpenArray* open_array, 1183 const EncryptionKey& encryption_key); 1184 1185 /** 1186 * Loads the array metadata whose URIs are specified in the input. 1187 * The array metadata are cached in binary form in `open_array`. 1188 * Only the metadata that have not yet been loaded will be fetched 1189 * into the open array object. Eventually, the function will 1190 * deserialize the appropriate array metadata into `metadata`. 1191 * 1192 * @param open_array The open array object. 1193 * @param encryption_key The encryption key to use. 1194 * @param array_metadata_to_load The timestamped URIs of the array 1195 * metadata to load. 1196 * @param metadata The array metadata to be retrieved. 1197 * @return Status 1198 */ 1199 Status load_array_metadata( 1200 OpenArray* open_array, 1201 const EncryptionKey& encryption_key, 1202 const std::vector<TimestampedURI>& array_metadata_to_load, 1203 Metadata* metadata); 1204 1205 /** 1206 * Loads the fragment metadata of an open array given a vector of 1207 * fragment URIs `fragments_to_load`. If the fragment metadata 1208 * are not already loaded into the array, the function loads them. 1209 * The function stores the fragment metadata of each fragment 1210 * in `fragments_to_load` into vector `fragment_metadata`, such 1211 * that there is a one-to-one correspondence between the two vectors. 
1212 * 1213 * @param open_array The open array object. 1214 * @param encryption_key The encryption key to use. 1215 * @param fragments_to_load The fragments whose metadata to load. 1216 * @param meta_buff A buffer that contains the consolidated fragment 1217 * metadata. 1218 * @param offsets A map from a fragment name to an offset in `meta_buff` 1219 * where the basic metadata can be found. If the offset cannot be 1220 * found, then the metadata of that fragment will be loaded from 1221 * storage instead. 1222 * @param fragment_metadata The fragment metadata retrieved in a 1223 * vector. 1224 * @return Status 1225 */ 1226 Status load_fragment_metadata( 1227 OpenArray* open_array, 1228 const EncryptionKey& encryption_key, 1229 const std::vector<TimestampedURI>& fragments_to_load, 1230 Buffer* meta_buff, 1231 const std::unordered_map<std::string, uint64_t>& offsets, 1232 std::vector<tdb_shared_ptr<FragmentMetadata>>* fragment_metadata); 1233 1234 /** 1235 * Loads the latest consolidated fragment metadata from storage. 1236 * 1237 * @param uri The URI of the consolidated fragment metadata. 1238 * @param enc_key The encryption key that may be needed to access the file. 1239 * @param f_buff The buffer to hold the consolidated fragment metadata. 1240 * @param offsets A map from the fragment name to the offset in `f_buff` where 1241 * the basic fragment metadata starts. 1242 * @return Status 1243 */ 1244 Status load_consolidated_fragment_meta( 1245 const URI& uri, 1246 const EncryptionKey& enc_key, 1247 Buffer* f_buff, 1248 std::unordered_map<std::string, uint64_t>* offsets); 1249 1250 /** 1251 * Retrieves the URI of the latest consolidated fragment metadata, 1252 * among the URIs in `uris`. 1253 */ 1254 Status get_consolidated_fragment_meta_uri( 1255 const std::vector<URI>& uris, URI* meta_uri) const; 1256 1257 /** 1258 * Applicable to fragment and array metadata URIs. 
1259 * 1260 * Gets the sorted URIs in ascending first timestamp order, 1261 * breaking ties with lexicographic 1262 * sorting of UUID. Only the URIs with timestamp between `timestamp_start` 1263 * and `timestamp_end` (inclusive) are considered. The sorted URIs are 1264 * stored in the last input, including their timestamps. 1265 */ 1266 Status get_sorted_uris( 1267 const std::vector<URI>& uris, 1268 std::vector<TimestampedURI>* sorted_uris, 1269 uint64_t timestamp_start, 1270 uint64_t timestamp_end) const; 1271 1272 /** 1273 * It computes the URIs `to_vacuum` from the input `uris`, considering 1274 * only the URIs whose first timestamp is greater than or equal to 1275 * `timestamp_start` and second timestamp is smaller than or equal to 1276 * `timestamp_end`. The function also retrieves the `vac_uris` (files with 1277 * `.vac` suffix) that were used to compute `to_vacuum`. 1278 */ 1279 Status get_uris_to_vacuum( 1280 const std::vector<URI>& uris, 1281 uint64_t timestamp_start, 1282 uint64_t timestamp_end, 1283 std::vector<URI>* to_vacuum, 1284 std::vector<URI>* vac_uris, 1285 bool allow_partial = true) const; 1286 1287 /** Block until there are zero in-progress queries. */ 1288 void wait_for_zero_in_progress(); 1289 1290 /** Initializes a REST client, if one was configured. */ 1291 Status init_rest_client(); 1292 1293 /** Sets default tag values on this StorageManager. */ 1294 Status set_default_tags(); 1295 }; 1296 1297 } // namespace sm 1298 } // namespace tiledb 1299 1300 #endif // TILEDB_STORAGE_MANAGER_H 1301