1 /** 2 * @file writer.h 3 * 4 * @section LICENSE 5 * 6 * The MIT License 7 * 8 * @copyright Copyright (c) 2017-2021 TileDB, Inc. 9 * 10 * Permission is hereby granted, free of charge, to any person obtaining a copy 11 * of this software and associated documentation files (the "Software"), to deal 12 * in the Software without restriction, including without limitation the rights 13 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 14 * copies of the Software, and to permit persons to whom the Software is 15 * furnished to do so, subject to the following conditions: 16 * 17 * The above copyright notice and this permission notice shall be included in 18 * all copies or substantial portions of the Software. 19 * 20 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 21 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 22 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 23 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 24 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 25 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 26 * THE SOFTWARE. 27 * 28 * @section DESCRIPTION 29 * 30 * This file defines class Writer. 31 */ 32 33 #ifndef TILEDB_WRITER_H 34 #define TILEDB_WRITER_H 35 36 #include <atomic> 37 38 #include "tiledb/common/status.h" 39 #include "tiledb/sm/fragment/written_fragment_info.h" 40 #include "tiledb/sm/misc/types.h" 41 #include "tiledb/sm/query/dense_tiler.h" 42 #include "tiledb/sm/query/iquery_strategy.h" 43 #include "tiledb/sm/query/query.h" 44 #include "tiledb/sm/query/query_buffer.h" 45 #include "tiledb/sm/query/strategy_base.h" 46 #include "tiledb/sm/stats/stats.h" 47 #include "tiledb/sm/tile/tile.h" 48 49 using namespace tiledb::common; 50 51 namespace tiledb { 52 namespace sm { 53 54 class Array; 55 class FragmentMetadata; 56 class StorageManager; 57 58 /** Processes write queries. */ 59 class Writer : public StrategyBase, public IQueryStrategy { 60 public: 61 /* ********************************* */ 62 /* TYPE DEFINITIONS */ 63 /* ********************************* */ 64 65 /** 66 * State used only in global writes, where the user can "append" 67 * by successive query submissions until the query is finalized. 68 */ 69 struct GlobalWriteState { 70 /** 71 * Stores the last tile of each attribute/dimension for each write 72 * operation. For fixed-sized attributes/dimensions, the second tile is 73 * ignored. For var-sized attributes/dimensions, the first tile is the 74 * offsets tile, whereas the second tile is the values tile. In both cases, 75 * the third tile stores a validity tile for nullable attributes. 76 */ 77 std::unordered_map<std::string, std::tuple<Tile, Tile, Tile>> last_tiles_; 78 79 /** 80 * Stores the number of cells written for each attribute/dimension across 81 * the write operations. 82 */ 83 std::unordered_map<std::string, uint64_t> cells_written_; 84 85 /** The fragment metadata that the writer will focus on. */ 86 tdb_shared_ptr<FragmentMetadata> frag_meta_; 87 }; 88 89 /* ********************************* */ 90 /* CONSTRUCTORS & DESTRUCTORS */ 91 /* ********************************* */ 92 93 /** Constructor. */ 94 Writer( 95 stats::Stats* stats, 96 tdb_shared_ptr<Logger> logger, 97 StorageManager* storage_manager, 98 Array* array, 99 Config& config, 100 std::unordered_map<std::string, QueryBuffer>& buffers, 101 Subarray& subarray, 102 Layout layout, 103 std::vector<WrittenFragmentInfo>& written_fragment_info, 104 bool disable_check_global_order, 105 Query::CoordsInfo& coords_info_, 106 URI fragment_uri = URI("")); 107 108 /** Destructor. */ 109 ~Writer(); 110 111 DISABLE_COPY_AND_COPY_ASSIGN(Writer); 112 DISABLE_MOVE_AND_MOVE_ASSIGN(Writer); 113 114 /* ********************************* */ 115 /* API */ 116 /* ********************************* */ 117 118 /** Returns the names of the buffers set by the user for the write query. */ 119 std::vector<std::string> buffer_names() const; 120 121 /** Finalizes the writer. */ 122 Status finalize(); 123 124 /** Writer is never in an imcomplete state. */ incomplete()125 bool incomplete() const { 126 return false; 127 } 128 129 /** Returns current setting of check_coord_dups_ */ 130 bool get_check_coord_dups() const; 131 132 /** Returns current setting of check_coord_oob_ */ 133 bool get_check_coord_oob() const; 134 135 /** Returns current setting of dedup_coords_ */ 136 bool get_dedup_coords() const; 137 138 /** Initializes the writer. */ 139 Status init(); 140 141 /** Initialize the memory budget variables. */ 142 Status initialize_memory_budget(); 143 144 /** Sets current setting of check_coord_dups_ */ 145 void set_check_coord_dups(bool b); 146 147 /** Sets current setting of check_coord_oob_ */ 148 void set_check_coord_oob(bool b); 149 150 /** Sets current setting of dedup_coords_ */ 151 void set_dedup_coords(bool b); 152 153 /** Performs a write query using its set members. */ 154 Status dowork(); 155 156 private: 157 /* ********************************* */ 158 /* PRIVATE ATTRIBUTES */ 159 /* ********************************* */ 160 161 /** 162 * The sizes of the coordinate buffers in a map (dimension -> size). 163 * Needed separate storage since QueryBuffer stores a pointer to the buffer 164 * sizes. 165 */ 166 std::unordered_map<std::string, uint64_t> coord_buffer_sizes_; 167 168 /** 169 * If `true`, it will not check if the written coordinates are 170 * in the global order. This supercedes the config. 171 */ 172 bool disable_check_global_order_; 173 174 /** Keeps track of the coords data. */ 175 Query::CoordsInfo& coords_info_; 176 177 /** 178 * Meaningful only when `dedup_coords_` is `false`. 179 * If `true`, a check for duplicate coordinates will be performed upon 180 * sparse writes and appropriate errors will be thrown in case 181 * duplicates are found. 182 */ 183 bool check_coord_dups_; 184 185 /** 186 * If `true`, a check for coordinates lying out-of-bounds (i.e., 187 * outside the array domain) will be performed upon 188 * sparse writes and appropriate errors will be thrown in case 189 * such coordinates are found. 190 */ 191 bool check_coord_oob_; 192 193 /** 194 * If `true`, the coordinates will be checked whether the 195 * obey the global array order and appropriate errors will be thrown. 196 */ 197 bool check_global_order_; 198 199 /** 200 * If `true`, deduplication of coordinates/cells will happen upon 201 * sparse writes. Ties are broken arbitrarily. 202 * 203 */ 204 bool dedup_coords_; 205 206 /** The name of the new fragment to be created. */ 207 URI fragment_uri_; 208 209 /** The state associated with global writes. */ 210 tdb_unique_ptr<GlobalWriteState> global_write_state_; 211 212 /** True if the writer has been initialized. */ 213 bool initialized_; 214 215 /** Stores information about the written fragments. */ 216 std::vector<WrittenFragmentInfo>& written_fragment_info_; 217 218 /** Allocated buffers that neeed to be cleaned upon destruction. */ 219 std::vector<void*> to_clean_; 220 221 /** UID of the logger instance */ 222 inline static std::atomic<uint64_t> logger_id_ = 0; 223 224 /* ********************************* */ 225 /* PRIVATE METHODS */ 226 /* ********************************* */ 227 228 /** Adss a fragment to `written_fragment_info_`. */ 229 Status add_written_fragment_info(const URI& uri); 230 231 /** Correctness checks for buffer sizes. */ 232 Status check_buffer_sizes() const; 233 234 /** 235 * Throws an error if there are coordinate duplicates. 236 * 237 * @param cell_pos The sorted positions of the coordinates in the 238 * `attr_buffers_`. 239 * @return Status 240 */ 241 Status check_coord_dups(const std::vector<uint64_t>& cell_pos) const; 242 243 /** 244 * Throws an error if there are coordinates falling out-of-bounds, i.e., 245 * outside the array domain. 246 * 247 * @return Status 248 */ 249 Status check_coord_oob() const; 250 251 /** 252 * Throws an error if there are coordinate duplicates. This function 253 * assumes that the coordinates are written in the global layout, 254 * which means that they are already sorted in the attribute buffers. 255 * 256 * @return Status 257 */ 258 Status check_coord_dups() const; 259 260 /** 261 * Throws an error if there are coordinates that do not obey the 262 * global order. 263 * 264 * @return Status 265 */ 266 Status check_global_order() const; 267 268 /** 269 * Throws an error if there are coordinates that do not obey the 270 * global order. Applicable only to Hilbert order. 271 * 272 * @return Status 273 */ 274 Status check_global_order_hilbert() const; 275 276 /** Correctness checks for `subarray_`. */ 277 Status check_subarray() const; 278 279 /** 280 * Check the validity of the provided buffer offsets for a variable attribute. 281 * 282 * @return Status 283 */ 284 Status check_var_attr_offsets() const; 285 286 /** 287 * Cleans up the coordinate buffers. Applicable only if the coordinate 288 * buffers were allocated by TileDB (not the user) 289 */ 290 void clear_coord_buffers(); 291 292 /** Closes all attribute files, flushing their state to storage. */ 293 Status close_files(tdb_shared_ptr<FragmentMetadata> meta) const; 294 295 /** 296 * Computes the positions of the coordinate duplicates (if any). Note 297 * that only the duplicate occurrences are determined, i.e., if the same 298 * coordinates appear 3 times, only 2 will be marked as duplicates, 299 * whereas the first occurrence will not be marked as duplicate. 300 * 301 * @param cell_pos The sorted positions of the coordinates in the 302 * `attr_buffers_`. 303 * @param A set indicating the positions of the duplicates. 304 * If there are not duplicates, this vector will be **empty** after 305 * the termination of the function. 306 * @return Status 307 */ 308 Status compute_coord_dups( 309 const std::vector<uint64_t>& cell_pos, 310 std::set<uint64_t>* coord_dups) const; 311 312 /** 313 * Computes the positions of the coordinate duplicates (if any). Note 314 * that only the duplicate occurrences are determined, i.e., if the same 315 * coordinates appear 3 times, only 2 will be marked as duplicates, 316 * whereas the first occurrence will not be marked as duplicate. 317 * 318 * This functions assumes that the coordinates are laid out in the 319 * global order and, hence, they are sorted in the attribute buffers. 320 * 321 * @param A set indicating the positions of the duplicates. 322 * If there are not duplicates, this vector will be **empty** after 323 * the termination of the function. 324 * @return Status 325 */ 326 Status compute_coord_dups(std::set<uint64_t>* coord_dups) const; 327 328 /** 329 * Computes the coordinates metadata (e.g., MBRs). 330 * 331 * @param tiles The tiles to calculate the coords metadata from. It is 332 * a vector of vectors, one vector of tiles per dimension. 333 * @param meta The fragment metadata that will store the coords metadata. 334 * @return Status 335 */ 336 Status compute_coords_metadata( 337 const std::unordered_map<std::string, std::vector<Tile>>& tiles, 338 tdb_shared_ptr<FragmentMetadata> meta) const; 339 340 /** 341 * Creates a new fragment. 342 * 343 * @param dense Whether the fragment is dense or not. 344 * @param frag_meta The fragment metadata to be generated. 345 * @return Status 346 */ 347 Status create_fragment( 348 bool dense, tdb_shared_ptr<FragmentMetadata>& frag_meta) const; 349 350 /** 351 * Runs the input coordinate and attribute tiles through their 352 * filter pipelines. The tile buffers are modified to contain the output 353 * of the pipeline. 354 */ 355 Status filter_tiles( 356 std::unordered_map<std::string, std::vector<Tile>>* tiles); 357 358 /** 359 * Applicable only to global writes. Filters the last attribute and 360 * coordinate tiles. 361 */ 362 Status filter_last_tiles( 363 std::unordered_map<std::string, std::vector<Tile>>* tiles); 364 365 /** 366 * Runs the input tiles for the input attribute through the filter pipeline. 367 * The tile buffers are modified to contain the output of the pipeline. 368 * 369 * @param name The attribute/dimension the tiles belong to. 370 * @param tile The tiles to be filtered. 371 * @return Status 372 */ 373 Status filter_tiles(const std::string& name, std::vector<Tile>* tiles); 374 375 /** 376 * Runs the input tile for the input attribute/dimension through the filter 377 * pipeline. The tile buffer is modified to contain the output of the 378 * pipeline. 379 * 380 * @param name The attribute/dimension the tile belong to. 381 * @param tile The tile to be filtered. 382 * @param offsets True if the tile to be filtered contains offsets for a 383 * var-sized attribute/dimension. 384 * @param offsets True if the tile to be filtered contains validity values. 385 * @return Status 386 */ 387 Status filter_tile( 388 const std::string& name, Tile* tile, bool offsets, bool nullable); 389 390 /** Finalizes the global write state. */ 391 Status finalize_global_write_state(); 392 393 /** 394 * Writes in the global layout. Applicable to both dense and sparse 395 * arrays. 396 */ 397 Status global_write(); 398 399 /** 400 * Applicable only to global writes. Writes the last tiles for each 401 * attribute remaining in the state, and records the metadata for 402 * the coordinates (if present). 403 * 404 * @return Status 405 */ 406 Status global_write_handle_last_tile(); 407 408 /** Initializes the global write state. */ 409 Status init_global_write_state(); 410 411 /** 412 * Initializes a fixed-sized tile. 413 * 414 * @param name The attribute/dimension the tile belongs to. 415 * @param tile The tile to be initialized. 416 * @return Status 417 */ 418 Status init_tile(const std::string& name, Tile* tile) const; 419 420 /** 421 * Initializes a var-sized tile. 422 * 423 * @param name The attribute/dimension the tile belongs to. 424 * @param tile The offsets tile to be initialized. 425 * @param tile_var The var-sized data tile to be initialized. 426 * @return Status 427 */ 428 Status init_tile(const std::string& name, Tile* tile, Tile* tile_var) const; 429 430 /** 431 * Initializes a fixed-sized, nullable tile. 432 * 433 * @param name The attribute the tile belongs to. 434 * @param tile The tile to be initialized. 435 * @param tile_validity The validity tile to be initialized. 436 * @return Status 437 */ 438 Status init_tile_nullable( 439 const std::string& name, Tile* tile, Tile* tile_validity) const; 440 441 /** 442 * Initializes a var-sized, nullable tile. 443 * 444 * @param name The attribute the tile belongs to. 445 * @param tile The offsets tile to be initialized. 446 * @param tile_var The var-sized data tile to be initialized. 447 * @param tile_validity The validity tile to be initialized. 448 * @return Status 449 */ 450 Status init_tile_nullable( 451 const std::string& name, 452 Tile* tile, 453 Tile* tile_var, 454 Tile* tile_validity) const; 455 456 /** 457 * Initializes the tiles for writing for the input attribute/dimension. 458 * 459 * @param name The attribute/dimension the tiles belong to. 460 * @param tile_num The number of tiles. 461 * @param tiles The tiles to be initialized. Note that the vector 462 * has been already preallocated. 463 * @return Status 464 */ 465 Status init_tiles( 466 const std::string& name, 467 uint64_t tile_num, 468 std::vector<Tile>* tiles) const; 469 470 /** 471 * Generates a new fragment name, which is in the form: <br> 472 * `__t_t_uuid_v`, where `t` is the input timestamp and `v` is the current 473 * format version. For instance, 474 * `__1458759561320_1458759561320_6ba7b8129dad11d180b400c04fd430c8_3`. 475 * 476 * If `timestamp` is 0, then it is set to the current time. 477 * 478 * @param timestamp The timestamp of when the array got opened for writes. It 479 * is in ms since 1970-01-01 00:00:00 +0000 (UTC). 480 * @param frag_uri Will store the new special fragment name 481 * @return Status 482 */ 483 Status new_fragment_name( 484 uint64_t timestamp, uint32_t format_version, std::string* frag_uri) const; 485 486 /** 487 * This deletes the global write state and deletes the potentially 488 * partially written fragment. 489 */ 490 void nuke_global_write_state(); 491 492 /** 493 * Optimize the layout for 1D arrays. Specifically, if the array 494 * is 1D and the query layout is not global or unordered, the layout 495 * should be the same as the cell order of the array. This produces 496 * equivalent results offering faster processing. 497 */ 498 void optimize_layout_for_1D(); 499 500 /** 501 * Checks the validity of the extra element from var-sized offsets of 502 * attributes 503 */ 504 Status check_extra_element(); 505 506 /** 507 * Writes in an ordered layout (col- or row-major order). Applicable only 508 * to dense arrays. 509 */ 510 Status ordered_write(); 511 512 /** 513 * Writes in an ordered layout (col- or row-major order). Applicable only 514 * to dense arrays. 515 * 516 * @tparam T The domain type. 517 */ 518 template <class T> 519 Status ordered_write(); 520 521 /** 522 * Return an element of the offsets buffer at a certain position 523 * taking into account the configured bitsize 524 */ 525 uint64_t get_offset_buffer_element( 526 const void* buffer, const uint64_t pos) const; 527 528 /** 529 * Return the size of an offsets buffer according to the configured 530 * options for variable-sized attributes 531 */ 532 inline uint64_t get_offset_buffer_size(const uint64_t buffer_size) const; 533 534 /** 535 * Return a buffer offset according to the configured options for 536 * variable-sized attributes (e.g. transform a byte offset to element offset) 537 */ 538 uint64_t prepare_buffer_offset( 539 const void* buffer, const uint64_t pos, const uint64_t datasize) const; 540 541 /** 542 * Applicable only to write in global order. It prepares only full 543 * tiles, storing the last potentially non-full tile in 544 * `global_write_state->last_tiles_` as part of the state to be used in 545 * the next write invocation. The last tiles are written to storage 546 * upon `finalize`. Upon each invocation, the function first 547 * populates the partially full last tile from the previous 548 * invocation. 549 * 550 * @param coord_dups The positions of the duplicate coordinates. 551 * @param tiles The **full** tiles to be created. 552 * @return Status 553 */ 554 Status prepare_full_tiles( 555 const std::set<uint64_t>& coord_dups, 556 std::unordered_map<std::string, std::vector<Tile>>* tiles) const; 557 558 /** 559 * Applicable only to write in global order. It prepares only full 560 * tiles, storing the last potentially non-full tile in 561 * `global_write_state->last_tiles_` as part of the state to be used in 562 * the next write invocation. The last tiles are written to storage 563 * upon `finalize`. Upon each invocation, the function first 564 * populates the partially full last tile from the previous 565 * invocation. 566 * 567 * @param name The attribute/dimension to prepare the tiles for. 568 * @param coord_dups The positions of the duplicate coordinates. 569 * @param tiles The **full** tiles to be created. 570 * @return Status 571 */ 572 Status prepare_full_tiles( 573 const std::string& name, 574 const std::set<uint64_t>& coord_dups, 575 std::vector<Tile>* tiles) const; 576 577 /** 578 * Applicable only to write in global order. It prepares only full 579 * tiles, storing the last potentially non-full tile in 580 * `global_write_state_->last_tiles_` as part of the state to be used in 581 * the next write invocation. The last tiles are written to storage 582 * upon `finalize`. Upon each invocation, the function first 583 * populates the partially full last tile from the previous 584 * invocation. Applicable only to fixed-sized attributes. 585 * 586 * @param name The attribute/dimension to prepare the tiles for. 587 * @param coord_dups The positions of the duplicate coordinates. 588 * @param tiles The **full** tiles to be created. 589 * @return Status 590 */ 591 Status prepare_full_tiles_fixed( 592 const std::string& name, 593 const std::set<uint64_t>& coord_dups, 594 std::vector<Tile>* tiles) const; 595 596 /** 597 * Applicable only to write in global order. It prepares only full 598 * tiles, storing the last potentially non-full tile in 599 * `global_write_state_->last_tiles_` as part of the state to be used in 600 * the next write invocation. The last tiles are written to storage 601 * upon `finalize`. Upon each invocation, the function first 602 * populates the partially full last tile from the previous 603 * invocation. Applicable only to var-sized attributes. 604 * 605 * @param name The attribute/dimension to prepare the tiles for. 606 * @param coord_dups The positions of the duplicate coordinates. 607 * @param tiles The **full** tiles to be created. 608 * @return Status 609 */ 610 Status prepare_full_tiles_var( 611 const std::string& name, 612 const std::set<uint64_t>& coord_dups, 613 std::vector<Tile>* tiles) const; 614 615 /** 616 * It prepares the attribute and coordinate tiles, re-organizing the cells 617 * from the user buffers based on the input sorted positions and coordinate 618 * duplicates. 619 * 620 * @param cell_pos The positions that resulted from sorting and 621 * according to which the cells must be re-arranged. 622 * @param coord_dups The set with the positions 623 * of duplicate coordinates/cells. 624 * @param tiles The tiles to be created, one vector per attribute or 625 * coordinate. 626 * @return Status 627 */ 628 Status prepare_tiles( 629 const std::vector<uint64_t>& cell_pos, 630 const std::set<uint64_t>& coord_dups, 631 std::unordered_map<std::string, std::vector<Tile>>* tiles) const; 632 633 /** 634 * It prepares the tiles for the input attribute or dimension, re-organizing 635 * the cells from the user buffers based on the input sorted positions. 636 * 637 * @param name The attribute or dimension to prepare the tiles for. 638 * @param cell_pos The positions that resulted from sorting and 639 * according to which the cells must be re-arranged. 640 * @param coord_dups The set with the positions 641 * of duplicate coordinates/cells. 642 * @param tiles The tiles to be created. 643 * @return Status 644 */ 645 Status prepare_tiles( 646 const std::string& name, 647 const std::vector<uint64_t>& cell_pos, 648 const std::set<uint64_t>& coord_dups, 649 std::vector<Tile>* tiles) const; 650 651 /** 652 * It prepares the tiles for the input attribute or dimension, re-organizing 653 * the cells from the user buffers based on the input sorted positions. 654 * Applicable only to fixed-sized attributes or dimensions. 655 * 656 * @param name The attribute or dimension to prepare the tiles for. 657 * @param cell_pos The positions that resulted from sorting and 658 * according to which the cells must be re-arranged. 659 * @param coord_dups The set with the positions 660 * of duplicate coordinates/cells. 661 * @param tiles The tiles to be created. 662 * @return Status 663 */ 664 Status prepare_tiles_fixed( 665 const std::string& name, 666 const std::vector<uint64_t>& cell_pos, 667 const std::set<uint64_t>& coord_dups, 668 std::vector<Tile>* tiles) const; 669 670 /** 671 * It prepares the tiles for the input attribute or dimension, re-organizing 672 * the cells from the user buffers based on the input sorted positions. 673 * Applicable only to var-sized attributes or dimensions. 674 * 675 * @param name The attribute to prepare the tiles for. 676 * @param cell_pos The positions that resulted from sorting and 677 * according to which the cells must be re-arranged. 678 * @param coord_dups The set with the positions 679 * of duplicate coordinates/cells. 680 * @param tiles The tiles to be created. 681 * @return Status 682 */ 683 Status prepare_tiles_var( 684 const std::string& name, 685 const std::vector<uint64_t>& cell_pos, 686 const std::set<uint64_t>& coord_dups, 687 std::vector<Tile>* tiles) const; 688 689 /** Resets the writer object, rendering it incomplete. */ 690 void reset(); 691 692 /** 693 * Sorts the coordinates of the user buffers, creating a vector with 694 * the sorted positions. 695 * 696 * @param cell_pos The sorted cell positions to be created. 697 * @return Status 698 */ 699 Status sort_coords(std::vector<uint64_t>* cell_pos) const; 700 701 /** 702 * Splits the coordinates buffer into separate coordinate 703 * buffers, one per dimension. Note that this will require extra memory 704 * allocation, which will be cleaned up in the class destructor. 705 * 706 * @return Status 707 */ 708 Status split_coords_buffer(); 709 710 /** 711 * Writes in unordered layout. Applicable to both dense and sparse arrays. 712 * Explicit coordinates must be provided for this write. 713 */ 714 Status unordered_write(); 715 716 /** 717 * Writes an empty cell range to the input tile. 718 * Applicable to **fixed-sized** attributes. 719 * 720 * @param cell_num Number of empty cells to write. 721 * @param cell_val_num Number of values per cell. 722 * @param tile The tile to write to. 723 * @return Status 724 */ 725 Status write_empty_cell_range_to_tile( 726 uint64_t num, uint32_t cell_val_num, Tile* tile) const; 727 728 /** 729 * Writes an empty cell range to the input tile. 730 * Applicable to **fixed-sized** attributes. 731 * 732 * @param cell_num Number of empty cells to write. 733 * @param cell_val_num Number of values per cell. 734 * @param tile The tile to write to. 735 * @param tile_validity The tile with the validity cells to write to. 736 * @return Status 737 */ 738 Status write_empty_cell_range_to_tile_nullable( 739 uint64_t num, 740 uint32_t cell_val_num, 741 Tile* tile, 742 Tile* tile_validity) const; 743 744 /** 745 * Writes an empty cell range to the input tile. 746 * Applicable to **variable-sized** attributes. 747 * 748 * @param num Number of empty values to write. 749 * @param tile The tile offsets to write to. 750 * @param tile_var The tile with the var-sized cells to write to. 751 * @return Status 752 */ 753 Status write_empty_cell_range_to_tile_var( 754 uint64_t num, Tile* tile, Tile* tile_var) const; 755 756 /** 757 * Writes an empty cell range to the input tile. 758 * Applicable to **variable-sized** attributes. 759 * 760 * @param num Number of empty values to write. 761 * @param tile The tile offsets to write to. 762 * @param tile_var The tile with the var-sized cells to write to. 763 * @param tile_validity The tile with the validity cells to write to. 764 * @return Status 765 */ 766 Status write_empty_cell_range_to_tile_var_nullable( 767 uint64_t num, Tile* tile, Tile* tile_var, Tile* tile_validity) const; 768 769 /** 770 * Writes the input cell range to the input tile, for a particular 771 * buffer. Applicable to **fixed-sized** attributes. 772 * 773 * @param buff The write buffer where the cells will be copied from. 774 * @param start The start element in the write buffer. 775 * @param end The end element in the write buffer. 776 * @param tile The tile to write to. 777 * @return Status 778 */ 779 Status write_cell_range_to_tile( 780 ConstBuffer* buff, uint64_t start, uint64_t end, Tile* tile) const; 781 782 /** 783 * Writes the input cell range to the input tile, for a particular 784 * buffer. Applicable to **fixed-sized** attributes. 785 * 786 * @param buff The write buffer where the cells will be copied from. 787 * @param buff_validity The write buffer where the validity cell values will 788 * be copied from. 789 * @param start The start element in the write buffer. 790 * @param end The end element in the write buffer. 791 * @param tile The tile to write to. 792 * @param tile_validity The validity tile to be initialized. 793 * @return Status 794 */ 795 Status write_cell_range_to_tile_nullable( 796 ConstBuffer* buff, 797 ConstBuffer* buff_validity, 798 uint64_t start, 799 uint64_t end, 800 Tile* tile, 801 Tile* tile_validity) const; 802 803 /** 804 * Writes the input cell range to the input tile, for a particular 805 * buffer. Applicable to **variable-sized** attributes. 806 * 807 * @param buff The write buffer where the cell offsets will be copied from. 808 * @param buff_var The write buffer where the cell values will be copied from. 809 * @param start The start element in the write buffer. 810 * @param end The end element in the write buffer. 811 * @param attr_datatype_size The size of each attribute value in `buff_var`. 812 * @param tile The tile offsets to write to. 813 * @param tile_var The tile with the var-sized cells to write to. 814 * @return Status 815 */ 816 Status write_cell_range_to_tile_var( 817 ConstBuffer* buff, 818 ConstBuffer* buff_var, 819 uint64_t start, 820 uint64_t end, 821 uint64_t attr_datatype_size, 822 Tile* tile, 823 Tile* tile_var) const; 824 825 /** 826 * Writes the input cell range to the input tile, for a particular 827 * buffer. Applicable to **variable-sized**, nullable attributes. 828 * 829 * @param buff The write buffer where the cell offsets will be copied from. 830 * @param buff_var The write buffer where the cell values will be copied from. 831 * @param buff_validity The write buffer where the validity cell values will 832 * be copied from. 833 * @param start The start element in the write buffer. 834 * @param end The end element in the write buffer. 835 * @param attr_datatype_size The size of each attribute value in `buff_var`. 836 * @param tile The tile offsets to write to. 837 * @param tile_var The tile with the var-sized cells to write to. 838 * @param tile_validity The validity tile to be initialized. 839 * @return Status 840 */ 841 Status write_cell_range_to_tile_var_nullable( 842 ConstBuffer* buff, 843 ConstBuffer* buff_var, 844 ConstBuffer* buff_validity, 845 uint64_t start, 846 uint64_t end, 847 uint64_t attr_datatype_size, 848 Tile* tile, 849 Tile* tile_var, 850 Tile* tile_validity) const; 851 852 /** 853 * Writes all the input tiles to storage. 854 * 855 * @param tiles Attribute/Coordinate tiles to be written, one element per 856 * attribute or dimension. 857 * @param tiles Attribute/Coordinate tiles to be written. 858 * @return Status 859 */ 860 Status write_all_tiles( 861 tdb_shared_ptr<FragmentMetadata> frag_meta, 862 std::unordered_map<std::string, std::vector<Tile>>* tiles); 863 864 /** 865 * Writes the input tiles for the input attribute/dimension to storage. 866 * 867 * @param name The attribute/dimension the tiles belong to. 868 * @param frag_meta The fragment metadata. 869 * @param start_tile_id The function will start writing tiles 870 * with ids in the fragment that start with this value. 871 * @param tiles The tiles to be written. 872 * @param close_files Whether to close the attribute/coordinate 873 * file in the end of the function call. 874 * @return Status 875 */ 876 Status write_tiles( 877 const std::string& name, 878 tdb_shared_ptr<FragmentMetadata> frag_meta, 879 uint64_t start_tile_id, 880 std::vector<Tile>* tiles, 881 bool close_files = true); 882 883 /** 884 * Returns the i-th coordinates in the coordinate buffers in string 885 * format. 886 */ 887 std::string coords_to_str(uint64_t i) const; 888 889 /** 890 * Invoked on error. It removes the directory of the input URI and 891 * resets the global write state. 892 */ 893 void clean_up(const URI& uri); 894 895 /** 896 * Applicable only to global writes. Returns true if all last tiles stored 897 * in the global write state are empty. 898 */ 899 bool all_last_tiles_empty() const; 900 901 /** Calculates the hilbert values of the input coordinate buffers. */ 902 Status calculate_hilbert_values( 903 const std::vector<const QueryBuffer*>& buffs, 904 std::vector<uint64_t>* hilbert_values) const; 905 906 /** 907 * Prepares, filters and writes dense tiles for the given attribute. 908 * 909 * @tparam T The array domain datatype. 910 * @param name The attribute name. 911 * @param frag_meta The metadata of the new fragment. 912 * @param dense_tiler The dense tiler that will prepare the tiles. 913 * @param thread_num The number of threads to be used for the function. 914 * @param stats Statistics to gather in the function. 915 */ 916 template <class T> 917 Status prepare_filter_and_write_tiles( 918 const std::string& name, 919 tdb_shared_ptr<FragmentMetadata> frag_meta, 920 DenseTiler<T>* dense_tiler, 921 uint64_t thread_num); 922 }; 923 924 } // namespace sm 925 } // namespace tiledb 926 927 #endif // TILEDB_WRITER_H 928