1 /**
2  * @file   writer.h
3  *
4  * @section LICENSE
5  *
6  * The MIT License
7  *
8  * @copyright Copyright (c) 2017-2021 TileDB, Inc.
9  *
10  * Permission is hereby granted, free of charge, to any person obtaining a copy
11  * of this software and associated documentation files (the "Software"), to deal
12  * in the Software without restriction, including without limitation the rights
13  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
14  * copies of the Software, and to permit persons to whom the Software is
15  * furnished to do so, subject to the following conditions:
16  *
17  * The above copyright notice and this permission notice shall be included in
18  * all copies or substantial portions of the Software.
19  *
20  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
21  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
23  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
24  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
25  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
26  * THE SOFTWARE.
27  *
28  * @section DESCRIPTION
29  *
30  * This file defines class Writer.
31  */
32 
33 #ifndef TILEDB_WRITER_H
34 #define TILEDB_WRITER_H
35 
36 #include <atomic>
37 
38 #include "tiledb/common/status.h"
39 #include "tiledb/sm/fragment/written_fragment_info.h"
40 #include "tiledb/sm/misc/types.h"
41 #include "tiledb/sm/query/dense_tiler.h"
42 #include "tiledb/sm/query/iquery_strategy.h"
43 #include "tiledb/sm/query/query.h"
44 #include "tiledb/sm/query/query_buffer.h"
45 #include "tiledb/sm/query/strategy_base.h"
46 #include "tiledb/sm/stats/stats.h"
47 #include "tiledb/sm/tile/tile.h"
48 
49 using namespace tiledb::common;
50 
51 namespace tiledb {
52 namespace sm {
53 
54 class Array;
55 class FragmentMetadata;
56 class StorageManager;
57 
58 /** Processes write queries. */
59 class Writer : public StrategyBase, public IQueryStrategy {
60  public:
61   /* ********************************* */
62   /*          TYPE DEFINITIONS         */
63   /* ********************************* */
64 
65   /**
66    * State used only in global writes, where the user can "append"
67    * by successive query submissions until the query is finalized.
68    */
69   struct GlobalWriteState {
70     /**
71      * Stores the last tile of each attribute/dimension for each write
72      * operation. For fixed-sized attributes/dimensions, the second tile is
73      * ignored. For var-sized attributes/dimensions, the first tile is the
74      * offsets tile, whereas the second tile is the values tile. In both cases,
75      * the third tile stores a validity tile for nullable attributes.
76      */
77     std::unordered_map<std::string, std::tuple<Tile, Tile, Tile>> last_tiles_;
78 
79     /**
80      * Stores the number of cells written for each attribute/dimension across
81      * the write operations.
82      */
83     std::unordered_map<std::string, uint64_t> cells_written_;
84 
85     /** The fragment metadata that the writer will focus on. */
86     tdb_shared_ptr<FragmentMetadata> frag_meta_;
87   };
88 
89   /* ********************************* */
90   /*     CONSTRUCTORS & DESTRUCTORS    */
91   /* ********************************* */
92 
93   /** Constructor. */
94   Writer(
95       stats::Stats* stats,
96       tdb_shared_ptr<Logger> logger,
97       StorageManager* storage_manager,
98       Array* array,
99       Config& config,
100       std::unordered_map<std::string, QueryBuffer>& buffers,
101       Subarray& subarray,
102       Layout layout,
103       std::vector<WrittenFragmentInfo>& written_fragment_info,
104       bool disable_check_global_order,
105       Query::CoordsInfo& coords_info_,
106       URI fragment_uri = URI(""));
107 
108   /** Destructor. */
109   ~Writer();
110 
111   DISABLE_COPY_AND_COPY_ASSIGN(Writer);
112   DISABLE_MOVE_AND_MOVE_ASSIGN(Writer);
113 
114   /* ********************************* */
115   /*                 API               */
116   /* ********************************* */
117 
118   /** Returns the names of the buffers set by the user for the write query. */
119   std::vector<std::string> buffer_names() const;
120 
121   /** Finalizes the writer. */
122   Status finalize();
123 
124   /** Writer is never in an imcomplete state. */
incomplete()125   bool incomplete() const {
126     return false;
127   }
128 
129   /** Returns current setting of check_coord_dups_ */
130   bool get_check_coord_dups() const;
131 
132   /** Returns current setting of check_coord_oob_ */
133   bool get_check_coord_oob() const;
134 
135   /** Returns current setting of dedup_coords_ */
136   bool get_dedup_coords() const;
137 
138   /** Initializes the writer. */
139   Status init();
140 
141   /** Initialize the memory budget variables. */
142   Status initialize_memory_budget();
143 
144   /** Sets current setting of check_coord_dups_ */
145   void set_check_coord_dups(bool b);
146 
147   /** Sets current setting of check_coord_oob_ */
148   void set_check_coord_oob(bool b);
149 
150   /** Sets current setting of dedup_coords_ */
151   void set_dedup_coords(bool b);
152 
153   /** Performs a write query using its set members. */
154   Status dowork();
155 
156  private:
157   /* ********************************* */
158   /*         PRIVATE ATTRIBUTES        */
159   /* ********************************* */
160 
161   /**
162    * The sizes of the coordinate buffers in a map (dimension -> size).
163    * Needed separate storage since QueryBuffer stores a pointer to the buffer
164    * sizes.
165    */
166   std::unordered_map<std::string, uint64_t> coord_buffer_sizes_;
167 
168   /**
169    * If `true`, it will not check if the written coordinates are
170    * in the global order. This supercedes the config.
171    */
172   bool disable_check_global_order_;
173 
174   /** Keeps track of the coords data. */
175   Query::CoordsInfo& coords_info_;
176 
177   /**
178    * Meaningful only when `dedup_coords_` is `false`.
179    * If `true`, a check for duplicate coordinates will be performed upon
180    * sparse writes and appropriate errors will be thrown in case
181    * duplicates are found.
182    */
183   bool check_coord_dups_;
184 
185   /**
186    * If `true`, a check for coordinates lying out-of-bounds (i.e.,
187    * outside the array domain) will be performed upon
188    * sparse writes and appropriate errors will be thrown in case
189    * such coordinates are found.
190    */
191   bool check_coord_oob_;
192 
193   /**
194    * If `true`, the coordinates will be checked whether the
195    * obey the global array order and appropriate errors will be thrown.
196    */
197   bool check_global_order_;
198 
199   /**
200    * If `true`, deduplication of coordinates/cells will happen upon
201    * sparse writes. Ties are broken arbitrarily.
202    *
203    */
204   bool dedup_coords_;
205 
206   /** The name of the new fragment to be created. */
207   URI fragment_uri_;
208 
209   /** The state associated with global writes. */
210   tdb_unique_ptr<GlobalWriteState> global_write_state_;
211 
212   /** True if the writer has been initialized. */
213   bool initialized_;
214 
215   /** Stores information about the written fragments. */
216   std::vector<WrittenFragmentInfo>& written_fragment_info_;
217 
218   /** Allocated buffers that neeed to be cleaned upon destruction. */
219   std::vector<void*> to_clean_;
220 
221   /** UID of the logger instance */
222   inline static std::atomic<uint64_t> logger_id_ = 0;
223 
224   /* ********************************* */
225   /*           PRIVATE METHODS         */
226   /* ********************************* */
227 
228   /** Adss a fragment to `written_fragment_info_`. */
229   Status add_written_fragment_info(const URI& uri);
230 
231   /** Correctness checks for buffer sizes. */
232   Status check_buffer_sizes() const;
233 
234   /**
235    * Throws an error if there are coordinate duplicates.
236    *
237    * @param cell_pos The sorted positions of the coordinates in the
238    *     `attr_buffers_`.
239    * @return Status
240    */
241   Status check_coord_dups(const std::vector<uint64_t>& cell_pos) const;
242 
243   /**
244    * Throws an error if there are coordinates falling out-of-bounds, i.e.,
245    * outside the array domain.
246    *
247    * @return Status
248    */
249   Status check_coord_oob() const;
250 
251   /**
252    * Throws an error if there are coordinate duplicates. This function
253    * assumes that the coordinates are written in the global layout,
254    * which means that they are already sorted in the attribute buffers.
255    *
256    * @return Status
257    */
258   Status check_coord_dups() const;
259 
260   /**
261    * Throws an error if there are coordinates that do not obey the
262    * global order.
263    *
264    * @return Status
265    */
266   Status check_global_order() const;
267 
268   /**
269    * Throws an error if there are coordinates that do not obey the
270    * global order. Applicable only to Hilbert order.
271    *
272    * @return Status
273    */
274   Status check_global_order_hilbert() const;
275 
276   /** Correctness checks for `subarray_`. */
277   Status check_subarray() const;
278 
279   /**
280    * Check the validity of the provided buffer offsets for a variable attribute.
281    *
282    * @return Status
283    */
284   Status check_var_attr_offsets() const;
285 
286   /**
287    * Cleans up the coordinate buffers. Applicable only if the coordinate
288    * buffers were allocated by TileDB (not the user)
289    */
290   void clear_coord_buffers();
291 
292   /** Closes all attribute files, flushing their state to storage. */
293   Status close_files(tdb_shared_ptr<FragmentMetadata> meta) const;
294 
295   /**
296    * Computes the positions of the coordinate duplicates (if any). Note
297    * that only the duplicate occurrences are determined, i.e., if the same
298    * coordinates appear 3 times, only 2 will be marked as duplicates,
299    * whereas the first occurrence will not be marked as duplicate.
300    *
301    * @param cell_pos The sorted positions of the coordinates in the
302    *     `attr_buffers_`.
303    * @param A set indicating the positions of the duplicates.
304    *     If there are not duplicates, this vector will be **empty** after
305    *     the termination of the function.
306    * @return Status
307    */
308   Status compute_coord_dups(
309       const std::vector<uint64_t>& cell_pos,
310       std::set<uint64_t>* coord_dups) const;
311 
312   /**
313    * Computes the positions of the coordinate duplicates (if any). Note
314    * that only the duplicate occurrences are determined, i.e., if the same
315    * coordinates appear 3 times, only 2 will be marked as duplicates,
316    * whereas the first occurrence will not be marked as duplicate.
317    *
318    * This functions assumes that the coordinates are laid out in the
319    * global order and, hence, they are sorted in the attribute buffers.
320    *
321    * @param A set indicating the positions of the duplicates.
322    *     If there are not duplicates, this vector will be **empty** after
323    *     the termination of the function.
324    * @return Status
325    */
326   Status compute_coord_dups(std::set<uint64_t>* coord_dups) const;
327 
328   /**
329    * Computes the coordinates metadata (e.g., MBRs).
330    *
331    * @param tiles The tiles to calculate the coords metadata from. It is
332    *     a vector of vectors, one vector of tiles per dimension.
333    * @param meta The fragment metadata that will store the coords metadata.
334    * @return Status
335    */
336   Status compute_coords_metadata(
337       const std::unordered_map<std::string, std::vector<Tile>>& tiles,
338       tdb_shared_ptr<FragmentMetadata> meta) const;
339 
340   /**
341    * Creates a new fragment.
342    *
343    * @param dense Whether the fragment is dense or not.
344    * @param frag_meta The fragment metadata to be generated.
345    * @return Status
346    */
347   Status create_fragment(
348       bool dense, tdb_shared_ptr<FragmentMetadata>& frag_meta) const;
349 
350   /**
351    * Runs the input coordinate and attribute tiles through their
352    * filter pipelines. The tile buffers are modified to contain the output
353    * of the pipeline.
354    */
355   Status filter_tiles(
356       std::unordered_map<std::string, std::vector<Tile>>* tiles);
357 
358   /**
359    * Applicable only to global writes. Filters the last attribute and
360    * coordinate tiles.
361    */
362   Status filter_last_tiles(
363       std::unordered_map<std::string, std::vector<Tile>>* tiles);
364 
365   /**
366    * Runs the input tiles for the input attribute through the filter pipeline.
367    * The tile buffers are modified to contain the output of the pipeline.
368    *
369    * @param name The attribute/dimension the tiles belong to.
370    * @param tile The tiles to be filtered.
371    * @return Status
372    */
373   Status filter_tiles(const std::string& name, std::vector<Tile>* tiles);
374 
375   /**
376    * Runs the input tile for the input attribute/dimension through the filter
377    * pipeline. The tile buffer is modified to contain the output of the
378    * pipeline.
379    *
380    * @param name The attribute/dimension the tile belong to.
381    * @param tile The tile to be filtered.
382    * @param offsets True if the tile to be filtered contains offsets for a
383    *    var-sized attribute/dimension.
384    * @param offsets True if the tile to be filtered contains validity values.
385    * @return Status
386    */
387   Status filter_tile(
388       const std::string& name, Tile* tile, bool offsets, bool nullable);
389 
390   /** Finalizes the global write state. */
391   Status finalize_global_write_state();
392 
393   /**
394    * Writes in the global layout. Applicable to both dense and sparse
395    * arrays.
396    */
397   Status global_write();
398 
399   /**
400    * Applicable only to global writes. Writes the last tiles for each
401    * attribute remaining in the state, and records the metadata for
402    * the coordinates (if present).
403    *
404    * @return Status
405    */
406   Status global_write_handle_last_tile();
407 
408   /** Initializes the global write state. */
409   Status init_global_write_state();
410 
411   /**
412    * Initializes a fixed-sized tile.
413    *
414    * @param name The attribute/dimension the tile belongs to.
415    * @param tile The tile to be initialized.
416    * @return Status
417    */
418   Status init_tile(const std::string& name, Tile* tile) const;
419 
420   /**
421    * Initializes a var-sized tile.
422    *
423    * @param name The attribute/dimension the tile belongs to.
424    * @param tile The offsets tile to be initialized.
425    * @param tile_var The var-sized data tile to be initialized.
426    * @return Status
427    */
428   Status init_tile(const std::string& name, Tile* tile, Tile* tile_var) const;
429 
430   /**
431    * Initializes a fixed-sized, nullable tile.
432    *
433    * @param name The attribute the tile belongs to.
434    * @param tile The tile to be initialized.
435    * @param tile_validity The validity tile to be initialized.
436    * @return Status
437    */
438   Status init_tile_nullable(
439       const std::string& name, Tile* tile, Tile* tile_validity) const;
440 
441   /**
442    * Initializes a var-sized, nullable tile.
443    *
444    * @param name The attribute the tile belongs to.
445    * @param tile The offsets tile to be initialized.
446    * @param tile_var The var-sized data tile to be initialized.
447    * @param tile_validity The validity tile to be initialized.
448    * @return Status
449    */
450   Status init_tile_nullable(
451       const std::string& name,
452       Tile* tile,
453       Tile* tile_var,
454       Tile* tile_validity) const;
455 
456   /**
457    * Initializes the tiles for writing for the input attribute/dimension.
458    *
459    * @param name The attribute/dimension the tiles belong to.
460    * @param tile_num The number of tiles.
461    * @param tiles The tiles to be initialized. Note that the vector
462    *     has been already preallocated.
463    * @return Status
464    */
465   Status init_tiles(
466       const std::string& name,
467       uint64_t tile_num,
468       std::vector<Tile>* tiles) const;
469 
470   /**
471    * Generates a new fragment name, which is in the form: <br>
472    * `__t_t_uuid_v`, where `t` is the input timestamp and `v` is the current
473    * format version. For instance,
474    * `__1458759561320_1458759561320_6ba7b8129dad11d180b400c04fd430c8_3`.
475    *
476    * If `timestamp` is 0, then it is set to the current time.
477    *
478    * @param timestamp The timestamp of when the array got opened for writes. It
479    *     is in ms since 1970-01-01 00:00:00 +0000 (UTC).
480    * @param frag_uri Will store the new special fragment name
481    * @return Status
482    */
483   Status new_fragment_name(
484       uint64_t timestamp, uint32_t format_version, std::string* frag_uri) const;
485 
486   /**
487    * This deletes the global write state and deletes the potentially
488    * partially written fragment.
489    */
490   void nuke_global_write_state();
491 
492   /**
493    * Optimize the layout for 1D arrays. Specifically, if the array
494    * is 1D and the query layout is not global or unordered, the layout
495    * should be the same as the cell order of the array. This produces
496    * equivalent results offering faster processing.
497    */
498   void optimize_layout_for_1D();
499 
500   /**
501    * Checks the validity of the extra element from var-sized offsets of
502    * attributes
503    */
504   Status check_extra_element();
505 
506   /**
507    * Writes in an ordered layout (col- or row-major order). Applicable only
508    * to dense arrays.
509    */
510   Status ordered_write();
511 
512   /**
513    * Writes in an ordered layout (col- or row-major order). Applicable only
514    * to dense arrays.
515    *
516    * @tparam T The domain type.
517    */
518   template <class T>
519   Status ordered_write();
520 
521   /**
522    * Return an element of the offsets buffer at a certain position
523    * taking into account the configured bitsize
524    */
525   uint64_t get_offset_buffer_element(
526       const void* buffer, const uint64_t pos) const;
527 
528   /**
529    * Return the size of an offsets buffer according to the configured
530    * options for variable-sized attributes
531    */
532   inline uint64_t get_offset_buffer_size(const uint64_t buffer_size) const;
533 
534   /**
535    * Return a buffer offset according to the configured options for
536    * variable-sized attributes (e.g. transform a byte offset to element offset)
537    */
538   uint64_t prepare_buffer_offset(
539       const void* buffer, const uint64_t pos, const uint64_t datasize) const;
540 
541   /**
542    * Applicable only to write in global order. It prepares only full
543    * tiles, storing the last potentially non-full tile in
544    * `global_write_state->last_tiles_` as part of the state to be used in
545    * the next write invocation. The last tiles are written to storage
546    * upon `finalize`. Upon each invocation, the function first
547    * populates the partially full last tile from the previous
548    * invocation.
549    *
550    * @param coord_dups The positions of the duplicate coordinates.
551    * @param tiles The **full** tiles to be created.
552    * @return Status
553    */
554   Status prepare_full_tiles(
555       const std::set<uint64_t>& coord_dups,
556       std::unordered_map<std::string, std::vector<Tile>>* tiles) const;
557 
558   /**
559    * Applicable only to write in global order. It prepares only full
560    * tiles, storing the last potentially non-full tile in
561    * `global_write_state->last_tiles_` as part of the state to be used in
562    * the next write invocation. The last tiles are written to storage
563    * upon `finalize`. Upon each invocation, the function first
564    * populates the partially full last tile from the previous
565    * invocation.
566    *
567    * @param name The attribute/dimension to prepare the tiles for.
568    * @param coord_dups The positions of the duplicate coordinates.
569    * @param tiles The **full** tiles to be created.
570    * @return Status
571    */
572   Status prepare_full_tiles(
573       const std::string& name,
574       const std::set<uint64_t>& coord_dups,
575       std::vector<Tile>* tiles) const;
576 
577   /**
578    * Applicable only to write in global order. It prepares only full
579    * tiles, storing the last potentially non-full tile in
580    * `global_write_state_->last_tiles_` as part of the state to be used in
581    * the next write invocation. The last tiles are written to storage
582    * upon `finalize`. Upon each invocation, the function first
583    * populates the partially full last tile from the previous
584    * invocation. Applicable only to fixed-sized attributes.
585    *
586    * @param name The attribute/dimension to prepare the tiles for.
587    * @param coord_dups The positions of the duplicate coordinates.
588    * @param tiles The **full** tiles to be created.
589    * @return Status
590    */
591   Status prepare_full_tiles_fixed(
592       const std::string& name,
593       const std::set<uint64_t>& coord_dups,
594       std::vector<Tile>* tiles) const;
595 
596   /**
597    * Applicable only to write in global order. It prepares only full
598    * tiles, storing the last potentially non-full tile in
599    * `global_write_state_->last_tiles_` as part of the state to be used in
600    * the next write invocation. The last tiles are written to storage
601    * upon `finalize`. Upon each invocation, the function first
602    * populates the partially full last tile from the previous
603    * invocation. Applicable only to var-sized attributes.
604    *
605    * @param name The attribute/dimension to prepare the tiles for.
606    * @param coord_dups The positions of the duplicate coordinates.
607    * @param tiles The **full** tiles to be created.
608    * @return Status
609    */
610   Status prepare_full_tiles_var(
611       const std::string& name,
612       const std::set<uint64_t>& coord_dups,
613       std::vector<Tile>* tiles) const;
614 
615   /**
616    * It prepares the attribute and coordinate tiles, re-organizing the cells
617    * from the user buffers based on the input sorted positions and coordinate
618    * duplicates.
619    *
620    * @param cell_pos The positions that resulted from sorting and
621    *     according to which the cells must be re-arranged.
622    * @param coord_dups The set with the positions
623    *     of duplicate coordinates/cells.
624    * @param tiles The tiles to be created, one vector per attribute or
625    *     coordinate.
626    * @return Status
627    */
628   Status prepare_tiles(
629       const std::vector<uint64_t>& cell_pos,
630       const std::set<uint64_t>& coord_dups,
631       std::unordered_map<std::string, std::vector<Tile>>* tiles) const;
632 
633   /**
634    * It prepares the tiles for the input attribute or dimension, re-organizing
635    * the cells from the user buffers based on the input sorted positions.
636    *
637    * @param name The attribute or dimension to prepare the tiles for.
638    * @param cell_pos The positions that resulted from sorting and
639    *     according to which the cells must be re-arranged.
640    * @param coord_dups The set with the positions
641    *     of duplicate coordinates/cells.
642    * @param tiles The tiles to be created.
643    * @return Status
644    */
645   Status prepare_tiles(
646       const std::string& name,
647       const std::vector<uint64_t>& cell_pos,
648       const std::set<uint64_t>& coord_dups,
649       std::vector<Tile>* tiles) const;
650 
651   /**
652    * It prepares the tiles for the input attribute or dimension, re-organizing
653    * the cells from the user buffers based on the input sorted positions.
654    * Applicable only to fixed-sized attributes or dimensions.
655    *
656    * @param name The attribute or dimension to prepare the tiles for.
657    * @param cell_pos The positions that resulted from sorting and
658    *     according to which the cells must be re-arranged.
659    * @param coord_dups The set with the positions
660    *     of duplicate coordinates/cells.
661    * @param tiles The tiles to be created.
662    * @return Status
663    */
664   Status prepare_tiles_fixed(
665       const std::string& name,
666       const std::vector<uint64_t>& cell_pos,
667       const std::set<uint64_t>& coord_dups,
668       std::vector<Tile>* tiles) const;
669 
670   /**
671    * It prepares the tiles for the input attribute or dimension, re-organizing
672    * the cells from the user buffers based on the input sorted positions.
673    * Applicable only to var-sized attributes or dimensions.
674    *
675    * @param name The attribute to prepare the tiles for.
676    * @param cell_pos The positions that resulted from sorting and
677    *     according to which the cells must be re-arranged.
678    * @param coord_dups The set with the positions
679    *     of duplicate coordinates/cells.
680    * @param tiles The tiles to be created.
681    * @return Status
682    */
683   Status prepare_tiles_var(
684       const std::string& name,
685       const std::vector<uint64_t>& cell_pos,
686       const std::set<uint64_t>& coord_dups,
687       std::vector<Tile>* tiles) const;
688 
689   /** Resets the writer object, rendering it incomplete. */
690   void reset();
691 
692   /**
693    * Sorts the coordinates of the user buffers, creating a vector with
694    * the sorted positions.
695    *
696    * @param cell_pos The sorted cell positions to be created.
697    * @return Status
698    */
699   Status sort_coords(std::vector<uint64_t>* cell_pos) const;
700 
701   /**
702    * Splits the coordinates buffer into separate coordinate
703    * buffers, one per dimension. Note that this will require extra memory
704    * allocation, which will be cleaned up in the class destructor.
705    *
706    * @return Status
707    */
708   Status split_coords_buffer();
709 
710   /**
711    * Writes in unordered layout. Applicable to both dense and sparse arrays.
712    * Explicit coordinates must be provided for this write.
713    */
714   Status unordered_write();
715 
716   /**
717    * Writes an empty cell range to the input tile.
718    * Applicable to **fixed-sized** attributes.
719    *
720    * @param cell_num Number of empty cells to write.
721    * @param cell_val_num Number of values per cell.
722    * @param tile The tile to write to.
723    * @return Status
724    */
725   Status write_empty_cell_range_to_tile(
726       uint64_t num, uint32_t cell_val_num, Tile* tile) const;
727 
728   /**
729    * Writes an empty cell range to the input tile.
730    * Applicable to **fixed-sized** attributes.
731    *
732    * @param cell_num Number of empty cells to write.
733    * @param cell_val_num Number of values per cell.
734    * @param tile The tile to write to.
735    * @param tile_validity The tile with the validity cells to write to.
736    * @return Status
737    */
738   Status write_empty_cell_range_to_tile_nullable(
739       uint64_t num,
740       uint32_t cell_val_num,
741       Tile* tile,
742       Tile* tile_validity) const;
743 
744   /**
745    * Writes an empty cell range to the input tile.
746    * Applicable to **variable-sized** attributes.
747    *
748    * @param num Number of empty values to write.
749    * @param tile The tile offsets to write to.
750    * @param tile_var The tile with the var-sized cells to write to.
751    * @return Status
752    */
753   Status write_empty_cell_range_to_tile_var(
754       uint64_t num, Tile* tile, Tile* tile_var) const;
755 
756   /**
757    * Writes an empty cell range to the input tile.
758    * Applicable to **variable-sized** attributes.
759    *
760    * @param num Number of empty values to write.
761    * @param tile The tile offsets to write to.
762    * @param tile_var The tile with the var-sized cells to write to.
763    * @param tile_validity The tile with the validity cells to write to.
764    * @return Status
765    */
766   Status write_empty_cell_range_to_tile_var_nullable(
767       uint64_t num, Tile* tile, Tile* tile_var, Tile* tile_validity) const;
768 
769   /**
770    * Writes the input cell range to the input tile, for a particular
771    * buffer. Applicable to **fixed-sized** attributes.
772    *
773    * @param buff The write buffer where the cells will be copied from.
774    * @param start The start element in the write buffer.
775    * @param end The end element in the write buffer.
776    * @param tile The tile to write to.
777    * @return Status
778    */
779   Status write_cell_range_to_tile(
780       ConstBuffer* buff, uint64_t start, uint64_t end, Tile* tile) const;
781 
782   /**
783    * Writes the input cell range to the input tile, for a particular
784    * buffer. Applicable to **fixed-sized** attributes.
785    *
786    * @param buff The write buffer where the cells will be copied from.
787    * @param buff_validity The write buffer where the validity cell values will
788    * be copied from.
789    * @param start The start element in the write buffer.
790    * @param end The end element in the write buffer.
791    * @param tile The tile to write to.
792    * @param tile_validity The validity tile to be initialized.
793    * @return Status
794    */
795   Status write_cell_range_to_tile_nullable(
796       ConstBuffer* buff,
797       ConstBuffer* buff_validity,
798       uint64_t start,
799       uint64_t end,
800       Tile* tile,
801       Tile* tile_validity) const;
802 
803   /**
804    * Writes the input cell range to the input tile, for a particular
805    * buffer. Applicable to **variable-sized** attributes.
806    *
807    * @param buff The write buffer where the cell offsets will be copied from.
808    * @param buff_var The write buffer where the cell values will be copied from.
809    * @param start The start element in the write buffer.
810    * @param end The end element in the write buffer.
811    * @param attr_datatype_size The size of each attribute value in `buff_var`.
812    * @param tile The tile offsets to write to.
813    * @param tile_var The tile with the var-sized cells to write to.
814    * @return Status
815    */
816   Status write_cell_range_to_tile_var(
817       ConstBuffer* buff,
818       ConstBuffer* buff_var,
819       uint64_t start,
820       uint64_t end,
821       uint64_t attr_datatype_size,
822       Tile* tile,
823       Tile* tile_var) const;
824 
825   /**
826    * Writes the input cell range to the input tile, for a particular
827    * buffer. Applicable to **variable-sized**, nullable attributes.
828    *
829    * @param buff The write buffer where the cell offsets will be copied from.
830    * @param buff_var The write buffer where the cell values will be copied from.
831    * @param buff_validity The write buffer where the validity cell values will
832    * be copied from.
833    * @param start The start element in the write buffer.
834    * @param end The end element in the write buffer.
835    * @param attr_datatype_size The size of each attribute value in `buff_var`.
836    * @param tile The tile offsets to write to.
837    * @param tile_var The tile with the var-sized cells to write to.
838    * @param tile_validity The validity tile to be initialized.
839    * @return Status
840    */
841   Status write_cell_range_to_tile_var_nullable(
842       ConstBuffer* buff,
843       ConstBuffer* buff_var,
844       ConstBuffer* buff_validity,
845       uint64_t start,
846       uint64_t end,
847       uint64_t attr_datatype_size,
848       Tile* tile,
849       Tile* tile_var,
850       Tile* tile_validity) const;
851 
852   /**
853    * Writes all the input tiles to storage.
854    *
855    * @param tiles Attribute/Coordinate tiles to be written, one element per
856    *     attribute or dimension.
857    * @param tiles Attribute/Coordinate tiles to be written.
858    * @return Status
859    */
860   Status write_all_tiles(
861       tdb_shared_ptr<FragmentMetadata> frag_meta,
862       std::unordered_map<std::string, std::vector<Tile>>* tiles);
863 
864   /**
865    * Writes the input tiles for the input attribute/dimension to storage.
866    *
867    * @param name The attribute/dimension the tiles belong to.
868    * @param frag_meta The fragment metadata.
869    * @param start_tile_id The function will start writing tiles
870    *     with ids in the fragment that start with this value.
871    * @param tiles The tiles to be written.
872    * @param close_files Whether to close the attribute/coordinate
873    *     file in the end of the function call.
874    * @return Status
875    */
876   Status write_tiles(
877       const std::string& name,
878       tdb_shared_ptr<FragmentMetadata> frag_meta,
879       uint64_t start_tile_id,
880       std::vector<Tile>* tiles,
881       bool close_files = true);
882 
883   /**
884    * Returns the i-th coordinates in the coordinate buffers in string
885    * format.
886    */
887   std::string coords_to_str(uint64_t i) const;
888 
889   /**
890    * Invoked on error. It removes the directory of the input URI and
891    * resets the global write state.
892    */
893   void clean_up(const URI& uri);
894 
895   /**
896    * Applicable only to global writes. Returns true if all last tiles stored
897    * in the global write state are empty.
898    */
899   bool all_last_tiles_empty() const;
900 
901   /** Calculates the hilbert values of the input coordinate buffers. */
902   Status calculate_hilbert_values(
903       const std::vector<const QueryBuffer*>& buffs,
904       std::vector<uint64_t>* hilbert_values) const;
905 
906   /**
907    * Prepares, filters and writes dense tiles for the given attribute.
908    *
909    * @tparam T The array domain datatype.
910    * @param name The attribute name.
911    * @param frag_meta The metadata of the new fragment.
912    * @param dense_tiler The dense tiler that will prepare the tiles.
913    * @param thread_num The number of threads to be used for the function.
914    * @param stats Statistics to gather in the function.
915    */
916   template <class T>
917   Status prepare_filter_and_write_tiles(
918       const std::string& name,
919       tdb_shared_ptr<FragmentMetadata> frag_meta,
920       DenseTiler<T>* dense_tiler,
921       uint64_t thread_num);
922 };
923 
924 }  // namespace sm
925 }  // namespace tiledb
926 
927 #endif  // TILEDB_WRITER_H
928