1 /**
2  * @file   domain.h
3  *
4  * @section LICENSE
5  *
6  * The MIT License
7  *
8  * @copyright Copyright (c) 2017-2021 TileDB, Inc.
9  *
10  * Permission is hereby granted, free of charge, to any person obtaining a copy
11  * of this software and associated documentation files (the "Software"), to deal
12  * in the Software without restriction, including without limitation the rights
13  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
14  * copies of the Software, and to permit persons to whom the Software is
15  * furnished to do so, subject to the following conditions:
16  *
17  * The above copyright notice and this permission notice shall be included in
18  * all copies or substantial portions of the Software.
19  *
20  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
21  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
23  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
24  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
25  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
26  * THE SOFTWARE.
27  *
28  * @section DESCRIPTION
29  *
30  * This file defines class Domain.
31  */
32 
33 #ifndef TILEDB_DOMAIN_H
34 #define TILEDB_DOMAIN_H
35 
36 #include "tiledb/common/macros.h"
37 #include "tiledb/common/status.h"
38 #include "tiledb/sm/misc/types.h"
39 #include "tiledb/sm/query/query_buffer.h"
40 #include "tiledb/sm/query/result_coords.h"
41 
42 #include <vector>
43 
44 using namespace tiledb::common;
45 
46 namespace tiledb {
47 namespace sm {
48 
49 class Buffer;
50 class ConstBuffer;
51 class Dimension;
52 
53 enum class Datatype : uint8_t;
54 enum class Layout : uint8_t;
55 
56 /** Defines an array domain, which consists of dimensions. */
57 class Domain {
58  public:
59   /* ********************************* */
60   /*     CONSTRUCTORS & DESTRUCTORS    */
61   /* ********************************* */
62 
63   /** Empty constructor. */
64   Domain();
65 
66   /**
67    * Constructor that clones the input domain.
68    *
69    * @param domain The object to clone.
70    */
71   explicit Domain(const Domain* domain);
72 
73   /** Copy constructor. */
74   DISABLE_COPY(Domain);
75 
76   /** Move constructor. */
77   Domain(Domain&& rhs);
78 
79   /** Destructor. */
80   ~Domain() = default;
81 
82   /* ********************************* */
83   /*             OPERATORS             */
84   /* ********************************* */
85 
86   /** Copy-assignment operator. */
87   DISABLE_COPY_ASSIGN(Domain);
88 
89   /** Move-assignment operator. */
90   Domain& operator=(Domain&& rhs);
91 
92   /* ********************************* */
93   /*                 API               */
94   /* ********************************* */
95 
96   /**
97    * Adds a dimension to the domain.
98    *
99    * @param dim The dimension to be added.
100    * @return Status
101    */
102   Status add_dimension(const Dimension* dim);
103 
104   /** Returns true if all dimensions have fixed-sized domain datatypes. */
105   bool all_dims_fixed() const;
106 
107   /** Returns true if all dimensions have integer domain types. */
108   bool all_dims_int() const;
109 
110   /** Returns true if all dimensions have string domain types. */
111   bool all_dims_string() const;
112 
113   /** Returns true if all dimensions have real domain types. */
114   bool all_dims_real() const;
115 
116   /** Returns true if all dimensions have the same type. */
117   bool all_dims_same_type() const;
118 
119   /** Returns the number of cells per tile (only for the dense case). */
120   uint64_t cell_num_per_tile() const;
121 
122   /**
123    * Checks the cell order of the input coordinates. Note that, in the presence
124    * of a regular tile grid, this function assumes that the cells are in the
125    * same regular tile.
126    *
127    * @tparam T The coordinates type.
128    * @param dim_idx The dimension to compare the coordinates on.
129    * @param a The first input coordinates.
130    * @param b The second input coordinates.
131    * @return One of the following:
132    *    - -1 if the first coordinate precedes the second
133    *    -  0 if the two coordinates are identical
134    *    - +1 if the first coordinate succeeds the second
135    */
136   int cell_order_cmp(
137       unsigned dim_idx, const ResultCoords& a, const ResultCoords& b) const;
138 
139   /**
140    * Checks the cell order of the input coordinates. Since the coordinates
141    * are given for a single dimension, this function simply checks which
142    * coordinate is larger.
143    *
144    * @param coord_a The first coordinate.
145    * @param coord_b The second coordinate.
146    * @return One of the following:
147    *    - -1 if the first coordinate is smaller than the second
148    *    -  0 if the two coordinates have the same cell order
149    *    - +1 if the first coordinate is larger than the second
150    */
151   template <class T>
152   static int cell_order_cmp_2(const void* coord_a, const void* coord_b);
153 
154   /**
155    * Checks the cell order of the input coordinates. Since the coordinates
156    * are given for a single dimension, this function simply checks which
157    * coordinate is larger.
158    *
159    * @param dim The dimension to check the coordinates on.
160    * @param buff The buffer that stores all coordinates.
161    * @param a The position of the first coordinate in the buffer to check.
162    * @param b The position of the second coordinate in the buffer to check.
163    * @return One of the following:
164    *    - -1 if the first coordinate is smaller than the second
165    *    -  0 if the two coordinates have the same cell order
166    *    - +1 if the first coordinate is larger than the second
167    */
168   template <class T>
169   static int cell_order_cmp(
170       const Dimension* dim, const QueryBuffer* buff, uint64_t a, uint64_t b);
171 
172   /**
173    * Checks the cell order of the input coordinates.
174    *
175    * @param coord_buffs The input coordinates, given n separate buffers,
176    *     one per dimension. The buffers are sorted in the same order of the
177    *     dimensions as defined in the array schema.
178    * @param a The position of the first coordinate tuple across all buffers.
179    * @param b The position of the second coordinate tuple across all buffers.
180    * @return One of the following:
181    *    - -1 if the first coordinates precede the second on the cell order
182    *    -  0 if the two coordinates have the same cell order
183    *    - +1 if the first coordinates succeed the second on the cell order
184    */
185   int cell_order_cmp(
186       const std::vector<const QueryBuffer*>& coord_buffs,
187       uint64_t a,
188       uint64_t b) const;
189 
190   /**
191    * Populates the object members from the data in the input binary buffer.
192    *
193    * @param buff The buffer to deserialize from.
194    * @param version The array schema version.
195    * @return Status
196    */
197   Status deserialize(ConstBuffer* buff, uint32_t version);
198 
199   /** Returns the cell order. */
200   Layout cell_order() const;
201 
202   /**
203    * Crops the input ND range such that it does not exceed the array domain.
204    */
205   void crop_ndrange(NDRange* ndrange) const;
206 
207   /** Returns the tile order. */
208   Layout tile_order() const;
209 
210   /** Returns the number of dimensions. */
211   unsigned int dim_num() const;
212 
213   /** Returns the domain along the i-th dimension. */
214   const Range& domain(unsigned i) const;
215 
216   /** Returns the domain as a N-dimensional range object. */
217   NDRange domain() const;
218 
219   /** Returns the i-th dimensions (nullptr upon error). */
220   const Dimension* dimension(unsigned int i) const;
221 
222   /** Returns the dimension given a name (nullptr upon error). */
223   const Dimension* dimension(const std::string& name) const;
224 
225   /** Dumps the domain in ASCII format in the selected output. */
226   void dump(FILE* out) const;
227 
228   /** Expands ND range `r2` using ND range `r1`. */
229   void expand_ndrange(const NDRange& r1, NDRange* r2) const;
230 
231   /**
232    * Expands the input domain such that it coincides with the boundaries of
233    * the array's regular tiles (i.e., it maps it on the regular tile grid).
234    * If the array has no regular tile grid or real domain, the function
235    * does not do anything.
236    */
237   void expand_to_tiles(NDRange* ndrange) const;
238 
239   /**
240    * Retrieves the tile coordinates of the input cell coordinates.
241    *
242    * @tparam T The domain type.
243    * @param coords The cell coordinates.
244    * @param tile_coords The tile coordinates of the cell coordinates to
245    *     be retrieved.
246    */
247   template <class T>
248   void get_tile_coords(const T* coords, T* tile_coords) const;
249 
250   /**
251    * Retrieves the end of a cell slab starting at the `start` input
252    * coordinates. The cell slab is determined based on the domain
253    * tile/cell order and the input query `layout`. Essentially a
254    * cell slab is a contiguous (in the global cell order) range of
255    * cells that follow the query layout.
256    *
257    * @tparam T The domain type.
258    * @param subarray The subarray in which the end of the cell slab must
259    *     be contained.
260    * @param start The start coordinates.
261    * @param layout The query layout.
262    * @param end The cell slab end coordinates to be retrieved.
263    */
264   template <class T>
265   void get_end_of_cell_slab(T* subarray, T* start, Layout layout, T* end) const;
266 
267   /**
268    * Retrieves the next tile coordinates along the array tile order within a
269    * given tile domain. Applicable only to **dense** arrays.
270    *
271    * @tparam T The coordinates type.
272    * @param domain The targeted domain.
273    * @param tile_coords The input tile coordinates, which the function modifies
274    *     to store the next tile coordinates at termination.
275    * @return void
276    */
277   template <class T>
278   void get_next_tile_coords(const T* domain, T* tile_coords) const;
279 
280   /**
281    * Retrieves the next tile coordinates along the array tile order within a
282    * given tile domain. Applicable only to **dense** arrays.
283    *
284    * @tparam T The coordinates type.
285    * @param domain The targeted domain.
286    * @param tile_coords The input tile coordinates, which the function modifies
287    *     to store the next tile coordinates at termination.
288    * @param in Set to `true` if the retrieve coords are inside the domain.
289    * @return void
290    */
291   template <class T>
292   void get_next_tile_coords(const T* domain, T* tile_coords, bool* in) const;
293 
294   /**
295    * Gets the tile domain of the input cell `subarray`.
296    *
297    * @tparam T The domain type.
298    * @param subarray The input (cell) subarray.
299    * @param tile_subarray The tile subarray in the tile domain to be retrieved.
300    * @return void
301    */
302   template <class T>
303   void get_tile_domain(const T* subarray, T* tile_subarray) const;
304 
305   /**
306    * Returns the tile position along the array tile order within the input
307    * domain. Applicable only to **dense** arrays.
308    *
309    * @tparam T The domain type.
310    * @param domain The input domain, which is a cell domain partitioned into
311    *     regular tiles in the same manner as that of the array domain (however
312    *     *domain* may be a sub-domain of the array domain).
313    * @param tile_coords The tile coordinates, normalized inside the tile
314    *     domain of cell `domain`.
315    * @return The tile position of *tile_coords* along the tile order of the
316    *     array inside the input domain.
317    */
318   template <class T>
319   uint64_t get_tile_pos(const T* domain, const T* tile_coords) const;
320 
321   /**
322    * Gets the tile subarray for the input tile coordinates.
323    *
324    * @tparam T The coordinates type.
325    * @param tile_coords The input tile coordinates.
326    * @param tile_subarray The output tile subarray.
327    * @return void.
328    */
329   template <class T>
330   void get_tile_subarray(const T* tile_coords, T* tile_subarray) const;
331 
332   /**
333    * Gets the tile subarray for the input tile coordinates. The tile
334    * coordinates are with respect to the input `domain`.
335    *
336    * @tparam T The coordinates type.
337    * @param domain The domain `tile_coords` are in reference to.
338    * @param tile_coords The input tile coordinates.
339    * @param tile_subarray The output tile subarray.
340    * @return void.
341    */
342   template <class T>
343   void get_tile_subarray(
344       const T* domain, const T* tile_coords, T* tile_subarray) const;
345 
346   /**
347    * Checks if the domain has a dimension of the given name.
348    *
349    * @param name Name of dimension to check for
350    * @param has_dim Set to true if the domain has a dimension of the given name.
351    * @return Status
352    */
353   Status has_dimension(const std::string& name, bool* has_dim) const;
354 
355   /**
356    * Gets the index in the domain of a given dimension name
357    *
358    * @param name Name of dimension to check for
359    * @param dim_idx The index of this dimension in the domain
360    * @return Status
361    */
362   Status get_dimension_index(const std::string& name, unsigned* dim_idx) const;
363 
364   /**
365    * Initializes the domain.
366    *
367    * @param cell_order The cell order of the array the domain belongs to.
368    * @param tile_order The cell order of the array the domain belongs to.
369    * @return Status
370    */
371   Status init(Layout cell_order, Layout tile_order);
372 
373   /** Returns true if at least one dimension has null tile extent. */
374   bool null_tile_extents() const;
375 
376   /**
377    * Serializes the object members into a binary buffer.
378    *
379    * @param buff The buffer to serialize the data into.
380    * @param version The array schema version.
381    * @return Status
382    */
383   Status serialize(Buffer* buff, uint32_t version);
384 
385   /**
386    * For every dimension that has a null tile extent, it sets
387    * the tile extent to that dimension domain range.
388    */
389   Status set_null_tile_extents_to_range();
390 
391   /**
392    * Based on the input subarray layout and the domain's cell layout
393    * and tile extents, this returns the number of cells that each
394    * pair of adjacent cells in a result cell slab (produced during the
395    * read algorithm for that subarray) are apart. If it is
396    * `UINT64_MAX`, then the cells in the result cell slabs are contiguous.
397    */
398   template <class T>
399   uint64_t stride(Layout subarray_layout) const;
400 
401   /** Returns the tile extent along the i-th dimension. */
402   const ByteVecValue& tile_extent(unsigned i) const;
403 
404   /** Returns the tile extents. */
405   std::vector<ByteVecValue> tile_extents() const;
406 
407   /**
408    * Returns the number of tiles intersecting the input ND range.
409    * Returns 0 if even a single dimension has non-integral type.
410    */
411   uint64_t tile_num(const NDRange& ndrange) const;
412 
413   /**
414    * Returns the number of cells in the input range.
415    * If there is an overflow, then the function returns MAX_UINT64.
416    * If at least one dimension had a non-integer domain, the
417    * function returns MAX_UINT64.
418    */
419   uint64_t cell_num(const NDRange& ndrange) const;
420 
421   /** Returns true if r1 is fully covered by r2. */
422   bool covered(const NDRange& r1, const NDRange& r2) const;
423 
424   /** Returns true if the two ND ranges overlap. */
425   bool overlap(const NDRange& r1, const NDRange& r2) const;
426 
427   /**
428    * Return ratio of the overalp of the two input ND ranges over
429    * the volume of `r2`.
430    */
431   double overlap_ratio(const NDRange& r1, const NDRange& r2) const;
432 
433   /**
434    * Checks the tile order of the input coordinates on the given dimension.
435    *
436    * @param The dimension to compare on.
437    * @param coord_a The first coordinate.
438    * @param coord_b The second coordinate.
439    * @return One of the following:
440    *    - -1 if the first coordinate precedes the second on the tile order
441    *    -  0 if the two coordinates have the same tile order
442    *    - +1 if the first coordinate succeeds the second on the tile order
443    */
444   template <class T>
445   static int tile_order_cmp(
446       const Dimension* dim, const void* coord_a, const void* coord_b);
447 
448   /**
449    * Checks the tile order of the input coordinates.
450    *
451    * @param coord_buffs The input coordinates, given n separate buffers,
452    *     one per dimension. The buffers are sorted in the same order of the
453    *     dimensions as defined in the array schema.
454    * @param a The position of the first coordinate tuple across all buffers.
455    * @param b The position of the second coordinate tuple across all buffers.
456    * @return One of the following:
457    *    - -1 if the first coordinates precede the second on the tile order
458    *    -  0 if the two coordinates have the same tile order
459    *    - +1 if the first coordinates succeed the second on the tile order
460    */
461   int tile_order_cmp(
462       const std::vector<const QueryBuffer*>& coord_buffs,
463       uint64_t a,
464       uint64_t b) const;
465 
466   /**
467    * Checks the tile order of the input coordinates for a given dimension.
468    *
469    * @param dim_idx The index of the dimension to focus on.
470    * @param coord_a The first coordinate on the given dimension.
471    * @param coord_b The second coordinate on the given dimension.
472    * @return One of the following:
473    *    - -1 if the first coordinate precedes the second on the tile order
474    *    -  0 if the two coordinates have the same tile order
475    *    - +1 if the first coordinate succeeds the second on the tile order
476    */
477   int tile_order_cmp(
478       unsigned dim_idx, const void* coord_a, const void* coord_b) const;
479 
480  private:
481   /* ********************************* */
482   /*         PRIVATE ATTRIBUTES        */
483   /* ********************************* */
484 
485   /** The number of cells per tile. Meaningful only for the **dense** case. */
486   uint64_t cell_num_per_tile_;
487 
488   /** The cell order of the array the domain belongs to. */
489   Layout cell_order_;
490 
491   /** The domain dimensions. */
492   std::vector<tdb_unique_ptr<Dimension>> dimensions_;
493 
494   /** The number of dimensions. */
495   unsigned dim_num_;
496 
497   /** The tile order of the array the domain belongs to. */
498   Layout tile_order_;
499 
500   /**
501    * Vector of functions, one per dimension, for comparing the cell order of
502    * two coordinates. The inputs to the function are:
503    *
504    * - dim: The dimension to check the coordinates on.
505    * - buff: The buffer that stores all coorinates;
506    * - a, b: The positions of the two coordinates in the buffer to compare.
507    */
508   std::vector<int (*)(
509       const Dimension* dim, const QueryBuffer* buff, uint64_t a, uint64_t b)>
510       cell_order_cmp_func_;
511 
512   /**
513    * Vector of functions, one per dimension, for comparing the cell order of
514    * two coordinates. The inputs to the function are:
515    *
516    * - coord_a, coord_b: The two coordinates to compare.
517    */
518   std::vector<int (*)(const void* coord_a, const void* coord_b)>
519       cell_order_cmp_func_2_;
520 
521   /**
522    * Vector of functions, one per dimension, for comparing the tile order of
523    * two coordinates. The inputs to the function are:
524    *
525    * - dim: The dimension to compare on.
526    * - coord_a, coord_b: The two coordinates to compare.
527    */
528   std::vector<int (*)(
529       const Dimension* dim, const void* coord_a, const void* coord_b)>
530       tile_order_cmp_func_;
531 
532   /* ********************************* */
533   /*           PRIVATE METHODS         */
534   /* ********************************* */
535 
536   /** Compute the number of cells per tile. */
537   void compute_cell_num_per_tile();
538 
539   /**
540    * Compute the number of cells per tile.
541    *
542    * @tparam T The coordinates type.
543    * @return void
544    */
545   template <class T>
546   void compute_cell_num_per_tile();
547 
548   /** Prepares the comparator functions for each dimension. */
549   void set_tile_cell_order_cmp_funcs();
550 
551   /**
552    * Retrieves the next tile coordinates along the array tile order within a
553    * given tile domain. Applicable only to **dense** arrays, and focusing on
554    * the **column-major** tile order.
555    *
556    * @tparam T The coordinates type.
557    * @param domain The targeted domain.
558    * @param tile_coords The input tile coordinates, which the function modifies
559    *     to store the next tile coordinates at termination.
560    * @return void
561    */
562   template <class T>
563   void get_next_tile_coords_col(const T* domain, T* tile_coords) const;
564 
565   /**
566    * Retrieves the next tile coordinates along the array tile order within a
567    * given tile domain. Applicable only to **dense** arrays, and focusing on
568    * the **column-major** tile order.
569    *
570    * @tparam T The coordinates type.
571    * @param domain The targeted domain.
572    * @param tile_coords The input tile coordinates, which the function modifies
573    *     to store the next tile coordinates at termination.
574    * @param in Set to `true` if the retrieved coords are inside the domain.
575    * @return void
576    */
577   template <class T>
578   void get_next_tile_coords_col(
579       const T* domain, T* tile_coords, bool* in) const;
580 
581   /**
582    * Retrieves the next tile coordinates along the array tile order within a
583    * given tile domain. Applicable only to **dense** arrays, and focusing on
584    * the **row-major** tile order.
585    *
586    * @tparam T The coordinates type.
587    * @param domain The targeted domain.
588    * @param tile_coords The input tile coordinates, which the function modifies
589    *     to store the next tile coordinates at termination.
590    * @return void
591    */
592   template <class T>
593   void get_next_tile_coords_row(const T* domain, T* tile_coords) const;
594 
595   /**
596    * Retrieves the next tile coordinates along the array tile order within a
597    * given tile domain. Applicable only to **dense** arrays, and focusing on
598    * the **row-major** tile order.
599    *
600    * @tparam T The coordinates type.
601    * @param domain The targeted domain.
602    * @param tile_coords The input tile coordinates, which the function modifies
603    *     to store the next tile coordinates at termination.
604    * @param in Set to `true` if the retrieved coords are inside the domain.
605    * @return void
606    */
607   template <class T>
608   void get_next_tile_coords_row(
609       const T* domain, T* tile_coords, bool* in) const;
610 
611   /**
612    * Returns the tile position along the array tile order within the input
613    * domain. Applicable only to **dense** arrays, and focusing on the
614    * **column-major** tile order.
615    *
616    * @tparam T The domain type.
617    * @param domain The input domain, which is a cell domain partitioned into
618    *     regular tiles in the same manner as that of the array domain
619    *     (however *domain* may be a sub-domain of the array domain).
620    * @param tile_coords The tile coordinates.
621    * @return The tile position of *tile_coords* along the tile order of the
622    *     array inside the input domain.
623    */
624   template <class T>
625   uint64_t get_tile_pos_col(const T* domain, const T* tile_coords) const;
626 
627   /**
628    * Returns the tile position along the array tile order within the input
629    * domain. Applicable only to **dense** arrays, and focusing on the
630    * **row-major** tile order.
631    *
632    * @tparam T The domain type.
633    * @param domain The input domain, which is a cell domain partitioned into
634    *     regular tiles in the same manner as that of the array domain (however
635    *     *domain* may be a sub-domain of the array domain).
636    * @param tile_coords The tile coordinates.
637    * @return The tile position of *tile_coords* along the tile order of the
638    *     array inside the input domain.
639    */
640   template <class T>
641   uint64_t get_tile_pos_row(const T* domain, const T* tile_coords) const;
642 };
643 
644 }  // namespace sm
645 }  // namespace tiledb
646 
647 #endif  // TILEDB_DOMAIN_H
648