1 /**
2  * @file   dimension.h
3  *
4  * @section LICENSE
5  *
6  * The MIT License
7  *
8  * @copyright Copyright (c) 2017-2021 TileDB, Inc.
9  *
10  * Permission is hereby granted, free of charge, to any person obtaining a copy
11  * of this software and associated documentation files (the "Software"), to deal
12  * in the Software without restriction, including without limitation the rights
13  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
14  * copies of the Software, and to permit persons to whom the Software is
15  * furnished to do so, subject to the following conditions:
16  *
17  * The above copyright notice and this permission notice shall be included in
18  * all copies or substantial portions of the Software.
19  *
20  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
21  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
23  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
24  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
25  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
26  * THE SOFTWARE.
27  *
28  * @section DESCRIPTION
29  *
30  * This file defines class Dimension.
31  */
32 
33 #ifndef TILEDB_DIMENSION_H
34 #define TILEDB_DIMENSION_H
35 
36 #include <bitset>
37 #include <cmath>
38 #include <iomanip>
39 #include <iostream>
40 #include <sstream>
41 #include <string>
42 
43 #include "tiledb/common/blank.h"
44 #include "tiledb/common/logger_public.h"
45 #include "tiledb/common/status.h"
46 #include "tiledb/sm/misc/types.h"
47 #include "tiledb/sm/misc/utils.h"
48 #include "tiledb/sm/query/query_buffer.h"
49 #include "tiledb/sm/query/result_coords.h"
50 #include "tiledb/sm/tile/tile.h"
51 
52 using namespace tiledb::common;
53 
54 namespace tiledb {
55 namespace sm {
56 
57 class Buffer;
58 class ConstBuffer;
59 class FilterPipeline;
60 
61 enum class Compressor : uint8_t;
62 enum class Datatype : uint8_t;
63 
64 /** Manipulates a TileDB dimension. */
65 class Dimension {
66  public:
67   /* ********************************* */
68   /*     CONSTRUCTORS & DESTRUCTORS    */
69   /* ********************************* */
70 
71   /**
72    * No default constructor by C.41
73    */
74   Dimension() = delete;
75 
76   /**
77    * Constructor.
78    *
79    * @param name The name of the dimension.
80    * @param type The type of the dimension.
81    */
82   Dimension(const std::string& name, Datatype type);
83 
84   /**
85    * Constructor. It clones the input.
86    *
87    * @param dim The dimension to clone.
88    */
89   explicit Dimension(const Dimension* dim);
90 
91   /** Copy constructor. */
92   DISABLE_COPY(Dimension);
93 
94   /** Destructor. */
95   ~Dimension() = default;
96 
97   /* ********************************* */
98   /*             OPERATORS             */
99   /* ********************************* */
100 
101   /** Copy-assignment operator. */
102   DISABLE_COPY_ASSIGN(Dimension);
103 
104   /* ********************************* */
105   /*                API                */
106   /* ********************************* */
107 
108   /** Returns the number of values per coordinate. */
109   unsigned cell_val_num() const;
110 
111   /** Sets the number of values per coordinate. */
112   Status set_cell_val_num(unsigned int cell_val_num);
113 
114   /** Returns the size (in bytes) of a coordinate in this dimension. */
115   uint64_t coord_size() const;
116 
117   /**
118    *  Returns a coordinate in string format.
119    *
120    * @param buff The query buffer that contains all coordinates.
121    * @param i The position of the coordinate in the buffer.
122    * @return The coordinate in string format.
123    */
124   std::string coord_to_str(const QueryBuffer& buff, uint64_t i) const;
125 
126   /**
127    * Populates the object members from the data in the input binary buffer.
128    *
129    * @param buff The buffer to deserialize from.
   * @param version The array schema version.
   * @param type The type of the dimension.
132    * @return Status
133    */
134   Status deserialize(ConstBuffer* buff, uint32_t version, Datatype type);
135 
136   /** Returns the domain. */
137   const Range& domain() const;
138 
139   /** Dumps the dimension contents in ASCII form in the selected output. */
140   void dump(FILE* out) const;
141 
142   /** Returns the filter pipeline of this dimension. */
143   const FilterPipeline& filters() const;
144 
145   /** Returns the dimension name. */
146   const std::string& name() const;
147 
148   /**
149    *  Returns the tile index for integer values.
150    *
151    * @param v The value.
152    * @param domain_low The minimum value for the domain.
153    * @param tile_extent The tile extent.
154    * @return The index of the tile.
155    */
156   template <
157       class T,
158       typename std::enable_if<std::is_integral<T>::value>::type* = nullptr>
tile_idx(const T & v,const T & domain_low,const T & tile_extent)159   static uint64_t tile_idx(
160       const T& v, const T& domain_low, const T& tile_extent) {
161     typedef typename std::make_unsigned<T>::type unsigned_t;
162     return ((unsigned_t)v - (unsigned_t)domain_low) / (unsigned_t)tile_extent;
163   }
164 
165   /**
166    *  Returns the tile index for floating point values.
167    *
168    * @param v The value.
169    * @param domain_low The minimum value for the domain.
170    * @param tile_extent The tile extent.
171    * @return The index of the tile.
172    */
173   template <
174       class T,
175       typename std::enable_if<!std::is_integral<T>::value>::type* = nullptr>
tile_idx(const T & v,const T & domain_low,const T & tile_extent)176   static uint64_t tile_idx(
177       const T& v, const T& domain_low, const T& tile_extent) {
178     return (v - domain_low) / tile_extent;
179   }
180 
181   /**
182    *  Rounds the value down to the tile boundary for integer values.
183    *
184    * @param v The value.
185    * @param domain_low The minimum value for the domain.
186    * @param tile_extent The tile extent.
187    * @return The value rounded down to the tile boundary.
188    */
189   template <
190       class T,
191       typename std::enable_if<std::is_integral<T>::value>::type* = nullptr>
round_to_tile(const T & v,const T & domain_low,const T & tile_extent)192   static T round_to_tile(
193       const T& v, const T& domain_low, const T& tile_extent) {
194     typedef typename std::make_unsigned<T>::type unsigned_t;
195     return ((unsigned_t)v - (unsigned_t)domain_low) / (unsigned_t)tile_extent *
196                (unsigned_t)tile_extent +
197            (unsigned_t)domain_low;
198   }
199 
200   /**
201    *  Rounds the value down to the tile boundary for floating point values.
202    *
203    * @param v The value.
204    * @param domain_low The minimum value for the domain.
205    * @param tile_extent The tile extent.
206    * @return The value rounded down to the tile boundary.
207    */
208   template <
209       class T,
210       typename std::enable_if<!std::is_integral<T>::value>::type* = nullptr>
round_to_tile(const T & v,const T & domain_low,const T & tile_extent)211   static T round_to_tile(
212       const T& v, const T& domain_low, const T& tile_extent) {
213     return floor((v - domain_low) / tile_extent) * tile_extent + domain_low;
214   }
215 
216   /**
217    * Returns the tile lower coordinate for integer values.
218    *
219    * @param tile_num The tile index.
220    * @param domain_low The minimum value for the domain.
221    * @param tile_extent The tile extent.
222    * @return The tile lower coordinate.
223    */
224   template <
225       class T,
226       typename std::enable_if<std::is_integral<T>::value>::type* = nullptr>
tile_coord_low(uint64_t tile_num,const T & domain_low,const T & tile_extent)227   static T tile_coord_low(
228       uint64_t tile_num, const T& domain_low, const T& tile_extent) {
229     typedef typename std::make_unsigned<T>::type unsigned_t;
230     return (unsigned_t)domain_low + tile_num * (unsigned_t)tile_extent;
231   }
232 
233   /**
234    * Returns the tile lower coordinate for floating point values.
235    *
236    * @param tile_num The tile index.
237    * @param domain_low The minimum value for the domain.
238    * @param tile_extent The tile extent.
239    * @return The tile lower coordinate.
240    */
241   template <
242       class T,
243       typename std::enable_if<!std::is_integral<T>::value>::type* = nullptr>
tile_coord_low(uint64_t tile_num,const T & domain_low,const T & tile_extent)244   static T tile_coord_low(
245       uint64_t tile_num, const T& domain_low, const T& tile_extent) {
246     return domain_low + tile_num * tile_extent;
247   }
248 
249   /**
250    * Returns the tile upper coordinate for integer values.
251    *
252    * @param tile_num The tile index.
253    * @param domain_low The minimum value for the domain.
254    * @param tile_extent The tile extent.
255    * @return The tile upper coordinate.
256    */
257   template <
258       class T,
259       typename std::enable_if<std::is_integral<T>::value>::type* = nullptr>
tile_coord_high(uint64_t tile_num,const T & domain_low,const T & tile_extent)260   static T tile_coord_high(
261       uint64_t tile_num, const T& domain_low, const T& tile_extent) {
262     typedef typename std::make_unsigned<T>::type unsigned_t;
263     if ((unsigned_t)tile_extent == std::numeric_limits<unsigned_t>::max())
264       return std::numeric_limits<T>::max() -
265              (domain_low == std::numeric_limits<T>::min());
266     return (unsigned_t)domain_low + ++tile_num * (unsigned_t)tile_extent - 1;
267   }
268 
269   /**
270    * Returns the tile upper coordinate for floating point values.
271    *
272    * @param tile_num The tile index.
273    * @param domain_low The minimum value for the domain.
274    * @param tile_extent The tile extent.
275    * @return The tile upper coordinate.
276    */
277   template <
278       class T,
279       typename std::enable_if<!std::is_integral<T>::value>::type* = nullptr>
tile_coord_high(uint64_t tile_num,const T & domain_low,const T & tile_extent)280   static T tile_coord_high(
281       uint64_t tile_num, const T& domain_low, const T& tile_extent) {
282     return std::nextafter(
283         domain_low + ++tile_num * tile_extent, std::numeric_limits<T>::min());
284   }
285 
286   /**
287    * Used to multiply values by the tile extent for integer values.
288    *
289    * @param v The value to multiply.
290    * @param tile_extent The tile extent.
291    * @return The result of the multiplication.
292    */
293   template <
294       class T,
295       typename std::enable_if<std::is_integral<T>::value>::type* = nullptr>
tile_extent_mult(const T & v,const T & tile_extent)296   static T tile_extent_mult(const T& v, const T& tile_extent) {
297     typedef typename std::make_unsigned<T>::type unsigned_t;
298     return (unsigned_t)v * (unsigned_t)tile_extent;
299   }
300 
301   /**
302    * Used to multiply values by the tile extent for floating point values.
303    *
304    * @param v The value to multiply.
305    * @param tile_extent The tile extent.
306    * @return The result of the multiplication.
307    */
308   template <
309       class T,
310       typename std::enable_if<!std::is_integral<T>::value>::type* = nullptr>
tile_extent_mult(const T & v,const T & tile_extent)311   static T tile_extent_mult(const T& v, const T& tile_extent) {
312     return v * tile_extent;
313   }
314 
315   /**
316    * Retrieves the value `v` that lies at the end (ceil) of the tile
317    * that is `tile_num` tiles apart from the beginning of `r`.
318    */
319   void ceil_to_tile(const Range& r, uint64_t tile_num, ByteVecValue* v) const;
320 
321   /**
322    * Returns the value that lies at the end (ceil) of the tile
323    * that is `tile_num` tiles apart from the beginning of `r`.
324    */
325   template <class T>
326   static void ceil_to_tile(
327       const Dimension* dim, const Range& r, uint64_t tile_num, ByteVecValue* v);
328 
329   /**
330    * Performs correctness checks on the input range.
331    *
332    * Specifically, it checks
333    *     - if the lower range bound is larger than the upper
334    *     - if the range falls outside the dimension domain
335    *     - for real domains, if any range bound is NaN
336    *
337    */
338   Status check_range(const Range& range) const;
339 
340   /**
341    * Adjust a range so that the upper/lower bounds are within the dimension's
342    * domain.
343    * @param range Query range object that might be mutated
344    * @return status if error
345    */
346   Status adjust_range_oob(Range* range) const;
347 
348   /**
349    * Performs correctness checks on the input range. Returns `true`
350    * upon error and stores an error message to `err_msg`.
351    *
352    * Applicable to integral domains.
353    */
354   template <
355       typename T,
356       typename std::enable_if<std::is_integral<T>::value>::type* = nullptr>
check_range(const Dimension * dim,const Range & range,std::string * err_msg)357   static bool check_range(
358       const Dimension* dim, const Range& range, std::string* err_msg) {
359     auto domain = (const T*)dim->domain().data();
360     auto r = (const T*)range.data();
361 
362     // Check range bounds
363     if (r[0] > r[1]) {
364       std::stringstream ss;
365       ss << "Cannot add range to dimension; Lower range "
366          << "bound " << r[0] << " cannot be larger than the higher bound "
367          << r[1];
368       *err_msg = ss.str();
369       return false;
370     }
371 
372     // Check out-of-bounds
373     if (r[0] < domain[0] || r[1] > domain[1]) {
374       std::stringstream ss;
375       ss << "Range [" << r[0] << ", " << r[1] << "] is out of domain bounds ["
376          << domain[0] << ", " << domain[1] << "] on dimension '" << dim->name()
377          << "'";
378       *err_msg = ss.str();
379       return false;
380     }
381 
382     return true;
383   }
384 
385   /**
386    * Performs correctness checks on the input range. Returns `true`
387    * upon error and stores an error message to `err_msg`.
388    *
389    * Applicable to real domains.
390    */
391   template <
392       typename T,
393       typename std::enable_if<!std::is_integral<T>::value>::type* = nullptr>
check_range(const Dimension * dim,const Range & range,std::string * err_msg)394   static bool check_range(
395       const Dimension* dim, const Range& range, std::string* err_msg) {
396     auto domain = (const T*)dim->domain().data();
397     auto r = (const T*)range.data();
398 
399     // Check for NaN
400     if (std::isnan(r[0]) || std::isnan(r[1])) {
401       *err_msg = "Cannot add range to dimension; Range contains NaN";
402       return false;
403     }
404 
405     // Check range bounds
406     if (r[0] > r[1]) {
407       std::stringstream ss;
408       ss << "Cannot add range to dimension; Lower range "
409          << "bound " << r[0] << " cannot be larger than the higher bound "
410          << r[1];
411       *err_msg = ss.str();
412       return false;
413     }
414 
415     // Check out-of-bounds
416     if (r[0] < domain[0] || r[1] > domain[1]) {
417       std::stringstream ss;
418       ss << "Range [" << r[0] << ", " << r[1] << "] is out of domain bounds ["
419          << domain[0] << ", " << domain[1] << "] on dimension '" << dim->name()
420          << "'";
421       *err_msg = ss.str();
422 
423       return false;
424     }
425 
426     return true;
427   }
428 
429   /**
430    * Takes a range from a query and might mutate it so the lower/upper values
431    * are within the domain of the dimension. If mutation occurs a warning is
432    * logged
433    *
434    * @tparam T datatype
435    * @param dim dimension object to get domain from
436    * @param range Query range objects to mutate
437    */
438   template <
439       typename T,
440       typename std::enable_if<std::is_integral<T>::value>::type* = nullptr>
adjust_range_oob(const Dimension * dim,const Range * range)441   static void adjust_range_oob(const Dimension* dim, const Range* range) {
442     auto domain = (const T*)dim->domain().data();
443     auto r = (T*)range->data();
444 
445     // Check out-of-bounds
446     if (r[0] < domain[0]) {
447       std::stringstream ss;
448       ss << "Range lower bound " << r[0] << " is out of domain bounds ["
449          << domain[0] << ", " << domain[1]
450          << "]. Adjusting range lower bound to be " << domain[0]
451          << " on dimension '" << dim->name() << "'";
452       LOG_WARN(ss.str());
453 
454       r[0] = domain[0];
455     }
456 
457     if (r[1] > domain[1]) {
458       std::stringstream ss;
459       ss << "Range upper bound " << r[1] << " is out of domain bounds ["
460          << domain[0] << ", " << domain[1]
461          << "]. Adjusting range upper bound to be " << domain[1]
462          << " on dimension '" << dim->name() << "'";
463       LOG_WARN(ss.str());
464 
465       r[1] = domain[1];
466     }
467   }
468 
469   /**
470    * Takes a range from a query and might mutate it so the lower/upper values
471    * are within the domain of the dimension. If mutation occurs a warning is
472    * logged
473    *
474    * @tparam T datatype
475    * @param dim dimension object to get domain from
476    * @param range Query range objects to mutate
477    */
478   template <
479       typename T,
480       typename std::enable_if<!std::is_integral<T>::value>::type* = nullptr>
adjust_range_oob(const Dimension * dim,const Range * range)481   static void adjust_range_oob(const Dimension* dim, const Range* range) {
482     auto domain = (const T*)dim->domain().data();
483     auto r = (T*)range->data();
484 
485     // Check out-of-bounds
486     if (r[0] < domain[0]) {
487       std::stringstream ss;
488       ss << "Range lower bound " << r[0] << " is out of domain bounds ["
489          << domain[0] << ", " << domain[1]
490          << "]. Adjusting range lower bound to be " << domain[0]
491          << " on dimension '" << dim->name() << "'";
492       LOG_WARN(ss.str());
493 
494       r[0] = domain[0];
495     }
496 
497     if (r[1] > domain[1]) {
498       std::stringstream ss;
499       ss << "Range upper bound " << r[1] << " is out of domain bounds ["
500          << domain[0] << ", " << domain[1]
501          << "]. Adjusting range upper bound to be " << domain[1]
502          << " on dimension '" << dim->name() << "'";
503       LOG_WARN(ss.str());
504 
505       r[1] = domain[1];
506     }
507   }
508 
509   /** Returns true if the input range coincides with tile boundaries. */
510   bool coincides_with_tiles(const Range& r) const;
511 
512   /** Returns true if the input range coincides with tile boundaries. */
513   template <class T>
514   static bool coincides_with_tiles(const Dimension* dim, const Range& r);
515 
516   /**
517    * Computes the minimum bounding range of the values stored in
518    * `tile`. Applicable only to fixed-size dimensions.
519    */
520   Status compute_mbr(const Tile& tile, Range* mbr) const;
521 
522   /**
   * Computes the minimum bounding range of the values stored in
524    * `tile`.
525    */
526   template <class T>
527   static Status compute_mbr(const Tile& tile, Range* mbr);
528 
529   /**
530    * Computes the minimum bounding range of the values stored in
531    * `tile_val`. Applicable only to var-sized dimensions.
532    */
533   Status compute_mbr_var(
534       const Tile& tile_off, const Tile& tile_val, Range* mbr) const;
535 
536   /**
537    * Computes the minimum bounding range of the values stored in
538    * `tile_val`. Applicable only to var-sized dimensions.
539    */
540   template <class T>
541   static Status compute_mbr_var(
542       const Tile& tile_off, const Tile& tile_val, Range* mbr);
543 
544   /**
545    * Crops the input 1D range such that it does not exceed the
546    * dimension domain.
547    */
548   void crop_range(Range* range) const;
549 
550   /**
551    * Crops the input 1D range such that it does not exceed the
552    * dimension domain.
553    */
554   template <class T>
555   static void crop_range(const Dimension* dim, Range* range);
556 
557   /**
558    * Returns the domain range (high - low + 1) of the input
559    * 1D range. It returns 0 in case the dimension datatype
560    * is not integer or if there is an overflow.
561    */
562   uint64_t domain_range(const Range& range) const;
563 
564   /**
565    * Returns the domain range (high - low + 1) of the input
566    * 1D range. It returns MAX_UINT64 in case the dimension datatype
567    * is not integer or if there is an overflow.
568    */
569   template <class T>
570   static uint64_t domain_range(const Range& range);
571 
572   /** Expand fixed-sized 1D range `r` using value `v`. */
573   void expand_range_v(const void* v, Range* r) const;
574 
575   /** Expand fixed-sized 1D range `r` using value `v`. */
576   template <class T>
577   static void expand_range_v(const void* v, Range* r);
578 
579   /** Expand var-sized 1D range `r` using value `v`. */
580   static void expand_range_var_v(const char* v, uint64_t v_size, Range* r);
581 
582   /** Expand 1D range `r2` using 1D range `r1`. */
583   void expand_range(const Range& r1, Range* r2) const;
584 
585   /** Expand 1D range `r2` using 1D range `r1`. */
586   template <class T>
587   static void expand_range(const Range& r1, Range* r2);
588 
589   /**
590    * Expand 1D range `r2` using 1D range `r1`.
591    * Applicable to var-sized ranges.
592    */
593   void expand_range_var(const Range& r1, Range* r2) const;
594 
595   /**
596    * Expands the input 1D range to coincide with the dimension tiles.
597    * It is a noop if the tile extents are null and for real domains.
598    */
599   void expand_to_tile(Range* range) const;
600 
601   /**
602    * Expands the input 1D range to coincide with the dimension tiles.
603    * It is a noop if the tile extents are null and for real domains.
604    */
605   template <class T>
606   static void expand_to_tile(const Dimension* dim, Range* range);
607 
608   /**
609    * Returns error if the input coordinate is out-of-bounds with respect
610    * to the dimension domain.
611    */
612   Status oob(const void* coord) const;
613 
614   /**
615    * Returns true if the input coordinate is out-of-bounds with respect
616    * to the dimension domain.
617    *
618    * @param dim The dimension to apply the oob check on.
619    * @param coord The coordinate to be checked. It will properly be
620    *     type-cast to the dimension datatype.
621    * @param err_msg An error message to be retrieved in case the function
622    *     returns true.
   * @return True if the input coordinate is out-of-bounds.
624    */
625   template <class T>
626   static bool oob(
627       const Dimension* dim, const void* coord, std::string* err_msg);
628 
629   /** Return true if r1 is fully covered by r2. */
630   bool covered(const Range& r1, const Range& r2) const;
631 
632   /** Return true if r1 is fully covered by r2. */
633   template <class T>
634   static bool covered(const Range& r1, const Range& r2);
635 
636   /** Return true if the input 1D ranges overlap. */
637   bool overlap(const Range& r1, const Range& r2) const;
638 
639   /** Return true if the input 1D ranges overlap. */
640   template <class T>
641   static bool overlap(const Range& r1, const Range& r2);
642 
643   /** Return ratio of the overlap of the two input 1D ranges over `r2`. */
644   double overlap_ratio(const Range& r1, const Range& r2) const;
645 
646   /** Return ratio of the overlap of the two input 1D ranges over `r2`. */
647   template <class T>
648   static double overlap_ratio(const Range& r1, const Range& r2);
649 
650   /** Splits `r` at point `v`, producing 1D ranges `r1` and `r2`. */
651   void split_range(
652       const Range& r, const ByteVecValue& v, Range* r1, Range* r2) const;
653 
654   /** Splits `r` at point `v`, producing 1D ranges `r1` and `r2`. */
655   template <class T>
656   static void split_range(
657       const Range& r, const ByteVecValue& v, Range* r1, Range* r2);
658 
659   /**
660    * Computes the splitting point `v` of `r`, and sets `unsplittable`
661    * to true if `r` cannot be split.
662    */
663   void splitting_value(
664       const Range& r, ByteVecValue* v, bool* unsplittable) const;
665 
666   /**
667    * Computes the splitting point `v` of `r`, and sets `unsplittable`
668    * to true if `r` cannot be split.
669    */
670   template <class T>
671   static void splitting_value(
672       const Range& r, ByteVecValue* v, bool* unsplittable);
673 
674   /** Return the number of tiles the input range intersects. */
675   uint64_t tile_num(const Range& range) const;
676 
677   /** Return the number of tiles the input range intersects. */
678   template <class T>
679   static uint64_t tile_num(const Dimension* dim, const Range& range);
680 
681   /**
682    * Maps the c-th cell in the input query buffer to a uint64 value,
683    * based on discretizing the domain from 0 to `max_bucket_val`.
684    * This value is used to compute a Hilbert value.
685    */
686   uint64_t map_to_uint64(
687       const QueryBuffer* buff,
688       uint64_t c,
689       uint64_t coords_num,
690       int bits,
691       uint64_t max_bucket_val) const;
692 
693   /**
694    * Maps the c-th cell in the input query buffer to a uint64 value,
695    * based on discretizing the domain from 0 to `max_bucket_val`.
696    * This value is used to compute a Hilbert value.
697    */
698   template <class T>
699   static uint64_t map_to_uint64(
700       const Dimension* dim,
701       const QueryBuffer* buff,
702       uint64_t c,
703       uint64_t coords_num,
704       int bits,
705       uint64_t max_bucket_val);
706 
707   /**
708    * Maps the input coordinate to a uint64 value,
709    * based on discretizing the domain from 0 to `max_bucket_val`.
710    * This value is used to compute a Hilbert value.
711    */
712   uint64_t map_to_uint64(
713       const void* coord,
714       uint64_t coord_size,
715       int bits,
716       uint64_t max_bucket_val) const;
717 
718   /**
719    * Maps the input coordinate to a uint64 value,
720    * based on discretizing the domain from 0 to `max_bucket_val`.
721    * This value is used to compute a Hilbert value.
722    */
723   template <class T>
724   static uint64_t map_to_uint64_2(
725       const Dimension* dim,
726       const void* coord,
727       uint64_t coord_size,
728       int bits,
729       uint64_t max_bucket_val);
730 
731   /**
732    * Maps the input result coordinate to a uint64 value,
733    * based on discretizing the domain from 0 to `max_bucket_val`.
734    * This value is used to compute a Hilbert value.
735    */
736   uint64_t map_to_uint64(
737       const ResultCoords& coord,
738       uint32_t dim_idx,
739       int bits,
740       uint64_t max_bucket_val) const;
741 
742   /**
743    * Maps the input result coordinate to a uint64 value,
744    * based on discretizing the domain from 0 to `max_bucket_val`.
745    * This value is used to compute a Hilbert value.
746    */
747   template <class T>
748   static uint64_t map_to_uint64_3(
749       const Dimension* dim,
750       const ResultCoords& coord,
751       uint32_t dim_idx,
752       int bits,
753       uint64_t max_bucket_val);
754 
755   /**
756    * Maps a uint64 value (produced by `map_to_uint64`) to its corresponding
757    * value in the original dimension domain. `max_bucket_val` is the maximum
758    * value used to discretize the original value.
759    */
760   ByteVecValue map_from_uint64(
761       uint64_t value, int bits, uint64_t max_bucket_val) const;
762 
763   /**
764    * Maps a uint64 value (produced by `map_to_uint64`) to its corresponding
765    * value in the original dimension domain. `max_bucket_val` is the maximum
766    * value used to discretize the original value.
767    */
768   template <class T>
769   static ByteVecValue map_from_uint64(
770       const Dimension* dim, uint64_t value, int bits, uint64_t max_bucket_val);
771 
772   /** Returns `true` if `value` is smaller than the start of `range`. */
773   bool smaller_than(const ByteVecValue& value, const Range& range) const;
774 
775   /** Returns `true` if `value` is smaller than the start of `range`. */
776   template <class T>
777   static bool smaller_than(
778       const Dimension* dim, const ByteVecValue& value, const Range& range);
779 
780   /**
781    * Serializes the object members into a binary buffer.
782    *
783    * @param buff The buffer to serialize the data into.
784    * @param version The array schema version
785    * @return Status
786    */
787   Status serialize(Buffer* buff, uint32_t version);
788 
789   /** Sets the domain. */
790   Status set_domain(const void* domain);
791 
792   /** Sets the domain. */
793   Status set_domain(const Range& domain);
794 
795   /** Sets the domain without type, null, or bounds checks. */
796   Status set_domain_unsafe(const void* domain);
797 
798   /** Sets the filter pipeline for this dimension. */
799   Status set_filter_pipeline(const FilterPipeline* pipeline);
800 
801   /** Sets the tile extent. */
802   Status set_tile_extent(const void* tile_extent);
803 
804   /** Sets the tile extent. */
805   Status set_tile_extent(const ByteVecValue& tile_extent);
806 
807   /**
   * If the tile extent is `null`, this function sets the
   * tile extent to the dimension domain range.
810    *
811    * @note This is applicable only to dense arrays.
812    */
813   Status set_null_tile_extent_to_range();
814 
815   /**
   * If the tile extent is `null`, this function sets the
   * tile extent to the dimension domain range.
818    *
819    * @tparam T The dimension type.
820    *
821    * @note This is applicable only to dense arrays.
822    */
823   template <class T>
824   Status set_null_tile_extent_to_range();
825 
826   /** Returns the tile extent. */
827   const ByteVecValue& tile_extent() const;
828 
829   /** Returns the dimension type. */
830   Datatype type() const;
831 
832   /** Returns true if the dimension is var-sized. */
833   bool var_size() const;
834 
835  private:
836   /* ********************************* */
837   /*         PRIVATE ATTRIBUTES        */
838   /* ********************************* */
839 
840   /** The number of values per coordinate. */
841   unsigned cell_val_num_;
842 
843   /** The dimension domain. */
844   Range domain_;
845 
846   /** The dimension filter pipeline. */
847   FilterPipeline filters_;
848 
849   /** The dimension name. */
850   std::string name_;
851 
852   /** The tile extent of the dimension. */
853   ByteVecValue tile_extent_;
854 
855   /** The dimension type. */
856   Datatype type_;
857 
858   /**
859    * Stores the appropriate templated ceil_to_tile() function based on the
860    * dimension datatype.
861    */
862   std::function<void(const Dimension*, const Range&, uint64_t, ByteVecValue*)>
863       ceil_to_tile_func_;
864 
865   /**
866    * Stores the appropriate templated check_range() function based on the
867    * dimension datatype.
868    */
869   std::function<bool(const Dimension*, const Range&, std::string*)>
870       check_range_func_;
871 
872   /**
   * Stores the appropriate templated adjust_range_oob() function based on the
874    * dimension datatype.
875    */
876   std::function<void(const Dimension*, const Range*)> adjust_range_oob_func_;
877 
878   /**
879    * Stores the appropriate templated coincides_with_tiles() function based on
880    * the dimension datatype.
881    */
882   std::function<bool(const Dimension*, const Range&)>
883       coincides_with_tiles_func_;
884 
885   /**
886    * Stores the appropriate templated compute_mbr() function based on the
887    * dimension datatype.
888    */
889   std::function<Status(const Tile&, Range*)> compute_mbr_func_;
890 
891   /**
892    * Stores the appropriate templated compute_mbr_var() function based on the
893    * dimension datatype.
894    */
895   std::function<Status(const Tile&, const Tile&, Range*)> compute_mbr_var_func_;
896 
897   /**
898    * Stores the appropriate templated crop_range() function based on the
899    * dimension datatype.
900    */
901   std::function<void(const Dimension* dim, Range*)> crop_range_func_;
902 
  /**
   * Stores the appropriate templated domain_range() function based on the
   * dimension datatype.
   */
907   std::function<uint64_t(const Range&)> domain_range_func_;
908 
  /**
   * Stores the appropriate templated expand_range_v() function based on the
   * dimension datatype.
   */
913   std::function<void(const void*, Range*)> expand_range_v_func_;
914 
915   /**
916    * Stores the appropriate templated expand_range() function based on the
917    * dimension datatype.
918    */
919   std::function<void(const Range&, Range*)> expand_range_func_;
920 
921   /**
922    * Stores the appropriate templated expand_to_tile() function based on the
923    * dimension datatype.
924    */
925   std::function<void(const Dimension* dim, Range*)> expand_to_tile_func_;
926 
927   /**
928    * Stores the appropriate templated oob() function based on the
929    * dimension datatype.
930    */
931   std::function<bool(const Dimension* dim, const void*, std::string*)>
932       oob_func_;
933 
934   /**
935    * Stores the appropriate templated covered() function based on the
936    * dimension datatype.
937    */
938   std::function<bool(const Range&, const Range&)> covered_func_;
939 
940   /**
941    * Stores the appropriate templated overlap() function based on the
942    * dimension datatype.
943    */
944   std::function<bool(const Range&, const Range&)> overlap_func_;
945 
946   /**
947    * Stores the appropriate templated overlap_ratio() function based on the
948    * dimension datatype.
949    */
950   std::function<double(const Range&, const Range&)> overlap_ratio_func_;
951 
952   /**
953    * Stores the appropriate templated split_range() function based on the
954    * dimension datatype.
955    */
956   std::function<void(const Range&, const ByteVecValue&, Range*, Range*)>
957       split_range_func_;
958 
959   /**
960    * Stores the appropriate templated splitting_value() function based on the
961    * dimension datatype.
962    */
963   std::function<void(const Range&, ByteVecValue*, bool* unsplittable)>
964       splitting_value_func_;
965 
966   /**
967    * Stores the appropriate templated tile_num() function based on the
968    * dimension datatype.
969    */
970   std::function<uint64_t(const Dimension* dim, const Range&)> tile_num_func_;
971 
972   /**
973    * Stores the appropriate templated map_to_uint64() function based on
974    * the dimension datatype.
975    */
976   std::function<uint64_t(
977       const Dimension*, const QueryBuffer*, uint64_t, uint64_t, int, uint64_t)>
978       map_to_uint64_func_;
979 
980   /**
981    * Stores the appropriate templated map_to_uint64_2() function based on
982    * the dimension datatype.
983    */
984   std::function<uint64_t(
985       const Dimension*, const void*, uint64_t, int, uint64_t)>
986       map_to_uint64_2_func_;
987 
988   /**
989    * Stores the appropriate templated map_to_uint64_3() function based on
990    * the dimension datatype.
991    */
992   std::function<uint64_t(
993       const Dimension*, const ResultCoords&, uint32_t, int, uint64_t)>
994       map_to_uint64_3_func_;
995 
996   /**
997    * Stores the appropriate templated map_from_uint64() function based on
998    * the dimension datatype.
999    */
1000   std::function<ByteVecValue(const Dimension*, uint64_t, int, uint64_t)>
1001       map_from_uint64_func_;
1002 
1003   /**
1004    * Stores the appropriate templated smaller_than() function based on
1005    * the dimension datatype.
1006    */
1007   std::function<bool(const Dimension*, const ByteVecValue&, const Range&)>
1008       smaller_than_func_;
1009 
1010   /* ********************************* */
1011   /*          PRIVATE METHODS          */
1012   /* ********************************* */
1013 
1014   /** Returns an error if the set domain is invalid. */
1015   Status check_domain() const;
1016 
1017   /**
1018    * Returns an error if the set domain is invalid.
1019    * Applicable to integral domains.
1020    */
1021   template <
1022       typename T,
1023       typename std::enable_if<std::is_integral<T>::value>::type* = nullptr>
check_domain()1024   Status check_domain() const {
1025     assert(!domain_.empty());
1026     auto domain = (const T*)domain_.data();
1027 
1028     // Upper bound should not be smaller than lower
1029     if (domain[1] < domain[0])
1030       return LOG_STATUS(Status::DimensionError(
1031           "Domain check failed; Upper domain bound should "
1032           "not be smaller than the lower one"));
1033 
1034     // Domain range must not exceed the maximum unsigned number
1035     // for integer domains
1036     if (domain[0] == std::numeric_limits<T>::min() &&
1037         domain[1] == std::numeric_limits<T>::max())
1038       return LOG_STATUS(Status::DimensionError(
1039           "Domain check failed; Domain range (upper + lower + 1) is larger "
1040           "than the maximum unsigned number"));
1041 
1042     return Status::Ok();
1043   }
1044 
1045   /**
1046    * Returns an error if the set domain is invalid.
1047    * Applicable to real domains.
1048    */
1049   template <
1050       typename T,
1051       typename std::enable_if<!std::is_integral<T>::value>::type* = nullptr>
check_domain()1052   Status check_domain() const {
1053     assert(!domain_.empty());
1054     auto domain = (const T*)domain_.data();
1055 
1056     // Check for NAN and INF
1057     if (std::isinf(domain[0]) || std::isinf(domain[1]))
1058       return LOG_STATUS(
1059           Status::DimensionError("Domain check failed; domain contains NaN"));
1060     if (std::isnan(domain[0]) || std::isnan(domain[1]))
1061       return LOG_STATUS(
1062           Status::DimensionError("Domain check failed; domain contains NaN"));
1063 
1064     // Upper bound should not be smaller than lower
1065     if (domain[1] < domain[0])
1066       return LOG_STATUS(Status::DimensionError(
1067           "Domain check failed; Upper domain bound should "
1068           "not be smaller than the lower one"));
1069 
1070     return Status::Ok();
1071   }
1072 
1073   /** Returns an error if the set tile extent is invalid. */
1074   Status check_tile_extent() const;
1075 
1076   /** Returns an error if the set tile extent is invalid. */
1077   template <class T>
1078   Status check_tile_extent() const;
1079 
1080   /**
1081    * Returns an error if the set tile extent exceeds the
1082    * upper floor.
1083    */
1084   template <typename T>
1085   Status check_tile_extent_upper_floor(const T* domain, T tile_extent) const;
1086 
1087   /**
1088    * The internal work routine for `check_tile_extent_upper_floor`
1089    * that accepts a template type for the floor type.
1090    */
1091   template <typename T_EXTENT, typename T_FLOOR>
1092   Status check_tile_extent_upper_floor_internal(
1093       const T_EXTENT* domain, T_EXTENT tile_extent) const;
1094 
1095   /** Returns the domain in string format. */
1096   std::string domain_str() const;
1097 
1098   /** Returns the tile extent in string format. */
1099   std::string tile_extent_str() const;
1100 
1101   /** Sets the templated ceil_to_tile() function. */
1102   void set_ceil_to_tile_func();
1103 
1104   /** Sets the templated check_range() function. */
1105   void set_check_range_func();
1106 
  /** Sets the templated adjust_range_oob() function. */
1108   void set_adjust_range_oob_func();
1109 
1110   /** Sets the templated coincides_with_tiles() function. */
1111   void set_coincides_with_tiles_func();
1112 
1113   /** Sets the templated compute_mbr() function. */
1114   void set_compute_mbr_func();
1115 
1116   /** Sets the templated crop_range() function. */
1117   void set_crop_range_func();
1118 
1119   /** Sets the templated domain_range() function. */
1120   void set_domain_range_func();
1121 
1122   /** Sets the templated expand_range() function. */
1123   void set_expand_range_func();
1124 
1125   /** Sets the templated expand_range_v() function. */
1126   void set_expand_range_v_func();
1127 
1128   /** Sets the templated expand_to_tile() function. */
1129   void set_expand_to_tile_func();
1130 
1131   /** Sets the templated oob() function. */
1132   void set_oob_func();
1133 
1134   /** Sets the templated covered() function. */
1135   void set_covered_func();
1136 
1137   /** Sets the templated overlap() function. */
1138   void set_overlap_func();
1139 
1140   /** Sets the templated overlap_ratio() function. */
1141   void set_overlap_ratio_func();
1142 
1143   /** Sets the templated split_range() function. */
1144   void set_split_range_func();
1145 
1146   /** Sets the templated splitting_value() function. */
1147   void set_splitting_value_func();
1148 
1149   /** Sets the templated tile_num() function. */
1150   void set_tile_num_func();
1151 
1152   /** Sets the templated map_to_uint64() function. */
1153   void set_map_to_uint64_func();
1154 
1155   /** Sets the templated map_to_uint64_2() function. */
1156   void set_map_to_uint64_2_func();
1157 
1158   /** Sets the templated map_to_uint64_3() function. */
1159   void set_map_to_uint64_3_func();
1160 
1161   /** Sets the templated map_from_uint64() function. */
1162   void set_map_from_uint64_func();
1163 
1164   /** Sets the templated smaller_than() function. */
1165   void set_smaller_than_func();
1166 };
1167 
1168 }  // namespace sm
1169 }  // namespace tiledb
1170 
1171 namespace tiledb::common {
1172 template <>
1173 struct blank<tiledb::sm::Dimension> : public tiledb::sm::Dimension {
1174   blank();
1175 };
1176 }  // namespace tiledb::common
1177 
1178 #endif  // TILEDB_DIMENSION_H
1179