1 /**
2  * @file   types.h
3  *
4  * @section LICENSE
5  *
6  * The MIT License
7  *
8  * @copyright Copyright (c) 2017-2021 TileDB, Inc.
9  *
10  * Permission is hereby granted, free of charge, to any person obtaining a copy
11  * of this software and associated documentation files (the "Software"), to deal
12  * in the Software without restriction, including without limitation the rights
13  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
14  * copies of the Software, and to permit persons to whom the Software is
15  * furnished to do so, subject to the following conditions:
16  *
17  * The above copyright notice and this permission notice shall be included in
18  * all copies or substantial portions of the Software.
19  *
20  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
21  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
23  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
24  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
25  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
26  * THE SOFTWARE.
27  *
28  * @section DESCRIPTION
29  *
30  * This file defines common types for Query/Write/Read class usage
31  */
32 
33 #ifndef TILEDB_TYPES_H
34 #define TILEDB_TYPES_H
35 
#include <cassert>
#include <cstdint>
#include <cstring>
#include <string>
#include <utility>
#include <vector>
#include "tiledb/common/logger_public.h"
41 
42 namespace tiledb {
43 namespace sm {
44 
45 /* ********************************* */
46 /*          TYPE DEFINITIONS         */
47 /* ********************************* */
48 
49 /**
50  * Defines a 1D range (low, high), flattened in a sequence of bytes.
51  * If the range consists of var-sized values (e.g., strings), then
52  * the format is:
53  *
54  * low_nbytes (uint32) | low | high_nbytes (uint32) | high
55  */
56 class Range {
57  public:
58   /** Default constructor. */
Range()59   Range()
60       : range_start_size_(0)
61       , var_size_(false)
62       , partition_depth_(0) {
63   }
64 
65   /** Constructor setting a range. */
Range(const void * range,uint64_t range_size)66   Range(const void* range, uint64_t range_size)
67       : Range() {
68     set_range(range, range_size);
69   }
70 
71   /** Constructor setting a range. */
Range(const void * range,uint64_t range_size,uint64_t range_start_size)72   Range(const void* range, uint64_t range_size, uint64_t range_start_size)
73       : Range() {
74     set_range(range, range_size, range_start_size);
75   }
76 
77   /** Copy constructor. */
78   Range(const Range&) = default;
79 
80   /** Move constructor. */
81   Range(Range&&) = default;
82 
83   /** Destructor. */
84   ~Range() = default;
85 
86   /** Copy-assign operator.*/
87   Range& operator=(const Range&) = default;
88 
89   /** Move-assign operator. */
90   Range& operator=(Range&&) = default;
91 
92   /** Sets a fixed-sized range serialized in `r`. */
set_range(const void * r,uint64_t r_size)93   void set_range(const void* r, uint64_t r_size) {
94     range_.resize(r_size);
95     std::memcpy(&range_[0], r, r_size);
96   }
97 
98   /** Sets a var-sized range serialized in `r`. */
set_range(const void * r,uint64_t r_size,uint64_t range_start_size)99   void set_range(const void* r, uint64_t r_size, uint64_t range_start_size) {
100     range_.resize(r_size);
101     std::memcpy(&range_[0], r, r_size);
102     range_start_size_ = range_start_size;
103     var_size_ = true;
104   }
105 
106   /** Sets a var-sized range `[r1, r2]`. */
set_range_var(const void * r1,uint64_t r1_size,const void * r2,uint64_t r2_size)107   void set_range_var(
108       const void* r1, uint64_t r1_size, const void* r2, uint64_t r2_size) {
109     range_.resize(r1_size + r2_size);
110     std::memcpy(&range_[0], r1, r1_size);
111     auto c = (char*)(&range_[0]);
112     std::memcpy(c + r1_size, r2, r2_size);
113     range_start_size_ = r1_size;
114     var_size_ = true;
115   }
116 
117   /** Sets a string range. */
set_str_range(const std::string & s1,const std::string & s2)118   void set_str_range(const std::string& s1, const std::string& s2) {
119     auto size = s1.size() + s2.size();
120     if (size == 0) {
121       range_.clear();
122       range_start_size_ = 0;
123       return;
124     }
125 
126     set_range_var(s1.data(), s1.size(), s2.data(), s2.size());
127   }
128 
129   /** Returns the pointer to the range flattened bytes. */
data()130   const void* data() const {
131     return range_.empty() ? nullptr : &range_[0];
132   }
133 
134   /** Returns a pointer to the start of the range. */
start()135   const void* start() const {
136     return &range_[0];
137   }
138 
139   /** Copies 'start' into this range's start bytes for fixed-size ranges. */
set_start(const void * const start)140   void set_start(const void* const start) {
141     if (var_size_)
142       LOG_FATAL("Unexpected var-sized range; cannot set end range.");
143     const size_t fixed_size = range_.size() / 2;
144     std::memcpy(&range_[0], start, fixed_size);
145   }
146 
147   /** Returns the start as a string. */
start_str()148   std::string start_str() const {
149     if (start_size() == 0)
150       return std::string();
151     return std::string((const char*)start(), start_size());
152   }
153 
154   /** Returns the end as a string. */
end_str()155   std::string end_str() const {
156     if (end_size() == 0)
157       return std::string();
158     return std::string((const char*)end(), end_size());
159   }
160 
161   /**
162    * Returns the size of the start of the range.
163    * Non-zero only for var-sized ranges.
164    */
start_size()165   uint64_t start_size() const {
166     return range_start_size_;
167   }
168 
169   /**
170    * Returns the size of the end of the range.
171    * Non-zero only for var-sized ranges.
172    */
end_size()173   uint64_t end_size() const {
174     if (!var_size_)
175       return 0;
176     return range_.size() - range_start_size_;
177   }
178 
179   /** Returns a pointer to the end of the range. */
end()180   const void* end() const {
181     auto end_pos = var_size_ ? range_start_size_ : range_.size() / 2;
182     return range_.empty() ? nullptr : &range_[end_pos];
183   }
184 
185   /** Copies 'end' into this range's end bytes for fixed-size ranges. */
set_end(const void * const end)186   void set_end(const void* const end) {
187     if (var_size_)
188       LOG_FATAL("Unexpected var-sized range; cannot set end range.");
189     const size_t fixed_size = range_.size() / 2;
190     std::memcpy(&range_[fixed_size], end, fixed_size);
191   }
192 
193   /** Returns true if the range is empty. */
empty()194   bool empty() const {
195     return range_.empty();
196   }
197 
198   /** Clears the range. */
clear()199   void clear() {
200     range_.clear();
201   }
202 
203   /** Returns the range size in bytes. */
size()204   uint64_t size() const {
205     return range_.size();
206   }
207 
208   /** Equality operator. */
209   bool operator==(const Range& r) const {
210     return range_ == r.range_ && range_start_size_ == r.range_start_size_;
211   }
212 
213   /** Returns true if the range start is the same as its end. */
unary()214   bool unary() const {
215     // If the range is empty, then it corresponds to strings
216     // covering the whole domain (so it is not unary)
217     if (range_.empty())
218       return false;
219 
220     bool same_size = !var_size_ || 2 * range_start_size_ == range_.size();
221     return same_size &&
222            !std::memcmp(
223                &range_[0], &range_[range_.size() / 2], range_.size() / 2);
224   }
225 
226   /** True if the range is variable sized. */
var_size()227   bool var_size() const {
228     return var_size_;
229   }
230 
231   /** Sets the partition depth. */
set_partition_depth(uint64_t partition_depth)232   void set_partition_depth(uint64_t partition_depth) {
233     partition_depth_ = partition_depth;
234   }
235 
236   /** Returns the partition depth. */
partition_depth()237   uint64_t partition_depth() const {
238     return partition_depth_;
239   }
240 
241  private:
242   /** The range as a flat byte vector. */
243   std::vector<uint8_t> range_;
244 
245   /** The size of the start of `range_`. */
246   uint64_t range_start_size_;
247 
248   /** Is the range var sized. */
249   bool var_size_;
250 
251   /**
252    * The ranges in a query's initial subarray have a depth of 0.
253    * When a range is split, the depth on the split ranges are
254    * set to +1 the depth of the original range.
255    */
256   uint64_t partition_depth_;
257 };
258 
259 /** An N-dimensional range, consisting of a vector of 1D ranges. */
260 typedef std::vector<Range> NDRange;
261 
/**
 * An untyped value that is hardly more than raw byte storage.
 *
 * The class is transitional and should be considered deprecated from the
 * moment it was introduced: every use is meant to be rewritten in terms of
 * ordinary types.
 *
 * It began as a typedef for a byte vector. Today it offers a few methods
 * that capture the common usage patterns, so that calling code does not have
 * to know about the underlying representation. It is not perfect and is not
 * going to be.
 *
 * A small set of vector methods is forwarded so that code that has not been
 * converted yet keeps its legacy behavior. The incremental goal is to remove
 * these forwards as the code base moves away from untyped variables.
 */
class ByteVecValue {
  using Base = std::vector<uint8_t>;

  /** The bytes backing the value; empty when no value is present. */
  Base x_;

 public:
  using size_type = Base::size_type;
  using reference = Base::reference;

  /** Default constructor; the object starts out without any bytes. */
  ByteVecValue()
      : x_() {
  }

  /** Fixed-size constructor; creates storage of `n` bytes. */
  explicit ByteVecValue(Base::size_type n)
      : x_(n) {
  }

  /** Move constructor from the underlying vector type. */
  explicit ByteVecValue(std::vector<uint8_t>&& y)
      : x_(std::move(y)) {
  }

  /**
   * Assigns `val` as though a variable of type T lived at the very start of
   * the storage. The storage is grown to `sizeof(T)` bytes first if it is
   * smaller than that; it is never shrunk.
   *
   * @post size() >= sizeof(T)
   *
   * @tparam T Type of the phantom variable.
   * @param val The value to store; defaults to a value-initialized T.
   * @return A reference to the phantom variable which was assigned.
   */
  template <class T>
  T& assign_as(T val = T()) {
    if (x_.size() < sizeof(T)) {
      x_.resize(sizeof(T));
    }
    T* const slot = reinterpret_cast<T*>(x_.data());
    *slot = val;
    return *slot;
  }

  /// Drops whatever value is currently held.
  void assign_as_void() noexcept {
    x_.clear();
  }

  /**
   * Reads the value of a variable of type T as though it lived at the very
   * start of the storage.
   *
   * Only declared here: there is no general definition, and only certain
   * specializations are available.
   *
   * @tparam T Type of the phantom variable.
   * @return The value read.
   */
  template <class T>
  T rvalue_as() const;

  /// Forwarded from vector
  void resize(size_type count) {
    x_.resize(count);
  }

  /// Forwarded from vector
  void shrink_to_fit() {
    x_.shrink_to_fit();
  }

  /// Forwarded from vector
  uint8_t* data() noexcept {
    return x_.data();
  }

  /// Forwarded from vector
  const uint8_t* data() const noexcept {
    return x_.data();
  }

  /// Forwarded from vector
  Base::size_type size() const noexcept {
    return x_.size();
  }

  /**
   * Conversion to boolean in the style of std::optional.
   * @return True if at least one byte is stored, false otherwise.
   */
  explicit operator bool() const noexcept {
    return x_.size() != 0;
  }
};
359 
/** A plain vector of bytes. */
using ByteVec = std::vector<uint8_t>;
362 
363 }  // namespace sm
364 }  // namespace tiledb
365 
366 #endif  // TILEDB_TYPES_H
367