1 /**
2  * @file query_condition.cc
3  *
4  * @section LICENSE
5  *
6  * The MIT License
7  *
8  * @copyright Copyright (c) 2021 TileDB, Inc.
9  *
10  * Permission is hereby granted, free of charge, to any person obtaining a copy
11  * of this software and associated documentation files (the "Software"), to deal
12  * in the Software without restriction, including without limitation the rights
13  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
14  * copies of the Software, and to permit persons to whom the Software is
15  * furnished to do so, subject to the following conditions:
16  *
17  * The above copyright notice and this permission notice shall be included in
18  * all copies or substantial portions of the Software.
19  *
20  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
21  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
23  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
24  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
25  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
26  * THE SOFTWARE.
27  *
28  * @section DESCRIPTION
29  *
30  * Implements the QueryCondition class.
31  */
32 
#include "tiledb/sm/query/query_condition.h"
#include "tiledb/common/logger.h"
#include "tiledb/sm/enums/datatype.h"
#include "tiledb/sm/enums/query_condition_combination_op.h"
#include "tiledb/sm/enums/query_condition_op.h"
#include "tiledb/sm/misc/utils.h"

#include <cstring>
#include <iostream>
#include <map>
#include <mutex>
44 
45 using namespace tiledb::common;
46 
47 namespace tiledb {
48 namespace sm {
49 
QueryCondition()50 QueryCondition::QueryCondition() {
51 }
52 
/**
 * Copy constructor.
 *
 * Duplicates the clauses and the combination ops of `rhs`. The
 * `field_names_` member is not listed in the initializer list, so it is
 * default-initialized rather than copied.
 *
 * @param rhs The query condition to copy from.
 */
QueryCondition::QueryCondition(const QueryCondition& rhs)
    : clauses_(rhs.clauses_), combination_ops_(rhs.combination_ops_) {
}
57 
QueryCondition(QueryCondition && rhs)58 QueryCondition::QueryCondition(QueryCondition&& rhs)
59     : clauses_(std::move(rhs.clauses_))
60     , combination_ops_(std::move(rhs.combination_ops_)) {
61 }
62 
~QueryCondition()63 QueryCondition::~QueryCondition() {
64 }
65 
/**
 * Copy-assignment operator.
 *
 * Replaces the clauses and combination ops of this condition with copies
 * of those in `rhs`.
 *
 * @param rhs The query condition to copy from.
 * @return A reference to this object.
 */
QueryCondition& QueryCondition::operator=(const QueryCondition& rhs) {
  if (this != &rhs) {
    clauses_ = rhs.clauses_;
    combination_ops_ = rhs.combination_ops_;

    // `field_names()` lazily fills `field_names_` from `clauses_` and only
    // does so while the set is empty. Drop any names cached for the
    // previous clauses so the next call rebuilds them from the new ones.
    field_names_.clear();
  }

  return *this;
}
72 
operator =(QueryCondition && rhs)73 QueryCondition& QueryCondition::operator=(QueryCondition&& rhs) {
74   clauses_ = std::move(rhs.clauses_);
75   combination_ops_ = std::move(rhs.combination_ops_);
76 
77   return *this;
78 }
79 
init(std::string && field_name,const void * const condition_value,const uint64_t condition_value_size,const QueryConditionOp op)80 Status QueryCondition::init(
81     std::string&& field_name,
82     const void* const condition_value,
83     const uint64_t condition_value_size,
84     const QueryConditionOp op) {
85   if (!clauses_.empty()) {
86     return Status::QueryConditionError("Cannot reinitialize query condition");
87   }
88 
89   clauses_.emplace_back(
90       std::move(field_name), condition_value, condition_value_size, op);
91 
92   return Status::Ok();
93 }
94 
check(const ArraySchema * const array_schema) const95 Status QueryCondition::check(const ArraySchema* const array_schema) const {
96   for (const auto& clause : clauses_) {
97     const std::string field_name = clause.field_name_;
98     const uint64_t condition_value_size = clause.condition_value_data_.size();
99 
100     const Attribute* const attribute = array_schema->attribute(field_name);
101     if (!attribute) {
102       return Status::QueryConditionError(
103           "Clause field name is not an attribute " + field_name);
104     }
105 
106     if (clause.condition_value_ == nullptr) {
107       if (clause.op_ != QueryConditionOp::EQ &&
108           clause.op_ != QueryConditionOp::NE) {
109         return Status::QueryConditionError(
110             "Null value can only be used with equality operators");
111       }
112 
113       if ((!attribute->nullable()) &&
114           attribute->type() != Datatype::STRING_ASCII) {
115         return Status::QueryConditionError(
116             "Null value can only be used with nullable attributes");
117       }
118     }
119 
120     if (attribute->var_size() && attribute->type() != Datatype::STRING_ASCII &&
121         clause.condition_value_ != nullptr) {
122       return Status::QueryConditionError(
123           "Clause non-empty attribute may only be var-sized for ASCII "
124           "strings: " +
125           field_name);
126     }
127 
128     if (attribute->cell_val_num() != 1 &&
129         attribute->type() != Datatype::STRING_ASCII &&
130         (!attribute->var_size())) {
131       return Status::QueryConditionError(
132           "Clause attribute must have one value per cell for non-string fixed "
133           "size "
134           "attributes: " +
135           field_name);
136     }
137 
138     if (attribute->cell_size() != constants::var_size &&
139         attribute->cell_size() != condition_value_size &&
140         !(attribute->nullable() && clause.condition_value_ == nullptr) &&
141         attribute->type() != Datatype::STRING_ASCII &&
142         (!attribute->var_size())) {
143       return Status::QueryConditionError(
144           "Clause condition value size mismatch: " +
145           std::to_string(attribute->cell_size()) +
146           " != " + std::to_string(condition_value_size));
147     }
148 
149     switch (attribute->type()) {
150       case Datatype::ANY:
151         return Status::QueryConditionError(
152             "Clause attribute type may not be of type 'ANY': " + field_name);
153       case Datatype::STRING_UTF8:
154       case Datatype::STRING_UTF16:
155       case Datatype::STRING_UTF32:
156       case Datatype::STRING_UCS2:
157       case Datatype::STRING_UCS4:
158         return Status::QueryConditionError(
159             "Clause attribute type may not be a UTF/UCS string: " + field_name);
160       default:
161         break;
162     }
163   }
164 
165   return Status::Ok();
166 }
167 
combine(const QueryCondition & rhs,const QueryConditionCombinationOp combination_op,QueryCondition * const combined_cond) const168 Status QueryCondition::combine(
169     const QueryCondition& rhs,
170     const QueryConditionCombinationOp combination_op,
171     QueryCondition* const combined_cond) const {
172   assert(combination_op == QueryConditionCombinationOp::AND);
173   if (combination_op != QueryConditionCombinationOp::AND) {
174     return Status::QueryConditionError(
175         "Cannot combine query conditions; Only the 'AND' "
176         "combination op is supported");
177   }
178 
179   combined_cond->clauses_ = clauses_;
180   combined_cond->clauses_.insert(
181       combined_cond->clauses_.end(), rhs.clauses_.begin(), rhs.clauses_.end());
182 
183   combined_cond->combination_ops_ = combination_ops_;
184   combined_cond->combination_ops_.emplace_back(combination_op);
185   combined_cond->combination_ops_.insert(
186       combined_cond->combination_ops_.end(),
187       rhs.combination_ops_.begin(),
188       rhs.combination_ops_.end());
189 
190   combined_cond->field_names_.clear();
191 
192   return Status::Ok();
193 }
194 
empty() const195 bool QueryCondition::empty() const {
196   return clauses_.empty();
197 }
198 
field_names() const199 std::unordered_set<std::string> QueryCondition::field_names() const {
200   if (field_names_.empty()) {
201     for (const auto& clause : clauses_) {
202       field_names_.insert(clause.field_name_);
203     }
204   }
205 
206   return field_names_;
207 }
208 
209 /** Full template specialization for `char*` and `QueryConditionOp::LT`. */
210 template <>
211 struct QueryCondition::BinaryCmp<char*, QueryConditionOp::LT> {
cmptiledb::sm::QueryCondition::BinaryCmp212   static inline bool cmp(
213       const void* lhs, uint64_t lhs_size, const void* rhs, uint64_t rhs_size) {
214     if (lhs == nullptr) {
215       return false;
216     }
217 
218     const size_t min_size = std::min<size_t>(lhs_size, rhs_size);
219     const int cmp = strncmp(
220         static_cast<const char*>(lhs), static_cast<const char*>(rhs), min_size);
221     if (cmp != 0) {
222       return cmp < 0;
223     }
224 
225     return lhs_size < rhs_size;
226   }
227 };
228 
229 /** Partial template specialization for `char*` and `QueryConditionOp::LE. */
230 template <>
231 struct QueryCondition::BinaryCmp<char*, QueryConditionOp::LE> {
cmptiledb::sm::QueryCondition::BinaryCmp232   static inline bool cmp(
233       const void* lhs, uint64_t lhs_size, const void* rhs, uint64_t rhs_size) {
234     if (lhs == nullptr) {
235       return false;
236     }
237 
238     const size_t min_size = std::min<size_t>(lhs_size, rhs_size);
239     const int cmp = strncmp(
240         static_cast<const char*>(lhs), static_cast<const char*>(rhs), min_size);
241     if (cmp != 0) {
242       return cmp < 0;
243     }
244 
245     return lhs_size <= rhs_size;
246   }
247 };
248 
249 /** Partial template specialization for `char*` and `QueryConditionOp::GT`. */
250 template <>
251 struct QueryCondition::BinaryCmp<char*, QueryConditionOp::GT> {
cmptiledb::sm::QueryCondition::BinaryCmp252   static inline bool cmp(
253       const void* lhs, uint64_t lhs_size, const void* rhs, uint64_t rhs_size) {
254     if (lhs == nullptr) {
255       return false;
256     }
257 
258     const size_t min_size = std::min<size_t>(lhs_size, rhs_size);
259     const int cmp = strncmp(
260         static_cast<const char*>(lhs), static_cast<const char*>(rhs), min_size);
261     if (cmp != 0) {
262       return cmp > 0;
263     }
264 
265     return lhs_size > rhs_size;
266   }
267 };
268 
269 /** Partial template specialization for `char*` and `QueryConditionOp::GE`. */
270 template <>
271 struct QueryCondition::BinaryCmp<char*, QueryConditionOp::GE> {
cmptiledb::sm::QueryCondition::BinaryCmp272   static inline bool cmp(
273       const void* lhs, uint64_t lhs_size, const void* rhs, uint64_t rhs_size) {
274     if (lhs == nullptr) {
275       return false;
276     }
277 
278     const size_t min_size = std::min<size_t>(lhs_size, rhs_size);
279     const int cmp = strncmp(
280         static_cast<const char*>(lhs), static_cast<const char*>(rhs), min_size);
281     if (cmp != 0) {
282       return cmp > 0;
283     }
284 
285     return lhs_size >= rhs_size;
286   }
287 };
288 
289 /** Partial template specialization for `char*` and `QueryConditionOp::EQ`. */
290 template <>
291 struct QueryCondition::BinaryCmp<char*, QueryConditionOp::EQ> {
cmptiledb::sm::QueryCondition::BinaryCmp292   static inline bool cmp(
293       const void* lhs, uint64_t lhs_size, const void* rhs, uint64_t rhs_size) {
294     if (lhs == rhs) {
295       return true;
296     }
297 
298     if (lhs == nullptr || rhs == nullptr) {
299       return false;
300     }
301 
302     if (lhs_size != rhs_size) {
303       return false;
304     }
305 
306     return strncmp(
307                static_cast<const char*>(lhs),
308                static_cast<const char*>(rhs),
309                lhs_size) == 0;
310   }
311 };
312 
313 /** Partial template specialization for `char*` and `QueryConditionOp::NE`. */
314 template <>
315 struct QueryCondition::BinaryCmp<char*, QueryConditionOp::NE> {
cmptiledb::sm::QueryCondition::BinaryCmp316   static inline bool cmp(
317       const void* lhs, uint64_t lhs_size, const void* rhs, uint64_t rhs_size) {
318     if (rhs == nullptr && lhs != nullptr) {
319       return true;
320     }
321 
322     if (lhs == nullptr || rhs == nullptr) {
323       return false;
324     }
325 
326     if (lhs_size != rhs_size) {
327       return true;
328     }
329 
330     return strncmp(
331                static_cast<const char*>(lhs),
332                static_cast<const char*>(rhs),
333                lhs_size) != 0;
334   }
335 };
336 
337 /** Partial template specialization for `QueryConditionOp::LT`. */
338 template <typename T>
339 struct QueryCondition::BinaryCmp<T, QueryConditionOp::LT> {
cmptiledb::sm::QueryCondition::BinaryCmp340   static inline bool cmp(const void* lhs, uint64_t, const void* rhs, uint64_t) {
341     return lhs != nullptr &&
342            *static_cast<const T*>(lhs) < *static_cast<const T*>(rhs);
343   }
344 };
345 
346 /** Partial template specialization for `QueryConditionOp::LE`. */
347 template <typename T>
348 struct QueryCondition::BinaryCmp<T, QueryConditionOp::LE> {
cmptiledb::sm::QueryCondition::BinaryCmp349   static inline bool cmp(const void* lhs, uint64_t, const void* rhs, uint64_t) {
350     return lhs != nullptr &&
351            *static_cast<const T*>(lhs) <= *static_cast<const T*>(rhs);
352   }
353 };
354 
355 /** Partial template specialization for `QueryConditionOp::GT`. */
356 template <typename T>
357 struct QueryCondition::BinaryCmp<T, QueryConditionOp::GT> {
cmptiledb::sm::QueryCondition::BinaryCmp358   static inline bool cmp(const void* lhs, uint64_t, const void* rhs, uint64_t) {
359     return lhs != nullptr &&
360            *static_cast<const T*>(lhs) > *static_cast<const T*>(rhs);
361   }
362 };
363 
364 /** Partial template specialization for `QueryConditionOp::GE`. */
365 template <typename T>
366 struct QueryCondition::BinaryCmp<T, QueryConditionOp::GE> {
cmptiledb::sm::QueryCondition::BinaryCmp367   static inline bool cmp(const void* lhs, uint64_t, const void* rhs, uint64_t) {
368     return lhs != nullptr &&
369            *static_cast<const T*>(lhs) >= *static_cast<const T*>(rhs);
370   }
371 };
372 
373 /** Partial template specialization for `QueryConditionOp::EQ`. */
374 template <typename T>
375 struct QueryCondition::BinaryCmp<T, QueryConditionOp::EQ> {
cmptiledb::sm::QueryCondition::BinaryCmp376   static inline bool cmp(const void* lhs, uint64_t, const void* rhs, uint64_t) {
377     if (lhs == rhs) {
378       return true;
379     }
380 
381     if (lhs == nullptr || rhs == nullptr) {
382       return false;
383     }
384 
385     return *static_cast<const T*>(lhs) == *static_cast<const T*>(rhs);
386   }
387 };
388 
389 /** Partial template specialization for `QueryConditionOp::NE`. */
390 template <typename T>
391 struct QueryCondition::BinaryCmp<T, QueryConditionOp::NE> {
cmptiledb::sm::QueryCondition::BinaryCmp392   static inline bool cmp(const void* lhs, uint64_t, const void* rhs, uint64_t) {
393     if (rhs == nullptr && lhs != nullptr) {
394       return true;
395     }
396 
397     if (lhs == nullptr || rhs == nullptr) {
398       return false;
399     }
400 
401     return *static_cast<const T*>(lhs) != *static_cast<const T*>(rhs);
402   }
403 };
404 
405 /** Used to create a new result slab in QueryCondition::apply_clause. */
create_new_result_slab(uint64_t start,uint64_t pending_start,uint64_t stride,uint64_t current,ResultTile * const result_tile,std::vector<ResultCellSlab> * const out_result_cell_slabs)406 uint64_t create_new_result_slab(
407     uint64_t start,
408     uint64_t pending_start,
409     uint64_t stride,
410     uint64_t current,
411     ResultTile* const result_tile,
412     std::vector<ResultCellSlab>* const out_result_cell_slabs) {
413   // Create a result cell slab if there are pending cells.
414   if (pending_start != start + current) {
415     const uint64_t rcs_start = start + ((pending_start - start) * stride);
416     const uint64_t rcs_length = current - (pending_start - start);
417     out_result_cell_slabs->emplace_back(result_tile, rcs_start, rcs_length);
418   }
419 
420   // Return the new start of the pending result cell slab.
421   return start + current + 1;
422 }
423 
424 template <typename T, QueryConditionOp Op>
apply_clause(const QueryCondition::Clause & clause,const uint64_t stride,const bool var_size,const bool nullable,const ByteVecValue & fill_value,const std::vector<ResultCellSlab> & result_cell_slabs,std::vector<ResultCellSlab> * const out_result_cell_slabs) const425 void QueryCondition::apply_clause(
426     const QueryCondition::Clause& clause,
427     const uint64_t stride,
428     const bool var_size,
429     const bool nullable,
430     const ByteVecValue& fill_value,
431     const std::vector<ResultCellSlab>& result_cell_slabs,
432     std::vector<ResultCellSlab>* const out_result_cell_slabs) const {
433   const std::string& field_name = clause.field_name_;
434 
435   for (const auto& rcs : result_cell_slabs) {
436     ResultTile* const result_tile = rcs.tile_;
437     const uint64_t start = rcs.start_;
438     const uint64_t length = rcs.length_;
439 
440     // Handle an empty range.
441     if (result_tile == nullptr && !nullable) {
442       const bool cmp = BinaryCmp<T, Op>::cmp(
443           fill_value.data(),
444           fill_value.size(),
445           clause.condition_value_,
446           clause.condition_value_data_.size());
447       if (cmp) {
448         out_result_cell_slabs->emplace_back(result_tile, start, length);
449       }
450     } else {
451       const auto tile_tuple = result_tile->tile_tuple(field_name);
452       uint8_t* buffer_validity = nullptr;
453 
454       if (nullable) {
455         const auto& tile_validity = std::get<2>(*tile_tuple);
456         buffer_validity = static_cast<uint8_t*>(tile_validity.buffer()->data());
457       }
458 
459       // Start the pending result cell slab at the start position
460       // of the current result cell slab.
461       uint64_t pending_start = start;
462       uint64_t c = 0;
463 
464       if (var_size) {
465         const auto& tile = std::get<1>(*tile_tuple);
466         const char* buffer = static_cast<char*>(tile.buffer()->data());
467         const uint64_t buffer_size = tile.size();
468 
469         const auto& tile_offsets = std::get<0>(*tile_tuple);
470         const uint64_t* buffer_offsets =
471             static_cast<uint64_t*>(tile_offsets.buffer()->data());
472         const uint64_t buffer_offsets_el =
473             tile_offsets.size() / constants::cell_var_offset_size;
474 
475         // Iterate through each cell in this slab.
476         while (c < length) {
477           const uint64_t buffer_offset = buffer_offsets[start + c * stride];
478           const uint64_t next_cell_offset =
479               (start + c * stride + 1 < buffer_offsets_el) ?
480                   buffer_offsets[start + c * stride + 1] :
481                   buffer_size;
482           const uint64_t cell_size = next_cell_offset - buffer_offset;
483 
484           const bool null_cell =
485               (nullable && buffer_validity[start + c * stride] == 0) ||
486               (cell_size == 0);
487 
488           // Get the cell value.
489           const void* const cell_value =
490               null_cell ? nullptr : buffer + buffer_offset;
491 
492           // Compare the cell value against the value in the clause.
493           const bool cmp = BinaryCmp<T, Op>::cmp(
494               cell_value,
495               cell_size,
496               clause.condition_value_,
497               clause.condition_value_data_.size());
498           if (!cmp) {
499             pending_start = create_new_result_slab(
500                 start,
501                 pending_start,
502                 stride,
503                 c,
504                 result_tile,
505                 out_result_cell_slabs);
506           }
507 
508           ++c;
509         }
510       } else {
511         const auto& tile = std::get<0>(*tile_tuple);
512         const char* buffer = static_cast<char*>(tile.buffer()->data());
513         const uint64_t cell_size = tile.cell_size();
514         uint64_t buffer_offset = start * cell_size;
515         const uint64_t buffer_offset_inc = stride * cell_size;
516 
517         // Iterate through each cell in this slab.
518         while (c < length) {
519           const bool null_cell =
520               nullable && buffer_validity[start + c * stride] == 0;
521 
522           // Get the cell value.
523           const void* const cell_value =
524               null_cell ? nullptr : buffer + buffer_offset;
525           buffer_offset += buffer_offset_inc;
526 
527           // Compare the cell value against the value in the clause.
528           const bool cmp = BinaryCmp<T, Op>::cmp(
529               cell_value,
530               cell_size,
531               clause.condition_value_,
532               clause.condition_value_data_.size());
533           if (!cmp) {
534             pending_start = create_new_result_slab(
535                 start,
536                 pending_start,
537                 stride,
538                 c,
539                 result_tile,
540                 out_result_cell_slabs);
541           }
542 
543           ++c;
544         }
545       }
546 
547       // Create the final result cell slab if there are pending cells.
548       create_new_result_slab(
549           start, pending_start, stride, c, result_tile, out_result_cell_slabs);
550     }
551   }
552 }
553 
554 template <typename T>
apply_clause(const Clause & clause,const uint64_t stride,const bool var_size,const bool nullable,const ByteVecValue & fill_value,const std::vector<ResultCellSlab> & result_cell_slabs,std::vector<ResultCellSlab> * const out_result_cell_slabs) const555 Status QueryCondition::apply_clause(
556     const Clause& clause,
557     const uint64_t stride,
558     const bool var_size,
559     const bool nullable,
560     const ByteVecValue& fill_value,
561     const std::vector<ResultCellSlab>& result_cell_slabs,
562     std::vector<ResultCellSlab>* const out_result_cell_slabs) const {
563   switch (clause.op_) {
564     case QueryConditionOp::LT:
565       apply_clause<T, QueryConditionOp::LT>(
566           clause,
567           stride,
568           var_size,
569           nullable,
570           fill_value,
571           result_cell_slabs,
572           out_result_cell_slabs);
573       break;
574     case QueryConditionOp::LE:
575       apply_clause<T, QueryConditionOp::LE>(
576           clause,
577           stride,
578           var_size,
579           nullable,
580           fill_value,
581           result_cell_slabs,
582           out_result_cell_slabs);
583       break;
584     case QueryConditionOp::GT:
585       apply_clause<T, QueryConditionOp::GT>(
586           clause,
587           stride,
588           var_size,
589           nullable,
590           fill_value,
591           result_cell_slabs,
592           out_result_cell_slabs);
593       break;
594     case QueryConditionOp::GE:
595       apply_clause<T, QueryConditionOp::GE>(
596           clause,
597           stride,
598           var_size,
599           nullable,
600           fill_value,
601           result_cell_slabs,
602           out_result_cell_slabs);
603       break;
604     case QueryConditionOp::EQ:
605       apply_clause<T, QueryConditionOp::EQ>(
606           clause,
607           stride,
608           var_size,
609           nullable,
610           fill_value,
611           result_cell_slabs,
612           out_result_cell_slabs);
613       break;
614     case QueryConditionOp::NE:
615       apply_clause<T, QueryConditionOp::NE>(
616           clause,
617           stride,
618           var_size,
619           nullable,
620           fill_value,
621           result_cell_slabs,
622           out_result_cell_slabs);
623       break;
624     default:
625       return Status::QueryConditionError(
626           "Cannot perform query comparison; Unknown query "
627           "condition operator");
628   }
629 
630   return Status::Ok();
631 }
632 
apply_clause(const QueryCondition::Clause & clause,const ArraySchema * const array_schema,const uint64_t stride,const std::vector<ResultCellSlab> & result_cell_slabs,std::vector<ResultCellSlab> * const out_result_cell_slabs) const633 Status QueryCondition::apply_clause(
634     const QueryCondition::Clause& clause,
635     const ArraySchema* const array_schema,
636     const uint64_t stride,
637     const std::vector<ResultCellSlab>& result_cell_slabs,
638     std::vector<ResultCellSlab>* const out_result_cell_slabs) const {
639   const Attribute* const attribute =
640       array_schema->attribute(clause.field_name_);
641   if (!attribute) {
642     return Status::QueryConditionError(
643         "Unknown attribute " + clause.field_name_);
644   }
645 
646   const ByteVecValue fill_value = attribute->fill_value();
647   const bool var_size = attribute->var_size();
648   const bool nullable = attribute->nullable();
649   switch (attribute->type()) {
650     case Datatype::INT8:
651       return apply_clause<int8_t>(
652           clause,
653           stride,
654           var_size,
655           nullable,
656           fill_value,
657           result_cell_slabs,
658           out_result_cell_slabs);
659     case Datatype::UINT8:
660       return apply_clause<uint8_t>(
661           clause,
662           stride,
663           var_size,
664           nullable,
665           fill_value,
666           result_cell_slabs,
667           out_result_cell_slabs);
668     case Datatype::INT16:
669       return apply_clause<int16_t>(
670           clause,
671           stride,
672           var_size,
673           nullable,
674           fill_value,
675           result_cell_slabs,
676           out_result_cell_slabs);
677     case Datatype::UINT16:
678       return apply_clause<uint16_t>(
679           clause,
680           stride,
681           var_size,
682           nullable,
683           fill_value,
684           result_cell_slabs,
685           out_result_cell_slabs);
686     case Datatype::INT32:
687       return apply_clause<int32_t>(
688           clause,
689           stride,
690           var_size,
691           nullable,
692           fill_value,
693           result_cell_slabs,
694           out_result_cell_slabs);
695     case Datatype::UINT32:
696       return apply_clause<uint32_t>(
697           clause,
698           stride,
699           var_size,
700           nullable,
701           fill_value,
702           result_cell_slabs,
703           out_result_cell_slabs);
704     case Datatype::INT64:
705       return apply_clause<int64_t>(
706           clause,
707           stride,
708           var_size,
709           nullable,
710           fill_value,
711           result_cell_slabs,
712           out_result_cell_slabs);
713     case Datatype::UINT64:
714       return apply_clause<uint64_t>(
715           clause,
716           stride,
717           var_size,
718           nullable,
719           fill_value,
720           result_cell_slabs,
721           out_result_cell_slabs);
722     case Datatype::FLOAT32:
723       return apply_clause<float>(
724           clause,
725           stride,
726           var_size,
727           nullable,
728           fill_value,
729           result_cell_slabs,
730           out_result_cell_slabs);
731     case Datatype::FLOAT64:
732       return apply_clause<double>(
733           clause,
734           stride,
735           var_size,
736           nullable,
737           fill_value,
738           result_cell_slabs,
739           out_result_cell_slabs);
740     case Datatype::STRING_ASCII:
741       return apply_clause<char*>(
742           clause,
743           stride,
744           var_size,
745           nullable,
746           fill_value,
747           result_cell_slabs,
748           out_result_cell_slabs);
749     case Datatype::CHAR:
750       return apply_clause<char>(
751           clause,
752           stride,
753           var_size,
754           nullable,
755           fill_value,
756           result_cell_slabs,
757           out_result_cell_slabs);
758     case Datatype::DATETIME_YEAR:
759     case Datatype::DATETIME_MONTH:
760     case Datatype::DATETIME_WEEK:
761     case Datatype::DATETIME_DAY:
762     case Datatype::DATETIME_HR:
763     case Datatype::DATETIME_MIN:
764     case Datatype::DATETIME_SEC:
765     case Datatype::DATETIME_MS:
766     case Datatype::DATETIME_US:
767     case Datatype::DATETIME_NS:
768     case Datatype::DATETIME_PS:
769     case Datatype::DATETIME_FS:
770     case Datatype::DATETIME_AS:
771       return apply_clause<int64_t>(
772           clause,
773           stride,
774           var_size,
775           nullable,
776           fill_value,
777           result_cell_slabs,
778           out_result_cell_slabs);
779     case Datatype::ANY:
780     case Datatype::STRING_UTF8:
781     case Datatype::STRING_UTF16:
782     case Datatype::STRING_UTF32:
783     case Datatype::STRING_UCS2:
784     case Datatype::STRING_UCS4:
785     default:
786       return Status::QueryConditionError(
787           "Cannot perform query comparison; Unsupported query "
788           "conditional type on " +
789           clause.field_name_);
790   }
791 
792   return Status::Ok();
793 }
794 
apply(const ArraySchema * const array_schema,std::vector<ResultCellSlab> * const result_cell_slabs,const uint64_t stride,uint64_t memory_budget) const795 Status QueryCondition::apply(
796     const ArraySchema* const array_schema,
797     std::vector<ResultCellSlab>* const result_cell_slabs,
798     const uint64_t stride,
799     uint64_t memory_budget) const {
800   if (clauses_.empty()) {
801     return Status::Ok();
802   }
803 
804   // Iterate through each clause, mutating the result cell
805   // slabs to skip cells that do not fit into any of the
806   // clauses. This assumes all clauses are combined with a
807   // logical "AND".
808   for (const auto& clause : clauses_) {
809     std::vector<ResultCellSlab> tmp_result_cell_slabs;
810     RETURN_NOT_OK(apply_clause(
811         clause,
812         array_schema,
813         stride,
814         *result_cell_slabs,
815         &tmp_result_cell_slabs));
816     if (tmp_result_cell_slabs.size() > result_cell_slabs->size()) {
817       uint64_t memory_increase =
818           tmp_result_cell_slabs.size() - result_cell_slabs->size();
819       memory_increase *= sizeof(ResultCellSlab);
820       if (memory_increase > memory_budget) {
821         return Status::QueryConditionError(
822             "Exceeded result cell slab budget applying query condition");
823       }
824     }
825     *result_cell_slabs = tmp_result_cell_slabs;
826   }
827 
828   return Status::Ok();
829 }
830 
831 template <typename T, QueryConditionOp Op>
apply_clause_dense(const QueryCondition::Clause & clause,ResultTile * result_tile,const uint64_t start,const uint64_t length,const uint64_t src_cell,const uint64_t stride,const bool var_size,const bool nullable,const uint8_t previous_result_bitmask,const uint8_t current_result_bitmask,uint8_t * result_buffer) const832 void QueryCondition::apply_clause_dense(
833     const QueryCondition::Clause& clause,
834     ResultTile* result_tile,
835     const uint64_t start,
836     const uint64_t length,
837     const uint64_t src_cell,
838     const uint64_t stride,
839     const bool var_size,
840     const bool nullable,
841     const uint8_t previous_result_bitmask,
842     const uint8_t current_result_bitmask,
843     uint8_t* result_buffer) const {
844   const std::string& field_name = clause.field_name_;
845 
846   // Get the nullable buffer.
847   const auto tile_tuple = result_tile->tile_tuple(field_name);
848   uint8_t* buffer_validity = nullptr;
849 
850   if (nullable) {
851     const auto& tile_validity = std::get<2>(*tile_tuple);
852     buffer_validity =
853         static_cast<uint8_t*>(tile_validity.buffer()->data()) + src_cell;
854   }
855 
856   if (var_size) {
857     // Get var data buffer and tile offsets buffer.
858     const auto& tile = std::get<1>(*tile_tuple);
859     const char* buffer = static_cast<char*>(tile.buffer()->data());
860     const uint64_t buffer_size = tile.size();
861 
862     const auto& tile_offsets = std::get<0>(*tile_tuple);
863     const uint64_t* buffer_offsets =
864         static_cast<uint64_t*>(tile_offsets.buffer()->data()) + src_cell;
865     const uint64_t buffer_offsets_el =
866         tile_offsets.size() / constants::cell_var_offset_size;
867 
868     // Iterate through each cell in this slab.
869     for (uint64_t c = 0; c < length; ++c) {
870       const uint64_t buffer_offset = buffer_offsets[start + c * stride];
871       const uint64_t next_cell_offset =
872           (start + c * stride + 1 < buffer_offsets_el) ?
873               buffer_offsets[start + c * stride + 1] :
874               buffer_size;
875       const uint64_t cell_size = next_cell_offset - buffer_offset;
876 
877       const bool null_cell =
878           nullable && buffer_validity[start + c * stride] == 0;
879 
880       // Get the cell value.
881       const void* const cell_value =
882           null_cell ? nullptr : buffer + buffer_offset;
883 
884       // Compare the cell value against the value in the clause.
885       const bool cmp = BinaryCmp<T, Op>::cmp(
886           cell_value,
887           cell_size,
888           clause.condition_value_,
889           clause.condition_value_data_.size());
890 
891       // Set the value.
892       if (cmp && (result_buffer[start + c] & previous_result_bitmask)) {
893         result_buffer[start + c] |= current_result_bitmask;
894       } else {
895         result_buffer[start + c] &= ~(current_result_bitmask);
896       }
897     }
898   } else {
899     // Get the fixed size data buffers.
900     const auto& tile = std::get<0>(*tile_tuple);
901     const char* buffer = static_cast<char*>(tile.buffer()->data());
902     const uint64_t cell_size = tile.cell_size();
903     uint64_t buffer_offset = (start + src_cell) * cell_size;
904     const uint64_t buffer_offset_inc = stride * cell_size;
905 
906     // Iterate through each cell in this slab.
907     for (uint64_t c = 0; c < length; ++c) {
908       const bool null_cell =
909           nullable && buffer_validity[start + c * stride] == 0;
910 
911       // Get the cell value.
912       const void* const cell_value =
913           null_cell ? nullptr : buffer + buffer_offset;
914       buffer_offset += buffer_offset_inc;
915 
916       // Compare the cell value against the value in the clause.
917       const bool cmp = BinaryCmp<T, Op>::cmp(
918           cell_value,
919           cell_size,
920           clause.condition_value_,
921           clause.condition_value_data_.size());
922 
923       // Set the value.
924       if (cmp && (result_buffer[start + c] & previous_result_bitmask)) {
925         result_buffer[start + c] |= current_result_bitmask;
926       } else {
927         result_buffer[start + c] &= ~(current_result_bitmask);
928       }
929     }
930   }
931 }
932 
933 template <typename T>
apply_clause_dense(const Clause & clause,ResultTile * result_tile,const uint64_t start,const uint64_t length,const uint64_t src_cell,const uint64_t stride,const bool var_size,const bool nullable,const uint8_t previous_result_bitmask,const uint8_t current_result_bitmask,uint8_t * result_buffer) const934 Status QueryCondition::apply_clause_dense(
935     const Clause& clause,
936     ResultTile* result_tile,
937     const uint64_t start,
938     const uint64_t length,
939     const uint64_t src_cell,
940     const uint64_t stride,
941     const bool var_size,
942     const bool nullable,
943     const uint8_t previous_result_bitmask,
944     const uint8_t current_result_bitmask,
945     uint8_t* result_buffer) const {
946   switch (clause.op_) {
947     case QueryConditionOp::LT:
948       apply_clause_dense<T, QueryConditionOp::LT>(
949           clause,
950           result_tile,
951           start,
952           length,
953           src_cell,
954           stride,
955           var_size,
956           nullable,
957           previous_result_bitmask,
958           current_result_bitmask,
959           result_buffer);
960       break;
961     case QueryConditionOp::LE:
962       apply_clause_dense<T, QueryConditionOp::LE>(
963           clause,
964           result_tile,
965           start,
966           length,
967           src_cell,
968           stride,
969           var_size,
970           nullable,
971           previous_result_bitmask,
972           current_result_bitmask,
973           result_buffer);
974       break;
975     case QueryConditionOp::GT:
976       apply_clause_dense<T, QueryConditionOp::GT>(
977           clause,
978           result_tile,
979           start,
980           length,
981           src_cell,
982           stride,
983           var_size,
984           nullable,
985           previous_result_bitmask,
986           current_result_bitmask,
987           result_buffer);
988       break;
989     case QueryConditionOp::GE:
990       apply_clause_dense<T, QueryConditionOp::GE>(
991           clause,
992           result_tile,
993           start,
994           length,
995           src_cell,
996           stride,
997           var_size,
998           nullable,
999           previous_result_bitmask,
1000           current_result_bitmask,
1001           result_buffer);
1002       break;
1003     case QueryConditionOp::EQ:
1004       apply_clause_dense<T, QueryConditionOp::EQ>(
1005           clause,
1006           result_tile,
1007           start,
1008           length,
1009           src_cell,
1010           stride,
1011           var_size,
1012           nullable,
1013           previous_result_bitmask,
1014           current_result_bitmask,
1015           result_buffer);
1016       break;
1017     case QueryConditionOp::NE:
1018       apply_clause_dense<T, QueryConditionOp::NE>(
1019           clause,
1020           result_tile,
1021           start,
1022           length,
1023           src_cell,
1024           stride,
1025           var_size,
1026           nullable,
1027           previous_result_bitmask,
1028           current_result_bitmask,
1029           result_buffer);
1030       break;
1031     default:
1032       return Status::QueryConditionError(
1033           "Cannot perform query comparison; Unknown query "
1034           "condition operator");
1035   }
1036 
1037   return Status::Ok();
1038 }
1039 
apply_clause_dense(const QueryCondition::Clause & clause,const ArraySchema * const array_schema,ResultTile * result_tile,const uint64_t start,const uint64_t length,const uint64_t src_cell,const uint64_t stride,const uint8_t previous_result_bitmask,const uint8_t current_result_bitmask,uint8_t * result_buffer) const1040 Status QueryCondition::apply_clause_dense(
1041     const QueryCondition::Clause& clause,
1042     const ArraySchema* const array_schema,
1043     ResultTile* result_tile,
1044     const uint64_t start,
1045     const uint64_t length,
1046     const uint64_t src_cell,
1047     const uint64_t stride,
1048     const uint8_t previous_result_bitmask,
1049     const uint8_t current_result_bitmask,
1050     uint8_t* result_buffer) const {
1051   const Attribute* const attribute =
1052       array_schema->attribute(clause.field_name_);
1053   if (!attribute) {
1054     return Status::QueryConditionError(
1055         "Unknown attribute " + clause.field_name_);
1056   }
1057 
1058   const bool var_size = attribute->var_size();
1059   const bool nullable = attribute->nullable();
1060   switch (attribute->type()) {
1061     case Datatype::INT8:
1062       return apply_clause_dense<int8_t>(
1063           clause,
1064           result_tile,
1065           start,
1066           length,
1067           src_cell,
1068           stride,
1069           var_size,
1070           nullable,
1071           previous_result_bitmask,
1072           current_result_bitmask,
1073           result_buffer);
1074     case Datatype::UINT8:
1075       return apply_clause_dense<uint8_t>(
1076           clause,
1077           result_tile,
1078           start,
1079           length,
1080           src_cell,
1081           stride,
1082           var_size,
1083           nullable,
1084           previous_result_bitmask,
1085           current_result_bitmask,
1086           result_buffer);
1087     case Datatype::INT16:
1088       return apply_clause_dense<int16_t>(
1089           clause,
1090           result_tile,
1091           start,
1092           length,
1093           src_cell,
1094           stride,
1095           var_size,
1096           nullable,
1097           previous_result_bitmask,
1098           current_result_bitmask,
1099           result_buffer);
1100     case Datatype::UINT16:
1101       return apply_clause_dense<uint16_t>(
1102           clause,
1103           result_tile,
1104           start,
1105           length,
1106           src_cell,
1107           stride,
1108           var_size,
1109           nullable,
1110           previous_result_bitmask,
1111           current_result_bitmask,
1112           result_buffer);
1113     case Datatype::INT32:
1114       return apply_clause_dense<int32_t>(
1115           clause,
1116           result_tile,
1117           start,
1118           length,
1119           src_cell,
1120           stride,
1121           var_size,
1122           nullable,
1123           previous_result_bitmask,
1124           current_result_bitmask,
1125           result_buffer);
1126     case Datatype::UINT32:
1127       return apply_clause_dense<uint32_t>(
1128           clause,
1129           result_tile,
1130           start,
1131           length,
1132           src_cell,
1133           stride,
1134           var_size,
1135           nullable,
1136           previous_result_bitmask,
1137           current_result_bitmask,
1138           result_buffer);
1139     case Datatype::INT64:
1140       return apply_clause_dense<int64_t>(
1141           clause,
1142           result_tile,
1143           start,
1144           length,
1145           src_cell,
1146           stride,
1147           var_size,
1148           nullable,
1149           previous_result_bitmask,
1150           current_result_bitmask,
1151           result_buffer);
1152     case Datatype::UINT64:
1153       return apply_clause_dense<uint64_t>(
1154           clause,
1155           result_tile,
1156           start,
1157           length,
1158           src_cell,
1159           stride,
1160           var_size,
1161           nullable,
1162           previous_result_bitmask,
1163           current_result_bitmask,
1164           result_buffer);
1165     case Datatype::FLOAT32:
1166       return apply_clause_dense<float>(
1167           clause,
1168           result_tile,
1169           start,
1170           length,
1171           src_cell,
1172           stride,
1173           var_size,
1174           nullable,
1175           previous_result_bitmask,
1176           current_result_bitmask,
1177           result_buffer);
1178     case Datatype::FLOAT64:
1179       return apply_clause_dense<double>(
1180           clause,
1181           result_tile,
1182           start,
1183           length,
1184           src_cell,
1185           stride,
1186           var_size,
1187           nullable,
1188           previous_result_bitmask,
1189           current_result_bitmask,
1190           result_buffer);
1191     case Datatype::STRING_ASCII:
1192       return apply_clause_dense<char*>(
1193           clause,
1194           result_tile,
1195           start,
1196           length,
1197           src_cell,
1198           stride,
1199           var_size,
1200           nullable,
1201           previous_result_bitmask,
1202           current_result_bitmask,
1203           result_buffer);
1204     case Datatype::CHAR:
1205       return apply_clause_dense<char>(
1206           clause,
1207           result_tile,
1208           start,
1209           length,
1210           src_cell,
1211           stride,
1212           var_size,
1213           nullable,
1214           previous_result_bitmask,
1215           current_result_bitmask,
1216           result_buffer);
1217     case Datatype::DATETIME_YEAR:
1218     case Datatype::DATETIME_MONTH:
1219     case Datatype::DATETIME_WEEK:
1220     case Datatype::DATETIME_DAY:
1221     case Datatype::DATETIME_HR:
1222     case Datatype::DATETIME_MIN:
1223     case Datatype::DATETIME_SEC:
1224     case Datatype::DATETIME_MS:
1225     case Datatype::DATETIME_US:
1226     case Datatype::DATETIME_NS:
1227     case Datatype::DATETIME_PS:
1228     case Datatype::DATETIME_FS:
1229     case Datatype::DATETIME_AS:
1230       return apply_clause_dense<int64_t>(
1231           clause,
1232           result_tile,
1233           start,
1234           length,
1235           src_cell,
1236           stride,
1237           var_size,
1238           nullable,
1239           previous_result_bitmask,
1240           current_result_bitmask,
1241           result_buffer);
1242     case Datatype::ANY:
1243     case Datatype::STRING_UTF8:
1244     case Datatype::STRING_UTF16:
1245     case Datatype::STRING_UTF32:
1246     case Datatype::STRING_UCS2:
1247     case Datatype::STRING_UCS4:
1248     default:
1249       return Status::QueryConditionError(
1250           "Cannot perform query comparison; Unsupported query "
1251           "conditional type on " +
1252           clause.field_name_);
1253   }
1254 
1255   return Status::Ok();
1256 }
1257 
1258 template <typename T>
apply_dense(const ArraySchema * const array_schema,ResultTile * result_tile,const uint64_t start,const uint64_t length,const uint64_t src_cell,const uint64_t stride,uint8_t * result_buffer)1259 Status QueryCondition::apply_dense(
1260     const ArraySchema* const array_schema,
1261     ResultTile* result_tile,
1262     const uint64_t start,
1263     const uint64_t length,
1264     const uint64_t src_cell,
1265     const uint64_t stride,
1266     uint8_t* result_buffer) {
1267   uint8_t previous_result_bitmask = clauses_.size() % 2 == 1 ? 0x2 : 0x1;
1268   uint8_t current_result_bitmask = clauses_.size() % 2 == 1 ? 0x1 : 0x2;
1269 
1270   // Iterate through each clause.
1271   // This assumes all clauses are combined with a logical "AND".
1272   for (const auto& clause : clauses_) {
1273     RETURN_NOT_OK(apply_clause_dense(
1274         clause,
1275         array_schema,
1276         result_tile,
1277         start,
1278         length,
1279         src_cell,
1280         stride,
1281         previous_result_bitmask,
1282         current_result_bitmask,
1283         result_buffer));
1284 
1285     // Switch the result bitmasks for the next condition.
1286     std::swap(previous_result_bitmask, current_result_bitmask);
1287   }
1288 
1289   return Status::Ok();
1290 }
1291 
set_clauses(std::vector<Clause> && clauses)1292 void QueryCondition::set_clauses(std::vector<Clause>&& clauses) {
1293   clauses_ = std::move(clauses);
1294 }
1295 
set_combination_ops(std::vector<QueryConditionCombinationOp> && combination_ops)1296 void QueryCondition::set_combination_ops(
1297     std::vector<QueryConditionCombinationOp>&& combination_ops) {
1298   combination_ops_ = std::move(combination_ops);
1299 }
1300 
clauses() const1301 std::vector<QueryCondition::Clause> QueryCondition::clauses() const {
1302   return clauses_;
1303 }
1304 
combination_ops() const1305 std::vector<QueryConditionCombinationOp> QueryCondition::combination_ops()
1306     const {
1307   return combination_ops_;
1308 }
1309 
1310 // Explicit template instantiations.
1311 template Status QueryCondition::apply_dense<int8_t>(
1312     const ArraySchema* const,
1313     ResultTile*,
1314     const uint64_t,
1315     const uint64_t,
1316     const uint64_t,
1317     const uint64_t,
1318     uint8_t*);
1319 template Status QueryCondition::apply_dense<uint8_t>(
1320     const ArraySchema* const,
1321     ResultTile*,
1322     const uint64_t,
1323     const uint64_t,
1324     const uint64_t,
1325     const uint64_t,
1326     uint8_t*);
1327 template Status QueryCondition::apply_dense<int16_t>(
1328     const ArraySchema* const,
1329     ResultTile*,
1330     const uint64_t,
1331     const uint64_t,
1332     const uint64_t,
1333     const uint64_t,
1334     uint8_t*);
1335 template Status QueryCondition::apply_dense<uint16_t>(
1336     const ArraySchema* const,
1337     ResultTile*,
1338     const uint64_t,
1339     const uint64_t,
1340     const uint64_t,
1341     const uint64_t,
1342     uint8_t*);
1343 template Status QueryCondition::apply_dense<int32_t>(
1344     const ArraySchema* const,
1345     ResultTile*,
1346     const uint64_t,
1347     const uint64_t,
1348     const uint64_t,
1349     const uint64_t,
1350     uint8_t*);
1351 template Status QueryCondition::apply_dense<uint32_t>(
1352     const ArraySchema* const,
1353     ResultTile*,
1354     const uint64_t,
1355     const uint64_t,
1356     const uint64_t,
1357     const uint64_t,
1358     uint8_t*);
1359 template Status QueryCondition::apply_dense<int64_t>(
1360     const ArraySchema* const,
1361     ResultTile*,
1362     const uint64_t,
1363     const uint64_t,
1364     const uint64_t,
1365     const uint64_t,
1366     uint8_t*);
1367 template Status QueryCondition::apply_dense<uint64_t>(
1368     const ArraySchema* const,
1369     ResultTile*,
1370     const uint64_t,
1371     const uint64_t,
1372     const uint64_t,
1373     const uint64_t,
1374     uint8_t*);
1375 
1376 }  // namespace sm
1377 }  // namespace tiledb
1378