1 /**
2 * @file query_condition.cc
3 *
4 * @section LICENSE
5 *
6 * The MIT License
7 *
8 * @copyright Copyright (c) 2021 TileDB, Inc.
9 *
10 * Permission is hereby granted, free of charge, to any person obtaining a copy
11 * of this software and associated documentation files (the "Software"), to deal
12 * in the Software without restriction, including without limitation the rights
13 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
14 * copies of the Software, and to permit persons to whom the Software is
15 * furnished to do so, subject to the following conditions:
16 *
17 * The above copyright notice and this permission notice shall be included in
18 * all copies or substantial portions of the Software.
19 *
20 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
21 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
23 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
24 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
25 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
26 * THE SOFTWARE.
27 *
28 * @section DESCRIPTION
29 *
30 * Implements the QueryCondition class.
31 */
32
33 #include "tiledb/sm/query/query_condition.h"
34 #include "tiledb/common/logger.h"
35 #include "tiledb/sm/enums/datatype.h"
36 #include "tiledb/sm/enums/query_condition_combination_op.h"
37 #include "tiledb/sm/enums/query_condition_op.h"
38 #include "tiledb/sm/misc/utils.h"
39
40 #include <iostream>
41
42 #include <map>
43 #include <mutex>
44
45 using namespace tiledb::common;
46
47 namespace tiledb {
48 namespace sm {
49
QueryCondition()50 QueryCondition::QueryCondition() {
51 }
52
/**
 * Copy constructor. Copies the clauses and the combination ops of `rhs`.
 * The `field_names_` cache is not copied; `field_names()` rebuilds it on
 * demand.
 */
QueryCondition::QueryCondition(const QueryCondition& rhs)
    : clauses_(rhs.clauses_), combination_ops_(rhs.combination_ops_) {
}
57
QueryCondition(QueryCondition && rhs)58 QueryCondition::QueryCondition(QueryCondition&& rhs)
59 : clauses_(std::move(rhs.clauses_))
60 , combination_ops_(std::move(rhs.combination_ops_)) {
61 }
62
~QueryCondition()63 QueryCondition::~QueryCondition() {
64 }
65
/**
 * Copy-assignment operator.
 *
 * Replaces the clauses and combination ops of this condition with copies of
 * those in `rhs`.
 *
 * @param rhs The condition to copy from.
 * @return A reference to this condition.
 */
QueryCondition& QueryCondition::operator=(const QueryCondition& rhs) {
  if (this != &rhs) {
    clauses_ = rhs.clauses_;
    combination_ops_ = rhs.combination_ops_;

    // `field_names()` only rebuilds its cache when the cache is empty, so a
    // cache built from the previous clauses must be dropped here; otherwise
    // it would keep reporting the old field names.
    field_names_.clear();
  }

  return *this;
}
72
operator =(QueryCondition && rhs)73 QueryCondition& QueryCondition::operator=(QueryCondition&& rhs) {
74 clauses_ = std::move(rhs.clauses_);
75 combination_ops_ = std::move(rhs.combination_ops_);
76
77 return *this;
78 }
79
init(std::string && field_name,const void * const condition_value,const uint64_t condition_value_size,const QueryConditionOp op)80 Status QueryCondition::init(
81 std::string&& field_name,
82 const void* const condition_value,
83 const uint64_t condition_value_size,
84 const QueryConditionOp op) {
85 if (!clauses_.empty()) {
86 return Status::QueryConditionError("Cannot reinitialize query condition");
87 }
88
89 clauses_.emplace_back(
90 std::move(field_name), condition_value, condition_value_size, op);
91
92 return Status::Ok();
93 }
94
check(const ArraySchema * const array_schema) const95 Status QueryCondition::check(const ArraySchema* const array_schema) const {
96 for (const auto& clause : clauses_) {
97 const std::string field_name = clause.field_name_;
98 const uint64_t condition_value_size = clause.condition_value_data_.size();
99
100 const Attribute* const attribute = array_schema->attribute(field_name);
101 if (!attribute) {
102 return Status::QueryConditionError(
103 "Clause field name is not an attribute " + field_name);
104 }
105
106 if (clause.condition_value_ == nullptr) {
107 if (clause.op_ != QueryConditionOp::EQ &&
108 clause.op_ != QueryConditionOp::NE) {
109 return Status::QueryConditionError(
110 "Null value can only be used with equality operators");
111 }
112
113 if ((!attribute->nullable()) &&
114 attribute->type() != Datatype::STRING_ASCII) {
115 return Status::QueryConditionError(
116 "Null value can only be used with nullable attributes");
117 }
118 }
119
120 if (attribute->var_size() && attribute->type() != Datatype::STRING_ASCII &&
121 clause.condition_value_ != nullptr) {
122 return Status::QueryConditionError(
123 "Clause non-empty attribute may only be var-sized for ASCII "
124 "strings: " +
125 field_name);
126 }
127
128 if (attribute->cell_val_num() != 1 &&
129 attribute->type() != Datatype::STRING_ASCII &&
130 (!attribute->var_size())) {
131 return Status::QueryConditionError(
132 "Clause attribute must have one value per cell for non-string fixed "
133 "size "
134 "attributes: " +
135 field_name);
136 }
137
138 if (attribute->cell_size() != constants::var_size &&
139 attribute->cell_size() != condition_value_size &&
140 !(attribute->nullable() && clause.condition_value_ == nullptr) &&
141 attribute->type() != Datatype::STRING_ASCII &&
142 (!attribute->var_size())) {
143 return Status::QueryConditionError(
144 "Clause condition value size mismatch: " +
145 std::to_string(attribute->cell_size()) +
146 " != " + std::to_string(condition_value_size));
147 }
148
149 switch (attribute->type()) {
150 case Datatype::ANY:
151 return Status::QueryConditionError(
152 "Clause attribute type may not be of type 'ANY': " + field_name);
153 case Datatype::STRING_UTF8:
154 case Datatype::STRING_UTF16:
155 case Datatype::STRING_UTF32:
156 case Datatype::STRING_UCS2:
157 case Datatype::STRING_UCS4:
158 return Status::QueryConditionError(
159 "Clause attribute type may not be a UTF/UCS string: " + field_name);
160 default:
161 break;
162 }
163 }
164
165 return Status::Ok();
166 }
167
combine(const QueryCondition & rhs,const QueryConditionCombinationOp combination_op,QueryCondition * const combined_cond) const168 Status QueryCondition::combine(
169 const QueryCondition& rhs,
170 const QueryConditionCombinationOp combination_op,
171 QueryCondition* const combined_cond) const {
172 assert(combination_op == QueryConditionCombinationOp::AND);
173 if (combination_op != QueryConditionCombinationOp::AND) {
174 return Status::QueryConditionError(
175 "Cannot combine query conditions; Only the 'AND' "
176 "combination op is supported");
177 }
178
179 combined_cond->clauses_ = clauses_;
180 combined_cond->clauses_.insert(
181 combined_cond->clauses_.end(), rhs.clauses_.begin(), rhs.clauses_.end());
182
183 combined_cond->combination_ops_ = combination_ops_;
184 combined_cond->combination_ops_.emplace_back(combination_op);
185 combined_cond->combination_ops_.insert(
186 combined_cond->combination_ops_.end(),
187 rhs.combination_ops_.begin(),
188 rhs.combination_ops_.end());
189
190 combined_cond->field_names_.clear();
191
192 return Status::Ok();
193 }
194
empty() const195 bool QueryCondition::empty() const {
196 return clauses_.empty();
197 }
198
field_names() const199 std::unordered_set<std::string> QueryCondition::field_names() const {
200 if (field_names_.empty()) {
201 for (const auto& clause : clauses_) {
202 field_names_.insert(clause.field_name_);
203 }
204 }
205
206 return field_names_;
207 }
208
209 /** Full template specialization for `char*` and `QueryConditionOp::LT`. */
210 template <>
211 struct QueryCondition::BinaryCmp<char*, QueryConditionOp::LT> {
cmptiledb::sm::QueryCondition::BinaryCmp212 static inline bool cmp(
213 const void* lhs, uint64_t lhs_size, const void* rhs, uint64_t rhs_size) {
214 if (lhs == nullptr) {
215 return false;
216 }
217
218 const size_t min_size = std::min<size_t>(lhs_size, rhs_size);
219 const int cmp = strncmp(
220 static_cast<const char*>(lhs), static_cast<const char*>(rhs), min_size);
221 if (cmp != 0) {
222 return cmp < 0;
223 }
224
225 return lhs_size < rhs_size;
226 }
227 };
228
229 /** Partial template specialization for `char*` and `QueryConditionOp::LE. */
230 template <>
231 struct QueryCondition::BinaryCmp<char*, QueryConditionOp::LE> {
cmptiledb::sm::QueryCondition::BinaryCmp232 static inline bool cmp(
233 const void* lhs, uint64_t lhs_size, const void* rhs, uint64_t rhs_size) {
234 if (lhs == nullptr) {
235 return false;
236 }
237
238 const size_t min_size = std::min<size_t>(lhs_size, rhs_size);
239 const int cmp = strncmp(
240 static_cast<const char*>(lhs), static_cast<const char*>(rhs), min_size);
241 if (cmp != 0) {
242 return cmp < 0;
243 }
244
245 return lhs_size <= rhs_size;
246 }
247 };
248
249 /** Partial template specialization for `char*` and `QueryConditionOp::GT`. */
250 template <>
251 struct QueryCondition::BinaryCmp<char*, QueryConditionOp::GT> {
cmptiledb::sm::QueryCondition::BinaryCmp252 static inline bool cmp(
253 const void* lhs, uint64_t lhs_size, const void* rhs, uint64_t rhs_size) {
254 if (lhs == nullptr) {
255 return false;
256 }
257
258 const size_t min_size = std::min<size_t>(lhs_size, rhs_size);
259 const int cmp = strncmp(
260 static_cast<const char*>(lhs), static_cast<const char*>(rhs), min_size);
261 if (cmp != 0) {
262 return cmp > 0;
263 }
264
265 return lhs_size > rhs_size;
266 }
267 };
268
269 /** Partial template specialization for `char*` and `QueryConditionOp::GE`. */
270 template <>
271 struct QueryCondition::BinaryCmp<char*, QueryConditionOp::GE> {
cmptiledb::sm::QueryCondition::BinaryCmp272 static inline bool cmp(
273 const void* lhs, uint64_t lhs_size, const void* rhs, uint64_t rhs_size) {
274 if (lhs == nullptr) {
275 return false;
276 }
277
278 const size_t min_size = std::min<size_t>(lhs_size, rhs_size);
279 const int cmp = strncmp(
280 static_cast<const char*>(lhs), static_cast<const char*>(rhs), min_size);
281 if (cmp != 0) {
282 return cmp > 0;
283 }
284
285 return lhs_size >= rhs_size;
286 }
287 };
288
289 /** Partial template specialization for `char*` and `QueryConditionOp::EQ`. */
290 template <>
291 struct QueryCondition::BinaryCmp<char*, QueryConditionOp::EQ> {
cmptiledb::sm::QueryCondition::BinaryCmp292 static inline bool cmp(
293 const void* lhs, uint64_t lhs_size, const void* rhs, uint64_t rhs_size) {
294 if (lhs == rhs) {
295 return true;
296 }
297
298 if (lhs == nullptr || rhs == nullptr) {
299 return false;
300 }
301
302 if (lhs_size != rhs_size) {
303 return false;
304 }
305
306 return strncmp(
307 static_cast<const char*>(lhs),
308 static_cast<const char*>(rhs),
309 lhs_size) == 0;
310 }
311 };
312
313 /** Partial template specialization for `char*` and `QueryConditionOp::NE`. */
314 template <>
315 struct QueryCondition::BinaryCmp<char*, QueryConditionOp::NE> {
cmptiledb::sm::QueryCondition::BinaryCmp316 static inline bool cmp(
317 const void* lhs, uint64_t lhs_size, const void* rhs, uint64_t rhs_size) {
318 if (rhs == nullptr && lhs != nullptr) {
319 return true;
320 }
321
322 if (lhs == nullptr || rhs == nullptr) {
323 return false;
324 }
325
326 if (lhs_size != rhs_size) {
327 return true;
328 }
329
330 return strncmp(
331 static_cast<const char*>(lhs),
332 static_cast<const char*>(rhs),
333 lhs_size) != 0;
334 }
335 };
336
337 /** Partial template specialization for `QueryConditionOp::LT`. */
338 template <typename T>
339 struct QueryCondition::BinaryCmp<T, QueryConditionOp::LT> {
cmptiledb::sm::QueryCondition::BinaryCmp340 static inline bool cmp(const void* lhs, uint64_t, const void* rhs, uint64_t) {
341 return lhs != nullptr &&
342 *static_cast<const T*>(lhs) < *static_cast<const T*>(rhs);
343 }
344 };
345
346 /** Partial template specialization for `QueryConditionOp::LE`. */
347 template <typename T>
348 struct QueryCondition::BinaryCmp<T, QueryConditionOp::LE> {
cmptiledb::sm::QueryCondition::BinaryCmp349 static inline bool cmp(const void* lhs, uint64_t, const void* rhs, uint64_t) {
350 return lhs != nullptr &&
351 *static_cast<const T*>(lhs) <= *static_cast<const T*>(rhs);
352 }
353 };
354
355 /** Partial template specialization for `QueryConditionOp::GT`. */
356 template <typename T>
357 struct QueryCondition::BinaryCmp<T, QueryConditionOp::GT> {
cmptiledb::sm::QueryCondition::BinaryCmp358 static inline bool cmp(const void* lhs, uint64_t, const void* rhs, uint64_t) {
359 return lhs != nullptr &&
360 *static_cast<const T*>(lhs) > *static_cast<const T*>(rhs);
361 }
362 };
363
364 /** Partial template specialization for `QueryConditionOp::GE`. */
365 template <typename T>
366 struct QueryCondition::BinaryCmp<T, QueryConditionOp::GE> {
cmptiledb::sm::QueryCondition::BinaryCmp367 static inline bool cmp(const void* lhs, uint64_t, const void* rhs, uint64_t) {
368 return lhs != nullptr &&
369 *static_cast<const T*>(lhs) >= *static_cast<const T*>(rhs);
370 }
371 };
372
373 /** Partial template specialization for `QueryConditionOp::EQ`. */
374 template <typename T>
375 struct QueryCondition::BinaryCmp<T, QueryConditionOp::EQ> {
cmptiledb::sm::QueryCondition::BinaryCmp376 static inline bool cmp(const void* lhs, uint64_t, const void* rhs, uint64_t) {
377 if (lhs == rhs) {
378 return true;
379 }
380
381 if (lhs == nullptr || rhs == nullptr) {
382 return false;
383 }
384
385 return *static_cast<const T*>(lhs) == *static_cast<const T*>(rhs);
386 }
387 };
388
389 /** Partial template specialization for `QueryConditionOp::NE`. */
390 template <typename T>
391 struct QueryCondition::BinaryCmp<T, QueryConditionOp::NE> {
cmptiledb::sm::QueryCondition::BinaryCmp392 static inline bool cmp(const void* lhs, uint64_t, const void* rhs, uint64_t) {
393 if (rhs == nullptr && lhs != nullptr) {
394 return true;
395 }
396
397 if (lhs == nullptr || rhs == nullptr) {
398 return false;
399 }
400
401 return *static_cast<const T*>(lhs) != *static_cast<const T*>(rhs);
402 }
403 };
404
405 /** Used to create a new result slab in QueryCondition::apply_clause. */
create_new_result_slab(uint64_t start,uint64_t pending_start,uint64_t stride,uint64_t current,ResultTile * const result_tile,std::vector<ResultCellSlab> * const out_result_cell_slabs)406 uint64_t create_new_result_slab(
407 uint64_t start,
408 uint64_t pending_start,
409 uint64_t stride,
410 uint64_t current,
411 ResultTile* const result_tile,
412 std::vector<ResultCellSlab>* const out_result_cell_slabs) {
413 // Create a result cell slab if there are pending cells.
414 if (pending_start != start + current) {
415 const uint64_t rcs_start = start + ((pending_start - start) * stride);
416 const uint64_t rcs_length = current - (pending_start - start);
417 out_result_cell_slabs->emplace_back(result_tile, rcs_start, rcs_length);
418 }
419
420 // Return the new start of the pending result cell slab.
421 return start + current + 1;
422 }
423
424 template <typename T, QueryConditionOp Op>
apply_clause(const QueryCondition::Clause & clause,const uint64_t stride,const bool var_size,const bool nullable,const ByteVecValue & fill_value,const std::vector<ResultCellSlab> & result_cell_slabs,std::vector<ResultCellSlab> * const out_result_cell_slabs) const425 void QueryCondition::apply_clause(
426 const QueryCondition::Clause& clause,
427 const uint64_t stride,
428 const bool var_size,
429 const bool nullable,
430 const ByteVecValue& fill_value,
431 const std::vector<ResultCellSlab>& result_cell_slabs,
432 std::vector<ResultCellSlab>* const out_result_cell_slabs) const {
433 const std::string& field_name = clause.field_name_;
434
435 for (const auto& rcs : result_cell_slabs) {
436 ResultTile* const result_tile = rcs.tile_;
437 const uint64_t start = rcs.start_;
438 const uint64_t length = rcs.length_;
439
440 // Handle an empty range.
441 if (result_tile == nullptr && !nullable) {
442 const bool cmp = BinaryCmp<T, Op>::cmp(
443 fill_value.data(),
444 fill_value.size(),
445 clause.condition_value_,
446 clause.condition_value_data_.size());
447 if (cmp) {
448 out_result_cell_slabs->emplace_back(result_tile, start, length);
449 }
450 } else {
451 const auto tile_tuple = result_tile->tile_tuple(field_name);
452 uint8_t* buffer_validity = nullptr;
453
454 if (nullable) {
455 const auto& tile_validity = std::get<2>(*tile_tuple);
456 buffer_validity = static_cast<uint8_t*>(tile_validity.buffer()->data());
457 }
458
459 // Start the pending result cell slab at the start position
460 // of the current result cell slab.
461 uint64_t pending_start = start;
462 uint64_t c = 0;
463
464 if (var_size) {
465 const auto& tile = std::get<1>(*tile_tuple);
466 const char* buffer = static_cast<char*>(tile.buffer()->data());
467 const uint64_t buffer_size = tile.size();
468
469 const auto& tile_offsets = std::get<0>(*tile_tuple);
470 const uint64_t* buffer_offsets =
471 static_cast<uint64_t*>(tile_offsets.buffer()->data());
472 const uint64_t buffer_offsets_el =
473 tile_offsets.size() / constants::cell_var_offset_size;
474
475 // Iterate through each cell in this slab.
476 while (c < length) {
477 const uint64_t buffer_offset = buffer_offsets[start + c * stride];
478 const uint64_t next_cell_offset =
479 (start + c * stride + 1 < buffer_offsets_el) ?
480 buffer_offsets[start + c * stride + 1] :
481 buffer_size;
482 const uint64_t cell_size = next_cell_offset - buffer_offset;
483
484 const bool null_cell =
485 (nullable && buffer_validity[start + c * stride] == 0) ||
486 (cell_size == 0);
487
488 // Get the cell value.
489 const void* const cell_value =
490 null_cell ? nullptr : buffer + buffer_offset;
491
492 // Compare the cell value against the value in the clause.
493 const bool cmp = BinaryCmp<T, Op>::cmp(
494 cell_value,
495 cell_size,
496 clause.condition_value_,
497 clause.condition_value_data_.size());
498 if (!cmp) {
499 pending_start = create_new_result_slab(
500 start,
501 pending_start,
502 stride,
503 c,
504 result_tile,
505 out_result_cell_slabs);
506 }
507
508 ++c;
509 }
510 } else {
511 const auto& tile = std::get<0>(*tile_tuple);
512 const char* buffer = static_cast<char*>(tile.buffer()->data());
513 const uint64_t cell_size = tile.cell_size();
514 uint64_t buffer_offset = start * cell_size;
515 const uint64_t buffer_offset_inc = stride * cell_size;
516
517 // Iterate through each cell in this slab.
518 while (c < length) {
519 const bool null_cell =
520 nullable && buffer_validity[start + c * stride] == 0;
521
522 // Get the cell value.
523 const void* const cell_value =
524 null_cell ? nullptr : buffer + buffer_offset;
525 buffer_offset += buffer_offset_inc;
526
527 // Compare the cell value against the value in the clause.
528 const bool cmp = BinaryCmp<T, Op>::cmp(
529 cell_value,
530 cell_size,
531 clause.condition_value_,
532 clause.condition_value_data_.size());
533 if (!cmp) {
534 pending_start = create_new_result_slab(
535 start,
536 pending_start,
537 stride,
538 c,
539 result_tile,
540 out_result_cell_slabs);
541 }
542
543 ++c;
544 }
545 }
546
547 // Create the final result cell slab if there are pending cells.
548 create_new_result_slab(
549 start, pending_start, stride, c, result_tile, out_result_cell_slabs);
550 }
551 }
552 }
553
554 template <typename T>
apply_clause(const Clause & clause,const uint64_t stride,const bool var_size,const bool nullable,const ByteVecValue & fill_value,const std::vector<ResultCellSlab> & result_cell_slabs,std::vector<ResultCellSlab> * const out_result_cell_slabs) const555 Status QueryCondition::apply_clause(
556 const Clause& clause,
557 const uint64_t stride,
558 const bool var_size,
559 const bool nullable,
560 const ByteVecValue& fill_value,
561 const std::vector<ResultCellSlab>& result_cell_slabs,
562 std::vector<ResultCellSlab>* const out_result_cell_slabs) const {
563 switch (clause.op_) {
564 case QueryConditionOp::LT:
565 apply_clause<T, QueryConditionOp::LT>(
566 clause,
567 stride,
568 var_size,
569 nullable,
570 fill_value,
571 result_cell_slabs,
572 out_result_cell_slabs);
573 break;
574 case QueryConditionOp::LE:
575 apply_clause<T, QueryConditionOp::LE>(
576 clause,
577 stride,
578 var_size,
579 nullable,
580 fill_value,
581 result_cell_slabs,
582 out_result_cell_slabs);
583 break;
584 case QueryConditionOp::GT:
585 apply_clause<T, QueryConditionOp::GT>(
586 clause,
587 stride,
588 var_size,
589 nullable,
590 fill_value,
591 result_cell_slabs,
592 out_result_cell_slabs);
593 break;
594 case QueryConditionOp::GE:
595 apply_clause<T, QueryConditionOp::GE>(
596 clause,
597 stride,
598 var_size,
599 nullable,
600 fill_value,
601 result_cell_slabs,
602 out_result_cell_slabs);
603 break;
604 case QueryConditionOp::EQ:
605 apply_clause<T, QueryConditionOp::EQ>(
606 clause,
607 stride,
608 var_size,
609 nullable,
610 fill_value,
611 result_cell_slabs,
612 out_result_cell_slabs);
613 break;
614 case QueryConditionOp::NE:
615 apply_clause<T, QueryConditionOp::NE>(
616 clause,
617 stride,
618 var_size,
619 nullable,
620 fill_value,
621 result_cell_slabs,
622 out_result_cell_slabs);
623 break;
624 default:
625 return Status::QueryConditionError(
626 "Cannot perform query comparison; Unknown query "
627 "condition operator");
628 }
629
630 return Status::Ok();
631 }
632
apply_clause(const QueryCondition::Clause & clause,const ArraySchema * const array_schema,const uint64_t stride,const std::vector<ResultCellSlab> & result_cell_slabs,std::vector<ResultCellSlab> * const out_result_cell_slabs) const633 Status QueryCondition::apply_clause(
634 const QueryCondition::Clause& clause,
635 const ArraySchema* const array_schema,
636 const uint64_t stride,
637 const std::vector<ResultCellSlab>& result_cell_slabs,
638 std::vector<ResultCellSlab>* const out_result_cell_slabs) const {
639 const Attribute* const attribute =
640 array_schema->attribute(clause.field_name_);
641 if (!attribute) {
642 return Status::QueryConditionError(
643 "Unknown attribute " + clause.field_name_);
644 }
645
646 const ByteVecValue fill_value = attribute->fill_value();
647 const bool var_size = attribute->var_size();
648 const bool nullable = attribute->nullable();
649 switch (attribute->type()) {
650 case Datatype::INT8:
651 return apply_clause<int8_t>(
652 clause,
653 stride,
654 var_size,
655 nullable,
656 fill_value,
657 result_cell_slabs,
658 out_result_cell_slabs);
659 case Datatype::UINT8:
660 return apply_clause<uint8_t>(
661 clause,
662 stride,
663 var_size,
664 nullable,
665 fill_value,
666 result_cell_slabs,
667 out_result_cell_slabs);
668 case Datatype::INT16:
669 return apply_clause<int16_t>(
670 clause,
671 stride,
672 var_size,
673 nullable,
674 fill_value,
675 result_cell_slabs,
676 out_result_cell_slabs);
677 case Datatype::UINT16:
678 return apply_clause<uint16_t>(
679 clause,
680 stride,
681 var_size,
682 nullable,
683 fill_value,
684 result_cell_slabs,
685 out_result_cell_slabs);
686 case Datatype::INT32:
687 return apply_clause<int32_t>(
688 clause,
689 stride,
690 var_size,
691 nullable,
692 fill_value,
693 result_cell_slabs,
694 out_result_cell_slabs);
695 case Datatype::UINT32:
696 return apply_clause<uint32_t>(
697 clause,
698 stride,
699 var_size,
700 nullable,
701 fill_value,
702 result_cell_slabs,
703 out_result_cell_slabs);
704 case Datatype::INT64:
705 return apply_clause<int64_t>(
706 clause,
707 stride,
708 var_size,
709 nullable,
710 fill_value,
711 result_cell_slabs,
712 out_result_cell_slabs);
713 case Datatype::UINT64:
714 return apply_clause<uint64_t>(
715 clause,
716 stride,
717 var_size,
718 nullable,
719 fill_value,
720 result_cell_slabs,
721 out_result_cell_slabs);
722 case Datatype::FLOAT32:
723 return apply_clause<float>(
724 clause,
725 stride,
726 var_size,
727 nullable,
728 fill_value,
729 result_cell_slabs,
730 out_result_cell_slabs);
731 case Datatype::FLOAT64:
732 return apply_clause<double>(
733 clause,
734 stride,
735 var_size,
736 nullable,
737 fill_value,
738 result_cell_slabs,
739 out_result_cell_slabs);
740 case Datatype::STRING_ASCII:
741 return apply_clause<char*>(
742 clause,
743 stride,
744 var_size,
745 nullable,
746 fill_value,
747 result_cell_slabs,
748 out_result_cell_slabs);
749 case Datatype::CHAR:
750 return apply_clause<char>(
751 clause,
752 stride,
753 var_size,
754 nullable,
755 fill_value,
756 result_cell_slabs,
757 out_result_cell_slabs);
758 case Datatype::DATETIME_YEAR:
759 case Datatype::DATETIME_MONTH:
760 case Datatype::DATETIME_WEEK:
761 case Datatype::DATETIME_DAY:
762 case Datatype::DATETIME_HR:
763 case Datatype::DATETIME_MIN:
764 case Datatype::DATETIME_SEC:
765 case Datatype::DATETIME_MS:
766 case Datatype::DATETIME_US:
767 case Datatype::DATETIME_NS:
768 case Datatype::DATETIME_PS:
769 case Datatype::DATETIME_FS:
770 case Datatype::DATETIME_AS:
771 return apply_clause<int64_t>(
772 clause,
773 stride,
774 var_size,
775 nullable,
776 fill_value,
777 result_cell_slabs,
778 out_result_cell_slabs);
779 case Datatype::ANY:
780 case Datatype::STRING_UTF8:
781 case Datatype::STRING_UTF16:
782 case Datatype::STRING_UTF32:
783 case Datatype::STRING_UCS2:
784 case Datatype::STRING_UCS4:
785 default:
786 return Status::QueryConditionError(
787 "Cannot perform query comparison; Unsupported query "
788 "conditional type on " +
789 clause.field_name_);
790 }
791
792 return Status::Ok();
793 }
794
apply(const ArraySchema * const array_schema,std::vector<ResultCellSlab> * const result_cell_slabs,const uint64_t stride,uint64_t memory_budget) const795 Status QueryCondition::apply(
796 const ArraySchema* const array_schema,
797 std::vector<ResultCellSlab>* const result_cell_slabs,
798 const uint64_t stride,
799 uint64_t memory_budget) const {
800 if (clauses_.empty()) {
801 return Status::Ok();
802 }
803
804 // Iterate through each clause, mutating the result cell
805 // slabs to skip cells that do not fit into any of the
806 // clauses. This assumes all clauses are combined with a
807 // logical "AND".
808 for (const auto& clause : clauses_) {
809 std::vector<ResultCellSlab> tmp_result_cell_slabs;
810 RETURN_NOT_OK(apply_clause(
811 clause,
812 array_schema,
813 stride,
814 *result_cell_slabs,
815 &tmp_result_cell_slabs));
816 if (tmp_result_cell_slabs.size() > result_cell_slabs->size()) {
817 uint64_t memory_increase =
818 tmp_result_cell_slabs.size() - result_cell_slabs->size();
819 memory_increase *= sizeof(ResultCellSlab);
820 if (memory_increase > memory_budget) {
821 return Status::QueryConditionError(
822 "Exceeded result cell slab budget applying query condition");
823 }
824 }
825 *result_cell_slabs = tmp_result_cell_slabs;
826 }
827
828 return Status::Ok();
829 }
830
831 template <typename T, QueryConditionOp Op>
apply_clause_dense(const QueryCondition::Clause & clause,ResultTile * result_tile,const uint64_t start,const uint64_t length,const uint64_t src_cell,const uint64_t stride,const bool var_size,const bool nullable,const uint8_t previous_result_bitmask,const uint8_t current_result_bitmask,uint8_t * result_buffer) const832 void QueryCondition::apply_clause_dense(
833 const QueryCondition::Clause& clause,
834 ResultTile* result_tile,
835 const uint64_t start,
836 const uint64_t length,
837 const uint64_t src_cell,
838 const uint64_t stride,
839 const bool var_size,
840 const bool nullable,
841 const uint8_t previous_result_bitmask,
842 const uint8_t current_result_bitmask,
843 uint8_t* result_buffer) const {
844 const std::string& field_name = clause.field_name_;
845
846 // Get the nullable buffer.
847 const auto tile_tuple = result_tile->tile_tuple(field_name);
848 uint8_t* buffer_validity = nullptr;
849
850 if (nullable) {
851 const auto& tile_validity = std::get<2>(*tile_tuple);
852 buffer_validity =
853 static_cast<uint8_t*>(tile_validity.buffer()->data()) + src_cell;
854 }
855
856 if (var_size) {
857 // Get var data buffer and tile offsets buffer.
858 const auto& tile = std::get<1>(*tile_tuple);
859 const char* buffer = static_cast<char*>(tile.buffer()->data());
860 const uint64_t buffer_size = tile.size();
861
862 const auto& tile_offsets = std::get<0>(*tile_tuple);
863 const uint64_t* buffer_offsets =
864 static_cast<uint64_t*>(tile_offsets.buffer()->data()) + src_cell;
865 const uint64_t buffer_offsets_el =
866 tile_offsets.size() / constants::cell_var_offset_size;
867
868 // Iterate through each cell in this slab.
869 for (uint64_t c = 0; c < length; ++c) {
870 const uint64_t buffer_offset = buffer_offsets[start + c * stride];
871 const uint64_t next_cell_offset =
872 (start + c * stride + 1 < buffer_offsets_el) ?
873 buffer_offsets[start + c * stride + 1] :
874 buffer_size;
875 const uint64_t cell_size = next_cell_offset - buffer_offset;
876
877 const bool null_cell =
878 nullable && buffer_validity[start + c * stride] == 0;
879
880 // Get the cell value.
881 const void* const cell_value =
882 null_cell ? nullptr : buffer + buffer_offset;
883
884 // Compare the cell value against the value in the clause.
885 const bool cmp = BinaryCmp<T, Op>::cmp(
886 cell_value,
887 cell_size,
888 clause.condition_value_,
889 clause.condition_value_data_.size());
890
891 // Set the value.
892 if (cmp && (result_buffer[start + c] & previous_result_bitmask)) {
893 result_buffer[start + c] |= current_result_bitmask;
894 } else {
895 result_buffer[start + c] &= ~(current_result_bitmask);
896 }
897 }
898 } else {
899 // Get the fixed size data buffers.
900 const auto& tile = std::get<0>(*tile_tuple);
901 const char* buffer = static_cast<char*>(tile.buffer()->data());
902 const uint64_t cell_size = tile.cell_size();
903 uint64_t buffer_offset = (start + src_cell) * cell_size;
904 const uint64_t buffer_offset_inc = stride * cell_size;
905
906 // Iterate through each cell in this slab.
907 for (uint64_t c = 0; c < length; ++c) {
908 const bool null_cell =
909 nullable && buffer_validity[start + c * stride] == 0;
910
911 // Get the cell value.
912 const void* const cell_value =
913 null_cell ? nullptr : buffer + buffer_offset;
914 buffer_offset += buffer_offset_inc;
915
916 // Compare the cell value against the value in the clause.
917 const bool cmp = BinaryCmp<T, Op>::cmp(
918 cell_value,
919 cell_size,
920 clause.condition_value_,
921 clause.condition_value_data_.size());
922
923 // Set the value.
924 if (cmp && (result_buffer[start + c] & previous_result_bitmask)) {
925 result_buffer[start + c] |= current_result_bitmask;
926 } else {
927 result_buffer[start + c] &= ~(current_result_bitmask);
928 }
929 }
930 }
931 }
932
933 template <typename T>
apply_clause_dense(const Clause & clause,ResultTile * result_tile,const uint64_t start,const uint64_t length,const uint64_t src_cell,const uint64_t stride,const bool var_size,const bool nullable,const uint8_t previous_result_bitmask,const uint8_t current_result_bitmask,uint8_t * result_buffer) const934 Status QueryCondition::apply_clause_dense(
935 const Clause& clause,
936 ResultTile* result_tile,
937 const uint64_t start,
938 const uint64_t length,
939 const uint64_t src_cell,
940 const uint64_t stride,
941 const bool var_size,
942 const bool nullable,
943 const uint8_t previous_result_bitmask,
944 const uint8_t current_result_bitmask,
945 uint8_t* result_buffer) const {
946 switch (clause.op_) {
947 case QueryConditionOp::LT:
948 apply_clause_dense<T, QueryConditionOp::LT>(
949 clause,
950 result_tile,
951 start,
952 length,
953 src_cell,
954 stride,
955 var_size,
956 nullable,
957 previous_result_bitmask,
958 current_result_bitmask,
959 result_buffer);
960 break;
961 case QueryConditionOp::LE:
962 apply_clause_dense<T, QueryConditionOp::LE>(
963 clause,
964 result_tile,
965 start,
966 length,
967 src_cell,
968 stride,
969 var_size,
970 nullable,
971 previous_result_bitmask,
972 current_result_bitmask,
973 result_buffer);
974 break;
975 case QueryConditionOp::GT:
976 apply_clause_dense<T, QueryConditionOp::GT>(
977 clause,
978 result_tile,
979 start,
980 length,
981 src_cell,
982 stride,
983 var_size,
984 nullable,
985 previous_result_bitmask,
986 current_result_bitmask,
987 result_buffer);
988 break;
989 case QueryConditionOp::GE:
990 apply_clause_dense<T, QueryConditionOp::GE>(
991 clause,
992 result_tile,
993 start,
994 length,
995 src_cell,
996 stride,
997 var_size,
998 nullable,
999 previous_result_bitmask,
1000 current_result_bitmask,
1001 result_buffer);
1002 break;
1003 case QueryConditionOp::EQ:
1004 apply_clause_dense<T, QueryConditionOp::EQ>(
1005 clause,
1006 result_tile,
1007 start,
1008 length,
1009 src_cell,
1010 stride,
1011 var_size,
1012 nullable,
1013 previous_result_bitmask,
1014 current_result_bitmask,
1015 result_buffer);
1016 break;
1017 case QueryConditionOp::NE:
1018 apply_clause_dense<T, QueryConditionOp::NE>(
1019 clause,
1020 result_tile,
1021 start,
1022 length,
1023 src_cell,
1024 stride,
1025 var_size,
1026 nullable,
1027 previous_result_bitmask,
1028 current_result_bitmask,
1029 result_buffer);
1030 break;
1031 default:
1032 return Status::QueryConditionError(
1033 "Cannot perform query comparison; Unknown query "
1034 "condition operator");
1035 }
1036
1037 return Status::Ok();
1038 }
1039
apply_clause_dense(const QueryCondition::Clause & clause,const ArraySchema * const array_schema,ResultTile * result_tile,const uint64_t start,const uint64_t length,const uint64_t src_cell,const uint64_t stride,const uint8_t previous_result_bitmask,const uint8_t current_result_bitmask,uint8_t * result_buffer) const1040 Status QueryCondition::apply_clause_dense(
1041 const QueryCondition::Clause& clause,
1042 const ArraySchema* const array_schema,
1043 ResultTile* result_tile,
1044 const uint64_t start,
1045 const uint64_t length,
1046 const uint64_t src_cell,
1047 const uint64_t stride,
1048 const uint8_t previous_result_bitmask,
1049 const uint8_t current_result_bitmask,
1050 uint8_t* result_buffer) const {
1051 const Attribute* const attribute =
1052 array_schema->attribute(clause.field_name_);
1053 if (!attribute) {
1054 return Status::QueryConditionError(
1055 "Unknown attribute " + clause.field_name_);
1056 }
1057
1058 const bool var_size = attribute->var_size();
1059 const bool nullable = attribute->nullable();
1060 switch (attribute->type()) {
1061 case Datatype::INT8:
1062 return apply_clause_dense<int8_t>(
1063 clause,
1064 result_tile,
1065 start,
1066 length,
1067 src_cell,
1068 stride,
1069 var_size,
1070 nullable,
1071 previous_result_bitmask,
1072 current_result_bitmask,
1073 result_buffer);
1074 case Datatype::UINT8:
1075 return apply_clause_dense<uint8_t>(
1076 clause,
1077 result_tile,
1078 start,
1079 length,
1080 src_cell,
1081 stride,
1082 var_size,
1083 nullable,
1084 previous_result_bitmask,
1085 current_result_bitmask,
1086 result_buffer);
1087 case Datatype::INT16:
1088 return apply_clause_dense<int16_t>(
1089 clause,
1090 result_tile,
1091 start,
1092 length,
1093 src_cell,
1094 stride,
1095 var_size,
1096 nullable,
1097 previous_result_bitmask,
1098 current_result_bitmask,
1099 result_buffer);
1100 case Datatype::UINT16:
1101 return apply_clause_dense<uint16_t>(
1102 clause,
1103 result_tile,
1104 start,
1105 length,
1106 src_cell,
1107 stride,
1108 var_size,
1109 nullable,
1110 previous_result_bitmask,
1111 current_result_bitmask,
1112 result_buffer);
1113 case Datatype::INT32:
1114 return apply_clause_dense<int32_t>(
1115 clause,
1116 result_tile,
1117 start,
1118 length,
1119 src_cell,
1120 stride,
1121 var_size,
1122 nullable,
1123 previous_result_bitmask,
1124 current_result_bitmask,
1125 result_buffer);
1126 case Datatype::UINT32:
1127 return apply_clause_dense<uint32_t>(
1128 clause,
1129 result_tile,
1130 start,
1131 length,
1132 src_cell,
1133 stride,
1134 var_size,
1135 nullable,
1136 previous_result_bitmask,
1137 current_result_bitmask,
1138 result_buffer);
1139 case Datatype::INT64:
1140 return apply_clause_dense<int64_t>(
1141 clause,
1142 result_tile,
1143 start,
1144 length,
1145 src_cell,
1146 stride,
1147 var_size,
1148 nullable,
1149 previous_result_bitmask,
1150 current_result_bitmask,
1151 result_buffer);
1152 case Datatype::UINT64:
1153 return apply_clause_dense<uint64_t>(
1154 clause,
1155 result_tile,
1156 start,
1157 length,
1158 src_cell,
1159 stride,
1160 var_size,
1161 nullable,
1162 previous_result_bitmask,
1163 current_result_bitmask,
1164 result_buffer);
1165 case Datatype::FLOAT32:
1166 return apply_clause_dense<float>(
1167 clause,
1168 result_tile,
1169 start,
1170 length,
1171 src_cell,
1172 stride,
1173 var_size,
1174 nullable,
1175 previous_result_bitmask,
1176 current_result_bitmask,
1177 result_buffer);
1178 case Datatype::FLOAT64:
1179 return apply_clause_dense<double>(
1180 clause,
1181 result_tile,
1182 start,
1183 length,
1184 src_cell,
1185 stride,
1186 var_size,
1187 nullable,
1188 previous_result_bitmask,
1189 current_result_bitmask,
1190 result_buffer);
1191 case Datatype::STRING_ASCII:
1192 return apply_clause_dense<char*>(
1193 clause,
1194 result_tile,
1195 start,
1196 length,
1197 src_cell,
1198 stride,
1199 var_size,
1200 nullable,
1201 previous_result_bitmask,
1202 current_result_bitmask,
1203 result_buffer);
1204 case Datatype::CHAR:
1205 return apply_clause_dense<char>(
1206 clause,
1207 result_tile,
1208 start,
1209 length,
1210 src_cell,
1211 stride,
1212 var_size,
1213 nullable,
1214 previous_result_bitmask,
1215 current_result_bitmask,
1216 result_buffer);
1217 case Datatype::DATETIME_YEAR:
1218 case Datatype::DATETIME_MONTH:
1219 case Datatype::DATETIME_WEEK:
1220 case Datatype::DATETIME_DAY:
1221 case Datatype::DATETIME_HR:
1222 case Datatype::DATETIME_MIN:
1223 case Datatype::DATETIME_SEC:
1224 case Datatype::DATETIME_MS:
1225 case Datatype::DATETIME_US:
1226 case Datatype::DATETIME_NS:
1227 case Datatype::DATETIME_PS:
1228 case Datatype::DATETIME_FS:
1229 case Datatype::DATETIME_AS:
1230 return apply_clause_dense<int64_t>(
1231 clause,
1232 result_tile,
1233 start,
1234 length,
1235 src_cell,
1236 stride,
1237 var_size,
1238 nullable,
1239 previous_result_bitmask,
1240 current_result_bitmask,
1241 result_buffer);
1242 case Datatype::ANY:
1243 case Datatype::STRING_UTF8:
1244 case Datatype::STRING_UTF16:
1245 case Datatype::STRING_UTF32:
1246 case Datatype::STRING_UCS2:
1247 case Datatype::STRING_UCS4:
1248 default:
1249 return Status::QueryConditionError(
1250 "Cannot perform query comparison; Unsupported query "
1251 "conditional type on " +
1252 clause.field_name_);
1253 }
1254
1255 return Status::Ok();
1256 }
1257
1258 template <typename T>
apply_dense(const ArraySchema * const array_schema,ResultTile * result_tile,const uint64_t start,const uint64_t length,const uint64_t src_cell,const uint64_t stride,uint8_t * result_buffer)1259 Status QueryCondition::apply_dense(
1260 const ArraySchema* const array_schema,
1261 ResultTile* result_tile,
1262 const uint64_t start,
1263 const uint64_t length,
1264 const uint64_t src_cell,
1265 const uint64_t stride,
1266 uint8_t* result_buffer) {
1267 uint8_t previous_result_bitmask = clauses_.size() % 2 == 1 ? 0x2 : 0x1;
1268 uint8_t current_result_bitmask = clauses_.size() % 2 == 1 ? 0x1 : 0x2;
1269
1270 // Iterate through each clause.
1271 // This assumes all clauses are combined with a logical "AND".
1272 for (const auto& clause : clauses_) {
1273 RETURN_NOT_OK(apply_clause_dense(
1274 clause,
1275 array_schema,
1276 result_tile,
1277 start,
1278 length,
1279 src_cell,
1280 stride,
1281 previous_result_bitmask,
1282 current_result_bitmask,
1283 result_buffer));
1284
1285 // Switch the result bitmasks for the next condition.
1286 std::swap(previous_result_bitmask, current_result_bitmask);
1287 }
1288
1289 return Status::Ok();
1290 }
1291
set_clauses(std::vector<Clause> && clauses)1292 void QueryCondition::set_clauses(std::vector<Clause>&& clauses) {
1293 clauses_ = std::move(clauses);
1294 }
1295
set_combination_ops(std::vector<QueryConditionCombinationOp> && combination_ops)1296 void QueryCondition::set_combination_ops(
1297 std::vector<QueryConditionCombinationOp>&& combination_ops) {
1298 combination_ops_ = std::move(combination_ops);
1299 }
1300
clauses() const1301 std::vector<QueryCondition::Clause> QueryCondition::clauses() const {
1302 return clauses_;
1303 }
1304
combination_ops() const1305 std::vector<QueryConditionCombinationOp> QueryCondition::combination_ops()
1306 const {
1307 return combination_ops_;
1308 }
1309
1310 // Explicit template instantiations.
1311 template Status QueryCondition::apply_dense<int8_t>(
1312 const ArraySchema* const,
1313 ResultTile*,
1314 const uint64_t,
1315 const uint64_t,
1316 const uint64_t,
1317 const uint64_t,
1318 uint8_t*);
1319 template Status QueryCondition::apply_dense<uint8_t>(
1320 const ArraySchema* const,
1321 ResultTile*,
1322 const uint64_t,
1323 const uint64_t,
1324 const uint64_t,
1325 const uint64_t,
1326 uint8_t*);
1327 template Status QueryCondition::apply_dense<int16_t>(
1328 const ArraySchema* const,
1329 ResultTile*,
1330 const uint64_t,
1331 const uint64_t,
1332 const uint64_t,
1333 const uint64_t,
1334 uint8_t*);
1335 template Status QueryCondition::apply_dense<uint16_t>(
1336 const ArraySchema* const,
1337 ResultTile*,
1338 const uint64_t,
1339 const uint64_t,
1340 const uint64_t,
1341 const uint64_t,
1342 uint8_t*);
1343 template Status QueryCondition::apply_dense<int32_t>(
1344 const ArraySchema* const,
1345 ResultTile*,
1346 const uint64_t,
1347 const uint64_t,
1348 const uint64_t,
1349 const uint64_t,
1350 uint8_t*);
1351 template Status QueryCondition::apply_dense<uint32_t>(
1352 const ArraySchema* const,
1353 ResultTile*,
1354 const uint64_t,
1355 const uint64_t,
1356 const uint64_t,
1357 const uint64_t,
1358 uint8_t*);
1359 template Status QueryCondition::apply_dense<int64_t>(
1360 const ArraySchema* const,
1361 ResultTile*,
1362 const uint64_t,
1363 const uint64_t,
1364 const uint64_t,
1365 const uint64_t,
1366 uint8_t*);
1367 template Status QueryCondition::apply_dense<uint64_t>(
1368 const ArraySchema* const,
1369 ResultTile*,
1370 const uint64_t,
1371 const uint64_t,
1372 const uint64_t,
1373 const uint64_t,
1374 uint8_t*);
1375
1376 } // namespace sm
1377 } // namespace tiledb
1378