1 /**
2 * @file unit-cppapi-var-offsets.cc
3 *
4 * @section LICENSE
5 *
6 * The MIT License
7 *
8 * @copyright Copyright (c) 2020-2021 TileDB Inc.
9 *
10 * Permission is hereby granted, free of charge, to any person obtaining a copy
11 * of this software and associated documentation files (the "Software"), to deal
12 * in the Software without restriction, including without limitation the rights
13 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
14 * copies of the Software, and to permit persons to whom the Software is
15 * furnished to do so, subject to the following conditions:
16 *
17 * The above copyright notice and this permission notice shall be included in
18 * all copies or substantial portions of the Software.
19 *
20 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
21 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
23 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
24 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
25 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
26 * THE SOFTWARE.
27 *
28 * @section DESCRIPTION
29 *
30 * Tests the different configurations of var-sized attribute offsets using the
31 * C++ API.
32 */
33
34 #include "catch.hpp"
35 #include "tiledb/sm/cpp_api/tiledb"
36
37 using namespace tiledb;
38
create_sparse_array(const std::string & array_name)39 void create_sparse_array(const std::string& array_name) {
40 Context ctx;
41 VFS vfs(ctx);
42
43 // Create the array
44 if (vfs.is_dir(array_name))
45 vfs.remove_dir(array_name);
46
47 Domain dom(ctx);
48 dom.add_dimension(Dimension::create<int64_t>(ctx, "d1", {{1, 4}}, 2))
49 .add_dimension(Dimension::create<int64_t>(ctx, "d2", {{1, 4}}, 2));
50
51 ArraySchema schema(ctx, TILEDB_SPARSE);
52 Attribute attr(ctx, "attr", TILEDB_INT32);
53 attr.set_cell_val_num(TILEDB_VAR_NUM);
54 schema.add_attribute(attr);
55 schema.set_tile_order(TILEDB_ROW_MAJOR);
56 schema.set_cell_order(TILEDB_ROW_MAJOR);
57 schema.set_domain(dom);
58 schema.set_allows_dups(true);
59
60 Array::create(array_name, schema);
61 }
62
write_sparse_array(Context ctx,const std::string & array_name,std::vector<int32_t> & data,std::vector<uint64_t> & data_offsets,tiledb_layout_t layout)63 void write_sparse_array(
64 Context ctx,
65 const std::string& array_name,
66 std::vector<int32_t>& data,
67 std::vector<uint64_t>& data_offsets,
68 tiledb_layout_t layout) {
69 std::vector<int64_t> d1 = {1, 2, 3, 4};
70 std::vector<int64_t> d2 = {2, 1, 3, 4};
71
72 Array array(ctx, array_name, TILEDB_WRITE);
73 Query query(ctx, array, TILEDB_WRITE);
74 query.set_layout(layout);
75 query.set_data_buffer("d1", d1);
76 query.set_data_buffer("d2", d2);
77 query.set_data_buffer("attr", data);
78 query.set_offsets_buffer("attr", data_offsets);
79 CHECK_NOTHROW(query.submit());
80
81 // Finalize is necessary in global writes, otherwise a no-op
82 query.finalize();
83
84 array.close();
85 }
86
write_sparse_array(Context ctx,const std::string & array_name,std::vector<int32_t> & data,std::vector<uint32_t> & data_offsets,tiledb_layout_t layout)87 void write_sparse_array(
88 Context ctx,
89 const std::string& array_name,
90 std::vector<int32_t>& data,
91 std::vector<uint32_t>& data_offsets,
92 tiledb_layout_t layout) {
93 std::vector<int64_t> d1 = {1, 2, 3, 4};
94 std::vector<int64_t> d2 = {2, 1, 3, 4};
95
96 Array array(ctx, array_name, TILEDB_WRITE);
97 Query query(ctx, array, TILEDB_WRITE);
98 query.set_layout(layout);
99 query.set_data_buffer("d1", d1);
100 query.set_data_buffer("d2", d2);
101 query.set_data_buffer("attr", data.data(), data.size());
102 query.set_offsets_buffer(
103 "attr",
104 reinterpret_cast<uint64_t*>(data_offsets.data()),
105 data_offsets.size());
106 CHECK_NOTHROW(query.submit());
107
108 // Finalize is necessary in global writes, otherwise a no-op
109 query.finalize();
110
111 array.close();
112 }
113
read_and_check_sparse_array(Context ctx,const std::string & array_name,std::vector<int32_t> & expected_data,std::vector<uint64_t> & expected_offsets,tiledb_layout_t layout)114 void read_and_check_sparse_array(
115 Context ctx,
116 const std::string& array_name,
117 std::vector<int32_t>& expected_data,
118 std::vector<uint64_t>& expected_offsets,
119 tiledb_layout_t layout) {
120 Array array(ctx, array_name, TILEDB_READ);
121 Query query(ctx, array, TILEDB_READ);
122
123 std::vector<int32_t> attr_val(expected_data.size());
124 std::vector<uint64_t> attr_off(expected_offsets.size());
125
126 query.set_layout(layout);
127 query.set_data_buffer("attr", attr_val);
128 query.set_offsets_buffer("attr", attr_off);
129
130 CHECK_NOTHROW(query.submit());
131
132 // Check the element offsets are properly returned
133 CHECK(attr_val == expected_data);
134 CHECK(attr_off == expected_offsets);
135
136 array.close();
137 }
138
read_and_check_sparse_array(Context ctx,const std::string & array_name,std::vector<int32_t> & expected_data,std::vector<uint32_t> & expected_offsets,tiledb_layout_t layout)139 void read_and_check_sparse_array(
140 Context ctx,
141 const std::string& array_name,
142 std::vector<int32_t>& expected_data,
143 std::vector<uint32_t>& expected_offsets,
144 tiledb_layout_t layout) {
145 Array array(ctx, array_name, TILEDB_READ);
146 Query query(ctx, array, TILEDB_READ);
147
148 std::vector<int32_t> attr_val(expected_data.size());
149 std::vector<uint32_t> attr_off(expected_offsets.size());
150 query.set_layout(layout);
151 // Read using a 32-bit vector, but cast it to 64-bit pointer so that the API
152 // accepts it
153 query.set_data_buffer("attr", attr_val.data(), attr_val.size());
154 query.set_offsets_buffer(
155 "attr", reinterpret_cast<uint64_t*>(attr_off.data()), attr_off.size());
156 CHECK_NOTHROW(query.submit());
157
158 // Check the element offsets are properly returned
159 CHECK(attr_val == expected_data);
160 CHECK(attr_off == expected_offsets);
161
162 array.close();
163 }
164
/**
 * Zeroes every element of both read buffers while keeping their sizes.
 *
 * @param data Data buffer to clear.
 * @param offsets Offsets buffer to clear.
 */
void reset_read_buffers(
    std::vector<int32_t>& data, std::vector<uint64_t>& offsets) {
  for (auto& value : data) {
    value = 0;
  }
  for (auto& offset : offsets) {
    offset = 0;
  }
}
170
partial_read_and_check_sparse_array(Context ctx,const std::string & array_name,std::vector<int32_t> & exp_data_part1,std::vector<uint64_t> & exp_off_part1,std::vector<int32_t> & exp_data_part2,std::vector<uint64_t> & exp_off_part2,tiledb_layout_t layout)171 void partial_read_and_check_sparse_array(
172 Context ctx,
173 const std::string& array_name,
174 std::vector<int32_t>& exp_data_part1,
175 std::vector<uint64_t>& exp_off_part1,
176 std::vector<int32_t>& exp_data_part2,
177 std::vector<uint64_t>& exp_off_part2,
178 tiledb_layout_t layout) {
179 // The size of read buffers is smaller than the size
180 // of all the data, so we'll do partial reads
181 std::vector<int32_t> attr_val(exp_data_part1.size());
182 std::vector<uint64_t> attr_off(exp_off_part1.size());
183
184 Array array(ctx, array_name, TILEDB_READ);
185 Query query(ctx, array, TILEDB_READ);
186 query.set_layout(layout);
187 query.set_data_buffer("attr", attr_val);
188 query.set_offsets_buffer("attr", attr_off);
189
190 // Check that first partial read returns expected results
191 CHECK_NOTHROW(query.submit());
192 Query::Status status = query.query_status();
193 CHECK(status == Query::Status::INCOMPLETE);
194 CHECK(attr_val == exp_data_part1);
195 CHECK(attr_off == exp_off_part1);
196
197 // Check that second partial read returns expected results
198 CHECK_NOTHROW(query.submit());
199 status = query.query_status();
200 CHECK(status == Query::Status::COMPLETE);
201 CHECK(attr_val == exp_data_part2);
202 CHECK(attr_off == exp_off_part2);
203
204 array.close();
205 }
206
create_dense_array(const std::string & array_name)207 void create_dense_array(const std::string& array_name) {
208 Context ctx;
209 VFS vfs(ctx);
210
211 // Create the array
212 if (vfs.is_dir(array_name))
213 vfs.remove_dir(array_name);
214
215 Domain dom(ctx);
216 dom.add_dimension(Dimension::create<int64_t>(ctx, "d1", {{1, 4}}, 2))
217 .add_dimension(Dimension::create<int64_t>(ctx, "d2", {{1, 4}}, 2));
218
219 ArraySchema schema(ctx, TILEDB_DENSE);
220 Attribute attr(ctx, "attr", TILEDB_INT32);
221 attr.set_cell_val_num(TILEDB_VAR_NUM);
222 schema.add_attribute(attr);
223 schema.set_tile_order(TILEDB_ROW_MAJOR);
224 schema.set_cell_order(TILEDB_ROW_MAJOR);
225 schema.set_domain(dom);
226
227 Array::create(array_name, schema);
228 }
229
write_dense_array(Context ctx,const std::string & array_name,std::vector<int32_t> & data,std::vector<uint64_t> & data_offsets,tiledb_layout_t layout,std::shared_ptr<Config> config=nullptr)230 void write_dense_array(
231 Context ctx,
232 const std::string& array_name,
233 std::vector<int32_t>& data,
234 std::vector<uint64_t>& data_offsets,
235 tiledb_layout_t layout,
236 std::shared_ptr<Config> config = nullptr) {
237 std::vector<int64_t> d1 = {1, 1, 2, 2};
238 std::vector<int64_t> d2 = {1, 2, 1, 2};
239
240 Array array(ctx, array_name, TILEDB_WRITE);
241 Query query(ctx, array, TILEDB_WRITE);
242
243 if (config != nullptr) {
244 query.set_config(*config);
245
246 // Validate we can retrieve set config
247 Config config2 = query.config();
248 bool same = *config == config2;
249 CHECK(same == true);
250 }
251
252 query.set_data_buffer("attr", data);
253 query.set_offsets_buffer("attr", data_offsets);
254 query.set_layout(layout);
255 if (layout == TILEDB_UNORDERED) {
256 // sparse write to dense array
257 query.set_data_buffer("d1", d1);
258 query.set_data_buffer("d2", d2);
259 } else {
260 query.set_subarray<int64_t>({1, 2, 1, 2});
261 }
262
263 CHECK_NOTHROW(query.submit());
264
265 // Finalize is necessary in global writes, otherwise a no-op
266 query.finalize();
267
268 array.close();
269 }
270
write_dense_array(Context ctx,const std::string & array_name,std::vector<int32_t> & data,std::vector<uint32_t> & data_offsets,tiledb_layout_t layout,std::shared_ptr<Config> config=nullptr)271 void write_dense_array(
272 Context ctx,
273 const std::string& array_name,
274 std::vector<int32_t>& data,
275 std::vector<uint32_t>& data_offsets,
276 tiledb_layout_t layout,
277 std::shared_ptr<Config> config = nullptr) {
278 std::vector<int64_t> d1 = {1, 1, 2, 2};
279 std::vector<int64_t> d2 = {1, 2, 1, 2};
280
281 Array array(ctx, array_name, TILEDB_WRITE);
282 Query query(ctx, array, TILEDB_WRITE);
283
284 if (config != nullptr) {
285 query.set_config(*config);
286
287 // Validate we can retrieve set config
288 Config config2 = query.config();
289 bool same = *config == config2;
290 CHECK(same == true);
291 }
292
293 // Write using a 32-bit vector, but cast it to 64-bit pointer so that the API
294 // accepts it
295 query.set_data_buffer("attr", data.data(), data.size());
296 query.set_offsets_buffer(
297 "attr",
298 reinterpret_cast<uint64_t*>(data_offsets.data()),
299 data_offsets.size());
300 query.set_layout(layout);
301 if (layout == TILEDB_UNORDERED) {
302 // sparse write to dense array
303 query.set_data_buffer("d1", d1);
304 query.set_data_buffer("d2", d2);
305 } else {
306 query.set_subarray<int64_t>({1, 2, 1, 2});
307 }
308
309 CHECK_NOTHROW(query.submit());
310 // Finalize is necessary in global writes, otherwise a no-op
311 query.finalize();
312
313 array.close();
314 }
315
read_and_check_dense_array(Context ctx,const std::string & array_name,std::vector<int32_t> & expected_data,std::vector<uint64_t> & expected_offsets,std::shared_ptr<Config> config=nullptr)316 void read_and_check_dense_array(
317 Context ctx,
318 const std::string& array_name,
319 std::vector<int32_t>& expected_data,
320 std::vector<uint64_t>& expected_offsets,
321 std::shared_ptr<Config> config = nullptr) {
322 Array array(ctx, array_name, TILEDB_READ);
323 Query query(ctx, array, TILEDB_READ);
324
325 if (config != nullptr) {
326 query.set_config(*config);
327
328 // Validate we can retrieve set config
329 Config config2 = query.config();
330 bool same = *config == config2;
331 CHECK(same == true);
332 }
333
334 std::vector<int32_t> attr_val(expected_data.size());
335 std::vector<uint64_t> attr_off(expected_offsets.size());
336 query.set_subarray<int64_t>({1, 2, 1, 2});
337 query.set_data_buffer("attr", attr_val);
338 query.set_offsets_buffer("attr", attr_off);
339 CHECK_NOTHROW(query.submit());
340
341 // Check the element offsets are properly returned
342 CHECK(attr_val == expected_data);
343 CHECK(attr_off == expected_offsets);
344
345 array.close();
346 }
347
read_and_check_dense_array(Context ctx,const std::string & array_name,std::vector<int32_t> & expected_data,std::vector<uint32_t> & expected_offsets,std::shared_ptr<Config> config=nullptr)348 void read_and_check_dense_array(
349 Context ctx,
350 const std::string& array_name,
351 std::vector<int32_t>& expected_data,
352 std::vector<uint32_t>& expected_offsets,
353 std::shared_ptr<Config> config = nullptr) {
354 Array array(ctx, array_name, TILEDB_READ);
355 Query query(ctx, array, TILEDB_READ);
356
357 if (config != nullptr) {
358 query.set_config(*config);
359
360 // Validate we can retrieve set config
361 Config config2 = query.config();
362 bool same = *config == config2;
363 CHECK(same == true);
364 }
365
366 std::vector<int32_t> attr_val(expected_data.size());
367 std::vector<uint32_t> attr_off(expected_offsets.size());
368 query.set_subarray<int64_t>({1, 2, 1, 2});
369 // Read using a 32-bit vector, but cast it to 64-bit pointer so that the API
370 // accepts it
371 query.set_data_buffer("attr", attr_val.data(), attr_val.size());
372 query.set_offsets_buffer(
373 "attr", reinterpret_cast<uint64_t*>(attr_off.data()), attr_off.size());
374 CHECK_NOTHROW(query.submit());
375
376 // Check the element offsets are properly returned
377 CHECK(attr_val == expected_data);
378 CHECK(attr_off == expected_offsets);
379
380 array.close();
381 }
382
partial_read_and_check_dense_array(Context ctx,const std::string & array_name,std::vector<int32_t> & exp_data_part1,std::vector<uint64_t> & exp_off_part1,std::vector<int32_t> & exp_data_part2,std::vector<uint64_t> & exp_off_part2)383 void partial_read_and_check_dense_array(
384 Context ctx,
385 const std::string& array_name,
386 std::vector<int32_t>& exp_data_part1,
387 std::vector<uint64_t>& exp_off_part1,
388 std::vector<int32_t>& exp_data_part2,
389 std::vector<uint64_t>& exp_off_part2) {
390 // The size of read buffers is smaller than the size
391 // of all the data, so we'll do partial reads
392 std::vector<int32_t> attr_val(exp_data_part1.size());
393 std::vector<uint64_t> attr_off(exp_off_part1.size());
394
395 Array array(ctx, array_name, TILEDB_READ);
396 Query query(ctx, array, TILEDB_READ);
397 query.set_subarray<int64_t>({1, 2, 1, 2});
398 query.set_data_buffer("attr", attr_val);
399 query.set_offsets_buffer("attr", attr_off);
400
401 // Check that first partial read returns expected results
402 CHECK_NOTHROW(query.submit());
403 CHECK(attr_val == exp_data_part1);
404 CHECK(attr_off == exp_off_part1);
405
406 // Check that second partial read returns expected results
407 CHECK_NOTHROW(query.submit());
408 CHECK(attr_val == exp_data_part2);
409 CHECK(attr_off == exp_off_part2);
410
411 array.close();
412 }
413
414 TEST_CASE(
415 "C++ API: Test element offsets : sparse array",
416 "[var-offsets][element-offset][sparse]") {
417 std::string array_name = "test_element_offset";
418 create_sparse_array(array_name);
419
420 std::vector<int32_t> data = {1, 2, 3, 4, 5, 6};
421 Context ctx;
422
423 SECTION("Byte offsets (default case)") {
424 Config config = ctx.config();
425 CHECK((std::string)config["sm.var_offsets.mode"] == "bytes");
426
427 std::vector<uint64_t> byte_offsets = {0, 4, 12, 20};
428
429 SECTION("Unordered write") {
430 write_sparse_array(ctx, array_name, data, byte_offsets, TILEDB_UNORDERED);
431 SECTION("Row major read") {
432 read_and_check_sparse_array(
433 ctx, array_name, data, byte_offsets, TILEDB_ROW_MAJOR);
434 }
435 SECTION("Global order read") {
436 read_and_check_sparse_array(
437 ctx, array_name, data, byte_offsets, TILEDB_GLOBAL_ORDER);
438 }
439 SECTION("Unordered read") {
440 read_and_check_sparse_array(
441 ctx, array_name, data, byte_offsets, TILEDB_UNORDERED);
442 }
443 }
444 SECTION("Global order write") {
445 write_sparse_array(
446 ctx, array_name, data, byte_offsets, TILEDB_GLOBAL_ORDER);
447 SECTION("Row major read") {
448 read_and_check_sparse_array(
449 ctx, array_name, data, byte_offsets, TILEDB_ROW_MAJOR);
450 }
451 SECTION("Global order read") {
452 read_and_check_sparse_array(
453 ctx, array_name, data, byte_offsets, TILEDB_GLOBAL_ORDER);
454 }
455 SECTION("Unordered read") {
456 read_and_check_sparse_array(
457 ctx, array_name, data, byte_offsets, TILEDB_UNORDERED);
458 }
459 }
460 }
461
462 SECTION("Element offsets") {
463 Config config;
464 // Change config of offsets format from bytes to elements
465 config["sm.var_offsets.mode"] = "elements";
466 Context ctx(config);
467
468 std::vector<uint64_t> element_offsets = {0, 1, 3, 5};
469
470 SECTION("Unordered write") {
471 write_sparse_array(
472 ctx, array_name, data, element_offsets, TILEDB_UNORDERED);
473 SECTION("Row major read") {
474 read_and_check_sparse_array(
475 ctx, array_name, data, element_offsets, TILEDB_ROW_MAJOR);
476 }
477 SECTION("Global order read") {
478 read_and_check_sparse_array(
479 ctx, array_name, data, element_offsets, TILEDB_GLOBAL_ORDER);
480 }
481 SECTION("Unordered read") {
482 read_and_check_sparse_array(
483 ctx, array_name, data, element_offsets, TILEDB_UNORDERED);
484 }
485 }
486 SECTION("Global order write") {
487 write_sparse_array(
488 ctx, array_name, data, element_offsets, TILEDB_GLOBAL_ORDER);
489 SECTION("Row major read") {
490 read_and_check_sparse_array(
491 ctx, array_name, data, element_offsets, TILEDB_ROW_MAJOR);
492 }
493 SECTION("Global order read") {
494 read_and_check_sparse_array(
495 ctx, array_name, data, element_offsets, TILEDB_GLOBAL_ORDER);
496 }
497 SECTION("Unordered read") {
498 read_and_check_sparse_array(
499 ctx, array_name, data, element_offsets, TILEDB_UNORDERED);
500 }
501 }
502 }
503
504 // Clean up
505 VFS vfs(ctx);
506 if (vfs.is_dir(array_name))
507 vfs.remove_dir(array_name);
508 }
509
510 TEST_CASE(
511 "C++ API: Test element offsets : dense array",
512 "[var-offsets][element-offset][dense]") {
513 std::string array_name = "test_element_offset";
514 create_dense_array(array_name);
515
516 std::vector<int32_t> data = {1, 2, 3, 4, 5, 6};
517 Context ctx;
518
519 SECTION("Byte offsets (default case)") {
520 Config config = ctx.config();
521 CHECK((std::string)config["sm.var_offsets.mode"] == "bytes");
522
523 std::vector<uint64_t> byte_offsets = {0, 4, 12, 20};
524
525 SECTION("Ordered write") {
526 write_dense_array(ctx, array_name, data, byte_offsets, TILEDB_ROW_MAJOR);
527 read_and_check_dense_array(ctx, array_name, data, byte_offsets);
528 }
529 SECTION("Global order write") {
530 write_dense_array(
531 ctx, array_name, data, byte_offsets, TILEDB_GLOBAL_ORDER);
532 read_and_check_dense_array(ctx, array_name, data, byte_offsets);
533 }
534 }
535
536 SECTION("Element offsets") {
537 Config config;
538 // Change config of offsets format from bytes to elements
539 config["sm.var_offsets.mode"] = "elements";
540 Context ctx(config);
541
542 std::vector<uint64_t> element_offsets = {0, 1, 3, 5};
543
544 SECTION("Ordered write") {
545 write_dense_array(
546 ctx, array_name, data, element_offsets, TILEDB_ROW_MAJOR);
547 read_and_check_dense_array(ctx, array_name, data, element_offsets);
548 }
549 SECTION("Global order write") {
550 write_dense_array(
551 ctx, array_name, data, element_offsets, TILEDB_GLOBAL_ORDER);
552 read_and_check_dense_array(ctx, array_name, data, element_offsets);
553 }
554 }
555
556 // Clean up
557 VFS vfs(ctx);
558 if (vfs.is_dir(array_name))
559 vfs.remove_dir(array_name);
560 }
561
562 TEST_CASE(
563 "C++ API: Test offsets extra element: sparse array",
564 "[var-offsets][extra-offset][sparse]") {
565 std::string array_name = "test_extra_offset";
566 create_sparse_array(array_name);
567
568 Context ctx;
569 std::vector<int32_t> data = {1, 2, 3, 4, 5, 6};
570 std::vector<uint64_t> data_offsets = {0, 4, 12, 20};
571 std::vector<uint64_t> element_offsets = {0, 1, 3, 5};
572
573 SECTION("Full read") {
574 Config config;
575
576 SECTION("No extra element (default case)") {
577 config = ctx.config();
578 CHECK((std::string)config["sm.var_offsets.extra_element"] == "false");
579
580 write_sparse_array(ctx, array_name, data, data_offsets, TILEDB_UNORDERED);
581 SECTION("Row major read") {
582 read_and_check_sparse_array(
583 ctx, array_name, data, data_offsets, TILEDB_ROW_MAJOR);
584 }
585 SECTION("Global order read") {
586 read_and_check_sparse_array(
587 ctx, array_name, data, data_offsets, TILEDB_GLOBAL_ORDER);
588 }
589 SECTION("Unordered read") {
590 read_and_check_sparse_array(
591 ctx, array_name, data, data_offsets, TILEDB_UNORDERED);
592 }
593 }
594
595 SECTION("Extra element") {
596 config["sm.var_offsets.extra_element"] = "true";
597
598 SECTION("Byte offsets (default config)") {
599 CHECK((std::string)config["sm.var_offsets.mode"] == "bytes");
600 Context ctx(config);
601
602 // Write data with extra element indicating total number of bytes
603 data_offsets.push_back(sizeof(data[0]) * data.size());
604
605 SECTION("Unordered write") {
606 write_sparse_array(
607 ctx, array_name, data, data_offsets, TILEDB_UNORDERED);
608 SECTION("Row major read") {
609 read_and_check_sparse_array(
610 ctx, array_name, data, data_offsets, TILEDB_ROW_MAJOR);
611 }
612 SECTION("Global order read") {
613 read_and_check_sparse_array(
614 ctx, array_name, data, data_offsets, TILEDB_GLOBAL_ORDER);
615 }
616 SECTION("UNORDERED read") {
617 read_and_check_sparse_array(
618 ctx, array_name, data, data_offsets, TILEDB_UNORDERED);
619 }
620 }
621 SECTION("Global order write") {
622 write_sparse_array(
623 ctx, array_name, data, data_offsets, TILEDB_GLOBAL_ORDER);
624 SECTION("Row major read") {
625 read_and_check_sparse_array(
626 ctx, array_name, data, data_offsets, TILEDB_ROW_MAJOR);
627 }
628 SECTION("Global order read") {
629 read_and_check_sparse_array(
630 ctx, array_name, data, data_offsets, TILEDB_GLOBAL_ORDER);
631 }
632 SECTION("Unordered read") {
633 read_and_check_sparse_array(
634 ctx, array_name, data, data_offsets, TILEDB_UNORDERED);
635 }
636 }
637 }
638
639 SECTION("Element offsets") {
640 config["sm.var_offsets.mode"] = "elements";
641 Context ctx(config);
642
643 // Write data with extra element indicating the total number of elements
644 element_offsets.push_back(data.size());
645
646 SECTION("Unordered write") {
647 write_sparse_array(
648 ctx, array_name, data, element_offsets, TILEDB_UNORDERED);
649 SECTION("Row major read") {
650 read_and_check_sparse_array(
651 ctx, array_name, data, element_offsets, TILEDB_ROW_MAJOR);
652 }
653 SECTION("Global order read") {
654 read_and_check_sparse_array(
655 ctx, array_name, data, element_offsets, TILEDB_GLOBAL_ORDER);
656 }
657 SECTION("Unordered read") {
658 read_and_check_sparse_array(
659 ctx, array_name, data, element_offsets, TILEDB_UNORDERED);
660 }
661 }
662 SECTION("Global order write") {
663 write_sparse_array(
664 ctx, array_name, data, element_offsets, TILEDB_GLOBAL_ORDER);
665 SECTION("Row major read") {
666 read_and_check_sparse_array(
667 ctx, array_name, data, element_offsets, TILEDB_ROW_MAJOR);
668 }
669 SECTION("Global order read") {
670 read_and_check_sparse_array(
671 ctx, array_name, data, element_offsets, TILEDB_GLOBAL_ORDER);
672 }
673 SECTION("Unordered read") {
674 read_and_check_sparse_array(
675 ctx, array_name, data, element_offsets, TILEDB_UNORDERED);
676 }
677 }
678 }
679
680 SECTION("User offsets buffer too small") {
681 Context ctx(config);
682
683 Array array_w(ctx, array_name, TILEDB_WRITE);
684 std::vector<int64_t> d1 = {1, 2, 3, 4};
685 std::vector<int64_t> d2 = {2, 1, 3, 4};
686 Query query_w(ctx, array_w, TILEDB_WRITE);
687 query_w.set_layout(TILEDB_UNORDERED)
688 .set_data_buffer("d1", d1)
689 .set_data_buffer("d2", d2);
690
691 // Try to write without allocating memory for the extra element
692 query_w.set_data_buffer("attr", data);
693 query_w.set_offsets_buffer("attr", data_offsets);
694 CHECK_THROWS(query_w.submit());
695
696 // Write data with extra element
697 data_offsets.push_back(sizeof(data[0]) * data.size());
698 query_w.set_data_buffer("attr", data);
699 query_w.set_offsets_buffer("attr", data_offsets);
700 CHECK_NOTHROW(query_w.submit());
701 array_w.close();
702
703 // Submit read query
704 Array array_r(ctx, array_name, TILEDB_READ);
705 Query query_r(ctx, array_r, TILEDB_READ);
706
707 // Assume no size for the extra element
708 std::vector<int32_t> attr_val(data.size());
709 std::vector<uint64_t> attr_off(data_offsets.size() - 1);
710 query_r.set_data_buffer("attr", attr_val);
711 query_r.set_offsets_buffer("attr", attr_off);
712
713 // First partial read because offsets don't fit
714 CHECK_NOTHROW(query_r.submit());
715 CHECK(query_r.query_status() == Query::Status::INCOMPLETE);
716 // check returned data
717 auto data_num = query_r.result_buffer_elements()["attr"].second;
718 CHECK(data_num == 3);
719 std::vector<int32_t> data_exp1 = {1, 2, 3, 0, 0, 0};
720 CHECK(attr_val == data_exp1);
721 // check returned offsets
722 auto offset_num = query_r.result_buffer_elements()["attr"].first;
723 CHECK(offset_num == 3);
724 std::vector<uint64_t> data_off_exp1 = {0, 4, 12, 0};
725 CHECK(attr_off == data_off_exp1);
726
727 // check returned data with nullable API
728 auto result_els = query_r.result_buffer_elements_nullable()["attr"];
729 CHECK(std::get<0>(result_els) == 3);
730 CHECK(std::get<1>(result_els) == 3);
731 CHECK(std::get<2>(result_els) == 0);
732
733 // Second partial read
734 reset_read_buffers(attr_val, attr_off);
735 CHECK_NOTHROW(query_r.submit());
736 CHECK(query_r.query_status() == Query::Status::COMPLETE);
737 // check returned data
738 data_num = query_r.result_buffer_elements()["attr"].second;
739 CHECK(data_num == 3);
740 std::vector<int32_t> data_exp2 = {4, 5, 6, 0, 0, 0};
741 CHECK(attr_val == data_exp2);
742 // check returned offsets
743 offset_num = query_r.result_buffer_elements()["attr"].first;
744 CHECK(offset_num == 3);
745 std::vector<uint64_t> data_off_exp2 = {0, 8, 12, 0};
746 CHECK(attr_off == data_off_exp2);
747
748 array_r.close();
749 }
750 }
751 }
752
753 SECTION("Partial read") {
754 Config config;
755
756 // The expected buffers to be returned after 2 partial reads with
757 // read buffers of size data.size() / 2
758 std::vector<int32_t> data_part1 = {1, 2, 3};
759 std::vector<uint64_t> data_off_part1 = {0, 4};
760 std::vector<uint64_t> data_elem_off_part1 = {0, 1};
761 std::vector<int32_t> data_part2 = {4, 5, 6};
762 std::vector<uint64_t> data_off_part2 = {0, 8};
763 std::vector<uint64_t> data_elem_off_part2 = {0, 2};
764
765 SECTION("No extra element (default case)") {
766 config = ctx.config();
767 CHECK((std::string)config["sm.var_offsets.extra_element"] == "false");
768
769 write_sparse_array(ctx, array_name, data, data_offsets, TILEDB_UNORDERED);
770 SECTION("Row major read") {
771 partial_read_and_check_sparse_array(
772 ctx,
773 array_name,
774 data_part1,
775 data_off_part1,
776 data_part2,
777 data_off_part2,
778 TILEDB_ROW_MAJOR);
779 }
780 SECTION("Global order read") {
781 partial_read_and_check_sparse_array(
782 ctx,
783 array_name,
784 data_part1,
785 data_off_part1,
786 data_part2,
787 data_off_part2,
788 TILEDB_GLOBAL_ORDER);
789 }
790 SECTION("Unordered read") {
791 partial_read_and_check_sparse_array(
792 ctx,
793 array_name,
794 data_part1,
795 data_off_part1,
796 data_part2,
797 data_off_part2,
798 TILEDB_UNORDERED);
799 }
800 }
801
802 SECTION("Extra element") {
803 config["sm.var_offsets.extra_element"] = "true";
804
805 SECTION("Byte offsets (default config)") {
806 CHECK((std::string)config["sm.var_offsets.mode"] == "bytes");
807 Context ctx(config);
808
809 // Write data with extra element indicating total number of bytes
810 data_offsets.push_back(sizeof(data[0]) * data.size());
811
812 // Expect an extra element offset on each read
813 data_off_part1.push_back(sizeof(data_part1[0]) * data_part1.size());
814 data_off_part2.push_back(sizeof(data_part2[0]) * data_part2.size());
815
816 SECTION("Unordered write") {
817 write_sparse_array(
818 ctx, array_name, data, data_offsets, TILEDB_UNORDERED);
819 SECTION("Row major read") {
820 partial_read_and_check_sparse_array(
821 ctx,
822 array_name,
823 data_part1,
824 data_off_part1,
825 data_part2,
826 data_off_part2,
827 TILEDB_ROW_MAJOR);
828 }
829 SECTION("Global order read") {
830 partial_read_and_check_sparse_array(
831 ctx,
832 array_name,
833 data_part1,
834 data_off_part1,
835 data_part2,
836 data_off_part2,
837 TILEDB_GLOBAL_ORDER);
838 }
839 SECTION("Unordered read") {
840 partial_read_and_check_sparse_array(
841 ctx,
842 array_name,
843 data_part1,
844 data_off_part1,
845 data_part2,
846 data_off_part2,
847 TILEDB_UNORDERED);
848 }
849 }
850 SECTION("Global order write") {
851 write_sparse_array(
852 ctx, array_name, data, data_offsets, TILEDB_GLOBAL_ORDER);
853 SECTION("Row major read") {
854 partial_read_and_check_sparse_array(
855 ctx,
856 array_name,
857 data_part1,
858 data_off_part1,
859 data_part2,
860 data_off_part2,
861 TILEDB_ROW_MAJOR);
862 }
863 SECTION("Global order read") {
864 partial_read_and_check_sparse_array(
865 ctx,
866 array_name,
867 data_part1,
868 data_off_part1,
869 data_part2,
870 data_off_part2,
871 TILEDB_GLOBAL_ORDER);
872 }
873 SECTION("Unordered read") {
874 partial_read_and_check_sparse_array(
875 ctx,
876 array_name,
877 data_part1,
878 data_off_part1,
879 data_part2,
880 data_off_part2,
881 TILEDB_UNORDERED);
882 }
883 }
884 }
885
886 SECTION("Element offsets") {
887 config["sm.var_offsets.mode"] = "elements";
888 Context ctx(config);
889
890 // Write data with extra element indicating total number of elements
891 element_offsets.push_back(data.size());
892
893 // Expect an extra element offset on each read
894 data_elem_off_part1.push_back(data_part1.size());
895 data_elem_off_part2.push_back(data_part2.size());
896
897 SECTION("Unordered write") {
898 write_sparse_array(
899 ctx, array_name, data, element_offsets, TILEDB_UNORDERED);
900 SECTION("Row major read") {
901 partial_read_and_check_sparse_array(
902 ctx,
903 array_name,
904 data_part1,
905 data_elem_off_part1,
906 data_part2,
907 data_elem_off_part2,
908 TILEDB_ROW_MAJOR);
909 }
910 SECTION("Global order read") {
911 partial_read_and_check_sparse_array(
912 ctx,
913 array_name,
914 data_part1,
915 data_elem_off_part1,
916 data_part2,
917 data_elem_off_part2,
918 TILEDB_GLOBAL_ORDER);
919 }
920 SECTION("Unordered read") {
921 partial_read_and_check_sparse_array(
922 ctx,
923 array_name,
924 data_part1,
925 data_elem_off_part1,
926 data_part2,
927 data_elem_off_part2,
928 TILEDB_UNORDERED);
929 }
930 }
931 SECTION("Global order write") {
932 write_sparse_array(
933 ctx, array_name, data, element_offsets, TILEDB_GLOBAL_ORDER);
934 SECTION("Row major read") {
935 partial_read_and_check_sparse_array(
936 ctx,
937 array_name,
938 data_part1,
939 data_elem_off_part1,
940 data_part2,
941 data_elem_off_part2,
942 TILEDB_ROW_MAJOR);
943 }
944 SECTION("Global order read") {
945 partial_read_and_check_sparse_array(
946 ctx,
947 array_name,
948 data_part1,
949 data_elem_off_part1,
950 data_part2,
951 data_elem_off_part2,
952 TILEDB_GLOBAL_ORDER);
953 }
954 SECTION("Unordered read") {
955 partial_read_and_check_sparse_array(
956 ctx,
957 array_name,
958 data_part1,
959 data_elem_off_part1,
960 data_part2,
961 data_elem_off_part2,
962 TILEDB_UNORDERED);
963 }
964 }
965 }
966
967 SECTION("User offsets buffer too small") {
968 // Write data with extra element
969 data_offsets.push_back(sizeof(data[0]) * data.size());
970 write_sparse_array(
971 ctx, array_name, data, data_offsets, TILEDB_UNORDERED);
972
973 // Submit read query
974 Context ctx(config);
975 Array array(ctx, array_name, TILEDB_READ);
976 Query query(ctx, array, TILEDB_READ);
977
978 // Assume no size for the extra element
979 std::vector<int32_t> attr_val(data_part1.size());
980 std::vector<uint64_t> attr_off(data_off_part1.size());
981 query.set_data_buffer("attr", attr_val);
982 query.set_offsets_buffer("attr", attr_off);
983
984 // First partial read
985 CHECK_NOTHROW(query.submit());
986 CHECK(query.query_status() == Query::Status::INCOMPLETE);
987 std::vector<int32_t> data_exp1 = {1, 0, 0};
988 std::vector<uint64_t> data_off_exp1 = {0, 4};
989 // check returned data
990 auto data_num = query.result_buffer_elements()["attr"].second;
991 CHECK(data_num == 1);
992 CHECK(attr_val == data_exp1);
993 // check returned offsets
994 auto offset_num = query.result_buffer_elements()["attr"].first;
995 CHECK(offset_num == 2);
996 CHECK(attr_off == data_off_exp1);
997
998 // Second partial read
999 reset_read_buffers(attr_val, attr_off);
1000 CHECK_NOTHROW(query.submit());
1001 CHECK(query.query_status() == Query::Status::INCOMPLETE);
1002 std::vector<int32_t> data_exp2 = {2, 3, 0};
1003 std::vector<uint64_t> data_off_exp2 = {0, 8};
1004 // check returned data
1005 data_num = query.result_buffer_elements()["attr"].second;
1006 CHECK(data_num == 2);
1007 CHECK(attr_val == data_exp2);
1008 // check returned offsets
1009 offset_num = query.result_buffer_elements()["attr"].first;
1010 CHECK(offset_num == 2);
1011 CHECK(attr_off == data_off_exp2);
1012
1013 // Third partial read
1014 reset_read_buffers(attr_val, attr_off);
1015 CHECK_NOTHROW(query.submit());
1016 CHECK(query.query_status() == Query::Status::INCOMPLETE);
1017 std::vector<int32_t> data_exp3 = {4, 5, 0};
1018 std::vector<uint64_t> data_off_exp3 = {0, 8};
1019 // check returned data
1020 data_num = query.result_buffer_elements()["attr"].second;
1021 CHECK(data_num == 2);
1022 CHECK(attr_val == data_exp3);
1023 // check returned offsets
1024 offset_num = query.result_buffer_elements()["attr"].first;
1025 CHECK(offset_num == 2);
1026 CHECK(attr_off == data_off_exp3);
1027
1028 // Last partial read
1029 reset_read_buffers(attr_val, attr_off);
1030 CHECK_NOTHROW(query.submit());
1031 CHECK(query.query_status() == Query::Status::COMPLETE);
1032 std::vector<int32_t> data_exp4 = {6, 0, 0};
1033 std::vector<uint64_t> data_off_exp4 = {0, 4};
1034 // check returned data
1035 data_num = query.result_buffer_elements()["attr"].second;
1036 CHECK(data_num == 1);
1037 CHECK(attr_val == data_exp4);
1038 // check returned offsets
1039 offset_num = query.result_buffer_elements()["attr"].first;
1040 CHECK(offset_num == 2);
1041 CHECK(attr_off == data_off_exp4);
1042
1043 array.close();
1044 }
1045 }
1046 }
1047
1048 // Clean up
1049 VFS vfs(ctx);
1050 if (vfs.is_dir(array_name))
1051 vfs.remove_dir(array_name);
1052 }
1053
1054 TEST_CASE(
1055 "C++ API: Test offsets extra element: dense array",
1056 "[var-offsets][extra-offset][dense]") {
1057 std::string array_name = "test_extra_offset";
1058 create_dense_array(array_name);
1059
1060 Context ctx;
1061 std::vector<int32_t> data = {1, 2, 3, 4, 5, 6};
1062 std::vector<uint64_t> data_offsets = {0, 4, 12, 20};
1063 std::vector<uint64_t> element_offsets = {0, 1, 3, 5};
1064
1065 SECTION("Full read") {
1066 Config config;
1067
1068 SECTION("No extra element (default case)") {
1069 config = ctx.config();
1070 CHECK((std::string)config["sm.var_offsets.extra_element"] == "false");
1071
1072 write_dense_array(ctx, array_name, data, data_offsets, TILEDB_ROW_MAJOR);
1073 read_and_check_dense_array(ctx, array_name, data, data_offsets);
1074 }
1075
1076 SECTION("Extra element") {
1077 config["sm.var_offsets.extra_element"] = "true";
1078
1079 SECTION("Byte offsets (default config)") {
1080 CHECK((std::string)config["sm.var_offsets.mode"] == "bytes");
1081 Context ctx(config);
1082
1083 // Write data with extra element indicating total number of bytes
1084 data_offsets.push_back(sizeof(data[0]) * data.size());
1085
1086 SECTION("Ordered write") {
1087 write_dense_array(
1088 ctx, array_name, data, data_offsets, TILEDB_ROW_MAJOR);
1089 read_and_check_dense_array(ctx, array_name, data, data_offsets);
1090 }
1091 SECTION("Global order write") {
1092 write_dense_array(
1093 ctx, array_name, data, data_offsets, TILEDB_GLOBAL_ORDER);
1094 read_and_check_dense_array(ctx, array_name, data, data_offsets);
1095 }
1096 }
1097
1098 SECTION("Element offsets") {
1099 config["sm.var_offsets.mode"] = "elements";
1100 Context ctx(config);
1101
1102 // Write data with extra element indicating the total number of elements
1103 element_offsets.push_back(data.size());
1104
1105 SECTION("Ordered write") {
1106 write_dense_array(
1107 ctx, array_name, data, element_offsets, TILEDB_ROW_MAJOR);
1108 read_and_check_dense_array(ctx, array_name, data, element_offsets);
1109 }
1110 SECTION("Global order write") {
1111 write_dense_array(
1112 ctx, array_name, data, element_offsets, TILEDB_GLOBAL_ORDER);
1113 read_and_check_dense_array(ctx, array_name, data, element_offsets);
1114 }
1115 }
1116
1117 SECTION("User offsets buffer too small") {
1118 // Use element offsets to cover this code path as well
1119 config["sm.var_offsets.mode"] = "elements";
1120 Context ctx(config);
1121
1122 Array array_w(ctx, array_name, TILEDB_WRITE);
1123 Query query_w(ctx, array_w, TILEDB_WRITE);
1124 query_w.set_layout(TILEDB_ROW_MAJOR)
1125 .set_subarray<int64_t>({1, 2, 1, 2});
1126
1127 // Try to write without allocating memory for the extra element
1128 query_w.set_data_buffer("attr", data);
1129 query_w.set_offsets_buffer("attr", element_offsets);
1130 CHECK_THROWS(query_w.submit());
1131
1132 // Write data with extra element
1133 element_offsets.push_back(data.size());
1134 query_w.set_data_buffer("attr", data);
1135 query_w.set_offsets_buffer("attr", element_offsets);
1136 CHECK_NOTHROW(query_w.submit());
1137 array_w.close();
1138
1139 // Submit read query
1140 Array array_r(ctx, array_name, TILEDB_READ);
1141 Query query_r(ctx, array_r, TILEDB_READ);
1142
1143 // Assume no size for the extra element
1144 std::vector<int32_t> attr_val(data.size());
1145 std::vector<uint64_t> attr_off(element_offsets.size() - 1);
1146 query_r.set_data_buffer("attr", attr_val);
1147 query_r.set_offsets_buffer("attr", attr_off);
1148 query_r.set_subarray<int64_t>({1, 2, 1, 2});
1149
1150 // First partial read because offsets don't fit
1151 CHECK_NOTHROW(query_r.submit());
1152 CHECK(query_r.query_status() == Query::Status::INCOMPLETE);
1153 std::vector<int32_t> data_exp1 = {1, 2, 3, 0, 0, 0};
1154 std::vector<uint64_t> data_off_exp1 = {0, 1, 3, 0};
1155 // check returned data
1156 auto data_num = query_r.result_buffer_elements()["attr"].second;
1157 CHECK(data_num == 3);
1158 CHECK(attr_val == data_exp1);
1159 // check returned offsets
1160 auto offset_num = query_r.result_buffer_elements()["attr"].first;
1161 CHECK(offset_num == 3);
1162 CHECK(attr_off == data_off_exp1);
1163
1164 // Second partial read
1165 reset_read_buffers(attr_val, attr_off);
1166 CHECK_NOTHROW(query_r.submit());
1167 CHECK(query_r.query_status() == Query::Status::COMPLETE);
1168 std::vector<int32_t> data_exp2 = {4, 5, 6, 0, 0, 0};
1169 std::vector<uint64_t> data_off_exp2 = {0, 2, 3, 0};
1170 // check returned data
1171 data_num = query_r.result_buffer_elements()["attr"].second;
1172 CHECK(data_num == 3);
1173 CHECK(attr_val == data_exp2);
1174 // check returned offsets
1175 offset_num = query_r.result_buffer_elements()["attr"].first;
1176 CHECK(offset_num == 3);
1177 CHECK(attr_off == data_off_exp2);
1178
1179 array_r.close();
1180 }
1181 }
1182 }
1183
1184 SECTION("Partial read") {
1185 Config config;
1186
1187 // The expected buffers to be returned after 2 partial reads with
1188 // read buffers of size data.size() / 2
1189 std::vector<int32_t> data_part1 = {1, 2, 3};
1190 std::vector<uint64_t> data_off_part1 = {0, 4};
1191 std::vector<uint64_t> data_elem_off_part1 = {0, 1};
1192 std::vector<int32_t> data_part2 = {4, 5, 6};
1193 std::vector<uint64_t> data_off_part2 = {0, 8};
1194 std::vector<uint64_t> data_elem_off_part2 = {0, 2};
1195
1196 SECTION("No extra element (default case)") {
1197 config = ctx.config();
1198 CHECK((std::string)config["sm.var_offsets.extra_element"] == "false");
1199
1200 write_dense_array(ctx, array_name, data, data_offsets, TILEDB_ROW_MAJOR);
1201 partial_read_and_check_dense_array(
1202 ctx,
1203 array_name,
1204 data_part1,
1205 data_off_part1,
1206 data_part2,
1207 data_off_part2);
1208 }
1209
1210 SECTION("Extra element") {
1211 config["sm.var_offsets.extra_element"] = "true";
1212
1213 SECTION("Byte offsets (default config)") {
1214 CHECK((std::string)config["sm.var_offsets.mode"] == "bytes");
1215 Context ctx(config);
1216
1217 // Write data with extra element indicating total number of bytes
1218 data_offsets.push_back(sizeof(data[0]) * data.size());
1219
1220 // Expect an extra element offset on each read
1221 data_off_part1.push_back(sizeof(data_part1[0]) * data_part1.size());
1222 data_off_part2.push_back(sizeof(data_part2[0]) * data_part2.size());
1223
1224 SECTION("Ordered write") {
1225 write_dense_array(
1226 ctx, array_name, data, data_offsets, TILEDB_ROW_MAJOR);
1227 partial_read_and_check_dense_array(
1228 ctx,
1229 array_name,
1230 data_part1,
1231 data_off_part1,
1232 data_part2,
1233 data_off_part2);
1234 }
1235 SECTION("Global order write") {
1236 write_dense_array(
1237 ctx, array_name, data, data_offsets, TILEDB_GLOBAL_ORDER);
1238 partial_read_and_check_dense_array(
1239 ctx,
1240 array_name,
1241 data_part1,
1242 data_off_part1,
1243 data_part2,
1244 data_off_part2);
1245 }
1246 }
1247
1248 SECTION("Element offsets") {
1249 config["sm.var_offsets.mode"] = "elements";
1250 Context ctx(config);
1251
1252 // Write data with extra element indicating total number of elements
1253 element_offsets.push_back(data.size());
1254
1255 // Expect an extra element offset on each read
1256 data_elem_off_part1.push_back(data_part1.size());
1257 data_elem_off_part2.push_back(data_part2.size());
1258
1259 SECTION("Ordered write") {
1260 write_dense_array(
1261 ctx, array_name, data, element_offsets, TILEDB_ROW_MAJOR);
1262 partial_read_and_check_dense_array(
1263 ctx,
1264 array_name,
1265 data_part1,
1266 data_elem_off_part1,
1267 data_part2,
1268 data_elem_off_part2);
1269 }
1270 SECTION("Global order write") {
1271 write_dense_array(
1272 ctx, array_name, data, element_offsets, TILEDB_GLOBAL_ORDER);
1273 partial_read_and_check_dense_array(
1274 ctx,
1275 array_name,
1276 data_part1,
1277 data_elem_off_part1,
1278 data_part2,
1279 data_elem_off_part2);
1280 }
1281 }
1282
1283 SECTION("User offsets buffer too small") {
1284 Context ctx(config);
1285 // Write data with extra element
1286 data_offsets.push_back(sizeof(data[0]) * data.size());
1287 write_dense_array(
1288 ctx, array_name, data, data_offsets, TILEDB_ROW_MAJOR);
1289
1290 // Submit read query
1291 Array array(ctx, array_name, TILEDB_READ);
1292 Query query(ctx, array, TILEDB_READ);
1293
1294 // Assume smaller offset buffer than data buffer
1295 std::vector<int32_t> attr_val(data_part1.size());
1296 std::vector<uint64_t> attr_off(data_off_part1.size());
1297 query.set_data_buffer("attr", attr_val);
1298 query.set_offsets_buffer("attr", attr_off);
1299 query.set_subarray<int64_t>({1, 2, 1, 2});
1300
1301 // First partial read
1302 CHECK_NOTHROW(query.submit());
1303 CHECK(query.query_status() == Query::Status::INCOMPLETE);
1304 std::vector<int32_t> data_exp1 = {1, 0, 0};
1305 std::vector<uint64_t> data_off_exp1 = {0, 4};
1306 // check returned data
1307 auto data_num = query.result_buffer_elements()["attr"].second;
1308 CHECK(data_num == 1);
1309 CHECK(attr_val == data_exp1);
1310 // check returned offsets
1311 auto offset_num = query.result_buffer_elements()["attr"].first;
1312 CHECK(offset_num == 2);
1313 CHECK(attr_off == data_off_exp1);
1314
1315 // Second partial read
1316 reset_read_buffers(attr_val, attr_off);
1317 CHECK_NOTHROW(query.submit());
1318 CHECK(query.query_status() == Query::Status::INCOMPLETE);
1319 std::vector<int32_t> data_exp2 = {2, 3, 0};
1320 std::vector<uint64_t> data_off_exp2 = {0, 8};
1321 // check returned data
1322 data_num = query.result_buffer_elements()["attr"].second;
1323 CHECK(data_num == 2);
1324 CHECK(attr_val == data_exp2);
1325 // check returned offsets
1326 offset_num = query.result_buffer_elements()["attr"].first;
1327 CHECK(offset_num == 2);
1328 CHECK(attr_off == data_off_exp2);
1329
1330 // Third partial read
1331 reset_read_buffers(attr_val, attr_off);
1332 CHECK_NOTHROW(query.submit());
1333 CHECK(query.query_status() == Query::Status::INCOMPLETE);
1334 std::vector<int32_t> data_exp3 = {4, 5, 0};
1335 std::vector<uint64_t> data_off_exp3 = {0, 8};
1336 // check returned data
1337 data_num = query.result_buffer_elements()["attr"].second;
1338 CHECK(data_num == 2);
1339 CHECK(attr_val == data_exp3);
1340 // check returned offsets
1341 offset_num = query.result_buffer_elements()["attr"].first;
1342 CHECK(offset_num == 2);
1343 CHECK(attr_off == data_off_exp3);
1344
1345 // Last partial read
1346 reset_read_buffers(attr_val, attr_off);
1347 CHECK_NOTHROW(query.submit());
1348 CHECK(query.query_status() == Query::Status::COMPLETE);
1349 std::vector<int32_t> data_exp4 = {6, 0, 0};
1350 std::vector<uint64_t> data_off_exp4 = {0, 4};
1351 // check returned data
1352 data_num = query.result_buffer_elements()["attr"].second;
1353 CHECK(data_num == 1);
1354 CHECK(attr_val == data_exp4);
1355 // check returned offsets
1356 offset_num = query.result_buffer_elements()["attr"].first;
1357 CHECK(offset_num == 2);
1358 CHECK(attr_off == data_off_exp4);
1359
1360 array.close();
1361 }
1362 }
1363 }
1364
1365 // Clean up
1366 VFS vfs(ctx);
1367 if (vfs.is_dir(array_name))
1368 vfs.remove_dir(array_name);
1369 }
1370
1371 TEST_CASE(
1372 "C++ API: Test 32-bit offsets: sparse array",
1373 "[var-offsets][32bit-offset][sparse]") {
1374 std::string array_name = "test_32bit_offset";
1375 create_sparse_array(array_name);
1376
1377 std::vector<int32_t> data = {1, 2, 3, 4, 5, 6};
1378 // Create 32 bit byte offsets buffer to use
1379 std::vector<uint32_t> data_byte_offsets = {0, 4, 12, 20};
1380
1381 Config config;
1382 // Change config of offsets bitsize from 64 to 32
1383 config["sm.var_offsets.bitsize"] = 32;
1384 Context ctx(config);
1385
1386 SECTION("Byte offsets (default case)") {
1387 CHECK((std::string)config["sm.var_offsets.mode"] == "bytes");
1388
1389 SECTION("Unordered write") {
1390 write_sparse_array(
1391 ctx, array_name, data, data_byte_offsets, TILEDB_UNORDERED);
1392 SECTION("Row major read") {
1393 read_and_check_sparse_array(
1394 ctx, array_name, data, data_byte_offsets, TILEDB_ROW_MAJOR);
1395 }
1396 SECTION("Global order read") {
1397 read_and_check_sparse_array(
1398 ctx, array_name, data, data_byte_offsets, TILEDB_GLOBAL_ORDER);
1399 }
1400 SECTION("Unordered read") {
1401 read_and_check_sparse_array(
1402 ctx, array_name, data, data_byte_offsets, TILEDB_UNORDERED);
1403 }
1404 }
1405 SECTION("Global order write") {
1406 write_sparse_array(
1407 ctx, array_name, data, data_byte_offsets, TILEDB_GLOBAL_ORDER);
1408 SECTION("Row major read") {
1409 read_and_check_sparse_array(
1410 ctx, array_name, data, data_byte_offsets, TILEDB_ROW_MAJOR);
1411 }
1412 SECTION("Global order read") {
1413 read_and_check_sparse_array(
1414 ctx, array_name, data, data_byte_offsets, TILEDB_GLOBAL_ORDER);
1415 }
1416 SECTION("Unordered read") {
1417 read_and_check_sparse_array(
1418 ctx, array_name, data, data_byte_offsets, TILEDB_UNORDERED);
1419 }
1420 }
1421 }
1422
1423 SECTION("Element offsets") {
1424 // Change config of offsets format from bytes to elements
1425 config["sm.var_offsets.mode"] = "elements";
1426 Context ctx(config);
1427
1428 // Create 32 bit element offsets buffer to use
1429 std::vector<uint32_t> data_element_offsets = {0, 1, 3, 5};
1430
1431 SECTION("Unordered write") {
1432 write_sparse_array(
1433 ctx, array_name, data, data_element_offsets, TILEDB_UNORDERED);
1434 SECTION("Row major read") {
1435 read_and_check_sparse_array(
1436 ctx, array_name, data, data_element_offsets, TILEDB_ROW_MAJOR);
1437 }
1438 SECTION("Global order read") {
1439 read_and_check_sparse_array(
1440 ctx, array_name, data, data_element_offsets, TILEDB_GLOBAL_ORDER);
1441 }
1442 SECTION("Unoredered read") {
1443 read_and_check_sparse_array(
1444 ctx, array_name, data, data_element_offsets, TILEDB_UNORDERED);
1445 }
1446 }
1447 SECTION("Global order write") {
1448 write_sparse_array(
1449 ctx, array_name, data, data_element_offsets, TILEDB_GLOBAL_ORDER);
1450 SECTION("Row major read") {
1451 read_and_check_sparse_array(
1452 ctx, array_name, data, data_element_offsets, TILEDB_ROW_MAJOR);
1453 }
1454 SECTION("Global order read") {
1455 read_and_check_sparse_array(
1456 ctx, array_name, data, data_element_offsets, TILEDB_GLOBAL_ORDER);
1457 }
1458 SECTION("Unordered read") {
1459 read_and_check_sparse_array(
1460 ctx, array_name, data, data_element_offsets, TILEDB_UNORDERED);
1461 }
1462 }
1463 }
1464
1465 SECTION("Extra element") {
1466 config["sm.var_offsets.extra_element"] = "true";
1467 Context ctx(config);
1468
1469 // Check the extra element is included in the offsets
1470 uint32_t data_size = static_cast<uint32_t>(sizeof(data[0]) * data.size());
1471 std::vector<uint32_t> data_byte_offsets = {0, 4, 12, 20, data_size};
1472
1473 SECTION("Unordered write") {
1474 write_sparse_array(
1475 ctx, array_name, data, data_byte_offsets, TILEDB_UNORDERED);
1476 SECTION("Row major read") {
1477 read_and_check_sparse_array(
1478 ctx, array_name, data, data_byte_offsets, TILEDB_ROW_MAJOR);
1479 }
1480 SECTION("Global order read") {
1481 read_and_check_sparse_array(
1482 ctx, array_name, data, data_byte_offsets, TILEDB_GLOBAL_ORDER);
1483 }
1484 SECTION("Unordered read") {
1485 read_and_check_sparse_array(
1486 ctx, array_name, data, data_byte_offsets, TILEDB_UNORDERED);
1487 }
1488 }
1489 SECTION("Global order write") {
1490 write_sparse_array(
1491 ctx, array_name, data, data_byte_offsets, TILEDB_GLOBAL_ORDER);
1492 SECTION("Row major read") {
1493 read_and_check_sparse_array(
1494 ctx, array_name, data, data_byte_offsets, TILEDB_ROW_MAJOR);
1495 }
1496 SECTION("Global order read") {
1497 read_and_check_sparse_array(
1498 ctx, array_name, data, data_byte_offsets, TILEDB_GLOBAL_ORDER);
1499 }
1500 SECTION("Unordered read") {
1501 read_and_check_sparse_array(
1502 ctx, array_name, data, data_byte_offsets, TILEDB_UNORDERED);
1503 }
1504 }
1505 }
1506
1507 // Clean up
1508 config["sm.var_offsets.extra_element"] = "false";
1509 config["sm.var_offsets.mode"] = "bytes";
1510 config["sm.var_offsets.bitsize"] = 64;
1511 Context ctx2(config);
1512 VFS vfs(ctx2);
1513 if (vfs.is_dir(array_name))
1514 vfs.remove_dir(array_name);
1515 }
1516
1517 TEST_CASE(
1518 "C++ API: Test 32-bit offsets: dense array",
1519 "[var-offsets][32bit-offset][dense]") {
1520 std::string array_name = "test_32bit_offset";
1521 create_dense_array(array_name);
1522
1523 std::vector<int32_t> data = {1, 2, 3, 4, 5, 6};
1524 // Create 32 bit offsets byte buffer to use
1525 std::vector<uint32_t> data_byte_offsets = {0, 4, 12, 20};
1526
1527 Config config;
1528 // Change config of offsets bitsize from 64 to 32
1529 config["sm.var_offsets.bitsize"] = 32;
1530 Context ctx(config);
1531
1532 SECTION("Byte offsets (default case)") {
1533 CHECK((std::string)config["sm.var_offsets.mode"] == "bytes");
1534
1535 SECTION("Ordered write") {
1536 write_dense_array(
1537 ctx, array_name, data, data_byte_offsets, TILEDB_ROW_MAJOR);
1538 read_and_check_dense_array(ctx, array_name, data, data_byte_offsets);
1539 }
1540 SECTION("Global order write") {
1541 write_dense_array(
1542 ctx, array_name, data, data_byte_offsets, TILEDB_GLOBAL_ORDER);
1543 read_and_check_dense_array(ctx, array_name, data, data_byte_offsets);
1544 }
1545 }
1546
1547 SECTION("Element offsets") {
1548 // Change config of offsets format from bytes to elements
1549 config["sm.var_offsets.mode"] = "elements";
1550 Context ctx(config);
1551
1552 // Create 32 bit element offsets buffer to use
1553 std::vector<uint32_t> data_element_offsets = {0, 1, 3, 5};
1554
1555 SECTION("Ordered write") {
1556 write_dense_array(
1557 ctx, array_name, data, data_element_offsets, TILEDB_ROW_MAJOR);
1558 read_and_check_dense_array(ctx, array_name, data, data_element_offsets);
1559 }
1560 SECTION("Global order write") {
1561 write_dense_array(
1562 ctx, array_name, data, data_element_offsets, TILEDB_GLOBAL_ORDER);
1563 read_and_check_dense_array(ctx, array_name, data, data_element_offsets);
1564 }
1565 }
1566
1567 SECTION("Extra element") {
1568 config["sm.var_offsets.extra_element"] = "true";
1569 Context ctx(config);
1570
1571 // Check the extra element is included in the offsets
1572 uint32_t data_size = static_cast<uint32_t>(sizeof(data[0]) * data.size());
1573 std::vector<uint32_t> data_byte_offsets = {0, 4, 12, 20, data_size};
1574
1575 SECTION("Ordered write") {
1576 write_dense_array(
1577 ctx, array_name, data, data_byte_offsets, TILEDB_ROW_MAJOR);
1578 read_and_check_dense_array(ctx, array_name, data, data_byte_offsets);
1579 }
1580 SECTION("Global order write") {
1581 write_dense_array(
1582 ctx, array_name, data, data_byte_offsets, TILEDB_GLOBAL_ORDER);
1583 read_and_check_dense_array(ctx, array_name, data, data_byte_offsets);
1584 }
1585 }
1586
1587 // Clean up
1588 config["sm.var_offsets.extra_element"] = "false";
1589 config["sm.var_offsets.mode"] = "bytes";
1590 config["sm.var_offsets.bitsize"] = 64;
1591 Context ctx2(config);
1592 VFS vfs(ctx2);
1593 if (vfs.is_dir(array_name))
1594 vfs.remove_dir(array_name);
1595 }
1596
1597 TEST_CASE(
1598 "C++ API: Test 32-bit offsets: sparse array with string dimension",
1599 "[var-offsets-dim][32bit-offset][sparse]") {
1600 std::string array_name = "test_32bit_offset_string_dim";
1601
1602 /*
1603 Write an array with string dimension and make sure we get back
1604 proper offsets along with extra element in read.
1605 */
1606
1607 // Create data buffer to use
1608 std::string data = "aabbbcdddd";
1609 // Create 32 bit offsets byte buffer to use
1610 std::vector<uint64_t> data_elem_offsets = {0, 2, 5, 6};
1611
1612 // Create and write array
1613 {
1614 Context ctx;
1615 Domain domain(ctx);
1616 domain.add_dimension(
1617 Dimension::create(ctx, "dim1", TILEDB_STRING_ASCII, nullptr, nullptr));
1618
1619 ArraySchema schema(ctx, TILEDB_SPARSE);
1620 schema.set_domain(domain);
1621
1622 tiledb::Array::create(array_name, schema);
1623
1624 auto array = tiledb::Array(ctx, array_name, TILEDB_WRITE);
1625 Query query(ctx, array, TILEDB_WRITE);
1626 query.set_data_buffer("dim1", (char*)data.data(), data.size());
1627 query.set_offsets_buffer(
1628 "dim1", data_elem_offsets.data(), data_elem_offsets.size());
1629
1630 query.set_layout(TILEDB_UNORDERED);
1631 query.submit();
1632 query.finalize();
1633 array.close();
1634 }
1635
1636 {
1637 Config config;
1638 // Change config of offsets bitsize from 64 to 32
1639 config["sm.var_offsets.bitsize"] = 32;
1640 // Add extra element
1641 config["sm.var_offsets.extra_element"] = "true";
1642 Context ctx(config);
1643
1644 std::vector<uint32_t> offsets_back(5);
1645 std::string data_back;
1646 data_back.resize(data.size());
1647
1648 auto array = tiledb::Array(ctx, array_name, TILEDB_READ);
1649 Query query(ctx, array, TILEDB_READ);
1650 query.add_range(0, std::string("aa"), std::string("dddd"));
1651 query.set_data_buffer("dim1", (char*)data_back.data(), data_back.size());
1652 query.set_offsets_buffer(
1653 "dim1", (uint64_t*)offsets_back.data(), offsets_back.size());
1654
1655 query.submit();
1656
1657 CHECK(query.query_status() == Query::Status::COMPLETE);
1658 CHECK(offsets_back[4] == data.size());
1659 }
1660
1661 // Regression test for https://github.com/TileDB-Inc/TileDB/pull/2540
1662 // Test that the query execution with empty result does not write out
1663 // of buffer bounds. We create an oversize buffer with guard values
1664 // ahead of the actual pointer range given to libtiledb; we run the
1665 // query to completion with empty result; then we check the guard
1666 // values. This test fails prior to PR#2540.
1667 {
1668 Config config;
1669 // Change config of offsets bitsize from 64 to 32
1670 config["sm.var_offsets.bitsize"] = 32;
1671 // Add extra element
1672 config["sm.var_offsets.extra_element"] = "true";
1673 Context ctx(config);
1674
1675 std::vector<uint32_t> offsets_back(14);
1676
1677 const std::vector<size_t> guard_idx = {0, 1, 2, 3, 10, 11, 12, 13};
1678 const uint32_t guard_val =
1679 std::numeric_limits<uint32_t>::max() - (uint32_t)10;
1680 for (auto idx : guard_idx) {
1681 offsets_back[idx] = guard_val;
1682 }
1683 std::string data_back;
1684 data_back.resize(data.size());
1685
1686 auto array = tiledb::Array(ctx, array_name, TILEDB_READ);
1687 Query query(ctx, array, TILEDB_READ);
1688 // this query range should return empty result
1689 query.add_range(0, std::string("xyz"), std::string("xyz"));
1690 query.set_data_buffer("dim1", (char*)data_back.data(), data_back.size());
1691
1692 // here we set the buffer at an offset of 2*uint64_t (== 4 * uint32_t)
1693 // from the real start because we cast to uint64_t* to keep the C++
1694 // API type-check happy
1695 query.set_offsets_buffer(
1696 "dim1", (uint64_t*)offsets_back.data() + 2, offsets_back.size() - 2);
1697
1698 query.submit();
1699
1700 CHECK(query.query_status() == Query::Status::COMPLETE);
1701
1702 // check the guard values match on both sides of the buffer
1703 for (auto idx : guard_idx) {
1704 CHECK(offsets_back[idx] == guard_val);
1705 }
1706 }
1707
1708 Context ctx;
1709 VFS vfs(ctx);
1710 if (vfs.is_dir(array_name))
1711 vfs.remove_dir(array_name);
1712 }