1 /**
2  * @file   unit-cppapi-var-offsets.cc
3  *
4  * @section LICENSE
5  *
6  * The MIT License
7  *
8  * @copyright Copyright (c) 2020-2021 TileDB Inc.
9  *
10  * Permission is hereby granted, free of charge, to any person obtaining a copy
11  * of this software and associated documentation files (the "Software"), to deal
12  * in the Software without restriction, including without limitation the rights
13  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
14  * copies of the Software, and to permit persons to whom the Software is
15  * furnished to do so, subject to the following conditions:
16  *
17  * The above copyright notice and this permission notice shall be included in
18  * all copies or substantial portions of the Software.
19  *
20  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
21  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
23  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
24  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
25  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
26  * THE SOFTWARE.
27  *
28  * @section DESCRIPTION
29  *
30  * Tests the different configurations of var-sized attribute offsets using the
31  * C++ API.
32  */
33 
34 #include "catch.hpp"
35 #include "tiledb/sm/cpp_api/tiledb"
36 
37 using namespace tiledb;
38 
create_sparse_array(const std::string & array_name)39 void create_sparse_array(const std::string& array_name) {
40   Context ctx;
41   VFS vfs(ctx);
42 
43   // Create the array
44   if (vfs.is_dir(array_name))
45     vfs.remove_dir(array_name);
46 
47   Domain dom(ctx);
48   dom.add_dimension(Dimension::create<int64_t>(ctx, "d1", {{1, 4}}, 2))
49       .add_dimension(Dimension::create<int64_t>(ctx, "d2", {{1, 4}}, 2));
50 
51   ArraySchema schema(ctx, TILEDB_SPARSE);
52   Attribute attr(ctx, "attr", TILEDB_INT32);
53   attr.set_cell_val_num(TILEDB_VAR_NUM);
54   schema.add_attribute(attr);
55   schema.set_tile_order(TILEDB_ROW_MAJOR);
56   schema.set_cell_order(TILEDB_ROW_MAJOR);
57   schema.set_domain(dom);
58   schema.set_allows_dups(true);
59 
60   Array::create(array_name, schema);
61 }
62 
write_sparse_array(Context ctx,const std::string & array_name,std::vector<int32_t> & data,std::vector<uint64_t> & data_offsets,tiledb_layout_t layout)63 void write_sparse_array(
64     Context ctx,
65     const std::string& array_name,
66     std::vector<int32_t>& data,
67     std::vector<uint64_t>& data_offsets,
68     tiledb_layout_t layout) {
69   std::vector<int64_t> d1 = {1, 2, 3, 4};
70   std::vector<int64_t> d2 = {2, 1, 3, 4};
71 
72   Array array(ctx, array_name, TILEDB_WRITE);
73   Query query(ctx, array, TILEDB_WRITE);
74   query.set_layout(layout);
75   query.set_data_buffer("d1", d1);
76   query.set_data_buffer("d2", d2);
77   query.set_data_buffer("attr", data);
78   query.set_offsets_buffer("attr", data_offsets);
79   CHECK_NOTHROW(query.submit());
80 
81   // Finalize is necessary in global writes, otherwise a no-op
82   query.finalize();
83 
84   array.close();
85 }
86 
write_sparse_array(Context ctx,const std::string & array_name,std::vector<int32_t> & data,std::vector<uint32_t> & data_offsets,tiledb_layout_t layout)87 void write_sparse_array(
88     Context ctx,
89     const std::string& array_name,
90     std::vector<int32_t>& data,
91     std::vector<uint32_t>& data_offsets,
92     tiledb_layout_t layout) {
93   std::vector<int64_t> d1 = {1, 2, 3, 4};
94   std::vector<int64_t> d2 = {2, 1, 3, 4};
95 
96   Array array(ctx, array_name, TILEDB_WRITE);
97   Query query(ctx, array, TILEDB_WRITE);
98   query.set_layout(layout);
99   query.set_data_buffer("d1", d1);
100   query.set_data_buffer("d2", d2);
101   query.set_data_buffer("attr", data.data(), data.size());
102   query.set_offsets_buffer(
103       "attr",
104       reinterpret_cast<uint64_t*>(data_offsets.data()),
105       data_offsets.size());
106   CHECK_NOTHROW(query.submit());
107 
108   // Finalize is necessary in global writes, otherwise a no-op
109   query.finalize();
110 
111   array.close();
112 }
113 
read_and_check_sparse_array(Context ctx,const std::string & array_name,std::vector<int32_t> & expected_data,std::vector<uint64_t> & expected_offsets,tiledb_layout_t layout)114 void read_and_check_sparse_array(
115     Context ctx,
116     const std::string& array_name,
117     std::vector<int32_t>& expected_data,
118     std::vector<uint64_t>& expected_offsets,
119     tiledb_layout_t layout) {
120   Array array(ctx, array_name, TILEDB_READ);
121   Query query(ctx, array, TILEDB_READ);
122 
123   std::vector<int32_t> attr_val(expected_data.size());
124   std::vector<uint64_t> attr_off(expected_offsets.size());
125 
126   query.set_layout(layout);
127   query.set_data_buffer("attr", attr_val);
128   query.set_offsets_buffer("attr", attr_off);
129 
130   CHECK_NOTHROW(query.submit());
131 
132   // Check the element offsets are properly returned
133   CHECK(attr_val == expected_data);
134   CHECK(attr_off == expected_offsets);
135 
136   array.close();
137 }
138 
read_and_check_sparse_array(Context ctx,const std::string & array_name,std::vector<int32_t> & expected_data,std::vector<uint32_t> & expected_offsets,tiledb_layout_t layout)139 void read_and_check_sparse_array(
140     Context ctx,
141     const std::string& array_name,
142     std::vector<int32_t>& expected_data,
143     std::vector<uint32_t>& expected_offsets,
144     tiledb_layout_t layout) {
145   Array array(ctx, array_name, TILEDB_READ);
146   Query query(ctx, array, TILEDB_READ);
147 
148   std::vector<int32_t> attr_val(expected_data.size());
149   std::vector<uint32_t> attr_off(expected_offsets.size());
150   query.set_layout(layout);
151   // Read using a 32-bit vector, but cast it to 64-bit pointer so that the API
152   // accepts it
153   query.set_data_buffer("attr", attr_val.data(), attr_val.size());
154   query.set_offsets_buffer(
155       "attr", reinterpret_cast<uint64_t*>(attr_off.data()), attr_off.size());
156   CHECK_NOTHROW(query.submit());
157 
158   // Check the element offsets are properly returned
159   CHECK(attr_val == expected_data);
160   CHECK(attr_off == expected_offsets);
161 
162   array.close();
163 }
164 
/**
 * Zeroes every element of both read buffers while keeping their sizes, so
 * that stale results from a previous read cannot be mistaken for new ones.
 *
 * @param data Data buffer to clear.
 * @param offsets Offsets buffer to clear.
 */
void reset_read_buffers(
    std::vector<int32_t>& data, std::vector<uint64_t>& offsets) {
  for (auto& value : data) {
    value = 0;
  }
  for (auto& offset : offsets) {
    offset = 0;
  }
}
170 
partial_read_and_check_sparse_array(Context ctx,const std::string & array_name,std::vector<int32_t> & exp_data_part1,std::vector<uint64_t> & exp_off_part1,std::vector<int32_t> & exp_data_part2,std::vector<uint64_t> & exp_off_part2,tiledb_layout_t layout)171 void partial_read_and_check_sparse_array(
172     Context ctx,
173     const std::string& array_name,
174     std::vector<int32_t>& exp_data_part1,
175     std::vector<uint64_t>& exp_off_part1,
176     std::vector<int32_t>& exp_data_part2,
177     std::vector<uint64_t>& exp_off_part2,
178     tiledb_layout_t layout) {
179   // The size of read buffers is smaller than the size
180   // of all the data, so we'll do partial reads
181   std::vector<int32_t> attr_val(exp_data_part1.size());
182   std::vector<uint64_t> attr_off(exp_off_part1.size());
183 
184   Array array(ctx, array_name, TILEDB_READ);
185   Query query(ctx, array, TILEDB_READ);
186   query.set_layout(layout);
187   query.set_data_buffer("attr", attr_val);
188   query.set_offsets_buffer("attr", attr_off);
189 
190   // Check that first partial read returns expected results
191   CHECK_NOTHROW(query.submit());
192   Query::Status status = query.query_status();
193   CHECK(status == Query::Status::INCOMPLETE);
194   CHECK(attr_val == exp_data_part1);
195   CHECK(attr_off == exp_off_part1);
196 
197   // Check that second partial read returns expected results
198   CHECK_NOTHROW(query.submit());
199   status = query.query_status();
200   CHECK(status == Query::Status::COMPLETE);
201   CHECK(attr_val == exp_data_part2);
202   CHECK(attr_off == exp_off_part2);
203 
204   array.close();
205 }
206 
create_dense_array(const std::string & array_name)207 void create_dense_array(const std::string& array_name) {
208   Context ctx;
209   VFS vfs(ctx);
210 
211   // Create the array
212   if (vfs.is_dir(array_name))
213     vfs.remove_dir(array_name);
214 
215   Domain dom(ctx);
216   dom.add_dimension(Dimension::create<int64_t>(ctx, "d1", {{1, 4}}, 2))
217       .add_dimension(Dimension::create<int64_t>(ctx, "d2", {{1, 4}}, 2));
218 
219   ArraySchema schema(ctx, TILEDB_DENSE);
220   Attribute attr(ctx, "attr", TILEDB_INT32);
221   attr.set_cell_val_num(TILEDB_VAR_NUM);
222   schema.add_attribute(attr);
223   schema.set_tile_order(TILEDB_ROW_MAJOR);
224   schema.set_cell_order(TILEDB_ROW_MAJOR);
225   schema.set_domain(dom);
226 
227   Array::create(array_name, schema);
228 }
229 
write_dense_array(Context ctx,const std::string & array_name,std::vector<int32_t> & data,std::vector<uint64_t> & data_offsets,tiledb_layout_t layout,std::shared_ptr<Config> config=nullptr)230 void write_dense_array(
231     Context ctx,
232     const std::string& array_name,
233     std::vector<int32_t>& data,
234     std::vector<uint64_t>& data_offsets,
235     tiledb_layout_t layout,
236     std::shared_ptr<Config> config = nullptr) {
237   std::vector<int64_t> d1 = {1, 1, 2, 2};
238   std::vector<int64_t> d2 = {1, 2, 1, 2};
239 
240   Array array(ctx, array_name, TILEDB_WRITE);
241   Query query(ctx, array, TILEDB_WRITE);
242 
243   if (config != nullptr) {
244     query.set_config(*config);
245 
246     // Validate we can retrieve set config
247     Config config2 = query.config();
248     bool same = *config == config2;
249     CHECK(same == true);
250   }
251 
252   query.set_data_buffer("attr", data);
253   query.set_offsets_buffer("attr", data_offsets);
254   query.set_layout(layout);
255   if (layout == TILEDB_UNORDERED) {
256     // sparse write to dense array
257     query.set_data_buffer("d1", d1);
258     query.set_data_buffer("d2", d2);
259   } else {
260     query.set_subarray<int64_t>({1, 2, 1, 2});
261   }
262 
263   CHECK_NOTHROW(query.submit());
264 
265   // Finalize is necessary in global writes, otherwise a no-op
266   query.finalize();
267 
268   array.close();
269 }
270 
write_dense_array(Context ctx,const std::string & array_name,std::vector<int32_t> & data,std::vector<uint32_t> & data_offsets,tiledb_layout_t layout,std::shared_ptr<Config> config=nullptr)271 void write_dense_array(
272     Context ctx,
273     const std::string& array_name,
274     std::vector<int32_t>& data,
275     std::vector<uint32_t>& data_offsets,
276     tiledb_layout_t layout,
277     std::shared_ptr<Config> config = nullptr) {
278   std::vector<int64_t> d1 = {1, 1, 2, 2};
279   std::vector<int64_t> d2 = {1, 2, 1, 2};
280 
281   Array array(ctx, array_name, TILEDB_WRITE);
282   Query query(ctx, array, TILEDB_WRITE);
283 
284   if (config != nullptr) {
285     query.set_config(*config);
286 
287     // Validate we can retrieve set config
288     Config config2 = query.config();
289     bool same = *config == config2;
290     CHECK(same == true);
291   }
292 
293   // Write using a 32-bit vector, but cast it to 64-bit pointer so that the API
294   // accepts it
295   query.set_data_buffer("attr", data.data(), data.size());
296   query.set_offsets_buffer(
297       "attr",
298       reinterpret_cast<uint64_t*>(data_offsets.data()),
299       data_offsets.size());
300   query.set_layout(layout);
301   if (layout == TILEDB_UNORDERED) {
302     // sparse write to dense array
303     query.set_data_buffer("d1", d1);
304     query.set_data_buffer("d2", d2);
305   } else {
306     query.set_subarray<int64_t>({1, 2, 1, 2});
307   }
308 
309   CHECK_NOTHROW(query.submit());
310   // Finalize is necessary in global writes, otherwise a no-op
311   query.finalize();
312 
313   array.close();
314 }
315 
read_and_check_dense_array(Context ctx,const std::string & array_name,std::vector<int32_t> & expected_data,std::vector<uint64_t> & expected_offsets,std::shared_ptr<Config> config=nullptr)316 void read_and_check_dense_array(
317     Context ctx,
318     const std::string& array_name,
319     std::vector<int32_t>& expected_data,
320     std::vector<uint64_t>& expected_offsets,
321     std::shared_ptr<Config> config = nullptr) {
322   Array array(ctx, array_name, TILEDB_READ);
323   Query query(ctx, array, TILEDB_READ);
324 
325   if (config != nullptr) {
326     query.set_config(*config);
327 
328     // Validate we can retrieve set config
329     Config config2 = query.config();
330     bool same = *config == config2;
331     CHECK(same == true);
332   }
333 
334   std::vector<int32_t> attr_val(expected_data.size());
335   std::vector<uint64_t> attr_off(expected_offsets.size());
336   query.set_subarray<int64_t>({1, 2, 1, 2});
337   query.set_data_buffer("attr", attr_val);
338   query.set_offsets_buffer("attr", attr_off);
339   CHECK_NOTHROW(query.submit());
340 
341   // Check the element offsets are properly returned
342   CHECK(attr_val == expected_data);
343   CHECK(attr_off == expected_offsets);
344 
345   array.close();
346 }
347 
read_and_check_dense_array(Context ctx,const std::string & array_name,std::vector<int32_t> & expected_data,std::vector<uint32_t> & expected_offsets,std::shared_ptr<Config> config=nullptr)348 void read_and_check_dense_array(
349     Context ctx,
350     const std::string& array_name,
351     std::vector<int32_t>& expected_data,
352     std::vector<uint32_t>& expected_offsets,
353     std::shared_ptr<Config> config = nullptr) {
354   Array array(ctx, array_name, TILEDB_READ);
355   Query query(ctx, array, TILEDB_READ);
356 
357   if (config != nullptr) {
358     query.set_config(*config);
359 
360     // Validate we can retrieve set config
361     Config config2 = query.config();
362     bool same = *config == config2;
363     CHECK(same == true);
364   }
365 
366   std::vector<int32_t> attr_val(expected_data.size());
367   std::vector<uint32_t> attr_off(expected_offsets.size());
368   query.set_subarray<int64_t>({1, 2, 1, 2});
369   // Read using a 32-bit vector, but cast it to 64-bit pointer so that the API
370   // accepts it
371   query.set_data_buffer("attr", attr_val.data(), attr_val.size());
372   query.set_offsets_buffer(
373       "attr", reinterpret_cast<uint64_t*>(attr_off.data()), attr_off.size());
374   CHECK_NOTHROW(query.submit());
375 
376   // Check the element offsets are properly returned
377   CHECK(attr_val == expected_data);
378   CHECK(attr_off == expected_offsets);
379 
380   array.close();
381 }
382 
partial_read_and_check_dense_array(Context ctx,const std::string & array_name,std::vector<int32_t> & exp_data_part1,std::vector<uint64_t> & exp_off_part1,std::vector<int32_t> & exp_data_part2,std::vector<uint64_t> & exp_off_part2)383 void partial_read_and_check_dense_array(
384     Context ctx,
385     const std::string& array_name,
386     std::vector<int32_t>& exp_data_part1,
387     std::vector<uint64_t>& exp_off_part1,
388     std::vector<int32_t>& exp_data_part2,
389     std::vector<uint64_t>& exp_off_part2) {
390   // The size of read buffers is smaller than the size
391   // of all the data, so we'll do partial reads
392   std::vector<int32_t> attr_val(exp_data_part1.size());
393   std::vector<uint64_t> attr_off(exp_off_part1.size());
394 
395   Array array(ctx, array_name, TILEDB_READ);
396   Query query(ctx, array, TILEDB_READ);
397   query.set_subarray<int64_t>({1, 2, 1, 2});
398   query.set_data_buffer("attr", attr_val);
399   query.set_offsets_buffer("attr", attr_off);
400 
401   // Check that first partial read returns expected results
402   CHECK_NOTHROW(query.submit());
403   CHECK(attr_val == exp_data_part1);
404   CHECK(attr_off == exp_off_part1);
405 
406   // Check that second partial read returns expected results
407   CHECK_NOTHROW(query.submit());
408   CHECK(attr_val == exp_data_part2);
409   CHECK(attr_off == exp_off_part2);
410 
411   array.close();
412 }
413 
414 TEST_CASE(
415     "C++ API: Test element offsets : sparse array",
416     "[var-offsets][element-offset][sparse]") {
417   std::string array_name = "test_element_offset";
418   create_sparse_array(array_name);
419 
420   std::vector<int32_t> data = {1, 2, 3, 4, 5, 6};
421   Context ctx;
422 
423   SECTION("Byte offsets (default case)") {
424     Config config = ctx.config();
425     CHECK((std::string)config["sm.var_offsets.mode"] == "bytes");
426 
427     std::vector<uint64_t> byte_offsets = {0, 4, 12, 20};
428 
429     SECTION("Unordered write") {
430       write_sparse_array(ctx, array_name, data, byte_offsets, TILEDB_UNORDERED);
431       SECTION("Row major read") {
432         read_and_check_sparse_array(
433             ctx, array_name, data, byte_offsets, TILEDB_ROW_MAJOR);
434       }
435       SECTION("Global order read") {
436         read_and_check_sparse_array(
437             ctx, array_name, data, byte_offsets, TILEDB_GLOBAL_ORDER);
438       }
439       SECTION("Unordered read") {
440         read_and_check_sparse_array(
441             ctx, array_name, data, byte_offsets, TILEDB_UNORDERED);
442       }
443     }
444     SECTION("Global order write") {
445       write_sparse_array(
446           ctx, array_name, data, byte_offsets, TILEDB_GLOBAL_ORDER);
447       SECTION("Row major read") {
448         read_and_check_sparse_array(
449             ctx, array_name, data, byte_offsets, TILEDB_ROW_MAJOR);
450       }
451       SECTION("Global order read") {
452         read_and_check_sparse_array(
453             ctx, array_name, data, byte_offsets, TILEDB_GLOBAL_ORDER);
454       }
455       SECTION("Unordered read") {
456         read_and_check_sparse_array(
457             ctx, array_name, data, byte_offsets, TILEDB_UNORDERED);
458       }
459     }
460   }
461 
462   SECTION("Element offsets") {
463     Config config;
464     // Change config of offsets format from bytes to elements
465     config["sm.var_offsets.mode"] = "elements";
466     Context ctx(config);
467 
468     std::vector<uint64_t> element_offsets = {0, 1, 3, 5};
469 
470     SECTION("Unordered write") {
471       write_sparse_array(
472           ctx, array_name, data, element_offsets, TILEDB_UNORDERED);
473       SECTION("Row major read") {
474         read_and_check_sparse_array(
475             ctx, array_name, data, element_offsets, TILEDB_ROW_MAJOR);
476       }
477       SECTION("Global order read") {
478         read_and_check_sparse_array(
479             ctx, array_name, data, element_offsets, TILEDB_GLOBAL_ORDER);
480       }
481       SECTION("Unordered read") {
482         read_and_check_sparse_array(
483             ctx, array_name, data, element_offsets, TILEDB_UNORDERED);
484       }
485     }
486     SECTION("Global order write") {
487       write_sparse_array(
488           ctx, array_name, data, element_offsets, TILEDB_GLOBAL_ORDER);
489       SECTION("Row major read") {
490         read_and_check_sparse_array(
491             ctx, array_name, data, element_offsets, TILEDB_ROW_MAJOR);
492       }
493       SECTION("Global order read") {
494         read_and_check_sparse_array(
495             ctx, array_name, data, element_offsets, TILEDB_GLOBAL_ORDER);
496       }
497       SECTION("Unordered read") {
498         read_and_check_sparse_array(
499             ctx, array_name, data, element_offsets, TILEDB_UNORDERED);
500       }
501     }
502   }
503 
504   // Clean up
505   VFS vfs(ctx);
506   if (vfs.is_dir(array_name))
507     vfs.remove_dir(array_name);
508 }
509 
510 TEST_CASE(
511     "C++ API: Test element offsets : dense array",
512     "[var-offsets][element-offset][dense]") {
513   std::string array_name = "test_element_offset";
514   create_dense_array(array_name);
515 
516   std::vector<int32_t> data = {1, 2, 3, 4, 5, 6};
517   Context ctx;
518 
519   SECTION("Byte offsets (default case)") {
520     Config config = ctx.config();
521     CHECK((std::string)config["sm.var_offsets.mode"] == "bytes");
522 
523     std::vector<uint64_t> byte_offsets = {0, 4, 12, 20};
524 
525     SECTION("Ordered write") {
526       write_dense_array(ctx, array_name, data, byte_offsets, TILEDB_ROW_MAJOR);
527       read_and_check_dense_array(ctx, array_name, data, byte_offsets);
528     }
529     SECTION("Global order write") {
530       write_dense_array(
531           ctx, array_name, data, byte_offsets, TILEDB_GLOBAL_ORDER);
532       read_and_check_dense_array(ctx, array_name, data, byte_offsets);
533     }
534   }
535 
536   SECTION("Element offsets") {
537     Config config;
538     // Change config of offsets format from bytes to elements
539     config["sm.var_offsets.mode"] = "elements";
540     Context ctx(config);
541 
542     std::vector<uint64_t> element_offsets = {0, 1, 3, 5};
543 
544     SECTION("Ordered write") {
545       write_dense_array(
546           ctx, array_name, data, element_offsets, TILEDB_ROW_MAJOR);
547       read_and_check_dense_array(ctx, array_name, data, element_offsets);
548     }
549     SECTION("Global order write") {
550       write_dense_array(
551           ctx, array_name, data, element_offsets, TILEDB_GLOBAL_ORDER);
552       read_and_check_dense_array(ctx, array_name, data, element_offsets);
553     }
554   }
555 
556   // Clean up
557   VFS vfs(ctx);
558   if (vfs.is_dir(array_name))
559     vfs.remove_dir(array_name);
560 }
561 
562 TEST_CASE(
563     "C++ API: Test offsets extra element: sparse array",
564     "[var-offsets][extra-offset][sparse]") {
565   std::string array_name = "test_extra_offset";
566   create_sparse_array(array_name);
567 
568   Context ctx;
569   std::vector<int32_t> data = {1, 2, 3, 4, 5, 6};
570   std::vector<uint64_t> data_offsets = {0, 4, 12, 20};
571   std::vector<uint64_t> element_offsets = {0, 1, 3, 5};
572 
573   SECTION("Full read") {
574     Config config;
575 
576     SECTION("No extra element (default case)") {
577       config = ctx.config();
578       CHECK((std::string)config["sm.var_offsets.extra_element"] == "false");
579 
580       write_sparse_array(ctx, array_name, data, data_offsets, TILEDB_UNORDERED);
581       SECTION("Row major read") {
582         read_and_check_sparse_array(
583             ctx, array_name, data, data_offsets, TILEDB_ROW_MAJOR);
584       }
585       SECTION("Global order read") {
586         read_and_check_sparse_array(
587             ctx, array_name, data, data_offsets, TILEDB_GLOBAL_ORDER);
588       }
589       SECTION("Unordered read") {
590         read_and_check_sparse_array(
591             ctx, array_name, data, data_offsets, TILEDB_UNORDERED);
592       }
593     }
594 
595     SECTION("Extra element") {
596       config["sm.var_offsets.extra_element"] = "true";
597 
598       SECTION("Byte offsets (default config)") {
599         CHECK((std::string)config["sm.var_offsets.mode"] == "bytes");
600         Context ctx(config);
601 
602         // Write data with extra element indicating total number of bytes
603         data_offsets.push_back(sizeof(data[0]) * data.size());
604 
605         SECTION("Unordered write") {
606           write_sparse_array(
607               ctx, array_name, data, data_offsets, TILEDB_UNORDERED);
608           SECTION("Row major read") {
609             read_and_check_sparse_array(
610                 ctx, array_name, data, data_offsets, TILEDB_ROW_MAJOR);
611           }
612           SECTION("Global order read") {
613             read_and_check_sparse_array(
614                 ctx, array_name, data, data_offsets, TILEDB_GLOBAL_ORDER);
615           }
616           SECTION("UNORDERED read") {
617             read_and_check_sparse_array(
618                 ctx, array_name, data, data_offsets, TILEDB_UNORDERED);
619           }
620         }
621         SECTION("Global order write") {
622           write_sparse_array(
623               ctx, array_name, data, data_offsets, TILEDB_GLOBAL_ORDER);
624           SECTION("Row major read") {
625             read_and_check_sparse_array(
626                 ctx, array_name, data, data_offsets, TILEDB_ROW_MAJOR);
627           }
628           SECTION("Global order read") {
629             read_and_check_sparse_array(
630                 ctx, array_name, data, data_offsets, TILEDB_GLOBAL_ORDER);
631           }
632           SECTION("Unordered read") {
633             read_and_check_sparse_array(
634                 ctx, array_name, data, data_offsets, TILEDB_UNORDERED);
635           }
636         }
637       }
638 
639       SECTION("Element offsets") {
640         config["sm.var_offsets.mode"] = "elements";
641         Context ctx(config);
642 
643         // Write data with extra element indicating the total number of elements
644         element_offsets.push_back(data.size());
645 
646         SECTION("Unordered write") {
647           write_sparse_array(
648               ctx, array_name, data, element_offsets, TILEDB_UNORDERED);
649           SECTION("Row major read") {
650             read_and_check_sparse_array(
651                 ctx, array_name, data, element_offsets, TILEDB_ROW_MAJOR);
652           }
653           SECTION("Global order read") {
654             read_and_check_sparse_array(
655                 ctx, array_name, data, element_offsets, TILEDB_GLOBAL_ORDER);
656           }
657           SECTION("Unordered read") {
658             read_and_check_sparse_array(
659                 ctx, array_name, data, element_offsets, TILEDB_UNORDERED);
660           }
661         }
662         SECTION("Global order write") {
663           write_sparse_array(
664               ctx, array_name, data, element_offsets, TILEDB_GLOBAL_ORDER);
665           SECTION("Row major read") {
666             read_and_check_sparse_array(
667                 ctx, array_name, data, element_offsets, TILEDB_ROW_MAJOR);
668           }
669           SECTION("Global order read") {
670             read_and_check_sparse_array(
671                 ctx, array_name, data, element_offsets, TILEDB_GLOBAL_ORDER);
672           }
673           SECTION("Unordered read") {
674             read_and_check_sparse_array(
675                 ctx, array_name, data, element_offsets, TILEDB_UNORDERED);
676           }
677         }
678       }
679 
680       SECTION("User offsets buffer too small") {
681         Context ctx(config);
682 
683         Array array_w(ctx, array_name, TILEDB_WRITE);
684         std::vector<int64_t> d1 = {1, 2, 3, 4};
685         std::vector<int64_t> d2 = {2, 1, 3, 4};
686         Query query_w(ctx, array_w, TILEDB_WRITE);
687         query_w.set_layout(TILEDB_UNORDERED)
688             .set_data_buffer("d1", d1)
689             .set_data_buffer("d2", d2);
690 
691         // Try to write without allocating memory for the extra element
692         query_w.set_data_buffer("attr", data);
693         query_w.set_offsets_buffer("attr", data_offsets);
694         CHECK_THROWS(query_w.submit());
695 
696         // Write data with extra element
697         data_offsets.push_back(sizeof(data[0]) * data.size());
698         query_w.set_data_buffer("attr", data);
699         query_w.set_offsets_buffer("attr", data_offsets);
700         CHECK_NOTHROW(query_w.submit());
701         array_w.close();
702 
703         // Submit read query
704         Array array_r(ctx, array_name, TILEDB_READ);
705         Query query_r(ctx, array_r, TILEDB_READ);
706 
707         // Assume no size for the extra element
708         std::vector<int32_t> attr_val(data.size());
709         std::vector<uint64_t> attr_off(data_offsets.size() - 1);
710         query_r.set_data_buffer("attr", attr_val);
711         query_r.set_offsets_buffer("attr", attr_off);
712 
713         // First partial read because offsets don't fit
714         CHECK_NOTHROW(query_r.submit());
715         CHECK(query_r.query_status() == Query::Status::INCOMPLETE);
716         // check returned data
717         auto data_num = query_r.result_buffer_elements()["attr"].second;
718         CHECK(data_num == 3);
719         std::vector<int32_t> data_exp1 = {1, 2, 3, 0, 0, 0};
720         CHECK(attr_val == data_exp1);
721         // check returned offsets
722         auto offset_num = query_r.result_buffer_elements()["attr"].first;
723         CHECK(offset_num == 3);
724         std::vector<uint64_t> data_off_exp1 = {0, 4, 12, 0};
725         CHECK(attr_off == data_off_exp1);
726 
727         // check returned data with nullable API
728         auto result_els = query_r.result_buffer_elements_nullable()["attr"];
729         CHECK(std::get<0>(result_els) == 3);
730         CHECK(std::get<1>(result_els) == 3);
731         CHECK(std::get<2>(result_els) == 0);
732 
733         // Second partial read
734         reset_read_buffers(attr_val, attr_off);
735         CHECK_NOTHROW(query_r.submit());
736         CHECK(query_r.query_status() == Query::Status::COMPLETE);
737         // check returned data
738         data_num = query_r.result_buffer_elements()["attr"].second;
739         CHECK(data_num == 3);
740         std::vector<int32_t> data_exp2 = {4, 5, 6, 0, 0, 0};
741         CHECK(attr_val == data_exp2);
742         // check returned offsets
743         offset_num = query_r.result_buffer_elements()["attr"].first;
744         CHECK(offset_num == 3);
745         std::vector<uint64_t> data_off_exp2 = {0, 8, 12, 0};
746         CHECK(attr_off == data_off_exp2);
747 
748         array_r.close();
749       }
750     }
751   }
752 
753   SECTION("Partial read") {
754     Config config;
755 
756     // The expected buffers to be returned after 2 partial reads with
757     // read buffers of size data.size() / 2
758     std::vector<int32_t> data_part1 = {1, 2, 3};
759     std::vector<uint64_t> data_off_part1 = {0, 4};
760     std::vector<uint64_t> data_elem_off_part1 = {0, 1};
761     std::vector<int32_t> data_part2 = {4, 5, 6};
762     std::vector<uint64_t> data_off_part2 = {0, 8};
763     std::vector<uint64_t> data_elem_off_part2 = {0, 2};
764 
765     SECTION("No extra element (default case)") {
766       config = ctx.config();
767       CHECK((std::string)config["sm.var_offsets.extra_element"] == "false");
768 
769       write_sparse_array(ctx, array_name, data, data_offsets, TILEDB_UNORDERED);
770       SECTION("Row major read") {
771         partial_read_and_check_sparse_array(
772             ctx,
773             array_name,
774             data_part1,
775             data_off_part1,
776             data_part2,
777             data_off_part2,
778             TILEDB_ROW_MAJOR);
779       }
780       SECTION("Global order read") {
781         partial_read_and_check_sparse_array(
782             ctx,
783             array_name,
784             data_part1,
785             data_off_part1,
786             data_part2,
787             data_off_part2,
788             TILEDB_GLOBAL_ORDER);
789       }
790       SECTION("Unordered read") {
791         partial_read_and_check_sparse_array(
792             ctx,
793             array_name,
794             data_part1,
795             data_off_part1,
796             data_part2,
797             data_off_part2,
798             TILEDB_UNORDERED);
799       }
800     }
801 
802     SECTION("Extra element") {
803       config["sm.var_offsets.extra_element"] = "true";
804 
805       SECTION("Byte offsets (default config)") {
806         CHECK((std::string)config["sm.var_offsets.mode"] == "bytes");
807         Context ctx(config);
808 
809         // Write data with extra element indicating total number of bytes
810         data_offsets.push_back(sizeof(data[0]) * data.size());
811 
812         // Expect an extra element offset on each read
813         data_off_part1.push_back(sizeof(data_part1[0]) * data_part1.size());
814         data_off_part2.push_back(sizeof(data_part2[0]) * data_part2.size());
815 
816         SECTION("Unordered write") {
817           write_sparse_array(
818               ctx, array_name, data, data_offsets, TILEDB_UNORDERED);
819           SECTION("Row major read") {
820             partial_read_and_check_sparse_array(
821                 ctx,
822                 array_name,
823                 data_part1,
824                 data_off_part1,
825                 data_part2,
826                 data_off_part2,
827                 TILEDB_ROW_MAJOR);
828           }
829           SECTION("Global order read") {
830             partial_read_and_check_sparse_array(
831                 ctx,
832                 array_name,
833                 data_part1,
834                 data_off_part1,
835                 data_part2,
836                 data_off_part2,
837                 TILEDB_GLOBAL_ORDER);
838           }
839           SECTION("Unordered read") {
840             partial_read_and_check_sparse_array(
841                 ctx,
842                 array_name,
843                 data_part1,
844                 data_off_part1,
845                 data_part2,
846                 data_off_part2,
847                 TILEDB_UNORDERED);
848           }
849         }
850         SECTION("Global order write") {
851           write_sparse_array(
852               ctx, array_name, data, data_offsets, TILEDB_GLOBAL_ORDER);
853           SECTION("Row major read") {
854             partial_read_and_check_sparse_array(
855                 ctx,
856                 array_name,
857                 data_part1,
858                 data_off_part1,
859                 data_part2,
860                 data_off_part2,
861                 TILEDB_ROW_MAJOR);
862           }
863           SECTION("Global order read") {
864             partial_read_and_check_sparse_array(
865                 ctx,
866                 array_name,
867                 data_part1,
868                 data_off_part1,
869                 data_part2,
870                 data_off_part2,
871                 TILEDB_GLOBAL_ORDER);
872           }
873           SECTION("Unordered read") {
874             partial_read_and_check_sparse_array(
875                 ctx,
876                 array_name,
877                 data_part1,
878                 data_off_part1,
879                 data_part2,
880                 data_off_part2,
881                 TILEDB_UNORDERED);
882           }
883         }
884       }
885 
886       SECTION("Element offsets") {
887         config["sm.var_offsets.mode"] = "elements";
888         Context ctx(config);
889 
890         // Write data with extra element indicating total number of elements
891         element_offsets.push_back(data.size());
892 
893         // Expect an extra element offset on each read
894         data_elem_off_part1.push_back(data_part1.size());
895         data_elem_off_part2.push_back(data_part2.size());
896 
897         SECTION("Unordered write") {
898           write_sparse_array(
899               ctx, array_name, data, element_offsets, TILEDB_UNORDERED);
900           SECTION("Row major read") {
901             partial_read_and_check_sparse_array(
902                 ctx,
903                 array_name,
904                 data_part1,
905                 data_elem_off_part1,
906                 data_part2,
907                 data_elem_off_part2,
908                 TILEDB_ROW_MAJOR);
909           }
910           SECTION("Global order read") {
911             partial_read_and_check_sparse_array(
912                 ctx,
913                 array_name,
914                 data_part1,
915                 data_elem_off_part1,
916                 data_part2,
917                 data_elem_off_part2,
918                 TILEDB_GLOBAL_ORDER);
919           }
920           SECTION("Unordered read") {
921             partial_read_and_check_sparse_array(
922                 ctx,
923                 array_name,
924                 data_part1,
925                 data_elem_off_part1,
926                 data_part2,
927                 data_elem_off_part2,
928                 TILEDB_UNORDERED);
929           }
930         }
931         SECTION("Global order write") {
932           write_sparse_array(
933               ctx, array_name, data, element_offsets, TILEDB_GLOBAL_ORDER);
934           SECTION("Row major read") {
935             partial_read_and_check_sparse_array(
936                 ctx,
937                 array_name,
938                 data_part1,
939                 data_elem_off_part1,
940                 data_part2,
941                 data_elem_off_part2,
942                 TILEDB_ROW_MAJOR);
943           }
944           SECTION("Global order read") {
945             partial_read_and_check_sparse_array(
946                 ctx,
947                 array_name,
948                 data_part1,
949                 data_elem_off_part1,
950                 data_part2,
951                 data_elem_off_part2,
952                 TILEDB_GLOBAL_ORDER);
953           }
954           SECTION("Unordered read") {
955             partial_read_and_check_sparse_array(
956                 ctx,
957                 array_name,
958                 data_part1,
959                 data_elem_off_part1,
960                 data_part2,
961                 data_elem_off_part2,
962                 TILEDB_UNORDERED);
963           }
964         }
965       }
966 
967       SECTION("User offsets buffer too small") {
968         // Write data with extra element
969         data_offsets.push_back(sizeof(data[0]) * data.size());
970         write_sparse_array(
971             ctx, array_name, data, data_offsets, TILEDB_UNORDERED);
972 
973         // Submit read query
974         Context ctx(config);
975         Array array(ctx, array_name, TILEDB_READ);
976         Query query(ctx, array, TILEDB_READ);
977 
978         // Assume no size for the extra element
979         std::vector<int32_t> attr_val(data_part1.size());
980         std::vector<uint64_t> attr_off(data_off_part1.size());
981         query.set_data_buffer("attr", attr_val);
982         query.set_offsets_buffer("attr", attr_off);
983 
984         // First partial read
985         CHECK_NOTHROW(query.submit());
986         CHECK(query.query_status() == Query::Status::INCOMPLETE);
987         std::vector<int32_t> data_exp1 = {1, 0, 0};
988         std::vector<uint64_t> data_off_exp1 = {0, 4};
989         // check returned data
990         auto data_num = query.result_buffer_elements()["attr"].second;
991         CHECK(data_num == 1);
992         CHECK(attr_val == data_exp1);
993         // check returned offsets
994         auto offset_num = query.result_buffer_elements()["attr"].first;
995         CHECK(offset_num == 2);
996         CHECK(attr_off == data_off_exp1);
997 
998         // Second partial read
999         reset_read_buffers(attr_val, attr_off);
1000         CHECK_NOTHROW(query.submit());
1001         CHECK(query.query_status() == Query::Status::INCOMPLETE);
1002         std::vector<int32_t> data_exp2 = {2, 3, 0};
1003         std::vector<uint64_t> data_off_exp2 = {0, 8};
1004         // check returned data
1005         data_num = query.result_buffer_elements()["attr"].second;
1006         CHECK(data_num == 2);
1007         CHECK(attr_val == data_exp2);
1008         // check returned offsets
1009         offset_num = query.result_buffer_elements()["attr"].first;
1010         CHECK(offset_num == 2);
1011         CHECK(attr_off == data_off_exp2);
1012 
1013         // Third partial read
1014         reset_read_buffers(attr_val, attr_off);
1015         CHECK_NOTHROW(query.submit());
1016         CHECK(query.query_status() == Query::Status::INCOMPLETE);
1017         std::vector<int32_t> data_exp3 = {4, 5, 0};
1018         std::vector<uint64_t> data_off_exp3 = {0, 8};
1019         // check returned data
1020         data_num = query.result_buffer_elements()["attr"].second;
1021         CHECK(data_num == 2);
1022         CHECK(attr_val == data_exp3);
1023         // check returned offsets
1024         offset_num = query.result_buffer_elements()["attr"].first;
1025         CHECK(offset_num == 2);
1026         CHECK(attr_off == data_off_exp3);
1027 
1028         // Last partial read
1029         reset_read_buffers(attr_val, attr_off);
1030         CHECK_NOTHROW(query.submit());
1031         CHECK(query.query_status() == Query::Status::COMPLETE);
1032         std::vector<int32_t> data_exp4 = {6, 0, 0};
1033         std::vector<uint64_t> data_off_exp4 = {0, 4};
1034         // check returned data
1035         data_num = query.result_buffer_elements()["attr"].second;
1036         CHECK(data_num == 1);
1037         CHECK(attr_val == data_exp4);
1038         // check returned offsets
1039         offset_num = query.result_buffer_elements()["attr"].first;
1040         CHECK(offset_num == 2);
1041         CHECK(attr_off == data_off_exp4);
1042 
1043         array.close();
1044       }
1045     }
1046   }
1047 
1048   // Clean up
1049   VFS vfs(ctx);
1050   if (vfs.is_dir(array_name))
1051     vfs.remove_dir(array_name);
1052 }
1053 
1054 TEST_CASE(
1055     "C++ API: Test offsets extra element: dense array",
1056     "[var-offsets][extra-offset][dense]") {
1057   std::string array_name = "test_extra_offset";
1058   create_dense_array(array_name);
1059 
1060   Context ctx;
1061   std::vector<int32_t> data = {1, 2, 3, 4, 5, 6};
1062   std::vector<uint64_t> data_offsets = {0, 4, 12, 20};
1063   std::vector<uint64_t> element_offsets = {0, 1, 3, 5};
1064 
1065   SECTION("Full read") {
1066     Config config;
1067 
1068     SECTION("No extra element (default case)") {
1069       config = ctx.config();
1070       CHECK((std::string)config["sm.var_offsets.extra_element"] == "false");
1071 
1072       write_dense_array(ctx, array_name, data, data_offsets, TILEDB_ROW_MAJOR);
1073       read_and_check_dense_array(ctx, array_name, data, data_offsets);
1074     }
1075 
1076     SECTION("Extra element") {
1077       config["sm.var_offsets.extra_element"] = "true";
1078 
1079       SECTION("Byte offsets (default config)") {
1080         CHECK((std::string)config["sm.var_offsets.mode"] == "bytes");
1081         Context ctx(config);
1082 
1083         // Write data with extra element indicating total number of bytes
1084         data_offsets.push_back(sizeof(data[0]) * data.size());
1085 
1086         SECTION("Ordered write") {
1087           write_dense_array(
1088               ctx, array_name, data, data_offsets, TILEDB_ROW_MAJOR);
1089           read_and_check_dense_array(ctx, array_name, data, data_offsets);
1090         }
1091         SECTION("Global order write") {
1092           write_dense_array(
1093               ctx, array_name, data, data_offsets, TILEDB_GLOBAL_ORDER);
1094           read_and_check_dense_array(ctx, array_name, data, data_offsets);
1095         }
1096       }
1097 
1098       SECTION("Element offsets") {
1099         config["sm.var_offsets.mode"] = "elements";
1100         Context ctx(config);
1101 
1102         // Write data with extra element indicating the total number of elements
1103         element_offsets.push_back(data.size());
1104 
1105         SECTION("Ordered write") {
1106           write_dense_array(
1107               ctx, array_name, data, element_offsets, TILEDB_ROW_MAJOR);
1108           read_and_check_dense_array(ctx, array_name, data, element_offsets);
1109         }
1110         SECTION("Global order write") {
1111           write_dense_array(
1112               ctx, array_name, data, element_offsets, TILEDB_GLOBAL_ORDER);
1113           read_and_check_dense_array(ctx, array_name, data, element_offsets);
1114         }
1115       }
1116 
1117       SECTION("User offsets buffer too small") {
1118         // Use element offsets to cover this code path as well
1119         config["sm.var_offsets.mode"] = "elements";
1120         Context ctx(config);
1121 
1122         Array array_w(ctx, array_name, TILEDB_WRITE);
1123         Query query_w(ctx, array_w, TILEDB_WRITE);
1124         query_w.set_layout(TILEDB_ROW_MAJOR)
1125             .set_subarray<int64_t>({1, 2, 1, 2});
1126 
1127         // Try to write without allocating memory for the extra element
1128         query_w.set_data_buffer("attr", data);
1129         query_w.set_offsets_buffer("attr", element_offsets);
1130         CHECK_THROWS(query_w.submit());
1131 
1132         // Write data with extra element
1133         element_offsets.push_back(data.size());
1134         query_w.set_data_buffer("attr", data);
1135         query_w.set_offsets_buffer("attr", element_offsets);
1136         CHECK_NOTHROW(query_w.submit());
1137         array_w.close();
1138 
1139         // Submit read query
1140         Array array_r(ctx, array_name, TILEDB_READ);
1141         Query query_r(ctx, array_r, TILEDB_READ);
1142 
1143         // Assume no size for the extra element
1144         std::vector<int32_t> attr_val(data.size());
1145         std::vector<uint64_t> attr_off(element_offsets.size() - 1);
1146         query_r.set_data_buffer("attr", attr_val);
1147         query_r.set_offsets_buffer("attr", attr_off);
1148         query_r.set_subarray<int64_t>({1, 2, 1, 2});
1149 
1150         // First partial read because offsets don't fit
1151         CHECK_NOTHROW(query_r.submit());
1152         CHECK(query_r.query_status() == Query::Status::INCOMPLETE);
1153         std::vector<int32_t> data_exp1 = {1, 2, 3, 0, 0, 0};
1154         std::vector<uint64_t> data_off_exp1 = {0, 1, 3, 0};
1155         // check returned data
1156         auto data_num = query_r.result_buffer_elements()["attr"].second;
1157         CHECK(data_num == 3);
1158         CHECK(attr_val == data_exp1);
1159         // check returned offsets
1160         auto offset_num = query_r.result_buffer_elements()["attr"].first;
1161         CHECK(offset_num == 3);
1162         CHECK(attr_off == data_off_exp1);
1163 
1164         // Second partial read
1165         reset_read_buffers(attr_val, attr_off);
1166         CHECK_NOTHROW(query_r.submit());
1167         CHECK(query_r.query_status() == Query::Status::COMPLETE);
1168         std::vector<int32_t> data_exp2 = {4, 5, 6, 0, 0, 0};
1169         std::vector<uint64_t> data_off_exp2 = {0, 2, 3, 0};
1170         // check returned data
1171         data_num = query_r.result_buffer_elements()["attr"].second;
1172         CHECK(data_num == 3);
1173         CHECK(attr_val == data_exp2);
1174         // check returned offsets
1175         offset_num = query_r.result_buffer_elements()["attr"].first;
1176         CHECK(offset_num == 3);
1177         CHECK(attr_off == data_off_exp2);
1178 
1179         array_r.close();
1180       }
1181     }
1182   }
1183 
1184   SECTION("Partial read") {
1185     Config config;
1186 
1187     // The expected buffers to be returned after 2 partial reads with
1188     // read buffers of size data.size() / 2
1189     std::vector<int32_t> data_part1 = {1, 2, 3};
1190     std::vector<uint64_t> data_off_part1 = {0, 4};
1191     std::vector<uint64_t> data_elem_off_part1 = {0, 1};
1192     std::vector<int32_t> data_part2 = {4, 5, 6};
1193     std::vector<uint64_t> data_off_part2 = {0, 8};
1194     std::vector<uint64_t> data_elem_off_part2 = {0, 2};
1195 
1196     SECTION("No extra element (default case)") {
1197       config = ctx.config();
1198       CHECK((std::string)config["sm.var_offsets.extra_element"] == "false");
1199 
1200       write_dense_array(ctx, array_name, data, data_offsets, TILEDB_ROW_MAJOR);
1201       partial_read_and_check_dense_array(
1202           ctx,
1203           array_name,
1204           data_part1,
1205           data_off_part1,
1206           data_part2,
1207           data_off_part2);
1208     }
1209 
1210     SECTION("Extra element") {
1211       config["sm.var_offsets.extra_element"] = "true";
1212 
1213       SECTION("Byte offsets (default config)") {
1214         CHECK((std::string)config["sm.var_offsets.mode"] == "bytes");
1215         Context ctx(config);
1216 
1217         // Write data with extra element indicating total number of bytes
1218         data_offsets.push_back(sizeof(data[0]) * data.size());
1219 
1220         // Expect an extra element offset on each read
1221         data_off_part1.push_back(sizeof(data_part1[0]) * data_part1.size());
1222         data_off_part2.push_back(sizeof(data_part2[0]) * data_part2.size());
1223 
1224         SECTION("Ordered write") {
1225           write_dense_array(
1226               ctx, array_name, data, data_offsets, TILEDB_ROW_MAJOR);
1227           partial_read_and_check_dense_array(
1228               ctx,
1229               array_name,
1230               data_part1,
1231               data_off_part1,
1232               data_part2,
1233               data_off_part2);
1234         }
1235         SECTION("Global order write") {
1236           write_dense_array(
1237               ctx, array_name, data, data_offsets, TILEDB_GLOBAL_ORDER);
1238           partial_read_and_check_dense_array(
1239               ctx,
1240               array_name,
1241               data_part1,
1242               data_off_part1,
1243               data_part2,
1244               data_off_part2);
1245         }
1246       }
1247 
1248       SECTION("Element offsets") {
1249         config["sm.var_offsets.mode"] = "elements";
1250         Context ctx(config);
1251 
1252         // Write data with extra element indicating total number of elements
1253         element_offsets.push_back(data.size());
1254 
1255         // Expect an extra element offset on each read
1256         data_elem_off_part1.push_back(data_part1.size());
1257         data_elem_off_part2.push_back(data_part2.size());
1258 
1259         SECTION("Ordered write") {
1260           write_dense_array(
1261               ctx, array_name, data, element_offsets, TILEDB_ROW_MAJOR);
1262           partial_read_and_check_dense_array(
1263               ctx,
1264               array_name,
1265               data_part1,
1266               data_elem_off_part1,
1267               data_part2,
1268               data_elem_off_part2);
1269         }
1270         SECTION("Global order write") {
1271           write_dense_array(
1272               ctx, array_name, data, element_offsets, TILEDB_GLOBAL_ORDER);
1273           partial_read_and_check_dense_array(
1274               ctx,
1275               array_name,
1276               data_part1,
1277               data_elem_off_part1,
1278               data_part2,
1279               data_elem_off_part2);
1280         }
1281       }
1282 
1283       SECTION("User offsets buffer too small") {
1284         Context ctx(config);
1285         // Write data with extra element
1286         data_offsets.push_back(sizeof(data[0]) * data.size());
1287         write_dense_array(
1288             ctx, array_name, data, data_offsets, TILEDB_ROW_MAJOR);
1289 
1290         // Submit read query
1291         Array array(ctx, array_name, TILEDB_READ);
1292         Query query(ctx, array, TILEDB_READ);
1293 
1294         // Assume smaller offset buffer than data buffer
1295         std::vector<int32_t> attr_val(data_part1.size());
1296         std::vector<uint64_t> attr_off(data_off_part1.size());
1297         query.set_data_buffer("attr", attr_val);
1298         query.set_offsets_buffer("attr", attr_off);
1299         query.set_subarray<int64_t>({1, 2, 1, 2});
1300 
1301         // First partial read
1302         CHECK_NOTHROW(query.submit());
1303         CHECK(query.query_status() == Query::Status::INCOMPLETE);
1304         std::vector<int32_t> data_exp1 = {1, 0, 0};
1305         std::vector<uint64_t> data_off_exp1 = {0, 4};
1306         // check returned data
1307         auto data_num = query.result_buffer_elements()["attr"].second;
1308         CHECK(data_num == 1);
1309         CHECK(attr_val == data_exp1);
1310         // check returned offsets
1311         auto offset_num = query.result_buffer_elements()["attr"].first;
1312         CHECK(offset_num == 2);
1313         CHECK(attr_off == data_off_exp1);
1314 
1315         // Second partial read
1316         reset_read_buffers(attr_val, attr_off);
1317         CHECK_NOTHROW(query.submit());
1318         CHECK(query.query_status() == Query::Status::INCOMPLETE);
1319         std::vector<int32_t> data_exp2 = {2, 3, 0};
1320         std::vector<uint64_t> data_off_exp2 = {0, 8};
1321         // check returned data
1322         data_num = query.result_buffer_elements()["attr"].second;
1323         CHECK(data_num == 2);
1324         CHECK(attr_val == data_exp2);
1325         // check returned offsets
1326         offset_num = query.result_buffer_elements()["attr"].first;
1327         CHECK(offset_num == 2);
1328         CHECK(attr_off == data_off_exp2);
1329 
1330         // Third partial read
1331         reset_read_buffers(attr_val, attr_off);
1332         CHECK_NOTHROW(query.submit());
1333         CHECK(query.query_status() == Query::Status::INCOMPLETE);
1334         std::vector<int32_t> data_exp3 = {4, 5, 0};
1335         std::vector<uint64_t> data_off_exp3 = {0, 8};
1336         // check returned data
1337         data_num = query.result_buffer_elements()["attr"].second;
1338         CHECK(data_num == 2);
1339         CHECK(attr_val == data_exp3);
1340         // check returned offsets
1341         offset_num = query.result_buffer_elements()["attr"].first;
1342         CHECK(offset_num == 2);
1343         CHECK(attr_off == data_off_exp3);
1344 
1345         // Last partial read
1346         reset_read_buffers(attr_val, attr_off);
1347         CHECK_NOTHROW(query.submit());
1348         CHECK(query.query_status() == Query::Status::COMPLETE);
1349         std::vector<int32_t> data_exp4 = {6, 0, 0};
1350         std::vector<uint64_t> data_off_exp4 = {0, 4};
1351         // check returned data
1352         data_num = query.result_buffer_elements()["attr"].second;
1353         CHECK(data_num == 1);
1354         CHECK(attr_val == data_exp4);
1355         // check returned offsets
1356         offset_num = query.result_buffer_elements()["attr"].first;
1357         CHECK(offset_num == 2);
1358         CHECK(attr_off == data_off_exp4);
1359 
1360         array.close();
1361       }
1362     }
1363   }
1364 
1365   // Clean up
1366   VFS vfs(ctx);
1367   if (vfs.is_dir(array_name))
1368     vfs.remove_dir(array_name);
1369 }
1370 
1371 TEST_CASE(
1372     "C++ API: Test 32-bit offsets: sparse array",
1373     "[var-offsets][32bit-offset][sparse]") {
1374   std::string array_name = "test_32bit_offset";
1375   create_sparse_array(array_name);
1376 
1377   std::vector<int32_t> data = {1, 2, 3, 4, 5, 6};
1378   // Create 32 bit byte offsets buffer to use
1379   std::vector<uint32_t> data_byte_offsets = {0, 4, 12, 20};
1380 
1381   Config config;
1382   // Change config of offsets bitsize from 64 to 32
1383   config["sm.var_offsets.bitsize"] = 32;
1384   Context ctx(config);
1385 
1386   SECTION("Byte offsets (default case)") {
1387     CHECK((std::string)config["sm.var_offsets.mode"] == "bytes");
1388 
1389     SECTION("Unordered write") {
1390       write_sparse_array(
1391           ctx, array_name, data, data_byte_offsets, TILEDB_UNORDERED);
1392       SECTION("Row major read") {
1393         read_and_check_sparse_array(
1394             ctx, array_name, data, data_byte_offsets, TILEDB_ROW_MAJOR);
1395       }
1396       SECTION("Global order read") {
1397         read_and_check_sparse_array(
1398             ctx, array_name, data, data_byte_offsets, TILEDB_GLOBAL_ORDER);
1399       }
1400       SECTION("Unordered read") {
1401         read_and_check_sparse_array(
1402             ctx, array_name, data, data_byte_offsets, TILEDB_UNORDERED);
1403       }
1404     }
1405     SECTION("Global order write") {
1406       write_sparse_array(
1407           ctx, array_name, data, data_byte_offsets, TILEDB_GLOBAL_ORDER);
1408       SECTION("Row major read") {
1409         read_and_check_sparse_array(
1410             ctx, array_name, data, data_byte_offsets, TILEDB_ROW_MAJOR);
1411       }
1412       SECTION("Global order read") {
1413         read_and_check_sparse_array(
1414             ctx, array_name, data, data_byte_offsets, TILEDB_GLOBAL_ORDER);
1415       }
1416       SECTION("Unordered read") {
1417         read_and_check_sparse_array(
1418             ctx, array_name, data, data_byte_offsets, TILEDB_UNORDERED);
1419       }
1420     }
1421   }
1422 
1423   SECTION("Element offsets") {
1424     // Change config of offsets format from bytes to elements
1425     config["sm.var_offsets.mode"] = "elements";
1426     Context ctx(config);
1427 
1428     // Create 32 bit element offsets buffer to use
1429     std::vector<uint32_t> data_element_offsets = {0, 1, 3, 5};
1430 
1431     SECTION("Unordered write") {
1432       write_sparse_array(
1433           ctx, array_name, data, data_element_offsets, TILEDB_UNORDERED);
1434       SECTION("Row major read") {
1435         read_and_check_sparse_array(
1436             ctx, array_name, data, data_element_offsets, TILEDB_ROW_MAJOR);
1437       }
1438       SECTION("Global order read") {
1439         read_and_check_sparse_array(
1440             ctx, array_name, data, data_element_offsets, TILEDB_GLOBAL_ORDER);
1441       }
1442       SECTION("Unoredered read") {
1443         read_and_check_sparse_array(
1444             ctx, array_name, data, data_element_offsets, TILEDB_UNORDERED);
1445       }
1446     }
1447     SECTION("Global order write") {
1448       write_sparse_array(
1449           ctx, array_name, data, data_element_offsets, TILEDB_GLOBAL_ORDER);
1450       SECTION("Row major read") {
1451         read_and_check_sparse_array(
1452             ctx, array_name, data, data_element_offsets, TILEDB_ROW_MAJOR);
1453       }
1454       SECTION("Global order read") {
1455         read_and_check_sparse_array(
1456             ctx, array_name, data, data_element_offsets, TILEDB_GLOBAL_ORDER);
1457       }
1458       SECTION("Unordered read") {
1459         read_and_check_sparse_array(
1460             ctx, array_name, data, data_element_offsets, TILEDB_UNORDERED);
1461       }
1462     }
1463   }
1464 
1465   SECTION("Extra element") {
1466     config["sm.var_offsets.extra_element"] = "true";
1467     Context ctx(config);
1468 
1469     // Check the extra element is included in the offsets
1470     uint32_t data_size = static_cast<uint32_t>(sizeof(data[0]) * data.size());
1471     std::vector<uint32_t> data_byte_offsets = {0, 4, 12, 20, data_size};
1472 
1473     SECTION("Unordered write") {
1474       write_sparse_array(
1475           ctx, array_name, data, data_byte_offsets, TILEDB_UNORDERED);
1476       SECTION("Row major read") {
1477         read_and_check_sparse_array(
1478             ctx, array_name, data, data_byte_offsets, TILEDB_ROW_MAJOR);
1479       }
1480       SECTION("Global order read") {
1481         read_and_check_sparse_array(
1482             ctx, array_name, data, data_byte_offsets, TILEDB_GLOBAL_ORDER);
1483       }
1484       SECTION("Unordered read") {
1485         read_and_check_sparse_array(
1486             ctx, array_name, data, data_byte_offsets, TILEDB_UNORDERED);
1487       }
1488     }
1489     SECTION("Global order write") {
1490       write_sparse_array(
1491           ctx, array_name, data, data_byte_offsets, TILEDB_GLOBAL_ORDER);
1492       SECTION("Row major read") {
1493         read_and_check_sparse_array(
1494             ctx, array_name, data, data_byte_offsets, TILEDB_ROW_MAJOR);
1495       }
1496       SECTION("Global order read") {
1497         read_and_check_sparse_array(
1498             ctx, array_name, data, data_byte_offsets, TILEDB_GLOBAL_ORDER);
1499       }
1500       SECTION("Unordered read") {
1501         read_and_check_sparse_array(
1502             ctx, array_name, data, data_byte_offsets, TILEDB_UNORDERED);
1503       }
1504     }
1505   }
1506 
1507   // Clean up
1508   config["sm.var_offsets.extra_element"] = "false";
1509   config["sm.var_offsets.mode"] = "bytes";
1510   config["sm.var_offsets.bitsize"] = 64;
1511   Context ctx2(config);
1512   VFS vfs(ctx2);
1513   if (vfs.is_dir(array_name))
1514     vfs.remove_dir(array_name);
1515 }
1516 
1517 TEST_CASE(
1518     "C++ API: Test 32-bit offsets: dense array",
1519     "[var-offsets][32bit-offset][dense]") {
1520   std::string array_name = "test_32bit_offset";
1521   create_dense_array(array_name);
1522 
1523   std::vector<int32_t> data = {1, 2, 3, 4, 5, 6};
1524   // Create 32 bit offsets byte buffer to use
1525   std::vector<uint32_t> data_byte_offsets = {0, 4, 12, 20};
1526 
1527   Config config;
1528   // Change config of offsets bitsize from 64 to 32
1529   config["sm.var_offsets.bitsize"] = 32;
1530   Context ctx(config);
1531 
1532   SECTION("Byte offsets (default case)") {
1533     CHECK((std::string)config["sm.var_offsets.mode"] == "bytes");
1534 
1535     SECTION("Ordered write") {
1536       write_dense_array(
1537           ctx, array_name, data, data_byte_offsets, TILEDB_ROW_MAJOR);
1538       read_and_check_dense_array(ctx, array_name, data, data_byte_offsets);
1539     }
1540     SECTION("Global order write") {
1541       write_dense_array(
1542           ctx, array_name, data, data_byte_offsets, TILEDB_GLOBAL_ORDER);
1543       read_and_check_dense_array(ctx, array_name, data, data_byte_offsets);
1544     }
1545   }
1546 
1547   SECTION("Element offsets") {
1548     // Change config of offsets format from bytes to elements
1549     config["sm.var_offsets.mode"] = "elements";
1550     Context ctx(config);
1551 
1552     // Create 32 bit element offsets buffer to use
1553     std::vector<uint32_t> data_element_offsets = {0, 1, 3, 5};
1554 
1555     SECTION("Ordered write") {
1556       write_dense_array(
1557           ctx, array_name, data, data_element_offsets, TILEDB_ROW_MAJOR);
1558       read_and_check_dense_array(ctx, array_name, data, data_element_offsets);
1559     }
1560     SECTION("Global order write") {
1561       write_dense_array(
1562           ctx, array_name, data, data_element_offsets, TILEDB_GLOBAL_ORDER);
1563       read_and_check_dense_array(ctx, array_name, data, data_element_offsets);
1564     }
1565   }
1566 
1567   SECTION("Extra element") {
1568     config["sm.var_offsets.extra_element"] = "true";
1569     Context ctx(config);
1570 
1571     // Check the extra element is included in the offsets
1572     uint32_t data_size = static_cast<uint32_t>(sizeof(data[0]) * data.size());
1573     std::vector<uint32_t> data_byte_offsets = {0, 4, 12, 20, data_size};
1574 
1575     SECTION("Ordered write") {
1576       write_dense_array(
1577           ctx, array_name, data, data_byte_offsets, TILEDB_ROW_MAJOR);
1578       read_and_check_dense_array(ctx, array_name, data, data_byte_offsets);
1579     }
1580     SECTION("Global order write") {
1581       write_dense_array(
1582           ctx, array_name, data, data_byte_offsets, TILEDB_GLOBAL_ORDER);
1583       read_and_check_dense_array(ctx, array_name, data, data_byte_offsets);
1584     }
1585   }
1586 
1587   // Clean up
1588   config["sm.var_offsets.extra_element"] = "false";
1589   config["sm.var_offsets.mode"] = "bytes";
1590   config["sm.var_offsets.bitsize"] = 64;
1591   Context ctx2(config);
1592   VFS vfs(ctx2);
1593   if (vfs.is_dir(array_name))
1594     vfs.remove_dir(array_name);
1595 }
1596 
1597 TEST_CASE(
1598     "C++ API: Test 32-bit offsets: sparse array with string dimension",
1599     "[var-offsets-dim][32bit-offset][sparse]") {
1600   std::string array_name = "test_32bit_offset_string_dim";
1601 
1602   /*
1603     Write an array with string dimension and make sure we get back
1604     proper offsets along with extra element in read.
1605   */
1606 
1607   // Create data buffer to use
1608   std::string data = "aabbbcdddd";
1609   // Create 32 bit offsets byte buffer to use
1610   std::vector<uint64_t> data_elem_offsets = {0, 2, 5, 6};
1611 
1612   // Create and write array
1613   {
1614     Context ctx;
1615     Domain domain(ctx);
1616     domain.add_dimension(
1617         Dimension::create(ctx, "dim1", TILEDB_STRING_ASCII, nullptr, nullptr));
1618 
1619     ArraySchema schema(ctx, TILEDB_SPARSE);
1620     schema.set_domain(domain);
1621 
1622     tiledb::Array::create(array_name, schema);
1623 
1624     auto array = tiledb::Array(ctx, array_name, TILEDB_WRITE);
1625     Query query(ctx, array, TILEDB_WRITE);
1626     query.set_data_buffer("dim1", (char*)data.data(), data.size());
1627     query.set_offsets_buffer(
1628         "dim1", data_elem_offsets.data(), data_elem_offsets.size());
1629 
1630     query.set_layout(TILEDB_UNORDERED);
1631     query.submit();
1632     query.finalize();
1633     array.close();
1634   }
1635 
1636   {
1637     Config config;
1638     // Change config of offsets bitsize from 64 to 32
1639     config["sm.var_offsets.bitsize"] = 32;
1640     // Add extra element
1641     config["sm.var_offsets.extra_element"] = "true";
1642     Context ctx(config);
1643 
1644     std::vector<uint32_t> offsets_back(5);
1645     std::string data_back;
1646     data_back.resize(data.size());
1647 
1648     auto array = tiledb::Array(ctx, array_name, TILEDB_READ);
1649     Query query(ctx, array, TILEDB_READ);
1650     query.add_range(0, std::string("aa"), std::string("dddd"));
1651     query.set_data_buffer("dim1", (char*)data_back.data(), data_back.size());
1652     query.set_offsets_buffer(
1653         "dim1", (uint64_t*)offsets_back.data(), offsets_back.size());
1654 
1655     query.submit();
1656 
1657     CHECK(query.query_status() == Query::Status::COMPLETE);
1658     CHECK(offsets_back[4] == data.size());
1659   }
1660 
1661   // Regression test for https://github.com/TileDB-Inc/TileDB/pull/2540
1662   // Test that the query execution with empty result does not write out
1663   // of buffer bounds. We create an oversize buffer with guard values
1664   // ahead of the actual pointer range given to libtiledb; we run the
1665   // query to completion with empty result; then we check the guard
1666   // values. This test fails prior to PR#2540.
1667   {
1668     Config config;
1669     // Change config of offsets bitsize from 64 to 32
1670     config["sm.var_offsets.bitsize"] = 32;
1671     // Add extra element
1672     config["sm.var_offsets.extra_element"] = "true";
1673     Context ctx(config);
1674 
1675     std::vector<uint32_t> offsets_back(14);
1676 
1677     const std::vector<size_t> guard_idx = {0, 1, 2, 3, 10, 11, 12, 13};
1678     const uint32_t guard_val =
1679         std::numeric_limits<uint32_t>::max() - (uint32_t)10;
1680     for (auto idx : guard_idx) {
1681       offsets_back[idx] = guard_val;
1682     }
1683     std::string data_back;
1684     data_back.resize(data.size());
1685 
1686     auto array = tiledb::Array(ctx, array_name, TILEDB_READ);
1687     Query query(ctx, array, TILEDB_READ);
1688     // this query range should return empty result
1689     query.add_range(0, std::string("xyz"), std::string("xyz"));
1690     query.set_data_buffer("dim1", (char*)data_back.data(), data_back.size());
1691 
1692     // here we set the buffer at an offset of 2*uint64_t (== 4 * uint32_t)
1693     // from the real start because we cast to uint64_t* to keep the C++
1694     // API type-check happy
1695     query.set_offsets_buffer(
1696         "dim1", (uint64_t*)offsets_back.data() + 2, offsets_back.size() - 2);
1697 
1698     query.submit();
1699 
1700     CHECK(query.query_status() == Query::Status::COMPLETE);
1701 
1702     // check the guard values match on both sides of the buffer
1703     for (auto idx : guard_idx) {
1704       CHECK(offsets_back[idx] == guard_val);
1705     }
1706   }
1707 
1708   Context ctx;
1709   VFS vfs(ctx);
1710   if (vfs.is_dir(array_name))
1711     vfs.remove_dir(array_name);
1712 }