1 /**
2 * @file fragment_metadata.cc
3 *
4 * @section LICENSE
5 *
6 * The MIT License
7 *
8 * @copyright Copyright (c) 2017-2021 TileDB, Inc.
9 * @copyright Copyright (c) 2016 MIT and Intel Corporation
10 *
11 * Permission is hereby granted, free of charge, to any person obtaining a copy
12 * of this software and associated documentation files (the "Software"), to deal
13 * in the Software without restriction, including without limitation the rights
14 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
15 * copies of the Software, and to permit persons to whom the Software is
16 * furnished to do so, subject to the following conditions:
17 *
18 * The above copyright notice and this permission notice shall be included in
19 * all copies or substantial portions of the Software.
20 *
21 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
22 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
23 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
24 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
25 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
26 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
27 * THE SOFTWARE.
28 *
29 * @section DESCRIPTION
30 *
31 * This file implements the FragmentMetadata class.
32 */
33
34 #include "tiledb/sm/fragment/fragment_metadata.h"
35 #include "tiledb/common/heap_memory.h"
36 #include "tiledb/common/logger.h"
37 #include "tiledb/sm/array_schema/array_schema.h"
38 #include "tiledb/sm/array_schema/attribute.h"
39 #include "tiledb/sm/array_schema/dimension.h"
40 #include "tiledb/sm/array_schema/domain.h"
41 #include "tiledb/sm/buffer/buffer.h"
42 #include "tiledb/sm/filesystem/vfs.h"
43 #include "tiledb/sm/misc/constants.h"
44 #include "tiledb/sm/misc/utils.h"
45 #include "tiledb/sm/stats/global_stats.h"
46 #include "tiledb/sm/storage_manager/open_array_memory_tracker.h"
47 #include "tiledb/sm/storage_manager/storage_manager.h"
48 #include "tiledb/sm/tile/generic_tile_io.h"
49 #include "tiledb/sm/tile/tile.h"
50
51 #include <cassert>
52 #include <iostream>
53 #include <string>
54
55 using namespace tiledb::common;
56
57 namespace tiledb {
58 namespace sm {
59
60 /* ****************************** */
61 /* CONSTRUCTORS & DESTRUCTORS */
62 /* ****************************** */
63
FragmentMetadata(StorageManager * storage_manager,const ArraySchema * array_schema,const URI & fragment_uri,const std::pair<uint64_t,uint64_t> & timestamp_range,bool dense)64 FragmentMetadata::FragmentMetadata(
65 StorageManager* storage_manager,
66 const ArraySchema* array_schema,
67 const URI& fragment_uri,
68 const std::pair<uint64_t, uint64_t>& timestamp_range,
69 bool dense)
70 : storage_manager_(storage_manager)
71 , array_schema_(array_schema)
72 , dense_(dense)
73 , fragment_uri_(fragment_uri)
74 , timestamp_range_(timestamp_range) {
75 has_consolidated_footer_ = false;
76 rtree_ = RTree(array_schema_->domain(), constants::rtree_fanout);
77 meta_file_size_ = 0;
78 version_ = array_schema_->write_version();
79 tile_index_base_ = 0;
80 sparse_tile_num_ = 0;
81 footer_size_ = 0;
82 footer_offset_ = 0;
83
84 build_idx_map();
85 array_schema_->get_name(&array_schema_name_);
86 array_uri_ = array_schema_->array_uri();
87 }
88
/** Destructor; uses the compiler-generated implementation. */
FragmentMetadata::~FragmentMetadata() = default;
90
91 // Copy initialization
/**
 * Copy constructor.
 *
 * All members are first default-initialized, then the copy assignment
 * operator transfers the same subset of fields from `other`.
 *
 * @param other The object to copy from.
 */
FragmentMetadata::FragmentMetadata(const FragmentMetadata& other) {
  *this = other;
}
110
/**
 * Copy assignment.
 *
 * Copies only the fields listed below from `other`; any member not listed
 * here is left untouched in `*this`.
 *
 * @param other The object to copy from.
 * @return A reference to `*this`.
 */
FragmentMetadata& FragmentMetadata::operator=(const FragmentMetadata& other) {
  // Assigning an object to itself is a no-op; skip the member-wise copies.
  if (this == &other)
    return *this;

  storage_manager_ = other.storage_manager_;
  array_schema_ = other.array_schema_;
  dense_ = other.dense_;
  fragment_uri_ = other.fragment_uri_;
  timestamp_range_ = other.timestamp_range_;
  has_consolidated_footer_ = other.has_consolidated_footer_;
  rtree_ = other.rtree_;
  meta_file_size_ = other.meta_file_size_;
  version_ = other.version_;
  tile_index_base_ = other.tile_index_base_;
  sparse_tile_num_ = other.sparse_tile_num_;
  footer_size_ = other.footer_size_;
  footer_offset_ = other.footer_offset_;
  idx_map_ = other.idx_map_;
  array_schema_name_ = other.array_schema_name_;
  array_uri_ = other.array_uri_;

  return *this;
}
131
132 /* ****************************** */
133 /* API */
134 /* ****************************** */
135
set_mbr(uint64_t tile,const NDRange & mbr)136 Status FragmentMetadata::set_mbr(uint64_t tile, const NDRange& mbr) {
137 // For easy reference
138 tile += tile_index_base_;
139 RETURN_NOT_OK(rtree_.set_leaf(tile, mbr));
140 return expand_non_empty_domain(mbr);
141 }
142
set_tile_index_base(uint64_t tile_base)143 void FragmentMetadata::set_tile_index_base(uint64_t tile_base) {
144 tile_index_base_ = tile_base;
145 }
146
set_tile_offset(const std::string & name,uint64_t tid,uint64_t step)147 void FragmentMetadata::set_tile_offset(
148 const std::string& name, uint64_t tid, uint64_t step) {
149 auto it = idx_map_.find(name);
150 assert(it != idx_map_.end());
151 auto idx = it->second;
152 tid += tile_index_base_;
153 assert(tid < tile_offsets_[idx].size());
154 tile_offsets_[idx][tid] = file_sizes_[idx];
155 file_sizes_[idx] += step;
156 }
157
set_tile_var_offset(const std::string & name,uint64_t tid,uint64_t step)158 void FragmentMetadata::set_tile_var_offset(
159 const std::string& name, uint64_t tid, uint64_t step) {
160 auto it = idx_map_.find(name);
161 assert(it != idx_map_.end());
162 auto idx = it->second;
163 tid += tile_index_base_;
164 assert(tid < tile_var_offsets_[idx].size());
165 tile_var_offsets_[idx][tid] = file_var_sizes_[idx];
166 file_var_sizes_[idx] += step;
167 }
168
set_tile_var_size(const std::string & name,uint64_t tid,uint64_t size)169 void FragmentMetadata::set_tile_var_size(
170 const std::string& name, uint64_t tid, uint64_t size) {
171 auto it = idx_map_.find(name);
172 assert(it != idx_map_.end());
173 auto idx = it->second;
174 tid += tile_index_base_;
175 assert(tid < tile_var_sizes_[idx].size());
176 tile_var_sizes_[idx][tid] = size;
177 }
178
set_tile_validity_offset(const std::string & name,uint64_t tid,uint64_t step)179 void FragmentMetadata::set_tile_validity_offset(
180 const std::string& name, uint64_t tid, uint64_t step) {
181 auto it = idx_map_.find(name);
182 assert(it != idx_map_.end());
183 auto idx = it->second;
184 tid += tile_index_base_;
185 assert(tid < tile_validity_offsets_[idx].size());
186 tile_validity_offsets_[idx][tid] = file_validity_sizes_[idx];
187 file_validity_sizes_[idx] += step;
188 }
189
set_array_schema(ArraySchema * array_schema)190 void FragmentMetadata::set_array_schema(ArraySchema* array_schema) {
191 array_schema_ = array_schema;
192
193 // Rebuild index mapping
194 build_idx_map();
195 }
196
cell_num() const197 uint64_t FragmentMetadata::cell_num() const {
198 auto tile_num = this->tile_num();
199 assert(tile_num != 0);
200 if (dense_) { // Dense fragment
201 return tile_num * array_schema_->domain()->cell_num_per_tile();
202 } else { // Sparse fragment
203 return (tile_num - 1) * array_schema_->capacity() + last_tile_cell_num();
204 }
205 }
206
cell_num(uint64_t tile_pos) const207 uint64_t FragmentMetadata::cell_num(uint64_t tile_pos) const {
208 if (dense_)
209 return array_schema_->domain()->cell_num_per_tile();
210
211 uint64_t tile_num = this->tile_num();
212 if (tile_pos != tile_num - 1)
213 return array_schema_->capacity();
214
215 return last_tile_cell_num();
216 }
217
add_max_buffer_sizes(const EncryptionKey & encryption_key,const void * subarray,std::unordered_map<std::string,std::pair<uint64_t,uint64_t>> * buffer_sizes)218 Status FragmentMetadata::add_max_buffer_sizes(
219 const EncryptionKey& encryption_key,
220 const void* subarray,
221 std::unordered_map<std::string, std::pair<uint64_t, uint64_t>>*
222 buffer_sizes) {
223 // Dense case
224 if (dense_)
225 return add_max_buffer_sizes_dense(subarray, buffer_sizes);
226
227 // Convert subarray to NDRange
228 auto dim_num = array_schema_->dim_num();
229 auto sub_ptr = (const unsigned char*)subarray;
230 NDRange sub_nd(dim_num);
231 uint64_t offset = 0;
232 for (unsigned d = 0; d < dim_num; ++d) {
233 auto r_size = 2 * array_schema_->dimension(d)->coord_size();
234 sub_nd[d].set_range(&sub_ptr[offset], r_size);
235 offset += r_size;
236 }
237
238 // Sparse case
239 return add_max_buffer_sizes_sparse(encryption_key, sub_nd, buffer_sizes);
240 }
241
add_max_buffer_sizes_dense(const void * subarray,std::unordered_map<std::string,std::pair<uint64_t,uint64_t>> * buffer_sizes)242 Status FragmentMetadata::add_max_buffer_sizes_dense(
243 const void* subarray,
244 std::unordered_map<std::string, std::pair<uint64_t, uint64_t>>*
245 buffer_sizes) {
246 // Note: applicable only to the dense case where all dimensions
247 // have the same type
248 auto type = array_schema_->dimension(0)->type();
249 switch (type) {
250 case Datatype::INT32:
251 return add_max_buffer_sizes_dense<int32_t>(
252 static_cast<const int32_t*>(subarray), buffer_sizes);
253 case Datatype::INT64:
254 return add_max_buffer_sizes_dense<int64_t>(
255 static_cast<const int64_t*>(subarray), buffer_sizes);
256 case Datatype::FLOAT32:
257 return add_max_buffer_sizes_dense<float>(
258 static_cast<const float*>(subarray), buffer_sizes);
259 case Datatype::FLOAT64:
260 return add_max_buffer_sizes_dense<double>(
261 static_cast<const double*>(subarray), buffer_sizes);
262 case Datatype::INT8:
263 return add_max_buffer_sizes_dense<int8_t>(
264 static_cast<const int8_t*>(subarray), buffer_sizes);
265 case Datatype::UINT8:
266 return add_max_buffer_sizes_dense<uint8_t>(
267 static_cast<const uint8_t*>(subarray), buffer_sizes);
268 case Datatype::INT16:
269 return add_max_buffer_sizes_dense<int16_t>(
270 static_cast<const int16_t*>(subarray), buffer_sizes);
271 case Datatype::UINT16:
272 return add_max_buffer_sizes_dense<uint16_t>(
273 static_cast<const uint16_t*>(subarray), buffer_sizes);
274 case Datatype::UINT32:
275 return add_max_buffer_sizes_dense<uint32_t>(
276 static_cast<const uint32_t*>(subarray), buffer_sizes);
277 case Datatype::UINT64:
278 return add_max_buffer_sizes_dense<uint64_t>(
279 static_cast<const uint64_t*>(subarray), buffer_sizes);
280 case Datatype::DATETIME_YEAR:
281 case Datatype::DATETIME_MONTH:
282 case Datatype::DATETIME_WEEK:
283 case Datatype::DATETIME_DAY:
284 case Datatype::DATETIME_HR:
285 case Datatype::DATETIME_MIN:
286 case Datatype::DATETIME_SEC:
287 case Datatype::DATETIME_MS:
288 case Datatype::DATETIME_US:
289 case Datatype::DATETIME_NS:
290 case Datatype::DATETIME_PS:
291 case Datatype::DATETIME_FS:
292 case Datatype::DATETIME_AS:
293 case Datatype::TIME_HR:
294 case Datatype::TIME_MIN:
295 case Datatype::TIME_SEC:
296 case Datatype::TIME_MS:
297 case Datatype::TIME_US:
298 case Datatype::TIME_NS:
299 case Datatype::TIME_PS:
300 case Datatype::TIME_FS:
301 case Datatype::TIME_AS:
302 return add_max_buffer_sizes_dense<int64_t>(
303 static_cast<const int64_t*>(subarray), buffer_sizes);
304 default:
305 return LOG_STATUS(Status::FragmentMetadataError(
306 "Cannot compute add read buffer sizes for dense array; Unsupported "
307 "domain type"));
308 }
309
310 return Status::Ok();
311 }
312
313 template <class T>
add_max_buffer_sizes_dense(const T * subarray,std::unordered_map<std::string,std::pair<uint64_t,uint64_t>> * buffer_sizes)314 Status FragmentMetadata::add_max_buffer_sizes_dense(
315 const T* subarray,
316 std::unordered_map<std::string, std::pair<uint64_t, uint64_t>>*
317 buffer_sizes) {
318 // Calculate the ids of all tiles overlapping with subarray
319 auto tids = compute_overlapping_tile_ids(subarray);
320 uint64_t size = 0;
321
322 // Compute buffer sizes
323 for (auto& tid : tids) {
324 for (auto& it : *buffer_sizes) {
325 if (array_schema_->var_size(it.first)) {
326 auto cell_num = this->cell_num(tid);
327 it.second.first += cell_num * constants::cell_var_offset_size;
328 RETURN_NOT_OK(tile_var_size(it.first, tid, &size));
329 it.second.second += size;
330 } else {
331 it.second.first += cell_num(tid) * array_schema_->cell_size(it.first);
332 }
333 }
334 }
335
336 return Status::Ok();
337 }
338
add_max_buffer_sizes_sparse(const EncryptionKey & encryption_key,const NDRange & subarray,std::unordered_map<std::string,std::pair<uint64_t,uint64_t>> * buffer_sizes)339 Status FragmentMetadata::add_max_buffer_sizes_sparse(
340 const EncryptionKey& encryption_key,
341 const NDRange& subarray,
342 std::unordered_map<std::string, std::pair<uint64_t, uint64_t>>*
343 buffer_sizes) {
344 RETURN_NOT_OK(load_rtree(encryption_key));
345
346 // Get tile overlap
347 auto tile_overlap = rtree_.get_tile_overlap(subarray);
348 uint64_t size = 0;
349
350 // Handle tile ranges
351 for (const auto& tr : tile_overlap.tile_ranges_) {
352 for (uint64_t tid = tr.first; tid <= tr.second; ++tid) {
353 for (auto& it : *buffer_sizes) {
354 if (array_schema_->var_size(it.first)) {
355 auto cell_num = this->cell_num(tid);
356 it.second.first += cell_num * constants::cell_var_offset_size;
357 RETURN_NOT_OK(tile_var_size(it.first, tid, &size));
358 it.second.second += size;
359 } else {
360 it.second.first += cell_num(tid) * array_schema_->cell_size(it.first);
361 }
362 }
363 }
364 }
365
366 // Handle individual tiles
367 for (const auto& t : tile_overlap.tiles_) {
368 auto tid = t.first;
369 for (auto& it : *buffer_sizes) {
370 if (array_schema_->var_size(it.first)) {
371 auto cell_num = this->cell_num(tid);
372 it.second.first += cell_num * constants::cell_var_offset_size;
373 RETURN_NOT_OK(tile_var_size(it.first, tid, &size));
374 it.second.second += size;
375 } else {
376 it.second.first += cell_num(tid) * array_schema_->cell_size(it.first);
377 }
378 }
379 }
380
381 return Status::Ok();
382 }
383
dense() const384 bool FragmentMetadata::dense() const {
385 return dense_;
386 }
387
domain() const388 const NDRange& FragmentMetadata::domain() const {
389 return domain_;
390 }
391
format_version() const392 uint32_t FragmentMetadata::format_version() const {
393 return version_;
394 }
395
fragment_size(uint64_t * size) const396 Status FragmentMetadata::fragment_size(uint64_t* size) const {
397 // Add file sizes
398 *size = 0;
399 for (const auto& file_size : file_sizes_)
400 *size += file_size;
401 for (const auto& file_var_size : file_var_sizes_)
402 *size += file_var_size;
403 for (const auto& file_validity_size : file_validity_sizes_)
404 *size += file_validity_size;
405
406 // The fragment metadata file size can be empty when we've loaded consolidated
407 // metadata
408 uint64_t meta_file_size = meta_file_size_;
409 if (meta_file_size == 0) {
410 auto meta_uri = fragment_uri_.join_path(
411 std::string(constants::fragment_metadata_filename));
412 RETURN_NOT_OK(
413 storage_manager_->vfs()->file_size(meta_uri, &meta_file_size));
414 }
415 // Validate that the meta_file_size is not zero, either preloaded or fetched
416 // above
417 assert(meta_file_size != 0);
418
419 // Add fragment metadata file size
420 *size += meta_file_size;
421
422 return Status::Ok();
423 }
424
fragment_uri() const425 const URI& FragmentMetadata::fragment_uri() const {
426 return fragment_uri_;
427 }
428
has_consolidated_footer() const429 bool FragmentMetadata::has_consolidated_footer() const {
430 return has_consolidated_footer_;
431 }
432
overlaps_non_empty_domain(const NDRange & range) const433 bool FragmentMetadata::overlaps_non_empty_domain(const NDRange& range) const {
434 return array_schema_->domain()->overlap(range, non_empty_domain_);
435 }
436
get_tile_overlap(const NDRange & range,TileOverlap * tile_overlap)437 Status FragmentMetadata::get_tile_overlap(
438 const NDRange& range, TileOverlap* tile_overlap) {
439 assert(version_ <= 2 || loaded_metadata_.rtree_);
440 *tile_overlap = rtree_.get_tile_overlap(range);
441 return Status::Ok();
442 }
443
compute_tile_bitmap(const Range & range,unsigned d,std::vector<uint8_t> * tile_bitmap)444 void FragmentMetadata::compute_tile_bitmap(
445 const Range& range, unsigned d, std::vector<uint8_t>* tile_bitmap) {
446 assert(version_ <= 2 || loaded_metadata_.rtree_);
447 rtree_.compute_tile_bitmap(range, d, tile_bitmap);
448 }
449
init(const NDRange & non_empty_domain)450 Status FragmentMetadata::init(const NDRange& non_empty_domain) {
451 // For easy reference
452 auto dim_num = array_schema_->dim_num();
453 auto num = array_schema_->attribute_num() + dim_num + 1;
454 auto domain = array_schema_->domain();
455
456 // Sanity check
457 assert(!non_empty_domain.empty());
458 assert(non_empty_domain_.empty());
459 assert(domain_.empty());
460
461 // Set non-empty domain for dense arrays (for sparse it will be calculated
462 // via the MBRs)
463 if (dense_) {
464 non_empty_domain_ = non_empty_domain;
465
466 // The following is needed in case the fragment is a result of
467 // dense consolidation, as the consolidator may have expanded
468 // the fragment domain beyond the array domain to include
469 // integral space tiles
470 domain->crop_ndrange(&non_empty_domain_);
471
472 // Set expanded domain
473 domain_ = non_empty_domain_;
474 domain->expand_to_tiles(&domain_);
475 }
476
477 // Set last tile cell number
478 last_tile_cell_num_ = 0;
479
480 // Initialize tile offsets
481 tile_offsets_.resize(num);
482 tile_offsets_mtx_.resize(num);
483 file_sizes_.resize(num);
484 for (unsigned int i = 0; i < num; ++i)
485 file_sizes_[i] = 0;
486
487 // Initialize variable tile offsets
488 tile_var_offsets_.resize(num);
489 tile_var_offsets_mtx_.resize(num);
490 file_var_sizes_.resize(num);
491 for (unsigned int i = 0; i < num; ++i)
492 file_var_sizes_[i] = 0;
493
494 // Initialize variable tile sizes
495 tile_var_sizes_.resize(num);
496
497 // Initialize validity tile offsets
498 tile_validity_offsets_.resize(num);
499 file_validity_sizes_.resize(num);
500 for (unsigned int i = 0; i < num; ++i)
501 file_validity_sizes_[i] = 0;
502
503 return Status::Ok();
504 }
505
last_tile_cell_num() const506 uint64_t FragmentMetadata::last_tile_cell_num() const {
507 return last_tile_cell_num_;
508 }
509
load(const EncryptionKey & encryption_key,Buffer * f_buff,uint64_t offset,std::unordered_map<std::string,tiledb_shared_ptr<ArraySchema>> array_schemas)510 Status FragmentMetadata::load(
511 const EncryptionKey& encryption_key,
512 Buffer* f_buff,
513 uint64_t offset,
514 std::unordered_map<std::string, tiledb_shared_ptr<ArraySchema>>
515 array_schemas) {
516 auto meta_uri = fragment_uri_.join_path(
517 std::string(constants::fragment_metadata_filename));
518 // Load the metadata file size when we are not reading from consolidated
519 // buffer
520 if (f_buff == nullptr)
521 RETURN_NOT_OK(
522 storage_manager_->vfs()->file_size(meta_uri, &meta_file_size_));
523
524 // Get fragment name version
525 uint32_t f_version;
526 auto name = fragment_uri_.remove_trailing_slash().last_path_part();
527 RETURN_NOT_OK(utils::parse::get_fragment_name_version(name, &f_version));
528
529 // Note: The fragment name version is different from the fragment format
530 // version.
531 // - Version 1 corresponds to format versions 1 and 2
532 // * __uuid_<t1>{_t2}
533 // - Version 2 corresponds to version 3 and 4
534 // * __t1_t2_uuid
535 // - Version 3 corresponds to version 5 or higher
536 // * __t1_t2_uuid_version
537 if (f_version == 1)
538 return load_v1_v2(encryption_key, array_schemas);
539 return load_v3_or_higher(encryption_key, f_buff, offset, array_schemas);
540 }
541
store(const EncryptionKey & encryption_key)542 Status FragmentMetadata::store(const EncryptionKey& encryption_key) {
543 auto timer_se =
544 storage_manager_->stats()->start_timer("write_store_frag_meta");
545
546 auto fragment_metadata_uri =
547 fragment_uri_.join_path(constants::fragment_metadata_filename);
548 auto num = array_schema_->attribute_num() + array_schema_->dim_num() + 1;
549 uint64_t offset = 0, nbytes;
550
551 // Store R-Tree
552 gt_offsets_.rtree_ = offset;
553 RETURN_NOT_OK_ELSE(store_rtree(encryption_key, &nbytes), clean_up());
554 offset += nbytes;
555
556 // Store tile offsets
557 gt_offsets_.tile_offsets_.resize(num);
558 for (unsigned int i = 0; i < num; ++i) {
559 gt_offsets_.tile_offsets_[i] = offset;
560 RETURN_NOT_OK_ELSE(
561 store_tile_offsets(i, encryption_key, &nbytes), clean_up());
562 offset += nbytes;
563 }
564
565 // Store tile var offsets
566 gt_offsets_.tile_var_offsets_.resize(num);
567 for (unsigned int i = 0; i < num; ++i) {
568 gt_offsets_.tile_var_offsets_[i] = offset;
569 RETURN_NOT_OK_ELSE(
570 store_tile_var_offsets(i, encryption_key, &nbytes), clean_up());
571 offset += nbytes;
572 }
573
574 // Store tile var sizes
575 gt_offsets_.tile_var_sizes_.resize(num);
576 for (unsigned int i = 0; i < num; ++i) {
577 gt_offsets_.tile_var_sizes_[i] = offset;
578 RETURN_NOT_OK_ELSE(
579 store_tile_var_sizes(i, encryption_key, &nbytes), clean_up());
580 offset += nbytes;
581 }
582
583 // Store validity tile offsets
584 if (version_ >= 7) {
585 gt_offsets_.tile_validity_offsets_.resize(num);
586 for (unsigned int i = 0; i < num; ++i) {
587 gt_offsets_.tile_validity_offsets_[i] = offset;
588 RETURN_NOT_OK_ELSE(
589 store_tile_validity_offsets(i, encryption_key, &nbytes), clean_up());
590 offset += nbytes;
591 }
592 }
593
594 // Store footer
595 RETURN_NOT_OK_ELSE(store_footer(encryption_key), clean_up());
596
597 // Close file
598 return storage_manager_->close_file(fragment_metadata_uri);
599 }
600
non_empty_domain()601 const NDRange& FragmentMetadata::non_empty_domain() {
602 return non_empty_domain_;
603 }
604
set_num_tiles(uint64_t num_tiles)605 Status FragmentMetadata::set_num_tiles(uint64_t num_tiles) {
606 auto num = array_schema_->attribute_num() + 1 + array_schema_->dim_num();
607
608 for (unsigned i = 0; i < num; i++) {
609 assert(num_tiles >= tile_offsets_[i].size());
610 tile_offsets_[i].resize(num_tiles, 0);
611 tile_var_offsets_[i].resize(num_tiles, 0);
612 tile_var_sizes_[i].resize(num_tiles, 0);
613 tile_validity_offsets_[i].resize(num_tiles, 0);
614 }
615
616 if (!dense_) {
617 rtree_.set_leaf_num(num_tiles);
618 sparse_tile_num_ = num_tiles;
619 }
620
621 return Status::Ok();
622 }
623
set_last_tile_cell_num(uint64_t cell_num)624 void FragmentMetadata::set_last_tile_cell_num(uint64_t cell_num) {
625 last_tile_cell_num_ = cell_num;
626 }
627
tile_index_base() const628 uint64_t FragmentMetadata::tile_index_base() const {
629 return tile_index_base_;
630 }
631
tile_num() const632 uint64_t FragmentMetadata::tile_num() const {
633 if (dense_)
634 return array_schema_->domain()->tile_num(domain_);
635
636 return sparse_tile_num_;
637 }
638
encode_name(const std::string & name) const639 std::string FragmentMetadata::encode_name(const std::string& name) const {
640 if (version_ <= 7)
641 return name;
642
643 if (version_ == 8) {
644 static const std::unordered_map<char, std::string> percent_encoding{
645 // RFC 3986
646 {'!', "%21"},
647 {'#', "%23"},
648 {'$', "%24"},
649 {'%', "%25"},
650 {'&', "%26"},
651 {'\'', "%27"},
652 {'(', "%28"},
653 {')', "%29"},
654 {'*', "%2A"},
655 {'+', "%2B"},
656 {',', "%2C"},
657 {'/', "%2F"},
658 {':', "%3A"},
659 {';', "%3B"},
660 {'=', "%3D"},
661 {'?', "%3F"},
662 {'@', "%40"},
663 {'[', "%5B"},
664 {']', "%5D"},
665 // Extra encodings to cover illegal characters on Windows
666 {'\"', "%22"},
667 {'<', "%20"},
668 {'>', "%2D"},
669 {'\\', "%30"},
670 {'|', "%3C"}};
671
672 std::stringstream percent_encoded_name;
673 for (const char c : name) {
674 if (percent_encoding.count(c) == 0)
675 percent_encoded_name << c;
676 else
677 percent_encoded_name << percent_encoding.at(c);
678 }
679
680 return percent_encoded_name.str();
681 }
682
683 assert(version_ > 8);
684 const auto iter = idx_map_.find(name);
685 if (iter == idx_map_.end())
686 LOG_FATAL("Name " + name + " not in idx_map_");
687 const unsigned idx = iter->second;
688
689 const std::vector<tiledb::sm::Attribute*> attributes =
690 array_schema_->attributes();
691 for (unsigned i = 0; i < attributes.size(); ++i) {
692 const std::string attr_name = attributes[i]->name();
693 if (attr_name == name) {
694 return "a" + std::to_string(idx);
695 }
696 }
697
698 for (unsigned i = 0; i < array_schema_->dim_num(); ++i) {
699 const std::string dim_name = array_schema_->dimension(i)->name();
700 if (dim_name == name) {
701 const unsigned dim_idx = idx - array_schema_->attribute_num() - 1;
702 return "d" + std::to_string(dim_idx);
703 }
704 }
705
706 if (name == constants::coords) {
707 return name;
708 }
709
710 LOG_FATAL("Unable to locate dimension/attribute " + name);
711 return "";
712 }
713
uri(const std::string & name) const714 URI FragmentMetadata::uri(const std::string& name) const {
715 return fragment_uri_.join_path(encode_name(name) + constants::file_suffix);
716 }
717
var_uri(const std::string & name) const718 URI FragmentMetadata::var_uri(const std::string& name) const {
719 return fragment_uri_.join_path(
720 encode_name(name) + "_var" + constants::file_suffix);
721 }
722
validity_uri(const std::string & name) const723 URI FragmentMetadata::validity_uri(const std::string& name) const {
724 return fragment_uri_.join_path(
725 encode_name(name) + "_validity" + constants::file_suffix);
726 }
727
array_schema_name()728 const std::string& FragmentMetadata::array_schema_name() {
729 return array_schema_name_;
730 }
731
load_tile_offsets(const EncryptionKey & encryption_key,std::vector<std::string> && names)732 Status FragmentMetadata::load_tile_offsets(
733 const EncryptionKey& encryption_key, std::vector<std::string>&& names) {
734 // Sort 'names' in ascending order of their index. The
735 // motivation is to load the offsets in order of their
736 // layout for sequential reads to the file.
737 std::sort(
738 names.begin(),
739 names.end(),
740 [&](const std::string& lhs, const std::string& rhs) {
741 assert(idx_map_.count(lhs) > 0);
742 assert(idx_map_.count(rhs) > 0);
743 return idx_map_[lhs] < idx_map_[rhs];
744 });
745
746 // The fixed offsets are located before the
747 // var offsets. Load all of the fixed offsets
748 // first.
749 for (const auto& name : names) {
750 RETURN_NOT_OK(load_tile_offsets(encryption_key, idx_map_[name]));
751 }
752
753 // Load all of the var offsets.
754 for (const auto& name : names) {
755 if (array_schema_->var_size(name))
756 RETURN_NOT_OK(load_tile_var_offsets(encryption_key, idx_map_[name]));
757 }
758
759 // Load all of the var offsets.
760 for (const auto& name : names) {
761 if (array_schema_->is_nullable(name))
762 RETURN_NOT_OK(load_tile_validity_offsets(encryption_key, idx_map_[name]));
763 }
764
765 return Status::Ok();
766 }
767
file_offset(const std::string & name,uint64_t tile_idx,uint64_t * offset)768 Status FragmentMetadata::file_offset(
769 const std::string& name, uint64_t tile_idx, uint64_t* offset) {
770 auto it = idx_map_.find(name);
771 assert(it != idx_map_.end());
772 auto idx = it->second;
773 if (!loaded_metadata_.tile_offsets_[idx])
774 return LOG_STATUS(Status::FragmentMetadataError(
775 "Trying to access metadata that's not loaded"));
776
777 *offset = tile_offsets_[idx][tile_idx];
778 return Status::Ok();
779 }
780
file_var_offset(const std::string & name,uint64_t tile_idx,uint64_t * offset)781 Status FragmentMetadata::file_var_offset(
782 const std::string& name, uint64_t tile_idx, uint64_t* offset) {
783 auto it = idx_map_.find(name);
784 assert(it != idx_map_.end());
785 auto idx = it->second;
786 if (!loaded_metadata_.tile_var_offsets_[idx])
787 return LOG_STATUS(Status::FragmentMetadataError(
788 "Trying to access metadata that's not loaded"));
789
790 *offset = tile_var_offsets_[idx][tile_idx];
791 return Status::Ok();
792 }
793
file_validity_offset(const std::string & name,uint64_t tile_idx,uint64_t * offset)794 Status FragmentMetadata::file_validity_offset(
795 const std::string& name, uint64_t tile_idx, uint64_t* offset) {
796 auto it = idx_map_.find(name);
797 assert(it != idx_map_.end());
798 auto idx = it->second;
799 if (!loaded_metadata_.tile_validity_offsets_[idx])
800 return LOG_STATUS(Status::FragmentMetadataError(
801 "Trying to access metadata that's not loaded"));
802
803 *offset = tile_validity_offsets_[idx][tile_idx];
804 return Status::Ok();
805 }
806
mbr(uint64_t tile_idx) const807 const NDRange& FragmentMetadata::mbr(uint64_t tile_idx) const {
808 return rtree_.leaf(tile_idx);
809 }
810
mbrs() const811 const std::vector<NDRange>& FragmentMetadata::mbrs() const {
812 return rtree_.leaves();
813 }
814
persisted_tile_size(const std::string & name,uint64_t tile_idx,uint64_t * tile_size)815 Status FragmentMetadata::persisted_tile_size(
816 const std::string& name, uint64_t tile_idx, uint64_t* tile_size) {
817 auto it = idx_map_.find(name);
818 assert(it != idx_map_.end());
819 auto idx = it->second;
820 if (!loaded_metadata_.tile_offsets_[idx])
821 return LOG_STATUS(Status::FragmentMetadataError(
822 "Trying to access metadata that's not loaded"));
823
824 auto tile_num = this->tile_num();
825
826 *tile_size =
827 (tile_idx != tile_num - 1) ?
828 tile_offsets_[idx][tile_idx + 1] - tile_offsets_[idx][tile_idx] :
829 file_sizes_[idx] - tile_offsets_[idx][tile_idx];
830
831 return Status::Ok();
832 }
833
persisted_tile_var_size(const std::string & name,uint64_t tile_idx,uint64_t * tile_size)834 Status FragmentMetadata::persisted_tile_var_size(
835 const std::string& name, uint64_t tile_idx, uint64_t* tile_size) {
836 auto it = idx_map_.find(name);
837 assert(it != idx_map_.end());
838 auto idx = it->second;
839
840 if (!loaded_metadata_.tile_var_offsets_[idx])
841 return LOG_STATUS(Status::FragmentMetadataError(
842 "Trying to access metadata that's not loaded"));
843
844 auto tile_num = this->tile_num();
845
846 *tile_size = (tile_idx != tile_num - 1) ?
847 tile_var_offsets_[idx][tile_idx + 1] -
848 tile_var_offsets_[idx][tile_idx] :
849 file_var_sizes_[idx] - tile_var_offsets_[idx][tile_idx];
850
851 return Status::Ok();
852 }
853
persisted_tile_validity_size(const std::string & name,uint64_t tile_idx,uint64_t * tile_size)854 Status FragmentMetadata::persisted_tile_validity_size(
855 const std::string& name, uint64_t tile_idx, uint64_t* tile_size) {
856 auto it = idx_map_.find(name);
857 assert(it != idx_map_.end());
858 auto idx = it->second;
859 if (!loaded_metadata_.tile_validity_offsets_[idx])
860 return LOG_STATUS(Status::FragmentMetadataError(
861 "Trying to access metadata that's not loaded"));
862
863 auto tile_num = this->tile_num();
864
865 *tile_size =
866 (tile_idx != tile_num - 1) ?
867 tile_validity_offsets_[idx][tile_idx + 1] -
868 tile_validity_offsets_[idx][tile_idx] :
869 file_validity_sizes_[idx] - tile_validity_offsets_[idx][tile_idx];
870
871 return Status::Ok();
872 }
873
tile_size(const std::string & name,uint64_t tile_idx) const874 uint64_t FragmentMetadata::tile_size(
875 const std::string& name, uint64_t tile_idx) const {
876 auto var_size = array_schema_->var_size(name);
877 auto cell_num = this->cell_num(tile_idx);
878 return (var_size) ? cell_num * constants::cell_var_offset_size :
879 cell_num * array_schema_->cell_size(name);
880 }
881
tile_var_size(const std::string & name,uint64_t tile_idx,uint64_t * tile_size)882 Status FragmentMetadata::tile_var_size(
883 const std::string& name, uint64_t tile_idx, uint64_t* tile_size) {
884 auto it = idx_map_.find(name);
885 assert(it != idx_map_.end());
886 auto idx = it->second;
887 if (!loaded_metadata_.tile_var_sizes_[idx])
888 return LOG_STATUS(Status::FragmentMetadataError(
889 "Trying to access metadata that's not loaded"));
890 *tile_size = tile_var_sizes_[idx][tile_idx];
891
892 return Status::Ok();
893 }
894
first_timestamp() const895 uint64_t FragmentMetadata::first_timestamp() const {
896 return timestamp_range_.first;
897 }
898
timestamp_range() const899 const std::pair<uint64_t, uint64_t>& FragmentMetadata::timestamp_range() const {
900 return timestamp_range_;
901 }
902
operator <(const FragmentMetadata & metadata) const903 bool FragmentMetadata::operator<(const FragmentMetadata& metadata) const {
904 return (timestamp_range_.first < metadata.timestamp_range_.first) ||
905 (timestamp_range_.first == metadata.timestamp_range_.first &&
906 fragment_uri_ < metadata.fragment_uri_);
907 }
908
write_footer(Buffer * buff) const909 Status FragmentMetadata::write_footer(Buffer* buff) const {
910 RETURN_NOT_OK(write_version(buff));
911 if (version_ >= 10) {
912 RETURN_NOT_OK(write_array_schema_name(buff));
913 }
914 RETURN_NOT_OK(write_dense(buff));
915 RETURN_NOT_OK(write_non_empty_domain(buff));
916 RETURN_NOT_OK(write_sparse_tile_num(buff));
917 RETURN_NOT_OK(write_last_tile_cell_num(buff));
918 RETURN_NOT_OK(write_file_sizes(buff));
919 RETURN_NOT_OK(write_file_var_sizes(buff));
920 RETURN_NOT_OK(write_file_validity_sizes(buff));
921 RETURN_NOT_OK(write_generic_tile_offsets(buff));
922 return Status::Ok();
923 }
924
load_rtree(const EncryptionKey & encryption_key)925 Status FragmentMetadata::load_rtree(const EncryptionKey& encryption_key) {
926 if (version_ <= 2)
927 return Status::Ok();
928
929 std::lock_guard<std::mutex> lock(mtx_);
930
931 if (loaded_metadata_.rtree_)
932 return Status::Ok();
933
934 Buffer buff;
935 RETURN_NOT_OK(
936 read_generic_tile_from_file(encryption_key, gt_offsets_.rtree_, &buff));
937
938 storage_manager_->stats()->add_counter("read_rtree_size", buff.size());
939
940 // Use the serialized buffer size to approximate memory usage of the rtree.
941 auto memory_tracker = storage_manager_->array_memory_tracker(array_uri_);
942 assert(memory_tracker);
943 if (!memory_tracker->take_memory(buff.size())) {
944 return LOG_STATUS(Status::FragmentMetadataError(
945 "Cannot load R-tree; Insufficient memory budget"));
946 }
947
948 ConstBuffer cbuff(&buff);
949 RETURN_NOT_OK(rtree_.deserialize(&cbuff, array_schema_->domain(), version_));
950
951 loaded_metadata_.rtree_ = true;
952
953 return Status::Ok();
954 }
955
free_rtree()956 void FragmentMetadata::free_rtree() {
957 auto freed = rtree_.free_memory();
958 auto memory_tracker = storage_manager_->array_memory_tracker(array_uri_);
959 memory_tracker->release_memory(freed);
960 loaded_metadata_.rtree_ = false;
961 }
962
load_tile_var_sizes(const EncryptionKey & encryption_key,const std::string & name)963 Status FragmentMetadata::load_tile_var_sizes(
964 const EncryptionKey& encryption_key, const std::string& name) {
965 if (version_ <= 2)
966 return Status::Ok();
967
968 auto it = idx_map_.find(name);
969 assert(it != idx_map_.end());
970 auto idx = it->second;
971 return (load_tile_var_sizes(encryption_key, idx));
972 }
973
974 /* ****************************** */
975 /* PRIVATE METHODS */
976 /* ****************************** */
977
get_footer_size(uint32_t version,uint64_t * size) const978 Status FragmentMetadata::get_footer_size(
979 uint32_t version, uint64_t* size) const {
980 if (version < 3) {
981 *size = footer_size_v3_v4();
982 } else if (version < 4) {
983 *size = footer_size_v5_v6();
984 } else {
985 *size = footer_size_v7_or_higher();
986 }
987
988 return Status::Ok();
989 }
990
footer_size() const991 uint64_t FragmentMetadata::footer_size() const {
992 return footer_size_;
993 }
994
get_footer_offset_and_size(uint64_t * offset,uint64_t * size) const995 Status FragmentMetadata::get_footer_offset_and_size(
996 uint64_t* offset, uint64_t* size) const {
997 uint32_t f_version;
998 auto name = fragment_uri_.remove_trailing_slash().last_path_part();
999 RETURN_NOT_OK(utils::parse::get_fragment_name_version(name, &f_version));
1000 if (array_schema_->domain()->all_dims_fixed() && f_version < 5) {
1001 RETURN_NOT_OK(get_footer_size(f_version, size));
1002 *offset = meta_file_size_ - *size;
1003 } else {
1004 URI fragment_metadata_uri = fragment_uri_.join_path(
1005 std::string(constants::fragment_metadata_filename));
1006 uint64_t size_offset = meta_file_size_ - sizeof(uint64_t);
1007 Buffer buff;
1008 RETURN_NOT_OK(storage_manager_->read(
1009 fragment_metadata_uri, size_offset, &buff, sizeof(uint64_t)));
1010 buff.reset_offset();
1011 RETURN_NOT_OK(buff.read(size, sizeof(uint64_t)));
1012 *offset = meta_file_size_ - *size - sizeof(uint64_t);
1013 storage_manager_->stats()->add_counter(
1014 "read_frag_meta_size", sizeof(uint64_t));
1015 }
1016
1017 return Status::Ok();
1018 }
1019
footer_size_v3_v4() const1020 uint64_t FragmentMetadata::footer_size_v3_v4() const {
1021 auto attribute_num = array_schema_->attribute_num();
1022 auto dim_num = array_schema_->dim_num();
1023 // v3 and v4 support only arrays where all dimensions have the same type
1024 auto domain_size = 2 * dim_num * array_schema_->dimension(0)->coord_size();
1025
1026 // Get footer size
1027 uint64_t size = 0;
1028 size += sizeof(uint32_t); // version
1029 size += sizeof(char); // dense
1030 size += sizeof(char); // null non-empty domain
1031 size += domain_size; // non-empty domain
1032 size += sizeof(uint64_t); // sparse tile num
1033 size += sizeof(uint64_t); // last tile cell num
1034 size += (attribute_num + 1) * sizeof(uint64_t); // file sizes
1035 size += attribute_num * sizeof(uint64_t); // file var sizes
1036 size += sizeof(uint64_t); // R-Tree offset
1037 size += (attribute_num + 1) * sizeof(uint64_t); // tile offsets
1038 size += attribute_num * sizeof(uint64_t); // tile var offsets
1039 size += attribute_num * sizeof(uint64_t); // tile var sizes
1040
1041 return size;
1042 }
1043
footer_size_v5_v6() const1044 uint64_t FragmentMetadata::footer_size_v5_v6() const {
1045 auto dim_num = array_schema_->dim_num();
1046 auto num = array_schema_->attribute_num() + dim_num + 1;
1047 uint64_t domain_size = 0;
1048
1049 if (non_empty_domain_.empty()) {
1050 // For var-sized dimensions, this function would be called only upon
1051 // writing the footer to storage, in which case the non-empty domain
1052 // would not be empty. For reading the footer from storage, the footer
1053 // size is explicitly stored to and retrieved from storage, so this
1054 // function is not called then.
1055 assert(array_schema_->domain()->all_dims_fixed());
1056 for (unsigned d = 0; d < dim_num; ++d)
1057 domain_size += 2 * array_schema_->domain()->dimension(d)->coord_size();
1058 } else {
1059 for (unsigned d = 0; d < dim_num; ++d) {
1060 domain_size += non_empty_domain_[d].size();
1061 if (array_schema_->dimension(d)->var_size())
1062 domain_size += 2 * sizeof(uint64_t); // Two more sizes get serialized
1063 }
1064 }
1065
1066 // Get footer size
1067 uint64_t size = 0;
1068 size += sizeof(uint32_t); // version
1069 size += sizeof(char); // dense
1070 size += sizeof(char); // null non-empty domain
1071 size += domain_size; // non-empty domain
1072 size += sizeof(uint64_t); // sparse tile num
1073 size += sizeof(uint64_t); // last tile cell num
1074 size += num * sizeof(uint64_t); // file sizes
1075 size += num * sizeof(uint64_t); // file var sizes
1076 size += sizeof(uint64_t); // R-Tree offset
1077 size += num * sizeof(uint64_t); // tile offsets
1078 size += num * sizeof(uint64_t); // tile var offsets
1079 size += num * sizeof(uint64_t); // tile var sizes
1080
1081 return size;
1082 }
1083
footer_size_v7_or_higher() const1084 uint64_t FragmentMetadata::footer_size_v7_or_higher() const {
1085 auto dim_num = array_schema_->dim_num();
1086 auto num = array_schema_->attribute_num() + dim_num + 1;
1087 uint64_t domain_size = 0;
1088
1089 if (non_empty_domain_.empty()) {
1090 // For var-sized dimensions, this function would be called only upon
1091 // writing the footer to storage, in which case the non-empty domain
1092 // would not be empty. For reading the footer from storage, the footer
1093 // size is explicitly stored to and retrieved from storage, so this
1094 // function is not called then.
1095 assert(array_schema_->domain()->all_dims_fixed());
1096 for (unsigned d = 0; d < dim_num; ++d)
1097 domain_size += 2 * array_schema_->domain()->dimension(d)->coord_size();
1098 } else {
1099 for (unsigned d = 0; d < dim_num; ++d) {
1100 domain_size += non_empty_domain_[d].size();
1101 if (array_schema_->dimension(d)->var_size())
1102 domain_size += 2 * sizeof(uint64_t); // Two more sizes get serialized
1103 }
1104 }
1105
1106 // Get footer size
1107 uint64_t size = 0;
1108 size += sizeof(uint32_t); // version
1109 size += sizeof(char); // dense
1110 size += sizeof(char); // null non-empty domain
1111 size += domain_size; // non-empty domain
1112 size += sizeof(uint64_t); // sparse tile num
1113 size += sizeof(uint64_t); // last tile cell num
1114 size += num * sizeof(uint64_t); // file sizes
1115 size += num * sizeof(uint64_t); // file var sizes
1116 size += num * sizeof(uint64_t); // file validity sizes
1117 size += sizeof(uint64_t); // R-Tree offset
1118 size += num * sizeof(uint64_t); // tile offsets
1119 size += num * sizeof(uint64_t); // tile var offsets
1120 size += num * sizeof(uint64_t); // tile var sizes
1121 size += num * sizeof(uint64_t); // tile validity sizes
1122
1123 return size;
1124 }
1125
1126 template <class T>
compute_overlapping_tile_ids(const T * subarray) const1127 std::vector<uint64_t> FragmentMetadata::compute_overlapping_tile_ids(
1128 const T* subarray) const {
1129 assert(dense_);
1130 std::vector<uint64_t> tids;
1131 auto dim_num = array_schema_->dim_num();
1132
1133 // Temporary domain vector
1134 auto coord_size = array_schema_->domain()->dimension(0)->coord_size();
1135 auto temp_size = 2 * dim_num * coord_size;
1136 std::vector<uint8_t> temp(temp_size);
1137 uint8_t offset = 0;
1138 for (unsigned d = 0; d < dim_num; ++d) {
1139 std::memcpy(&temp[offset], domain_[d].data(), domain_[d].size());
1140 offset += domain_[d].size();
1141 }
1142 auto metadata_domain = (const T*)&temp[0];
1143
1144 // Check if there is any overlap
1145 if (!utils::geometry::overlap(subarray, metadata_domain, dim_num))
1146 return tids;
1147
1148 // Initialize subarray tile domain
1149 auto subarray_tile_domain = tdb_new_array(T, 2 * dim_num);
1150 get_subarray_tile_domain(subarray, subarray_tile_domain);
1151
1152 // Initialize tile coordinates
1153 auto tile_coords = tdb_new_array(T, dim_num);
1154 for (unsigned int i = 0; i < dim_num; ++i)
1155 tile_coords[i] = subarray_tile_domain[2 * i];
1156
1157 // Walk through all tiles in subarray tile domain
1158 auto domain = array_schema_->domain();
1159 uint64_t tile_pos;
1160 do {
1161 tile_pos = domain->get_tile_pos(metadata_domain, tile_coords);
1162 tids.emplace_back(tile_pos);
1163 domain->get_next_tile_coords(subarray_tile_domain, tile_coords);
1164 } while (utils::geometry::coords_in_rect(
1165 tile_coords, subarray_tile_domain, dim_num));
1166
1167 // Clean up
1168 tdb_delete_array(subarray_tile_domain);
1169 tdb_delete_array(tile_coords);
1170
1171 return tids;
1172 }
1173
1174 template <class T>
1175 std::vector<std::pair<uint64_t, double>>
compute_overlapping_tile_ids_cov(const T * subarray) const1176 FragmentMetadata::compute_overlapping_tile_ids_cov(const T* subarray) const {
1177 assert(dense_);
1178 std::vector<std::pair<uint64_t, double>> tids;
1179 auto dim_num = array_schema_->dim_num();
1180
1181 // Temporary domain vector
1182 auto coord_size = array_schema_->domain()->dimension(0)->coord_size();
1183 auto temp_size = 2 * dim_num * coord_size;
1184 std::vector<uint8_t> temp(temp_size);
1185 uint8_t offset = 0;
1186 for (unsigned d = 0; d < dim_num; ++d) {
1187 std::memcpy(&temp[offset], domain_[d].data(), domain_[d].size());
1188 offset += domain_[d].size();
1189 }
1190 auto metadata_domain = (const T*)&temp[0];
1191
1192 // Check if there is any overlap
1193 if (!utils::geometry::overlap(subarray, metadata_domain, dim_num))
1194 return tids;
1195
1196 // Initialize subarray tile domain
1197 auto subarray_tile_domain = tdb_new_array(T, 2 * dim_num);
1198 get_subarray_tile_domain(subarray, subarray_tile_domain);
1199
1200 auto tile_subarray = tdb_new_array(T, 2 * dim_num);
1201 auto tile_overlap = tdb_new_array(T, 2 * dim_num);
1202 bool overlap;
1203 double cov;
1204
1205 // Initialize tile coordinates
1206 auto tile_coords = tdb_new_array(T, dim_num);
1207 for (unsigned int i = 0; i < dim_num; ++i)
1208 tile_coords[i] = subarray_tile_domain[2 * i];
1209
1210 // Walk through all tiles in subarray tile domain
1211 auto domain = array_schema_->domain();
1212 uint64_t tile_pos;
1213 do {
1214 domain->get_tile_subarray(metadata_domain, tile_coords, tile_subarray);
1215 utils::geometry::overlap(
1216 subarray, tile_subarray, dim_num, tile_overlap, &overlap);
1217 assert(overlap);
1218 cov = utils::geometry::coverage(tile_overlap, tile_subarray, dim_num);
1219 tile_pos = domain->get_tile_pos(metadata_domain, tile_coords);
1220 tids.emplace_back(tile_pos, cov);
1221 domain->get_next_tile_coords(subarray_tile_domain, tile_coords);
1222 } while (utils::geometry::coords_in_rect(
1223 tile_coords, subarray_tile_domain, dim_num));
1224
1225 // Clean up
1226 tdb_delete_array(subarray_tile_domain);
1227 tdb_delete_array(tile_coords);
1228 tdb_delete_array(tile_subarray);
1229 tdb_delete_array(tile_overlap);
1230
1231 return tids;
1232 }
1233
1234 template <class T>
get_subarray_tile_domain(const T * subarray,T * subarray_tile_domain) const1235 void FragmentMetadata::get_subarray_tile_domain(
1236 const T* subarray, T* subarray_tile_domain) const {
1237 // For easy reference
1238 auto dim_num = array_schema_->dim_num();
1239
1240 // Calculate subarray in tile domain
1241 for (unsigned d = 0; d < dim_num; ++d) {
1242 auto domain = (const T*)domain_[d].data();
1243 auto tile_extent =
1244 *(const T*)array_schema_->domain()->tile_extent(d).data();
1245 auto overlap = std::max(subarray[2 * d], domain[0]);
1246 subarray_tile_domain[2 * d] =
1247 Dimension::tile_idx(overlap, domain[0], tile_extent);
1248
1249 overlap = std::min(subarray[2 * d + 1], domain[1]);
1250 subarray_tile_domain[2 * d + 1] =
1251 Dimension::tile_idx(overlap, domain[0], tile_extent);
1252 }
1253 }
1254
expand_non_empty_domain(const NDRange & mbr)1255 Status FragmentMetadata::expand_non_empty_domain(const NDRange& mbr) {
1256 std::lock_guard<std::mutex> lock(mtx_);
1257
1258 // Case the non-empty domain is not initialized yet
1259 if (non_empty_domain_.empty()) {
1260 non_empty_domain_ = mbr;
1261 return Status::Ok();
1262 }
1263
1264 // Expand existing non-empty domain
1265 array_schema_->domain()->expand_ndrange(mbr, &non_empty_domain_);
1266
1267 return Status::Ok();
1268 }
1269
load_tile_offsets(const EncryptionKey & encryption_key,unsigned idx)1270 Status FragmentMetadata::load_tile_offsets(
1271 const EncryptionKey& encryption_key, unsigned idx) {
1272 if (version_ <= 2)
1273 return Status::Ok();
1274
1275 // If the tile offset is already loaded, exit early to avoid the lock
1276 if (loaded_metadata_.tile_offsets_[idx])
1277 return Status::Ok();
1278
1279 std::lock_guard<std::mutex> lock(tile_offsets_mtx_[idx]);
1280
1281 if (loaded_metadata_.tile_offsets_[idx])
1282 return Status::Ok();
1283
1284 Buffer buff;
1285 RETURN_NOT_OK(read_generic_tile_from_file(
1286 encryption_key, gt_offsets_.tile_offsets_[idx], &buff));
1287
1288 storage_manager_->stats()->add_counter("read_tile_offsets_size", buff.size());
1289
1290 ConstBuffer cbuff(&buff);
1291 RETURN_NOT_OK(load_tile_offsets(idx, &cbuff));
1292
1293 loaded_metadata_.tile_offsets_[idx] = true;
1294
1295 return Status::Ok();
1296 }
1297
load_tile_var_offsets(const EncryptionKey & encryption_key,unsigned idx)1298 Status FragmentMetadata::load_tile_var_offsets(
1299 const EncryptionKey& encryption_key, unsigned idx) {
1300 if (version_ <= 2)
1301 return Status::Ok();
1302
1303 // If the tile var offset is already loaded, exit early to avoid the lock
1304 if (loaded_metadata_.tile_var_offsets_[idx])
1305 return Status::Ok();
1306
1307 std::lock_guard<std::mutex> lock(tile_var_offsets_mtx_[idx]);
1308
1309 if (loaded_metadata_.tile_var_offsets_[idx])
1310 return Status::Ok();
1311
1312 Buffer buff;
1313 RETURN_NOT_OK(read_generic_tile_from_file(
1314 encryption_key, gt_offsets_.tile_var_offsets_[idx], &buff));
1315
1316 storage_manager_->stats()->add_counter(
1317 "read_tile_var_offsets_size", buff.size());
1318
1319 ConstBuffer cbuff(&buff);
1320 RETURN_NOT_OK(load_tile_var_offsets(idx, &cbuff));
1321
1322 loaded_metadata_.tile_var_offsets_[idx] = true;
1323
1324 return Status::Ok();
1325 }
1326
load_tile_var_sizes(const EncryptionKey & encryption_key,unsigned idx)1327 Status FragmentMetadata::load_tile_var_sizes(
1328 const EncryptionKey& encryption_key, unsigned idx) {
1329 if (version_ <= 2)
1330 return Status::Ok();
1331
1332 std::lock_guard<std::mutex> lock(mtx_);
1333
1334 if (loaded_metadata_.tile_var_sizes_[idx])
1335 return Status::Ok();
1336
1337 Buffer buff;
1338 RETURN_NOT_OK(read_generic_tile_from_file(
1339 encryption_key, gt_offsets_.tile_var_sizes_[idx], &buff));
1340
1341 storage_manager_->stats()->add_counter(
1342 "read_tile_var_sizes_size", buff.size());
1343
1344 ConstBuffer cbuff(&buff);
1345 RETURN_NOT_OK(load_tile_var_sizes(idx, &cbuff));
1346
1347 loaded_metadata_.tile_var_sizes_[idx] = true;
1348
1349 return Status::Ok();
1350 }
1351
load_tile_validity_offsets(const EncryptionKey & encryption_key,unsigned idx)1352 Status FragmentMetadata::load_tile_validity_offsets(
1353 const EncryptionKey& encryption_key, unsigned idx) {
1354 if (version_ <= 6)
1355 return Status::Ok();
1356
1357 std::lock_guard<std::mutex> lock(mtx_);
1358
1359 if (loaded_metadata_.tile_validity_offsets_[idx])
1360 return Status::Ok();
1361
1362 Buffer buff;
1363 RETURN_NOT_OK(read_generic_tile_from_file(
1364 encryption_key, gt_offsets_.tile_validity_offsets_[idx], &buff));
1365
1366 storage_manager_->stats()->add_counter(
1367 "read_tile_validity_offsets_size", buff.size());
1368
1369 ConstBuffer cbuff(&buff);
1370 RETURN_NOT_OK(load_tile_validity_offsets(idx, &cbuff));
1371
1372 loaded_metadata_.tile_validity_offsets_[idx] = true;
1373
1374 return Status::Ok();
1375 }
1376
1377 // ===== FORMAT =====
1378 // bounding_coords_num (uint64_t)
1379 // bounding_coords_#1 (void*) bounding_coords_#2 (void*) ...
load_bounding_coords(ConstBuffer * buff)1380 Status FragmentMetadata::load_bounding_coords(ConstBuffer* buff) {
1381 // Get number of bounding coordinates
1382 uint64_t bounding_coords_num = 0;
1383 RETURN_NOT_OK(buff->read(&bounding_coords_num, sizeof(uint64_t)));
1384
1385 // Get bounding coordinates
1386 // Note: This version supports only dimensions domains with the same type
1387 auto coord_size = array_schema_->domain()->dimension(0)->coord_size();
1388 auto dim_num = array_schema_->domain()->dim_num();
1389 uint64_t bounding_coords_size = 2 * dim_num * coord_size;
1390 bounding_coords_.resize(bounding_coords_num);
1391 for (uint64_t i = 0; i < bounding_coords_num; ++i) {
1392 bounding_coords_[i].resize(bounding_coords_size);
1393 RETURN_NOT_OK(buff->read(&bounding_coords_[i][0], bounding_coords_size));
1394 }
1395
1396 return Status::Ok();
1397 }
1398
load_file_sizes(ConstBuffer * buff)1399 Status FragmentMetadata::load_file_sizes(ConstBuffer* buff) {
1400 if (version_ < 5)
1401 return load_file_sizes_v1_v4(buff);
1402 else
1403 return load_file_sizes_v5_or_higher(buff);
1404 }
1405
1406 // ===== FORMAT =====
1407 // file_sizes#0 (uint64_t)
1408 // ...
1409 // file_sizes#attribute_num (uint64_t)
load_file_sizes_v1_v4(ConstBuffer * buff)1410 Status FragmentMetadata::load_file_sizes_v1_v4(ConstBuffer* buff) {
1411 auto attribute_num = array_schema_->attribute_num();
1412 file_sizes_.resize(attribute_num + 1);
1413 Status st =
1414 buff->read(&file_sizes_[0], (attribute_num + 1) * sizeof(uint64_t));
1415
1416 if (!st.ok()) {
1417 return LOG_STATUS(Status::FragmentMetadataError(
1418 "Cannot load fragment metadata; Reading tile offsets failed"));
1419 }
1420
1421 return Status::Ok();
1422 }
1423
1424 // ===== FORMAT =====
1425 // file_sizes#0 (uint64_t)
1426 // ...
1427 // file_sizes#{attribute_num+dim_num} (uint64_t)
load_file_sizes_v5_or_higher(ConstBuffer * buff)1428 Status FragmentMetadata::load_file_sizes_v5_or_higher(ConstBuffer* buff) {
1429 auto num = array_schema_->attribute_num() + array_schema_->dim_num() + 1;
1430 file_sizes_.resize(num);
1431 Status st = buff->read(&file_sizes_[0], num * sizeof(uint64_t));
1432
1433 if (!st.ok()) {
1434 return LOG_STATUS(Status::FragmentMetadataError(
1435 "Cannot load fragment metadata; Reading tile offsets failed"));
1436 }
1437
1438 return Status::Ok();
1439 }
1440
load_file_var_sizes(ConstBuffer * buff)1441 Status FragmentMetadata::load_file_var_sizes(ConstBuffer* buff) {
1442 if (version_ < 5)
1443 return load_file_var_sizes_v1_v4(buff);
1444 else
1445 return load_file_var_sizes_v5_or_higher(buff);
1446 }
1447
1448 // ===== FORMAT =====
1449 // file_var_sizes#0 (uint64_t)
1450 // ...
1451 // file_var_sizes#attribute_num (uint64_t)
load_file_var_sizes_v1_v4(ConstBuffer * buff)1452 Status FragmentMetadata::load_file_var_sizes_v1_v4(ConstBuffer* buff) {
1453 auto attribute_num = array_schema_->attribute_num();
1454 file_var_sizes_.resize(attribute_num);
1455 Status st = buff->read(&file_var_sizes_[0], attribute_num * sizeof(uint64_t));
1456
1457 if (!st.ok()) {
1458 return LOG_STATUS(Status::FragmentMetadataError(
1459 "Cannot load fragment metadata; Reading tile offsets failed"));
1460 }
1461
1462 return Status::Ok();
1463 }
1464
1465 // ===== FORMAT =====
1466 // file_var_sizes#0 (uint64_t)
1467 // ...
1468 // file_var_sizes#{attribute_num+dim_num} (uint64_t)
load_file_var_sizes_v5_or_higher(ConstBuffer * buff)1469 Status FragmentMetadata::load_file_var_sizes_v5_or_higher(ConstBuffer* buff) {
1470 auto num = array_schema_->attribute_num() + array_schema_->dim_num() + 1;
1471 file_var_sizes_.resize(num);
1472 Status st = buff->read(&file_var_sizes_[0], num * sizeof(uint64_t));
1473
1474 if (!st.ok()) {
1475 return LOG_STATUS(Status::FragmentMetadataError(
1476 "Cannot load fragment metadata; Reading tile offsets failed"));
1477 }
1478
1479 return Status::Ok();
1480 }
1481
load_file_validity_sizes(ConstBuffer * buff)1482 Status FragmentMetadata::load_file_validity_sizes(ConstBuffer* buff) {
1483 if (version_ <= 6)
1484 return Status::Ok();
1485
1486 auto num = array_schema_->attribute_num() + array_schema_->dim_num() + 1;
1487 file_validity_sizes_.resize(num);
1488 Status st = buff->read(&file_validity_sizes_[0], num * sizeof(uint64_t));
1489
1490 if (!st.ok()) {
1491 return LOG_STATUS(Status::FragmentMetadataError(
1492 "Cannot load fragment metadata; Reading tile offsets failed"));
1493 }
1494
1495 return Status::Ok();
1496 }
1497
1498 // ===== FORMAT =====
1499 // last_tile_cell_num (uint64_t)
load_last_tile_cell_num(ConstBuffer * buff)1500 Status FragmentMetadata::load_last_tile_cell_num(ConstBuffer* buff) {
1501 // Get last tile cell number
1502 Status st = buff->read(&last_tile_cell_num_, sizeof(uint64_t));
1503 if (!st.ok()) {
1504 return LOG_STATUS(Status::FragmentMetadataError(
1505 "Cannot load fragment metadata; Reading last tile cell number "
1506 "failed"));
1507 }
1508 return Status::Ok();
1509 }
1510
1511 // ===== FORMAT =====
1512 // mbr_num (uint64_t)
1513 // mbr_#1 (void*)
1514 // mbr_#2 (void*)
1515 // ...
load_mbrs(ConstBuffer * buff)1516 Status FragmentMetadata::load_mbrs(ConstBuffer* buff) {
1517 // Get number of MBRs
1518 uint64_t mbr_num = 0;
1519 RETURN_NOT_OK(buff->read(&mbr_num, sizeof(uint64_t)));
1520
1521 // Set leaf level
1522 rtree_.set_leaf_num(mbr_num);
1523 auto domain = array_schema_->domain();
1524 auto dim_num = domain->dim_num();
1525 for (uint64_t m = 0; m < mbr_num; ++m) {
1526 NDRange mbr(dim_num);
1527 for (unsigned d = 0; d < dim_num; ++d) {
1528 auto r_size = 2 * domain->dimension(d)->coord_size();
1529 mbr[d].set_range(buff->cur_data(), r_size);
1530 buff->advance_offset(r_size);
1531 }
1532 rtree_.set_leaf(m, mbr);
1533 }
1534
1535 // Build R-tree bottom-up
1536 if (mbr_num > 0) {
1537 rtree_.build_tree();
1538 }
1539
1540 sparse_tile_num_ = mbr_num;
1541
1542 return Status::Ok();
1543 }
1544
load_non_empty_domain(ConstBuffer * buff)1545 Status FragmentMetadata::load_non_empty_domain(ConstBuffer* buff) {
1546 if (version_ <= 2)
1547 return load_non_empty_domain_v1_v2(buff);
1548 else if (version_ == 3 || version_ == 4)
1549 return load_non_empty_domain_v3_v4(buff);
1550 return load_non_empty_domain_v5_or_higher(buff);
1551 }
1552
1553 // ===== FORMAT =====
1554 // non_empty_domain_size (uint64_t)
1555 // non_empty_domain (void*)
load_non_empty_domain_v1_v2(ConstBuffer * buff)1556 Status FragmentMetadata::load_non_empty_domain_v1_v2(ConstBuffer* buff) {
1557 // Get domain size
1558 uint64_t domain_size = 0;
1559 RETURN_NOT_OK(buff->read(&domain_size, sizeof(uint64_t)));
1560
1561 // Get non-empty domain
1562 if (domain_size != 0) {
1563 auto dim_num = array_schema_->dim_num();
1564 std::vector<uint8_t> temp(domain_size);
1565 RETURN_NOT_OK(buff->read(&temp[0], domain_size));
1566 non_empty_domain_.resize(dim_num);
1567 uint64_t offset = 0;
1568 for (unsigned d = 0; d < dim_num; ++d) {
1569 auto coord_size = array_schema_->dimension(d)->coord_size();
1570 Range r(&temp[offset], 2 * coord_size);
1571 non_empty_domain_[d] = std::move(r);
1572 offset += 2 * coord_size;
1573 }
1574 }
1575
1576 // Get expanded domain
1577 if (!non_empty_domain_.empty()) {
1578 domain_ = non_empty_domain_;
1579 array_schema_->domain()->expand_to_tiles(&domain_);
1580 }
1581
1582 return Status::Ok();
1583 }
1584
1585 // ===== FORMAT =====
1586 // null non_empty_domain (char)
1587 // non_empty_domain (domain_size)
load_non_empty_domain_v3_v4(ConstBuffer * buff)1588 Status FragmentMetadata::load_non_empty_domain_v3_v4(ConstBuffer* buff) {
1589 // Get null non-empty domain
1590 bool null_non_empty_domain = false;
1591 RETURN_NOT_OK(buff->read(&null_non_empty_domain, sizeof(char)));
1592
1593 // Get non-empty domain
1594 if (!null_non_empty_domain) {
1595 auto dim_num = array_schema_->dim_num();
1596 // Note: These versions supports only dimensions domains with the same type
1597 auto coord_size = array_schema_->domain()->dimension(0)->coord_size();
1598 auto domain_size = 2 * dim_num * coord_size;
1599 std::vector<uint8_t> temp(domain_size);
1600 RETURN_NOT_OK(buff->read(&temp[0], domain_size));
1601 non_empty_domain_.resize(dim_num);
1602 uint64_t offset = 0;
1603 for (unsigned d = 0; d < dim_num; ++d) {
1604 auto coord_size = array_schema_->dimension(d)->coord_size();
1605 Range r(&temp[offset], 2 * coord_size);
1606 non_empty_domain_[d] = std::move(r);
1607 offset += 2 * coord_size;
1608 }
1609 }
1610
1611 // Get expanded domain
1612 if (!non_empty_domain_.empty()) {
1613 domain_ = non_empty_domain_;
1614 array_schema_->domain()->expand_to_tiles(&domain_);
1615 }
1616
1617 return Status::Ok();
1618 }
1619
1620 // ===== FORMAT =====
1621 // null_non_empty_domain
1622 // fix-sized: range(void*)
1623 // var-sized: range_size(uint64_t) | start_range_size(uint64_t) | range(void*)
load_non_empty_domain_v5_or_higher(ConstBuffer * buff)1624 Status FragmentMetadata::load_non_empty_domain_v5_or_higher(ConstBuffer* buff) {
1625 // Get null non-empty domain
1626 char null_non_empty_domain = 0;
1627 RETURN_NOT_OK(buff->read(&null_non_empty_domain, sizeof(char)));
1628
1629 auto domain = array_schema_->domain();
1630 if (null_non_empty_domain == 0) {
1631 auto dim_num = array_schema_->dim_num();
1632 non_empty_domain_.resize(dim_num);
1633 for (unsigned d = 0; d < dim_num; ++d) {
1634 auto dim = domain->dimension(d);
1635 if (!dim->var_size()) { // Fixed-sized
1636 auto r_size = 2 * dim->coord_size();
1637 non_empty_domain_[d].set_range(buff->cur_data(), r_size);
1638 buff->advance_offset(r_size);
1639 } else { // Var-sized
1640 uint64_t r_size, start_size;
1641 RETURN_NOT_OK(buff->read(&r_size, sizeof(uint64_t)));
1642 RETURN_NOT_OK(buff->read(&start_size, sizeof(uint64_t)));
1643 non_empty_domain_[d].set_range(buff->cur_data(), r_size, start_size);
1644 buff->advance_offset(r_size);
1645 }
1646 }
1647 }
1648
1649 // Get expanded domain
1650 if (!non_empty_domain_.empty()) {
1651 domain_ = non_empty_domain_;
1652 array_schema_->domain()->expand_to_tiles(&domain_);
1653 }
1654
1655 return Status::Ok();
1656 }
1657
1658 // Applicable only to versions 1 and 2
load_tile_offsets(ConstBuffer * buff)1659 Status FragmentMetadata::load_tile_offsets(ConstBuffer* buff) {
1660 Status st;
1661 uint64_t tile_offsets_num = 0;
1662 unsigned int attribute_num = array_schema_->attribute_num();
1663
1664 // Allocate tile offsets
1665 tile_offsets_.resize(attribute_num + 1);
1666 tile_offsets_mtx_.resize(attribute_num + 1);
1667
1668 // For all attributes, get the tile offsets
1669 for (unsigned int i = 0; i < attribute_num + 1; ++i) {
1670 // Get number of tile offsets
1671 st = buff->read(&tile_offsets_num, sizeof(uint64_t));
1672 if (!st.ok()) {
1673 return LOG_STATUS(Status::FragmentMetadataError(
1674 "Cannot load fragment metadata; Reading number of tile offsets "
1675 "failed"));
1676 }
1677
1678 if (tile_offsets_num == 0)
1679 continue;
1680
1681 auto size = tile_offsets_num * sizeof(uint64_t);
1682 auto memory_tracker = storage_manager_->array_memory_tracker(array_uri_);
1683 assert(memory_tracker);
1684 if (!memory_tracker->take_memory(size)) {
1685 return LOG_STATUS(Status::FragmentMetadataError(
1686 "Cannot load tile offsets; Insufficient memory budget"));
1687 }
1688
1689 // Get tile offsets
1690 tile_offsets_[i].resize(tile_offsets_num);
1691 st = buff->read(&tile_offsets_[i][0], size);
1692 if (!st.ok()) {
1693 return LOG_STATUS(Status::FragmentMetadataError(
1694 "Cannot load fragment metadata; Reading tile offsets failed"));
1695 }
1696 }
1697
1698 loaded_metadata_.tile_offsets_.resize(
1699 array_schema_->attribute_num() + 1, true);
1700
1701 return Status::Ok();
1702 }
1703
load_tile_offsets(unsigned idx,ConstBuffer * buff)1704 Status FragmentMetadata::load_tile_offsets(unsigned idx, ConstBuffer* buff) {
1705 Status st;
1706 uint64_t tile_offsets_num = 0;
1707
1708 // Get number of tile offsets
1709 st = buff->read(&tile_offsets_num, sizeof(uint64_t));
1710 if (!st.ok()) {
1711 return LOG_STATUS(Status::FragmentMetadataError(
1712 "Cannot load fragment metadata; Reading number of tile offsets "
1713 "failed"));
1714 }
1715
1716 // Get tile offsets
1717 if (tile_offsets_num != 0) {
1718 auto size = tile_offsets_num * sizeof(uint64_t);
1719 auto memory_tracker = storage_manager_->array_memory_tracker(array_uri_);
1720 assert(memory_tracker);
1721 if (!memory_tracker->take_memory(size)) {
1722 return LOG_STATUS(Status::FragmentMetadataError(
1723 "Cannot load tile offsets; Insufficient memory budget"));
1724 }
1725
1726 tile_offsets_[idx].resize(tile_offsets_num);
1727 st = buff->read(&tile_offsets_[idx][0], size);
1728 if (!st.ok()) {
1729 return LOG_STATUS(Status::FragmentMetadataError(
1730 "Cannot load fragment metadata; Reading tile offsets failed"));
1731 }
1732 }
1733
1734 return Status::Ok();
1735 }
1736
1737 // ===== FORMAT =====
1738 // tile_var_offsets_attr#0_num (uint64_t)
1739 // tile_var_offsets_attr#0_#1 (uint64_t) tile_var_offsets_attr#0_#2 (uint64_t)
1740 // ...
1741 // ...
1742 // tile_var_offsets_attr#<attribute_num-1>_num(uint64_t)
1743 // tile_var_offsets_attr#<attribute_num-1>_#1 (uint64_t)
1744 // tile_ver_offsets_attr#<attribute_num-1>_#2 (uint64_t) ...
load_tile_var_offsets(ConstBuffer * buff)1745 Status FragmentMetadata::load_tile_var_offsets(ConstBuffer* buff) {
1746 Status st;
1747 unsigned int attribute_num = array_schema_->attribute_num();
1748 uint64_t tile_var_offsets_num = 0;
1749
1750 // Allocate tile offsets
1751 tile_var_offsets_.resize(attribute_num);
1752 tile_var_offsets_mtx_.resize(attribute_num);
1753
1754 // For all attributes, get the variable tile offsets
1755 for (unsigned int i = 0; i < attribute_num; ++i) {
1756 // Get number of tile offsets
1757 st = buff->read(&tile_var_offsets_num, sizeof(uint64_t));
1758 if (!st.ok()) {
1759 LOG_STATUS(st);
1760 return LOG_STATUS(Status::FragmentMetadataError(
1761 "Cannot load fragment metadata; Reading number of variable tile "
1762 "offsets failed"));
1763 }
1764
1765 if (tile_var_offsets_num == 0)
1766 continue;
1767
1768 auto size = tile_var_offsets_num * sizeof(uint64_t);
1769 auto memory_tracker = storage_manager_->array_memory_tracker(array_uri_);
1770 assert(memory_tracker);
1771 if (!memory_tracker->take_memory(size)) {
1772 return LOG_STATUS(Status::FragmentMetadataError(
1773 "Cannot load tile var offsets; Insufficient memory budget"));
1774 }
1775
1776 // Get variable tile offsets
1777 tile_var_offsets_[i].resize(tile_var_offsets_num);
1778 st = buff->read(&tile_var_offsets_[i][0], size);
1779 if (!st.ok()) {
1780 LOG_STATUS(st);
1781 return LOG_STATUS(Status::FragmentMetadataError(
1782 "Cannot load fragment metadata; Reading variable tile offsets "
1783 "failed"));
1784 }
1785 }
1786
1787 loaded_metadata_.tile_var_offsets_.resize(
1788 array_schema_->attribute_num(), true);
1789
1790 return Status::Ok();
1791 }
1792
load_tile_var_offsets(unsigned idx,ConstBuffer * buff)1793 Status FragmentMetadata::load_tile_var_offsets(
1794 unsigned idx, ConstBuffer* buff) {
1795 Status st;
1796 uint64_t tile_var_offsets_num = 0;
1797
1798 // Get number of tile offsets
1799 st = buff->read(&tile_var_offsets_num, sizeof(uint64_t));
1800 if (!st.ok()) {
1801 LOG_STATUS(st);
1802 return LOG_STATUS(Status::FragmentMetadataError(
1803 "Cannot load fragment metadata; Reading number of variable tile "
1804 "offsets failed"));
1805 }
1806
1807 // Get variable tile offsets
1808 if (tile_var_offsets_num != 0) {
1809 auto size = tile_var_offsets_num * sizeof(uint64_t);
1810 auto memory_tracker = storage_manager_->array_memory_tracker(array_uri_);
1811 assert(memory_tracker);
1812 if (!memory_tracker->take_memory(size)) {
1813 return LOG_STATUS(Status::FragmentMetadataError(
1814 "Cannot load tile var offsets; Insufficient memory budget"));
1815 }
1816
1817 tile_var_offsets_[idx].resize(tile_var_offsets_num);
1818 st = buff->read(&tile_var_offsets_[idx][0], size);
1819 if (!st.ok()) {
1820 LOG_STATUS(st);
1821 return LOG_STATUS(Status::FragmentMetadataError(
1822 "Cannot load fragment metadata; Reading variable tile offsets "
1823 "failed"));
1824 }
1825 }
1826
1827 return Status::Ok();
1828 }
1829
1830 // ===== FORMAT =====
1831 // tile_var_sizes_attr#0_num (uint64_t)
1832 // tile_var_sizes_attr#0_#1 (uint64_t) tile_sizes_attr#0_#2 (uint64_t) ...
1833 // ...
1834 // tile_var_sizes_attr#<attribute_num-1>_num(uint64_t)
1835 // tile_var_sizes__attr#<attribute_num-1>_#1 (uint64_t)
1836 // tile_var_sizes_attr#<attribute_num-1>_#2 (uint64_t) ...
load_tile_var_sizes(ConstBuffer * buff)1837 Status FragmentMetadata::load_tile_var_sizes(ConstBuffer* buff) {
1838 Status st;
1839 unsigned int attribute_num = array_schema_->attribute_num();
1840 uint64_t tile_var_sizes_num = 0;
1841
1842 // Allocate tile sizes
1843 tile_var_sizes_.resize(attribute_num);
1844
1845 // For all attributes, get the variable tile sizes
1846 for (unsigned int i = 0; i < attribute_num; ++i) {
1847 // Get number of tile sizes
1848 st = buff->read(&tile_var_sizes_num, sizeof(uint64_t));
1849 if (!st.ok()) {
1850 return LOG_STATUS(Status::FragmentMetadataError(
1851 "Cannot load fragment metadata; Reading number of variable tile "
1852 "sizes failed"));
1853 }
1854
1855 if (tile_var_sizes_num == 0)
1856 continue;
1857
1858 auto size = tile_var_sizes_num * sizeof(uint64_t);
1859 auto memory_tracker = storage_manager_->array_memory_tracker(array_uri_);
1860 assert(memory_tracker);
1861 if (!memory_tracker->take_memory(size)) {
1862 return LOG_STATUS(Status::FragmentMetadataError(
1863 "Cannot load tile var sizes; Insufficient memory budget"));
1864 }
1865
1866 // Get variable tile sizes
1867 tile_var_sizes_[i].resize(tile_var_sizes_num);
1868 st = buff->read(&tile_var_sizes_[i][0], size);
1869 if (!st.ok()) {
1870 return LOG_STATUS(Status::FragmentMetadataError(
1871 "Cannot load fragment metadata; Reading variable tile sizes "
1872 "failed"));
1873 }
1874 }
1875
1876 loaded_metadata_.tile_var_sizes_.resize(array_schema_->attribute_num(), true);
1877
1878 return Status::Ok();
1879 }
1880
load_tile_var_sizes(unsigned idx,ConstBuffer * buff)1881 Status FragmentMetadata::load_tile_var_sizes(unsigned idx, ConstBuffer* buff) {
1882 Status st;
1883 uint64_t tile_var_sizes_num = 0;
1884
1885 // Get number of tile sizes
1886 st = buff->read(&tile_var_sizes_num, sizeof(uint64_t));
1887 if (!st.ok()) {
1888 return LOG_STATUS(Status::FragmentMetadataError(
1889 "Cannot load fragment metadata; Reading number of variable tile "
1890 "sizes failed"));
1891 }
1892
1893 // Get variable tile sizes
1894 if (tile_var_sizes_num != 0) {
1895 auto size = tile_var_sizes_num * sizeof(uint64_t);
1896 auto memory_tracker = storage_manager_->array_memory_tracker(array_uri_);
1897 assert(memory_tracker);
1898 if (!memory_tracker->take_memory(size)) {
1899 return LOG_STATUS(Status::FragmentMetadataError(
1900 "Cannot load tile var sizes; Insufficient memory budget"));
1901 }
1902
1903 tile_var_sizes_[idx].resize(tile_var_sizes_num);
1904 st = buff->read(&tile_var_sizes_[idx][0], size);
1905 if (!st.ok()) {
1906 return LOG_STATUS(Status::FragmentMetadataError(
1907 "Cannot load fragment metadata; Reading variable tile sizes "
1908 "failed"));
1909 }
1910 }
1911
1912 return Status::Ok();
1913 }
1914
load_tile_validity_offsets(unsigned idx,ConstBuffer * buff)1915 Status FragmentMetadata::load_tile_validity_offsets(
1916 unsigned idx, ConstBuffer* buff) {
1917 Status st;
1918 uint64_t tile_validity_offsets_num = 0;
1919
1920 // Get number of tile offsets
1921 st = buff->read(&tile_validity_offsets_num, sizeof(uint64_t));
1922 if (!st.ok()) {
1923 return LOG_STATUS(
1924 Status::FragmentMetadataError("Cannot load fragment metadata; Reading "
1925 "number of validity tile offsets "
1926 "failed"));
1927 }
1928
1929 // Get tile offsets
1930 if (tile_validity_offsets_num != 0) {
1931 auto size = tile_validity_offsets_num * sizeof(uint64_t);
1932 auto memory_tracker = storage_manager_->array_memory_tracker(array_uri_);
1933 assert(memory_tracker);
1934 if (!memory_tracker->take_memory(size)) {
1935 return LOG_STATUS(Status::FragmentMetadataError(
1936 "Cannot load tile validity offsets; Insufficient memory budget"));
1937 }
1938
1939 tile_validity_offsets_[idx].resize(tile_validity_offsets_num);
1940 st = buff->read(&tile_validity_offsets_[idx][0], size);
1941
1942 if (!st.ok()) {
1943 return LOG_STATUS(Status::FragmentMetadataError(
1944 "Cannot load fragment metadata; Reading validity tile offsets "
1945 "failed"));
1946 }
1947 }
1948
1949 return Status::Ok();
1950 }
1951
load_version(ConstBuffer * buff)1952 Status FragmentMetadata::load_version(ConstBuffer* buff) {
1953 RETURN_NOT_OK(buff->read(&version_, sizeof(uint32_t)));
1954 return Status::Ok();
1955 }
1956
load_dense(ConstBuffer * buff)1957 Status FragmentMetadata::load_dense(ConstBuffer* buff) {
1958 RETURN_NOT_OK(buff->read(&dense_, sizeof(char)));
1959 return Status::Ok();
1960 }
1961
load_sparse_tile_num(ConstBuffer * buff)1962 Status FragmentMetadata::load_sparse_tile_num(ConstBuffer* buff) {
1963 RETURN_NOT_OK(buff->read(&sparse_tile_num_, sizeof(uint64_t)));
1964 return Status::Ok();
1965 }
1966
load_generic_tile_offsets(ConstBuffer * buff)1967 Status FragmentMetadata::load_generic_tile_offsets(ConstBuffer* buff) {
1968 if (version_ == 3 || version_ == 4)
1969 return load_generic_tile_offsets_v3_v4(buff);
1970 else if (version_ >= 5 && version_ < 7)
1971 return load_generic_tile_offsets_v5_v6(buff);
1972 else if (version_ >= 7)
1973 return load_generic_tile_offsets_v7_or_higher(buff);
1974
1975 assert(false);
1976 return Status::Ok();
1977 }
1978
load_generic_tile_offsets_v3_v4(ConstBuffer * buff)1979 Status FragmentMetadata::load_generic_tile_offsets_v3_v4(ConstBuffer* buff) {
1980 // Load R-Tree offset
1981 RETURN_NOT_OK(buff->read(>_offsets_.rtree_, sizeof(uint64_t)));
1982
1983 // Load offsets for tile offsets
1984 unsigned int attribute_num = array_schema_->attribute_num();
1985 gt_offsets_.tile_offsets_.resize(attribute_num + 1);
1986 for (unsigned i = 0; i < attribute_num + 1; ++i) {
1987 RETURN_NOT_OK(buff->read(>_offsets_.tile_offsets_[i], sizeof(uint64_t)));
1988 }
1989
1990 // Load offsets for tile var offsets
1991 gt_offsets_.tile_var_offsets_.resize(attribute_num);
1992 for (unsigned i = 0; i < attribute_num; ++i) {
1993 RETURN_NOT_OK(
1994 buff->read(>_offsets_.tile_var_offsets_[i], sizeof(uint64_t)));
1995 }
1996
1997 // Load offsets for tile var sizes
1998 gt_offsets_.tile_var_sizes_.resize(attribute_num);
1999 for (unsigned i = 0; i < attribute_num; ++i) {
2000 RETURN_NOT_OK(
2001 buff->read(>_offsets_.tile_var_sizes_[i], sizeof(uint64_t)));
2002 }
2003
2004 return Status::Ok();
2005 }
2006
load_generic_tile_offsets_v5_v6(ConstBuffer * buff)2007 Status FragmentMetadata::load_generic_tile_offsets_v5_v6(ConstBuffer* buff) {
2008 // Load R-Tree offset
2009 RETURN_NOT_OK(buff->read(>_offsets_.rtree_, sizeof(uint64_t)));
2010
2011 // Load offsets for tile offsets
2012 auto num = array_schema_->attribute_num() + array_schema_->dim_num() + 1;
2013 gt_offsets_.tile_offsets_.resize(num);
2014 for (unsigned i = 0; i < num; ++i) {
2015 RETURN_NOT_OK(buff->read(>_offsets_.tile_offsets_[i], sizeof(uint64_t)));
2016 }
2017
2018 // Load offsets for tile var offsets
2019 gt_offsets_.tile_var_offsets_.resize(num);
2020 for (unsigned i = 0; i < num; ++i) {
2021 RETURN_NOT_OK(
2022 buff->read(>_offsets_.tile_var_offsets_[i], sizeof(uint64_t)));
2023 }
2024
2025 // Load offsets for tile var sizes
2026 gt_offsets_.tile_var_sizes_.resize(num);
2027 for (unsigned i = 0; i < num; ++i) {
2028 RETURN_NOT_OK(
2029 buff->read(>_offsets_.tile_var_sizes_[i], sizeof(uint64_t)));
2030 }
2031
2032 return Status::Ok();
2033 }
2034
load_generic_tile_offsets_v7_or_higher(ConstBuffer * buff)2035 Status FragmentMetadata::load_generic_tile_offsets_v7_or_higher(
2036 ConstBuffer* buff) {
2037 // Load R-Tree offset
2038 RETURN_NOT_OK(buff->read(>_offsets_.rtree_, sizeof(uint64_t)));
2039
2040 // Load offsets for tile offsets
2041 auto num = array_schema_->attribute_num() + array_schema_->dim_num() + 1;
2042 gt_offsets_.tile_offsets_.resize(num);
2043 for (unsigned i = 0; i < num; ++i) {
2044 RETURN_NOT_OK(buff->read(>_offsets_.tile_offsets_[i], sizeof(uint64_t)));
2045 }
2046
2047 // Load offsets for tile var offsets
2048 gt_offsets_.tile_var_offsets_.resize(num);
2049 for (unsigned i = 0; i < num; ++i) {
2050 RETURN_NOT_OK(
2051 buff->read(>_offsets_.tile_var_offsets_[i], sizeof(uint64_t)));
2052 }
2053
2054 // Load offsets for tile var sizes
2055 gt_offsets_.tile_var_sizes_.resize(num);
2056 for (unsigned i = 0; i < num; ++i) {
2057 RETURN_NOT_OK(
2058 buff->read(>_offsets_.tile_var_sizes_[i], sizeof(uint64_t)));
2059 }
2060
2061 // Load offsets for tile validity offsets
2062 if (version_ >= 7) {
2063 gt_offsets_.tile_validity_offsets_.resize(num);
2064 for (unsigned i = 0; i < num; ++i) {
2065 RETURN_NOT_OK(
2066 buff->read(>_offsets_.tile_validity_offsets_[i], sizeof(uint64_t)));
2067 }
2068 }
2069
2070 return Status::Ok();
2071 }
2072
load_array_schema_name(ConstBuffer * buff)2073 Status FragmentMetadata::load_array_schema_name(ConstBuffer* buff) {
2074 uint64_t size = 0;
2075 RETURN_NOT_OK(buff->read(&size, sizeof(uint64_t)));
2076 if (size == 0) {
2077 return LOG_STATUS(Status::FragmentMetadataError(
2078 "Cannot load array schema name; Size of schema name is zero"));
2079 }
2080 array_schema_name_.resize(size);
2081
2082 RETURN_NOT_OK(buff->read(&array_schema_name_[0], size));
2083
2084 return Status::Ok();
2085 }
2086
load_v1_v2(const EncryptionKey & encryption_key,const std::unordered_map<std::string,tiledb_shared_ptr<ArraySchema>> & array_schemas)2087 Status FragmentMetadata::load_v1_v2(
2088 const EncryptionKey& encryption_key,
2089 const std::unordered_map<std::string, tiledb_shared_ptr<ArraySchema>>&
2090 array_schemas) {
2091 URI fragment_metadata_uri = fragment_uri_.join_path(
2092 std::string(constants::fragment_metadata_filename));
2093 // Read metadata
2094 GenericTileIO tile_io(storage_manager_, fragment_metadata_uri);
2095 auto tile = (Tile*)nullptr;
2096 RETURN_NOT_OK(tile_io.read_generic(
2097 &tile, 0, encryption_key, storage_manager_->config()));
2098
2099 auto buffer = tile->buffer();
2100 Buffer buff;
2101 RETURN_NOT_OK_ELSE(buff.realloc(buffer->size()), tdb_delete(tile));
2102 buff.set_size(buffer->size());
2103 buffer->reset_offset();
2104 RETURN_NOT_OK_ELSE(buffer->read(buff.data(), buff.size()), tdb_delete(tile));
2105 tdb_delete(tile);
2106
2107 storage_manager_->stats()->add_counter("read_frag_meta_size", buff.size());
2108
2109 // Pre-v10 format fragments we need to set the schema and schema name to
2110 // the "old" schema. This way "old" fragments are still loaded fine
2111 array_schema_name_ = tiledb::sm::constants::array_schema_filename;
2112 auto schema = array_schemas.find(array_schema_name_);
2113 if (schema != array_schemas.end()) {
2114 set_array_schema(schema->second.get());
2115 } else {
2116 return Status::FragmentMetadataError(
2117 "Could not find schema" + array_schema_name_ +
2118 " in map of schemas loaded.\n" +
2119 "Consider reloading the array to check for new array schemas.");
2120 }
2121
2122 // Deserialize
2123 ConstBuffer cbuff(&buff);
2124 RETURN_NOT_OK(load_version(&cbuff));
2125 RETURN_NOT_OK(load_non_empty_domain(&cbuff));
2126 RETURN_NOT_OK(load_mbrs(&cbuff));
2127 RETURN_NOT_OK(load_bounding_coords(&cbuff));
2128 RETURN_NOT_OK(load_tile_offsets(&cbuff));
2129 RETURN_NOT_OK(load_tile_var_offsets(&cbuff));
2130 RETURN_NOT_OK(load_tile_var_sizes(&cbuff));
2131 RETURN_NOT_OK(load_last_tile_cell_num(&cbuff));
2132 RETURN_NOT_OK(load_file_sizes(&cbuff));
2133 RETURN_NOT_OK(load_file_var_sizes(&cbuff));
2134 RETURN_NOT_OK(load_file_validity_sizes(&cbuff));
2135
2136 return Status::Ok();
2137 }
2138
load_v3_or_higher(const EncryptionKey & encryption_key,Buffer * f_buff,uint64_t offset,std::unordered_map<std::string,tiledb_shared_ptr<ArraySchema>> array_schemas)2139 Status FragmentMetadata::load_v3_or_higher(
2140 const EncryptionKey& encryption_key,
2141 Buffer* f_buff,
2142 uint64_t offset,
2143 std::unordered_map<std::string, tiledb_shared_ptr<ArraySchema>>
2144 array_schemas) {
2145 RETURN_NOT_OK(load_footer(encryption_key, f_buff, offset, array_schemas));
2146 return Status::Ok();
2147 }
2148
load_footer(const EncryptionKey & encryption_key,Buffer * f_buff,uint64_t offset,std::unordered_map<std::string,tiledb_shared_ptr<ArraySchema>> array_schemas)2149 Status FragmentMetadata::load_footer(
2150 const EncryptionKey& encryption_key,
2151 Buffer* f_buff,
2152 uint64_t offset,
2153 std::unordered_map<std::string, tiledb_shared_ptr<ArraySchema>>
2154 array_schemas) {
2155 (void)encryption_key; // Not used for now, perhaps in the future
2156 std::lock_guard<std::mutex> lock(mtx_);
2157
2158 if (loaded_metadata_.footer_)
2159 return Status::Ok();
2160
2161 Buffer buff;
2162 tdb_shared_ptr<ConstBuffer> cbuff = nullptr;
2163 if (f_buff == nullptr) {
2164 has_consolidated_footer_ = false;
2165 RETURN_NOT_OK(read_file_footer(&buff, &footer_offset_, &footer_size_));
2166 cbuff = tdb_make_shared(ConstBuffer, &buff);
2167 } else {
2168 footer_size_ = 0;
2169 footer_offset_ = offset;
2170 has_consolidated_footer_ = true;
2171 cbuff = tdb_make_shared(ConstBuffer, f_buff);
2172 cbuff->set_offset(offset);
2173 }
2174
2175 RETURN_NOT_OK(load_version(cbuff.get()));
2176 if (version_ >= 10) {
2177 RETURN_NOT_OK(load_array_schema_name(cbuff.get()));
2178 auto schema = array_schemas.find(array_schema_name_);
2179 if (schema != array_schemas.end()) {
2180 set_array_schema(schema->second.get());
2181 } else {
2182 return Status::FragmentMetadataError(
2183 "Could not find schema" + array_schema_name_ +
2184 " in map of schemas loaded.\n" +
2185 "Consider reloading the array to check for new array schemas.");
2186 }
2187 } else {
2188 // Pre-v10 format fragments we need to set the schema and schema name to
2189 // the "old" schema. This way "old" fragments are still loaded fine
2190 array_schema_name_ = tiledb::sm::constants::array_schema_filename;
2191 auto schema = array_schemas.find(array_schema_name_);
2192 if (schema != array_schemas.end()) {
2193 set_array_schema(schema->second.get());
2194 } else {
2195 return Status::FragmentMetadataError(
2196 "Could not find schema" + array_schema_name_ +
2197 " in map of schemas loaded.\n" +
2198 "Consider reloading the array to check for new array schemas.");
2199 }
2200 }
2201 RETURN_NOT_OK(load_dense(cbuff.get()));
2202 RETURN_NOT_OK(load_non_empty_domain(cbuff.get()));
2203 RETURN_NOT_OK(load_sparse_tile_num(cbuff.get()));
2204 RETURN_NOT_OK(load_last_tile_cell_num(cbuff.get()));
2205 RETURN_NOT_OK(load_file_sizes(cbuff.get()));
2206 RETURN_NOT_OK(load_file_var_sizes(cbuff.get()));
2207 RETURN_NOT_OK(load_file_validity_sizes(cbuff.get()));
2208
2209 unsigned num = array_schema_->attribute_num() + 1;
2210 num += (version_ >= 5) ? array_schema_->dim_num() : 0;
2211
2212 tile_offsets_.resize(num);
2213 tile_offsets_mtx_.resize(num);
2214 tile_var_offsets_.resize(num);
2215 tile_var_offsets_mtx_.resize(num);
2216 tile_var_sizes_.resize(num);
2217 tile_validity_offsets_.resize(num);
2218
2219 loaded_metadata_.tile_offsets_.resize(num, false);
2220 loaded_metadata_.tile_var_offsets_.resize(num, false);
2221 loaded_metadata_.tile_var_sizes_.resize(num, false);
2222 loaded_metadata_.tile_validity_offsets_.resize(num, false);
2223
2224 RETURN_NOT_OK(load_generic_tile_offsets(cbuff.get()));
2225
2226 loaded_metadata_.footer_ = true;
2227
2228 // If the footer_size is not set lets calculate from how much of the buffer we
2229 // read
2230 if (footer_size_ == 0)
2231 footer_size_ = cbuff->offset() - offset;
2232
2233 return Status::Ok();
2234 }
2235
2236 // ===== FORMAT =====
2237 // file_sizes#0 (uint64_t)
2238 // ...
2239 // file_sizes#{attribute_num+dim_num} (uint64_t)
write_file_sizes(Buffer * buff) const2240 Status FragmentMetadata::write_file_sizes(Buffer* buff) const {
2241 auto num = array_schema_->attribute_num() + array_schema_->dim_num() + 1;
2242 Status st = buff->write(&file_sizes_[0], num * sizeof(uint64_t));
2243 if (!st.ok()) {
2244 return LOG_STATUS(Status::FragmentMetadataError(
2245 "Cannot serialize fragment metadata; Writing file sizes failed"));
2246 }
2247
2248 return Status::Ok();
2249 }
2250
2251 // ===== FORMAT =====
2252 // file_var_sizes#0 (uint64_t)
2253 // ...
2254 // file_var_sizes#{attribute_num+dim_num} (uint64_t)
write_file_var_sizes(Buffer * buff) const2255 Status FragmentMetadata::write_file_var_sizes(Buffer* buff) const {
2256 auto num = array_schema_->attribute_num() + array_schema_->dim_num() + 1;
2257 Status st = buff->write(&file_var_sizes_[0], num * sizeof(uint64_t));
2258 if (!st.ok()) {
2259 return LOG_STATUS(Status::FragmentMetadataError(
2260 "Cannot serialize fragment metadata; Writing file sizes failed"));
2261 }
2262
2263 return Status::Ok();
2264 }
2265
2266 // ===== FORMAT =====
2267 // file_validity_sizes#0 (uint64_t)
2268 // ...
2269 // file_validity_sizes#{attribute_num+dim_num} (uint64_t)
write_file_validity_sizes(Buffer * buff) const2270 Status FragmentMetadata::write_file_validity_sizes(Buffer* buff) const {
2271 if (version_ <= 6)
2272 return Status::Ok();
2273
2274 auto num = array_schema_->attribute_num() + array_schema_->dim_num() + 1;
2275 Status st = buff->write(&file_validity_sizes_[0], num * sizeof(uint64_t));
2276 if (!st.ok()) {
2277 return LOG_STATUS(Status::FragmentMetadataError(
2278 "Cannot serialize fragment metadata; Writing file sizes failed"));
2279 }
2280
2281 return Status::Ok();
2282 }
2283
2284 // ===== FORMAT =====
2285 // rtree_offset(uint64_t)
2286 // tile_offsets_offset_0(uint64_t)
2287 // ...
2288 // tile_offsets_offset_{attr_num+dim_num}(uint64_t)
2289 // tile_var_offsets_0(uint64_t)
2290 // ...
2291 // tile_var_offsets_{attr_num+dim_num}(uint64_t)
2292 // tile_var_sizes_0(uint64_t)
2293 // ...
2294 // tile_var_sizes_{attr_num+dim_num}(uint64_t)
write_generic_tile_offsets(Buffer * buff) const2295 Status FragmentMetadata::write_generic_tile_offsets(Buffer* buff) const {
2296 auto num = array_schema_->attribute_num() + array_schema_->dim_num() + 1;
2297
2298 // Write R-Tree offset
2299 auto st = buff->write(>_offsets_.rtree_, sizeof(uint64_t));
2300 if (!st.ok()) {
2301 return LOG_STATUS(Status::FragmentMetadataError(
2302 "Cannot serialize fragment metadata; Writing R-Tree offset failed"));
2303 }
2304
2305 // Write tile offsets
2306 for (unsigned i = 0; i < num; ++i) {
2307 st = buff->write(>_offsets_.tile_offsets_[i], sizeof(uint64_t));
2308 if (!st.ok()) {
2309 return LOG_STATUS(Status::FragmentMetadataError(
2310 "Cannot serialize fragment metadata; Writing tile offsets failed"));
2311 }
2312 }
2313
2314 // Write tile var offsets
2315 for (unsigned i = 0; i < num; ++i) {
2316 st = buff->write(>_offsets_.tile_var_offsets_[i], sizeof(uint64_t));
2317 if (!st.ok()) {
2318 return LOG_STATUS(
2319 Status::FragmentMetadataError("Cannot serialize fragment metadata; "
2320 "Writing tile var offsets failed"));
2321 }
2322 }
2323
2324 // Write tile var sizes
2325 for (unsigned i = 0; i < num; ++i) {
2326 st = buff->write(>_offsets_.tile_var_sizes_[i], sizeof(uint64_t));
2327 if (!st.ok()) {
2328 return LOG_STATUS(Status::FragmentMetadataError(
2329 "Cannot serialize fragment metadata; Writing tile var sizes failed"));
2330 }
2331 }
2332
2333 // Write tile validity offsets
2334 if (version_ >= 7) {
2335 for (unsigned i = 0; i < num; ++i) {
2336 st =
2337 buff->write(>_offsets_.tile_validity_offsets_[i], sizeof(uint64_t));
2338 if (!st.ok()) {
2339 return LOG_STATUS(Status::FragmentMetadataError(
2340 "Cannot serialize fragment metadata; Writing tile offsets failed"));
2341 }
2342 }
2343 }
2344
2345 return Status::Ok();
2346 }
2347
write_array_schema_name(Buffer * buff) const2348 Status FragmentMetadata::write_array_schema_name(Buffer* buff) const {
2349 uint64_t size = array_schema_name_.size();
2350 if (size == 0) {
2351 return LOG_STATUS(Status::FragmentMetadataError(
2352 "Cannot write array schema name; Size of schema name is zero"));
2353 }
2354 RETURN_NOT_OK(buff->write(&size, sizeof(uint64_t)));
2355 return buff->write(array_schema_name_.c_str(), size);
2356 }
2357
2358 // ===== FORMAT =====
2359 // last_tile_cell_num(uint64_t)
write_last_tile_cell_num(Buffer * buff) const2360 Status FragmentMetadata::write_last_tile_cell_num(Buffer* buff) const {
2361 uint64_t cell_num_per_tile =
2362 dense_ ? array_schema_->domain()->cell_num_per_tile() :
2363 array_schema_->capacity();
2364
2365 // Handle the case of zero
2366 uint64_t last_tile_cell_num =
2367 (last_tile_cell_num_ == 0) ? cell_num_per_tile : last_tile_cell_num_;
2368
2369 Status st = buff->write(&last_tile_cell_num, sizeof(uint64_t));
2370 if (!st.ok()) {
2371 return LOG_STATUS(
2372 Status::FragmentMetadataError("Cannot serialize fragment metadata; "
2373 "Writing last tile cell number failed"));
2374 }
2375 return Status::Ok();
2376 }
2377
store_rtree(const EncryptionKey & encryption_key,uint64_t * nbytes)2378 Status FragmentMetadata::store_rtree(
2379 const EncryptionKey& encryption_key, uint64_t* nbytes) {
2380 Buffer buff;
2381 RETURN_NOT_OK(write_rtree(&buff));
2382
2383 RETURN_NOT_OK(
2384 write_generic_tile_to_file(encryption_key, std::move(buff), nbytes));
2385 storage_manager_->stats()->add_counter("write_rtree_size", *nbytes);
2386
2387 return Status::Ok();
2388 }
2389
write_rtree(Buffer * buff)2390 Status FragmentMetadata::write_rtree(Buffer* buff) {
2391 RETURN_NOT_OK(rtree_.build_tree());
2392 RETURN_NOT_OK(rtree_.serialize(buff));
2393 return Status::Ok();
2394 }
2395
2396 // ===== FORMAT =====
2397 // null_non_empty_domain(char)
2398 // fix-sized: range(void*)
2399 // var-sized: range_size(uint64_t) | start_range_size(uint64_t) | range(void*)
2400 // ...
write_non_empty_domain(Buffer * buff) const2401 Status FragmentMetadata::write_non_empty_domain(Buffer* buff) const {
2402 // Write null_non_empty_domain
2403 auto null_non_empty_domain = (char)non_empty_domain_.empty();
2404 RETURN_NOT_OK(buff->write(&null_non_empty_domain, sizeof(char)));
2405
2406 // Write domain size
2407 uint64_t domain_size = 0;
2408 auto domain = array_schema_->domain();
2409 auto dim_num = domain->dim_num();
2410 if (non_empty_domain_.empty()) {
2411 // Applicable only to homogeneous domains with fixed-sized types
2412 assert(domain->all_dims_fixed());
2413 assert(domain->all_dims_same_type());
2414 domain_size = 2 * dim_num * domain->dimension(0)->coord_size();
2415
2416 // Write domain (dummy values)
2417 std::vector<uint8_t> d(domain_size, 0);
2418 RETURN_NOT_OK(buff->write(&d[0], domain_size));
2419 } else {
2420 // Write non-empty domain
2421 for (unsigned d = 0; d < dim_num; ++d) {
2422 auto dim = domain->dimension(d);
2423 const auto& r = non_empty_domain_[d];
2424 if (!dim->var_size()) { // Fixed-sized
2425 RETURN_NOT_OK(buff->write(r.data(), r.size()));
2426 } else { // Var-sized
2427 auto r_size = r.size();
2428 auto r_start_size = r.start_size();
2429 RETURN_NOT_OK(buff->write(&r_size, sizeof(uint64_t)));
2430 RETURN_NOT_OK(buff->write(&r_start_size, sizeof(uint64_t)));
2431 RETURN_NOT_OK(buff->write(r.data(), r_size));
2432 }
2433 }
2434 }
2435
2436 return Status::Ok();
2437 }
2438
read_generic_tile_from_file(const EncryptionKey & encryption_key,uint64_t offset,Buffer * buff) const2439 Status FragmentMetadata::read_generic_tile_from_file(
2440 const EncryptionKey& encryption_key, uint64_t offset, Buffer* buff) const {
2441 URI fragment_metadata_uri = fragment_uri_.join_path(
2442 std::string(constants::fragment_metadata_filename));
2443
2444 // Read metadata
2445 GenericTileIO tile_io(storage_manager_, fragment_metadata_uri);
2446 Tile* tile = nullptr;
2447 RETURN_NOT_OK(tile_io.read_generic(
2448 &tile, offset, encryption_key, storage_manager_->config()));
2449
2450 const auto buffer = tile->buffer();
2451 buff->realloc(buffer->size());
2452 buff->set_size(buffer->size());
2453 buffer->reset_offset();
2454 RETURN_NOT_OK_ELSE(
2455 buffer->read(buff->data(), buff->size()), tdb_delete(tile));
2456 tdb_delete(tile);
2457
2458 return Status::Ok();
2459 }
2460
read_file_footer(Buffer * buff,uint64_t * footer_offset,uint64_t * footer_size) const2461 Status FragmentMetadata::read_file_footer(
2462 Buffer* buff, uint64_t* footer_offset, uint64_t* footer_size) const {
2463 URI fragment_metadata_uri = fragment_uri_.join_path(
2464 std::string(constants::fragment_metadata_filename));
2465
2466 // Get footer offset
2467 RETURN_NOT_OK(get_footer_offset_and_size(footer_offset, footer_size));
2468
2469 storage_manager_->stats()->add_counter("read_frag_meta_size", *footer_size);
2470
2471 auto memory_tracker = storage_manager_->array_memory_tracker(array_uri_);
2472 assert(memory_tracker);
2473 if (!memory_tracker->take_memory(*footer_size)) {
2474 return LOG_STATUS(Status::FragmentMetadataError(
2475 "Cannot load file footer; Insufficient memory budget"));
2476 }
2477
2478 // Read footer
2479 return storage_manager_->read(
2480 fragment_metadata_uri, *footer_offset, buff, *footer_size);
2481 }
2482
write_generic_tile_to_file(const EncryptionKey & encryption_key,Buffer && buff,uint64_t * nbytes) const2483 Status FragmentMetadata::write_generic_tile_to_file(
2484 const EncryptionKey& encryption_key,
2485 Buffer&& buff,
2486 uint64_t* nbytes) const {
2487 URI fragment_metadata_uri = fragment_uri_.join_path(
2488 std::string(constants::fragment_metadata_filename));
2489
2490 Buffer* const buffer = tdb_new(Buffer);
2491 buffer->swap(buff);
2492
2493 Tile tile(
2494 constants::generic_tile_datatype,
2495 constants::generic_tile_cell_size,
2496 0,
2497 buffer,
2498 true);
2499
2500 GenericTileIO tile_io(storage_manager_, fragment_metadata_uri);
2501 RETURN_NOT_OK(tile_io.write_generic(&tile, encryption_key, nbytes));
2502
2503 return Status::Ok();
2504 }
2505
write_footer_to_file(Buffer * buff) const2506 Status FragmentMetadata::write_footer_to_file(Buffer* buff) const {
2507 URI fragment_metadata_uri = fragment_uri_.join_path(
2508 std::string(constants::fragment_metadata_filename));
2509
2510 auto size = buff->size();
2511 RETURN_NOT_OK(storage_manager_->write(
2512 fragment_metadata_uri, buff->data(), buff->size()));
2513
2514 // Write the size in the end if there is at least one var-sized dimension
2515 if (!array_schema_->domain()->all_dims_fixed() || version_ >= 10)
2516 return storage_manager_->write(fragment_metadata_uri, &size, sizeof(size));
2517 return Status::Ok();
2518 }
2519
store_tile_offsets(unsigned idx,const EncryptionKey & encryption_key,uint64_t * nbytes)2520 Status FragmentMetadata::store_tile_offsets(
2521 unsigned idx, const EncryptionKey& encryption_key, uint64_t* nbytes) {
2522 Buffer buff;
2523 RETURN_NOT_OK(write_tile_offsets(idx, &buff));
2524 RETURN_NOT_OK(
2525 write_generic_tile_to_file(encryption_key, std::move(buff), nbytes));
2526
2527 storage_manager_->stats()->add_counter("write_tile_offsets_size", *nbytes);
2528
2529 return Status::Ok();
2530 }
2531
write_tile_offsets(unsigned idx,Buffer * buff)2532 Status FragmentMetadata::write_tile_offsets(unsigned idx, Buffer* buff) {
2533 Status st;
2534
2535 // Write number of tile offsets
2536 uint64_t tile_offsets_num = tile_offsets_[idx].size();
2537 st = buff->write(&tile_offsets_num, sizeof(uint64_t));
2538 if (!st.ok()) {
2539 return LOG_STATUS(Status::FragmentMetadataError(
2540 "Cannot serialize fragment metadata; Writing number of tile offsets "
2541 "failed"));
2542 }
2543
2544 // Write tile offsets
2545 if (tile_offsets_num != 0) {
2546 st = buff->write(
2547 &tile_offsets_[idx][0], tile_offsets_num * sizeof(uint64_t));
2548 if (!st.ok()) {
2549 return LOG_STATUS(Status::FragmentMetadataError(
2550 "Cannot serialize fragment metadata; Writing tile offsets failed"));
2551 }
2552 }
2553
2554 return Status::Ok();
2555 }
2556
store_tile_var_offsets(unsigned idx,const EncryptionKey & encryption_key,uint64_t * nbytes)2557 Status FragmentMetadata::store_tile_var_offsets(
2558 unsigned idx, const EncryptionKey& encryption_key, uint64_t* nbytes) {
2559 Buffer buff;
2560 RETURN_NOT_OK(write_tile_var_offsets(idx, &buff));
2561 RETURN_NOT_OK(
2562 write_generic_tile_to_file(encryption_key, std::move(buff), nbytes));
2563
2564 storage_manager_->stats()->add_counter(
2565 "write_tile_var_offsets_size", *nbytes);
2566
2567 return Status::Ok();
2568 }
2569
write_tile_var_offsets(unsigned idx,Buffer * buff)2570 Status FragmentMetadata::write_tile_var_offsets(unsigned idx, Buffer* buff) {
2571 Status st;
2572
2573 // Write tile offsets for each attribute
2574 // Write number of offsets
2575 uint64_t tile_var_offsets_num = tile_var_offsets_[idx].size();
2576 st = buff->write(&tile_var_offsets_num, sizeof(uint64_t));
2577 if (!st.ok()) {
2578 return LOG_STATUS(Status::FragmentMetadataError(
2579 "Cannot serialize fragment metadata; Writing number of "
2580 "variable tile offsets failed"));
2581 }
2582
2583 // Write tile offsets
2584 if (tile_var_offsets_num != 0) {
2585 st = buff->write(
2586 &tile_var_offsets_[idx][0], tile_var_offsets_num * sizeof(uint64_t));
2587 if (!st.ok()) {
2588 return LOG_STATUS(Status::FragmentMetadataError(
2589 "Cannot serialize fragment metadata; Writing "
2590 "variable tile offsets failed"));
2591 }
2592 }
2593
2594 return Status::Ok();
2595 }
2596
store_tile_var_sizes(unsigned idx,const EncryptionKey & encryption_key,uint64_t * nbytes)2597 Status FragmentMetadata::store_tile_var_sizes(
2598 unsigned idx, const EncryptionKey& encryption_key, uint64_t* nbytes) {
2599 Buffer buff;
2600 RETURN_NOT_OK(write_tile_var_sizes(idx, &buff));
2601 RETURN_NOT_OK(
2602 write_generic_tile_to_file(encryption_key, std::move(buff), nbytes));
2603
2604 storage_manager_->stats()->add_counter("write_tile_var_sizes_size", *nbytes);
2605
2606 return Status::Ok();
2607 }
2608
write_tile_var_sizes(unsigned idx,Buffer * buff)2609 Status FragmentMetadata::write_tile_var_sizes(unsigned idx, Buffer* buff) {
2610 Status st;
2611
2612 // Write number of sizes
2613 uint64_t tile_var_sizes_num = tile_var_sizes_[idx].size();
2614 st = buff->write(&tile_var_sizes_num, sizeof(uint64_t));
2615 if (!st.ok()) {
2616 return LOG_STATUS(Status::FragmentMetadataError(
2617 "Cannot serialize fragment metadata; Writing number of "
2618 "variable tile sizes failed"));
2619 }
2620
2621 // Write tile sizes
2622 if (tile_var_sizes_num != 0) {
2623 st = buff->write(
2624 &tile_var_sizes_[idx][0], tile_var_sizes_num * sizeof(uint64_t));
2625 if (!st.ok()) {
2626 return LOG_STATUS(
2627 Status::FragmentMetadataError("Cannot serialize fragment metadata; "
2628 "Writing variable tile sizes failed"));
2629 }
2630 }
2631 return Status::Ok();
2632 }
2633
store_tile_validity_offsets(unsigned idx,const EncryptionKey & encryption_key,uint64_t * nbytes)2634 Status FragmentMetadata::store_tile_validity_offsets(
2635 unsigned idx, const EncryptionKey& encryption_key, uint64_t* nbytes) {
2636 Buffer buff;
2637 RETURN_NOT_OK(write_tile_validity_offsets(idx, &buff));
2638 RETURN_NOT_OK(
2639 write_generic_tile_to_file(encryption_key, std::move(buff), nbytes));
2640
2641 storage_manager_->stats()->add_counter(
2642 "write_tile_validity_offsets_size", *nbytes);
2643
2644 return Status::Ok();
2645 }
2646
write_tile_validity_offsets(unsigned idx,Buffer * buff)2647 Status FragmentMetadata::write_tile_validity_offsets(
2648 unsigned idx, Buffer* buff) {
2649 Status st;
2650
2651 // Write number of tile offsets
2652 uint64_t tile_validity_offsets_num = tile_validity_offsets_[idx].size();
2653 st = buff->write(&tile_validity_offsets_num, sizeof(uint64_t));
2654 if (!st.ok()) {
2655 return LOG_STATUS(
2656 Status::FragmentMetadataError("Cannot serialize fragment metadata; "
2657 "Writing number of validity tile offsets "
2658 "failed"));
2659 }
2660
2661 // Write tile offsets
2662 if (tile_validity_offsets_num != 0) {
2663 st = buff->write(
2664 &tile_validity_offsets_[idx][0],
2665 tile_validity_offsets_num * sizeof(uint64_t));
2666 if (!st.ok()) {
2667 return LOG_STATUS(Status::FragmentMetadataError(
2668 "Cannot serialize fragment metadata; Writing tile offsets failed"));
2669 }
2670 }
2671
2672 return Status::Ok();
2673 }
2674
write_version(Buffer * buff) const2675 Status FragmentMetadata::write_version(Buffer* buff) const {
2676 RETURN_NOT_OK(buff->write(&version_, sizeof(uint32_t)));
2677 return Status::Ok();
2678 }
2679
write_dense(Buffer * buff) const2680 Status FragmentMetadata::write_dense(Buffer* buff) const {
2681 RETURN_NOT_OK(buff->write(&dense_, sizeof(char)));
2682 return Status::Ok();
2683 }
2684
write_sparse_tile_num(Buffer * buff) const2685 Status FragmentMetadata::write_sparse_tile_num(Buffer* buff) const {
2686 RETURN_NOT_OK(buff->write(&sparse_tile_num_, sizeof(uint64_t)));
2687 return Status::Ok();
2688 }
2689
store_footer(const EncryptionKey & encryption_key)2690 Status FragmentMetadata::store_footer(const EncryptionKey& encryption_key) {
2691 (void)encryption_key; // Not used for now, maybe in the future
2692
2693 Buffer buff;
2694 RETURN_NOT_OK(write_footer(&buff));
2695 RETURN_NOT_OK(write_footer_to_file(&buff));
2696
2697 storage_manager_->stats()->add_counter(
2698 "write_frag_meta_footer_size", buff.size());
2699
2700 return Status::Ok();
2701 }
2702
clean_up()2703 void FragmentMetadata::clean_up() {
2704 auto fragment_metadata_uri =
2705 fragment_uri_.join_path(constants::fragment_metadata_filename);
2706
2707 storage_manager_->close_file(fragment_metadata_uri);
2708 storage_manager_->vfs()->remove_file(fragment_metadata_uri);
2709 storage_manager_->array_xunlock(array_uri_);
2710 }
2711
array_schema() const2712 const ArraySchema* FragmentMetadata::array_schema() const {
2713 return array_schema_;
2714 }
2715
build_idx_map()2716 void FragmentMetadata::build_idx_map() {
2717 idx_map_.clear();
2718
2719 auto attributes = array_schema_->attributes();
2720 for (unsigned i = 0; i < attributes.size(); ++i) {
2721 auto attr_name = attributes[i]->name();
2722 idx_map_[attr_name] = i;
2723 }
2724 idx_map_[constants::coords] = array_schema_->attribute_num();
2725 for (unsigned i = 0; i < array_schema_->dim_num(); ++i) {
2726 auto dim_name = array_schema_->dimension(i)->name();
2727 idx_map_[dim_name] = array_schema_->attribute_num() + 1 + i;
2728 }
2729 }
2730
2731 // Explicit template instantiations
2732 template std::vector<std::pair<uint64_t, double>>
2733 FragmentMetadata::compute_overlapping_tile_ids_cov<int8_t>(
2734 const int8_t* subarray) const;
2735 template std::vector<std::pair<uint64_t, double>>
2736 FragmentMetadata::compute_overlapping_tile_ids_cov<uint8_t>(
2737 const uint8_t* subarray) const;
2738 template std::vector<std::pair<uint64_t, double>>
2739 FragmentMetadata::compute_overlapping_tile_ids_cov<int16_t>(
2740 const int16_t* subarray) const;
2741 template std::vector<std::pair<uint64_t, double>>
2742 FragmentMetadata::compute_overlapping_tile_ids_cov<uint16_t>(
2743 const uint16_t* subarray) const;
2744 template std::vector<std::pair<uint64_t, double>>
2745 FragmentMetadata::compute_overlapping_tile_ids_cov<int32_t>(
2746 const int32_t* subarray) const;
2747 template std::vector<std::pair<uint64_t, double>>
2748 FragmentMetadata::compute_overlapping_tile_ids_cov<uint32_t>(
2749 const uint32_t* subarray) const;
2750 template std::vector<std::pair<uint64_t, double>>
2751 FragmentMetadata::compute_overlapping_tile_ids_cov<int64_t>(
2752 const int64_t* subarray) const;
2753 template std::vector<std::pair<uint64_t, double>>
2754 FragmentMetadata::compute_overlapping_tile_ids_cov<uint64_t>(
2755 const uint64_t* subarray) const;
2756 template std::vector<std::pair<uint64_t, double>>
2757 FragmentMetadata::compute_overlapping_tile_ids_cov<float>(
2758 const float* subarray) const;
2759 template std::vector<std::pair<uint64_t, double>>
2760 FragmentMetadata::compute_overlapping_tile_ids_cov<double>(
2761 const double* subarray) const;
2762
2763 } // namespace sm
2764 } // namespace tiledb
2765