1 /**
2 * @file info_command.cc
3 *
4 * @section LICENSE
5 *
6 * The MIT License
7 *
8 * @copyright Copyright (c) 2018-2021 TileDB, Inc.
9 *
10 * Permission is hereby granted, free of charge, to any person obtaining a copy
11 * of this software and associated documentation files (the "Software"), to deal
12 * in the Software without restriction, including without limitation the rights
13 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
14 * copies of the Software, and to permit persons to whom the Software is
15 * furnished to do so, subject to the following conditions:
16 *
17 * The above copyright notice and this permission notice shall be included in
18 * all copies or substantial portions of the Software.
19 *
20 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
21 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
23 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
24 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
25 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
26 * THE SOFTWARE.
27 *
28 * @section DESCRIPTION
29 *
30 * This file defines the info command.
31 */
32
33 #include "commands/info_command.h"
34 #include "misc/common.h"
35
36 #include "tiledb/common/logger.h"
37 #include "tiledb/sm/array/array.h"
38 #include "tiledb/sm/array_schema/array_schema.h"
39 #include "tiledb/sm/array_schema/attribute.h"
40 #include "tiledb/sm/array_schema/dimension.h"
41 #include "tiledb/sm/crypto/encryption_key.h"
42 #include "tiledb/sm/enums/encryption_type.h"
43 #include "tiledb/sm/enums/query_type.h"
44 #include "tiledb/sm/fragment/fragment_metadata.h"
45 #include "tiledb/sm/storage_manager/storage_manager.h"
46
47 #include <cassert>
48 #include <fstream>
49 #include <iostream>
50 #include <sstream>
51
52 namespace tiledb {
53 namespace cli {
54
55 using namespace tiledb::sm;
56
57 /** The thread pool for compute-bound tasks. */
58 ThreadPool compute_tp_;
59
60 /** The thread pool for io-bound tasks. */
61 ThreadPool io_tp_;
62
get_cli()63 clipp::group InfoCommand::get_cli() {
64 using namespace clipp;
65 auto array_arg =
66 ((option("-a", "--array").required(true) & value("uri", array_uri_)) %
67 "URI of TileDB array");
68
69 auto schema_info =
70 "array-schema: Prints basic information about the array's schema." %
71 (command("array-schema").set(type_, InfoType::ArraySchema), array_arg);
72
73 auto tile_sizes =
74 "tile-sizes: Prints statistics about tile sizes in the array." %
75 (command("tile-sizes").set(type_, InfoType::TileSizes), array_arg);
76
77 auto svg_mbrs =
78 "svg-mbrs: Produces an SVG visualizing the MBRs (2D arrays only)" %
79 (command("svg-mbrs").set(type_, InfoType::SVGMBRs),
80 array_arg,
81 option("-o", "--output").doc("Path to write output SVG") &
82 value("path", output_path_),
83 option("-w", "--width").doc("Width of output SVG") &
84 value("N", svg_width_),
85 option("-h", "--height").doc("Height of output SVG") &
86 value("N", svg_height_));
87
88 auto dump_mbrs =
89 "dump-mbrs: Dumps the MBRs in the array to text output." %
90 (command("dump-mbrs").set(type_, InfoType::DumpMBRs),
91 array_arg,
92 option("-o", "--output").doc("Path to write output text file") &
93 value("path", output_path_));
94
95 auto cli = schema_info | tile_sizes | dump_mbrs | svg_mbrs;
96 return cli;
97 }
98
run()99 void InfoCommand::run() {
100 io_tp_.init(std::thread::hardware_concurrency());
101 compute_tp_.init(std::thread::hardware_concurrency());
102
103 switch (type_) {
104 case InfoType::None:
105 break;
106 case InfoType::TileSizes:
107 print_tile_sizes();
108 break;
109 case InfoType::SVGMBRs:
110 write_svg_mbrs();
111 break;
112 case InfoType::DumpMBRs:
113 write_text_mbrs();
114 break;
115 case InfoType::ArraySchema:
116 print_schema_info();
117 break;
118 }
119 }
120
print_tile_sizes() const121 void InfoCommand::print_tile_sizes() const {
122 stats::Stats stats("");
123 StorageManager sm(&compute_tp_, &io_tp_, &stats, tdb_make_shared(Logger, ""));
124 THROW_NOT_OK(sm.init(nullptr));
125
126 // Open the array
127 URI uri(array_uri_);
128 Array array(uri, &sm);
129 THROW_NOT_OK(
130 array.open(QueryType::READ, EncryptionType::NO_ENCRYPTION, nullptr, 0));
131 EncryptionKey enc_key;
132
133 // Compute and report mean persisted tile sizes over all attributes.
134 const auto* schema = array.array_schema_latest();
135 auto fragment_metadata = array.fragment_metadata();
136 auto attributes = schema->attributes();
137 uint64_t total_persisted_size = 0, total_in_memory_size = 0;
138
139 // Helper function for processing each attribute.
140 auto process_attr = [&](const std::string& name, bool var_size) {
141 uint64_t persisted_tile_size = 0, in_memory_tile_size = 0;
142 uint64_t num_tiles = 0;
143 for (const auto& f : fragment_metadata) {
144 uint64_t tile_num = f->tile_num();
145 std::vector<std::string> names;
146 names.push_back(name);
147 THROW_NOT_OK(f->load_tile_offsets(enc_key, std::move(names)));
148 THROW_NOT_OK(f->load_tile_var_sizes(enc_key, name));
149 for (uint64_t tile_idx = 0; tile_idx < tile_num; tile_idx++) {
150 uint64_t tile_size = 0;
151 THROW_NOT_OK(f->persisted_tile_size(name, tile_idx, &tile_size));
152 persisted_tile_size += tile_size;
153 in_memory_tile_size += f->tile_size(name, tile_idx);
154 num_tiles++;
155 if (var_size) {
156 THROW_NOT_OK(f->persisted_tile_var_size(name, tile_idx, &tile_size));
157 persisted_tile_size += tile_size;
158 THROW_NOT_OK(f->tile_var_size(name, tile_idx, &tile_size));
159 in_memory_tile_size += tile_size;
160 num_tiles++;
161 }
162 }
163 }
164 total_persisted_size += persisted_tile_size;
165 total_in_memory_size += in_memory_tile_size;
166
167 std::cout << "- " << name << " (" << num_tiles << " tiles):" << std::endl;
168 std::cout << " Total persisted tile size: " << persisted_tile_size
169 << " bytes." << std::endl;
170 std::cout << " Total in-memory tile size: " << in_memory_tile_size
171 << " bytes." << std::endl;
172 };
173
174 // Print header
175 std::cout << "Array URI: " << uri.to_string() << std::endl;
176 std::cout << "Tile stats (per attribute):" << std::endl;
177
178 // Dump info about coords for sparse arrays.
179 if (!schema->dense())
180 process_attr(constants::coords, false);
181
182 // Dump info about the rest of the attributes
183 for (const auto* attr : attributes)
184 process_attr(attr->name(), attr->var_size());
185
186 std::cout << "Sum of attribute persisted size: " << total_persisted_size
187 << " bytes." << std::endl;
188 std::cout << "Sum of attribute in-memory size: " << total_in_memory_size
189 << " bytes." << std::endl;
190
191 // Close the array.
192 THROW_NOT_OK(array.close());
193 }
194
print_schema_info() const195 void InfoCommand::print_schema_info() const {
196 stats::Stats stats("");
197 StorageManager sm(&compute_tp_, &io_tp_, &stats, tdb_make_shared(Logger, ""));
198 THROW_NOT_OK(sm.init(nullptr));
199
200 // Open the array
201 URI uri(array_uri_);
202 Array array(uri, &sm);
203 THROW_NOT_OK(
204 array.open(QueryType::READ, EncryptionType::NO_ENCRYPTION, nullptr, 0));
205
206 array.array_schema_latest()->dump(stdout);
207
208 // Close the array.
209 THROW_NOT_OK(array.close());
210 }
211
write_svg_mbrs() const212 void InfoCommand::write_svg_mbrs() const {
213 stats::Stats stats("");
214 StorageManager sm(&compute_tp_, &io_tp_, &stats, tdb_make_shared(Logger, ""));
215 THROW_NOT_OK(sm.init(nullptr));
216
217 // Open the array
218 URI uri(array_uri_);
219 Array array(uri, &sm);
220 THROW_NOT_OK(
221 array.open(QueryType::READ, EncryptionType::NO_ENCRYPTION, nullptr, 0));
222
223 const auto* schema = array.array_schema_latest();
224 auto dim_num = schema->dim_num();
225 if (dim_num < 2) {
226 THROW_NOT_OK(array.close());
227 throw std::runtime_error("SVG MBRs only supported for >1D arrays.");
228 }
229
230 std::vector<std::tuple<double, double, double, double>> mbr_rects;
231 double min_x = std::numeric_limits<double>::max(),
232 max_x = std::numeric_limits<double>::min(),
233 min_y = std::numeric_limits<double>::max(),
234 max_y = std::numeric_limits<double>::min();
235 auto fragment_metadata = array.fragment_metadata();
236 for (const auto& f : fragment_metadata) {
237 const auto& mbrs = f->mbrs();
238 for (const auto& mbr : mbrs) {
239 auto tup = get_mbr(mbr, schema->domain());
240 min_x = std::min(min_x, std::get<0>(tup));
241 min_y = std::min(min_y, std::get<1>(tup));
242 max_x = std::max(max_x, std::get<0>(tup) + std::get<2>(tup));
243 max_y = std::max(max_y, std::get<1>(tup) + std::get<3>(tup));
244 mbr_rects.push_back(tup);
245 }
246 }
247
248 const double coord_width = max_x - min_x + 1;
249 const double coord_height = max_y - min_y + 1;
250 const double scale_x = svg_width_ / coord_width;
251 const double scale_y = svg_height_ / coord_height;
252 std::stringstream svg;
253 svg << "<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"no\"?>\n"
254 << "<svg version=\"1.1\" xmlns=\"http://www.w3.org/2000/svg\" "
255 "xmlns:xlink=\"http://www.w3.org/1999/xlink\" width=\""
256 << (svg_width_) << "px\" height=\"" << (svg_height_) << "px\" >\n";
257 svg << "<g>\n";
258 const uint16_t g_inc = std::max<uint16_t>(
259 1, static_cast<uint16_t>((size_t)0xff / mbr_rects.size()));
260 uint32_t r = 0, g = 0, b = 0xff;
261 for (const auto& tup : mbr_rects) {
262 double x = scale_x * (std::get<0>(tup) - min_x);
263 double y = scale_y * (std::get<1>(tup) - min_y);
264 double width = scale_x * std::get<2>(tup);
265 double height = scale_y * std::get<3>(tup);
266 svg << " <rect x=\"" << x << "\" y=\"" << y << "\" width=\"" << width
267 << "\" height=\"" << height << "\" "
268 << "style=\"fill:rgb(" << r << ", " << g << ", " << b
269 << ");stroke:none;fill-opacity:0.5\" "
270 "/>\n";
271 g = (g + g_inc) % 0xff;
272 }
273 svg << "</g>\n";
274 svg << "</svg>";
275
276 if (output_path_.empty()) {
277 std::cout << svg.str() << std::endl;
278 } else {
279 std::ofstream os(output_path_, std::ios::out | std::ios::trunc);
280 os << svg.str() << std::endl;
281 }
282
283 // Close the array.
284 THROW_NOT_OK(array.close());
285 }
286
write_text_mbrs() const287 void InfoCommand::write_text_mbrs() const {
288 stats::Stats stats("");
289 StorageManager sm(&compute_tp_, &io_tp_, &stats, tdb_make_shared(Logger, ""));
290 THROW_NOT_OK(sm.init(nullptr));
291
292 // Open the array
293 URI uri(array_uri_);
294 Array array(uri, &sm);
295 THROW_NOT_OK(
296 array.open(QueryType::READ, EncryptionType::NO_ENCRYPTION, nullptr, 0));
297
298 auto encryption_key = array.encryption_key();
299 const auto* schema = array.array_schema_latest();
300 auto dim_num = schema->dim_num();
301 auto fragment_metadata = array.fragment_metadata();
302 std::stringstream text;
303 for (const auto& f : fragment_metadata) {
304 f->load_rtree(*encryption_key);
305 const auto& mbrs = f->mbrs();
306 for (const auto& mbr : mbrs) {
307 auto str_mbr = mbr_to_string(mbr, schema->domain());
308 for (unsigned i = 0; i < dim_num; i++) {
309 text << str_mbr[2 * i + 0] << "," << str_mbr[2 * i + 1];
310 if (i < dim_num - 1)
311 text << "\t";
312 }
313 text << std::endl;
314 }
315 }
316
317 if (output_path_.empty()) {
318 std::cout << text.str() << std::endl;
319 } else {
320 std::ofstream os(output_path_, std::ios::out | std::ios::trunc);
321 os << text.str() << std::endl;
322 }
323
324 // Close the array.
325 THROW_NOT_OK(array.close());
326 }
327
get_mbr(const NDRange & mbr,const Domain * domain) const328 std::tuple<double, double, double, double> InfoCommand::get_mbr(
329 const NDRange& mbr, const Domain* domain) const {
330 assert(domain->dim_num() == 2);
331 double x, y, width, height;
332
333 // First dimension
334 auto d1_type = domain->dimension(0)->type();
335 switch (d1_type) {
336 case Datatype::INT8:
337 y = static_cast<const int8_t*>(mbr[0].data())[0];
338 height = static_cast<const int8_t*>(mbr[0].data())[1] - y + 1;
339 break;
340 case Datatype::UINT8:
341 y = static_cast<const uint8_t*>(mbr[0].data())[0];
342 height = static_cast<const uint8_t*>(mbr[0].data())[1] - y + 1;
343 break;
344 case Datatype::INT16:
345 y = static_cast<const int16_t*>(mbr[0].data())[0];
346 height = static_cast<const int16_t*>(mbr[0].data())[1] - y + 1;
347 break;
348 case Datatype::UINT16:
349 y = static_cast<const uint16_t*>(mbr[0].data())[0];
350 height = static_cast<const uint16_t*>(mbr[0].data())[1] - y + 1;
351 break;
352 case Datatype::INT32:
353 y = static_cast<const int32_t*>(mbr[0].data())[0];
354 height = static_cast<const int32_t*>(mbr[0].data())[1] - y + 1;
355 break;
356 case Datatype::UINT32:
357 y = static_cast<const uint32_t*>(mbr[0].data())[0];
358 height = static_cast<const uint32_t*>(mbr[0].data())[1] - y + 1;
359 break;
360 case Datatype::INT64:
361 y = static_cast<const int64_t*>(mbr[0].data())[0];
362 height = static_cast<const int64_t*>(mbr[0].data())[1] - y + 1;
363 break;
364 case Datatype::UINT64:
365 y = static_cast<const uint64_t*>(mbr[0].data())[0];
366 height = static_cast<const uint64_t*>(mbr[0].data())[1] - y + 1;
367 break;
368 case Datatype::FLOAT32:
369 y = static_cast<const float*>(mbr[0].data())[0];
370 height = static_cast<const float*>(mbr[0].data())[1] - y + 1;
371 break;
372 case Datatype::FLOAT64:
373 y = static_cast<const double*>(mbr[0].data())[0];
374 height = static_cast<const double*>(mbr[0].data())[1] - y + 1;
375 break;
376 case Datatype::DATETIME_YEAR:
377 case Datatype::DATETIME_MONTH:
378 case Datatype::DATETIME_WEEK:
379 case Datatype::DATETIME_DAY:
380 case Datatype::DATETIME_HR:
381 case Datatype::DATETIME_MIN:
382 case Datatype::DATETIME_SEC:
383 case Datatype::DATETIME_MS:
384 case Datatype::DATETIME_US:
385 case Datatype::DATETIME_NS:
386 case Datatype::DATETIME_PS:
387 case Datatype::DATETIME_FS:
388 case Datatype::DATETIME_AS:
389 case Datatype::TIME_HR:
390 case Datatype::TIME_MIN:
391 case Datatype::TIME_SEC:
392 case Datatype::TIME_MS:
393 case Datatype::TIME_US:
394 case Datatype::TIME_NS:
395 case Datatype::TIME_PS:
396 case Datatype::TIME_FS:
397 case Datatype::TIME_AS:
398 y = static_cast<const int64_t*>(mbr[0].data())[0];
399 height = static_cast<const int64_t*>(mbr[0].data())[1] - y + 1;
400 break;
401 default:
402 throw std::invalid_argument(
403 "Cannot get MBR; Unsupported coordinates type");
404 }
405
406 // Second dimension
407 auto d2_type = domain->dimension(1)->type();
408 switch (d2_type) {
409 case Datatype::INT8:
410 x = static_cast<const int8_t*>(mbr[1].data())[0];
411 width = static_cast<const int8_t*>(mbr[1].data())[1] - x + 1;
412 break;
413 case Datatype::UINT8:
414 x = static_cast<const uint8_t*>(mbr[1].data())[0];
415 width = static_cast<const uint8_t*>(mbr[1].data())[1] - x + 1;
416 break;
417 case Datatype::INT16:
418 x = static_cast<const int16_t*>(mbr[1].data())[0];
419 width = static_cast<const int16_t*>(mbr[1].data())[1] - x + 1;
420 break;
421 case Datatype::UINT16:
422 x = static_cast<const uint16_t*>(mbr[1].data())[0];
423 width = static_cast<const uint16_t*>(mbr[1].data())[1] - x + 1;
424 break;
425 case Datatype::INT32:
426 x = static_cast<const int32_t*>(mbr[1].data())[0];
427 width = static_cast<const int32_t*>(mbr[1].data())[1] - x + 1;
428 break;
429 case Datatype::UINT32:
430 x = static_cast<const uint32_t*>(mbr[1].data())[0];
431 width = static_cast<const uint32_t*>(mbr[1].data())[1] - x + 1;
432 break;
433 case Datatype::INT64:
434 x = static_cast<const int64_t*>(mbr[1].data())[0];
435 width = static_cast<const int64_t*>(mbr[1].data())[1] - x + 1;
436 break;
437 case Datatype::UINT64:
438 x = static_cast<const uint64_t*>(mbr[1].data())[0];
439 width = static_cast<const uint64_t*>(mbr[1].data())[1] - x + 1;
440 break;
441 case Datatype::FLOAT32:
442 x = static_cast<const float*>(mbr[1].data())[0];
443 width = static_cast<const float*>(mbr[1].data())[1] - x + 1;
444 break;
445 case Datatype::FLOAT64:
446 x = static_cast<const double*>(mbr[1].data())[0];
447 width = static_cast<const double*>(mbr[1].data())[1] - x + 1;
448 break;
449 case Datatype::DATETIME_YEAR:
450 case Datatype::DATETIME_MONTH:
451 case Datatype::DATETIME_WEEK:
452 case Datatype::DATETIME_DAY:
453 case Datatype::DATETIME_HR:
454 case Datatype::DATETIME_MIN:
455 case Datatype::DATETIME_SEC:
456 case Datatype::DATETIME_MS:
457 case Datatype::DATETIME_US:
458 case Datatype::DATETIME_NS:
459 case Datatype::DATETIME_PS:
460 case Datatype::DATETIME_FS:
461 case Datatype::DATETIME_AS:
462 case Datatype::TIME_HR:
463 case Datatype::TIME_MIN:
464 case Datatype::TIME_SEC:
465 case Datatype::TIME_MS:
466 case Datatype::TIME_US:
467 case Datatype::TIME_NS:
468 case Datatype::TIME_PS:
469 case Datatype::TIME_FS:
470 case Datatype::TIME_AS:
471 x = static_cast<const int64_t*>(mbr[1].data())[0];
472 width = static_cast<const int64_t*>(mbr[1].data())[1] - x + 1;
473 break;
474 default:
475 throw std::invalid_argument(
476 "Cannot get MBR; Unsupported coordinates type");
477 }
478
479 return std::make_tuple(x, y, width, height);
480 }
481
482 // Works only for fixed-sized coordinates
mbr_to_string(const NDRange & mbr,const Domain * domain) const483 std::vector<std::string> InfoCommand::mbr_to_string(
484 const NDRange& mbr, const Domain* domain) const {
485 std::vector<std::string> result;
486 const int8_t* r8;
487 const uint8_t* ru8;
488 const int16_t* r16;
489 const uint16_t* ru16;
490 const int32_t* r32;
491 const uint32_t* ru32;
492 const int64_t* r64;
493 const uint64_t* ru64;
494 const float* rf32;
495 const double* rf64;
496 auto dim_num = domain->dim_num();
497 for (unsigned d = 0; d < dim_num; d++) {
498 auto type = domain->dimension(d)->type();
499 switch (type) {
500 case sm::Datatype::STRING_ASCII:
501 result.push_back(mbr[d].start_str());
502 result.push_back(mbr[d].end_str());
503 break;
504 case Datatype::INT8:
505 r8 = (const int8_t*)mbr[d].data();
506 result.push_back(std::to_string(r8[0]));
507 result.push_back(std::to_string(r8[1]));
508 break;
509 case Datatype::UINT8:
510 ru8 = (const uint8_t*)mbr[d].data();
511 result.push_back(std::to_string(ru8[0]));
512 result.push_back(std::to_string(ru8[1]));
513 break;
514 case Datatype::INT16:
515 r16 = (const int16_t*)mbr[d].data();
516 result.push_back(std::to_string(r16[0]));
517 result.push_back(std::to_string(r16[1]));
518 break;
519 case Datatype::UINT16:
520 ru16 = (const uint16_t*)mbr[d].data();
521 result.push_back(std::to_string(ru16[0]));
522 result.push_back(std::to_string(ru16[1]));
523 break;
524 case Datatype::INT32:
525 r32 = (const int32_t*)mbr[d].data();
526 result.push_back(std::to_string(r32[0]));
527 result.push_back(std::to_string(r32[1]));
528 break;
529 case Datatype::UINT32:
530 ru32 = (const uint32_t*)mbr[d].data();
531 result.push_back(std::to_string(ru32[0]));
532 result.push_back(std::to_string(ru32[1]));
533 break;
534 case Datatype::INT64:
535 r64 = (const int64_t*)mbr[d].data();
536 result.push_back(std::to_string(r64[0]));
537 result.push_back(std::to_string(r64[1]));
538 break;
539 case Datatype::UINT64:
540 ru64 = (const uint64_t*)mbr[d].data();
541 result.push_back(std::to_string(ru64[0]));
542 result.push_back(std::to_string(ru64[1]));
543 break;
544 case Datatype::FLOAT32:
545 rf32 = (const float*)mbr[d].data();
546 result.push_back(std::to_string(rf32[0]));
547 result.push_back(std::to_string(rf32[1]));
548 break;
549 case Datatype::FLOAT64:
550 rf64 = (const double*)mbr[d].data();
551 result.push_back(std::to_string(rf64[0]));
552 result.push_back(std::to_string(rf64[1]));
553 break;
554 default:
555 throw std::invalid_argument(
556 "Cannot get MBR; Unsupported coordinates type");
557 }
558 }
559
560 return result;
561 }
562
563 } // namespace cli
564 } // namespace tiledb
565