1 /**
2  * @file  info_command.cc
3  *
4  * @section LICENSE
5  *
6  * The MIT License
7  *
8  * @copyright Copyright (c) 2018-2021 TileDB, Inc.
9  *
10  * Permission is hereby granted, free of charge, to any person obtaining a copy
11  * of this software and associated documentation files (the "Software"), to deal
12  * in the Software without restriction, including without limitation the rights
13  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
14  * copies of the Software, and to permit persons to whom the Software is
15  * furnished to do so, subject to the following conditions:
16  *
17  * The above copyright notice and this permission notice shall be included in
18  * all copies or substantial portions of the Software.
19  *
20  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
21  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
23  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
24  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
25  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
26  * THE SOFTWARE.
27  *
28  * @section DESCRIPTION
29  *
30  * This file defines the info command.
31  */
32 
33 #include "commands/info_command.h"
34 #include "misc/common.h"
35 
36 #include "tiledb/common/logger.h"
37 #include "tiledb/sm/array/array.h"
38 #include "tiledb/sm/array_schema/array_schema.h"
39 #include "tiledb/sm/array_schema/attribute.h"
40 #include "tiledb/sm/array_schema/dimension.h"
41 #include "tiledb/sm/crypto/encryption_key.h"
42 #include "tiledb/sm/enums/encryption_type.h"
43 #include "tiledb/sm/enums/query_type.h"
44 #include "tiledb/sm/fragment/fragment_metadata.h"
45 #include "tiledb/sm/storage_manager/storage_manager.h"
46 
#include <algorithm>
#include <cassert>
#include <fstream>
#include <iostream>
#include <limits>
#include <sstream>
#include <stdexcept>
#include <string>
#include <thread>
#include <tuple>
#include <vector>
51 
52 namespace tiledb {
53 namespace cli {
54 
55 using namespace tiledb::sm;
56 
57 /** The thread pool for compute-bound tasks. */
58 ThreadPool compute_tp_;
59 
60 /** The thread pool for io-bound tasks. */
61 ThreadPool io_tp_;
62 
get_cli()63 clipp::group InfoCommand::get_cli() {
64   using namespace clipp;
65   auto array_arg =
66       ((option("-a", "--array").required(true) & value("uri", array_uri_)) %
67        "URI of TileDB array");
68 
69   auto schema_info =
70       "array-schema: Prints basic information about the array's schema." %
71       (command("array-schema").set(type_, InfoType::ArraySchema), array_arg);
72 
73   auto tile_sizes =
74       "tile-sizes: Prints statistics about tile sizes in the array." %
75       (command("tile-sizes").set(type_, InfoType::TileSizes), array_arg);
76 
77   auto svg_mbrs =
78       "svg-mbrs: Produces an SVG visualizing the MBRs (2D arrays only)" %
79       (command("svg-mbrs").set(type_, InfoType::SVGMBRs),
80        array_arg,
81        option("-o", "--output").doc("Path to write output SVG") &
82            value("path", output_path_),
83        option("-w", "--width").doc("Width of output SVG") &
84            value("N", svg_width_),
85        option("-h", "--height").doc("Height of output SVG") &
86            value("N", svg_height_));
87 
88   auto dump_mbrs =
89       "dump-mbrs: Dumps the MBRs in the array to text output." %
90       (command("dump-mbrs").set(type_, InfoType::DumpMBRs),
91        array_arg,
92        option("-o", "--output").doc("Path to write output text file") &
93            value("path", output_path_));
94 
95   auto cli = schema_info | tile_sizes | dump_mbrs | svg_mbrs;
96   return cli;
97 }
98 
run()99 void InfoCommand::run() {
100   io_tp_.init(std::thread::hardware_concurrency());
101   compute_tp_.init(std::thread::hardware_concurrency());
102 
103   switch (type_) {
104     case InfoType::None:
105       break;
106     case InfoType::TileSizes:
107       print_tile_sizes();
108       break;
109     case InfoType::SVGMBRs:
110       write_svg_mbrs();
111       break;
112     case InfoType::DumpMBRs:
113       write_text_mbrs();
114       break;
115     case InfoType::ArraySchema:
116       print_schema_info();
117       break;
118   }
119 }
120 
print_tile_sizes() const121 void InfoCommand::print_tile_sizes() const {
122   stats::Stats stats("");
123   StorageManager sm(&compute_tp_, &io_tp_, &stats, tdb_make_shared(Logger, ""));
124   THROW_NOT_OK(sm.init(nullptr));
125 
126   // Open the array
127   URI uri(array_uri_);
128   Array array(uri, &sm);
129   THROW_NOT_OK(
130       array.open(QueryType::READ, EncryptionType::NO_ENCRYPTION, nullptr, 0));
131   EncryptionKey enc_key;
132 
133   // Compute and report mean persisted tile sizes over all attributes.
134   const auto* schema = array.array_schema_latest();
135   auto fragment_metadata = array.fragment_metadata();
136   auto attributes = schema->attributes();
137   uint64_t total_persisted_size = 0, total_in_memory_size = 0;
138 
139   // Helper function for processing each attribute.
140   auto process_attr = [&](const std::string& name, bool var_size) {
141     uint64_t persisted_tile_size = 0, in_memory_tile_size = 0;
142     uint64_t num_tiles = 0;
143     for (const auto& f : fragment_metadata) {
144       uint64_t tile_num = f->tile_num();
145       std::vector<std::string> names;
146       names.push_back(name);
147       THROW_NOT_OK(f->load_tile_offsets(enc_key, std::move(names)));
148       THROW_NOT_OK(f->load_tile_var_sizes(enc_key, name));
149       for (uint64_t tile_idx = 0; tile_idx < tile_num; tile_idx++) {
150         uint64_t tile_size = 0;
151         THROW_NOT_OK(f->persisted_tile_size(name, tile_idx, &tile_size));
152         persisted_tile_size += tile_size;
153         in_memory_tile_size += f->tile_size(name, tile_idx);
154         num_tiles++;
155         if (var_size) {
156           THROW_NOT_OK(f->persisted_tile_var_size(name, tile_idx, &tile_size));
157           persisted_tile_size += tile_size;
158           THROW_NOT_OK(f->tile_var_size(name, tile_idx, &tile_size));
159           in_memory_tile_size += tile_size;
160           num_tiles++;
161         }
162       }
163     }
164     total_persisted_size += persisted_tile_size;
165     total_in_memory_size += in_memory_tile_size;
166 
167     std::cout << "- " << name << " (" << num_tiles << " tiles):" << std::endl;
168     std::cout << "  Total persisted tile size: " << persisted_tile_size
169               << " bytes." << std::endl;
170     std::cout << "  Total in-memory tile size: " << in_memory_tile_size
171               << " bytes." << std::endl;
172   };
173 
174   // Print header
175   std::cout << "Array URI: " << uri.to_string() << std::endl;
176   std::cout << "Tile stats (per attribute):" << std::endl;
177 
178   // Dump info about coords for sparse arrays.
179   if (!schema->dense())
180     process_attr(constants::coords, false);
181 
182   // Dump info about the rest of the attributes
183   for (const auto* attr : attributes)
184     process_attr(attr->name(), attr->var_size());
185 
186   std::cout << "Sum of attribute persisted size: " << total_persisted_size
187             << " bytes." << std::endl;
188   std::cout << "Sum of attribute in-memory size: " << total_in_memory_size
189             << " bytes." << std::endl;
190 
191   // Close the array.
192   THROW_NOT_OK(array.close());
193 }
194 
print_schema_info() const195 void InfoCommand::print_schema_info() const {
196   stats::Stats stats("");
197   StorageManager sm(&compute_tp_, &io_tp_, &stats, tdb_make_shared(Logger, ""));
198   THROW_NOT_OK(sm.init(nullptr));
199 
200   // Open the array
201   URI uri(array_uri_);
202   Array array(uri, &sm);
203   THROW_NOT_OK(
204       array.open(QueryType::READ, EncryptionType::NO_ENCRYPTION, nullptr, 0));
205 
206   array.array_schema_latest()->dump(stdout);
207 
208   // Close the array.
209   THROW_NOT_OK(array.close());
210 }
211 
write_svg_mbrs() const212 void InfoCommand::write_svg_mbrs() const {
213   stats::Stats stats("");
214   StorageManager sm(&compute_tp_, &io_tp_, &stats, tdb_make_shared(Logger, ""));
215   THROW_NOT_OK(sm.init(nullptr));
216 
217   // Open the array
218   URI uri(array_uri_);
219   Array array(uri, &sm);
220   THROW_NOT_OK(
221       array.open(QueryType::READ, EncryptionType::NO_ENCRYPTION, nullptr, 0));
222 
223   const auto* schema = array.array_schema_latest();
224   auto dim_num = schema->dim_num();
225   if (dim_num < 2) {
226     THROW_NOT_OK(array.close());
227     throw std::runtime_error("SVG MBRs only supported for >1D arrays.");
228   }
229 
230   std::vector<std::tuple<double, double, double, double>> mbr_rects;
231   double min_x = std::numeric_limits<double>::max(),
232          max_x = std::numeric_limits<double>::min(),
233          min_y = std::numeric_limits<double>::max(),
234          max_y = std::numeric_limits<double>::min();
235   auto fragment_metadata = array.fragment_metadata();
236   for (const auto& f : fragment_metadata) {
237     const auto& mbrs = f->mbrs();
238     for (const auto& mbr : mbrs) {
239       auto tup = get_mbr(mbr, schema->domain());
240       min_x = std::min(min_x, std::get<0>(tup));
241       min_y = std::min(min_y, std::get<1>(tup));
242       max_x = std::max(max_x, std::get<0>(tup) + std::get<2>(tup));
243       max_y = std::max(max_y, std::get<1>(tup) + std::get<3>(tup));
244       mbr_rects.push_back(tup);
245     }
246   }
247 
248   const double coord_width = max_x - min_x + 1;
249   const double coord_height = max_y - min_y + 1;
250   const double scale_x = svg_width_ / coord_width;
251   const double scale_y = svg_height_ / coord_height;
252   std::stringstream svg;
253   svg << "<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"no\"?>\n"
254       << "<svg version=\"1.1\" xmlns=\"http://www.w3.org/2000/svg\" "
255          "xmlns:xlink=\"http://www.w3.org/1999/xlink\" width=\""
256       << (svg_width_) << "px\" height=\"" << (svg_height_) << "px\" >\n";
257   svg << "<g>\n";
258   const uint16_t g_inc = std::max<uint16_t>(
259       1, static_cast<uint16_t>((size_t)0xff / mbr_rects.size()));
260   uint32_t r = 0, g = 0, b = 0xff;
261   for (const auto& tup : mbr_rects) {
262     double x = scale_x * (std::get<0>(tup) - min_x);
263     double y = scale_y * (std::get<1>(tup) - min_y);
264     double width = scale_x * std::get<2>(tup);
265     double height = scale_y * std::get<3>(tup);
266     svg << "  <rect x=\"" << x << "\" y=\"" << y << "\" width=\"" << width
267         << "\" height=\"" << height << "\" "
268         << "style=\"fill:rgb(" << r << ", " << g << ", " << b
269         << ");stroke:none;fill-opacity:0.5\" "
270            "/>\n";
271     g = (g + g_inc) % 0xff;
272   }
273   svg << "</g>\n";
274   svg << "</svg>";
275 
276   if (output_path_.empty()) {
277     std::cout << svg.str() << std::endl;
278   } else {
279     std::ofstream os(output_path_, std::ios::out | std::ios::trunc);
280     os << svg.str() << std::endl;
281   }
282 
283   // Close the array.
284   THROW_NOT_OK(array.close());
285 }
286 
write_text_mbrs() const287 void InfoCommand::write_text_mbrs() const {
288   stats::Stats stats("");
289   StorageManager sm(&compute_tp_, &io_tp_, &stats, tdb_make_shared(Logger, ""));
290   THROW_NOT_OK(sm.init(nullptr));
291 
292   // Open the array
293   URI uri(array_uri_);
294   Array array(uri, &sm);
295   THROW_NOT_OK(
296       array.open(QueryType::READ, EncryptionType::NO_ENCRYPTION, nullptr, 0));
297 
298   auto encryption_key = array.encryption_key();
299   const auto* schema = array.array_schema_latest();
300   auto dim_num = schema->dim_num();
301   auto fragment_metadata = array.fragment_metadata();
302   std::stringstream text;
303   for (const auto& f : fragment_metadata) {
304     f->load_rtree(*encryption_key);
305     const auto& mbrs = f->mbrs();
306     for (const auto& mbr : mbrs) {
307       auto str_mbr = mbr_to_string(mbr, schema->domain());
308       for (unsigned i = 0; i < dim_num; i++) {
309         text << str_mbr[2 * i + 0] << "," << str_mbr[2 * i + 1];
310         if (i < dim_num - 1)
311           text << "\t";
312       }
313       text << std::endl;
314     }
315   }
316 
317   if (output_path_.empty()) {
318     std::cout << text.str() << std::endl;
319   } else {
320     std::ofstream os(output_path_, std::ios::out | std::ios::trunc);
321     os << text.str() << std::endl;
322   }
323 
324   // Close the array.
325   THROW_NOT_OK(array.close());
326 }
327 
get_mbr(const NDRange & mbr,const Domain * domain) const328 std::tuple<double, double, double, double> InfoCommand::get_mbr(
329     const NDRange& mbr, const Domain* domain) const {
330   assert(domain->dim_num() == 2);
331   double x, y, width, height;
332 
333   // First dimension
334   auto d1_type = domain->dimension(0)->type();
335   switch (d1_type) {
336     case Datatype::INT8:
337       y = static_cast<const int8_t*>(mbr[0].data())[0];
338       height = static_cast<const int8_t*>(mbr[0].data())[1] - y + 1;
339       break;
340     case Datatype::UINT8:
341       y = static_cast<const uint8_t*>(mbr[0].data())[0];
342       height = static_cast<const uint8_t*>(mbr[0].data())[1] - y + 1;
343       break;
344     case Datatype::INT16:
345       y = static_cast<const int16_t*>(mbr[0].data())[0];
346       height = static_cast<const int16_t*>(mbr[0].data())[1] - y + 1;
347       break;
348     case Datatype::UINT16:
349       y = static_cast<const uint16_t*>(mbr[0].data())[0];
350       height = static_cast<const uint16_t*>(mbr[0].data())[1] - y + 1;
351       break;
352     case Datatype::INT32:
353       y = static_cast<const int32_t*>(mbr[0].data())[0];
354       height = static_cast<const int32_t*>(mbr[0].data())[1] - y + 1;
355       break;
356     case Datatype::UINT32:
357       y = static_cast<const uint32_t*>(mbr[0].data())[0];
358       height = static_cast<const uint32_t*>(mbr[0].data())[1] - y + 1;
359       break;
360     case Datatype::INT64:
361       y = static_cast<const int64_t*>(mbr[0].data())[0];
362       height = static_cast<const int64_t*>(mbr[0].data())[1] - y + 1;
363       break;
364     case Datatype::UINT64:
365       y = static_cast<const uint64_t*>(mbr[0].data())[0];
366       height = static_cast<const uint64_t*>(mbr[0].data())[1] - y + 1;
367       break;
368     case Datatype::FLOAT32:
369       y = static_cast<const float*>(mbr[0].data())[0];
370       height = static_cast<const float*>(mbr[0].data())[1] - y + 1;
371       break;
372     case Datatype::FLOAT64:
373       y = static_cast<const double*>(mbr[0].data())[0];
374       height = static_cast<const double*>(mbr[0].data())[1] - y + 1;
375       break;
376     case Datatype::DATETIME_YEAR:
377     case Datatype::DATETIME_MONTH:
378     case Datatype::DATETIME_WEEK:
379     case Datatype::DATETIME_DAY:
380     case Datatype::DATETIME_HR:
381     case Datatype::DATETIME_MIN:
382     case Datatype::DATETIME_SEC:
383     case Datatype::DATETIME_MS:
384     case Datatype::DATETIME_US:
385     case Datatype::DATETIME_NS:
386     case Datatype::DATETIME_PS:
387     case Datatype::DATETIME_FS:
388     case Datatype::DATETIME_AS:
389     case Datatype::TIME_HR:
390     case Datatype::TIME_MIN:
391     case Datatype::TIME_SEC:
392     case Datatype::TIME_MS:
393     case Datatype::TIME_US:
394     case Datatype::TIME_NS:
395     case Datatype::TIME_PS:
396     case Datatype::TIME_FS:
397     case Datatype::TIME_AS:
398       y = static_cast<const int64_t*>(mbr[0].data())[0];
399       height = static_cast<const int64_t*>(mbr[0].data())[1] - y + 1;
400       break;
401     default:
402       throw std::invalid_argument(
403           "Cannot get MBR; Unsupported coordinates type");
404   }
405 
406   // Second dimension
407   auto d2_type = domain->dimension(1)->type();
408   switch (d2_type) {
409     case Datatype::INT8:
410       x = static_cast<const int8_t*>(mbr[1].data())[0];
411       width = static_cast<const int8_t*>(mbr[1].data())[1] - x + 1;
412       break;
413     case Datatype::UINT8:
414       x = static_cast<const uint8_t*>(mbr[1].data())[0];
415       width = static_cast<const uint8_t*>(mbr[1].data())[1] - x + 1;
416       break;
417     case Datatype::INT16:
418       x = static_cast<const int16_t*>(mbr[1].data())[0];
419       width = static_cast<const int16_t*>(mbr[1].data())[1] - x + 1;
420       break;
421     case Datatype::UINT16:
422       x = static_cast<const uint16_t*>(mbr[1].data())[0];
423       width = static_cast<const uint16_t*>(mbr[1].data())[1] - x + 1;
424       break;
425     case Datatype::INT32:
426       x = static_cast<const int32_t*>(mbr[1].data())[0];
427       width = static_cast<const int32_t*>(mbr[1].data())[1] - x + 1;
428       break;
429     case Datatype::UINT32:
430       x = static_cast<const uint32_t*>(mbr[1].data())[0];
431       width = static_cast<const uint32_t*>(mbr[1].data())[1] - x + 1;
432       break;
433     case Datatype::INT64:
434       x = static_cast<const int64_t*>(mbr[1].data())[0];
435       width = static_cast<const int64_t*>(mbr[1].data())[1] - x + 1;
436       break;
437     case Datatype::UINT64:
438       x = static_cast<const uint64_t*>(mbr[1].data())[0];
439       width = static_cast<const uint64_t*>(mbr[1].data())[1] - x + 1;
440       break;
441     case Datatype::FLOAT32:
442       x = static_cast<const float*>(mbr[1].data())[0];
443       width = static_cast<const float*>(mbr[1].data())[1] - x + 1;
444       break;
445     case Datatype::FLOAT64:
446       x = static_cast<const double*>(mbr[1].data())[0];
447       width = static_cast<const double*>(mbr[1].data())[1] - x + 1;
448       break;
449     case Datatype::DATETIME_YEAR:
450     case Datatype::DATETIME_MONTH:
451     case Datatype::DATETIME_WEEK:
452     case Datatype::DATETIME_DAY:
453     case Datatype::DATETIME_HR:
454     case Datatype::DATETIME_MIN:
455     case Datatype::DATETIME_SEC:
456     case Datatype::DATETIME_MS:
457     case Datatype::DATETIME_US:
458     case Datatype::DATETIME_NS:
459     case Datatype::DATETIME_PS:
460     case Datatype::DATETIME_FS:
461     case Datatype::DATETIME_AS:
462     case Datatype::TIME_HR:
463     case Datatype::TIME_MIN:
464     case Datatype::TIME_SEC:
465     case Datatype::TIME_MS:
466     case Datatype::TIME_US:
467     case Datatype::TIME_NS:
468     case Datatype::TIME_PS:
469     case Datatype::TIME_FS:
470     case Datatype::TIME_AS:
471       x = static_cast<const int64_t*>(mbr[1].data())[0];
472       width = static_cast<const int64_t*>(mbr[1].data())[1] - x + 1;
473       break;
474     default:
475       throw std::invalid_argument(
476           "Cannot get MBR; Unsupported coordinates type");
477   }
478 
479   return std::make_tuple(x, y, width, height);
480 }
481 
482 // Works only for fixed-sized coordinates
mbr_to_string(const NDRange & mbr,const Domain * domain) const483 std::vector<std::string> InfoCommand::mbr_to_string(
484     const NDRange& mbr, const Domain* domain) const {
485   std::vector<std::string> result;
486   const int8_t* r8;
487   const uint8_t* ru8;
488   const int16_t* r16;
489   const uint16_t* ru16;
490   const int32_t* r32;
491   const uint32_t* ru32;
492   const int64_t* r64;
493   const uint64_t* ru64;
494   const float* rf32;
495   const double* rf64;
496   auto dim_num = domain->dim_num();
497   for (unsigned d = 0; d < dim_num; d++) {
498     auto type = domain->dimension(d)->type();
499     switch (type) {
500       case sm::Datatype::STRING_ASCII:
501         result.push_back(mbr[d].start_str());
502         result.push_back(mbr[d].end_str());
503         break;
504       case Datatype::INT8:
505         r8 = (const int8_t*)mbr[d].data();
506         result.push_back(std::to_string(r8[0]));
507         result.push_back(std::to_string(r8[1]));
508         break;
509       case Datatype::UINT8:
510         ru8 = (const uint8_t*)mbr[d].data();
511         result.push_back(std::to_string(ru8[0]));
512         result.push_back(std::to_string(ru8[1]));
513         break;
514       case Datatype::INT16:
515         r16 = (const int16_t*)mbr[d].data();
516         result.push_back(std::to_string(r16[0]));
517         result.push_back(std::to_string(r16[1]));
518         break;
519       case Datatype::UINT16:
520         ru16 = (const uint16_t*)mbr[d].data();
521         result.push_back(std::to_string(ru16[0]));
522         result.push_back(std::to_string(ru16[1]));
523         break;
524       case Datatype::INT32:
525         r32 = (const int32_t*)mbr[d].data();
526         result.push_back(std::to_string(r32[0]));
527         result.push_back(std::to_string(r32[1]));
528         break;
529       case Datatype::UINT32:
530         ru32 = (const uint32_t*)mbr[d].data();
531         result.push_back(std::to_string(ru32[0]));
532         result.push_back(std::to_string(ru32[1]));
533         break;
534       case Datatype::INT64:
535         r64 = (const int64_t*)mbr[d].data();
536         result.push_back(std::to_string(r64[0]));
537         result.push_back(std::to_string(r64[1]));
538         break;
539       case Datatype::UINT64:
540         ru64 = (const uint64_t*)mbr[d].data();
541         result.push_back(std::to_string(ru64[0]));
542         result.push_back(std::to_string(ru64[1]));
543         break;
544       case Datatype::FLOAT32:
545         rf32 = (const float*)mbr[d].data();
546         result.push_back(std::to_string(rf32[0]));
547         result.push_back(std::to_string(rf32[1]));
548         break;
549       case Datatype::FLOAT64:
550         rf64 = (const double*)mbr[d].data();
551         result.push_back(std::to_string(rf64[0]));
552         result.push_back(std::to_string(rf64[1]));
553         break;
554       default:
555         throw std::invalid_argument(
556             "Cannot get MBR; Unsupported coordinates type");
557     }
558   }
559 
560   return result;
561 }
562 
563 }  // namespace cli
564 }  // namespace tiledb
565