1 #ifndef BMDBG__H__INCLUDED__
2 #define BMDBG__H__INCLUDED__
3 /*
4 Copyright(c) 2002-2017 Anatoliy Kuznetsov(anatoliy_kuznetsov at yahoo.com)
5
6 Licensed under the Apache License, Version 2.0 (the "License");
7 you may not use this file except in compliance with the License.
8 You may obtain a copy of the License at
9
10 http://www.apache.org/licenses/LICENSE-2.0
11
12 Unless required by applicable law or agreed to in writing, software
13 distributed under the License is distributed on an "AS IS" BASIS,
14 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 See the License for the specific language governing permissions and
16 limitations under the License.
17
18 For more information please visit: http://bitmagic.io
19 */
20
21 /*! \file bmdbg.h
22 \brief Debugging functions (internal)
23 */
24
25
26 #include <cstdio>
27 #include <stdlib.h>
28 #include <cassert>
29 #include <memory>
30 #include <time.h>
31
32 #include <iostream>
33 #include <sstream>
34 #include <fstream>
35 #include <iomanip>
36 #include <vector>
37
38 #include "bmalgo_similarity.h"
39 #include "bmsparsevec_serial.h"
40 #include "bmdef.h"
41
42
43
44 #ifdef _MSC_VER
45 #pragma warning( push )
46 #pragma warning( disable : 4311 4312 4127)
47 #endif
48
49 namespace bm
50 {
51
52 inline
PrintGap(const bm::gap_word_t * gap_buf)53 void PrintGap(const bm::gap_word_t* gap_buf)
54 {
55 unsigned len = (*gap_buf >> 3);
56 std::cout << "[" << *gap_buf << " len=" << len << "] ";
57 for (unsigned i = 0; i < len; ++i)
58 {
59 ++gap_buf;
60 std::cout << *gap_buf << "; ";
61 }
62 std::cout << std::endl;
63 }
64
65 inline
66 void PrintDGap(const bm::gap_word_t* gap_buf, unsigned gap_len=0)
67 {
68
69 unsigned len = gap_len ? gap_len : (*gap_buf >> 3);
70 std::cout << "[" " len=" << len << "] ";
71 unsigned i = gap_len ? 0 : 1;
72 for (; i < len; ++i)
73 {
74 std::cout << gap_buf[i] << "; ";
75 }
76 std::cout << std::endl;
77 }
78
/// Integer base-2 logarithm (index of the highest set bit).
///
/// @return floor(log2(value)); 0 for both value == 0 and value == 1
inline unsigned int iLog2(unsigned int value)
{
    unsigned int log2_floor = 0;
    // every bit that remains after dropping the lowest one adds 1
    for (unsigned int rest = value >> 1; rest != 0; rest >>= 1)
    {
        ++log2_floor;
    }
    return log2_floor;
}
85
86 inline
PrintGammaCode(unsigned value)87 unsigned PrintGammaCode(unsigned value)
88 {
89 unsigned bits = 0;
90 // Elias gamma encode
91 {
92 unsigned l = iLog2(value);
93 //cout << "log2=" << l << endl;
94 for (unsigned i = 0; i < l; ++i)
95 {
96 std::cout << 0;
97 ++bits;
98 }
99 std::cout << 1; ++bits;
100 for (unsigned i = 0; i < l; ++i)
101 {
102 if (value & 1 << i)
103 std::cout << 1;
104 else
105 std::cout << 0;
106 ++bits;
107 }
108 }
109 return bits;
110 }
111
112 inline
113 void PrintDGapGamma(const bm::gap_word_t* gap_buf, unsigned gap_len=0)
114 {
115 unsigned total = 0;
116 unsigned len = gap_len ? gap_len : (*gap_buf >> 3);
117 std::cout << "[" " len=" << len << "] ";
118 unsigned i = gap_len ? 0 : 1;
119 for (; i < len; ++i)
120 {
121 unsigned v = gap_buf[i];
122
123 unsigned bits = PrintGammaCode(v+1);
124 std::cout << "; ";
125 total += bits;
126 }
127 std::cout << " gamma_bits=" << total << " src_bits =" << len * 16;
128 std::cout << std::endl;
129
130 }
131
/// Read dump file into an STL container (vector of some basic type)
///
/// Only whole elements are read: the container is resized to
/// file_size / sizeof(value_type) elements and exactly that many bytes are
/// loaded, so a trailing partial element (if any) is ignored instead of being
/// written past the end of the container.
///
/// @param fname - name of the file to read
/// @param data  - [out] container receiving the file content
///
/// @return 0 - if reading went well (including empty input)
///        -1 - file cannot be opened (data is not touched)
///        -2 - file size query or read failed (data is emptied)
///
template<class VT>
int read_dump_file(const std::string& fname, VT& data)
{
    typedef typename VT::value_type value_type;

    std::ifstream fin(fname.c_str(), std::ios::in | std::ios::binary);
    if (!fin.good())
    {
        return -1;
    }

    fin.seekg(0, std::ios::end);
    const std::streamoff end_pos = std::streamoff(fin.tellg());
    if (end_pos < 0 || !fin.good()) // tellg() reports failure as -1
    {
        data.resize(0);
        return -2;
    }

    const size_t fsize = size_t(end_pos);
    const size_t elements = fsize / sizeof(value_type);
    data.resize(elements);

    if (!elements)
    {
        return 0; // empty input (or less than one whole element)
    }

    fin.seekg(0, std::ios::beg);
    // read exactly what the container can hold, never the raw file size
    fin.read(reinterpret_cast<char*>(&data[0]),
             std::streamsize(elements * sizeof(value_type)));
    if (!fin.good())
    {
        data.resize(0);
        return -2;
    }
    return 0;
}
165
166 template<class TBV>
167 void LoadBVector(const char* fname, TBV& bvector, unsigned* file_size=0)
168 {
169 std::ifstream bv_file (fname, std::ios::in | std::ios::binary);
170 if (!bv_file.good())
171 {
172 std::cout << "Cannot open file: " << fname << std::endl;
173 exit(1);
174 }
175 bv_file.seekg(0, std::ios_base::end);
176 unsigned length = (unsigned)bv_file.tellg();
177 if (length == 0)
178 {
179 std::cout << "Empty file:" << fname << std::endl;
180 exit(1);
181 }
182 if (file_size)
183 *file_size = length;
184
185 bv_file.seekg(0, std::ios::beg);
186
187 char* buffer = new char[length];
188
189 bv_file.read(buffer, length);
190
191 bm::deserialize(bvector, (unsigned char*)buffer);
192
193 delete [] buffer;
194 }
195
196 template<class TBV>
SaveBVector(const char * fname,const TBV & bvector)197 void SaveBVector(const char* fname, const TBV& bvector)
198 {
199 std::ofstream bfile (fname, std::ios::out | std::ios::binary);
200 if (!bfile.good())
201 {
202 std::cout << "Cannot open file: " << fname << std::endl;
203 exit(1);
204 }
205 typename TBV::statistics st1;
206 bvector.calc_stat(&st1);
207
208 unsigned char* blob = new unsigned char[st1.max_serialize_mem];
209 size_t blob_size = bm::serialize(bvector, blob);
210
211
212 bfile.write((char*)blob, std::streamsize(blob_size));
213 bfile.close();
214
215 delete [] blob;
216 }
217
/// Write a memory BLOB to the file "<name_prefix>-<num><ext>".
///
/// If the file cannot be opened a message is printed to std::cout and the
/// process exits with code 1.
///
/// @param name_prefix - leading part of the file name
/// @param num         - number appended after a '-' separator
/// @param ext         - trailing part of the file name (appended verbatim)
/// @param blob        - data to write
/// @param blob_size   - number of bytes to write
inline
void SaveBlob(const char* name_prefix, unsigned num, const char* ext,
              const unsigned char* blob, size_t blob_size)
{
    std::ostringstream name_builder;
    name_builder << name_prefix << "-" << num << ext;
    const std::string file_name = name_builder.str();

    std::ofstream out_file(file_name.c_str(), std::ios::out | std::ios::binary);
    if (!out_file.good())
    {
        std::cout << "Cannot open file: " << file_name.c_str() << std::endl;
        exit(1);
    }
    out_file.write(reinterpret_cast<const char*>(blob), std::streamsize(blob_size));
    out_file.close();
}
236
237
/// Print all bits of val to std::cout, lowest bit first.
/// For types wider than 16 bits a '-' is printed after bit 15.
///
/// @param val - value to print (sizeof(V) * 8 bits are printed)
template<typename V>
void PrintBinary(V val)
{
    const unsigned bit_count = unsigned(sizeof(V) * 8);
    const bool wide = (bit_count > 16);

    unsigned pos = 0;
    while (pos < bit_count)
    {
        std::cout << (unsigned)((val >> pos) & 1);
        if (wide && pos == 15)
            std::cout << "-";
        ++pos;
    }
}
248
249 inline
PrintBits32(unsigned val)250 void PrintBits32(unsigned val)
251 {
252 PrintBinary(val);
253 }
254
255 inline
PrintDistanceMatrix(const unsigned distance[bm::set_block_plane_cnt][bm::set_block_plane_cnt])256 void PrintDistanceMatrix(
257 const unsigned distance[bm::set_block_plane_cnt][bm::set_block_plane_cnt])
258 {
259 for (unsigned i = 0; i < bm::set_block_plane_cnt; ++i)
260 {
261 const unsigned* row = distance[i];
262 std::cout << i << ": ";
263 for (unsigned j = i; j < bm::set_block_plane_cnt; ++j)
264 {
265 std::cout << std::setw(4) << std::setfill('0') << row[j] << " ";
266 }
267 std::cout << std::endl;
268 }
269 }
270
/// Print a matrix to std::cout, one row per line prefixed with the row index.
///
/// @param tmatrix - matrix providing rows(), cols(), row(i) and value_type
/// @param cols    - number of columns to print; 0 means tmatrix.cols()
/// @param binary  - when true each cell is printed with PrintBinary(),
///                  otherwise as a number zero-padded to 4 characters
template<typename TM>
void PrintTMatrix(const TM& tmatrix, unsigned cols=0, bool binary = false)
{
    const unsigned columns = cols ? cols : tmatrix.cols();
    for (unsigned r = 0; r < tmatrix.rows(); ++r)
    {
        const typename TM::value_type* cells = tmatrix.row(r);
        std::cout << r << ": ";
        if (r < 10) // extra space keeps single-digit row labels aligned
            std::cout << " ";
        for (unsigned c = 0; c < columns; ++c)
        {
            if (binary)
            {
                PrintBinary(cells[c]);
                continue;
            }
            std::cout << std::setw(4) << std::setfill('0') << cells[c] << " ";
        }
        std::cout << std::endl;
    }
}
294
/// Binary code string converted to number
/// Bits are expected left to right: the first character of the string is
/// bit 0, the second is bit 1, and so on.
///
/// Parsing stops at the end of the string or after sizeof(unsigned) * 8 bits,
/// whichever comes first. Characters other than '0' and '1' trigger assert(0).
///
/// @param str - zero-terminated string of '0' / '1' characters
/// @return value assembled from the bits
///
inline
unsigned BinStrLR(const char* str)
{
    const unsigned max_bits = unsigned(sizeof(unsigned) * 8);
    unsigned value = 0;
    unsigned bit_idx = 0;
    for (; *str; ++str)
    {
        switch(*str)
        {
        case '0':
            ++bit_idx;
            break;
        case '1':
            // unsigned literal: shifting into the top bit must not go
            // through a signed int
            value |= (1u << bit_idx);
            ++bit_idx;
            break;
        default:
            assert(0);
            break;
        }
        if (bit_idx == max_bits)
            break;
    }
    return value;
}
322
/// Print per-block counts of a bit-vector to std::cout, 10 values per line.
///
/// Each line starts with the index of its first block, lists the counts
/// zero-padded to 5 characters and ends with the running total. A final
/// "Total=" line prints the overall sum.
///
/// @param bv - bit-vector providing count_blocks(unsigned*), which fills the
///             array and returns the index of the last block to print
template<class BV>
void print_blocks_count(const BV& bv)
{
    const unsigned sz = 128000;
    // zero-initialized, automatically released scratch array
    std::vector<unsigned> bc_arr(sz, 0u);

    unsigned last_block = bv.count_blocks(&bc_arr[0]);
    unsigned sum = 0;

    for (unsigned i = 0; i <= last_block; ++i)
    {
        std::cout << i << ":";

        // inner loop prints up to 10 entries; when it stops on the 10th the
        // outer ++i moves on to the first entry of the next row
        unsigned j = 0;
        for (; i <= last_block; ++i)
        {
            std::cout << std::setw(5) << std::setfill('0') << bc_arr[i] << " ";
            sum += bc_arr[i];
            if (++j == 10) break;
        }
        std::cout << " | " << sum << std::endl;
    }
    std::cout << "Total=" << sum << std::endl;
}
/// Print one block count as part of a running, 10-per-row table on std::cout.
///
/// The function keeps its state (running sum, position in the row, previous
/// block index) in function-local statics. A call with i == 0 starts a new
/// enumeration and resets all of that state, including the previous index,
/// so a second enumeration is not affected by where the first one ended.
///
/// When the index jumps by more than one, a single zero entry for prev + 1
/// is printed before the current entry.
/// NOTE(review): only one placeholder is emitted per jump, not one per
/// missing index - confirm this is the intended layout.
///
/// @param i     - block index
/// @param count - count to print for the block
inline
void print_bc(unsigned i, unsigned count)
{
    static unsigned sum = 0;
    static unsigned row_idx = 0;
    static unsigned prev = 0;

    if (i == 0)
    {
        // start of a new enumeration: drop all state from the previous one
        sum = row_idx = prev = 0;
    }
    else
    {
        if (prev + 1 < i)
            print_bc(prev + 1, 0);
        prev = i;
    }

    if (row_idx == 0)
    {
        std::cout << i << ":";
    }

    std::cout << std::setw(5) << std::setfill('0') << count << " ";
    sum += count;

    ++row_idx;
    if (row_idx == 10)
    {
        row_idx = 0;
        std::cout << " | " << sum << std::endl;
    }
}
384
385 template<class BV>
print_bvector_stat(const BV & bvect)386 size_t print_bvector_stat(const BV& bvect)
387 {
388 typename BV::statistics st;
389 bvect.calc_stat(&st);
390
391 typename serializer<BV>::buffer buf;
392 bm::serializer<BV> ser;
393 ser.serialize(bvect, buf, &st);
394 auto ssize = buf.size();
395
396 std::cout << " - Blocks: [ "
397 << "B:" << st.bit_blocks
398 << ", G:" << st.gap_blocks << "] "
399 << " count() = " << bvect.count()
400 << ", mem = " << st.memory_used << " " << (st.memory_used / (1024 * 1024)) << "MB "
401 << ", max smem:" << st.max_serialize_mem << " " << (st.max_serialize_mem / (1024 * 1024)) << "MB "
402 << " compressed = " << ssize << " " << (ssize / (1024 * 1024)) << "MB "
403 << std::endl;
404 return ssize;
405 }
406
407
/// Print a block-by-block summary of a bit-vector to std::cout.
///
/// Blocks are visited in index order. NULL blocks are skipped; a run of
/// blocks for which IS_FULL_BLOCK() holds is printed as "{F.first:last}";
/// GAP blocks and bit blocks are printed with their bit counts. A line break
/// is emitted after every 10 printed items and the accumulated
/// "gap_efficiency" value is printed at the end.
///
/// @param bv     - bit-vector to inspect (through its blocks manager)
/// @param blocks - number of blocks to visit; 0 means bm::set_total_blocks
408 template<class BV>
409 void print_stat(const BV& bv, typename BV::block_idx_type blocks = 0)
410 {
411 const typename BV::blocks_manager_type& bman = bv.get_blocks_manager();
412
// NOTE(review): count is accumulated below but never printed or returned
413 bm::id_t count = 0;
// number of items printed on the current output line
414 int printed = 0;
415
416 int total_gap_eff = 0;
417
418 if (!blocks)
419 {
420 blocks = bm::set_total_blocks;
421 }
422
423 typename BV::block_idx_type nb;
// index of the last block that reached the end of the loop body
// (NULL blocks leave it unchanged because of the `continue`)
424 typename BV::block_idx_type nb_prev = 0;
425 for (nb = 0; nb < blocks; ++nb)
426 {
427 unsigned i0, j0;
428 bm::get_block_coord(nb, i0, j0);
429 const bm::word_t* blk = bman.get_block(i0, j0);
430
431 if (!blk)
432 continue;
433
434 if (IS_FULL_BLOCK(blk))
435 {
436 if (BM_IS_GAP(blk)) // gap block
437 {
438 std::cout << "[Alert!" << nb << "]";
439 assert(0);
440 }
441
// Scan forward over the run of full blocks. The loop re-reads block nb
// (not i) and advances nb together with i; on leaving the run nb is
// stepped back so it names the last full block.
// NOTE(review): the scan is bounded by bm::set_total_blocks, not by `blocks`.
442 typename BV::block_idx_type start = nb;
443 for(auto i = nb+1; i < bm::set_total_blocks; ++i, ++nb)
444 {
445 bm::get_block_coord(nb, i0, j0);
446 blk = bman.get_block(i0, j0);
447 if (IS_FULL_BLOCK(blk))
448 {
449 if (BM_IS_GAP(blk)) // gap block
450 {
451 std::cout << "[Alert!" << nb << "]";
452 assert(0);
453 --nb;
454 break;
455 }
456
457 }
458 else
459 {
460 --nb;
461 break;
462 }
463 }
464
465 std::cout << "{F." << start << ":" << nb << "}";
466 ++printed;
467 }
468 else
469 {
// print the distance to the previously reported block when this block
// does not directly follow it
470 if ((nb-1) != nb_prev)
471 {
472 std::cout << ".." << (size_t)nb-nb_prev << "..";
473 }
474
475 if (BM_IS_GAP(blk))
476 {
477 unsigned bc = bm::gap_bit_count(BMGAP_PTR(blk));
478 /*unsigned sum = */bm::gap_control_sum(BMGAP_PTR(blk));
479 unsigned level = bm::gap_level(BMGAP_PTR(blk));
480 count += bc;
481 unsigned len = bm::gap_length(BMGAP_PTR(blk))-1;
// mem_eff = bc*2 - len*2, computed in unsigned arithmetic
482 unsigned raw_size=bc*2;
483 unsigned cmr_len=len*2;
484 size_t mem_eff = raw_size - cmr_len;
485 total_gap_eff += unsigned(mem_eff);
486
487 unsigned i,j;
488 bm::get_block_coord(nb, i, j);
489 std::cout << " [GAP " << nb << "(" << i << "," << j << ")"
490 << "=" << bc << ":" << level << "-L" << len << "(" << mem_eff << ")]";
491 ++printed;
492 }
493 else // bitset
494 {
495 unsigned bc = bm::bit_block_count(blk);
496
// zw: number of all-zero words in the bit block
497 unsigned zw = 0;
498 for (unsigned i = 0; i < bm::set_block_size; ++i)
499 {
500 zw += (blk[i] == 0);
501 }
502
503 count += bc;
504 std::cout << " (BIT " << nb << "=" << bc << "[" << zw << "])";
505 ++printed;
506 }
507 }
// wrap the output line after 10 printed items
508 if (printed == 10)
509 {
510 printed = 0;
511 printf("\n");
512 }
513 nb_prev = nb;
514 } // for nb
515 std::cout << std::endl << "gap_efficiency=" << total_gap_eff << std::endl;
516
517 }
518
519 template<class BV>
compute_serialization_size(const BV & bv)520 size_t compute_serialization_size(const BV& bv)
521 {
522 BM_DECLARE_TEMP_BLOCK(tb)
523 unsigned char* buf = 0;
524 typename BV::size_type blob_size = 0;
525 try
526 {
527 bm::serializer<BV> bvs(typename BV::allocator_type(), tb);
528 //bvs.set_compression_level(4);
529
530 typename BV::statistics st;
531 bv.calc_stat(&st);
532
533 buf = new unsigned char[st.max_serialize_mem];
534 blob_size = (unsigned)bvs.serialize(bv, (unsigned char*)buf, st.max_serialize_mem);
535 }
536 catch (...)
537 {
538 delete [] buf;
539 throw;
540 }
541
542 delete [] buf;
543 return blob_size;
544 }
545
#if 0
/// (disabled) For every bit block of every plane of a sparse vector, look
/// among the following planes for a block that works as an XOR filter, i.e.
/// XOR-ing with it lowers the block's gap-count / bit-count metric below
/// both the block's own metric and bm::bie_cut_off.
///
/// @param sv - sparse vector to analyze
template<class SV>
void print_svector_xor_stat(const SV& sv)
{
    BM_DECLARE_TEMP_BLOCK(tb)
    typename SV::size_type sz = sv.size();
    if (!sz)
        return;
    typename SV::size_type nb_max = (sz >> bm::set_block_shift);

    for (typename SV::size_type nb = 0; nb < nb_max; ++nb)
    {
        std::cout << "nb = " << nb << std::endl;

        unsigned i0 = unsigned(nb >> bm::set_array_shift);
        unsigned j0 = unsigned(nb & bm::set_array_mask);

        auto planes = sv.planes();
        for (unsigned i = 0; i < planes; ++i)
        {
            const typename SV::bvector_type* bv = sv.get_plane(i);
            if (!bv)
                continue;
            const typename SV::bvector_type::blocks_manager_type& bman = bv->get_blocks_manager();
            const bm::word_t* block = bman.get_block_ptr(i0, j0);
            if (!IS_VALID_ADDR(block) || BM_IS_GAP(block))
                continue;

            // compute block complexity
            bm::block_waves_xor_descr x_descr;
            bm::compute_complexity_descr(block, x_descr);
            unsigned gc, bc;
            bm::bit_block_change_bc32(block, &gc, &bc);
            unsigned best_metric, block_metric;
            block_metric = best_metric = gc < bc ? gc : bc;

            bool kb_found = false;
            bm::id64_t d64 = 0;
            for (unsigned k = i + 1; k < planes; ++k)
            {
                // candidate key block comes from plane k (not from plane i:
                // that would compare the block against itself)
                const typename SV::bvector_type* bv_x = sv.get_plane(k);
                if (!bv_x)
                    continue;
                const typename SV::bvector_type::blocks_manager_type& bman_x = bv_x->get_blocks_manager();
                const bm::word_t* block_x = bman_x.get_block_ptr(i0, j0);
                if (!IS_VALID_ADDR(block_x) || BM_IS_GAP(block_x))
                    continue;

                // evaluate potential key block as XOR filter
                bm::id64_t kb_d64 =
                    bm::compute_xor_complexity_descr(block, block_x, x_descr);
                if (kb_d64) // candidate XOR filter found
                {
                    bm::bit_block_xor_product(tb, block, block_x, kb_d64);
                    unsigned kb_bc, kb_gc;
                    bm::bit_block_change_bc32(tb, &kb_gc, &kb_bc);
                    if (kb_gc < best_metric && kb_gc < bm::bie_cut_off)
                    {
                        d64 = kb_d64;
                        best_metric = kb_gc;
                        kb_found = true;
                        //*kb_j = j0;
                    }
                    if (kb_bc < best_metric && kb_bc < bm::bie_cut_off)
                    {
                        d64 = kb_d64;
                        best_metric = kb_bc;
                        kb_found = true;
                        //*kb_j = j0;
                    }

                }

            } // for k

            if (kb_found)
            {
                std::cout << "XOR match " << "metric gain = " << std::endl;
            }


            std::cout << std::endl;

        } // for i

    } // for nb
}
#endif
634
635 template<class SV>
636 void print_svector_stat(const SV& svect, bool print_sim = false)
637 {
638 typedef typename SV::bvector_type bvector_type;
639 /// Functor to compute jaccard similarity
640 /// \internal
641 struct Jaccard_Func
642 {
operatorJaccard_Func643 unsigned operator () (distance_metric_descriptor* dmit,
644 distance_metric_descriptor* /*dmit_end*/)
645 {
646 double d;
647 BM_ASSERT(dmit->metric == COUNT_AND);
648 typename bvector_type::size_type cnt_and = dmit->result;
649 ++dmit;
650 BM_ASSERT(dmit->metric == COUNT_OR);
651 typename bvector_type::size_type cnt_or = dmit->result;
652 if (cnt_and == 0 || cnt_or == 0)
653 {
654 d = 0.0;
655 }
656 else
657 {
658 d = double(cnt_and) / double(cnt_or);
659 }
660 unsigned res = unsigned(d * 100);
661 if (res > 100) res = 100;
662 return res;
663 }
664 };
665
666 typedef bm::similarity_descriptor<bvector_type, 2, unsigned, unsigned, Jaccard_Func> similarity_descriptor_type;
667 typedef bm::similarity_batch<similarity_descriptor_type> similarity_batch_type;
668
669 similarity_batch_type sbatch;
670
671 bm::build_jaccard_similarity_batch(sbatch, svect);
672
673 sbatch.calculate();
674 sbatch.sort();
675
676 typename similarity_batch_type::vector_type& sim_vec = sbatch.descr_vect_;
677 if (print_sim)
678 {
679 for (size_t k = 0; k < sim_vec.size(); ++k)
680 {
681 unsigned sim = sim_vec[k].similarity();
682 if (sim > 10)
683 {
684 const typename SV::bvector_type* bv1 = sim_vec[k].get_first();
685 const typename SV::bvector_type* bv2 = sim_vec[k].get_second();
686
687 auto bv_size2 = compute_serialization_size(*bv2);
688
689 typename SV::bvector_type bvx(*bv2);
690 bvx ^= *bv1;
691
692 auto bv_size_x = compute_serialization_size(bvx);
693 if (bv_size_x < bv_size2) // true savings
694 {
695 size_t diff = bv_size2 - bv_size_x;
696
697 // compute 10% cut-off
698 size_t sz10p = bv_size2 / 10;
699 if (diff > sz10p)
700 {
701 std:: cout << "[" << sim_vec[k].get_first_idx()
702 << ", " << sim_vec[k].get_second_idx()
703 << "] = " << sim
704 << " size(" << sim_vec[k].get_second_idx() << ")="
705 << bv_size2
706 << " size(x)=" << bv_size_x
707 << " diff=" << diff
708 << std:: endl;
709 }
710 }
711 }
712 } // for k
713 }
714
715
716 typename SV::statistics st;
717 svect.calc_stat(&st);
718
719 std::cout << "size = " << svect.size() << std::endl;
720 std::cout << "Bit blocks: " << st.bit_blocks << std::endl;
721 std::cout << "Gap blocks: " << st.gap_blocks << std::endl;
722 std::cout << "Max serialize mem:" << st.max_serialize_mem << " "
723 << (st.max_serialize_mem / (1024 * 1024)) << "MB" << std::endl;
724 std::cout << "Memory used: " << st.memory_used << " "
725 << (st.memory_used / (1024 * 1024)) << "MB" << std::endl;
726
727 auto eff_max_element = svect.effective_vector_max();
728 size_t std_vect_size = sizeof(typename SV::value_type) * svect.size() * eff_max_element;
729 std::cout << "Projected mem usage for vector<value_type>:"
730 << std_vect_size << " "
731 << std_vect_size / (1024 * 1024) << "MB"
732 << std::endl;
733 if (sizeof(typename SV::value_type) > 4 && (eff_max_element == 1))
734 {
735 std::cout << "Projected mem usage for vector<long long>:"
736 << sizeof(long long) * svect.size() << std::endl;
737 }
738
739 std::cout << "\nplanes:" << std::endl;
740
741 size_t ssize(0), octet_ssize(0);
742
743 typename SV::bvector_type bv_join; // global OR of all planes
744 auto planes = svect.planes();
745
746 unsigned octet_cnt(0), octet(0);
747 for (unsigned i = 0; i < planes; ++i)
748 {
749 const typename SV::bvector_type* bv_plane = svect.get_plane(i);
750 std::cout << i << "-" << octet_cnt << ":";
751 if (bv_plane == 0)
752 {
753 std::cout << "NULL\n";
754 bool any_else = false;
755 for (unsigned j = i+1; j < planes; ++j) // look ahead
756 {
757 if (svect.get_plane(j))
758 {
759 any_else = true;
760 break;
761 }
762 }
763 if (!any_else)
764 {
765 break;
766 }
767 }
768 else
769 {
770 bv_join |= *bv_plane;
771 auto pssize = bm::print_bvector_stat(*bv_plane);
772 ssize += pssize;
773 octet_ssize += pssize;
774 }
775 if (octet_cnt == 7)
776 {
777 std::cout << "--------------------" << std::endl;
778 std::cout << "octet N = " << octet <<
779 " compressed = " << octet_ssize <<
780 " " << octet_ssize/(1024*1024) << "MB" << std::endl;
781 octet_cnt = 0; octet_ssize = 0;
782 octet++;
783 std::cout << std::endl;
784 }
785 else
786 {
787 octet_cnt++;
788 }
789 } // for i
790
791 const typename SV::bvector_type* bv_null = svect.get_null_bvector();
792 if (bv_null)
793 {
794 std::cout << "(not) NULL plane:\n";
795 ssize += print_bvector_stat(*bv_null);
796 typename SV::size_type not_null_cnt = bv_null->count();
797 std::cout << " - Bitcount: " << not_null_cnt << std::endl;
798
799 std::cout << "Projected mem usage for std::vector<pair<unsigned, value_type> >:"
800 << ((sizeof(typename SV::value_type) + sizeof(unsigned)) * not_null_cnt) << " "
801 << ((sizeof(typename SV::value_type) + sizeof(unsigned)) * not_null_cnt) / (1024 * 1024) << "MB"
802 << std::endl;
803 }
804
805 std::cout << " Total serialized size (planes): " << ssize
806 << std::endl
807 << " " << ssize / (1024 * 1024) << " MB" << std::endl;
808
809 if (svect.size())
810 {
811 bm::id64_t bv_join_cnt = bv_join.count();
812 double fr = double(bv_join_cnt) / double (svect.size());
813 std::cout << "Non-zero elements: " << bv_join_cnt << " "
814 << "ratio=" << fr
815 << std::endl;
816 size_t non_zero_mem = size_t(bv_join_cnt) * sizeof(typename SV::value_type);
817 std::cout << "Projected mem usage for non-zero elements: " << non_zero_mem << " "
818 << non_zero_mem / (1024*1024) << " MB"
819 << std::endl;
820 }
821 }
822
823
/// Print the octet usage of a string sparse vector to std::cout.
///
/// The octet statistics matrix is obtained from calc_octet_stat(); for every
/// row with at least one non-zero cell a line "<row> : <chars>\t total= <n>"
/// is printed, where <chars> are the column indexes of the non-zero cells
/// printed as characters and <n> is their number. Rows with no non-zero
/// cells are skipped.
///
/// @param str_svect - string sparse vector to report on
template<class SV>
void print_str_svector_stat(const SV& str_svect)
{
    typename SV::plane_octet_matrix_type octet_stat_matr;

    str_svect.calc_octet_stat(octet_stat_matr);

    for (unsigned r = 0; r < octet_stat_matr.rows(); ++r)
    {
        const typename SV::plane_octet_matrix_type::value_type* cells
                                                = octet_stat_matr.row(r);

        // collect the characters present in this row in column order
        std::string letters;
        for (unsigned c = 0; c < octet_stat_matr.cols(); ++c)
        {
            if (cells[c]) // letter is present
                letters.push_back(char(c));
        }
        if (letters.empty())
            continue;

        std::cout << r << " : ";
        std::cout << letters;
        std::cout << "\t total= " << letters.size();
        std::cout << std::endl;
    } // for r
}
868
869
870
871 // save compressed collection to disk
872 //
873 template<class CBC>
874 int file_save_compressed_collection(const CBC& cbc, const std::string& fname, size_t* blob_size = 0)
875 {
876 bm::compressed_collection_serializer<CBC > cbcs;
877 typename CBC::buffer_type sbuf;
878
879 cbcs.serialize(cbc, sbuf);
880
881 std::ofstream fout(fname.c_str(), std::ios::binary);
882 if (!fout.good())
883 {
884 return -1;
885 }
886 const char* buf = (char*)sbuf.buf();
887 fout.write(buf, sbuf.size());
888 if (!fout.good())
889 {
890 return -1;
891 }
892
893 fout.close();
894
895 if (blob_size)
896 {
897 *blob_size = sbuf.size();
898 }
899 return 0;
900 }
901
902 // load compressed collection from disk
903 //
904 template<class CBC>
file_load_compressed_collection(CBC & cbc,const std::string & fname)905 int file_load_compressed_collection(CBC& cbc, const std::string& fname)
906 {
907 std::vector<unsigned char> buffer;
908
909 // read the input buffer, validate errors
910 auto ret = bm::read_dump_file(fname, buffer);
911 if (ret != 0)
912 {
913 return -2;
914 }
915 if (buffer.size() == 0)
916 {
917 return -3;
918 }
919
920 const unsigned char* buf = &buffer[0];
921
922 compressed_collection_deserializer<CBC> cbcd;
923 cbcd.deserialize(cbc, buf);
924
925 return 0;
926 }
927
928
929
930 // save sparse_vector dump to disk
931 //
932 template<class SV>
933 int file_save_svector(const SV& sv, const std::string& fname,
934 size_t* sv_blob_size=0, bool use_xor = true)
935 {
936 BM_ASSERT(!fname.empty());
937
938 bm::sparse_vector_serial_layout<SV> sv_lay;
939
940 bm::sparse_vector_serializer<SV> sv_serializer;
941 sv_serializer.set_xor_ref(use_xor);
942
943 sv_serializer.serialize(sv, sv_lay);
944 std::ofstream fout(fname.c_str(), std::ios::binary);
945 if (!fout.good())
946 {
947 return -1;
948 }
949 const char* buf = (char*)sv_lay.buf();
950 fout.write(buf, std::streamsize(sv_lay.size()));
951 if (!fout.good())
952 {
953 return -1;
954 }
955
956 fout.close();
957
958 if (sv_blob_size)
959 {
960 *sv_blob_size = sv_lay.size();
961 }
962 return 0;
963 }
964
965 template<class SV>
file_load_svector(SV & sv,const std::string & fname)966 int file_load_svector(SV& sv, const std::string& fname)
967 {
968 std::vector<unsigned char> buffer;
969
970 // read the input buffer, validate errors
971 auto ret = bm::read_dump_file(fname, buffer);
972 if (ret != 0)
973 {
974 return -2;
975 }
976 if (buffer.size() == 0)
977 {
978 return -3;
979 }
980
981 const unsigned char* buf = &buffer[0];
982 BM_DECLARE_TEMP_BLOCK(tb)
983 auto res = bm::sparse_vector_deserialize(sv, buf, tb);
984 if (res != 0)
985 {
986 return -4;
987 }
988 return 0;
989 }
990
991
// compare-check that the sparse vector exactly corresponds to the vector
//
// Elements are compared at full 64-bit width, so values that differ only
// above bit 31 are reported as a mismatch.
//
// returns 0 - if equal
//         1 - no size match
//         2 - element match fails
template<class SV, class V>
int svector_check(const SV& sv, const V& vect)
{
    typedef typename SV::size_type sv_index_type;

    if (sv.size() != vect.size())
    {
        return 1;
    }
    for (size_t i = 0; i < vect.size(); ++i)
    {
        // widen both sides the same way before comparing: no 32-bit truncation
        const unsigned long long v1 =
            static_cast<unsigned long long>(sv[sv_index_type(i)]);
        const unsigned long long v2 =
            static_cast<unsigned long long>(vect[i]);
        if (v1 != v2)
            return 2;
    } // for i
    return 0;
}
1013
1014
/// Append the value of every position enumerated from a bit-vector to a
/// sparse vector, using the sparse vector's back inserter.
///
/// @param sv - [out] sparse vector receiving the values
/// @param bv - bit-vector to enumerate
template<class SV, class BV>
void convert_bv2sv(SV& sv, const BV& bv)
{
    typename SV::back_insert_iterator bit = sv.get_back_inserter();
    typename BV::enumerator en = bv.first();
    while (en.valid())
    {
        auto v = en.value();
        bit = v;
        ++en;
    }
    bit.flush(); // push out whatever the inserter still holds
}
1027
1028
1029 } // namespace
1030
1031
1032
1033 #ifdef _MSC_VER
1034 #pragma warning( pop )
1035 #endif
1036
1037 #endif
1038