1 #ifndef BMSTRSPARSEVEC__H__INCLUDED__
2 #define BMSTRSPARSEVEC__H__INCLUDED__
3 /*
4 Copyright(c) 2002-2017 Anatoliy Kuznetsov(anatoliy_kuznetsov at yahoo.com)
5
6 Licensed under the Apache License, Version 2.0 (the "License");
7 you may not use this file except in compliance with the License.
8 You may obtain a copy of the License at
9
10 http://www.apache.org/licenses/LICENSE-2.0
11
12 Unless required by applicable law or agreed to in writing, software
13 distributed under the License is distributed on an "AS IS" BASIS,
14 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 See the License for the specific language governing permissions and
16 limitations under the License.
17
18 For more information please visit: http://bitmagic.io
19 */
20
21 /*! \file bmstrsparsevec.h
22 \brief string sparse vector based on bit-transposed matrix
23 */
24
25 #include <stddef.h>
26 #include "bmconst.h"
27
28 #ifndef BM_NO_STL
29 #include <stdexcept>
30 #endif
31
32 #ifndef BM__H__INCLUDED__
33 // BitMagic utility headers do not include main "bm.h" declaration
34 // #include "bm.h" or "bm64.h" explicitly
35 # error missing include (bm.h or bm64.h)
36 #endif
37
38 #include "bmtrans.h"
39 #include "bmalgo.h"
40 #include "bmbuffer.h"
41 #include "bmbmatrix.h"
42 #include "bmdef.h"
43
44 namespace bm
45 {
46
47 /*!
48 \brief succinct sparse vector for strings with compression using bit transposition method
49
50 Initial string is bit-transposed into bit-planes so collection may use less
51 memory due to prefix sum (GAP) compression in bit-planes.
52
53 @ingroup sv
54 */
55 template<typename CharType, typename BV, unsigned MAX_STR_SIZE>
56 class str_sparse_vector : public base_sparse_vector<CharType, BV, MAX_STR_SIZE>
57 {
58 public:
59 typedef BV bvector_type;
60 typedef bvector_type* bvector_type_ptr;
61 typedef const bvector_type* bvector_type_const_ptr;
62 typedef CharType value_type;
63 typedef CharType* value_type_prt;
64 typedef typename bvector_type::size_type size_type;
65 typedef typename BV::allocator_type allocator_type;
66 typedef typename bvector_type::allocation_policy allocation_policy_type;
67 typedef typename bvector_type::enumerator bvector_enumerator_type;
68 typedef typename allocator_type::allocator_pool_type allocator_pool_type;
69 typedef bm::basic_bmatrix<BV> bmatrix_type;
70 typedef base_sparse_vector<CharType, BV, MAX_STR_SIZE> parent_type;
71
72 /*! Statistical information about memory allocation details. */
73 struct statistics : public bv_statistics
74 {};
75
76 enum octet_planes
77 {
78 sv_octet_planes = MAX_STR_SIZE
79 };
80
81 /**
82 Matrix of character remappings
83 @internal
84 */
85 typedef
86 bm::heap_matrix<unsigned char,
87 MAX_STR_SIZE, /* ROWS */
88 256, /* COLS = number of chars in the ASCII set */
89 typename bvector_type::allocator_type>
90 plane_octet_matrix_type;
91 typedef plane_octet_matrix_type remap_matrix_type;
92 /**
93 Matrix of character frequencies (for optimal code remap)
94 @internal
95 */
96 typedef
97 bm::heap_matrix<size_t,
98 MAX_STR_SIZE, /* ROWS */
99 256, /* COLS = number of chars in the ASCII set */
100 typename bvector_type::allocator_type>
101 octet_freq_matrix_type;
102
103 struct is_remap_support { enum trait { value = true }; };
104 struct is_rsc_support { enum trait { value = false }; };
105
106 /**
107 Reference class to access elements via common [] operator
108 @ingroup sv
109 */
110 class const_reference
111 {
112 public:
const_reference(const str_sparse_vector<CharType,BV,MAX_STR_SIZE> & str_sv,size_type idx)113 const_reference(
114 const str_sparse_vector<CharType, BV, MAX_STR_SIZE>& str_sv,
115 size_type idx) BMNOEXCEPT
116 : str_sv_(str_sv), idx_(idx)
117 {}
118
119 operator const value_type*() const BMNOEXCEPT
120 {
121 return get();
122 }
123
get()124 const value_type* get() const BMNOEXCEPT
125 {
126 str_sv_.get(idx_, buf_, MAX_STR_SIZE);
127 return &(buf_[0]);
128 }
129
130 bool operator==(const const_reference& ref) const BMNOEXCEPT
131 { return bool(*this) == bool(ref); }
is_null()132 bool is_null() const BMNOEXCEPT { return str_sv_.is_null(idx_); }
133 private:
134 const str_sparse_vector<CharType, BV, MAX_STR_SIZE>& str_sv_;
135 size_type idx_;
136 mutable CharType buf_[MAX_STR_SIZE];
137 };
138
139 /**
140 Reference class to access elements via common [] operator
141 @ingroup sv
142 */
143 class reference
144 {
145 public:
reference(str_sparse_vector<CharType,BV,MAX_STR_SIZE> & str_sv,size_type idx)146 reference(str_sparse_vector<CharType, BV, MAX_STR_SIZE>& str_sv,
147 size_type idx) BMNOEXCEPT
148 : str_sv_(str_sv), idx_(idx)
149 {}
150
151 operator const value_type*() const BMNOEXCEPT
152 {
153 return get();
154 }
155
get()156 const value_type* get() const BMNOEXCEPT
157 {
158 str_sv_.get(idx_, buf_, MAX_STR_SIZE);
159 return &(buf_[0]);
160 }
161
162 reference& operator=(const reference& ref)
163 {
164 // TO DO: implement element copy bit by bit
165 str_sv_.set(idx_, (const value_type*)ref);
166 return *this;
167 }
168
169 reference& operator=(const value_type* str)
170 {
171 str_sv_.set(idx_, str);
172 return *this;
173 }
174 bool operator==(const reference& ref) const BMNOEXCEPT
175 { return bool(*this) == bool(ref); }
is_null()176 bool is_null() const BMNOEXCEPT { return str_sv_.is_null(idx_); }
177 private:
178 str_sparse_vector<CharType, BV, MAX_STR_SIZE>& str_sv_;
179 size_type idx_;
180 mutable CharType buf_[MAX_STR_SIZE];
181 };
182
183 /**
184 Const iterator to do quick traverse of the sparse vector.
185
186 Implementation uses buffer for decoding so, competing changes
187 to the original vector may not match the iterator returned values.
188
189 This iterator keeps an operational buffer of transposed elements,
190 so memory footprint is not negligable.
191
192 @ingroup sv
193 */
194 class const_iterator
195 {
196 public:
197 friend class str_sparse_vector;
198 #ifndef BM_NO_STL
199 typedef std::input_iterator_tag iterator_category;
200 #endif
201 typedef str_sparse_vector<CharType, BV, MAX_STR_SIZE> str_sparse_vector_type;
202 typedef str_sparse_vector_type* str_sparse_vector_type_ptr;
203 typedef typename str_sparse_vector_type::value_type value_type;
204 typedef typename str_sparse_vector_type::size_type size_type;
205 typedef typename str_sparse_vector_type::bvector_type bvector_type;
206 typedef typename bvector_type::allocator_type allocator_type;
207 typedef typename allocator_type::allocator_pool_type allocator_pool_type;
208
209 typedef long long difference_type;
210 typedef CharType* pointer;
211 typedef CharType*& reference;
212 public:
213 /**
214 Construct iterator (not attached to any particular vector)
215 */
216 const_iterator() BMNOEXCEPT;
217 /**
218 Construct iterator (attached to sparse vector)
219 @param sv - pointer to sparse vector
220 */
221 const_iterator(const str_sparse_vector_type* sv) BMNOEXCEPT;
222 /**
223 Construct iterator (attached to sparse vector) and positioned
224 @param sv - reference to sparse vector
225 @param pos - position in the vector to start
226 */
227 const_iterator(const str_sparse_vector_type* sv, size_type pos) BMNOEXCEPT;
228
229 const_iterator(const const_iterator& it) BMNOEXCEPT;
230
231 /**
232 setup iterator to retrieve a sub-string of a string
233 @param from - Position of the first character to be copied
234 @param len - length of a substring
235 */
236 void set_substr(unsigned from, unsigned len = MAX_STR_SIZE) BMNOEXCEPT;
237
238
239 bool operator==(const const_iterator& it) const BMNOEXCEPT
240 { return (pos_ == it.pos_) && (sv_ == it.sv_); }
241 bool operator!=(const const_iterator& it) const BMNOEXCEPT
242 { return ! operator==(it); }
243 bool operator < (const const_iterator& it) const BMNOEXCEPT
244 { return pos_ < it.pos_; }
245 bool operator <= (const const_iterator& it) const BMNOEXCEPT
246 { return pos_ <= it.pos_; }
247 bool operator > (const const_iterator& it) const BMNOEXCEPT
248 { return pos_ > it.pos_; }
249 bool operator >= (const const_iterator& it) const BMNOEXCEPT
250 { return pos_ >= it.pos_; }
251
252 /// \brief Get current position (value)
253 const value_type* operator*() const BMNOEXCEPT { return this->value(); }
254
255 /// \brief Advance to the next available value
256 const_iterator& operator++() BMNOEXCEPT
257 { this->advance(); return *this; }
258
259 /// \brief Advance to the next available value
260 const_iterator& operator++(int) BMNOEXCEPT
261 { const_iterator tmp(*this);this->advance(); return tmp; }
262
263
264 /// \brief Get current position (value)
265 const value_type* value() const BMNOEXCEPT;
266
267 /// \brief Get NULL status
is_null()268 bool is_null() const BMNOEXCEPT { return sv_->is_null(this->pos_); }
269
270 /// Returns true if iterator is at a valid position
valid()271 bool valid() const BMNOEXCEPT { return pos_ != bm::id_max; }
272
273 /// Invalidate current iterator
invalidate()274 void invalidate() BMNOEXCEPT { pos_ = bm::id_max; }
275
276 /// Current position (index) in the vector
pos()277 size_type pos() const BMNOEXCEPT { return pos_; }
278
279 /// re-position to a specified position
280 void go_to(size_type pos) BMNOEXCEPT;
281
282 /// advance iterator forward by one
283 void advance() BMNOEXCEPT;
284
285 protected:
286 enum buf_size_e
287 {
288 n_rows = 1024
289 };
290 typedef dynamic_heap_matrix<CharType, allocator_type> buffer_matrix_type;
291
292 private:
293 const str_sparse_vector_type* sv_; ///!< ptr to parent
294 unsigned substr_from_; ///!< substring from
295 unsigned substr_to_; ///!< substring to
296 mutable size_type pos_; ///!< Position
297 mutable buffer_matrix_type buf_matrix_; ///!< decode value buffer
298 mutable size_type pos_in_buf_; ///!< buffer position
299 };
300
301
302 /**
303 Back insert iterator implements buffered insert, faster than generic
304 access assignment.
305
306 Limitations for buffered inserter:
307 1. Do not use more than one inserter (into one vector) at the same time
308 2. Use method flush() at the end to send the rest of accumulated buffer
309 flush is happening automatically on destruction, but if flush produces an
310 exception (for whatever reason) it will be an exception in destructor.
311 As such, explicit flush() is safer way to finilize the sparse vector load.
312
313 @ingroup sv
314 */
315 class back_insert_iterator
316 {
317 public:
318 #ifndef BM_NO_STL
319 typedef std::output_iterator_tag iterator_category;
320 #endif
321 typedef str_sparse_vector<CharType, BV, MAX_STR_SIZE> str_sparse_vector_type;
322 typedef str_sparse_vector_type* str_sparse_vector_type_ptr;
323 typedef typename str_sparse_vector_type::value_type value_type;
324 typedef typename str_sparse_vector_type::size_type size_type;
325 typedef typename str_sparse_vector_type::bvector_type bvector_type;
326 typedef typename bvector_type::allocator_type allocator_type;
327 typedef typename allocator_type::allocator_pool_type allocator_pool_type;
328
329 typedef void difference_type;
330 typedef void pointer;
331 typedef void reference;
332
333 public:
334 back_insert_iterator() BMNOEXCEPT;
335 back_insert_iterator(str_sparse_vector_type* sv) BMNOEXCEPT;
336 back_insert_iterator(const back_insert_iterator& bi) BMNOEXCEPT;
337
338 back_insert_iterator& operator=(const back_insert_iterator& bi)
339 {
340 BM_ASSERT(bi.empty());
341 this->flush(); sv_ = bi.sv_;
342 return *this;
343 }
344
345 ~back_insert_iterator();
346
347 /** push value to the vector */
348 back_insert_iterator& operator=(const value_type* v)
349 { this->add(v); return *this; }
350
351
352 /** push value to the vector */
353 template<typename StrType>
354 back_insert_iterator& operator=(const StrType& v)
355 {
356 this->add(v.c_str()); return *this; // TODO: avoid c_str()
357 }
358
359 /** noop */
360 back_insert_iterator& operator*() { return *this; }
361 /** noop */
362 back_insert_iterator& operator++() { return *this; }
363 /** noop */
364 back_insert_iterator& operator++( int ) { return *this; }
365
366 /** add value to the container*/
367 void add(const value_type* v);
368
369 /** add NULL (no-value) to the container */
370 void add_null();
371
372 /** add a series of consequitve NULLs (no-value) to the container */
373 void add_null(size_type count);
374
375 /** flush the accumulated buffer */
376 void flush();
377 protected:
378 /** return true if insertion buffer is empty */
379 bool empty() const BMNOEXCEPT;
380
381 typedef typename bvector_type::block_idx_type block_idx_type;
382
383 /** add value to the buffer without changing the NULL vector
384 @param v - value to push back
385 @internal
386 */
387 void add_value(const value_type* v);
388
389 private:
390 enum buf_size_e
391 {
392 n_buf_size = 1024 * 8
393 };
394
395 typedef bm::heap_matrix<CharType,
396 n_buf_size, // ROWS: number of strings in one batch
397 MAX_STR_SIZE, // COLS
398 allocator_type> buffer_matrix_type;
399
400 private:
401 str_sparse_vector_type* sv_; ///!< pointer on the parent vector
402 bvector_type* bv_null_; ///!< not NULL vector pointer
403 buffer_matrix_type buf_matrix_; ///!< value buffer
404 size_type pos_in_buf_; ///!< buffer position
405 block_idx_type prev_nb_; ///!< previous block added
406 };
407
408
409 public:
410
411 /*!
412 \brief Sparse vector constructor
413
414 \param null_able - defines if vector supports NULL values flag
415 by default it is OFF, use bm::use_null to enable it
416 \param ap - allocation strategy for underlying bit-vectors
417 Default allocation policy uses BM_BIT setting (fastest access)
418 \param bv_max_size - maximum possible size of underlying bit-vectors
419 Please note, this is NOT size of svector itself, it is dynamic upper limit
420 which should be used very carefully if we surely know the ultimate size
421 \param alloc - allocator for bit-vectors
422
423 \sa bvector<>
424 \sa bm::bvector<>::allocation_policy
425 \sa bm::startegy
426 */
427 str_sparse_vector(bm::null_support null_able = bm::no_null,
428 allocation_policy_type ap = allocation_policy_type(),
429 size_type bv_max_size = bm::id_max,
430 const allocator_type& alloc = allocator_type());
431
432 /*! copy-ctor */
433 str_sparse_vector(const str_sparse_vector& str_sv);
434
435 /*! copy assignmment operator */
436 str_sparse_vector<CharType, BV, MAX_STR_SIZE>& operator = (
437 const str_sparse_vector<CharType, BV, MAX_STR_SIZE>& str_sv)
438 {
439 if (this != &str_sv)
440 parent_type::copy_from(str_sv);
441 remap_flags_ = str_sv.remap_flags_;
442 remap_matrix1_ = str_sv.remap_matrix1_;
443 remap_matrix2_ = str_sv.remap_matrix2_;
444 return *this;
445 }
446 #ifndef BM_NO_CXX11
447 /*! move-ctor */
str_sparse_vector(str_sparse_vector<CharType,BV,MAX_STR_SIZE> && str_sv)448 str_sparse_vector(str_sparse_vector<CharType, BV, MAX_STR_SIZE>&& str_sv) BMNOEXCEPT
449 {
450 parent_type::swap(str_sv);
451 remap_flags_ = str_sv.remap_flags_;
452 remap_matrix1_.swap(str_sv.remap_matrix1_);
453 remap_matrix2_.swap(str_sv.remap_matrix2_);
454 }
455
456 /*! move assignmment operator */
457 str_sparse_vector<CharType, BV, MAX_STR_SIZE>& operator =
458 (str_sparse_vector<CharType, BV, MAX_STR_SIZE>&& str_sv) BMNOEXCEPT
459 {
460 if (this != &str_sv)
461 {
462 this->swap(str_sv);
463 }
464 return *this;
465 }
466 #endif
467
468 public:
469
470 // ------------------------------------------------------------
471 /*! @name String element access */
472 ///@{
473
474 /** \brief Operator to get read access to an element */
475 const const_reference operator[](size_type idx) const
476 { return const_reference(*this, idx); }
477
478 /** \brief Operator to get write access to an element */
479 reference operator[](size_type idx) { return reference(*this, idx); }
480
481 /*!
482 \brief set specified element with bounds checking and automatic resize
483 \param idx - element index (vector auto-resized if needs to)
484 \param str - string to set (zero terminated)
485 */
486 void set(size_type idx, const value_type* str);
487
488 /*!
489 \brief set NULL status for the specified element
490 Vector is resized automatically
491 \param idx - element index (vector auto-resized if needs to)
492 */
493 void set_null(size_type idx);
494
495
496 /*!
497 \brief insert the specified element
498 \param idx - element index (vector auto-resized if needs to)
499 \param str - string to set (zero terminated)
500 */
501 void insert(size_type idx, const value_type* str);
502
503
504 /*!
505 \brief insert STL string
506 \param idx - element index (vector auto-resized if needs to)
507 \param str - STL string to set
508 */
509 template<typename StrType>
insert(size_type idx,const StrType & str)510 void insert(size_type idx, const StrType& str)
511 {
512 this->insert(idx, str.c_str()); // TODO: avoid c_str()
513 }
514
515 /*!
516 \brief erase the specified element
517 \param idx - element index
518 */
519 void erase(size_type idx);
520
521 /*!
522 \brief get specified element
523
524 \param idx - element index
525 \param str - string buffer
526 \param buf_size - string buffer size
527
528 @return string length
529 */
530 size_type get(size_type idx,
531 value_type* str, size_type buf_size) const BMNOEXCEPT;
532
533 /*!
534 \brief set specified element with bounds checking and automatic resize
535
536 This is an equivalent of set() method, but templetized to be
537 more compatible with the STL std::string and the likes
538
539 \param idx - element index (vector auto-resized if needs to)
540 \param str - input string
541 expected an STL class with size() support,
542 like basic_string<> or vector<char>
543 */
544 template<typename StrType>
assign(size_type idx,const StrType & str)545 void assign(size_type idx, const StrType& str)
546 {
547 if (idx >= this->size())
548 this->size_ = idx+1;
549
550 size_type str_size = size_type(str.size());
551 size_type sz = size_type((str_size < MAX_STR_SIZE) ? str_size : MAX_STR_SIZE-1);
552 if (!sz)
553 {
554 this->clear_value_planes_from(0, idx);
555 return;
556 }
557 unsigned i = 0;
558 for (; i < sz; ++i)
559 {
560 CharType ch = str[i];
561 if (remap_flags_) // compressional re-mapping is in effect
562 {
563 unsigned char remap_value = remap_matrix2_.get(i, unsigned(ch));
564 BM_ASSERT(remap_value);
565 ch = CharType(remap_value);
566 }
567 this->bmatr_.set_octet(idx, i, (unsigned char)ch);
568 if (!ch)
569 break;
570 } // for i
571 if (idx > sz)
572 return;
573 this->bmatr_.set_octet(idx, sz, 0);
574 this->clear_value_planes_from(unsigned(sz*8+1), idx);
575 }
576
577 /*!
578 \brief push back a string
579 \param str - string to set
580 (STL class with size() support, like basic_string)
581 */
582 template<typename StrType>
push_back(const StrType & str)583 void push_back(const StrType& str) { assign(this->size_, str); }
584
585 /*!
586 \brief push back a string (zero terminated)
587 \param str - string to set
588 */
push_back(const value_type * str)589 void push_back(const value_type* str) { set(this->size_, str); }
590
591
592 /*!
593 \brief get specified string element
594 Template method expects an STL-compatible type basic_string<>
595 \param idx - element index (vector auto-resized if needs to)
596 \param str - string to get [out]
597 */
598 template<typename StrType>
get(size_type idx,StrType & str)599 void get(size_type idx, StrType& str) const
600 {
601 str.clear();
602 for (unsigned i = 0; i < MAX_STR_SIZE; ++i)
603 {
604 CharType ch = CharType(this->bmatr_.get_octet(idx, i));
605 if (ch == 0)
606 break;
607 if (remap_flags_)
608 {
609 const unsigned char* remap_row = remap_matrix1_.row(i);
610 unsigned char remap_value = remap_row[unsigned(ch)];
611 BM_ASSERT(remap_value);
612 if (!remap_value) // unknown dictionary element
613 {
614 throw_bad_value(0);
615 break;
616 }
617 ch = CharType(remap_value);
618 }
619 str.push_back(ch);
620 } // for i
621 }
622
623 /*! Swap content */
624 void swap(str_sparse_vector& str_sv) BMNOEXCEPT;
625
626 ///@}
627
628 // ------------------------------------------------------------
629 /*! @name Element comparison functions */
630 ///@{
631
632 /**
633 \brief Compare vector element with argument lexicographically
634
635 NOTE: for a re-mapped container, input string may have no correct
636 remapping, in this case we have an ambiguity
637 (we know it is not equal (0) but LT or GT?).
638 Behavior is undefined.
639
640 \param idx - vactor element index
641 \param str - argument to compare with
642
643 \return 0 - equal, < 0 - vect[i] < str, >0 otherwise
644 */
645 int compare(size_type idx, const value_type* str) const BMNOEXCEPT;
646
647
648 /**
649 \brief Find size of common prefix between two vector elements in octets
650 \return size of common prefix
651 */
652 unsigned common_prefix_length(size_type idx1, size_type idx2) const BMNOEXCEPT;
653
654 ///@}
655
656
657 // ------------------------------------------------------------
658 /*! @name Clear */
659 ///@{
660
661 /*! \brief resize to zero, free memory */
662 void clear_all(bool free_mem) BMNOEXCEPT;
663
664 /*! \brief resize to zero, free memory */
clear()665 void clear() BMNOEXCEPT { clear_all(true); }
666
667 /*!
668 \brief clear range (assign bit 0 for all planes)
669 \param left - interval start
670 \param right - interval end (closed interval)
671 \param set_null - set cleared values to unassigned (NULL)
672 */
673 str_sparse_vector<CharType, BV, MAX_STR_SIZE>&
674 clear_range(size_type left, size_type right, bool set_null = false)
675 {
676 parent_type::clear_range(left, right, set_null);
677 return *this;
678 }
679
680
681 ///@}
682
683
684 // ------------------------------------------------------------
685 /*! @name Size, etc */
686 ///@{
687
688 /*! \brief return size of the vector
689 \return size of sparse vector
690 */
size()691 size_type size() const { return this->size_; }
692
693 /*! \brief return true if vector is empty
694 \return true if empty
695 */
empty()696 bool empty() const { return (size() == 0); }
697
698 /*! \brief resize vector
699 \param sz - new size
700 */
resize(size_type sz)701 void resize(size_type sz) { parent_type::resize(sz); }
702
703 /*! \brief get maximum string length capacity
704 \return maximum string length sparse vector can take
705 */
max_str()706 static size_type max_str() { return sv_octet_planes; }
707
708 /*! \brief get effective string length used in vector
709 Calculate and returns efficiency, how close are we
710 to the reserved maximum.
711 \return current string length maximum
712 */
713 size_type effective_max_str() const BMNOEXCEPT;
714
715 /*! \brief get effective string length used in vector
716 \return current string length maximum
717 */
effective_vector_max()718 size_type effective_vector_max() const { return effective_max_str(); }
719
720 /**
721 \brief recalculate size to exclude tail NULL elements
722 After this call size() will return the true size of the vector
723 */
724 void sync_size() BMNOEXCEPT;
725
726 ///@}
727
728
729 // ------------------------------------------------------------
730 /*! @name Memory optimization/compression */
731 ///@{
732
733 /*!
734 \brief run memory optimization for all vector planes
735 \param temp_block - pre-allocated memory block to avoid unnecessary re-allocs
736 \param opt_mode - requested compression depth
737 \param stat - memory allocation statistics after optimization
738 */
739 void optimize(
740 bm::word_t* temp_block = 0,
741 typename bvector_type::optmode opt_mode = bvector_type::opt_compress,
742 typename str_sparse_vector<CharType, BV, MAX_STR_SIZE>::statistics* stat = 0);
743
744 /*!
745 @brief Calculates memory statistics.
746
747 Function fills statistics structure containing information about how
748 this vector uses memory and estimation of max. amount of memory
749 bvector needs to serialize itself.
750
751 @param st - pointer on statistics structure to be filled in.
752
753 @sa statistics
754 */
755 void calc_stat(
756 struct str_sparse_vector<CharType, BV, MAX_STR_SIZE>::statistics* st
757 ) const BMNOEXCEPT;
758
759
760 ///@}
761
762 // ------------------------------------------------------------
763 /*! @name Iterator access */
764 ///@{
765
766 /** Provide const iterator access to container content */
767 const_iterator begin() const BMNOEXCEPT;
768
769 /** Provide const iterator access to the end */
end()770 const_iterator end() const BMNOEXCEPT { return const_iterator(this, bm::id_max); }
771
772 /** Get const_itertor re-positioned to specific element
773 @param idx - position in the sparse vector
774 */
get_const_iterator(size_type idx)775 const_iterator get_const_iterator(size_type idx) const BMNOEXCEPT
776 { return const_iterator(this, idx); }
777
778 /** Provide back insert iterator
779 Back insert iterator implements buffered insertion, which is faster, than random access
780 or push_back
781 */
get_back_inserter()782 back_insert_iterator get_back_inserter()
783 { return back_insert_iterator(this); }
784
785 ///@}
786
787
788
789 // ------------------------------------------------------------
790 /*! @name Various traits */
791 ///@{
792
793 /** \brief trait if sparse vector is "compressed" (false)
794 */
795 static
is_compressed()796 bool is_compressed() BMNOEXCEPT { return false; }
797
798 ///@}
799
800 // ------------------------------------------------------------
801 /*! @name Char remapping, succinct utilities
802
803 Remapping runs character usage analysis (frequency analysis)
804 based on that implements reduction of dit-depth thus improves
805 search performance and memory usage (both RAM and serialized).
806
807 Remapping limits farther modifications of sparse vector.
808 (Use remapped vector as read-only).
809 */
810
811 ///@{
812
813 /**
814 Get character remapping status (true|false)
815 */
is_remap()816 bool is_remap() const BMNOEXCEPT { return remap_flags_ != 0; }
817
818 /**
819 Build remapping profile and load content from another sparse vector
820 Remapped vector likely saves memory (both RAM and disk) but
821 should not be modified (should be read-only).
822
823 \param str_sv - source sparse vector (assumed it is not remapped)
824 */
825 void remap_from(const str_sparse_vector& str_sv);
826
827 /**
828 Build remapping profile and re-load content to save memory
829 */
830 void remap();
831
832 /*!
833 Calculate flags which octets are present on each byte-plane.
834 @internal
835 */
836 void calc_octet_stat(octet_freq_matrix_type& octet_matrix) const BMNOEXCEPT;
837
838 /*!
839 Compute optimal remap codes
840 @internal
841 */
842 static
843 void build_octet_remap(
844 plane_octet_matrix_type& octet_remap_matrix1,
845 plane_octet_matrix_type& octet_remap_matrix2,
846 octet_freq_matrix_type& octet_occupancy_matrix);
847 /*!
848 remap string from external (ASCII) system to matrix internal code
849 @return true if remapping was ok, false if found incorrect value
850 for the plane
851 @internal
852 */
853 static
854 bool remap_tosv(value_type* BMRESTRICT sv_str,
855 size_type buf_size,
856 const value_type* BMRESTRICT str,
857 const plane_octet_matrix_type& BMRESTRICT octet_remap_matrix2
858 ) BMNOEXCEPT;
859
860 /*!
861 remap string from external (ASCII) system to matrix internal code
862 @internal
863 */
remap_tosv(value_type * sv_str,size_type buf_size,const value_type * str)864 bool remap_tosv(value_type* sv_str,
865 size_type buf_size,
866 const value_type* str) const BMNOEXCEPT
867 {
868 return remap_tosv(sv_str, buf_size, str, remap_matrix2_);
869 }
870 /*!
871 remap string from internal code to external (ASCII) system
872 @return true if remapping was ok, false if found incorrect value
873 for the plane
874 @internal
875 */
876 static
877 bool remap_fromsv(
878 value_type* BMRESTRICT str,
879 size_type buf_size,
880 const value_type* BMRESTRICT sv_str,
881 const plane_octet_matrix_type& BMRESTRICT octet_remap_matrix1
882 ) BMNOEXCEPT;
883 /*!
884 re-calculate remap matrix2 based on matrix1
885 @internal
886 */
887 void recalc_remap_matrix2();
888
889 ///@}
890
891 // ------------------------------------------------------------
892 /*! @name Export content to C-style */
893 ///@{
894
895 /**
896 \brief Bulk export strings to a C-style matrix of chars
897
898 \param cmatr - dest matrix (bm::heap_matrix)
899 \param idx_from - index in the sparse vector to export from
900 \param dec_size - decoding size (matrix column allocation should match)
901 \param zero_mem - set to false if target array is pre-initialized
902 with 0s to avoid performance penalty
903
904 \return number of actually exported elements (can be less than requested)
905 */
906 template<typename CharMatrix>
907 size_type decode(CharMatrix& cmatr,
908 size_type idx_from,
909 size_type dec_size,
910 bool zero_mem = true) const
911 {
912 return decode_substr(cmatr, idx_from, dec_size,
913 0, MAX_STR_SIZE-1, zero_mem);
914 }
915
916 /**
917 \brief Bulk export strings to a C-style matrix of chars
918
919 \param cmatr - dest matrix (bm::heap_matrix)
920 \param idx_from - index in the sparse vector to export from
921 \param dec_size - decoding size (matrix column allocation should match)
922 \param substr_from - sub-string position from
923 \param substr_to - sub-string position to
924 \param zero_mem - set to false if target array is pre-initialized
925 with 0s to avoid performance penalty
926
927 \return number of actually exported elements (can be less than requested)
928 */
929 template<typename CharMatrix>
930 size_type decode_substr(CharMatrix& cmatr,
931 size_type idx_from,
932 size_type dec_size,
933 unsigned substr_from,
934 unsigned substr_to,
935 bool zero_mem = true) const
936 {
937
938 /// Decoder functor
939 /// @internal
940 ///
941 struct sv_decode_visitor_func
942 {
sv_decode_visitor_funcsv_decode_visitor_func943 sv_decode_visitor_func(CharMatrix& cmatr) BMNOEXCEPT2
944 : cmatr_(cmatr), mask_(0), sv_off_(0)
945 {}
946
add_bitssv_decode_visitor_func947 void add_bits(size_type bv_offset,
948 const unsigned char* BMRESTRICT bits, unsigned bits_size) BMNOEXCEPT
949 {
950 // can be negative (-1) when bv base offset = 0 and sv = 1,2..
951 size_type base = bv_offset - sv_off_;
952 value_type m = mask_;
953 const unsigned i = substr_i_;
954 for (unsigned j = 0; j < bits_size; ++j)
955 {
956 size_type idx = bits[j] + base;
957 value_type* BMRESTRICT str = cmatr_.row(idx);
958 str[i] |= m;
959 } // for i
960 }
add_rangesv_decode_visitor_func961 void add_range(size_type bv_offset, size_type sz) BMNOEXCEPT
962 {
963 auto base = bv_offset - sv_off_;
964 value_type m = mask_;
965 const unsigned i = substr_i_;
966 for (size_type j = 0; j < sz; ++j)
967 {
968 size_type idx = j + base;
969 value_type* BMRESTRICT str = cmatr_.row(idx);
970 str[i] |= m;
971 } // for i
972 }
973
974 CharMatrix& cmatr_; ///< target array for reverse transpose
975 value_type mask_; ///< bit-plane mask
976 unsigned substr_i_; ///< i
977 size_type sv_off_; ///< SV read offset
978 };
979
980
981 BM_ASSERT(substr_from <= substr_to);
982 BM_ASSERT(substr_from <= MAX_STR_SIZE);
983 BM_ASSERT(cmatr.is_init());
984
985 if (substr_to >= MAX_STR_SIZE)
986 substr_to = MAX_STR_SIZE-1;
987
988 if (zero_mem)
989 cmatr.set_zero(); // TODO: set zero based on requested capacity
990
991 size_type rows = size_type(cmatr.rows());
992 size_type max_sz = this->size() - idx_from;
993 if (max_sz < dec_size)
994 dec_size = max_sz;
995 if (rows < dec_size)
996 dec_size = rows;
997 if (!dec_size)
998 return dec_size;
999
1000 sv_decode_visitor_func func(cmatr);
1001
1002 for (unsigned i = substr_from; i <= substr_to; ++i)
1003 {
1004 unsigned bi = 0;
1005 func.substr_i_ = i - substr_from; // to put substr at the str[0]
1006
1007 for (unsigned k = i * 8; k < (i * 8) + 8; ++k, ++bi)
1008 {
1009 const bvector_type* bv = this->bmatr_.get_row(k);
1010 if (!bv)
1011 continue;
1012
1013 func.mask_ = value_type(1u << bi);
1014 func.sv_off_ = idx_from;
1015
1016 size_type end = idx_from + dec_size;
1017 bm::for_each_bit_range_no_check(*bv, idx_from, end-1, func);
1018
1019 } // for k
1020 } // for i
1021
1022 if (remap_flags_)
1023 {
1024 for (unsigned i = 0; i < dec_size; ++i)
1025 {
1026 typename CharMatrix::value_type* str = cmatr.row(i);
1027 remap_matrix1_.remapz(str);
1028 } // for i
1029 }
1030 return dec_size;
1031 }
1032
1033 /**
1034 \brief Bulk import of strings from a C-style matrix of chars
1035
1036 \param cmatr - source matrix (bm::heap_matrix)
1037 [in/out] parameter gets modified(corrupted)
1038 in the process
1039 \param idx_from - destination index in the sparse vector
1040 \param imp_size - import size (number or rows to import)
1041 */
1042 template<typename CharMatrix>
import(CharMatrix & cmatr,size_type idx_from,size_type imp_size)1043 void import(CharMatrix& cmatr, size_type idx_from, size_type imp_size)
1044 {
1045 if (!imp_size)
1046 return;
1047 if (idx_from < this->size_) // in case it touches existing elements
1048 {
1049 // clear all planes in the range to provide corrrect import of 0 values
1050 this->clear_range(idx_from, idx_from + imp_size - 1);
1051 }
1052 import_no_check(cmatr, idx_from, imp_size);
1053 }
1054
1055 /**
1056 \brief Bulk push-back import of strings from a C-style matrix of chars
1057
1058 \param cmatr - source matrix (bm::heap_matrix)
1059 [in/out] parameter gets modified(corrupted)
1060 in the process
1061 \param imp_size - import size (number or rows to import)
1062 */
1063 template<typename CharMatrix>
import_back(CharMatrix & cmatr,size_type imp_size)1064 void import_back(CharMatrix& cmatr, size_type imp_size)
1065 {
1066 if (!imp_size)
1067 return;
1068 import_no_check(cmatr, this->size(), imp_size);
1069 }
1070
1071
1072 ///@}
1073
1074 // ------------------------------------------------------------
1075 /*! @name Merge, split, partition data */
1076 ///@{
1077
1078 /**
1079 @brief copy range of values from another sparse vector
1080
1081 Copy [left..right] values from the source vector,
1082 clear everything outside the range.
1083
1084 \param sv - source vector
1085 \param left - index from in losed diapason of [left..right]
1086 \param right - index to in losed diapason of [left..right]
1087 \param splice_null - "use_null" copy range for NULL vector or
1088 do not copy it
1089 */
1090 void copy_range(const str_sparse_vector<CharType, BV, MAX_STR_SIZE>& sv,
1091 size_type left, size_type right,
1092 bm::null_support splice_null = bm::use_null);
1093
1094 /**
1095 \brief merge with another sparse vector using OR operation
1096 Merge is different from join(), because it borrows data from the source
1097 vector, so it gets modified (destructive join)
1098
1099 \param tr_sv - [in, out]argument vector to join with (vector mutates)
1100
1101 \return self reference
1102 */
1103 str_sparse_vector<CharType, BV, MAX_STR_SIZE>&
1104 merge(str_sparse_vector<CharType, BV, MAX_STR_SIZE>& str_sv);
1105
1106 /**
1107 Keep only specified interval in the sparse vector, clear all other
1108 elements.
1109
1110 \param left - interval start
1111 \param right - interval end (closed interval)
1112 \param splice_null - "use_null" copy range for NULL vector or not
1113 */
1114 void keep_range(size_type left, size_type right,
1115 bm::null_support splice_null = bm::use_null);
1116
1117 ///@}
1118
1119 // ------------------------------------------------------------
1120
1121 /*! \brief syncronize internal structures */
1122 void sync(bool force);
1123
1124 /*!
1125 \brief check if another sparse vector has the same content and size
1126
1127 \param sv - sparse vector for comparison
1128 \param null_able - flag to consider NULL vector in comparison (default)
1129 or compare only value content planes
1130
1131 \return true, if it is the same
1132 */
1133 bool equal(const str_sparse_vector<CharType, BV, MAX_STR_SIZE>& sv,
1134 bm::null_support null_able = bm::use_null) const BMNOEXCEPT;
1135
1136 /**
1137 \brief find position of compressed element by its rank
1138 */
1139 static
1140 bool find_rank(size_type rank, size_type& pos) BMNOEXCEPT;
1141
1142 /**
1143 \brief size of sparse vector (may be different for RSC)
1144 */
effective_size()1145 size_type effective_size() const BMNOEXCEPT { return size(); }
1146
1147 protected:
1148
1149 /// @internal
1150 template<typename CharMatrix>
1151 void import_no_check(CharMatrix& cmatr,
1152 size_type idx_from, size_type imp_size,
1153 bool set_not_null = true)
1154 {
1155 BM_ASSERT (cmatr.is_init());
1156
1157 unsigned max_str_size = 0;
1158 {
1159 for (unsigned j = 0; j < imp_size; ++j)
1160 {
1161 typename CharMatrix::value_type* str = cmatr.row(j);
1162 unsigned i;
1163 for (i = 0; i < MAX_STR_SIZE; ++i)
1164 {
1165 value_type ch = str[i];
1166 if (!ch)
1167 {
1168 max_str_size = (i > max_str_size) ? i : max_str_size;
1169 break;
1170 }
1171 if (remap_flags_) // re-mapping is in effect
1172 {
1173 unsigned char remap_value =
1174 remap_matrix2_.get(i, (unsigned char)(ch));
1175 BM_ASSERT(remap_value);
1176 if (!remap_value) // unknown dictionary element
1177 throw_bad_value(0);
1178 str[i] = CharType(remap_value);
1179 }
1180 } // for i
1181 if (i == MAX_STR_SIZE)
1182 max_str_size = i;
1183 } // for j
1184 }
1185
1186 size_type bit_list[CharMatrix::n_rows];
1187 for (unsigned i = 0; i < max_str_size; ++i)
1188 {
1189 for (unsigned bi = 0; bi < 8; ++bi)
1190 {
1191 unsigned n_bits = 0;
1192 value_type mask = value_type(1u << bi);
1193 for (size_type j = 0; j < imp_size; ++j)
1194 {
1195 typename CharMatrix::value_type* str = cmatr.row(j);
1196 value_type ch = str[i];
1197 if (!ch)
1198 continue;
1199 if (ch & mask)
1200 {
1201 bit_list[n_bits++] = idx_from + j;
1202 str[i] ^= mask;
1203 }
1204 } // for j
1205 if (n_bits) // set transposed bits to the target plane
1206 {
1207 unsigned plane = i*8 + bi;
1208 bvector_type* bv = this->bmatr_.get_row(plane);
1209 if (!bv)
1210 {
1211 bv = this->bmatr_.construct_row(plane);
1212 bv->init();
1213 }
1214 bv->set(&bit_list[0], n_bits, BM_SORTED);
1215 }
1216 } // for k
1217 } // for i
1218
1219 size_type idx_to = idx_from + imp_size - 1;
1220 if (set_not_null)
1221 {
1222 bvector_type* bv_null = this->get_null_bvect();
1223 if (bv_null)
1224 bv_null->set_range(idx_from, idx_to);
1225 }
1226 if (idx_to >= this->size())
1227 this->size_ = idx_to+1;
1228
1229 }
1230
1231 // ------------------------------------------------------------
1232 /*! @name Errors and exceptions */
1233 ///@{
1234
1235 /**
1236 \brief throw range error
1237 \internal
1238 */
1239 static
1240 void throw_range_error(const char* err_msg);
1241
1242 /**
1243 \brief throw domain error
1244 \internal
1245 */
1246 static
1247 void throw_bad_value(const char* err_msg);
1248
1249 ///@}
1250
1251 /*! \brief set value without checking boundaries */
1252 void set_value(size_type idx, const value_type* str);
1253
1254 /*! \brief set value without checking boundaries or support of NULL */
1255 void set_value_no_null(size_type idx, const value_type* str);
1256
1257 /*! \brief insert value without checking boundaries */
1258 void insert_value(size_type idx, const value_type* str);
1259
1260 /*! \brief insert value without checking boundaries or support of NULL */
1261 void insert_value_no_null(size_type idx, const value_type* str);
1262
1263
size_internal()1264 size_type size_internal() const { return size(); }
resize_internal(size_type sz)1265 void resize_internal(size_type sz) { resize(sz); }
1266
remap_size()1267 size_t remap_size() const { return remap_matrix1_.get_buffer().size(); }
get_remap_buffer()1268 const unsigned char* get_remap_buffer() const
1269 { return remap_matrix1_.get_buffer().buf(); }
init_remap_buffer()1270 unsigned char* init_remap_buffer()
1271 {
1272 remap_matrix1_.init(true);
1273 return remap_matrix1_.get_buffer().data();
1274 }
set_remap()1275 void set_remap() { remap_flags_ = 1; }
1276
1277 protected:
1278
resolve_range(size_type from,size_type to,size_type * idx_from,size_type * idx_to)1279 bool resolve_range(size_type from, size_type to,
1280 size_type* idx_from, size_type* idx_to) const
1281 {
1282 *idx_from = from; *idx_to = to; return true;
1283 }
1284
get_remap_matrix()1285 const remap_matrix_type* get_remap_matrix() const
1286 { return &remap_matrix1_; }
get_remap_matrix()1287 remap_matrix_type* get_remap_matrix()
1288 { return &remap_matrix1_; }
1289
1290 protected:
1291 template<class SVect> friend class sparse_vector_serializer;
1292 template<class SVect> friend class sparse_vector_deserializer;
1293
1294 protected:
1295 unsigned remap_flags_; ///< remapping status
1296 plane_octet_matrix_type remap_matrix1_; ///< octet remap table 1
1297 plane_octet_matrix_type remap_matrix2_; ///< octet remap table 2
1298 };
1299
1300 //---------------------------------------------------------------------
1301 //---------------------------------------------------------------------
1302
1303
1304 template<class CharType, class BV, unsigned MAX_STR_SIZE>
str_sparse_vector(bm::null_support null_able,allocation_policy_type ap,size_type bv_max_size,const allocator_type & alloc)1305 str_sparse_vector<CharType, BV, MAX_STR_SIZE>::str_sparse_vector(
1306 bm::null_support null_able,
1307 allocation_policy_type ap,
1308 size_type bv_max_size,
1309 const allocator_type& alloc)
1310 : parent_type(null_able, ap, bv_max_size, alloc),
1311 remap_flags_(0)
1312 {}
1313
1314
1315 //---------------------------------------------------------------------
1316
1317 template<class CharType, class BV, unsigned MAX_STR_SIZE>
str_sparse_vector(const str_sparse_vector & str_sv)1318 str_sparse_vector<CharType, BV, MAX_STR_SIZE>::str_sparse_vector(
1319 const str_sparse_vector& str_sv)
1320 : parent_type(str_sv),
1321 remap_flags_(str_sv.remap_flags_),
1322 remap_matrix1_(str_sv.remap_matrix1_),
1323 remap_matrix2_(str_sv.remap_matrix2_)
1324 {}
1325
1326 //---------------------------------------------------------------------
1327
1328 template<class CharType, class BV, unsigned MAX_STR_SIZE>
swap(str_sparse_vector & str_sv)1329 void str_sparse_vector<CharType, BV, MAX_STR_SIZE>::swap(
1330 str_sparse_vector& str_sv) BMNOEXCEPT
1331 {
1332 parent_type::swap(str_sv);
1333 bm::xor_swap(remap_flags_, str_sv.remap_flags_);
1334 remap_matrix1_.swap(str_sv.remap_matrix1_);
1335 remap_matrix2_.swap(str_sv.remap_matrix2_);
1336 }
1337
1338 //---------------------------------------------------------------------
1339
1340 template<class CharType, class BV, unsigned MAX_STR_SIZE>
set(size_type idx,const value_type * str)1341 void str_sparse_vector<CharType, BV, MAX_STR_SIZE>::set(
1342 size_type idx, const value_type* str)
1343 {
1344 if (idx >= this->size())
1345 this->size_ = idx+1;
1346 set_value(idx, str);
1347 }
1348
1349 //---------------------------------------------------------------------
1350
1351 template<class CharType, class BV, unsigned MAX_STR_SIZE>
insert(size_type idx,const value_type * str)1352 void str_sparse_vector<CharType, BV, MAX_STR_SIZE>::insert(
1353 size_type idx, const value_type* str)
1354 {
1355 if (idx >= this->size())
1356 {
1357 this->size_ = idx+1;
1358 set_value(idx, str);
1359 return;
1360 }
1361 insert_value(idx, str);
1362 this->size_++;
1363 }
1364
1365 //---------------------------------------------------------------------
1366
1367 template<class CharType, class BV, unsigned MAX_STR_SIZE>
erase(size_type idx)1368 void str_sparse_vector<CharType, BV, MAX_STR_SIZE>::erase(size_type idx)
1369 {
1370 BM_ASSERT(idx < this->size_);
1371 if (idx >= this->size_)
1372 return;
1373 this->erase_column(idx);
1374 this->size_--;
1375 }
1376
1377 //---------------------------------------------------------------------
1378
1379 template<class CharType, class BV, unsigned MAX_STR_SIZE>
set_null(size_type idx)1380 void str_sparse_vector<CharType, BV, MAX_STR_SIZE>::set_null(size_type idx)
1381 {
1382 bvector_type* bv_null = this->get_null_bvect();
1383 if (bv_null)
1384 bv_null->clear_bit_no_check(idx);
1385 if (idx >= this->size_)
1386 {
1387 this->size_ = idx + 1;
1388 }
1389 }
1390
1391 //---------------------------------------------------------------------
1392
1393 template<class CharType, class BV, unsigned MAX_STR_SIZE>
set_value(size_type idx,const value_type * str)1394 void str_sparse_vector<CharType, BV, MAX_STR_SIZE>::set_value(
1395 size_type idx, const value_type* str)
1396 {
1397 set_value_no_null(idx, str);
1398 bvector_type* bv_null = this->get_null_bvect();
1399 if (bv_null)
1400 bv_null->set_bit_no_check(idx);
1401 }
1402
1403 //---------------------------------------------------------------------
1404
1405 template<class CharType, class BV, unsigned MAX_STR_SIZE>
set_value_no_null(size_type idx,const value_type * str)1406 void str_sparse_vector<CharType, BV, MAX_STR_SIZE>::set_value_no_null(
1407 size_type idx, const value_type* str)
1408 {
1409 for (unsigned i = 0; i < MAX_STR_SIZE; ++i)
1410 {
1411 CharType ch = str[i];
1412 if (!ch)
1413 {
1414 this->clear_value_planes_from(i*8, idx);
1415 return;
1416 }
1417
1418 if (remap_flags_) // compressional re-mapping is in effect
1419 {
1420 unsigned char remap_value = remap_matrix2_.get(i, unsigned(ch));
1421 BM_ASSERT(remap_value);
1422 if (!remap_value) // unknown dictionary element
1423 {
1424 this->clear_value_planes_from(i*8, idx);
1425 return;
1426 }
1427 ch = CharType(remap_value);
1428 }
1429 this->bmatr_.set_octet(idx, i, (unsigned char)ch);
1430 } // for i
1431 }
1432
1433 //---------------------------------------------------------------------
1434
1435 template<class CharType, class BV, unsigned MAX_STR_SIZE>
insert_value(size_type idx,const value_type * str)1436 void str_sparse_vector<CharType, BV, MAX_STR_SIZE>::insert_value(
1437 size_type idx, const value_type* str)
1438 {
1439 insert_value_no_null(idx, str);
1440 this->insert_null(idx, true);
1441 }
1442
1443 //---------------------------------------------------------------------
1444
1445 template<class CharType, class BV, unsigned MAX_STR_SIZE>
insert_value_no_null(size_type idx,const value_type * str)1446 void str_sparse_vector<CharType, BV, MAX_STR_SIZE>::insert_value_no_null(
1447 size_type idx, const value_type* str)
1448 {
1449 for (unsigned i = 0; i < MAX_STR_SIZE; ++i)
1450 {
1451 CharType ch = str[i];
1452 if (!ch)
1453 {
1454 this->insert_clear_value_planes_from(i*8, idx);
1455 return;
1456 }
1457
1458 if (remap_flags_) // compressional re-mapping is in effect
1459 {
1460 unsigned char remap_value = remap_matrix2_.get(i, unsigned(ch));
1461 BM_ASSERT(remap_value);
1462 if (!remap_value) // unknown dictionary element
1463 {
1464 this->insert_clear_value_planes_from(i*8, idx);
1465 return;
1466 }
1467 ch = CharType(remap_value);
1468 }
1469 this->bmatr_.insert_octet(idx, i, (unsigned char)ch);
1470 } // for i
1471 }
1472
1473
1474 //---------------------------------------------------------------------
1475
1476 template<class CharType, class BV, unsigned MAX_STR_SIZE>
1477 typename str_sparse_vector<CharType, BV, MAX_STR_SIZE>::size_type
get(size_type idx,value_type * str,size_type buf_size)1478 str_sparse_vector<CharType, BV, MAX_STR_SIZE>::get(
1479 size_type idx, value_type* str, size_type buf_size) const BMNOEXCEPT
1480 {
1481 size_type i = 0;
1482 for (; i < MAX_STR_SIZE; ++i)
1483 {
1484 if (i < buf_size)
1485 str[i] = 0;
1486 else
1487 break;
1488 CharType ch = CharType(this->bmatr_.get_octet(idx, i));
1489 if (ch == 0)
1490 {
1491 str[i] = 0;
1492 break;
1493 }
1494 str[i] = ch;
1495 } // for i
1496 if (remap_flags_)
1497 {
1498 remap_matrix1_.remap(str, i);
1499 }
1500 return i;
1501 }
1502
1503 //---------------------------------------------------------------------
1504
1505 template<class CharType, class BV, unsigned MAX_STR_SIZE>
optimize(bm::word_t * temp_block,typename bvector_type::optmode opt_mode,typename str_sparse_vector<CharType,BV,MAX_STR_SIZE>::statistics * st)1506 void str_sparse_vector<CharType, BV, MAX_STR_SIZE>::optimize(
1507 bm::word_t* temp_block,
1508 typename bvector_type::optmode opt_mode,
1509 typename str_sparse_vector<CharType, BV, MAX_STR_SIZE>::statistics* st)
1510 {
1511 typename bvector_type::statistics stbv;
1512 parent_type::optimize(temp_block, opt_mode, &stbv);
1513
1514 if (st)
1515 st->add(stbv);
1516 }
1517
1518 //---------------------------------------------------------------------
1519
1520 template<class CharType, class BV, unsigned MAX_STR_SIZE>
calc_stat(struct str_sparse_vector<CharType,BV,MAX_STR_SIZE>::statistics * st)1521 void str_sparse_vector<CharType, BV, MAX_STR_SIZE>::calc_stat(
1522 struct str_sparse_vector<CharType, BV, MAX_STR_SIZE>::statistics* st
1523 ) const BMNOEXCEPT
1524 {
1525 BM_ASSERT(st);
1526 typename bvector_type::statistics stbv;
1527 parent_type::calc_stat(&stbv);
1528
1529 st->reset();
1530
1531 st->bit_blocks += stbv.bit_blocks;
1532 st->gap_blocks += stbv.gap_blocks;
1533 st->ptr_sub_blocks += stbv.ptr_sub_blocks;
1534 st->bv_count += stbv.bv_count;
1535 st->max_serialize_mem += stbv.max_serialize_mem + 8;
1536 st->memory_used += stbv.memory_used;
1537 st->gap_cap_overhead += stbv.gap_cap_overhead;
1538
1539 size_t remap_mem_usage = sizeof(remap_flags_);
1540 remap_mem_usage += remap_matrix1_.get_buffer().mem_usage();
1541 remap_mem_usage += remap_matrix2_.get_buffer().mem_usage();
1542
1543 st->memory_used += remap_mem_usage;
1544 if (remap_flags_) // use of remapping requires some extra storage
1545 {
1546 st->max_serialize_mem += (remap_mem_usage * 2);
1547 }
1548 }
1549
1550 //---------------------------------------------------------------------
1551
1552 template<class CharType, class BV, unsigned MAX_STR_SIZE>
compare(size_type idx,const value_type * str)1553 int str_sparse_vector<CharType, BV, MAX_STR_SIZE>::compare(
1554 size_type idx,
1555 const value_type* str) const BMNOEXCEPT
1556 {
1557 BM_ASSERT(str);
1558 int res = 0;
1559 if (remap_flags_)
1560 {
1561 for (unsigned i = 0; i < MAX_STR_SIZE; ++i)
1562 {
1563 CharType octet = str[i];
1564 CharType sv_octet = (CharType)this->bmatr_.get_octet(idx, i);
1565 if (!sv_octet)
1566 {
1567 res = -octet; // -1 || 0
1568 break;
1569 }
1570 const unsigned char* remap_row = remap_matrix1_.row(i);
1571 unsigned char remap_value = remap_row[unsigned(sv_octet)];
1572 BM_ASSERT(remap_value);
1573 res = (remap_value > octet) - (remap_value < octet);
1574 if (res || !octet)
1575 break;
1576 } // for i
1577 }
1578 else
1579 {
1580 for (unsigned i = 0; i < MAX_STR_SIZE; ++i)
1581 {
1582 CharType octet = str[i];
1583 CharType sv_octet = (CharType)this->bmatr_.get_octet(idx, i);
1584 if (!sv_octet)
1585 {
1586 res = -octet; // -1 || 0
1587 break;
1588 }
1589 res = (sv_octet > octet) - (sv_octet < octet);
1590 if (res || !octet)
1591 break;
1592 } // for i
1593 }
1594 return res;
1595 }
1596
1597 //---------------------------------------------------------------------
1598
1599 template<class CharType, class BV, unsigned MAX_STR_SIZE>
common_prefix_length(size_type idx1,size_type idx2)1600 unsigned str_sparse_vector<CharType, BV, MAX_STR_SIZE>::common_prefix_length(
1601 size_type idx1, size_type idx2) const BMNOEXCEPT
1602 {
1603 unsigned i = 0;
1604 for (; i < MAX_STR_SIZE; ++i)
1605 {
1606 CharType ch1 = CharType(this->bmatr_.get_octet(idx1, i));
1607 CharType ch2 = CharType(this->bmatr_.get_octet(idx2, i));
1608 if (!ch1 || !ch2)
1609 {
1610 if (i)
1611 --i;
1612 break;
1613 }
1614 if (ch1 != ch2)
1615 {
1616 break;
1617 }
1618 } // for
1619
1620 return i;
1621 }
1622
1623 //---------------------------------------------------------------------
1624
1625 template<class CharType, class BV, unsigned MAX_STR_SIZE>
1626 bool
find_rank(size_type rank,size_type & pos)1627 str_sparse_vector<CharType, BV, MAX_STR_SIZE>::find_rank(
1628 size_type rank,
1629 size_type& pos) BMNOEXCEPT
1630 {
1631 BM_ASSERT(rank);
1632 pos = rank - 1;
1633 return true;
1634 }
1635
1636 //---------------------------------------------------------------------
1637
1638 template<class CharType, class BV, unsigned MAX_STR_SIZE>
1639 typename str_sparse_vector<CharType, BV, MAX_STR_SIZE>::size_type
effective_max_str()1640 str_sparse_vector<CharType, BV, MAX_STR_SIZE>::effective_max_str()
1641 const BMNOEXCEPT
1642 {
1643 for (int i = MAX_STR_SIZE-1; i >= 0; --i)
1644 {
1645 unsigned octet_plane = unsigned(i) * unsigned(sizeof(CharType)) * 8;
1646 for (unsigned j = 0; j < sizeof(CharType) * 8; ++j)
1647 {
1648 if (this->bmatr_.row(octet_plane+j))
1649 return unsigned(i)+1;
1650 } // for j
1651 } // for i
1652 return 0;
1653 }
1654
1655 //---------------------------------------------------------------------
1656
1657 template<class CharType, class BV, unsigned MAX_STR_SIZE>
calc_octet_stat(octet_freq_matrix_type & octet_matrix)1658 void str_sparse_vector<CharType, BV, MAX_STR_SIZE>::calc_octet_stat(
1659 octet_freq_matrix_type& octet_matrix) const BMNOEXCEPT
1660 {
1661 octet_matrix.init(true); // init and set zero
1662
1663 const_iterator it(this);
1664 for(; it.valid(); ++it)
1665 {
1666 const value_type* s = *it; // get asciiz char*
1667 for (unsigned i = 0; true; ++i) // for each char in str
1668 {
1669 value_type ch = s[i];
1670 if (!ch)
1671 break;
1672 typename octet_freq_matrix_type::value_type* row =
1673 octet_matrix.row(i);
1674 unsigned ch_idx = (unsigned char)ch;
1675 row[ch_idx] += 1;
1676 } // for i
1677 } // for it
1678
1679 }
1680
1681 //---------------------------------------------------------------------
1682
1683 template<class CharType, class BV, unsigned MAX_STR_SIZE>
build_octet_remap(plane_octet_matrix_type & octet_remap_matrix1,plane_octet_matrix_type & octet_remap_matrix2,octet_freq_matrix_type & octet_occupancy_matrix)1684 void str_sparse_vector<CharType, BV, MAX_STR_SIZE>::build_octet_remap(
1685 plane_octet_matrix_type& octet_remap_matrix1,
1686 plane_octet_matrix_type& octet_remap_matrix2,
1687 octet_freq_matrix_type& octet_occupancy_matrix)
1688 {
1689 octet_remap_matrix1.init(true); // init and set-zero
1690 octet_remap_matrix2.init(true);
1691
1692 for (unsigned i = 0; i < octet_occupancy_matrix.rows(); ++i)
1693 {
1694 typename octet_freq_matrix_type::value_type* frq_row =
1695 octet_occupancy_matrix.row(i);
1696
1697 unsigned char* remap_row1 = octet_remap_matrix1.row(i);
1698 unsigned char* remap_row2 = octet_remap_matrix2.row(i);
1699
1700 const typename plane_octet_matrix_type::size_type row_size =
1701 octet_occupancy_matrix.cols();
1702 for (unsigned remap_code = 1; true; ++remap_code)
1703 {
1704 typename octet_freq_matrix_type::size_type char_idx;
1705 bool found = bm::find_max_nz(frq_row, row_size, &char_idx);
1706 #if 0
1707 bool found = bm::find_first_nz(frq_row, row_size, &char_idx);
1708 #endif
1709 if (!found)
1710 break;
1711 BM_ASSERT(char_idx);
1712 unsigned char ch = (unsigned char)char_idx;
1713 remap_row1[remap_code] = ch;
1714 remap_row2[ch] = (unsigned char)remap_code;
1715 frq_row[char_idx] = 0; // clear the picked char
1716 } // for
1717 } // for i
1718 }
1719
1720 //---------------------------------------------------------------------
1721
1722 template<class CharType, class BV, unsigned MAX_STR_SIZE>
recalc_remap_matrix2()1723 void str_sparse_vector<CharType, BV, MAX_STR_SIZE>::recalc_remap_matrix2()
1724 {
1725 BM_ASSERT(remap_flags_);
1726
1727 remap_matrix2_.init(true);
1728
1729 for (unsigned i = 0; i < remap_matrix1_.rows(); ++i)
1730 {
1731 const unsigned char* remap_row1 = remap_matrix1_.row(i);
1732 unsigned char* remap_row2 = remap_matrix2_.row(i);
1733 for (unsigned j = 1; j < remap_matrix1_.cols(); ++j)
1734 {
1735 if (remap_row1[j])
1736 {
1737 unsigned ch_code = remap_row1[j];
1738 remap_row2[ch_code] = (unsigned char)j;
1739 BM_ASSERT(ch_code < 256);
1740 }
1741 } // for j
1742 } // for i
1743 }
1744
1745 //---------------------------------------------------------------------
1746
1747 template<class CharType, class BV, unsigned MAX_STR_SIZE>
remap_tosv(value_type * BMRESTRICT sv_str,size_type buf_size,const value_type * BMRESTRICT str,const plane_octet_matrix_type & BMRESTRICT octet_remap_matrix2)1748 bool str_sparse_vector<CharType, BV, MAX_STR_SIZE>::remap_tosv(
1749 value_type* BMRESTRICT sv_str,
1750 size_type buf_size,
1751 const value_type* BMRESTRICT str,
1752 const plane_octet_matrix_type& BMRESTRICT octet_remap_matrix2) BMNOEXCEPT
1753 {
1754 for (unsigned i = 0; i < buf_size; ++i)
1755 {
1756 CharType ch = str[i];
1757 if (!ch)
1758 {
1759 sv_str[i] = ch;
1760 break;
1761 }
1762 const unsigned char* remap_row = octet_remap_matrix2.row(i);
1763 unsigned char remap_value = remap_row[unsigned(ch)];
1764 if (!remap_value) // unknown dictionary element
1765 {
1766 return false;
1767 }
1768 sv_str[i] = CharType(remap_value);
1769 } // for i
1770 return true;
1771 }
1772
1773 //---------------------------------------------------------------------
1774
1775 template<class CharType, class BV, unsigned MAX_STR_SIZE>
remap_fromsv(value_type * BMRESTRICT str,size_type buf_size,const value_type * BMRESTRICT sv_str,const plane_octet_matrix_type & BMRESTRICT octet_remap_matrix1)1776 bool str_sparse_vector<CharType, BV, MAX_STR_SIZE>::remap_fromsv(
1777 value_type* BMRESTRICT str,
1778 size_type buf_size,
1779 const value_type* BMRESTRICT sv_str,
1780 const plane_octet_matrix_type& BMRESTRICT octet_remap_matrix1
1781 ) BMNOEXCEPT
1782 {
1783 for (unsigned i = 0; i < buf_size; ++i)
1784 {
1785 CharType ch = sv_str[i];
1786 if (!ch)
1787 {
1788 str[i] = ch;
1789 break;
1790 }
1791 const unsigned char* remap_row = octet_remap_matrix1.row(i);
1792 unsigned char remap_value = remap_row[unsigned(ch)];
1793 if (!remap_value) // unknown dictionary element
1794 {
1795 return false;
1796 }
1797 str[i] = CharType(remap_value);
1798 } // for i
1799 return true;
1800 }
1801
1802 //---------------------------------------------------------------------
1803
1804 template<class CharType, class BV, unsigned MAX_STR_SIZE>
remap()1805 void str_sparse_vector<CharType, BV, MAX_STR_SIZE>::remap()
1806 {
1807 // TODO: get rid of tmp, implement a move-remapping
1808 str_sparse_vector<CharType, BV, MAX_STR_SIZE>
1809 sv_tmp(this->get_null_support());
1810 sv_tmp.remap_from(*this);
1811 sv_tmp.swap(*this);
1812 }
1813
1814 //---------------------------------------------------------------------
1815
1816 template<class CharType, class BV, unsigned MAX_STR_SIZE>
1817 void
remap_from(const str_sparse_vector & str_sv)1818 str_sparse_vector<CharType, BV, MAX_STR_SIZE>::remap_from(
1819 const str_sparse_vector& str_sv)
1820 {
1821 if (str_sv.is_remap())
1822 {
1823 *this = str_sv;
1824 return;
1825 }
1826 this->clear_all(true);
1827 if (str_sv.empty()) // no content to remap
1828 {
1829 return;
1830 }
1831
1832 octet_freq_matrix_type omatrix; // occupancy map
1833 str_sv.calc_octet_stat(omatrix);
1834
1835 str_sv.build_octet_remap(remap_matrix1_, remap_matrix2_, omatrix);
1836 remap_flags_ = 1; // turn ON remapped mode
1837
1838 const unsigned buffer_size = 1024 * 8;
1839
1840 typedef bm::heap_matrix<CharType,
1841 buffer_size, // ROWS: number of strings in one batch
1842 MAX_STR_SIZE, // COLS
1843 allocator_type> remap_buffer_type;
1844
1845 remap_buffer_type cmatr(true);
1846 for (size_type i = 0; true; )
1847 {
1848 size_type dsize = str_sv.decode(cmatr, i, buffer_size, true);
1849 if (!dsize)
1850 break;
1851 this->import(cmatr, i, dsize);
1852 i += dsize;
1853 } // for i
1854
1855 bvector_type* bv_null = this->get_null_bvect();
1856 if (bv_null)
1857 {
1858 const bvector_type* bv_null_arg = str_sv.get_null_bvector();
1859 if (bv_null_arg)
1860 *bv_null = *bv_null_arg;
1861 else
1862 {
1863 // TODO: exception? assert? maybe it is OK...
1864 }
1865 }
1866
1867 }
1868
1869 //---------------------------------------------------------------------
1870
1871 template<class CharType, class BV, unsigned MAX_STR_SIZE>
sync(bool)1872 void str_sparse_vector<CharType, BV, MAX_STR_SIZE>::sync(bool /*force*/)
1873 {
1874 if (remap_flags_)
1875 {
1876 recalc_remap_matrix2();
1877 }
1878 }
1879
1880 //---------------------------------------------------------------------
1881
1882 template<class CharType, class BV, unsigned MAX_STR_SIZE>
equal(const str_sparse_vector<CharType,BV,MAX_STR_SIZE> & sv,bm::null_support null_able)1883 bool str_sparse_vector<CharType, BV, MAX_STR_SIZE>::equal(
1884 const str_sparse_vector<CharType, BV, MAX_STR_SIZE>& sv,
1885 bm::null_support null_able) const BMNOEXCEPT
1886 {
1887 // at this point both vectors should have the same remap settings
1888 // to be considered "equal".
1889 // Strictly speaking this is incorrect, because re-map and non-remap
1890 // vectors may have the same content
1891
1892 if (remap_flags_ != sv.remap_flags_)
1893 return false;
1894 if (remap_flags_)
1895 {
1896 bool b;
1897 b = remap_matrix1_.get_buffer().equal(sv.remap_matrix1_.get_buffer());
1898 if (!b)
1899 return b;
1900 b = remap_matrix2_.get_buffer().equal(sv.remap_matrix2_.get_buffer());
1901 if (!b)
1902 return b;
1903 }
1904 return parent_type::equal(sv, null_able);
1905 }
1906
1907 //---------------------------------------------------------------------
1908
1909 template<class CharType, class BV, unsigned MAX_STR_SIZE>
copy_range(const str_sparse_vector<CharType,BV,MAX_STR_SIZE> & sv,size_type left,size_type right,bm::null_support splice_null)1910 void str_sparse_vector<CharType, BV, MAX_STR_SIZE>::copy_range(
1911 const str_sparse_vector<CharType, BV, MAX_STR_SIZE>& sv,
1912 size_type left, size_type right,
1913 bm::null_support splice_null)
1914 {
1915 if (left > right)
1916 bm::xor_swap(left, right);
1917 this->clear_all(true);
1918
1919 remap_flags_ = sv.remap_flags_;
1920 remap_matrix1_ = sv.remap_matrix1_;
1921 remap_matrix2_ = sv.remap_matrix2_;
1922
1923 this->copy_range_planes(sv, left, right, splice_null);
1924 this->resize(sv.size());
1925 }
1926
1927 //---------------------------------------------------------------------
1928
1929 template<class CharType, class BV, unsigned MAX_STR_SIZE>
1930 str_sparse_vector<CharType, BV, MAX_STR_SIZE>&
merge(str_sparse_vector<CharType,BV,MAX_STR_SIZE> & str_sv)1931 str_sparse_vector<CharType, BV, MAX_STR_SIZE>::merge(str_sparse_vector<CharType, BV, MAX_STR_SIZE>& str_sv)
1932 {
1933 size_type arg_size = str_sv.size();
1934 if (this->size_ < arg_size)
1935 resize(arg_size);
1936
1937 // there is an assumption here that we only need to copy remap flags once
1938 // because we merge matrices with the same remaps
1939 // otherwise - undefined behavior
1940 //
1941 if (remap_flags_ != str_sv.remap_flags_)
1942 {
1943 remap_flags_ = str_sv.remap_flags_;
1944 remap_matrix1_ = str_sv.remap_matrix1_;
1945 remap_matrix2_ = str_sv.remap_matrix2_;
1946 }
1947
1948 bvector_type* bv_null = this->get_null_bvect();
1949 unsigned planes = bv_null ? this->stored_planes() : this->planes();
1950
1951 this->merge_matr(str_sv.bmatr_, planes);
1952
1953 // our vector is NULL-able but argument is not (assumed all values are real)
1954 if (bv_null && !str_sv.is_nullable())
1955 bv_null->set_range(0, arg_size-1);
1956
1957 return *this;
1958
1959 }
1960
1961 //---------------------------------------------------------------------
1962
1963 template<class CharType, class BV, unsigned MAX_STR_SIZE>
keep_range(size_type left,size_type right,bm::null_support splice_null)1964 void str_sparse_vector<CharType, BV, MAX_STR_SIZE>::keep_range(
1965 size_type left, size_type right,
1966 bm::null_support splice_null)
1967 {
1968 if (right < left)
1969 bm::xor_swap(left, right);
1970 this->keep_range_no_check(left, right, splice_null);
1971 }
1972
1973 //---------------------------------------------------------------------
1974
1975 template<class CharType, class BV, unsigned MAX_STR_SIZE>
1976 typename str_sparse_vector<CharType, BV, MAX_STR_SIZE>::const_iterator
begin()1977 str_sparse_vector<CharType, BV, MAX_STR_SIZE>::begin() const BMNOEXCEPT
1978 {
1979 typedef typename
1980 str_sparse_vector<CharType, BV, MAX_STR_SIZE>::const_iterator it_type;
1981 return it_type(this);
1982 }
1983
1984 //---------------------------------------------------------------------
1985
1986 template<class CharType, class BV, unsigned MAX_STR_SIZE>
clear_all(bool free_mem)1987 void str_sparse_vector<CharType, BV, MAX_STR_SIZE>::clear_all(bool free_mem) BMNOEXCEPT
1988 {
1989 parent_type::clear_all(free_mem);
1990 }
1991
1992 //---------------------------------------------------------------------
1993
1994 template<class CharType, class BV, unsigned MAX_STR_SIZE>
throw_range_error(const char * err_msg)1995 void str_sparse_vector<CharType, BV, MAX_STR_SIZE>::throw_range_error(
1996 const char* err_msg)
1997 {
1998 #ifndef BM_NO_STL
1999 throw std::range_error(err_msg);
2000 #else
2001 BM_ASSERT_THROW(false, BM_ERR_RANGE);
2002 #endif
2003 }
2004
2005 //---------------------------------------------------------------------
2006
2007 template<class CharType, class BV, unsigned MAX_STR_SIZE>
throw_bad_value(const char * err_msg)2008 void str_sparse_vector<CharType, BV, MAX_STR_SIZE>::throw_bad_value(
2009 const char* err_msg)
2010 {
2011 #ifndef BM_NO_STL
2012 if (!err_msg)
2013 err_msg = "Unknown/incomparable dictionary character";
2014 throw std::domain_error(err_msg);
2015 #else
2016 BM_ASSERT_THROW(false, BM_BAD_VALUE);
2017 #endif
2018 }
2019
2020
2021 //---------------------------------------------------------------------
2022 //
2023 //---------------------------------------------------------------------
2024
2025
2026 template<class CharType, class BV, unsigned MAX_STR_SIZE>
const_iterator()2027 str_sparse_vector<CharType, BV, MAX_STR_SIZE>::const_iterator::const_iterator() BMNOEXCEPT
2028 : sv_(0), substr_from_(0), substr_to_(MAX_STR_SIZE),
2029 pos_(bm::id_max), pos_in_buf_(~size_type(0))
2030 {
2031 }
2032
2033 //---------------------------------------------------------------------
2034
2035 template<class CharType, class BV, unsigned MAX_STR_SIZE>
const_iterator(const str_sparse_vector<CharType,BV,MAX_STR_SIZE>::const_iterator & it)2036 str_sparse_vector<CharType, BV, MAX_STR_SIZE>::const_iterator::const_iterator(
2037 const str_sparse_vector<CharType, BV, MAX_STR_SIZE>::const_iterator& it) BMNOEXCEPT
2038 : sv_(it.sv_),
2039 substr_from_(it.substr_from_), substr_to_(it.substr_to_),
2040 pos_(it.pos_), pos_in_buf_(~size_type(0))
2041 {
2042 }
2043
2044 //---------------------------------------------------------------------
2045
2046 template<class CharType, class BV, unsigned MAX_STR_SIZE>
const_iterator(const str_sparse_vector<CharType,BV,MAX_STR_SIZE> * sv)2047 str_sparse_vector<CharType, BV, MAX_STR_SIZE>::const_iterator::const_iterator(
2048 const str_sparse_vector<CharType, BV, MAX_STR_SIZE>* sv) BMNOEXCEPT
2049 : sv_(sv), pos_(sv->empty() ? bm::id_max : 0), pos_in_buf_(~size_type(0))
2050 {
2051 substr_from_ = 0;
2052 substr_to_ = (unsigned) sv_->effective_max_str();
2053 buf_matrix_.resize(n_rows, substr_to_+1);
2054
2055 }
2056
2057 //---------------------------------------------------------------------
2058
2059 template<class CharType, class BV, unsigned MAX_STR_SIZE>
const_iterator(const str_sparse_vector<CharType,BV,MAX_STR_SIZE> * sv,typename str_sparse_vector<CharType,BV,MAX_STR_SIZE>::size_type pos)2060 str_sparse_vector<CharType, BV, MAX_STR_SIZE>::const_iterator::const_iterator(
2061 const str_sparse_vector<CharType, BV, MAX_STR_SIZE>* sv,
2062 typename str_sparse_vector<CharType, BV, MAX_STR_SIZE>::size_type pos) BMNOEXCEPT
2063 : sv_(sv), pos_(pos >= sv->size() ? bm::id_max : pos), pos_in_buf_(~size_type(0))
2064 {
2065 substr_from_ = 0;
2066 substr_to_ = (unsigned) sv_->effective_max_str();
2067 buf_matrix_.resize(n_rows, substr_to_+1);
2068 }
2069
2070 //---------------------------------------------------------------------
2071
2072 template<class CharType, class BV, unsigned MAX_STR_SIZE>
set_substr(unsigned from,unsigned len)2073 void str_sparse_vector<CharType, BV, MAX_STR_SIZE>::const_iterator::set_substr(
2074 unsigned from, unsigned len) BMNOEXCEPT
2075 {
2076 BM_ASSERT(from < MAX_STR_SIZE);
2077 BM_ASSERT(len);
2078
2079 unsigned max_str = sv_->effective_max_str();
2080
2081 substr_from_ = from;
2082 substr_to_ = substr_from_ + len - 1;
2083 if (max_str < substr_to_)
2084 substr_to_ = max_str;
2085 buf_matrix_.resize(n_rows, len+1);
2086 }
2087
2088 //---------------------------------------------------------------------
2089
2090 template<class CharType, class BV, unsigned MAX_STR_SIZE>
2091 const typename str_sparse_vector<CharType, BV, MAX_STR_SIZE>::value_type*
value()2092 str_sparse_vector<CharType, BV, MAX_STR_SIZE>::const_iterator::value() const BMNOEXCEPT
2093 {
2094 BM_ASSERT(sv_);
2095 BM_ASSERT(this->valid());
2096 if (pos_in_buf_ == ~size_type(0))
2097 {
2098 if (!buf_matrix_.is_init())
2099 buf_matrix_.init();
2100 pos_in_buf_ = 0;
2101 size_type d = sv_->decode_substr(buf_matrix_, pos_, n_rows,
2102 substr_from_, substr_to_);
2103 if (!d)
2104 {
2105 pos_ = bm::id_max;
2106 return 0;
2107 }
2108 }
2109 return buf_matrix_.row(pos_in_buf_);
2110 }
2111
2112 //---------------------------------------------------------------------
2113
2114 template<class CharType, class BV, unsigned MAX_STR_SIZE>
2115 void
go_to(typename str_sparse_vector<CharType,BV,MAX_STR_SIZE>::size_type pos)2116 str_sparse_vector<CharType, BV, MAX_STR_SIZE>::const_iterator::go_to(
2117 typename str_sparse_vector<CharType, BV, MAX_STR_SIZE>::size_type pos
2118 ) BMNOEXCEPT
2119 {
2120 pos_ = (!sv_ || pos >= sv_->size()) ? bm::id_max : pos;
2121 pos_in_buf_ = ~size_type(0);
2122 }
2123
2124 //---------------------------------------------------------------------
2125
2126 template<class CharType, class BV, unsigned MAX_STR_SIZE>
2127 void
advance()2128 str_sparse_vector<CharType, BV, MAX_STR_SIZE>::const_iterator::advance() BMNOEXCEPT
2129 {
2130 if (pos_ == bm::id_max) // nothing to do, we are at the end
2131 return;
2132 ++pos_;
2133
2134 if (pos_ >= sv_->size())
2135 this->invalidate();
2136 else
2137 {
2138 if (pos_in_buf_ != ~size_type(0))
2139 {
2140 ++pos_in_buf_;
2141 if (pos_in_buf_ >= n_rows)
2142 pos_in_buf_ = ~size_type(0);
2143 }
2144 }
2145 }
2146
2147 //---------------------------------------------------------------------
2148 //
2149 //---------------------------------------------------------------------
2150
2151 template<class CharType, class BV, unsigned MAX_STR_SIZE>
back_insert_iterator()2152 str_sparse_vector<CharType, BV, MAX_STR_SIZE>::back_insert_iterator::back_insert_iterator() BMNOEXCEPT
2153 : sv_(0), bv_null_(0), pos_in_buf_(~size_type(0)), prev_nb_(0)
2154 {}
2155
2156 //---------------------------------------------------------------------
2157
2158 template<class CharType, class BV, unsigned MAX_STR_SIZE>
back_insert_iterator(str_sparse_vector<CharType,BV,MAX_STR_SIZE> * sv)2159 str_sparse_vector<CharType, BV, MAX_STR_SIZE>::back_insert_iterator::back_insert_iterator(
2160 str_sparse_vector<CharType, BV, MAX_STR_SIZE>* sv) BMNOEXCEPT
2161 : sv_(sv), pos_in_buf_(~size_type(0))
2162 {
2163 if (sv)
2164 {
2165 prev_nb_ = sv_->size() >> bm::set_block_shift;
2166 bv_null_ = sv_->get_null_bvect();
2167 }
2168 else
2169 {
2170 bv_null_ = 0; prev_nb_ = 0;
2171 }
2172 }
2173
2174 //---------------------------------------------------------------------
2175
2176 template<class CharType, class BV, unsigned MAX_STR_SIZE>
back_insert_iterator(const str_sparse_vector<CharType,BV,MAX_STR_SIZE>::back_insert_iterator & bi)2177 str_sparse_vector<CharType, BV, MAX_STR_SIZE>::back_insert_iterator::back_insert_iterator(
2178 const str_sparse_vector<CharType, BV, MAX_STR_SIZE>::back_insert_iterator& bi) BMNOEXCEPT
2179 : sv_(bi.sv_), bv_null_(bi.bv_null_), pos_in_buf_(~size_type(0)), prev_nb_(bi.prev_nb_)
2180 {
2181 BM_ASSERT(bi.empty());
2182 }
2183
2184 //---------------------------------------------------------------------
2185
2186 template<class CharType, class BV, unsigned MAX_STR_SIZE>
~back_insert_iterator()2187 str_sparse_vector<CharType, BV, MAX_STR_SIZE>::back_insert_iterator::~back_insert_iterator()
2188 {
2189 this->flush();
2190 }
2191
2192 //---------------------------------------------------------------------
2193
2194 template<class CharType, class BV, unsigned MAX_STR_SIZE>
2195 bool
empty()2196 str_sparse_vector<CharType, BV, MAX_STR_SIZE>::back_insert_iterator::empty()
2197 const BMNOEXCEPT
2198 {
2199 return (pos_in_buf_ == ~size_type(0) || !sv_);
2200 }
2201
2202 //---------------------------------------------------------------------
2203
2204 template<class CharType, class BV, unsigned MAX_STR_SIZE>
flush()2205 void str_sparse_vector<CharType, BV, MAX_STR_SIZE>::back_insert_iterator::flush()
2206 {
2207 if (this->empty())
2208 return;
2209
2210 size_type imp_idx = sv_->size();
2211 sv_->import_no_check(buf_matrix_, imp_idx, pos_in_buf_+1, false);
2212 pos_in_buf_ = ~size_type(0);
2213 block_idx_type nb = sv_->size() >> bm::set_block_shift;
2214 if (nb != prev_nb_)
2215 {
2216 // optimize all previous blocks in all planes
2217 sv_->optimize_block(prev_nb_);
2218 prev_nb_ = nb;
2219 }
2220 }
2221
2222 //---------------------------------------------------------------------
2223
2224 template<class CharType, class BV, unsigned MAX_STR_SIZE>
add(const typename str_sparse_vector<CharType,BV,MAX_STR_SIZE>::back_insert_iterator::value_type * v)2225 void str_sparse_vector<CharType, BV, MAX_STR_SIZE>::back_insert_iterator::add(
2226 const typename str_sparse_vector<CharType, BV, MAX_STR_SIZE>::back_insert_iterator::value_type* v)
2227 {
2228 if (!v)
2229 {
2230 this->add_null();
2231 return;
2232 }
2233 size_type buf_idx = this->pos_in_buf_; // offset in
2234 size_type sz = sv_->size();
2235
2236 this->add_value(v);
2237 if (bv_null_)
2238 {
2239 bv_null_->set_bit_no_check(sz + buf_idx + 1);
2240 }
2241 }
2242
2243 //---------------------------------------------------------------------
2244
2245 template<class CharType, class BV, unsigned MAX_STR_SIZE>
add_null()2246 void str_sparse_vector<CharType, BV, MAX_STR_SIZE>::back_insert_iterator::add_null()
2247 {
2248 /*size_type buf_idx = */this->add_value("");
2249 }
2250
2251 //---------------------------------------------------------------------
2252
2253 template<class CharType, class BV, unsigned MAX_STR_SIZE>
add_null(typename str_sparse_vector<CharType,BV,MAX_STR_SIZE>::back_insert_iterator::size_type count)2254 void str_sparse_vector<CharType, BV, MAX_STR_SIZE>::back_insert_iterator::add_null(
2255 typename str_sparse_vector<CharType, BV, MAX_STR_SIZE>::back_insert_iterator::size_type count)
2256 {
2257 for (size_type i = 0; i < count; ++i) // TODO: optimization
2258 this->add_value("");
2259 }
2260
2261
2262 //---------------------------------------------------------------------
2263
2264 template<class CharType, class BV, unsigned MAX_STR_SIZE>
2265 void
add_value(const str_sparse_vector<CharType,BV,MAX_STR_SIZE>::back_insert_iterator::value_type * v)2266 str_sparse_vector<CharType, BV, MAX_STR_SIZE>::back_insert_iterator::add_value(
2267 const str_sparse_vector<CharType, BV, MAX_STR_SIZE>::back_insert_iterator::value_type* v)
2268 {
2269 BM_ASSERT(sv_);
2270 BM_ASSERT(v);
2271 if (pos_in_buf_ == ~size_type(0))
2272 {
2273 if (!buf_matrix_.is_init())
2274 buf_matrix_.init();
2275 pos_in_buf_ = 0; buf_matrix_.set_zero();
2276 }
2277 else
2278 if (pos_in_buf_ >= buffer_matrix_type::n_rows-1)
2279 {
2280 this->flush();
2281 pos_in_buf_ = 0; buf_matrix_.set_zero();
2282 }
2283 else
2284 {
2285 ++pos_in_buf_;
2286 }
2287
2288 value_type* r = buf_matrix_.row(pos_in_buf_);
2289 for (unsigned i = 0; i < buffer_matrix_type::n_columns; ++i)
2290 {
2291 r[i] = v[i];
2292 if (!r[i])
2293 break;
2294 } // for i
2295 //return pos_in_buf_;
2296 }
2297
2298 //---------------------------------------------------------------------
2299
2300 template<class CharType, class BV, unsigned MAX_STR_SIZE>
sync_size()2301 void str_sparse_vector<CharType, BV, MAX_STR_SIZE>::sync_size() BMNOEXCEPT
2302 {
2303 const bvector_type* bv_null = this->get_null_bvector();
2304 if (!bv_null)
2305 return;
2306 bool found = bv_null->find_reverse(this->size_);
2307 this->size_ += found;
2308 }
2309
2310 //---------------------------------------------------------------------
2311
2312 } // namespace
2313
2314 #endif
2315