1 #ifndef BMSPARSEVEC_COMPR_H__INCLUDED__
2 #define BMSPARSEVEC_COMPR_H__INCLUDED__
3 /*
4 Copyright(c) 2002-2017 Anatoliy Kuznetsov(anatoliy_kuznetsov at yahoo.com)
5 
6 Licensed under the Apache License, Version 2.0 (the "License");
7 you may not use this file except in compliance with the License.
8 You may obtain a copy of the License at
9 
10     http://www.apache.org/licenses/LICENSE-2.0
11 
12 Unless required by applicable law or agreed to in writing, software
13 distributed under the License is distributed on an "AS IS" BASIS,
14 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 See the License for the specific language governing permissions and
16 limitations under the License.
17 
18 For more information please visit:  http://bitmagic.io
19 */
20 
21 /*! \file bmsparsevec_compr.h
22     \brief Compressed sparse container rsc_sparse_vector<> for integer types
23 */
24 
25 #include <memory.h>
26 
27 #ifndef BM_NO_STL
28 #include <stdexcept>
29 #endif
30 
31 #ifndef BM__H__INCLUDED__
32 // BitMagic utility headers do not include main "bm.h" declaration
33 // #include "bm.h" or "bm64.h" explicitly
34 # error missing include (bm.h or bm64.h)
35 #endif
36 
37 
38 #include "bmsparsevec.h"
39 #include "bmdef.h"
40 
41 namespace bm
42 {
43 
44 
45 /*!
46    \brief Rank-Select compressed sparse vector
47 
48    Container uses Rank-Select method of compression, where
49    all NULL columns gets dropped, effective address of columns is computed
50    using address bit-vector.
51 
52    Use for cases, where memory efficiency is preferable over
53    single element access latency.
54 
55    \ingroup sv
56 */
57 template<class Val, class SV>
58 class rsc_sparse_vector
59 {
60 public:
61     enum bit_planes
62     {
63         sv_planes = (sizeof(Val) * 8 + 1),
64         sv_value_planes = (sizeof(Val) * 8)
65     };
66 
67     typedef Val                                      value_type;
68     typedef const value_type&                        const_reference;
69     typedef typename SV::size_type                   size_type;
70     typedef SV                                       sparse_vector_type;
71     typedef typename SV::const_iterator              sparse_vector_const_iterator;
72     typedef typename SV::bvector_type                bvector_type;
73     typedef bvector_type*                            bvector_type_ptr;
74     typedef const bvector_type*                      bvector_type_const_ptr;
75     typedef typename bvector_type::allocator_type    allocator_type;
76     typedef typename bvector_type::allocation_policy allocation_policy_type;
77     typedef typename bvector_type::rs_index_type     rs_index_type;
78     typedef typename bvector_type::enumerator        bvector_enumerator_type;
79     typedef typename SV::bmatrix_type                bmatrix_type;
80 
81     enum vector_capacity
82     {
83         max_vector_size = 1
84     };
85 
86     struct is_remap_support { enum trait { value = false }; };
87     struct is_rsc_support { enum trait { value = true }; };
88 
89 public:
90     /*! Statistical information about  memory allocation details. */
91     struct statistics : public bv_statistics
92     {};
93 
94 public:
95     /**
96          Reference class to access elements via common [] operator
97     */
98     class reference
99     {
100     public:
reference(rsc_sparse_vector<Val,SV> & csv,size_type idx)101         reference(rsc_sparse_vector<Val, SV>& csv, size_type idx) BMNOEXCEPT
102         : csv_(csv), idx_(idx)
103         {}
value_type()104         operator value_type() const BMNOEXCEPT { return csv_.get(idx_); }
105         bool operator==(const reference& ref) const BMNOEXCEPT
106                                 { return bool(*this) == bool(ref); }
is_null()107         bool is_null() const BMNOEXCEPT { return csv_.is_null(idx_); }
108     private:
109         rsc_sparse_vector<Val, SV>& csv_;
110         size_type                   idx_;
111     };
112 
113     /**
114         Const iterator to traverse the rsc sparse vector.
115 
116         Implementation uses buffer for decoding so, competing changes
117         to the original vector may not match the iterator returned values.
118 
119         This iterator keeps an operational buffer, memory footprint is not
120         negligable
121 
122         @ingroup sv
123     */
124     class const_iterator
125     {
126     public:
127     friend class rsc_sparse_vector;
128 
129 #ifndef BM_NO_STL
130         typedef std::input_iterator_tag  iterator_category;
131 #endif
132         typedef rsc_sparse_vector<Val, SV>           rsc_sparse_vector_type;
133         typedef rsc_sparse_vector_type*              rsc_sparse_vector_type_ptr;
134         typedef typename rsc_sparse_vector_type::value_type    value_type;
135         typedef typename rsc_sparse_vector_type::size_type     size_type;
136         typedef typename rsc_sparse_vector_type::bvector_type  bvector_type;
137         typedef typename bvector_type::allocator_type          allocator_type;
138         typedef typename
139         bvector_type::allocator_type::allocator_pool_type allocator_pool_type;
140         typedef bm::byte_buffer<allocator_type>            buffer_type;
141 
142         typedef unsigned                    difference_type;
143         typedef unsigned*                   pointer;
144         typedef value_type&                 reference;
145 
146     public:
147         const_iterator() BMNOEXCEPT;
148         const_iterator(const rsc_sparse_vector_type* csv) BMNOEXCEPT;
149         const_iterator(const rsc_sparse_vector_type* csv, size_type pos) BMNOEXCEPT;
150         const_iterator(const const_iterator& it) BMNOEXCEPT;
151 
152         bool operator==(const const_iterator& it) const BMNOEXCEPT
153                                 { return (pos_ == it.pos_) && (csv_ == it.csv_); }
154         bool operator!=(const const_iterator& it) const BMNOEXCEPT
155                                 { return ! operator==(it); }
156         bool operator < (const const_iterator& it) const BMNOEXCEPT
157                                 { return pos_ < it.pos_; }
158         bool operator <= (const const_iterator& it) const BMNOEXCEPT
159                                 { return pos_ <= it.pos_; }
160         bool operator > (const const_iterator& it) const BMNOEXCEPT
161                                 { return pos_ > it.pos_; }
162         bool operator >= (const const_iterator& it) const BMNOEXCEPT
163                                 { return pos_ >= it.pos_; }
164 
165         /// \brief Get current position (value)
166         value_type operator*() const { return this->value(); }
167 
168 
169         /// \brief Advance to the next available value
170         const_iterator& operator++() BMNOEXCEPT { this->advance(); return *this; }
171 
172         /// \brief Advance to the next available value
173         const_iterator& operator++(int)
174             { const_iterator tmp(*this);this->advance(); return tmp; }
175 
176 
177         /// \brief Get current position (value)
178         value_type value() const;
179 
180         /// \brief Get NULL status
181         bool is_null() const BMNOEXCEPT;
182 
183         /// Returns true if iterator is at a valid position
valid()184         bool valid() const BMNOEXCEPT { return pos_ != bm::id_max; }
185 
186         /// Invalidate current iterator
invalidate()187         void invalidate() BMNOEXCEPT { pos_ = bm::id_max; }
188 
189         /// Current position (index) in the vector
pos()190         size_type pos() const BMNOEXCEPT{ return pos_; }
191 
192         /// re-position to a specified position
193         void go_to(size_type pos) BMNOEXCEPT;
194 
195         /// advance iterator forward by one
196         /// @return true if it is still valid
197         bool advance() BMNOEXCEPT;
198 
199         void skip_zero_values() BMNOEXCEPT;
200     private:
201         enum buf_size_e
202         {
203             n_buf_size = 1024 * 8
204         };
205 
206     private:
207         const rsc_sparse_vector_type*     csv_;     ///!< ptr to parent
208         size_type                         pos_;     ///!< Position
209         mutable buffer_type               vbuffer_; ///!< value buffer
210         mutable buffer_type               tbuffer_; ///!< temp buffer
211         mutable value_type*               buf_ptr_; ///!< position in the buffer
212     };
213 
214 
215 
216     /**
217         Back insert iterator implements buffered insert, faster than generic
218         access assignment.
219 
220         Limitations for buffered inserter:
221         1. Do not use more than one inserter per vector at a time
222         2. Use method flush() at the end to send the rest of accumulated buffer
223         flush is happening automatically on destruction, but if flush produces an
224         exception (for whatever reason) it will be an exception in destructor.
225         As such, explicit flush() is safer way to finilize the sparse vector load.
226 
227         @ingroup sv
228     */
229     class back_insert_iterator
230     {
231     public:
232 #ifndef BM_NO_STL
233         typedef std::output_iterator_tag  iterator_category;
234 #endif
235         typedef rsc_sparse_vector<Val, SV>       rsc_sparse_vector_type;
236         typedef rsc_sparse_vector_type*          rsc_sparse_vector_type_ptr;
237         typedef typename rsc_sparse_vector_type::value_type    value_type;
238         typedef typename rsc_sparse_vector_type::size_type     size_type;
239         typedef typename rsc_sparse_vector_type::bvector_type  bvector_type;
240 
241         typedef void difference_type;
242         typedef void pointer;
243         typedef void reference;
244 
245     public:
246         back_insert_iterator() BMNOEXCEPT;
247         back_insert_iterator(rsc_sparse_vector_type* csv) BMNOEXCEPT;
248 
249         back_insert_iterator& operator=(const back_insert_iterator& bi)
250         {
251             BM_ASSERT(bi.empty());
252             this->flush(); sv_bi_ = bi.sv_bi_;
253             return *this;
254         }
255 
256         ~back_insert_iterator();
257 
258         /** push value to the vector */
259         back_insert_iterator& operator=(value_type v)
260             { this->add(v); return *this; }
261         /** noop */
262         back_insert_iterator& operator*() { return *this; }
263         /** noop */
264         back_insert_iterator& operator++() { return *this; }
265         /** noop */
266         back_insert_iterator& operator++( int ) { return *this; }
267 
268         /** add value to the container*/
269         void add(value_type v);
270 
271         /** add NULL (no-value) to the container */
272         void add_null() BMNOEXCEPT;
273 
274         /** add a series of consequitve NULLs (no-value) to the container */
275         void add_null(size_type count) BMNOEXCEPT;
276 
277         /** flush the accumulated buffer */
278         void flush();
279     protected:
280 
281         /** add value to the buffer without changing the NULL vector
282             @param v - value to push back
283             @return index of added value in the internal buffer
284             @internal
285         */
286         ///size_type add_value(value_type v);
287 
288         typedef rsc_sparse_vector_type::sparse_vector_type sparse_vector_type;
289         typedef
290         typename sparse_vector_type::back_insert_iterator  sparse_vector_bi;
291     private:
292         rsc_sparse_vector_type* csv_;      ///!< pointer on the parent vector
293         sparse_vector_bi        sv_bi_;
294     };
295 
296 public:
297     // ------------------------------------------------------------
298     /*! @name Construction and assignment  */
299 
300     //@{
301 
302     rsc_sparse_vector(bm::null_support null_able = bm::use_null,
303                       allocation_policy_type ap = allocation_policy_type(),
304                       size_type bv_max_size = bm::id_max,
305                       const allocator_type&   alloc  = allocator_type());
306 
307     /**
308         Contructor to pre-initialize the list of assigned (not NULL) elements.
309 
310         If the list of not NULL elements is known upfront it can help to
311         pre-declare it, enable rank-select index and then use set function.
312         This scenario gives significant speed boost, comparing random assignment
313 
314         @param bv_null - not NULL vector for the container
315     */
316     rsc_sparse_vector(const bvector_type& bv_null);
317 
318     ~rsc_sparse_vector();
319 
320     /*! copy-ctor */
321     rsc_sparse_vector(const rsc_sparse_vector<Val, SV>& csv);
322 
323 
324     /*! copy assignmment operator */
325     rsc_sparse_vector<Val,SV>& operator=(const rsc_sparse_vector<Val, SV>& csv)
326     {
327         if (this != &csv)
328         {
329             sv_ = csv.sv_;
330             size_ = csv.size_; max_id_ = csv.max_id_;
331             in_sync_ = csv.in_sync_;
332             if (in_sync_)
333             {
334                 bv_blocks_ptr_->copy_from(*(csv.bv_blocks_ptr_));
335             }
336         }
337         return *this;
338     }
339 
340 #ifndef BM_NO_CXX11
341     /*! move-ctor */
342     rsc_sparse_vector(rsc_sparse_vector<Val,SV>&& csv) BMNOEXCEPT;
343 
344     /*! move assignmment operator */
345     rsc_sparse_vector<Val,SV>& operator=(rsc_sparse_vector<Val,SV>&& csv) BMNOEXCEPT
346     {
347         if (this != &csv)
348         {
349             clear_all(true);
350             sv_.swap(csv.sv_);
351             size_ = csv.size_; max_id_ = csv.max_id_; in_sync_ = csv.in_sync_;
352             if (in_sync_)
353             {
354                 bv_blocks_ptr_->copy_from(*(csv.bv_blocks_ptr_));
355             }
356         }
357         return *this;
358     }
359 #endif
360 
361     //@}
362     // ------------------------------------------------------------
363     /*! @name Size, etc                                          */
364     ///@{
365 
366     /*! \brief return size of the vector
367         \return size of sparse vector
368     */
369     size_type size() const BMNOEXCEPT;
370 
371     /*! \brief return true if vector is empty
372         \return true if empty
373     */
empty()374     bool empty() const BMNOEXCEPT { return sv_.empty(); }
375 
376     /**
377         \brief recalculate size to exclude tail NULL elements
378         After this call size() will return the true size of the vector
379      */
380     void sync_size() BMNOEXCEPT;
381 
382     /*
383        \brief Returns count of not NULL elements (population)
384               in the given range [left..right]
385        Uses rank-select index to accelerate the search (after sync())
386 
387        \param left   - index of first bit start counting from
388        \param right  - index of last bit
389 
390        @sa sync
391     */
392     size_type
393     count_range_notnull(size_type left, size_type right) const BMNOEXCEPT;
394 
395     ///@}
396 
397     // ------------------------------------------------------------
398     /*! @name Element access */
399     //@{
400 
401     /*!
402         \brief get specified element without bounds checking
403         \param idx - element index
404         \return value of the element
405     */
406     value_type operator[](size_type idx) const { return this->get(idx); }
407 
408     /*!
409         \brief access specified element with bounds checking
410         \param idx - element index
411         \return value of the element
412     */
413     value_type at(size_type idx) const;
414 
415     /*!
416         \brief get specified element without bounds checking
417         \param idx - element index
418         \return value of the element
419     */
420     value_type get(size_type idx) const BMNOEXCEPT;
421 
422     /*!
423         \brief set specified element with bounds checking and automatic resize
424 
425         Method cannot insert elements, so every new idx has to be greater or equal
426         than what it used before. Elements must be loaded in a sorted order.
427 
428         \param idx - element index
429         \param v   - element value
430     */
431     void push_back(size_type idx, value_type v);
432 
433     /*!
434         \brief set specified element with bounds checking and automatic resize
435         \param idx - element index
436         \param v   - element value
437     */
438     void set(size_type idx, value_type v);
439 
440 
441     /*!
442         \brief increment specified element by one
443         \param idx - element index
444     */
445     void inc(size_type idx);
446 
447     /*!
448         \brief increment specified element by one
449         \param idx - element index
450         \param v - increment value
451     */
452     void inc(size_type idx, value_type v);
453 
454     /*!
455         \brief increment specified element by one, element MUST be NOT NULL
456         Faster than just inc() if element is NULL - behavior is undefined
457         \param idx - element index
458         \param v - increment value
459         @sa inc
460     */
461     void inc_not_null(size_type idx, value_type v);
462 
463     /*!
464         \brief set specified element to NULL
465         RSC vector actually erases element when it is set to NULL (expensive).
466         \param idx - element index
467     */
468     void set_null(size_type idx);
469 
470 
471     /** \brief test if specified element is NULL
472         \param idx - element index
473         \return true if it is NULL false if it was assigned or container
474         is not configured to support assignment flags
475     */
476     bool is_null(size_type idx) const BMNOEXCEPT;
477 
478     /**
479         \brief Get bit-vector of assigned values (or NULL)
480     */
481     const bvector_type* get_null_bvector() const BMNOEXCEPT;
482 
483     /**
484         \brief find position of compressed element by its rank
485         \param rank - rank  (virtual index in sparse vector)
486         \param idx  - index (true position)
487     */
488     bool find_rank(size_type rank, size_type& idx) const BMNOEXCEPT;
489 
490     //@}
491 
492     // ------------------------------------------------------------
493     /*! @name Export content to C-stype array                    */
494     ///@{
495 
496     /**
497         \brief C-style decode
498         \param arr - decode target array (must be properly sized)
499         \param idx_from - start address to decode
500         \param size - number of elements to decode
501         \param zero_mem - flag if array needs to beset to zeros first
502 
503         @return actual decoded size
504         @sa decode_buf
505      */
506     size_type decode(value_type* arr,
507                      size_type   idx_from,
508                      size_type   size,
509                      bool        zero_mem = true) const;
510 
511 
512     /**
513         \brief C-style decode (variant with external memory)
514          Analog of decode, but requires two arrays.
515          Faster than decode in many cases.
516 
517         \param arr - decode target array (must be properly sized)
518         \param arr_buf_tmp - decode temp bufer (must be same size of arr)
519         \param idx_from - start address to decode
520         \param size - number of elements to decode
521         \param zero_mem - flag if array needs to beset to zeros first
522 
523         @return actual decoded size
524         @sa decode
525      */
526     size_type decode_buf(value_type* arr,
527                          value_type* arr_buf_tmp,
528                          size_type   idx_from,
529                          size_type   size,
530                          bool        zero_mem = true) const BMNOEXCEPT;
531 
532     ///@}
533 
534 
535     // ------------------------------------------------------------
536     /*! @name Various traits                                     */
537     ///@{
538     /**
539         \brief if container supports NULL(unassigned) values (true)
540     */
is_nullable()541     bool is_nullable() const { return true; }
542 
543     /** \brief trait if sparse vector is "compressed" (true)
544     */
545     static
is_compressed()546     bool is_compressed() { return true; }
547 
548     ///@}
549 
550 
551     // ------------------------------------------------------------
552     /*! @name Comparison */
553     ///@{
554 
555     /*!
556         \brief check if another vector has the same content
557         \return true, if it is the same
558     */
559     bool equal(const rsc_sparse_vector<Val, SV>& csv) const BMNOEXCEPT;
560     //@}
561 
562 
563     // ------------------------------------------------------------
564     /*! @name Load-Export compressed vector with data */
565     ///@{
566     /*!
567         \brief Load compressed vector from a sparse vector (with NULLs)
568         \param sv_src - source sparse vector
569     */
570     void load_from(const sparse_vector_type& sv_src);
571 
572     /*!
573         \brief Exort compressed vector to a sparse vector (with NULLs)
574         \param sv - target sparse vector
575     */
576     void load_to(sparse_vector_type& sv) const;
577 
578     ///@}
579 
580     // ------------------------------------------------------------
581     /*! @name Iterator access */
582     ///@{
583 
584     /** Provide const iterator access to container content  */
begin()585     const_iterator begin() const
586     {
587         if (!in_sync_)
588             throw_no_rsc_index(); // call sync() to build RSC fast access index
589         return const_iterator(this);
590     }
591 
592     /** Provide const iterator access to the end    */
end()593     const_iterator end() const BMNOEXCEPT
594         { return const_iterator(this, bm::id_max); }
595 
596     /** Get const_itertor re-positioned to specific element
597     @param idx - position in the sparse vector
598     */
get_const_iterator(size_type idx)599     const_iterator get_const_iterator(size_type idx) const BMNOEXCEPT
600         { return const_iterator(this, idx); }
601 
get_back_inserter()602     back_insert_iterator get_back_inserter() { return back_insert_iterator(this); }
603     ///@}
604 
605     // ------------------------------------------------------------
606     /*! @name Memory optimization                                */
607     ///@{
608 
609     /*!
610         \brief run memory optimization for all vector planes
611         \param temp_block - pre-allocated memory block to avoid unnecessary re-allocs
612         \param opt_mode - requested compression depth
613         \param stat - memory allocation statistics after optimization
614     */
615     void optimize(
616         bm::word_t* temp_block = 0,
617         typename bvector_type::optmode opt_mode = bvector_type::opt_compress,
618         statistics* stat = 0);
619 
620     /*! \brief resize to zero, free memory
621         @param free_mem - free bit vector planes if true
622     */
623     void clear_all(bool free_mem) BMNOEXCEPT;
624 
625     /*! \brief resize to zero, free memory
626         @param free_mem - free bit vector planes if true
627     */
clear()628     void clear() BMNOEXCEPT { clear_all(true); }
629 
630     /*!
631         @brief Calculates memory statistics.
632 
633         Function fills statistics structure containing information about how
634         this vector uses memory and estimation of max. amount of memory
635         bvector needs to serialize itself.
636 
637         @param st - pointer on statistics structure to be filled in.
638 
639         @sa statistics
640     */
641     void calc_stat(
642            struct rsc_sparse_vector<Val, SV>::statistics* st) const BMNOEXCEPT;
643 
644 
645     ///@}
646 
647     // ------------------------------------------------------------
648     /*! @name Merge, split, partition data                        */
649     ///@{
650 
651     /**
652         @brief copy range of values from another sparse vector
653 
654         Copy [left..right] values from the source vector,
655         clear everything outside the range.
656 
657         \param csv   - source vector
658         \param left  - index from in losed diapason of [left..right]
659         \param right - index to in losed diapason of [left..right]
660     */
661     void copy_range(const rsc_sparse_vector<Val, SV>& csv,
662         size_type left, size_type right);
663 
664     /**
665         @brief merge two vectors (argument gets destroyed)
666         It is important that both vectors have the same NULL vectors
667         @param csv - [in,out] argumnet vector to merge
668                      (works like move so arg should not be used after the merge)
669      */
670     void merge_not_null(rsc_sparse_vector<Val, SV>& csv);
671 
672     ///@}
673 
674     // ------------------------------------------------------------
675     /*! @name Fast access structues sync                         */
676     ///@{
677     /*!
678         \brief Re-calculate rank-select index for faster access to vector
679         \param force - force recalculation even if it is already recalculated
680     */
681     void sync(bool force);
682 
683     /*!
684         \brief Re-calculate prefix sum table used for rank search (if necessary)
685     */
sync()686     void sync() { sync(false); }
687 
688     /*!
689         \brief returns true if prefix sum table is in sync with the vector
690     */
in_sync()691     bool in_sync() const BMNOEXCEPT { return in_sync_; }
692 
693     /*!
694         \brief Unsync the prefix sum table
695     */
unsync()696     void unsync() BMNOEXCEPT { in_sync_ = false; }
697     ///@}
698 
699     // ------------------------------------------------------------
700     /*! @name Access to internals                                */
701     ///@{
702 
703     /*!
704         \brief get access to bit-plane, function checks and creates a plane
705         \return bit-vector for the bit plane
706     */
get_plane(unsigned i)707     bvector_type_const_ptr get_plane(unsigned i) const BMNOEXCEPT
708         { return sv_.get_plane(i); }
709 
get_plane(unsigned i)710     bvector_type_ptr get_plane(unsigned i) BMNOEXCEPT
711         { return sv_.get_plane(i); }
712 
713     /*!
714         Number of effective bit-planes in the value type
715     */
effective_planes()716     unsigned effective_planes() const BMNOEXCEPT
717         { return sv_.effective_planes(); }
718 
719     /*!
720         \brief get total number of bit-planes in the vector
721     */
planes()722     static unsigned planes() BMNOEXCEPT
723         { return sparse_vector_type::planes(); }
724 
725     /** Number of stored bit-planes (value planes + extra */
stored_planes()726     static unsigned stored_planes()
727         { return sparse_vector_type::stored_planes(); }
728 
729     /*!
730         \brief access dense vector
731     */
get_sv()732     const sparse_vector_type& get_sv() const BMNOEXCEPT { return sv_; }
733 
734     /*!
735         \brief size of internal dense vector
736     */
effective_size()737     size_type effective_size() const BMNOEXCEPT { return sv_.size(); }
738 
739     /**
740         \brief Always 1 (non-matrix type)
741     */
effective_vector_max()742     size_type effective_vector_max() const BMNOEXCEPT { return 1; }
743 
744     /*!
745         get read-only access to inetrnal bit-matrix
746     */
get_bmatrix()747     const bmatrix_type& get_bmatrix() const BMNOEXCEPT
748         { return sv_.get_bmatrix(); }
749 
750     /*! Get Rank-Select index pointer
751         @return NULL if sync() was not called to construct the index
752         @sa sync()
753     */
get_RS()754     const rs_index_type* get_RS() const BMNOEXCEPT
755         { return in_sync_ ? bv_blocks_ptr_ : 0; }
756 
757     ///@}
758 
759 protected:
760     enum octet_planes
761     {
762         sv_octet_planes = sizeof(value_type)
763     };
764 
765     /*!
766         \brief Resolve logical address to access via rank compressed address
767 
768         \param idx    - input id to resolve
769         \param idx_to - output id
770 
771         \return true if id is known and resolved successfully
772     */
773     bool resolve(size_type idx, size_type* idx_to) const BMNOEXCEPT;
774 
775     bool resolve_range(size_type from, size_type to,
776                        size_type* idx_from, size_type* idx_to) const BMNOEXCEPT;
777 
resize_internal(size_type sz)778     void resize_internal(size_type sz)
779     {
780         sv_.resize_internal(sz);
781     }
size_internal()782     size_type size_internal() const BMNOEXCEPT { return sv_.size(); }
783 
is_remap()784     bool is_remap() const BMNOEXCEPT { return false; }
remap_size()785     size_t remap_size() const BMNOEXCEPT { return 0; }
get_remap_buffer()786     const unsigned char* get_remap_buffer() const BMNOEXCEPT { return 0; }
init_remap_buffer()787     unsigned char* init_remap_buffer() BMNOEXCEPT { return 0; }
set_remap()788     void set_remap() BMNOEXCEPT { }
789 
790     /// unused remap matrix type for compatibility with the sparse serializer
791     typedef
792     bm::heap_matrix<unsigned char, 1, 1,
793                     typename bvector_type::allocator_type> remap_matrix_type;
794 
get_remap_matrix()795     const remap_matrix_type* get_remap_matrix() const { return 0; }
get_remap_matrix()796     remap_matrix_type* get_remap_matrix() { return 0; }
797 
798     void push_back_no_check(size_type idx, value_type v);
799 
800 
801 private:
802 
803     /// Allocate memory for RS index
804     void construct_rs_index();
805     /// Free rs-index
806     void free_rs_index();
807 
808     /**
809         \brief throw error that RSC index not constructed (call sync())
810         \internal
811         @sa sync
812     */
813     static
814     void throw_no_rsc_index();
815 
816 protected:
817     template<class SVect> friend class sparse_vector_scanner;
818     template<class SVect> friend class sparse_vector_serializer;
819     template<class SVect> friend class sparse_vector_deserializer;
820 
821 
822 private:
823     sparse_vector_type            sv_;       ///< transpose-sparse vector for "dense" packing
824     size_type                     size_;     ///< vector size (logical)
825     size_type                     max_id_;   ///< control variable for sorted load
826     bool                          in_sync_;  ///< flag if prefix sum is in-sync with vector
827     rs_index_type*                bv_blocks_ptr_ = 0; ///< prefix sum for rank translation
828 };
829 
830 //---------------------------------------------------------------------
831 //
832 //---------------------------------------------------------------------
833 
834 template<class Val, class SV>
rsc_sparse_vector(bm::null_support null_able,allocation_policy_type ap,size_type bv_max_size,const allocator_type & alloc)835 rsc_sparse_vector<Val, SV>::rsc_sparse_vector(bm::null_support null_able,
836                                               allocation_policy_type ap,
837                                               size_type bv_max_size,
838                                               const allocator_type&   alloc)
839 : sv_(null_able, ap, bv_max_size, alloc), in_sync_(false)
840 {
841     BM_ASSERT(null_able == bm::use_null);
842     BM_ASSERT(int(sv_value_planes) == int(SV::sv_value_planes));
843     size_ = max_id_ = 0;
844     construct_rs_index();
845 }
846 
847 //---------------------------------------------------------------------
848 
849 template<class Val, class SV>
rsc_sparse_vector(const bvector_type & bv_null)850 rsc_sparse_vector<Val, SV>::rsc_sparse_vector(const bvector_type& bv_null)
851 : sv_(bm::use_null), in_sync_(false)
852 {
853     construct_rs_index();
854     bvector_type* bv = sv_.get_null_bvect();
855     BM_ASSERT(bv);
856     *bv = bv_null;
857 
858     bool found = bv->find_reverse(max_id_);
859     if (found)
860     {
861         size_ = max_id_ + 1;
862         size_type sz = bv->count();
863         sv_.resize(sz);
864     }
865     else
866     {
867         BM_ASSERT(!bv->any());
868         size_ = max_id_ = 0;
869     }
870 }
871 
872 //---------------------------------------------------------------------
873 
874 template<class Val, class SV>
~rsc_sparse_vector()875 rsc_sparse_vector<Val, SV>::~rsc_sparse_vector()
876 {
877     free_rs_index();
878 }
879 
880 //---------------------------------------------------------------------
881 
882 template<class Val, class SV>
rsc_sparse_vector(const rsc_sparse_vector<Val,SV> & csv)883 rsc_sparse_vector<Val, SV>::rsc_sparse_vector(
884                           const rsc_sparse_vector<Val, SV>& csv)
885 : sv_(csv.sv_), size_(csv.size_), max_id_(csv.max_id_), in_sync_(csv.in_sync_)
886 {
887     BM_ASSERT(int(sv_value_planes) == int(SV::sv_value_planes));
888 
889     construct_rs_index();
890     if (in_sync_)
891         bv_blocks_ptr_->copy_from(*(csv.bv_blocks_ptr_));
892 }
893 
894 //---------------------------------------------------------------------
895 
896 template<class Val, class SV>
rsc_sparse_vector(rsc_sparse_vector<Val,SV> && csv)897 rsc_sparse_vector<Val, SV>::rsc_sparse_vector(
898                             rsc_sparse_vector<Val,SV>&& csv) BMNOEXCEPT
899 : sv_(bm::use_null),
900   size_(0),
901   max_id_(0), in_sync_(false)
902 {
903     if (this != &csv)
904     {
905         sv_.swap(csv.sv_);
906         size_ = csv.size_; max_id_ = csv.max_id_; in_sync_ = csv.in_sync_;
907         bv_blocks_ptr_ = csv.bv_blocks_ptr_; csv.bv_blocks_ptr_ = 0;
908     }
909 }
910 
911 //---------------------------------------------------------------------
912 
913 template<class Val, class SV>
914 typename rsc_sparse_vector<Val, SV>::size_type
size()915 rsc_sparse_vector<Val, SV>::size() const BMNOEXCEPT
916 {
917     return size_;
918 }
919 
920 //---------------------------------------------------------------------
921 
922 template<class Val, class SV>
push_back(size_type idx,value_type v)923 void rsc_sparse_vector<Val, SV>::push_back(size_type idx, value_type v)
924 {
925     if (sv_.empty())
926     {}
927     else
928     if (idx <= max_id_ && size_)
929     {
930         sv_.throw_range_error("compressed sparse vector push_back() range error");
931     }
932     push_back_no_check(idx, v);
933 }
934 
935 //---------------------------------------------------------------------
936 
937 template<class Val, class SV>
push_back_no_check(size_type idx,value_type v)938 void rsc_sparse_vector<Val, SV>::push_back_no_check(size_type idx, value_type v)
939 {
940     bvector_type* bv_null = sv_.get_null_bvect();
941     BM_ASSERT(bv_null);
942 
943     bv_null->set_bit_no_check(idx);
944     sv_.push_back_no_null(v);
945 
946     max_id_ = idx;
947     size_ = idx + 1;
948     in_sync_ = false;
949 }
950 
951 //---------------------------------------------------------------------
952 
953 template<class Val, class SV>
set_null(size_type idx)954 void rsc_sparse_vector<Val, SV>::set_null(size_type idx)
955 {
956     bvector_type* bv_null = sv_.get_null_bvect();
957     BM_ASSERT(bv_null);
958 
959     bool found = bv_null->test(idx);
960     if (found)
961     {
962         size_type sv_idx = bv_null->count_range(0, idx);
963         bv_null->clear_bit_no_check(idx);
964         sv_.erase(--sv_idx);
965         in_sync_ = false;
966     }
967 }
968 
969 //---------------------------------------------------------------------
970 
971 template<class Val, class SV>
inc(size_type idx)972 void rsc_sparse_vector<Val, SV>::inc(size_type idx)
973 {
974     bvector_type* bv_null = sv_.get_null_bvect();
975     BM_ASSERT(bv_null);
976 
977     size_type sv_idx;
978     bool found = bv_null->test(idx);
979 
980     sv_idx = in_sync_ ? bv_null->count_to(idx, *bv_blocks_ptr_)
981                       : bv_null->count_range(0, idx); // TODO: make test'n'count
982 
983     if (found)
984     {
985         sv_.inc_no_null(--sv_idx);
986     }
987     else
988     {
989         sv_.insert_value_no_null(sv_idx, 1);
990         bv_null->set_bit_no_check(idx);
991 
992         if (idx > max_id_)
993         {
994             max_id_ = idx;
995             size_ = max_id_ + 1;
996         }
997         in_sync_ = false;
998     }
999 }
1000 
1001 //---------------------------------------------------------------------
1002 
1003 template<class Val, class SV>
inc(size_type idx,value_type v)1004 void rsc_sparse_vector<Val, SV>::inc(size_type idx, value_type v)
1005 {
1006     bvector_type* bv_null = sv_.get_null_bvect();
1007     BM_ASSERT(bv_null);
1008 
1009     size_type sv_idx;
1010     bool found = bv_null->test(idx);
1011 
1012     sv_idx = in_sync_ ? bv_null->count_to(idx, *bv_blocks_ptr_)
1013                       : bv_null->count_range(0, idx); // TODO: make test'n'count
1014 
1015     if (found)
1016     {
1017         sv_.inc_no_null(--sv_idx, v);
1018     }
1019     else
1020     {
1021         sv_.insert_value_no_null(sv_idx, v);
1022         bv_null->set_bit_no_check(idx);
1023 
1024         if (idx > max_id_)
1025         {
1026             max_id_ = idx;
1027             size_ = max_id_ + 1;
1028         }
1029         in_sync_ = false;
1030     }
1031 }
1032 
1033 //---------------------------------------------------------------------
1034 
1035 template<class Val, class SV>
inc_not_null(size_type idx,value_type v)1036 void rsc_sparse_vector<Val, SV>::inc_not_null(size_type idx, value_type v)
1037 {
1038     bvector_type* bv_null = sv_.get_null_bvect();
1039     BM_ASSERT(bv_null->test(idx)); // idx must be NOT NULL
1040 
1041     size_type sv_idx;
1042     sv_idx = in_sync_ ? bv_null->count_to(idx, *bv_blocks_ptr_)
1043                       : bv_null->count_range(0, idx); // TODO: make test'n'count
1044     --sv_idx;
1045     if (v == 1)
1046         sv_.inc_no_null(sv_idx);
1047     else
1048         sv_.inc_no_null(sv_idx, v);
1049 }
1050 
1051 
1052 //---------------------------------------------------------------------
1053 
1054 template<class Val, class SV>
set(size_type idx,value_type v)1055 void rsc_sparse_vector<Val, SV>::set(size_type idx, value_type v)
1056 {
1057     bvector_type* bv_null = sv_.get_null_bvect();
1058     BM_ASSERT(bv_null);
1059 
1060     size_type sv_idx;
1061     bool found = bv_null->test(idx);
1062 
1063     sv_idx = in_sync_ ? bv_null->count_to(idx, *bv_blocks_ptr_)
1064                       : bv_null->count_range(0, idx); // TODO: make test'n'count
1065 
1066     if (found)
1067     {
1068         sv_.set_value_no_null(--sv_idx, v);
1069     }
1070     else
1071     {
1072         sv_.insert_value_no_null(sv_idx, v);
1073         bv_null->set_bit_no_check(idx);
1074 
1075         if (idx > max_id_)
1076         {
1077             max_id_ = idx;
1078             size_ = max_id_ + 1;
1079         }
1080         in_sync_ = false;
1081     }
1082 }
1083 
1084 //---------------------------------------------------------------------
1085 
1086 template<class Val, class SV>
equal(const rsc_sparse_vector<Val,SV> & csv)1087 bool rsc_sparse_vector<Val, SV>::equal(
1088                     const rsc_sparse_vector<Val, SV>& csv) const BMNOEXCEPT
1089 {
1090     if (this == &csv)
1091         return true;
1092     if (max_id_ != csv.max_id_ || size_ != csv.size_)
1093         return false;
1094     bool same_sv = sv_.equal(csv.sv_);
1095     return same_sv;
1096 }
1097 
1098 //---------------------------------------------------------------------
1099 
1100 template<class Val, class SV>
load_from(const sparse_vector_type & sv_src)1101 void rsc_sparse_vector<Val, SV>::load_from(
1102                                         const sparse_vector_type& sv_src)
1103 {
1104     max_id_ = size_ = 0;
1105 
1106     bvector_type* bv_null = sv_.get_null_bvect();
1107     BM_ASSERT(bv_null);
1108 
1109     const bvector_type* bv_null_src = sv_src.get_null_bvector();
1110     if (!bv_null_src) // dense vector (no NULL columns)
1111     {
1112         sv_ = sv_src;
1113         BM_ASSERT(sv_.get_null_bvect());
1114     }
1115     else
1116     {
1117         sv_.clear_all(true);
1118         *bv_null = *bv_null_src;
1119 
1120         bm::rank_compressor<bvector_type> rank_compr; // re-used for planes
1121 
1122         unsigned src_planes = sv_src.planes();
1123         for (unsigned i = 0; i < src_planes; ++i)
1124         {
1125             const bvector_type* bv_src_plane = sv_src.get_plane(i);
1126             if (bv_src_plane)
1127             {
1128                 bvector_type* bv_plane = sv_.get_plane(i);
1129                 rank_compr.compress(*bv_plane, *bv_null, *bv_src_plane);
1130             }
1131         } // for
1132         size_type count = bv_null->count(); // set correct sizes
1133         sv_.resize(count);
1134     }
1135 
1136     sync(true);
1137 }
1138 
1139 //---------------------------------------------------------------------
1140 
1141 template<class Val, class SV>
load_to(sparse_vector_type & sv)1142 void rsc_sparse_vector<Val, SV>::load_to(sparse_vector_type& sv) const
1143 {
1144     sv.clear_all(true);
1145 
1146     const bvector_type* bv_null_src = this->get_null_bvector();
1147     if (!bv_null_src)
1148     {
1149         BM_ASSERT(bv_null_src);
1150         return;
1151     }
1152 
1153     bvector_type* bv_null = sv.get_null_bvect();
1154     BM_ASSERT(bv_null);
1155     *bv_null = *bv_null_src;
1156 
1157     bm::rank_compressor<bvector_type> rank_compr; // re-used for planes
1158 
1159     unsigned src_planes = sv_.planes();
1160     for (unsigned i = 0; i < src_planes; ++i)
1161     {
1162         const bvector_type* bv_src_plane = sv_.get_plane(i);
1163         if (bv_src_plane)
1164         {
1165             bvector_type* bv_plane = sv.get_plane(i);
1166             rank_compr.decompress(*bv_plane, *bv_null, *bv_src_plane);
1167         }
1168     } // for
1169     sv.resize(this->size());
1170 }
1171 
1172 //---------------------------------------------------------------------
1173 
1174 template<class Val, class SV>
sync(bool force)1175 void rsc_sparse_vector<Val, SV>::sync(bool force)
1176 {
1177     if (in_sync_ && force == false)
1178         return;  // nothing to do
1179     const bvector_type* bv_null = sv_.get_null_bvector();
1180     BM_ASSERT(bv_null);
1181     bv_null->build_rs_index(bv_blocks_ptr_); // compute popcount prefix list
1182 
1183     if (force)
1184     {
1185         sync_size();
1186     }
1187     in_sync_ = true;
1188 }
1189 
1190 //---------------------------------------------------------------------
1191 
1192 template<class Val, class SV>
sync_size()1193 void rsc_sparse_vector<Val, SV>::sync_size() BMNOEXCEPT
1194 {
1195     const bvector_type* bv_null = sv_.get_null_bvector();
1196     BM_ASSERT(bv_null);
1197     // sync the max-id
1198     bool found = bv_null->find_reverse(max_id_);
1199     if (!found)
1200         max_id_ = size_ = 0;
1201     else
1202         size_ = max_id_ + 1;
1203     sync(false);
1204 }
1205 
1206 //---------------------------------------------------------------------
1207 
1208 template<class Val, class SV>
resolve(size_type idx,size_type * idx_to)1209 bool rsc_sparse_vector<Val, SV>::resolve(size_type idx,
1210                                          size_type* idx_to) const BMNOEXCEPT
1211 {
1212     BM_ASSERT(idx_to);
1213     const bvector_type* bv_null = sv_.get_null_bvector();
1214     if (in_sync_)
1215     {
1216         *idx_to = bv_null->count_to_test(idx, *bv_blocks_ptr_);
1217     }
1218     else  // slow access
1219     {
1220         bool found = bv_null->test(idx);
1221         *idx_to = found ? bv_null->count_range(0, idx) : 0;
1222     }
1223     return bool(*idx_to);
1224 }
1225 
1226 //---------------------------------------------------------------------
1227 
1228 template<class Val, class SV>
resolve_range(size_type from,size_type to,size_type * idx_from,size_type * idx_to)1229 bool rsc_sparse_vector<Val, SV>::resolve_range(
1230     size_type from, size_type to,
1231     size_type* idx_from, size_type* idx_to) const BMNOEXCEPT
1232 {
1233     BM_ASSERT(idx_to && idx_from);
1234     const bvector_type* bv_null = sv_.get_null_bvector();
1235     size_type copy_sz, sv_left;
1236     if (in_sync_)
1237         copy_sz = bv_null->count_range(from, to, *bv_blocks_ptr_);
1238     else  // slow access
1239         copy_sz = bv_null->count_range(from, to);
1240     if (!copy_sz)
1241         return false;
1242 
1243     if (in_sync_)
1244         sv_left = bv_null->rank_corrected(from, *bv_blocks_ptr_);
1245     else
1246     {
1247         sv_left = bv_null->count_range(0, from);
1248         bool tl = bv_null->test(from); // TODO: add count and test
1249         sv_left -= tl; // rank correction
1250     }
1251 
1252     *idx_from = sv_left; *idx_to = sv_left + copy_sz - 1;
1253     return true;
1254 }
1255 
1256 
1257 //---------------------------------------------------------------------
1258 
1259 template<class Val, class SV>
1260 typename rsc_sparse_vector<Val, SV>::value_type
at(size_type idx)1261 rsc_sparse_vector<Val, SV>::at(size_type idx) const
1262 {
1263     size_type sv_idx;
1264     if (idx >= size())
1265         sv_.throw_range_error("compressed collection access error");
1266 
1267     bool found = resolve(idx, &sv_idx);
1268     if (!found)
1269     {
1270         sv_.throw_range_error("compressed collection item not found");
1271     }
1272     return sv_.at(--sv_idx);
1273 }
1274 
1275 //---------------------------------------------------------------------
1276 
1277 template<class Val, class SV>
1278 typename rsc_sparse_vector<Val, SV>::value_type
get(size_type idx)1279 rsc_sparse_vector<Val, SV>::get(size_type idx) const BMNOEXCEPT
1280 {
1281     size_type sv_idx;
1282     bool found = resolve(idx, &sv_idx);
1283     if (!found)
1284         return value_type(0);
1285 
1286     return sv_.get(--sv_idx);
1287 }
1288 
1289 //---------------------------------------------------------------------
1290 
1291 template<class Val, class SV>
is_null(size_type idx)1292 bool rsc_sparse_vector<Val, SV>::is_null(size_type idx) const BMNOEXCEPT
1293 {
1294     const bvector_type* bv_null = sv_.get_null_bvector();
1295     BM_ASSERT(bv_null);
1296     return !(bv_null->test(idx));
1297 }
1298 
1299 //---------------------------------------------------------------------
1300 
1301 template<class Val, class SV>
optimize(bm::word_t * temp_block,typename bvector_type::optmode opt_mode,statistics * st)1302 void rsc_sparse_vector<Val, SV>::optimize(bm::word_t*  temp_block,
1303                     typename bvector_type::optmode opt_mode,
1304                     statistics* st)
1305 {
1306     sv_.optimize(temp_block, opt_mode, (typename sparse_vector_type::statistics*)st);
1307     if (st)
1308     {
1309         st->memory_used += sizeof(rs_index_type);
1310         st->memory_used += bv_blocks_ptr_->get_total() *
1311              (sizeof(typename rs_index_type::size_type)
1312              + sizeof(typename rs_index_type::sb_pair_type));
1313     }
1314 }
1315 
1316 //---------------------------------------------------------------------
1317 
1318 template<class Val, class SV>
clear_all(bool free_mem)1319 void rsc_sparse_vector<Val, SV>::clear_all(bool free_mem) BMNOEXCEPT
1320 {
1321     sv_.clear_all(free_mem);
1322     in_sync_ = false;  max_id_ = size_ = 0;
1323 }
1324 
1325 //---------------------------------------------------------------------
1326 
1327 template<class Val, class SV>
calc_stat(struct rsc_sparse_vector<Val,SV>::statistics * st)1328 void rsc_sparse_vector<Val, SV>::calc_stat(
1329             struct rsc_sparse_vector<Val, SV>::statistics* st) const BMNOEXCEPT
1330 {
1331     BM_ASSERT(st);
1332     sv_.calc_stat((typename sparse_vector_type::statistics*)st);
1333     if (st)
1334     {
1335         st->memory_used += sizeof(rs_index_type);
1336         st->memory_used += bv_blocks_ptr_->get_total() *
1337                    (sizeof(typename rs_index_type::size_type)
1338                    + sizeof(typename rs_index_type::sb_pair_type));
1339     }
1340 }
1341 
1342 //---------------------------------------------------------------------
1343 
1344 template<class Val, class SV>
1345 const typename rsc_sparse_vector<Val, SV>::bvector_type*
get_null_bvector()1346 rsc_sparse_vector<Val, SV>::get_null_bvector() const BMNOEXCEPT
1347 {
1348     return sv_.get_null_bvector();
1349 }
1350 
1351 //---------------------------------------------------------------------
1352 
1353 template<class Val, class SV>
1354 bool
find_rank(size_type rank,size_type & idx)1355 rsc_sparse_vector<Val, SV>::find_rank(size_type rank,
1356                                       size_type& idx) const BMNOEXCEPT
1357 {
1358     BM_ASSERT(rank);
1359     bool b;
1360     const bvector_type* bv_null = get_null_bvector();
1361     if (in_sync())
1362         b = bv_null->select(rank, idx, *bv_blocks_ptr_);
1363     else
1364         b = bv_null->find_rank(rank, 0, idx);
1365     return b;
1366 }
1367 
1368 //---------------------------------------------------------------------
1369 
1370 template<class Val, class SV>
throw_no_rsc_index()1371 void rsc_sparse_vector<Val, SV>::throw_no_rsc_index()
1372 {
1373 #ifndef BM_NO_STL
1374     throw std::domain_error("Rank-Select index not constructed, call sync() first");
1375 #else
1376     BM_ASSERT_THROW(false, BM_ERR_RANGE);
1377 #endif
1378 }
1379 
1380 //---------------------------------------------------------------------
1381 
1382 
1383 template<class Val, class SV>
1384 typename rsc_sparse_vector<Val, SV>::size_type
decode(value_type * arr,size_type idx_from,size_type size,bool zero_mem)1385 rsc_sparse_vector<Val, SV>::decode(value_type* arr,
1386                                    size_type   idx_from,
1387                                    size_type   size,
1388                                    bool        zero_mem) const
1389 {
1390     if (size == 0)
1391         return 0;
1392     if (!in_sync_)
1393         throw_no_rsc_index(); // call sync() to build RSC fast access index
1394 
1395     BM_ASSERT(arr);
1396     BM_ASSERT(bv_blocks_ptr_);
1397 
1398     if (idx_from >= this->size())
1399         return 0;
1400 
1401     if ((bm::id_max - size) <= idx_from)
1402         size = bm::id_max - idx_from;
1403     if ((idx_from + size) > this->size())
1404         size = this->size() - idx_from;
1405 
1406     const bvector_type* bv_null = sv_.get_null_bvector();
1407     size_type rank = bv_null->rank_corrected(idx_from, *bv_blocks_ptr_);
1408 
1409     BM_ASSERT(rank == bv_null->count_range(0, idx_from) - bv_null->test(idx_from));
1410 
1411     size_type i = 0;
1412 
1413     bvector_enumerator_type en_i = bv_null->get_enumerator(idx_from);
1414     if (!en_i.valid())
1415         return i;
1416 
1417     if (zero_mem)
1418         ::memset(arr, 0, sizeof(value_type)*size);
1419 
1420     sparse_vector_const_iterator it = sv_.get_const_iterator(rank);
1421     if (it.valid())
1422     {
1423         do
1424         {
1425             size_type en_idx = *en_i;
1426             size_type delta = en_idx - idx_from;
1427             idx_from += delta;
1428             i += delta;
1429             if (i >= size)
1430                 return size;
1431             arr[i++] = it.value();
1432             if (!en_i.advance())
1433                 break;
1434             if (!it.advance())
1435                 break;
1436             ++idx_from;
1437         } while (i < size);
1438     }
1439     return i;
1440 }
1441 
1442 
1443 template<class Val, class SV>
1444 typename rsc_sparse_vector<Val, SV>::size_type
decode_buf(value_type * arr,value_type * arr_buf_tmp,size_type idx_from,size_type size,bool zero_mem)1445 rsc_sparse_vector<Val, SV>::decode_buf(value_type*     arr,
1446                                        value_type*     arr_buf_tmp,
1447                                        size_type       idx_from,
1448                                        size_type       size,
1449                                        bool            zero_mem) const BMNOEXCEPT
1450 {
1451     if (!size || (idx_from >= this->size()))
1452         return 0;
1453 
1454     BM_ASSERT(arr && arr_buf_tmp);
1455     BM_ASSERT(arr != arr_buf_tmp);
1456     BM_ASSERT(in_sync_);  // call sync() to build RSC fast access index
1457     BM_ASSERT(bv_blocks_ptr_);
1458 
1459     if ((bm::id_max - size) <= idx_from)
1460         size = bm::id_max - idx_from;
1461     if ((idx_from + size) > this->size())
1462         size = this->size() - idx_from;
1463 
1464     if (zero_mem)
1465         ::memset(arr, 0, sizeof(value_type)*size);
1466 
1467     const bvector_type* bv_null = sv_.get_null_bvector();
1468     size_type rank = bv_null->rank_corrected(idx_from, *bv_blocks_ptr_);
1469 
1470     BM_ASSERT(rank == bv_null->count_range(0, idx_from) - bv_null->test(idx_from));
1471 
1472     bvector_enumerator_type en_i = bv_null->get_enumerator(idx_from);
1473     if (!en_i.valid())
1474         return size;
1475 
1476     size_type i = en_i.value();
1477     if (idx_from + size <= i)  // empty space (all zeros)
1478         return size;
1479 
1480     size_type extract_cnt =
1481         bv_null->count_range(idx_from, idx_from + size - 1, *bv_blocks_ptr_);
1482 
1483     BM_ASSERT(extract_cnt <= this->size());
1484     auto ex_sz = sv_.decode(arr_buf_tmp, rank, extract_cnt, true);
1485     BM_ASSERT(ex_sz == extract_cnt); (void) ex_sz;
1486 
1487     for (i = 0; i < extract_cnt; ++i)
1488     {
1489         BM_ASSERT(en_i.valid());
1490         size_type en_idx = *en_i;
1491         arr[en_idx-idx_from] = arr_buf_tmp[i];
1492         en_i.advance();
1493     } // for i
1494 
1495     return size;
1496 }
1497 
1498 
1499 //---------------------------------------------------------------------
1500 
1501 template<class Val, class SV>
construct_rs_index()1502 void rsc_sparse_vector<Val, SV>::construct_rs_index()
1503 {
1504     if (bv_blocks_ptr_)
1505         return;
1506     bv_blocks_ptr_ =
1507         (rs_index_type*) bm::aligned_new_malloc(sizeof(rs_index_type));
1508     bv_blocks_ptr_ = new(bv_blocks_ptr_) rs_index_type(); // placement new
1509 }
1510 
1511 //---------------------------------------------------------------------
1512 
1513 template<class Val, class SV>
free_rs_index()1514 void rsc_sparse_vector<Val, SV>::free_rs_index()
1515 {
1516     if (bv_blocks_ptr_)
1517     {
1518         bv_blocks_ptr_->~rs_index_type();
1519         bm::aligned_free(bv_blocks_ptr_);
1520     }
1521 }
1522 
1523 //---------------------------------------------------------------------
1524 
1525 template<class Val, class SV>
copy_range(const rsc_sparse_vector<Val,SV> & csv,size_type left,size_type right)1526 void rsc_sparse_vector<Val, SV>::copy_range(
1527                             const rsc_sparse_vector<Val, SV>& csv,
1528                             size_type left, size_type right)
1529 {
1530     if (left > right)
1531         bm::xor_swap(left, right);
1532 
1533     if (left >= csv.size())
1534         return;
1535 
1536     size_ = csv.size_; max_id_ = csv.max_id_;
1537     in_sync_ = false;
1538 
1539     const bvector_type* arg_bv_null = csv.sv_.get_null_bvector();
1540     size_type sv_left, sv_right;
1541     bool range_valid = csv.resolve_range(left, right, &sv_left, &sv_right);
1542     if (!range_valid)
1543     {
1544         sv_.clear_all(true); sv_.resize(size_);
1545         bvector_type* bv_null = sv_.get_null_bvect();
1546         bv_null->copy_range(*arg_bv_null, 0, right);
1547         return;
1548     }
1549     bvector_type* bv_null = sv_.get_null_bvect();
1550     bv_null->copy_range(*arg_bv_null, 0, right); // not NULL vector gets a full copy
1551     sv_.copy_range(csv.sv_, sv_left, sv_right, bm::no_null); // don't copy NULL
1552 }
1553 
1554 
1555 //---------------------------------------------------------------------
1556 
1557 template<class Val, class SV>
merge_not_null(rsc_sparse_vector<Val,SV> & csv)1558 void rsc_sparse_vector<Val, SV>::merge_not_null(rsc_sparse_vector<Val, SV>& csv)
1559 {
1560     // MUST have the same NULL to work
1561     BM_ASSERT(sv_.get_null_bvector()->equal(*csv.sv_.get_null_bvector()));
1562 
1563     sv_.merge(csv.sv_);
1564 }
1565 
1566 //---------------------------------------------------------------------
1567 
1568 template<class Val, class SV>
1569 typename rsc_sparse_vector<Val, SV>::size_type
count_range_notnull(size_type left,size_type right)1570 rsc_sparse_vector<Val, SV>::count_range_notnull(
1571                                         size_type left,
1572                                         size_type right) const BMNOEXCEPT
1573 {
1574     if (left > right)
1575         bm::xor_swap(left, right);
1576 
1577     const bvector_type* bv_null = sv_.get_null_bvector();
1578     size_type range = right - left;
1579     if ((range < rs3_border0) || !in_sync_)
1580     {
1581         return bv_null->count_range_no_check(left, right);
1582     }
1583     return bv_null->count_range_no_check(left, right, *bv_blocks_ptr_);
1584 }
1585 
1586 //---------------------------------------------------------------------
1587 //
1588 //---------------------------------------------------------------------
1589 
1590 
1591 template<class Val, class SV>
back_insert_iterator()1592 rsc_sparse_vector<Val, SV>::back_insert_iterator::back_insert_iterator() BMNOEXCEPT
1593 : csv_(0)
1594 {}
1595 
1596 
1597 //---------------------------------------------------------------------
1598 
1599 template<class Val, class SV>
back_insert_iterator(rsc_sparse_vector_type * csv)1600 rsc_sparse_vector<Val, SV>::back_insert_iterator::back_insert_iterator
1601                                 (rsc_sparse_vector_type* csv) BMNOEXCEPT
1602 {
1603     csv_ = csv;
1604     sv_bi_ = csv->sv_.get_back_inserter();
1605     sv_bi_.disable_set_null(); // NULL is handled outside
1606 }
1607 
1608 //---------------------------------------------------------------------
1609 
1610 template<class Val, class SV>
~back_insert_iterator()1611 rsc_sparse_vector<Val, SV>::back_insert_iterator::~back_insert_iterator()
1612 {
1613     this->flush();
1614 }
1615 
1616 //---------------------------------------------------------------------
1617 
1618 template<class Val, class SV>
add(typename rsc_sparse_vector<Val,SV>::back_insert_iterator::value_type v)1619 void rsc_sparse_vector<Val, SV>::back_insert_iterator::add(
1620     typename rsc_sparse_vector<Val, SV>::back_insert_iterator::value_type v)
1621 {
1622     BM_ASSERT(csv_);
1623     sv_bi_.add_value_no_null(v);
1624     bvector_type* bv_null = sv_bi_.get_null_bvect();
1625     BM_ASSERT(bv_null);
1626     bv_null->set_bit_no_check(csv_->size_);
1627 
1628     csv_->max_id_++;
1629     csv_->size_++;
1630 }
1631 
1632 //---------------------------------------------------------------------
1633 
1634 template<class Val, class SV>
add_null()1635 void rsc_sparse_vector<Val, SV>::back_insert_iterator::add_null() BMNOEXCEPT
1636 {
1637     BM_ASSERT(csv_);
1638     csv_->max_id_++;
1639     csv_->size_++;
1640 }
1641 
1642 //---------------------------------------------------------------------
1643 
1644 template<class Val, class SV>
add_null(rsc_sparse_vector<Val,SV>::back_insert_iterator::size_type count)1645 void rsc_sparse_vector<Val, SV>::back_insert_iterator::add_null(
1646     rsc_sparse_vector<Val, SV>::back_insert_iterator::size_type count) BMNOEXCEPT
1647 {
1648     BM_ASSERT(csv_);
1649     csv_->max_id_+=count;
1650     csv_->size_+=count;
1651 }
1652 
1653 //---------------------------------------------------------------------
1654 
1655 template<class Val, class SV>
flush()1656 void rsc_sparse_vector<Val, SV>::back_insert_iterator::flush()
1657 {
1658     sv_bi_.flush();
1659     csv_->in_sync_ = false;
1660 }
1661 
1662 //---------------------------------------------------------------------
1663 //
1664 //---------------------------------------------------------------------
1665 
1666 template<class Val, class BV>
const_iterator()1667 rsc_sparse_vector<Val, BV>::const_iterator::const_iterator() BMNOEXCEPT
1668 : csv_(0), pos_(bm::id_max), buf_ptr_(0)
1669 {}
1670 
1671 //---------------------------------------------------------------------
1672 
1673 template<class Val, class SV>
const_iterator(const typename rsc_sparse_vector<Val,SV>::const_iterator & it)1674 rsc_sparse_vector<Val, SV>::const_iterator::const_iterator(
1675     const typename rsc_sparse_vector<Val, SV>::const_iterator& it) BMNOEXCEPT
1676 : csv_(it.csv_), pos_(it.pos_), buf_ptr_(0)
1677 {}
1678 
1679 //---------------------------------------------------------------------
1680 
1681 template<class Val, class SV>
const_iterator(const typename rsc_sparse_vector<Val,SV>::const_iterator::rsc_sparse_vector_type * csv)1682 rsc_sparse_vector<Val, SV>::const_iterator::const_iterator(
1683   const typename rsc_sparse_vector<Val, SV>::const_iterator::rsc_sparse_vector_type* csv
1684   ) BMNOEXCEPT
1685 : csv_(csv), buf_ptr_(0)
1686 {
1687     BM_ASSERT(csv_);
1688     pos_ = csv_->empty() ? bm::id_max : 0u;
1689 }
1690 
1691 //---------------------------------------------------------------------
1692 
1693 template<class Val, class SV>
const_iterator(const typename rsc_sparse_vector<Val,SV>::const_iterator::rsc_sparse_vector_type * csv,typename rsc_sparse_vector<Val,SV>::size_type pos)1694 rsc_sparse_vector<Val, SV>::const_iterator::const_iterator(
1695  const typename rsc_sparse_vector<Val, SV>::const_iterator::rsc_sparse_vector_type* csv,
1696  typename rsc_sparse_vector<Val, SV>::size_type pos) BMNOEXCEPT
1697 : csv_(csv), buf_ptr_(0)
1698 {
1699     BM_ASSERT(csv_);
1700     this->go_to(pos);
1701 }
1702 
1703 //---------------------------------------------------------------------
1704 
1705 template<class Val, class SV>
go_to(size_type pos)1706 void rsc_sparse_vector<Val, SV>::const_iterator::go_to(size_type pos) BMNOEXCEPT
1707 {
1708     pos_ = (!csv_ || pos >= csv_->size()) ? bm::id_max : pos;
1709     buf_ptr_ = 0;
1710 }
1711 
1712 //---------------------------------------------------------------------
1713 
1714 template<class Val, class SV>
advance()1715 bool rsc_sparse_vector<Val, SV>::const_iterator::advance() BMNOEXCEPT
1716 {
1717     if (pos_ == bm::id_max) // nothing to do, we are at the end
1718         return false;
1719     ++pos_;
1720     if (pos_ >= csv_->size())
1721     {
1722         this->invalidate();
1723         return false;
1724     }
1725     if (buf_ptr_)
1726     {
1727         ++buf_ptr_;
1728         if (buf_ptr_ - ((value_type*)vbuffer_.data()) >= n_buf_size)
1729             buf_ptr_ = 0;
1730     }
1731     return true;
1732 }
1733 
1734 //---------------------------------------------------------------------
1735 
1736 template<class Val, class SV>
1737 typename rsc_sparse_vector<Val, SV>::const_iterator::value_type
value()1738 rsc_sparse_vector<Val, SV>::const_iterator::value() const
1739 {
1740     BM_ASSERT(this->valid());
1741     value_type v;
1742 
1743     if (!buf_ptr_)
1744     {
1745         vbuffer_.reserve(n_buf_size * sizeof(value_type));
1746         tbuffer_.reserve(n_buf_size * sizeof(value_type));
1747         buf_ptr_ = (value_type*)(vbuffer_.data());
1748         value_type* tmp_buf_ptr = (value_type*) (tbuffer_.data());
1749 
1750         csv_->decode_buf(buf_ptr_, tmp_buf_ptr, pos_, n_buf_size, true);
1751     }
1752     v = *buf_ptr_;
1753     return v;
1754 }
1755 
1756 //---------------------------------------------------------------------
1757 
1758 template<class Val, class SV>
skip_zero_values()1759 void rsc_sparse_vector<Val, SV>::const_iterator::skip_zero_values() BMNOEXCEPT
1760 {
1761     value_type v = value();
1762     if (buf_ptr_)
1763     {
1764         v = *buf_ptr_;
1765         value_type* buf_end = ((value_type*)vbuffer_.data()) + n_buf_size;
1766         while(!v)
1767         {
1768             ++pos_;
1769             if (++buf_ptr_ < buf_end)
1770                 v = *buf_ptr_;
1771             else
1772                 break;
1773         }
1774         if (pos_ >= csv_->size())
1775         {
1776             pos_ = bm::id_max;
1777             return;
1778         }
1779         if (buf_ptr_ >= buf_end)
1780             buf_ptr_ = 0;
1781     }
1782 }
1783 
1784 //---------------------------------------------------------------------
1785 
1786 template<class Val, class SV>
is_null()1787 bool rsc_sparse_vector<Val, SV>::const_iterator::is_null() const BMNOEXCEPT
1788 {
1789     return csv_->is_null(pos_);
1790 }
1791 
1792 
1793 //---------------------------------------------------------------------
1794 
1795 
1796 
1797 } // namespace bm
1798 
1799 
1800 
1801 #endif
1802