1 #ifndef BMCONST__H__INCLUDED__
2 #define BMCONST__H__INCLUDED__
3 /*
4 Copyright(c) 2002-2017 Anatoliy Kuznetsov(anatoliy_kuznetsov at yahoo.com)
5 
6 Licensed under the Apache License, Version 2.0 (the "License");
7 you may not use this file except in compliance with the License.
8 You may obtain a copy of the License at
9 
10     http://www.apache.org/licenses/LICENSE-2.0
11 
12 Unless required by applicable law or agreed to in writing, software
13 distributed under the License is distributed on an "AS IS" BASIS,
14 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 See the License for the specific language governing permissions and
16 limitations under the License.
17 
18 For more information please visit:  http://bitmagic.io
19 */
20 
21 /*! \file bmconst.h
22     \brief Constants, tables and typedefs
23     @internal
24 */
25 
26 #include <cstdint>
27 
28 namespace bm
29 {
30 
31 #if defined(_WIN32) || defined (_WIN64)
32 typedef unsigned __int64 id64_t;
33 #else
34 typedef unsigned long long int id64_t;
35 #endif
36 
37 typedef unsigned int   id_t;
38 typedef unsigned int   word_t;
39 typedef unsigned short short_t;
40 
41 #ifndef BM_DEFAULT_POOL_SIZE
42 # define BM_DEFAULT_POOL_SIZE 4096
43 #endif
44 
45 #ifdef BM64ADDR
46 const unsigned long long id_max32 = 0xFFFFFFFFull;
47 const unsigned long long id_max48 = 0xFFFFFFFFFFFFull;
48 #else
49 const unsigned id_max32 = 0xFFFFFFFFu;
50 #endif
51 
52 // Data Block parameters
53 
54 const unsigned set_block_size  = 2048u;
55 const unsigned set_block_shift = 16u;
56 const unsigned set_block_mask  = 0xFFFFu;
57 const unsigned set_blkblk_mask = 0xFFFFFFu;
58 
59 // set block size in bytes
60 const unsigned set_block_alloc_size = bm::set_block_size * unsigned(sizeof(bm::word_t));
61 
62 const unsigned set_block_plane_size = bm::set_block_size / 32u;
63 const unsigned set_block_plane_cnt = (unsigned)(sizeof(bm::word_t) * 8u);
64 
65 const unsigned block_waves = 64;
66 const unsigned set_block_digest_wave_size = bm::set_block_size / bm::block_waves;
67 const unsigned set_block_digest_pos_shift = 10;
68 
69 // Word parameters
70 
71 const unsigned set_word_shift = 5u;
72 const unsigned set_word_mask  = 0x1Fu;
73 
74 
75 // GAP related parameters.
76 
77 typedef unsigned short gap_word_t;
78 
79 const unsigned gap_max_buff_len = 1280;
80 const unsigned gap_max_bits = 65536;
81 const unsigned gap_equiv_len = (unsigned)
82    ((sizeof(bm::word_t) * bm::set_block_size) / sizeof(bm::gap_word_t));
83 const unsigned gap_max_bits_cmrz = bm::gap_max_bits / 2;
84 const unsigned gap_levels = 4;
85 const unsigned gap_max_level = bm::gap_levels - 1;
86 
87 const unsigned bie_cut_off = 16384; // binary interpolative encode size cut-off
88 
89 
90 // Block Array parameters
91 
92 
93 const unsigned set_array_size32 = 256u;
94 const unsigned set_sub_array_size = set_array_size32;
95 const unsigned set_array_shift = 8u;
96 const unsigned set_array_mask  = 0xFFu;
97 
98 const unsigned set_total_blocks32 = (bm::set_array_size32 * bm::set_array_size32);
99 const unsigned set_sub_total_bits = bm::set_sub_array_size * bm::gap_max_bits;
100 
101 #ifdef BM64ADDR
102 const unsigned set_total_blocks48 = bm::id_max48 / bm::gap_max_bits;
103 const unsigned long long id_max = bm::id_max48;
104 const unsigned long long set_array_size48 = 1 + (bm::id_max48 / set_sub_total_bits);
105 const unsigned  set_top_array_size = bm::set_array_size48;
106 const id64_t set_total_blocks = id64_t(bm::set_top_array_size) * set_sub_array_size;
107 #else
108 const unsigned id_max = bm::id_max32;
109 const unsigned set_top_array_size = bm::set_array_size32;
110 const unsigned set_total_blocks = bm::set_total_blocks32;
111 #endif
112 
113 const unsigned bits_in_block = bm::set_block_size * unsigned((sizeof(bm::word_t) * 8));
114 const unsigned bits_in_array = bm::bits_in_block * bm::set_array_size32;
115 
116 
117 // Rank-Select parameters
118 const unsigned rs3_border0 = 21824; // 682 words by 32-bits
119 const unsigned rs3_border1 = (rs3_border0 * 2); // 43648
120 const unsigned rs3_half_span = rs3_border0 / 2;
121 
122 // misc parameters for sparse vec algorithms
123 const unsigned sub_block3_size = bm::gap_max_bits / 4;
124 
125 
126 #if defined(BM64OPT) || defined(BM64_SSE4)
127 typedef id64_t  wordop_t;
128 const id64_t    all_bits_mask = 0xffffffffffffffffULL;
129 const unsigned set_block_size_op  = bm::set_block_size / 2;
130 #else
131 typedef word_t wordop_t;
132 const word_t all_bits_mask = 0xffffffff;
133 const unsigned set_block_size_op  = bm::set_block_size;
134 #endif
135 
136 
137 /*!
138    @brief Block allocation strategies.
139    @ingroup bvector
140 */
141 enum strategy
142 {
143     BM_BIT = 0, //!< No GAP compression strategy. All new blocks are bit blocks.
144     BM_GAP = 1  //!< GAP compression is ON.
145 };
146 
147 
148 /**
149     Codes of set operations
150     @ingroup bvector
151 */
152 enum set_operation
153 {
154     set_AND         = 0,
155     set_OR          = 1,
156     set_SUB         = 2,
157     set_XOR         = 3,
158     set_ASSIGN      = 4,
159     set_COUNT       = 5,
160     set_COUNT_AND   = 6,
161     set_COUNT_XOR   = 7,
162     set_COUNT_OR    = 8,
163     set_COUNT_SUB_AB= 9,
164     set_COUNT_SUB_BA= 10,
165     set_COUNT_A     = 11,
166     set_COUNT_B     = 12,
167 
168     set_END
169 };
170 
171 /**
172     Bit operations
173     @ingroup bvector
174 */
175 enum operation
176 {
177     BM_AND = set_AND,
178     BM_OR  = set_OR,
179     BM_SUB = set_SUB,
180     BM_XOR = set_XOR
181 };
182 
183 
184 /*!
185    @brief Sort order declaration
186    @ingroup bvector
187 */
188 enum sort_order
189 {
190     BM_UNSORTED       = 0,  //!< input set is NOT sorted
191     BM_SORTED         = 1,  //!< input set is sorted (ascending order)
192     BM_SORTED_UNIFORM = 2,  //!< sorted and in one block (internal!)
193     BM_UNKNOWN        = 3   //!< sort order unknown
194 };
195 
196 
197 /*!
198     @brief set representation variants
199     @internal
200 */
201 enum set_representation
202 {
203     set_bitset  = 0,  //!< Simple bitset
204     set_gap     = 1,  //!< GAP-RLE compression
205     set_array1  = 2,  //!< array of set 1 values
206     set_array0  = 3   //!< array of 0 values
207 };
208 
209 /*!
210    @brief NULL-able value support
211    @ingroup bvector
212 */
213 enum null_support
214 {
215     use_null = 0, //!< support "non-assigned" or "NULL" logic
216     no_null  = 1   //!< do not support NULL values
217 };
218 
219 
220 /**
221     Internal structure. Copyright information.
222     @internal
223 */
224 template<bool T> struct _copyright
225 {
226     static const char _p[];
227     static const unsigned _v[3];
228 };
229 
230 template<bool T> const char _copyright<T>::_p[] =
231     "BitMagic C++ Library. v.7.3.3 (c) 2002-2021 Anatoliy Kuznetsov.";
232 template<bool T> const unsigned _copyright<T>::_v[3] = {7, 3, 3};
233 
234 
235 
236 /**
237     DeBruijn majic table
238     @internal
239 */
240 template<bool T> struct DeBruijn_bit_position
241 {
242     static const unsigned _multiply[32];
243 };
244 
245 template<bool T>
246 const unsigned DeBruijn_bit_position<T>::_multiply[32] = {
247   0, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20, 15, 25, 17, 4, 8,
248   31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9
249 };
250 
251 /**
252     Structure keeps index of first right 1 bit for every byte.
253     @ingroup bitfunc
254 */
255 template<bool T> struct first_bit_table
256 {
257     static const signed char _idx[256];
258 };
259 
260 template<bool T>
261 const signed char first_bit_table<T>::_idx[256] = {
262   -1, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3,
263     4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
264     5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
265     5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
266     6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
267     6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
268     6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
269     6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
270     7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
271     7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
272     7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
273     7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
274     7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
275     7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
276     7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
277     7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
278 };
279 
280 //---------------------------------------------------------------------
281 
282 /** Structure to aid in counting bits
283     table contains count of bits in 0-255 diapason of numbers
284 
285    @ingroup bitfunc
286 */
287 template<bool T> struct bit_count_table
288 {
289   static const unsigned char _count[256];
290 };
291 
292 template<bool T>
293 const unsigned char bit_count_table<T>::_count[256] = {
294     0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,1,2,2,3,2,3,3,4,2,3,3,4,3,4,4,5,
295     1,2,2,3,2,3,3,4,2,3,3,4,3,4,4,5,2,3,3,4,3,4,4,5,3,4,4,5,4,5,5,6,
296     1,2,2,3,2,3,3,4,2,3,3,4,3,4,4,5,2,3,3,4,3,4,4,5,3,4,4,5,4,5,5,6,
297     2,3,3,4,3,4,4,5,3,4,4,5,4,5,5,6,3,4,4,5,4,5,5,6,4,5,5,6,5,6,6,7,
298     1,2,2,3,2,3,3,4,2,3,3,4,3,4,4,5,2,3,3,4,3,4,4,5,3,4,4,5,4,5,5,6,
299     2,3,3,4,3,4,4,5,3,4,4,5,4,5,5,6,3,4,4,5,4,5,5,6,4,5,5,6,5,6,6,7,
300     2,3,3,4,3,4,4,5,3,4,4,5,4,5,5,6,3,4,4,5,4,5,5,6,4,5,5,6,5,6,6,7,
301     3,4,4,5,4,5,5,6,4,5,5,6,5,6,6,7,4,5,5,6,5,6,6,7,5,6,6,7,6,7,7,8
302 };
303 
304 //---------------------------------------------------------------------
305 
306 /** Structure for LZCNT constants (4-bit)
307     @ingroup bitfunc
308 */
309 template<bool T> struct lzcnt_table
310 {
311     static unsigned char const _lut[16];
312 };
313 
314 template<bool T>
315 const unsigned char lzcnt_table<T>::_lut[16] =
316 {
317     32U, 31U, 30U, 30U, 29U, 29U, 29U, 29U,
318     28U, 28U, 28U, 28U, 28U, 28U, 28U, 28U
319 };
320 
321 /** Structure for TZCNT constants
322     @ingroup bitfunc
323 */
324 template<bool T> struct tzcnt_table
325 {
326     static unsigned char const _lut[37];
327 };
328 
329 template<bool T>
330 const unsigned char tzcnt_table<T>::_lut[37] =
331 {
332     32, 0, 1, 26, 2, 23, 27, 0, 3, 16, 24, 30, 28, 11,
333     0, 13, 4, 7, 17, 0, 25, 22, 31, 15, 29, 10, 12, 6, 0,
334     21, 14, 9, 5, 20, 8, 19, 18
335 };
336 
337 
338 
339 /** Structure keeps all-left/right ON bits masks.
340     @ingroup bitfunc
341 */
342 template<bool T> struct block_set_table
343 {
344     static const unsigned _left[32];
345     static const unsigned _right[32];
346 };
347 
348 template<bool T>
349 const unsigned block_set_table<T>::_left[32] = {
350     0x1, 0x3, 0x7, 0xf, 0x1f, 0x3f, 0x7f, 0xff, 0x1ff, 0x3ff, 0x7ff,
351     0xfff, 0x1fff, 0x3fff, 0x7fff, 0xffff, 0x1ffff, 0x3ffff, 0x7ffff,
352     0xfffff, 0x1fffff, 0x3fffff, 0x7fffff, 0xffffff, 0x1ffffff, 0x3ffffff,
353     0x7ffffff, 0xfffffff, 0x1fffffff, 0x3fffffff, 0x7fffffff, 0xffffffff
354 };
355 
356 template<bool T>
357 const unsigned block_set_table<T>::_right[32] = {
358     0xffffffff, 0xfffffffe, 0xfffffffc, 0xfffffff8, 0xfffffff0,
359     0xffffffe0, 0xffffffc0, 0xffffff80, 0xffffff00, 0xfffffe00,
360     0xfffffc00, 0xfffff800, 0xfffff000, 0xffffe000, 0xffffc000,
361     0xffff8000, 0xffff0000, 0xfffe0000, 0xfffc0000, 0xfff80000,
362     0xfff00000, 0xffe00000, 0xffc00000, 0xff800000, 0xff000000,
363     0xfe000000, 0xfc000000, 0xf8000000, 0xf0000000, 0xe0000000,
364     0xc0000000, 0x80000000
365 };
366 
367 //---------------------------------------------------------------------
368 
369 
370 
371 /*! @brief Default GAP lengths table.
372     @ingroup gapfunc
373 */
374 template<bool T> struct gap_len_table
375 {
376     static const gap_word_t _len[bm::gap_levels];
377 };
378 
379 template<bool T>
380 const gap_word_t gap_len_table<T>::_len[bm::gap_levels] =
381                 { 128, 256, 512, bm::gap_max_buff_len };
382 
383 
384 /*! @brief Alternative GAP lengths table.
385     Good for for memory saver mode and very sparse bitsets.
386 
387     @ingroup gapfunc
388 */
389 template<bool T> struct gap_len_table_min
390 {
391     static const gap_word_t _len[bm::gap_levels];
392 };
393 
394 template<bool T>
395 const gap_word_t gap_len_table_min<T>::_len[bm::gap_levels] =
396                                 { 32, 96, 128, 512 };
397 
398 
399 /*! @brief Non-linear size growth GAP lengths table.
400     @ingroup gapfunc
401 */
402 template<bool T> struct gap_len_table_nl
403 {
404     static const gap_word_t _len[bm::gap_levels];
405 };
406 
407 template<bool T>
408 const gap_word_t gap_len_table_nl<T>::_len[bm::gap_levels] =
409                 { 32, 128, 512, bm::gap_max_buff_len };
410 
411 /*!
412     @brief codes for supported SIMD optimizations
413 */
414 enum simd_codes
415 {
416     simd_none  = 0,   ///!< No SIMD or any other optimization
417     simd_sse2  = 1,   ///!< Intel SSE2
418     simd_sse42 = 2,   ///!< Intel SSE4.2
419     simd_avx2  = 5,   ///!< Intel AVX2
420     simd_avx512  = 6  ///!< Intel AVX512
421 };
422 
423 
424 /*!
425    \brief Byte orders recognized by the library.
426    @internal
427 */
428 enum ByteOrder
429 {
430     BigEndian    = 0,
431     LittleEndian = 1
432 };
433 
434 /**
435     Internal structure. Different global settings.
436     @internal
437 */
438 template<bool T> struct globals
439 {
440     struct bo
441     {
442         ByteOrder  _byte_order;
443 
boglobals::bo444         bo()
445         {
446             unsigned x;
447             unsigned char *s = (unsigned char *)&x;
448             s[0] = 1; s[1] = 2; s[2] = 3; s[3] = 4;
449             if(x == 0x04030201)
450             {
451                 _byte_order = LittleEndian;
452                 return;
453             }
454             if(x == 0x01020304)
455             {
456                 _byte_order = BigEndian;
457                 return;
458             }
459             _byte_order = LittleEndian;
460         }
461     };
462 
463     static bo  _bo;
byte_orderglobals464     static ByteOrder byte_order() { return _bo._byte_order; }
465 };
466 template<bool T> typename globals<T>::bo globals<T>::_bo;
467 
468 
469 } // namespace
470 
471 #endif
472 
473