1 /*
2  * Amalgamated copy of CRoaring 0.2.66, modified for GTK to reduce compiler
3  * warnings.
4  *
5  * Copyright 2016-2020 The CRoaring authors
6  * Copyright 2020 Benjamin Otte
7  *
8  * Licensed under the Apache License, Version 2.0 (the "License");
9  * you may not use this file except in compliance with the License.
10  * You may obtain a copy of the License at
11  *
12  *    http://www.apache.org/licenses/LICENSE-2.0
13  *
14  * Unless required by applicable law or agreed to in writing, software
15  * distributed under the License is distributed on an "AS IS" BASIS,
16  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17  * See the License for the specific language governing permissions and
18  * limitations under the License.
19  *
20  * SPDX-License-Identifier: Apache-2.0
21  */
22 
23 /* begin file include/roaring/roaring_version.h */
24 // /include/roaring/roaring_version.h automatically generated by release.py, do not change by hand
25 #ifndef ROARING_INCLUDE_ROARING_VERSION
26 #define ROARING_INCLUDE_ROARING_VERSION
27 #define ROARING_VERSION = 0.2.66,
28 enum {
29     ROARING_VERSION_MAJOR = 0,
30     ROARING_VERSION_MINOR = 2,
31     ROARING_VERSION_REVISION = 66
32 };
33 #endif // ROARING_INCLUDE_ROARING_VERSION
34 /* end file include/roaring/roaring_version.h */
35 /* begin file include/roaring/portability.h */
36 /*
37  * portability.h
38  *
39  */
40 
41 #ifndef INCLUDE_PORTABILITY_H_
42 #define INCLUDE_PORTABILITY_H_
43 
44 #ifndef _GNU_SOURCE
45 #define _GNU_SOURCE
46 #endif
47 #ifndef __STDC_FORMAT_MACROS
48 #define __STDC_FORMAT_MACROS 1
49 #endif
50 
51 #if !(defined(_POSIX_C_SOURCE)) || (_POSIX_C_SOURCE < 200809L)
52 #define _POSIX_C_SOURCE 200809L
53 #endif
54 #if !(defined(_XOPEN_SOURCE)) || (_XOPEN_SOURCE < 700)
55 #define _XOPEN_SOURCE 700
56 #endif
57 
58 #include <stdbool.h>
59 #include <stdint.h>
60 #include <stdlib.h>  // will provide posix_memalign with _POSIX_C_SOURCE as defined above
61 #if !(defined(__APPLE__)) && !(defined(__FreeBSD__)) && !(defined(__OpenBSD__)) && \
62 	!(defined(__DragonFly__))
63 #include <malloc.h>  // this should never be needed but there are some reports that it is needed.
64 #endif
65 
66 
67 #if defined(_MSC_VER) && !defined(__clang__) && !defined(_WIN64) && !defined(ROARING_ACK_32BIT)
68 #pragma message( \
69     "You appear to be attempting a 32-bit build under Visual Studio. We recommend a 64-bit build instead.")
70 #endif
71 
72 #if defined(__SIZEOF_LONG_LONG__) && __SIZEOF_LONG_LONG__ != 8
73 #error This code assumes 64-bit long longs (by use of the GCC intrinsics). Your system is not currently supported.
74 #endif
75 
76 #if defined(_MSC_VER)
77 #define __restrict__ __restrict
78 #endif
79 
80 #ifndef DISABLE_X64  // some users may want to compile as if they did not have
81                      // an x64 processor
82 
83 ///////////////////////
84 /// We support X64 hardware in the following manner:
85 ///
86 /// if IS_X64 is defined then we have at least SSE and SSE2
87 /// (All Intel processors sold in the recent past have at least SSE and SSE2 support,
88 /// going back to the Pentium 4.)
89 ///
90 /// if USESSE4 is defined then we assume at least SSE4.2, SSE4.1,
91 ///                   SSSE3, SSE3... + IS_X64
92 /// if USEAVX is defined, then we assume AVX2, AVX + USESSE4
93 ///
94 /// So if you have hardware that supports AVX but not AVX2, then "USEAVX"
95 /// won't be enabled.
96 /// If you have hardware that supports SSE4.1, but not SSE4.2, then USESSE4
97 /// won't be defined.
98 //////////////////////
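// Illustrative note (added for clarity; the exact flags are a toolchain
// assumption, not something this header checks): with GCC or Clang these
// predefined macros usually come from the target options, e.g.
//
//     cc -O2 -msse4.2 -mpopcnt ...   -> __SSE4_2__ and __POPCNT__, hence USESSE4
//     cc -O2 -mavx2 ...              -> __AVX2__, hence USEAVX (unless DISABLEAVX)
//     cc -O2 -march=native ...       -> whatever the build machine supports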
99 
100 // unless DISABLEAVX was defined, if we have __AVX2__, we enable AVX
101 #if (!defined(USEAVX)) && (!defined(DISABLEAVX)) && (defined(__AVX2__))
102 #define USEAVX
103 #endif
104 
105 // if we have __SSE4_2__, we enable SSE4
106 #if (defined(__POPCNT__)) && (defined(__SSE4_2__))
107 #define USESSE4
108 #endif
109 
110 #if defined(USEAVX) || defined(__x86_64__) || defined(_M_X64)
111 // we have an x64 processor
112 #define IS_X64
113 // we include the intrinsic header
114 #ifndef _MSC_VER
115 /* Non-Microsoft C/C++-compatible compiler */
116 #include <x86intrin.h>  // on some recent GCC, this will declare posix_memalign
117 #endif
118 #endif
119 
120 #if !defined(USENEON) && !defined(DISABLENEON) && defined(__ARM_NEON)
121 #  define USENEON
122 #endif
123 #if defined(USENEON)
124 #  include <arm_neon.h>
125 #endif
126 
127 #ifndef _MSC_VER
128 /* Non-Microsoft C/C++-compatible compiler, assumes that it supports inline
129  * assembly */
130 #define ROARING_INLINE_ASM
131 #endif
132 
133 #ifdef USEAVX
134 #define USESSE4             // if we have AVX, then we have SSE4
135 #define USE_BMI             // we assume that AVX2 and BMI go hand in hand
136 #define USEAVX2FORDECODING  // optimization
137 // vector operations should work on not just AVX
138 #define ROARING_VECTOR_OPERATIONS_ENABLED  // vector unions (optimization)
139 #endif
140 
141 #endif  // DISABLE_X64
142 
143 #ifdef _MSC_VER
144 /* Microsoft C/C++-compatible compiler */
145 #include <intrin.h>
146 
147 #ifndef __clang__  // if one compiles with MSVC *with* clang, then these
148                    // intrinsics are defined!!!
149 // sadly there is no way to check whether we are missing these intrinsics
150 // specifically.
151 
152 /* wrappers for Visual Studio built-ins that look like gcc built-ins */
153 /* result might be undefined when input_num is zero */
154 static inline int __builtin_ctzll(unsigned long long input_num) {
155     unsigned long index;
156 #ifdef _WIN64  // highly recommended!!!
157     _BitScanForward64(&index, input_num);
158 #else  // if we must support 32-bit Windows
159     if ((uint32_t)input_num != 0) {
160         _BitScanForward(&index, (uint32_t)input_num);
161     } else {
162         _BitScanForward(&index, (uint32_t)(input_num >> 32));
163         index += 32;
164     }
165 #endif
166     return index;
167 }
168 
169 /* result might be undefined when input_num is zero */
170 static inline int __builtin_clzll(unsigned long long input_num) {
171     unsigned long index;
172 #ifdef _WIN64  // highly recommended!!!
173     _BitScanReverse64(&index, input_num);
174 #else  // if we must support 32-bit Windows
175     if (input_num > 0xFFFFFFFF) {
176         _BitScanReverse(&index, (uint32_t)(input_num >> 32));
177         index += 32;
178     } else {
179         _BitScanReverse(&index, (uint32_t)(input_num));
180     }
181 #endif
182     return 63 - index;
183 }
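/* Quick sanity sketch for the two wrappers above (illustrative only):
 *
 *     __builtin_ctzll(0x10ULL) == 4    // four trailing zero bits
 *     __builtin_clzll(0x10ULL) == 59   // 63 minus the index (4) of the highest set bit
 *
 * As with the GCC builtins they emulate, the result is undefined for a zero input. */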
184 
185 /* result might be undefined when input_num is zero */
186 #ifdef USESSE4
187 /* POPCNT support was added to processors around the release of SSE4.2 */
188 /* USESSE4 flag guarantees POPCNT support */
189 static inline int __builtin_popcountll(unsigned long long input_num) {
190 #ifdef _WIN64  // highly recommended!!!
191 	return (int)__popcnt64(input_num);
192 #else  // if we must support 32-bit Windows
193 	return (int)(__popcnt((uint32_t)input_num) +
194 		__popcnt((uint32_t)(input_num >> 32)));
195 #endif
196 }
197 #else
198 /* software implementation avoids POPCNT */
199 static inline int __builtin_popcountll(unsigned long long input_num) {
200 	const uint64_t m1 = 0x5555555555555555; //binary: 0101...
201 	const uint64_t m2 = 0x3333333333333333; //binary: 00110011..
202 	const uint64_t m4 = 0x0f0f0f0f0f0f0f0f; //binary:  4 zeros,  4 ones ...
203 	const uint64_t h01 = 0x0101010101010101; //the sum of 256 to the power of 0,1,2,3...
204 
205 	input_num -= (input_num >> 1) & m1;
206 	input_num = (input_num & m2) + ((input_num >> 2) & m2);
207 	input_num = (input_num + (input_num >> 4)) & m4;
208 	return (input_num * h01) >> 56;
209 }
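/* The fallback above is the classic SWAR popcount: adjacent bit pairs are
 * summed, then nibbles, then bytes, and the multiplication by h01 gathers the
 * per-byte counts into the most significant byte before the final shift by 56.
 * For example (illustrative only), __builtin_popcountll(0xF0F0ULL) == 8. */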
210 #endif
211 
212 /* Use #define so this is effective even under /Ob0 (no inline) */
213 #define __builtin_unreachable() __assume(0)
214 #endif
215 
216 #endif
217 
218 // portable version of  posix_memalign
219 static inline void *roaring_bitmap_aligned_malloc(size_t alignment, size_t size) {
220     void *p;
221 #ifdef _MSC_VER
222     p = _aligned_malloc(size, alignment);
223 #elif defined(__MINGW32__) || defined(__MINGW64__)
224     p = __mingw_aligned_malloc(size, alignment);
225 #else
226     // somehow, if this is used before including "x86intrin.h", it creates an
227     // implicit-declaration warning.
228     if (posix_memalign(&p, alignment, size) != 0) return NULL;
229 #endif
230     return p;
231 }
232 
233 static inline void roaring_bitmap_aligned_free(void *memblock) {
234 #ifdef _MSC_VER
235     _aligned_free(memblock);
236 #elif defined(__MINGW32__) || defined(__MINGW64__)
237     __mingw_aligned_free(memblock);
238 #else
239     free(memblock);
240 #endif
241 }
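/* Illustrative usage of the two helpers above (an assumption about intended
 * use, not an API contract stated here): memory from
 * roaring_bitmap_aligned_malloc() should be released with
 * roaring_bitmap_aligned_free() rather than plain free(), since on MSVC and
 * MinGW it comes from a different allocator.
 *
 *     uint64_t *words = (uint64_t *)roaring_bitmap_aligned_malloc(32, 1024 * sizeof(uint64_t));
 *     if (words != NULL) {
 *         // ... use the 32-byte-aligned buffer ...
 *         roaring_bitmap_aligned_free(words);
 *     }
 */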
242 
243 #if defined(_MSC_VER)
244 #define ALIGNED(x) __declspec(align(x))
245 #else
246 #if defined(__GNUC__)
247 #define ALIGNED(x) __attribute__((aligned(x)))
248 #endif
249 #endif
250 
251 #ifdef __GNUC__
252 #define WARN_UNUSED __attribute__((warn_unused_result))
253 #else
254 #define WARN_UNUSED
255 #endif
256 
257 #define IS_BIG_ENDIAN (*(uint16_t *)"\0\xff" < 0x100)
258 
259 static inline int hamming(uint64_t x) {
260 #ifdef USESSE4
261     return (int) _mm_popcnt_u64(x);
262 #else
263     // won't work under Visual Studio, but hopefully we have _mm_popcnt_u64 in
264     // many cases
265     return __builtin_popcountll(x);
266 #endif
267 }
268 
269 #ifndef UINT64_C
270 #define UINT64_C(c) (c##ULL)
271 #endif
272 
273 #ifndef UINT32_C
274 #define UINT32_C(c) (c##UL)
275 #endif
276 
277 #endif /* INCLUDE_PORTABILITY_H_ */
278 /* end file include/roaring/portability.h */
279 /* begin file include/roaring/containers/perfparameters.h */
280 #ifndef PERFPARAMETERS_H_
281 #define PERFPARAMETERS_H_
282 
283 #include <stdbool.h>
284 
285 /**
286 During lazy computations, we can transform array containers into bitset
287 containers as long as we can expect them to have at least
288 ARRAY_LAZY_LOWERBOUND values.
289 */
290 enum { ARRAY_LAZY_LOWERBOUND = 1024 };
291 
292 /* default initial size of a run container
293    setting it to zero delays the malloc.*/
294 enum { RUN_DEFAULT_INIT_SIZE = 0 };
295 
296 /* default initial size of an array container
297    setting it to zero delays the malloc */
298 enum { ARRAY_DEFAULT_INIT_SIZE = 0 };
299 
300 /* automatic bitset conversion during lazy or */
301 #ifndef LAZY_OR_BITSET_CONVERSION
302 #define LAZY_OR_BITSET_CONVERSION true
303 #endif
304 
305 /* automatically attempt to convert a bitset to a full run during lazy
306  * evaluation */
307 #ifndef LAZY_OR_BITSET_CONVERSION_TO_FULL
308 #define LAZY_OR_BITSET_CONVERSION_TO_FULL true
309 #endif
310 
311 /* automatically attempt to convert a bitset to a full run */
312 #ifndef OR_BITSET_CONVERSION_TO_FULL
313 #define OR_BITSET_CONVERSION_TO_FULL true
314 #endif
315 
316 #endif
317 /* end file include/roaring/containers/perfparameters.h */
318 /* begin file include/roaring/array_util.h */
319 #ifndef ARRAY_UTIL_H
320 #define ARRAY_UTIL_H
321 
322 #include <stddef.h>  // for size_t
323 #include <stdint.h>
324 
325 
326 /*
327  *  Good old binary search.
328  *  Assumes that array is sorted, has logarithmic complexity.
329  *  if the result is x, then:
330  *     if ( x>=0 )  you have array[x] = ikey
331  *     if ( x<0 ) then inserting ikey at position -x-1 in array (ensuring that array[-x-1]=ikey)
332  *                   keeps the array sorted.
333  */
334 static inline int32_t binarySearch(const uint16_t *array, int32_t lenarray,
335                             uint16_t ikey) {
336     int32_t low = 0;
337     int32_t high = lenarray - 1;
338     while (low <= high) {
339         int32_t middleIndex = (low + high) >> 1;
340         uint16_t middleValue = array[middleIndex];
341         if (middleValue < ikey) {
342             low = middleIndex + 1;
343         } else if (middleValue > ikey) {
344             high = middleIndex - 1;
345         } else {
346             return middleIndex;
347         }
348     }
349     return -(low + 1);
350 }
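/* Worked example of the return convention (illustrative only):
 *
 *     uint16_t a[] = {1, 3, 5};
 *     binarySearch(a, 3, 3);   // returns  1, since a[1] == 3
 *     binarySearch(a, 3, 4);   // returns -3, so inserting at -(-3)-1 == 2 keeps a sorted
 */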
351 
352 /**
353  * Galloping search
354  * Assumes that array is sorted, has logarithmic complexity.
355  * If the result is x and x == length, then all values in the array at positions greater than pos
356  *    are smaller than min.
357  * Otherwise, x is the first index greater than pos such that array[x] >= min.
358  */
359 static inline int32_t advanceUntil(const uint16_t *array, int32_t pos,
360                                    int32_t length, uint16_t min) {
361     int32_t lower = pos + 1;
362 
363     if ((lower >= length) || (array[lower] >= min)) {
364         return lower;
365     }
366 
367     int32_t spansize = 1;
368 
369     while ((lower + spansize < length) && (array[lower + spansize] < min)) {
370         spansize <<= 1;
371     }
372     int32_t upper = (lower + spansize < length) ? lower + spansize : length - 1;
373 
374     if (array[upper] == min) {
375         return upper;
376     }
377     if (array[upper] < min) {
378         // means the array has no item >= min beyond pos, so return length
384         return length;
385     }
386 
387     // we know that the next-smallest span was too small
388     lower += (spansize >> 1);
389 
390     int32_t mid = 0;
391     while (lower + 1 != upper) {
392         mid = (lower + upper) >> 1;
393         if (array[mid] == min) {
394             return mid;
395         } else if (array[mid] < min) {
396             lower = mid;
397         } else {
398             upper = mid;
399         }
400     }
401     return upper;
402 }
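/* Worked example (illustrative only): with array == {2, 4, 6, 8, 10},
 * advanceUntil(array, 0, 5, 7) gallops with spans 1 and 2 past index 0 and
 * returns 3, the first index after pos == 0 whose value (8) is >= 7.
 * If no such value existed, the function would return length (5). */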
403 
404 /**
405  * Returns the number of elements which are less than $ikey.
406  * Array elements must be unique and sorted.
407  */
408 static inline int32_t count_less(const uint16_t *array, int32_t lenarray,
409                                  uint16_t ikey) {
410     if (lenarray == 0) return 0;
411     int32_t pos = binarySearch(array, lenarray, ikey);
412     return pos >= 0 ? pos : -(pos+1);
413 }
414 
415 /**
416  * Returns the number of elements which are greater than $ikey.
417  * Array elements must be unique and sorted.
418  */
419 static inline int32_t count_greater(const uint16_t *array, int32_t lenarray,
420                                     uint16_t ikey) {
421     if (lenarray == 0) return 0;
422     int32_t pos = binarySearch(array, lenarray, ikey);
423     if (pos >= 0) {
424         return lenarray - (pos+1);
425     } else {
426         return lenarray - (-pos-1);
427     }
428 }
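/* For instance (illustrative only), with array == {2, 4, 6}:
 *
 *     count_less(array, 3, 4)    == 1   // only 2 is smaller
 *     count_greater(array, 3, 4) == 1   // only 6 is larger
 *
 * For an absent key such as 5, the negative binarySearch result still yields
 * the right counts: count_less == 2 and count_greater == 1. */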
429 
430 /**
431  * From Schlegel et al., Fast Sorted-Set Intersection using SIMD Instructions
432  * Optimized by D. Lemire on May 3rd 2013
433  *
434  * C should have capacity greater than the minimum of s_a and s_b + 8
435  * where 8 is sizeof(__m128i)/sizeof(uint16_t).
436  */
437 int32_t intersect_vector16(const uint16_t *__restrict__ A, size_t s_a,
438                            const uint16_t *__restrict__ B, size_t s_b,
439                            uint16_t *C);
440 
441 /**
442  * Compute the cardinality of the intersection using SSE4 instructions
443  */
444 int32_t intersect_vector16_cardinality(const uint16_t *__restrict__ A,
445                                        size_t s_a,
446                                        const uint16_t *__restrict__ B,
447                                        size_t s_b);
448 
449 /* Computes the intersection between one small and one large set of uint16_t.
450  * Stores the result into buffer and returns the number of elements. */
451 int32_t intersect_skewed_uint16(const uint16_t *smallarray, size_t size_s,
452                                 const uint16_t *largearray, size_t size_l,
453                                 uint16_t *buffer);
454 
455 /* Computes the size of the intersection between one small and one large set of
456  * uint16_t. */
457 int32_t intersect_skewed_uint16_cardinality(const uint16_t *smallarray,
458                                             size_t size_s,
459                                             const uint16_t *largearray,
460                                             size_t size_l);
461 
462 
463 /* Check whether the size of the intersection between one small and one large set of uint16_t is non-zero. */
464 bool intersect_skewed_uint16_nonempty(const uint16_t *smallarray, size_t size_s,
465                                 const uint16_t *largearray, size_t size_l);
466 /**
467  * Generic intersection function.
468  */
469 int32_t intersect_uint16(const uint16_t *A, const size_t lenA,
470                          const uint16_t *B, const size_t lenB, uint16_t *out);
471 /**
472  * Compute the size of the intersection (generic).
473  */
474 int32_t intersect_uint16_cardinality(const uint16_t *A, const size_t lenA,
475                                      const uint16_t *B, const size_t lenB);
476 
477 /**
478  * Check whether the size of the intersection is non-zero.
479  */
480 bool intersect_uint16_nonempty(const uint16_t *A, const size_t lenA,
481                          const uint16_t *B, const size_t lenB);
482 /**
483  * Generic union function.
484  */
485 size_t union_uint16(const uint16_t *set_1, size_t size_1, const uint16_t *set_2,
486                     size_t size_2, uint16_t *buffer);
487 
488 /**
489  * Generic XOR function.
490  */
491 int32_t xor_uint16(const uint16_t *array_1, int32_t card_1,
492                    const uint16_t *array_2, int32_t card_2, uint16_t *out);
493 
494 /**
495  * Generic difference function (ANDNOT).
496  */
497 int difference_uint16(const uint16_t *a1, int length1, const uint16_t *a2,
498                       int length2, uint16_t *a_out);
499 
500 /**
501  * Generic intersection function.
502  */
503 size_t intersection_uint32(const uint32_t *A, const size_t lenA,
504                            const uint32_t *B, const size_t lenB, uint32_t *out);
505 
506 /**
507  * Generic intersection function, returns just the cardinality.
508  */
509 size_t intersection_uint32_card(const uint32_t *A, const size_t lenA,
510                                 const uint32_t *B, const size_t lenB);
511 
512 /**
513  * Generic union function.
514  */
515 size_t union_uint32(const uint32_t *set_1, size_t size_1, const uint32_t *set_2,
516                     size_t size_2, uint32_t *buffer);
517 
518 /**
519  * A fast SSE-based union function.
520  */
521 uint32_t union_vector16(const uint16_t *__restrict__ set_1, uint32_t size_1,
522                         const uint16_t *__restrict__ set_2, uint32_t size_2,
523                         uint16_t *__restrict__ buffer);
524 /**
525  * A fast SSE-based XOR function.
526  */
527 uint32_t xor_vector16(const uint16_t *__restrict__ array1, uint32_t length1,
528                       const uint16_t *__restrict__ array2, uint32_t length2,
529                       uint16_t *__restrict__ output);
530 
531 /**
532  * A fast SSE-based difference function.
533  */
534 int32_t difference_vector16(const uint16_t *__restrict__ A, size_t s_a,
535                             const uint16_t *__restrict__ B, size_t s_b,
536                             uint16_t *C);
537 
538 /**
539  * Generic union function, returns just the cardinality.
540  */
541 size_t union_uint32_card(const uint32_t *set_1, size_t size_1,
542                          const uint32_t *set_2, size_t size_2);
543 
544 /**
545 * combines union_uint16 and  union_vector16 optimally
546 */
547 size_t fast_union_uint16(const uint16_t *set_1, size_t size_1, const uint16_t *set_2,
548                     size_t size_2, uint16_t *buffer);
549 
550 
551 bool memequals(const void *s1, const void *s2, size_t n);
552 
553 #endif
554 /* end file include/roaring/array_util.h */
555 /* begin file include/roaring/roaring_types.h */
556 /*
557   Typedefs used by various components
558 */
559 
560 #ifndef ROARING_TYPES_H
561 #define ROARING_TYPES_H
562 
563 typedef bool (*roaring_iterator)(uint32_t value, void *param);
564 typedef bool (*roaring_iterator64)(uint64_t value, void *param);
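/* Sketch of a callback matching roaring_iterator (illustrative; the iteration
 * entry points that consume it live elsewhere in the library):
 *
 *     static bool sum_callback(uint32_t value, void *param) {
 *         *(uint64_t *)param += value;
 *         return true;  // returning false stops the iteration early
 *     }
 */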
565 
566 /**
567 *  (For advanced users.)
568 * The roaring_statistics_t can be used to collect detailed statistics about
569 * the composition of a roaring bitmap.
570 */
571 typedef struct roaring_statistics_s {
572     uint32_t n_containers; /* number of containers */
573 
574     uint32_t n_array_containers;  /* number of array containers */
575     uint32_t n_run_containers;    /* number of run containers */
576     uint32_t n_bitset_containers; /* number of bitmap containers */
577 
578     uint32_t
579         n_values_array_containers;    /* number of values in array containers */
580     uint32_t n_values_run_containers; /* number of values in run containers */
581     uint32_t
582         n_values_bitset_containers; /* number of values in  bitmap containers */
583 
584     uint32_t n_bytes_array_containers;  /* number of allocated bytes in array
585                                            containers */
586     uint32_t n_bytes_run_containers;    /* number of allocated bytes in run
587                                            containers */
588     uint32_t n_bytes_bitset_containers; /* number of allocated bytes in  bitmap
589                                            containers */
590 
591     uint32_t
592         max_value; /* the maximal value, undefined if cardinality is zero */
593     uint32_t
594         min_value; /* the minimal value, undefined if cardinality is zero */
595     uint64_t sum_value; /* the sum of all values (could be used to compute
596                            average) */
597 
598     uint64_t cardinality; /* total number of values stored in the bitmap */
599 
600     // and n_values_arrays, n_values_rle, n_values_bitmap
601 } roaring_statistics_t;
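/* Illustrative usage (assuming the roaring_bitmap_statistics() entry point
 * declared elsewhere in the library):
 *
 *     roaring_statistics_t stats;
 *     roaring_bitmap_statistics(bitmap, &stats);
 *     // stats.n_containers, stats.cardinality, stats.min_value, ... are now filled in
 */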
602 
603 #endif /* ROARING_TYPES_H */
604 /* end file include/roaring/roaring_types.h */
605 /* begin file include/roaring/utilasm.h */
606 /*
607  * utilasm.h
608  *
609  */
610 
611 #ifndef INCLUDE_UTILASM_H_
612 #define INCLUDE_UTILASM_H_
613 
614 
615 #if defined(USE_BMI) & defined(ROARING_INLINE_ASM)
616 #define ASMBITMANIPOPTIMIZATION  // optimization flag
617 
618 #define ASM_SHIFT_RIGHT(srcReg, bitsReg, destReg) \
619     __asm volatile("shrx %1, %2, %0"              \
620                    : "=r"(destReg)                \
621                    :             /* write */      \
622                    "r"(bitsReg), /* read only */  \
623                    "r"(srcReg)   /* read only */  \
624                    )
625 
626 #define ASM_INPLACESHIFT_RIGHT(srcReg, bitsReg)  \
627     __asm volatile("shrx %1, %0, %0"             \
628                    : "+r"(srcReg)                \
629                    :            /* read/write */ \
630                    "r"(bitsReg) /* read only */  \
631                    )
632 
633 #define ASM_SHIFT_LEFT(srcReg, bitsReg, destReg) \
634     __asm volatile("shlx %1, %2, %0"             \
635                    : "=r"(destReg)               \
636                    :             /* write */     \
637                    "r"(bitsReg), /* read only */ \
638                    "r"(srcReg)   /* read only */ \
639                    )
640 // set bit at position testBit within testByte to 1 and
641 // copy cmovDst to cmovSrc if that bit was previously clear
642 #define ASM_SET_BIT_INC_WAS_CLEAR(testByte, testBit, count) \
643     __asm volatile(                                         \
644         "bts %2, %0\n"                                      \
645         "sbb $-1, %1\n"                                     \
646         : "+r"(testByte), /* read/write */                  \
647           "+r"(count)                                       \
648         :            /* read/write */                       \
649         "r"(testBit) /* read only */                        \
650         )
651 
652 #define ASM_CLEAR_BIT_DEC_WAS_SET(testByte, testBit, count) \
653     __asm volatile(                                         \
654         "btr %2, %0\n"                                      \
655         "sbb $0, %1\n"                                      \
656         : "+r"(testByte), /* read/write */                  \
657           "+r"(count)                                       \
658         :            /* read/write */                       \
659         "r"(testBit) /* read only */                        \
660         )
661 
662 #define ASM_BT64(testByte, testBit, count) \
663     __asm volatile(                        \
664         "bt %2,%1\n"                       \
665         "sbb %0,%0" /*could use setb */    \
666         : "=r"(count)                      \
667         :              /* write */         \
668         "r"(testByte), /* read only */     \
669         "r"(testBit)   /* read only */     \
670         )
671 
672 #endif  // USE_BMI
673 #endif  /* INCLUDE_UTILASM_H_ */
674 /* end file include/roaring/utilasm.h */
675 /* begin file include/roaring/bitset_util.h */
676 #ifndef BITSET_UTIL_H
677 #define BITSET_UTIL_H
678 
679 #include <stdint.h>
680 
681 
682 /*
683  * Set all bits in indexes [start,end) to true.
684  */
685 static inline void bitset_set_range(uint64_t *bitmap, uint32_t start,
686                                     uint32_t end) {
687     if (start == end) return;
688     uint32_t firstword = start / 64;
689     uint32_t endword = (end - 1) / 64;
690     if (firstword == endword) {
691         bitmap[firstword] |= ((~UINT64_C(0)) << (start % 64)) &
692                              ((~UINT64_C(0)) >> ((~end + 1) % 64));
693         return;
694     }
695     bitmap[firstword] |= (~UINT64_C(0)) << (start % 64);
696     for (uint32_t i = firstword + 1; i < endword; i++) bitmap[i] = ~UINT64_C(0);
697     bitmap[endword] |= (~UINT64_C(0)) >> ((~end + 1) % 64);
698 }
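/* Example (illustrative only): bitset_set_range(bitmap, 3, 68) sets bits 3..63
 * of bitmap[0] and bits 0..3 of bitmap[1].  The (~end + 1) % 64 expression is
 * (-end) % 64 written without negating an unsigned value, so the last word is
 * masked with ~UINT64_C(0) >> ((~68 + 1) % 64), i.e. a shift by 60 that keeps
 * the low four bits. */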
699 
700 
701 /*
702  * Find the cardinality of the bitset in [start,start+lenminusone]
703  */
704 static inline int bitset_lenrange_cardinality(uint64_t *bitmap, uint32_t start,
705                                               uint32_t lenminusone) {
706     uint32_t firstword = start / 64;
707     uint32_t endword = (start + lenminusone) / 64;
708     if (firstword == endword) {
709         return hamming(bitmap[firstword] &
710                        ((~UINT64_C(0)) >> ((63 - lenminusone) % 64))
711                            << (start % 64));
712     }
713     int answer = hamming(bitmap[firstword] & ((~UINT64_C(0)) << (start % 64)));
714     for (uint32_t i = firstword + 1; i < endword; i++) {
715         answer += hamming(bitmap[i]);
716     }
717     answer +=
718         hamming(bitmap[endword] &
719                 (~UINT64_C(0)) >> (((~start + 1) - lenminusone - 1) % 64));
720     return answer;
721 }
722 
723 /*
724  * Check whether the cardinality of the bitset in [start,start+lenminusone] is 0
725  */
726 static inline bool bitset_lenrange_empty(uint64_t *bitmap, uint32_t start,
727         uint32_t lenminusone) {
728     uint32_t firstword = start / 64;
729     uint32_t endword = (start + lenminusone) / 64;
730     if (firstword == endword) {
731       return (bitmap[firstword] & ((~UINT64_C(0)) >> ((63 - lenminusone) % 64))
732               << (start % 64)) == 0;
733     }
734     if(((bitmap[firstword] & ((~UINT64_C(0)) << (start%64)))) != 0) return false;
735     for (uint32_t i = firstword + 1; i < endword; i++) {
736      if(bitmap[i] != 0) return false;
737     }
738     if((bitmap[endword] & (~UINT64_C(0)) >> (((~start + 1) - lenminusone - 1) % 64)) != 0) return false;
739     return true;
740 }
741 
742 
743 /*
744  * Set all bits in indexes [start,start+lenminusone] to true.
745  */
746 static inline void bitset_set_lenrange(uint64_t *bitmap, uint32_t start,
747                                        uint32_t lenminusone) {
748     uint32_t firstword = start / 64;
749     uint32_t endword = (start + lenminusone) / 64;
750     if (firstword == endword) {
751         bitmap[firstword] |= ((~UINT64_C(0)) >> ((63 - lenminusone) % 64))
752                              << (start % 64);
753         return;
754     }
755     uint64_t temp = bitmap[endword];
756     bitmap[firstword] |= (~UINT64_C(0)) << (start % 64);
757     for (uint32_t i = firstword + 1; i < endword; i += 2)
758         bitmap[i] = bitmap[i + 1] = ~UINT64_C(0);
759     bitmap[endword] =
760         temp | (~UINT64_C(0)) >> (((~start + 1) - lenminusone - 1) % 64);
761 }
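/* Note on the loop above (descriptive comment added for clarity): the body
 * writes two words per iteration and may therefore touch bitmap[endword]
 * itself; that is why bitmap[endword] is saved into `temp` first and then
 * restored, OR'ed with the proper partial mask, on the last line. */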
762 
763 /*
764  * Flip all the bits in indexes [start,end).
765  */
766 static inline void bitset_flip_range(uint64_t *bitmap, uint32_t start,
767                                      uint32_t end) {
768     if (start == end) return;
769     uint32_t firstword = start / 64;
770     uint32_t endword = (end - 1) / 64;
771     bitmap[firstword] ^= ~((~UINT64_C(0)) << (start % 64));
772     for (uint32_t i = firstword; i < endword; i++) bitmap[i] = ~bitmap[i];
773     bitmap[endword] ^= ((~UINT64_C(0)) >> ((~end + 1) % 64));
774 }
775 
776 /*
777  * Set all bits in indexes [start,end) to false.
778  */
779 static inline void bitset_reset_range(uint64_t *bitmap, uint32_t start,
780                                       uint32_t end) {
781     if (start == end) return;
782     uint32_t firstword = start / 64;
783     uint32_t endword = (end - 1) / 64;
784     if (firstword == endword) {
785         bitmap[firstword] &= ~(((~UINT64_C(0)) << (start % 64)) &
786                                ((~UINT64_C(0)) >> ((~end + 1) % 64)));
787         return;
788     }
789     bitmap[firstword] &= ~((~UINT64_C(0)) << (start % 64));
790     for (uint32_t i = firstword + 1; i < endword; i++) bitmap[i] = UINT64_C(0);
791     bitmap[endword] &= ~((~UINT64_C(0)) >> ((~end + 1) % 64));
792 }
793 
794 /*
795  * Given a bitset containing "length" 64-bit words, write out the position
796  * of all the set bits to "out", values start at "base".
797  *
798  * The "out" pointer should be sufficient to store the actual number of bits
799  * set.
800  *
801  * Returns how many values were actually decoded.
802  *
803  * This function should only be expected to be faster than
804  * bitset_extract_setbits
805  * when the density of the bitset is high.
806  *
807  * This function uses AVX2 decoding.
808  */
809 size_t bitset_extract_setbits_avx2(uint64_t *bitset, size_t length, void *vout,
810                                    size_t outcapacity, uint32_t base);
811 
812 /*
813  * Given a bitset containing "length" 64-bit words, write out the position
814  * of all the set bits to "out", values start at "base".
815  *
816  * The "out" pointer should be sufficient to store the actual number of bits
817  *set.
818  *
819  * Returns how many values were actually decoded.
820  */
821 size_t bitset_extract_setbits(uint64_t *bitset, size_t length, void *vout,
822                               uint32_t base);
823 
824 /*
825  * Given a bitset containing "length" 64-bit words, write out the position
826  * of all the set bits to "out" as 16-bit integers, values start at "base" (can
827  *be set to zero)
828  *
829  * The "out" pointer should be sufficient to store the actual number of bits
830  *set.
831  *
832  * Returns how many values were actually decoded.
833  *
834  * This function should only be expected to be faster than
835  *bitset_extract_setbits_uint16
836  * when the density of the bitset is high.
837  *
838  * This function uses SSE decoding.
839  */
840 size_t bitset_extract_setbits_sse_uint16(const uint64_t *bitset, size_t length,
841                                          uint16_t *out, size_t outcapacity,
842                                          uint16_t base);
843 
844 /*
845  * Given a bitset containing "length" 64-bit words, write out the position
846  * of all the set bits to "out",  values start at "base"
847  * (can be set to zero)
848  *
849  * The "out" pointer should be sufficient to store the actual number of bits
850  *set.
851  *
852  * Returns how many values were actually decoded.
853  */
854 size_t bitset_extract_setbits_uint16(const uint64_t *bitset, size_t length,
855                                      uint16_t *out, uint16_t base);
856 
857 /*
858  * Given two bitsets containing "length" 64-bit words, write out the position
859  * of all the common set bits to "out", values start at "base"
860  * (can be set to zero)
861  *
862  * The "out" pointer should be sufficient to store the actual number of bits
863  * set.
864  *
865  * Returns how many values were actually decoded.
866  */
867 size_t bitset_extract_intersection_setbits_uint16(const uint64_t * __restrict__ bitset1,
868                                                   const uint64_t * __restrict__ bitset2,
869                                                   size_t length, uint16_t *out,
870                                                   uint16_t base);
871 
872 /*
873  * Given a bitset having cardinality card, set all bit values in the list (there
874  * are length of them)
875  * and return the updated cardinality. This evidently assumes that the bitset
876  * already contained data.
877  */
878 uint64_t bitset_set_list_withcard(void *bitset, uint64_t card,
879                                   const uint16_t *list, uint64_t length);
880 /*
881  * Given a bitset, set all bit values in the list (there
882  * are length of them).
883  */
884 void bitset_set_list(void *bitset, const uint16_t *list, uint64_t length);
885 
886 /*
887  * Given a bitset having cardinality card, unset all bit values in the list
888  * (there are length of them)
889  * and return the updated cardinality. This evidently assumes that the bitset
890  * already contained data.
891  */
892 uint64_t bitset_clear_list(void *bitset, uint64_t card, const uint16_t *list,
893                            uint64_t length);
894 
895 /*
896  * Given a bitset having cardinality card, toggle all bit values in the list
897  * (there are length of them)
898  * and return the updated cardinality. This evidently assumes that the bitset
899  * already contained data.
900  */
901 
902 uint64_t bitset_flip_list_withcard(void *bitset, uint64_t card,
903                                    const uint16_t *list, uint64_t length);
904 
905 void bitset_flip_list(void *bitset, const uint16_t *list, uint64_t length);
906 
907 #ifdef USEAVX
908 /***
909  * BEGIN Harley-Seal popcount functions.
910  */
911 
912 /**
913  * Compute the population count of a 256-bit word.
914  * This is not especially fast, but it is convenient as part of other functions.
915  */
916 static inline __m256i popcount256(__m256i v) {
917     const __m256i lookuppos = _mm256_setr_epi8(
918         /* 0 */ 4 + 0, /* 1 */ 4 + 1, /* 2 */ 4 + 1, /* 3 */ 4 + 2,
919         /* 4 */ 4 + 1, /* 5 */ 4 + 2, /* 6 */ 4 + 2, /* 7 */ 4 + 3,
920         /* 8 */ 4 + 1, /* 9 */ 4 + 2, /* a */ 4 + 2, /* b */ 4 + 3,
921         /* c */ 4 + 2, /* d */ 4 + 3, /* e */ 4 + 3, /* f */ 4 + 4,
922 
923         /* 0 */ 4 + 0, /* 1 */ 4 + 1, /* 2 */ 4 + 1, /* 3 */ 4 + 2,
924         /* 4 */ 4 + 1, /* 5 */ 4 + 2, /* 6 */ 4 + 2, /* 7 */ 4 + 3,
925         /* 8 */ 4 + 1, /* 9 */ 4 + 2, /* a */ 4 + 2, /* b */ 4 + 3,
926         /* c */ 4 + 2, /* d */ 4 + 3, /* e */ 4 + 3, /* f */ 4 + 4);
927     const __m256i lookupneg = _mm256_setr_epi8(
928         /* 0 */ 4 - 0, /* 1 */ 4 - 1, /* 2 */ 4 - 1, /* 3 */ 4 - 2,
929         /* 4 */ 4 - 1, /* 5 */ 4 - 2, /* 6 */ 4 - 2, /* 7 */ 4 - 3,
930         /* 8 */ 4 - 1, /* 9 */ 4 - 2, /* a */ 4 - 2, /* b */ 4 - 3,
931         /* c */ 4 - 2, /* d */ 4 - 3, /* e */ 4 - 3, /* f */ 4 - 4,
932 
933         /* 0 */ 4 - 0, /* 1 */ 4 - 1, /* 2 */ 4 - 1, /* 3 */ 4 - 2,
934         /* 4 */ 4 - 1, /* 5 */ 4 - 2, /* 6 */ 4 - 2, /* 7 */ 4 - 3,
935         /* 8 */ 4 - 1, /* 9 */ 4 - 2, /* a */ 4 - 2, /* b */ 4 - 3,
936         /* c */ 4 - 2, /* d */ 4 - 3, /* e */ 4 - 3, /* f */ 4 - 4);
937     const __m256i low_mask = _mm256_set1_epi8(0x0f);
938 
939     const __m256i lo = _mm256_and_si256(v, low_mask);
940     const __m256i hi = _mm256_and_si256(_mm256_srli_epi16(v, 4), low_mask);
941     const __m256i popcnt1 = _mm256_shuffle_epi8(lookuppos, lo);
942     const __m256i popcnt2 = _mm256_shuffle_epi8(lookupneg, hi);
943     return _mm256_sad_epu8(popcnt1, popcnt2);
944 }
945 
946 /**
947  * Simple CSA over 256 bits
948  */
949 static inline void CSA(__m256i *h, __m256i *l, __m256i a, __m256i b,
950                        __m256i c) {
951     const __m256i u = _mm256_xor_si256(a, b);
952     *h = _mm256_or_si256(_mm256_and_si256(a, b), _mm256_and_si256(u, c));
953     *l = _mm256_xor_si256(u, c);
954 }
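/* CSA is a carry-save adder applied bitwise across the 256-bit lanes: *l gets
 * the low bit of a + b + c (a ^ b ^ c) and *h gets the carry (the majority of
 * a, b, c).  The Harley-Seal loops below chain these adders so that, in the
 * main loop, popcount256() only has to run once per block of sixteen input words. */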
955 
956 /**
957  * Fast Harley-Seal AVX population count function
958  */
959 inline static uint64_t avx2_harley_seal_popcount256(const __m256i *data,
960                                                     const uint64_t size) {
961     __m256i total = _mm256_setzero_si256();
962     __m256i ones = _mm256_setzero_si256();
963     __m256i twos = _mm256_setzero_si256();
964     __m256i fours = _mm256_setzero_si256();
965     __m256i eights = _mm256_setzero_si256();
966     __m256i sixteens = _mm256_setzero_si256();
967     __m256i twosA, twosB, foursA, foursB, eightsA, eightsB;
968 
969     const uint64_t limit = size - size % 16;
970     uint64_t i = 0;
971 
972     for (; i < limit; i += 16) {
973         CSA(&twosA, &ones, ones, _mm256_lddqu_si256(data + i),
974             _mm256_lddqu_si256(data + i + 1));
975         CSA(&twosB, &ones, ones, _mm256_lddqu_si256(data + i + 2),
976             _mm256_lddqu_si256(data + i + 3));
977         CSA(&foursA, &twos, twos, twosA, twosB);
978         CSA(&twosA, &ones, ones, _mm256_lddqu_si256(data + i + 4),
979             _mm256_lddqu_si256(data + i + 5));
980         CSA(&twosB, &ones, ones, _mm256_lddqu_si256(data + i + 6),
981             _mm256_lddqu_si256(data + i + 7));
982         CSA(&foursB, &twos, twos, twosA, twosB);
983         CSA(&eightsA, &fours, fours, foursA, foursB);
984         CSA(&twosA, &ones, ones, _mm256_lddqu_si256(data + i + 8),
985             _mm256_lddqu_si256(data + i + 9));
986         CSA(&twosB, &ones, ones, _mm256_lddqu_si256(data + i + 10),
987             _mm256_lddqu_si256(data + i + 11));
988         CSA(&foursA, &twos, twos, twosA, twosB);
989         CSA(&twosA, &ones, ones, _mm256_lddqu_si256(data + i + 12),
990             _mm256_lddqu_si256(data + i + 13));
991         CSA(&twosB, &ones, ones, _mm256_lddqu_si256(data + i + 14),
992             _mm256_lddqu_si256(data + i + 15));
993         CSA(&foursB, &twos, twos, twosA, twosB);
994         CSA(&eightsB, &fours, fours, foursA, foursB);
995         CSA(&sixteens, &eights, eights, eightsA, eightsB);
996 
997         total = _mm256_add_epi64(total, popcount256(sixteens));
998     }
999 
1000     total = _mm256_slli_epi64(total, 4);  // * 16
1001     total = _mm256_add_epi64(
1002         total, _mm256_slli_epi64(popcount256(eights), 3));  // += 8 * ...
1003     total = _mm256_add_epi64(
1004         total, _mm256_slli_epi64(popcount256(fours), 2));  // += 4 * ...
1005     total = _mm256_add_epi64(
1006         total, _mm256_slli_epi64(popcount256(twos), 1));  // += 2 * ...
1007     total = _mm256_add_epi64(total, popcount256(ones));
1008     for (; i < size; i++)
1009         total =
1010             _mm256_add_epi64(total, popcount256(_mm256_lddqu_si256(data + i)));
1011 
1012     return (uint64_t)(_mm256_extract_epi64(total, 0)) +
1013            (uint64_t)(_mm256_extract_epi64(total, 1)) +
1014            (uint64_t)(_mm256_extract_epi64(total, 2)) +
1015            (uint64_t)(_mm256_extract_epi64(total, 3));
1016 }
1017 
1018 #define AVXPOPCNTFNC(opname, avx_intrinsic)                                    \
1019     static inline uint64_t avx2_harley_seal_popcount256_##opname(              \
1020         const __m256i *data1, const __m256i *data2, const uint64_t size) {     \
1021         __m256i total = _mm256_setzero_si256();                                \
1022         __m256i ones = _mm256_setzero_si256();                                 \
1023         __m256i twos = _mm256_setzero_si256();                                 \
1024         __m256i fours = _mm256_setzero_si256();                                \
1025         __m256i eights = _mm256_setzero_si256();                               \
1026         __m256i sixteens = _mm256_setzero_si256();                             \
1027         __m256i twosA, twosB, foursA, foursB, eightsA, eightsB;                \
1028         __m256i A1, A2;                                                        \
1029         const uint64_t limit = size - size % 16;                               \
1030         uint64_t i = 0;                                                        \
1031         for (; i < limit; i += 16) {                                           \
1032             A1 = avx_intrinsic(_mm256_lddqu_si256(data1 + i),                  \
1033                                _mm256_lddqu_si256(data2 + i));                 \
1034             A2 = avx_intrinsic(_mm256_lddqu_si256(data1 + i + 1),              \
1035                                _mm256_lddqu_si256(data2 + i + 1));             \
1036             CSA(&twosA, &ones, ones, A1, A2);                                  \
1037             A1 = avx_intrinsic(_mm256_lddqu_si256(data1 + i + 2),              \
1038                                _mm256_lddqu_si256(data2 + i + 2));             \
1039             A2 = avx_intrinsic(_mm256_lddqu_si256(data1 + i + 3),              \
1040                                _mm256_lddqu_si256(data2 + i + 3));             \
1041             CSA(&twosB, &ones, ones, A1, A2);                                  \
1042             CSA(&foursA, &twos, twos, twosA, twosB);                           \
1043             A1 = avx_intrinsic(_mm256_lddqu_si256(data1 + i + 4),              \
1044                                _mm256_lddqu_si256(data2 + i + 4));             \
1045             A2 = avx_intrinsic(_mm256_lddqu_si256(data1 + i + 5),              \
1046                                _mm256_lddqu_si256(data2 + i + 5));             \
1047             CSA(&twosA, &ones, ones, A1, A2);                                  \
1048             A1 = avx_intrinsic(_mm256_lddqu_si256(data1 + i + 6),              \
1049                                _mm256_lddqu_si256(data2 + i + 6));             \
1050             A2 = avx_intrinsic(_mm256_lddqu_si256(data1 + i + 7),              \
1051                                _mm256_lddqu_si256(data2 + i + 7));             \
1052             CSA(&twosB, &ones, ones, A1, A2);                                  \
1053             CSA(&foursB, &twos, twos, twosA, twosB);                           \
1054             CSA(&eightsA, &fours, fours, foursA, foursB);                      \
1055             A1 = avx_intrinsic(_mm256_lddqu_si256(data1 + i + 8),              \
1056                                _mm256_lddqu_si256(data2 + i + 8));             \
1057             A2 = avx_intrinsic(_mm256_lddqu_si256(data1 + i + 9),              \
1058                                _mm256_lddqu_si256(data2 + i + 9));             \
1059             CSA(&twosA, &ones, ones, A1, A2);                                  \
1060             A1 = avx_intrinsic(_mm256_lddqu_si256(data1 + i + 10),             \
1061                                _mm256_lddqu_si256(data2 + i + 10));            \
1062             A2 = avx_intrinsic(_mm256_lddqu_si256(data1 + i + 11),             \
1063                                _mm256_lddqu_si256(data2 + i + 11));            \
1064             CSA(&twosB, &ones, ones, A1, A2);                                  \
1065             CSA(&foursA, &twos, twos, twosA, twosB);                           \
1066             A1 = avx_intrinsic(_mm256_lddqu_si256(data1 + i + 12),             \
1067                                _mm256_lddqu_si256(data2 + i + 12));            \
1068             A2 = avx_intrinsic(_mm256_lddqu_si256(data1 + i + 13),             \
1069                                _mm256_lddqu_si256(data2 + i + 13));            \
1070             CSA(&twosA, &ones, ones, A1, A2);                                  \
1071             A1 = avx_intrinsic(_mm256_lddqu_si256(data1 + i + 14),             \
1072                                _mm256_lddqu_si256(data2 + i + 14));            \
1073             A2 = avx_intrinsic(_mm256_lddqu_si256(data1 + i + 15),             \
1074                                _mm256_lddqu_si256(data2 + i + 15));            \
1075             CSA(&twosB, &ones, ones, A1, A2);                                  \
1076             CSA(&foursB, &twos, twos, twosA, twosB);                           \
1077             CSA(&eightsB, &fours, fours, foursA, foursB);                      \
1078             CSA(&sixteens, &eights, eights, eightsA, eightsB);                 \
1079             total = _mm256_add_epi64(total, popcount256(sixteens));            \
1080         }                                                                      \
1081         total = _mm256_slli_epi64(total, 4);                                   \
1082         total = _mm256_add_epi64(total,                                        \
1083                                  _mm256_slli_epi64(popcount256(eights), 3));   \
1084         total =                                                                \
1085             _mm256_add_epi64(total, _mm256_slli_epi64(popcount256(fours), 2)); \
1086         total =                                                                \
1087             _mm256_add_epi64(total, _mm256_slli_epi64(popcount256(twos), 1));  \
1088         total = _mm256_add_epi64(total, popcount256(ones));                    \
1089         for (; i < size; i++) {                                                \
1090             A1 = avx_intrinsic(_mm256_lddqu_si256(data1 + i),                  \
1091                                _mm256_lddqu_si256(data2 + i));                 \
1092             total = _mm256_add_epi64(total, popcount256(A1));                  \
1093         }                                                                      \
1094         return (uint64_t)(_mm256_extract_epi64(total, 0)) +                    \
1095                (uint64_t)(_mm256_extract_epi64(total, 1)) +                    \
1096                (uint64_t)(_mm256_extract_epi64(total, 2)) +                    \
1097                (uint64_t)(_mm256_extract_epi64(total, 3));                     \
1098     }                                                                          \
1099     static inline uint64_t avx2_harley_seal_popcount256andstore_##opname(      \
1100         const __m256i *__restrict__ data1, const __m256i *__restrict__ data2,  \
1101         __m256i *__restrict__ out, const uint64_t size) {                      \
1102         __m256i total = _mm256_setzero_si256();                                \
1103         __m256i ones = _mm256_setzero_si256();                                 \
1104         __m256i twos = _mm256_setzero_si256();                                 \
1105         __m256i fours = _mm256_setzero_si256();                                \
1106         __m256i eights = _mm256_setzero_si256();                               \
1107         __m256i sixteens = _mm256_setzero_si256();                             \
1108         __m256i twosA, twosB, foursA, foursB, eightsA, eightsB;                \
1109         __m256i A1, A2;                                                        \
1110         const uint64_t limit = size - size % 16;                               \
1111         uint64_t i = 0;                                                        \
1112         for (; i < limit; i += 16) {                                           \
1113             A1 = avx_intrinsic(_mm256_lddqu_si256(data1 + i),                  \
1114                                _mm256_lddqu_si256(data2 + i));                 \
1115             _mm256_storeu_si256(out + i, A1);                                  \
1116             A2 = avx_intrinsic(_mm256_lddqu_si256(data1 + i + 1),              \
1117                                _mm256_lddqu_si256(data2 + i + 1));             \
1118             _mm256_storeu_si256(out + i + 1, A2);                              \
1119             CSA(&twosA, &ones, ones, A1, A2);                                  \
1120             A1 = avx_intrinsic(_mm256_lddqu_si256(data1 + i + 2),              \
1121                                _mm256_lddqu_si256(data2 + i + 2));             \
1122             _mm256_storeu_si256(out + i + 2, A1);                              \
1123             A2 = avx_intrinsic(_mm256_lddqu_si256(data1 + i + 3),              \
1124                                _mm256_lddqu_si256(data2 + i + 3));             \
1125             _mm256_storeu_si256(out + i + 3, A2);                              \
1126             CSA(&twosB, &ones, ones, A1, A2);                                  \
1127             CSA(&foursA, &twos, twos, twosA, twosB);                           \
1128             A1 = avx_intrinsic(_mm256_lddqu_si256(data1 + i + 4),              \
1129                                _mm256_lddqu_si256(data2 + i + 4));             \
1130             _mm256_storeu_si256(out + i + 4, A1);                              \
1131             A2 = avx_intrinsic(_mm256_lddqu_si256(data1 + i + 5),              \
1132                                _mm256_lddqu_si256(data2 + i + 5));             \
1133             _mm256_storeu_si256(out + i + 5, A2);                              \
1134             CSA(&twosA, &ones, ones, A1, A2);                                  \
1135             A1 = avx_intrinsic(_mm256_lddqu_si256(data1 + i + 6),              \
1136                                _mm256_lddqu_si256(data2 + i + 6));             \
1137             _mm256_storeu_si256(out + i + 6, A1);                              \
1138             A2 = avx_intrinsic(_mm256_lddqu_si256(data1 + i + 7),              \
1139                                _mm256_lddqu_si256(data2 + i + 7));             \
1140             _mm256_storeu_si256(out + i + 7, A2);                              \
1141             CSA(&twosB, &ones, ones, A1, A2);                                  \
1142             CSA(&foursB, &twos, twos, twosA, twosB);                           \
1143             CSA(&eightsA, &fours, fours, foursA, foursB);                      \
1144             A1 = avx_intrinsic(_mm256_lddqu_si256(data1 + i + 8),              \
1145                                _mm256_lddqu_si256(data2 + i + 8));             \
1146             _mm256_storeu_si256(out + i + 8, A1);                              \
1147             A2 = avx_intrinsic(_mm256_lddqu_si256(data1 + i + 9),              \
1148                                _mm256_lddqu_si256(data2 + i + 9));             \
1149             _mm256_storeu_si256(out + i + 9, A2);                              \
1150             CSA(&twosA, &ones, ones, A1, A2);                                  \
1151             A1 = avx_intrinsic(_mm256_lddqu_si256(data1 + i + 10),             \
1152                                _mm256_lddqu_si256(data2 + i + 10));            \
1153             _mm256_storeu_si256(out + i + 10, A1);                             \
1154             A2 = avx_intrinsic(_mm256_lddqu_si256(data1 + i + 11),             \
1155                                _mm256_lddqu_si256(data2 + i + 11));            \
1156             _mm256_storeu_si256(out + i + 11, A2);                             \
1157             CSA(&twosB, &ones, ones, A1, A2);                                  \
1158             CSA(&foursA, &twos, twos, twosA, twosB);                           \
1159             A1 = avx_intrinsic(_mm256_lddqu_si256(data1 + i + 12),             \
1160                                _mm256_lddqu_si256(data2 + i + 12));            \
1161             _mm256_storeu_si256(out + i + 12, A1);                             \
1162             A2 = avx_intrinsic(_mm256_lddqu_si256(data1 + i + 13),             \
1163                                _mm256_lddqu_si256(data2 + i + 13));            \
1164             _mm256_storeu_si256(out + i + 13, A2);                             \
1165             CSA(&twosA, &ones, ones, A1, A2);                                  \
1166             A1 = avx_intrinsic(_mm256_lddqu_si256(data1 + i + 14),             \
1167                                _mm256_lddqu_si256(data2 + i + 14));            \
1168             _mm256_storeu_si256(out + i + 14, A1);                             \
1169             A2 = avx_intrinsic(_mm256_lddqu_si256(data1 + i + 15),             \
1170                                _mm256_lddqu_si256(data2 + i + 15));            \
1171             _mm256_storeu_si256(out + i + 15, A2);                             \
1172             CSA(&twosB, &ones, ones, A1, A2);                                  \
1173             CSA(&foursB, &twos, twos, twosA, twosB);                           \
1174             CSA(&eightsB, &fours, fours, foursA, foursB);                      \
1175             CSA(&sixteens, &eights, eights, eightsA, eightsB);                 \
1176             total = _mm256_add_epi64(total, popcount256(sixteens));            \
1177         }                                                                      \
1178         total = _mm256_slli_epi64(total, 4);                                   \
1179         total = _mm256_add_epi64(total,                                        \
1180                                  _mm256_slli_epi64(popcount256(eights), 3));   \
1181         total =                                                                \
1182             _mm256_add_epi64(total, _mm256_slli_epi64(popcount256(fours), 2)); \
1183         total =                                                                \
1184             _mm256_add_epi64(total, _mm256_slli_epi64(popcount256(twos), 1));  \
1185         total = _mm256_add_epi64(total, popcount256(ones));                    \
1186         for (; i < size; i++) {                                                \
1187             A1 = avx_intrinsic(_mm256_lddqu_si256(data1 + i),                  \
1188                                _mm256_lddqu_si256(data2 + i));                 \
1189             _mm256_storeu_si256(out + i, A1);                                  \
1190             total = _mm256_add_epi64(total, popcount256(A1));                  \
1191         }                                                                      \
1192         return (uint64_t)(_mm256_extract_epi64(total, 0)) +                    \
1193                (uint64_t)(_mm256_extract_epi64(total, 1)) +                    \
1194                (uint64_t)(_mm256_extract_epi64(total, 2)) +                    \
1195                (uint64_t)(_mm256_extract_epi64(total, 3));                     \
1196     }
1197 
1198 AVXPOPCNTFNC(or, _mm256_or_si256)
1199 AVXPOPCNTFNC(union, _mm256_or_si256)
1200 AVXPOPCNTFNC(and, _mm256_and_si256)
1201 AVXPOPCNTFNC(intersection, _mm256_and_si256)
1202 AVXPOPCNTFNC (xor, _mm256_xor_si256)
1203 AVXPOPCNTFNC(andnot, _mm256_andnot_si256)
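/* Each AVXPOPCNTFNC(opname, intrinsic) expansion above defines two helpers;
 * for `and` the generated signatures are (shown here for illustration):
 *
 *     uint64_t avx2_harley_seal_popcount256_and(const __m256i *d1, const __m256i *d2, uint64_t size);
 *     uint64_t avx2_harley_seal_popcount256andstore_and(const __m256i *d1, const __m256i *d2,
 *                                                       __m256i *out, uint64_t size);
 *
 * The first returns the population count of d1[i] AND d2[i] over `size`
 * 256-bit words; the second additionally stores each combined word into out. */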
1204 
1205 /***
1206  * END Harley-Seal popcount functions.
1207  */
1208 
1209 #endif  // USEAVX
1210 
1211 #endif
1212 /* end file include/roaring/bitset_util.h */
1213 /* begin file include/roaring/containers/array.h */
1214 /*
1215  * array.h
1216  *
1217  */
1218 
1219 #ifndef INCLUDE_CONTAINERS_ARRAY_H_
1220 #define INCLUDE_CONTAINERS_ARRAY_H_
1221 
1222 #include <string.h>
1223 
1224 
1225 /* Containers with DEFAULT_MAX_SIZE or fewer integers should be arrays */
1226 enum { DEFAULT_MAX_SIZE = 4096 };
1227 
1228 /* struct array_container - sparse representation of a bitmap
1229  *
1230  * @cardinality: number of indices in `array` (and the bitmap)
1231  * @capacity:    allocated size of `array`
1232  * @array:       sorted list of integers
1233  */
1234 struct array_container_s {
1235     int32_t cardinality;
1236     int32_t capacity;
1237     uint16_t *array;
1238 };
1239 
1240 typedef struct array_container_s array_container_t;
1241 
1242 /* Create a new array with default capacity. Return NULL in case of failure. See also
1243  * array_container_create_given_capacity. */
1244 array_container_t *array_container_create(void);
1245 
1246 /* Create a new array with the specified capacity. Return NULL in case of
1247  * failure. */
1248 array_container_t *array_container_create_given_capacity(int32_t size);
1249 
1250 /* Create a new array containing all values in [min,max). */
1251 array_container_t * array_container_create_range(uint32_t min, uint32_t max);
1252 
1253 /*
1254  * Shrink the capacity to the actual size, return the number of bytes saved.
1255  */
1256 int array_container_shrink_to_fit(array_container_t *src);
1257 
1258 /* Free memory owned by `array'. */
1259 void array_container_free(array_container_t *array);
1260 
1261 /* Duplicate container */
1262 array_container_t *array_container_clone(const array_container_t *src);
1263 
1264 int32_t array_container_serialize(const array_container_t *container,
1265                                   char *buf) WARN_UNUSED;
1266 
1267 uint32_t array_container_serialization_len(const array_container_t *container);
1268 
1269 void *array_container_deserialize(const char *buf, size_t buf_len);
1270 
1271 /* Get the cardinality of `array'. */
1272 static inline int array_container_cardinality(const array_container_t *array) {
1273     return array->cardinality;
1274 }
1275 
1276 static inline bool array_container_nonzero_cardinality(
1277     const array_container_t *array) {
1278     return array->cardinality > 0;
1279 }
1280 
1281 /* Copy one container into another. We assume that they are distinct. */
1282 void array_container_copy(const array_container_t *src, array_container_t *dst);
1283 
1284 /*  Add all the values in [min,max) that lie at a distance k*step from min.
1285     The container must have a size less than or equal to DEFAULT_MAX_SIZE after
1286    this addition. */
1287 void array_container_add_from_range(array_container_t *arr, uint32_t min,
1288                                     uint32_t max, uint16_t step);
1289 
1290 /* Set the cardinality to zero (does not release memory). */
1291 static inline void array_container_clear(array_container_t *array) {
1292     array->cardinality = 0;
1293 }
1294 
1295 static inline bool array_container_empty(const array_container_t *array) {
1296     return array->cardinality == 0;
1297 }
1298 
1299 /* check whether the cardinality is equal to the capacity (this does not mean
1300 * that it contains 1<<16 elements) */
1301 static inline bool array_container_full(const array_container_t *array) {
1302     return array->cardinality == array->capacity;
1303 }
1304 
1305 
1306 /* Compute the union of `src_1' and `src_2' and write the result to `dst'
1307  * It is assumed that `dst' is distinct from both `src_1' and `src_2'. */
1308 void array_container_union(const array_container_t *src_1,
1309                            const array_container_t *src_2,
1310                            array_container_t *dst);
1311 
1312 /* symmetric difference, see array_container_union */
1313 void array_container_xor(const array_container_t *array_1,
1314                          const array_container_t *array_2,
1315                          array_container_t *out);
1316 
1317 /* Computes the intersection of src_1 and src_2 and write the result to
1318  * dst. It is assumed that dst is distinct from both src_1 and src_2. */
1319 void array_container_intersection(const array_container_t *src_1,
1320                                   const array_container_t *src_2,
1321                                   array_container_t *dst);
1322 
1323 /* Check whether src_1 and src_2 intersect. */
1324 bool array_container_intersect(const array_container_t *src_1,
1325                                   const array_container_t *src_2);
1326 
1327 
1328 /* Computes the size of the intersection between two arrays.
1329  */
1330 int array_container_intersection_cardinality(const array_container_t *src_1,
1331                                              const array_container_t *src_2);
1332 
1333 /* Computes the intersection of array1 and array2 and writes the result to
1334  * array1.
1335  * */
1336 void array_container_intersection_inplace(array_container_t *src_1,
1337                                           const array_container_t *src_2);
1338 
1339 /*
1340  * Write out the 16-bit integers contained in this container as a list of 32-bit
1341  * integers using base
1342  * as the starting value (it might be expected that base has zeros in its 16
1343  * least significant bits).
1344  * The function returns the number of values written.
1345  * The caller is responsible for allocating enough memory in out.
1346  */
1347 int array_container_to_uint32_array(void *vout, const array_container_t *cont,
1348                                     uint32_t base);
1349 
1350 /* Compute the number of runs */
1351 int32_t array_container_number_of_runs(const array_container_t *a);
1352 
1353 /*
1354  * Print this container using printf (useful for debugging).
1355  */
1356 void array_container_printf(const array_container_t *v);
1357 
1358 /*
1359  * Print this container using printf as a comma-separated list of 32-bit
1360  * integers starting at base.
1361  */
1362 void array_container_printf_as_uint32_array(const array_container_t *v,
1363                                             uint32_t base);
1364 
1365 /**
1366  * Return the serialized size in bytes of a container having cardinality "card".
1367  */
1368 static inline int32_t array_container_serialized_size_in_bytes(int32_t card) {
1369     return card * 2 + 2;
1370 }
1371 
1372 /**
1373  * Increase capacity to at least min.
1374  * Whether the existing data needs to be copied over depends on the "preserve"
1375  * parameter. If preserve is false, then the new content will be uninitialized,
1376  * otherwise the old content is copied.
1377  */
1378 void array_container_grow(array_container_t *container, int32_t min,
1379                           bool preserve);
1380 
1381 bool array_container_iterate(const array_container_t *cont, uint32_t base,
1382                              roaring_iterator iterator, void *ptr);
1383 bool array_container_iterate64(const array_container_t *cont, uint32_t base,
1384                                roaring_iterator64 iterator, uint64_t high_bits,
1385                                void *ptr);
1386 
1387 /**
1388  * Writes the underlying array to buf, outputs how many bytes were written.
1389  * This is meant to be byte-by-byte compatible with the Java and Go versions of
1390  * Roaring.
1391  * The number of bytes written should be
1392  * array_container_size_in_bytes(container).
1393  *
1394  */
1395 int32_t array_container_write(const array_container_t *container, char *buf);
1396 /**
1397  * Reads the instance from buf, outputs how many bytes were read.
1398  * This is meant to be byte-by-byte compatible with the Java and Go versions of
1399  * Roaring.
1400  * The number of bytes read should be array_container_size_in_bytes(container).
1401  * You need to provide the (known) cardinality.
1402  */
1403 int32_t array_container_read(int32_t cardinality, array_container_t *container,
1404                              const char *buf);
1405 
1406 /**
1407  * Return the serialized size in bytes of a container (see
1408  * bitset_container_write)
1409  * This is meant to be compatible with the Java and Go versions of Roaring and
1410  * assumes
1411  * that the cardinality of the container is already known.
1412  *
1413  */
1414 static inline int32_t array_container_size_in_bytes(
1415     const array_container_t *container) {
1416     return container->cardinality * sizeof(uint16_t);
1417 }
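/*
 * Illustrative sketch (not part of the CRoaring API; the helper name is
 * hypothetical): a write/read round trip through a heap buffer sized with
 * array_container_size_in_bytes. It assumes `src' has a non-zero cardinality
 * and that `dst' was created with enough capacity to receive the values.
 */
static inline bool array_container_write_read_demo(const array_container_t *src,
                                                   array_container_t *dst) {
    const int32_t nbytes = array_container_size_in_bytes(src);
    char *buf = (char *)malloc(nbytes);
    if (buf == NULL) return false;
    const int32_t written = array_container_write(src, buf);
    const int32_t read = array_container_read(array_container_cardinality(src), dst, buf);
    free(buf);
    return (written == nbytes) && (read == nbytes);
}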
1418 
1419 /**
1420  * Return true if the two arrays have the same content.
1421  */
1422 static inline bool array_container_equals(
1423     const array_container_t *container1,
1424     const array_container_t *container2) {
1425 
1426     if (container1->cardinality != container2->cardinality) {
1427         return false;
1428     }
1429     return memequals(container1->array, container2->array, container1->cardinality*2);
1430 }
1431 
1432 /**
1433  * Return true if container1 is a subset of container2.
1434  */
1435 bool array_container_is_subset(const array_container_t *container1,
1436                                const array_container_t *container2);
1437 
1438 /**
1439  * If the element of given rank is in this container, supposing that the first
1440  * element has rank start_rank, then the function returns true and sets element
1441  * accordingly.
1442  * Otherwise, it returns false and updates start_rank.
1443  */
1444 static inline bool array_container_select(const array_container_t *container,
1445                                           uint32_t *start_rank, uint32_t rank,
1446                                           uint32_t *element) {
1447     int card = array_container_cardinality(container);
1448     if (*start_rank + card <= rank) {
1449         *start_rank += card;
1450         return false;
1451     } else {
1452         *element = container->array[rank - *start_rank];
1453         return true;
1454     }
1455 }
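/*
 * Illustrative sketch (not part of the CRoaring API; the helper name is
 * hypothetical): selecting the element of a given global rank across two
 * consecutive containers. When the first call fails, it has advanced
 * start_rank by the cardinality of the first container, so the second call
 * looks for the remaining rank.
 */
static inline bool array_container_select_two_demo(const array_container_t *first,
                                                   const array_container_t *second,
                                                   uint32_t rank, uint32_t *element) {
    uint32_t start_rank = 0;
    if (array_container_select(first, &start_rank, rank, element)) {
        return true;
    }
    return array_container_select(second, &start_rank, rank, element);
}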
1456 
1457 /* Computes the difference of array1 and array2 and writes the result
1458  * to array out.
1459  * Array out does not need to be distinct from array_1
1460  */
1461 void array_container_andnot(const array_container_t *array_1,
1462                             const array_container_t *array_2,
1463                             array_container_t *out);
1464 
1465 /* Append x to the set. Assumes that the value is larger than any preceding
1466  * values.  */
1467 static inline void array_container_append(array_container_t *arr,
1468                                           uint16_t pos) {
1469     const int32_t capacity = arr->capacity;
1470 
1471     if (array_container_full(arr)) {
1472         array_container_grow(arr, capacity + 1, true);
1473     }
1474 
1475     arr->array[arr->cardinality++] = pos;
1476 }
1477 
1478 /**
1479  * Add value to the set if final cardinality doesn't exceed max_cardinality.
1480  * Return code:
1481  * 1  -- value was added
1482  * 0  -- value was already present
1483  * -1 -- value was not added because cardinality would exceed max_cardinality
1484  */
1485 static inline int array_container_try_add(array_container_t *arr, uint16_t value,
1486                                           int32_t max_cardinality) {
1487     const int32_t cardinality = arr->cardinality;
1488 
1489     // best case, we can append.
1490     if ((array_container_empty(arr) || arr->array[cardinality - 1] < value) &&
1491         cardinality < max_cardinality) {
1492         array_container_append(arr, value);
1493         return 1;
1494     }
1495 
1496     const int32_t loc = binarySearch(arr->array, cardinality, value);
1497 
1498     if (loc >= 0) {
1499         return 0;
1500     } else if (cardinality < max_cardinality) {
1501         if (array_container_full(arr)) {
1502             array_container_grow(arr, arr->capacity + 1, true);
1503         }
1504         const int32_t insert_idx = -loc - 1;
1505         memmove(arr->array + insert_idx + 1, arr->array + insert_idx,
1506                 (cardinality - insert_idx) * sizeof(uint16_t));
1507         arr->array[insert_idx] = value;
1508         arr->cardinality++;
1509         return 1;
1510     } else {
1511         return -1;
1512     }
1513 }
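/*
 * Illustrative sketch (not part of the CRoaring API; the helper name is
 * hypothetical): typical interpretation of the array_container_try_add
 * return codes when filling a container up to DEFAULT_MAX_SIZE.
 */
static inline bool array_container_fill_demo(array_container_t *arr, uint16_t value) {
    switch (array_container_try_add(arr, value, DEFAULT_MAX_SIZE)) {
        case 1:   /* value was added */
        case 0:   /* value was already present */
            return true;
        default:  /* -1: adding would exceed DEFAULT_MAX_SIZE; a caller would
                     normally convert to a bitset container at this point */
            return false;
    }
}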
1514 
1515 /* Add value to the set. Returns true if x was not already present.  */
1516 static inline bool array_container_add(array_container_t *arr, uint16_t value) {
1517     return array_container_try_add(arr, value, INT32_MAX) == 1;
1518 }
1519 
1520 /* Remove x from the set. Returns true if x was present.  */
1521 static inline bool array_container_remove(array_container_t *arr,
1522                                           uint16_t pos) {
1523     const int32_t idx = binarySearch(arr->array, arr->cardinality, pos);
1524     const bool is_present = idx >= 0;
1525     if (is_present) {
1526         memmove(arr->array + idx, arr->array + idx + 1,
1527                 (arr->cardinality - idx - 1) * sizeof(uint16_t));
1528         arr->cardinality--;
1529     }
1530 
1531     return is_present;
1532 }
1533 
1534 /* Check whether x is present.  */
1535 static inline bool array_container_contains(const array_container_t *arr,
1536                                      uint16_t pos) {
1537     //    return binarySearch(arr->array, arr->cardinality, pos) >= 0;
1538     // binary search with fallback to linear search for short ranges
1539     int32_t low = 0;
1540     const uint16_t * carr = (const uint16_t *) arr->array;
1541     int32_t high = arr->cardinality - 1;
1542     //    while (high - low >= 0) {
1543     while(high >= low + 16) {
1544         int32_t middleIndex = (low + high)>>1;
1545         uint16_t middleValue = carr[middleIndex];
1546         if (middleValue < pos) {
1547             low = middleIndex + 1;
1548         } else if (middleValue > pos) {
1549             high = middleIndex - 1;
1550         } else {
1551             return true;
1552         }
1553     }
1554 
1555     for (int i=low; i <= high; i++) {
1556         uint16_t v = carr[i];
1557         if (v == pos) {
1558             return true;
1559         }
1560         if ( v > pos ) return false;
1561     }
1562     return false;
1563 
1564 }
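/*
 * Illustrative sketch (not part of the CRoaring API; the helper name is
 * hypothetical): an add/contains/remove round trip on a freshly created
 * array container.
 */
static inline bool array_container_roundtrip_demo(void) {
    array_container_t *arr = array_container_create();
    if (arr == NULL) return false;
    bool ok = array_container_add(arr, 42);          /* true: newly added */
    ok = ok && array_container_contains(arr, 42);
    ok = ok && array_container_remove(arr, 42);      /* true: was present */
    ok = ok && !array_container_contains(arr, 42);
    array_container_free(arr);
    return ok;
}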
1565 
1566 /* Check whether a range of values from range_start (included) to range_end (excluded) is present. */
1567 static inline bool array_container_contains_range(const array_container_t *arr,
1568                                                     uint32_t range_start, uint32_t range_end) {
1569 
1570     const uint16_t rs_included = range_start;
1571     const uint16_t re_included = range_end - 1;
1572 
1573     const uint16_t *carr = (const uint16_t *) arr->array;
1574 
1575     const int32_t start = advanceUntil(carr, -1, arr->cardinality, rs_included);
1576     const int32_t end = advanceUntil(carr, start - 1, arr->cardinality, re_included);
1577 
1578     return (start < arr->cardinality) && (end < arr->cardinality)
1579             && (((uint16_t)(end - start)) == re_included - rs_included)
1580             && (carr[start] == rs_included) && (carr[end] == re_included);
1581 }
1582 
1583 /* Returns the smallest value (assumes not empty) */
1584 static inline uint16_t array_container_minimum(const array_container_t *arr) {
1585     if (arr->cardinality == 0) return 0;
1586     return arr->array[0];
1587 }
1588 
1589 /* Returns the largest value (assumes not empty) */
1590 static inline uint16_t array_container_maximum(const array_container_t *arr) {
1591     if (arr->cardinality == 0) return 0;
1592     return arr->array[arr->cardinality - 1];
1593 }
1594 
1595 /* Returns the number of values equal or smaller than x */
1596 static inline int array_container_rank(const array_container_t *arr, uint16_t x) {
1597     const int32_t idx = binarySearch(arr->array, arr->cardinality, x);
1598     const bool is_present = idx >= 0;
1599     if (is_present) {
1600         return idx + 1;
1601     } else {
1602         return -idx - 1;
1603     }
1604 }
1605 
1606 /* Returns the index of the first value equal or larger than x, or -1 */
1607 static inline int array_container_index_equalorlarger(const array_container_t *arr, uint16_t x) {
1608     const int32_t idx = binarySearch(arr->array, arr->cardinality, x);
1609     const bool is_present = idx >= 0;
1610     if (is_present) {
1611         return idx;
1612     } else {
1613         int32_t candidate = - idx - 1;
1614         if(candidate < arr->cardinality) return candidate;
1615         return -1;
1616     }
1617 }
1618 
1619 /*
1620  * Adds all values in range [min,max] using hint:
1621  *   nvals_less is the number of array values less than $min
1622  *   nvals_greater is the number of array values greater than $max
1623  */
1624 static inline void array_container_add_range_nvals(array_container_t *array,
1625                                                    uint32_t min, uint32_t max,
1626                                                    int32_t nvals_less,
1627                                                    int32_t nvals_greater) {
1628     int32_t union_cardinality = nvals_less + (max - min + 1) + nvals_greater;
1629     if (union_cardinality > array->capacity) {
1630         array_container_grow(array, union_cardinality, true);
1631     }
1632     memmove(&(array->array[union_cardinality - nvals_greater]),
1633             &(array->array[array->cardinality - nvals_greater]),
1634             nvals_greater * sizeof(uint16_t));
1635     for (uint32_t i = 0; i <= max - min; i++) {
1636         array->array[nvals_less + i] = min + i;
1637     }
1638     array->cardinality = union_cardinality;
1639 }
1640 
1641 /**
1642  * Adds all values in range [min,max].
1643  */
1644 static inline void array_container_add_range(array_container_t *array,
1645                                              uint32_t min, uint32_t max) {
1646     int32_t nvals_greater = count_greater(array->array, array->cardinality, max);
1647     int32_t nvals_less = count_less(array->array, array->cardinality - nvals_greater, min);
1648     array_container_add_range_nvals(array, min, max, nvals_less, nvals_greater);
1649 }
1650 
1651 /*
1652  * Removes all elements array[pos] .. array[pos+count-1]
1653  */
1654 static inline void array_container_remove_range(array_container_t *array,
1655                                                 uint32_t pos, uint32_t count) {
1656   if (count != 0) {
1657       memmove(&(array->array[pos]), &(array->array[pos+count]),
1658               (array->cardinality - pos - count) * sizeof(uint16_t));
1659       array->cardinality -= count;
1660   }
1661 }
1662 
1663 #endif /* INCLUDE_CONTAINERS_ARRAY_H_ */
1664 /* end file include/roaring/containers/array.h */
1665 /* begin file include/roaring/containers/bitset.h */
1666 /*
1667  * bitset.h
1668  *
1669  */
1670 
1671 #ifndef INCLUDE_CONTAINERS_BITSET_H_
1672 #define INCLUDE_CONTAINERS_BITSET_H_
1673 
1674 #include <stdbool.h>
1675 #include <stdint.h>
1676 
1677 #ifdef USEAVX
1678 #define ALIGN_AVX __attribute__((aligned(sizeof(__m256i))))
1679 #else
1680 #define ALIGN_AVX
1681 #endif
1682 
1683 enum {
1684     BITSET_CONTAINER_SIZE_IN_WORDS = (1 << 16) / 64,
1685     BITSET_UNKNOWN_CARDINALITY = -1
1686 };
1687 
1688 struct bitset_container_s {
1689     int32_t cardinality;
1690     uint64_t *array;
1691 };
1692 
1693 typedef struct bitset_container_s bitset_container_t;
1694 
1695 /* Create a new bitset. Return NULL in case of failure. */
1696 bitset_container_t *bitset_container_create(void);
1697 
1698 /* Free memory. */
1699 void bitset_container_free(bitset_container_t *bitset);
1700 
1701 /* Clear bitset (sets bits to 0). */
1702 void bitset_container_clear(bitset_container_t *bitset);
1703 
1704 /* Set all bits to 1. */
1705 void bitset_container_set_all(bitset_container_t *bitset);
1706 
1707 /* Duplicate bitset */
1708 bitset_container_t *bitset_container_clone(const bitset_container_t *src);
1709 
1710 int32_t bitset_container_serialize(const bitset_container_t *container,
1711                                    char *buf) WARN_UNUSED;
1712 
1713 uint32_t bitset_container_serialization_len(void);
1714 
1715 void *bitset_container_deserialize(const char *buf, size_t buf_len);
1716 
1717 /* Set the bit in [begin,end). WARNING: as of April 2016, this method is slow
1718  * and
1719  * should not be used in performance-sensitive code. Ever.  */
1720 void bitset_container_set_range(bitset_container_t *bitset, uint32_t begin,
1721                                 uint32_t end);
1722 
1723 #ifdef ASMBITMANIPOPTIMIZATION
1724 /* Set the ith bit.  */
1725 static inline void bitset_container_set(bitset_container_t *bitset,
1726                                         uint16_t pos) {
1727     uint64_t shift = 6;
1728     uint64_t offset;
1729     uint64_t p = pos;
1730     ASM_SHIFT_RIGHT(p, shift, offset);
1731     uint64_t load = bitset->array[offset];
1732     ASM_SET_BIT_INC_WAS_CLEAR(load, p, bitset->cardinality);
1733     bitset->array[offset] = load;
1734 }
1735 
1736 /* Unset the ith bit.  */
1737 static inline void bitset_container_unset(bitset_container_t *bitset,
1738                                           uint16_t pos) {
1739     uint64_t shift = 6;
1740     uint64_t offset;
1741     uint64_t p = pos;
1742     ASM_SHIFT_RIGHT(p, shift, offset);
1743     uint64_t load = bitset->array[offset];
1744     ASM_CLEAR_BIT_DEC_WAS_SET(load, p, bitset->cardinality);
1745     bitset->array[offset] = load;
1746 }
1747 
1748 /* Add `pos' to `bitset'. Returns true if `pos' was not present. Might be slower
1749  * than bitset_container_set.  */
1750 static inline bool bitset_container_add(bitset_container_t *bitset,
1751                                         uint16_t pos) {
1752     uint64_t shift = 6;
1753     uint64_t offset;
1754     uint64_t p = pos;
1755     ASM_SHIFT_RIGHT(p, shift, offset);
1756     uint64_t load = bitset->array[offset];
1757     // could be possibly slightly further optimized
1758     const int32_t oldcard = bitset->cardinality;
1759     ASM_SET_BIT_INC_WAS_CLEAR(load, p, bitset->cardinality);
1760     bitset->array[offset] = load;
1761     return bitset->cardinality - oldcard;
1762 }
1763 
1764 /* Remove `pos' from `bitset'. Returns true if `pos' was present.  Might be
1765  * slower than bitset_container_unset.  */
1766 static inline bool bitset_container_remove(bitset_container_t *bitset,
1767                                            uint16_t pos) {
1768     uint64_t shift = 6;
1769     uint64_t offset;
1770     uint64_t p = pos;
1771     ASM_SHIFT_RIGHT(p, shift, offset);
1772     uint64_t load = bitset->array[offset];
1773     // could be possibly slightly further optimized
1774     const int32_t oldcard = bitset->cardinality;
1775     ASM_CLEAR_BIT_DEC_WAS_SET(load, p, bitset->cardinality);
1776     bitset->array[offset] = load;
1777     return oldcard - bitset->cardinality;
1778 }
1779 
1780 /* Get the value of the ith bit.  */
1781 static inline bool bitset_container_get(const bitset_container_t *bitset,
1782                                  uint16_t pos) {
1783     uint64_t word = bitset->array[pos >> 6];
1784     const uint64_t p = pos;
1785     ASM_INPLACESHIFT_RIGHT(word, p);
1786     return word & 1;
1787 }
1788 
1789 #else
1790 
1791 /* Set the ith bit.  */
1792 static inline void bitset_container_set(bitset_container_t *bitset,
1793                                         uint16_t pos) {
1794     const uint64_t old_word = bitset->array[pos >> 6];
1795     const int index = pos & 63;
1796     const uint64_t new_word = old_word | (UINT64_C(1) << index);
1797     bitset->cardinality += (uint32_t)((old_word ^ new_word) >> index);
1798     bitset->array[pos >> 6] = new_word;
1799 }
1800 
1801 /* Unset the ith bit.  */
1802 static inline void bitset_container_unset(bitset_container_t *bitset,
1803                                           uint16_t pos) {
1804     const uint64_t old_word = bitset->array[pos >> 6];
1805     const int index = pos & 63;
1806     const uint64_t new_word = old_word & (~(UINT64_C(1) << index));
1807     bitset->cardinality -= (uint32_t)((old_word ^ new_word) >> index);
1808     bitset->array[pos >> 6] = new_word;
1809 }
1810 
1811 /* Add `pos' to `bitset'. Returns true if `pos' was not present. Might be slower
1812  * than bitset_container_set.  */
1813 static inline bool bitset_container_add(bitset_container_t *bitset,
1814                                         uint16_t pos) {
1815     const uint64_t old_word = bitset->array[pos >> 6];
1816     const int index = pos & 63;
1817     const uint64_t new_word = old_word | (UINT64_C(1) << index);
1818     const uint64_t increment = (old_word ^ new_word) >> index;
1819     bitset->cardinality += (uint32_t)increment;
1820     bitset->array[pos >> 6] = new_word;
1821     return increment > 0;
1822 }
1823 
1824 /* Remove `pos' from `bitset'. Returns true if `pos' was present.  Might be
1825  * slower than bitset_container_unset.  */
1826 static inline bool bitset_container_remove(bitset_container_t *bitset,
1827                                            uint16_t pos) {
1828     const uint64_t old_word = bitset->array[pos >> 6];
1829     const int index = pos & 63;
1830     const uint64_t new_word = old_word & (~(UINT64_C(1) << index));
1831     const uint64_t increment = (old_word ^ new_word) >> index;
1832     bitset->cardinality -= (uint32_t)increment;
1833     bitset->array[pos >> 6] = new_word;
1834     return increment > 0;
1835 }
1836 
1837 /* Get the value of the ith bit.  */
1838 static inline bool bitset_container_get(const bitset_container_t *bitset,
1839                                  uint16_t pos) {
1840     const uint64_t word = bitset->array[pos >> 6];
1841     return (word >> (pos & 63)) & 1;
1842 }
1843 
1844 #endif
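/*
 * Illustrative sketch (not part of the CRoaring API; the helper name is
 * hypothetical): add/get/remove on a freshly created bitset container,
 * using the boolean-returning variants so the cardinality stays in sync.
 */
static inline bool bitset_container_setget_demo(void) {
    bitset_container_t *b = bitset_container_create();
    if (b == NULL) return false;
    bool ok = bitset_container_add(b, 1000);         /* true: bit was clear */
    ok = ok && bitset_container_get(b, 1000);
    ok = ok && bitset_container_remove(b, 1000);     /* true: bit was set */
    ok = ok && !bitset_container_get(b, 1000);
    bitset_container_free(b);
    return ok;
}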
1845 
1846 /*
1847 * Check if all bits are set in a range of positions from pos_start (included) to
1848 * pos_end (excluded).
1849 */
1850 static inline bool bitset_container_get_range(const bitset_container_t *bitset,
1851                                                 uint32_t pos_start, uint32_t pos_end) {
1852 
1853     const uint32_t start = pos_start >> 6;
1854     const uint32_t end = pos_end >> 6;
1855 
1856     const uint64_t first = ~((1ULL << (pos_start & 0x3F)) - 1);
1857     const uint64_t last = (1ULL << (pos_end & 0x3F)) - 1;
1858 
1859     if (start == end) return ((bitset->array[end] & first & last) == (first & last));
1860     if ((bitset->array[start] & first) != first) return false;
1861 
1862     if ((end < BITSET_CONTAINER_SIZE_IN_WORDS) && ((bitset->array[end] & last) != last)){
1863 
1864         return false;
1865     }
1866 
1867     for (uint16_t i = start + 1; (i < BITSET_CONTAINER_SIZE_IN_WORDS) && (i < end); ++i){
1868 
1869         if (bitset->array[i] != UINT64_C(0xFFFFFFFFFFFFFFFF)) return false;
1870     }
1871 
1872     return true;
1873 }
1874 
1875 /* Check whether `pos' is present in `bitset'.  Calls bitset_container_get. */
1876 static inline bool bitset_container_contains(const bitset_container_t *bitset,
1877                                       uint16_t pos) {
1878     return bitset_container_get(bitset, pos);
1879 }
1880 
1881 /*
1882 * Check whether a range of bits from position `pos_start' (included) to `pos_end' (excluded)
1883 * is present in `bitset'.  Calls bitset_container_get_range.
1884 */
1885 static inline bool bitset_container_contains_range(const bitset_container_t *bitset,
1886 					uint32_t pos_start, uint32_t pos_end) {
1887     return bitset_container_get_range(bitset, pos_start, pos_end);
1888 }
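/*
 * Illustrative sketch (not part of the CRoaring API; the helper name is
 * hypothetical): setting the half-open range [100, 200) on a fresh bitset and
 * checking it with bitset_container_contains_range; bit 200 itself is excluded
 * and therefore remains clear.
 */
static inline bool bitset_container_range_demo(void) {
    bitset_container_t *b = bitset_container_create();
    if (b == NULL) return false;
    bitset_container_set_range(b, 100, 200);
    bool ok = bitset_container_contains_range(b, 100, 200) &&
              !bitset_container_get(b, 200);
    bitset_container_free(b);
    return ok;
}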
1889 
1890 /* Get the number of bits set */
1891 static inline int bitset_container_cardinality(
1892     const bitset_container_t *bitset) {
1893     return bitset->cardinality;
1894 }
1895 
1896 
1897 
1898 
1899 /* Copy one container into another. We assume that they are distinct. */
1900 void bitset_container_copy(const bitset_container_t *source,
1901                            bitset_container_t *dest);
1902 
1903 /*  Add all the values [min,max) at a distance k*step from min: min,
1904  * min+step,.... */
1905 void bitset_container_add_from_range(bitset_container_t *bitset, uint32_t min,
1906                                      uint32_t max, uint16_t step);
1907 
1908 /* Get the number of bits set (force computation). This does not modify bitset.
1909  * To update the cardinality, you should do
1910  * bitset->cardinality =  bitset_container_compute_cardinality(bitset).*/
1911 int bitset_container_compute_cardinality(const bitset_container_t *bitset);
1912 
1913 /* Get whether there is at least one bit set (see bitset_container_empty for the reverse).
1914    When the cardinality is unknown, it is computed and stored in the struct. */
1915 static inline bool bitset_container_nonzero_cardinality(
1916     bitset_container_t *bitset) {
1917     // account for laziness
1918     if (bitset->cardinality == BITSET_UNKNOWN_CARDINALITY) {
1919         // could bail early instead with a nonzero result
1920         bitset->cardinality = bitset_container_compute_cardinality(bitset);
1921     }
1922     return bitset->cardinality > 0;
1923 }
1924 
1925 /* Check whether this bitset is empty (see bitset_container_nonzero_cardinality for the reverse);
1926  * it never modifies the bitset struct. */
1927 static inline bool bitset_container_empty(
1928     const bitset_container_t *bitset) {
1929   if (bitset->cardinality == BITSET_UNKNOWN_CARDINALITY) {
1930       for (int i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS; i ++) {
1931           if((bitset->array[i]) != 0) return false;
1932       }
1933       return true;
1934   }
1935   return bitset->cardinality == 0;
1936 }
1937 
1938 
1939 /* Get whether there is at least one bit set (see bitset_container_empty for the reverse);
1940    the bitset is never modified. */
1941 static inline bool bitset_container_const_nonzero_cardinality(
1942     const bitset_container_t *bitset) {
1943     return !bitset_container_empty(bitset);
1944 }
1945 
1946 /*
1947  * Check whether the two bitsets intersect
1948  */
1949 bool bitset_container_intersect(const bitset_container_t *src_1,
1950                                   const bitset_container_t *src_2);
1951 
1952 /* Computes the union of bitsets `src_1' and `src_2' into `dst'  and return the
1953  * cardinality. */
1954 int bitset_container_or(const bitset_container_t *src_1,
1955                         const bitset_container_t *src_2,
1956                         bitset_container_t *dst);
1957 
1958 /* Computes the union of bitsets `src_1' and `src_2' and return the cardinality.
1959  */
1960 int bitset_container_or_justcard(const bitset_container_t *src_1,
1961                                  const bitset_container_t *src_2);
1962 
1963 /* Computes the union of bitsets `src_1' and `src_2' into `dst' and return the
1964  * cardinality. Same as bitset_container_or. */
1965 int bitset_container_union(const bitset_container_t *src_1,
1966                            const bitset_container_t *src_2,
1967                            bitset_container_t *dst);
1968 
1969 /* Computes the union of bitsets `src_1' and `src_2'  and return the
1970  * cardinality. Same as bitset_container_or_justcard. */
1971 int bitset_container_union_justcard(const bitset_container_t *src_1,
1972                                     const bitset_container_t *src_2);
1973 
1974 /* Computes the union of bitsets `src_1' and `src_2' into `dst', but does not
1975  * update the cardinality. Provided to optimize chained operations. */
1976 int bitset_container_or_nocard(const bitset_container_t *src_1,
1977                                const bitset_container_t *src_2,
1978                                bitset_container_t *dst);
1979 
1980 /* Computes the union of bitsets `src_1' and `src_2' into `dst', but does not
1981  * update the cardinality. Same as bitset_container_or_nocard */
1982 int bitset_container_union_nocard(const bitset_container_t *src_1,
1983                                   const bitset_container_t *src_2,
1984                                   bitset_container_t *dst);
1985 
1986 /* Computes the intersection of bitsets `src_1' and `src_2' into `dst' and
1987  * return the cardinality. */
1988 int bitset_container_and(const bitset_container_t *src_1,
1989                          const bitset_container_t *src_2,
1990                          bitset_container_t *dst);
1991 
1992 /* Computes the intersection of bitsets `src_1' and `src_2'  and return the
1993  * cardinality. */
1994 int bitset_container_and_justcard(const bitset_container_t *src_1,
1995                                   const bitset_container_t *src_2);
1996 
1997 /* Computes the intersection of bitsets `src_1' and `src_2' into `dst' and
1998  * return the cardinality. Same as bitset_container_and. */
1999 int bitset_container_intersection(const bitset_container_t *src_1,
2000                                   const bitset_container_t *src_2,
2001                                   bitset_container_t *dst);
2002 
2003 /* Computes the intersection of bitsets `src_1' and `src_2' and return the
2004  * cardinality. Same as bitset_container_and_justcard. */
2005 int bitset_container_intersection_justcard(const bitset_container_t *src_1,
2006                                            const bitset_container_t *src_2);
2007 
2008 /* Computes the intersection of bitsets `src_1' and `src_2' into `dst', but does
2009  * not update the cardinality. Provided to optimize chained operations. */
2010 int bitset_container_and_nocard(const bitset_container_t *src_1,
2011                                 const bitset_container_t *src_2,
2012                                 bitset_container_t *dst);
2013 
2014 /* Computes the intersection of bitsets `src_1' and `src_2' into `dst', but does
2015  * not update the cardinality. Same as bitset_container_and_nocard */
2016 int bitset_container_intersection_nocard(const bitset_container_t *src_1,
2017                                          const bitset_container_t *src_2,
2018                                          bitset_container_t *dst);
2019 
2020 /* Computes the exclusive or of bitsets `src_1' and `src_2' into `dst' and
2021  * return the cardinality. */
2022 int bitset_container_xor(const bitset_container_t *src_1,
2023                          const bitset_container_t *src_2,
2024                          bitset_container_t *dst);
2025 
2026 /* Computes the exclusive or of bitsets `src_1' and `src_2' and return the
2027  * cardinality. */
2028 int bitset_container_xor_justcard(const bitset_container_t *src_1,
2029                                   const bitset_container_t *src_2);
2030 
2031 /* Computes the exclusive or of bitsets `src_1' and `src_2' into `dst', but does
2032  * not update the cardinality. Provided to optimize chained operations. */
2033 int bitset_container_xor_nocard(const bitset_container_t *src_1,
2034                                 const bitset_container_t *src_2,
2035                                 bitset_container_t *dst);
2036 
2037 /* Computes the and not of bitsets `src_1' and `src_2' into `dst' and return the
2038  * cardinality. */
2039 int bitset_container_andnot(const bitset_container_t *src_1,
2040                             const bitset_container_t *src_2,
2041                             bitset_container_t *dst);
2042 
2043 /* Computes the and not of bitsets `src_1' and `src_2'  and return the
2044  * cardinality. */
2045 int bitset_container_andnot_justcard(const bitset_container_t *src_1,
2046                                      const bitset_container_t *src_2);
2047 
2048 /* Computes the and not of bitsets `src_1' and `src_2' into `dst', but does
2049  * not update the cardinality. Provided to optimize chained operations. */
2050 int bitset_container_andnot_nocard(const bitset_container_t *src_1,
2051                                    const bitset_container_t *src_2,
2052                                    bitset_container_t *dst);
2053 
2054 /*
2055  * Write out the 16-bit integers contained in this container as a list of 32-bit
2056  * integers using base
2057  * as the starting value (it might be expected that base has zeros in its 16
2058  * least significant bits).
2059  * The function returns the number of values written.
2060  * The caller is responsible for allocating enough memory in out.
2061  * The out pointer should point to enough memory (the cardinality times 32
2062  * bits).
2063  */
2064 int bitset_container_to_uint32_array(void *out, const bitset_container_t *cont,
2065                                      uint32_t base);
2066 
2067 /*
2068  * Print this container using printf (useful for debugging).
2069  */
2070 void bitset_container_printf(const bitset_container_t *v);
2071 
2072 /*
2073  * Print this container using printf as a comma-separated list of 32-bit
2074  * integers starting at base.
2075  */
2076 void bitset_container_printf_as_uint32_array(const bitset_container_t *v,
2077                                              uint32_t base);
2078 
2079 /**
2080  * Return the serialized size in bytes of a container.
2081  */
2082 static inline int32_t bitset_container_serialized_size_in_bytes(void) {
2083     return BITSET_CONTAINER_SIZE_IN_WORDS * 8;
2084 }
2085 
2086 /**
2087  * Return the number of runs.
2088  */
2089 int bitset_container_number_of_runs(bitset_container_t *b);
2090 
2091 bool bitset_container_iterate(const bitset_container_t *cont, uint32_t base,
2092                               roaring_iterator iterator, void *ptr);
2093 bool bitset_container_iterate64(const bitset_container_t *cont, uint32_t base,
2094                                 roaring_iterator64 iterator, uint64_t high_bits,
2095                                 void *ptr);
2096 
2097 /**
2098  * Writes the underlying array to buf, outputs how many bytes were written.
2099  * This is meant to be byte-by-byte compatible with the Java and Go versions of
2100  * Roaring.
2101  * The number of bytes written should be
2102  * bitset_container_size_in_bytes(container).
2103  */
2104 int32_t bitset_container_write(const bitset_container_t *container, char *buf);
2105 
2106 /**
2107  * Reads the instance from buf, outputs how many bytes were read.
2108  * This is meant to be byte-by-byte compatible with the Java and Go versions of
2109  * Roaring.
2110  * The number of bytes read should be bitset_container_size_in_bytes(container).
2111  * You need to provide the (known) cardinality.
2112  */
2113 int32_t bitset_container_read(int32_t cardinality,
2114                               bitset_container_t *container, const char *buf);
2115 /**
2116  * Return the serialized size in bytes of a container (see
2117  * bitset_container_write).
2118  * This is meant to be compatible with the Java and Go versions of Roaring and
2119  * assumes
2120  * that the cardinality of the container is already known or can be computed.
2121  */
2122 static inline int32_t bitset_container_size_in_bytes(
2123     const bitset_container_t *container) {
2124     (void)container;
2125     return BITSET_CONTAINER_SIZE_IN_WORDS * sizeof(uint64_t);
2126 }
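/*
 * Illustrative sketch (not part of the CRoaring API; the helper name is
 * hypothetical): a write/read round trip for a bitset container. It assumes
 * the cardinality of `src' is already known (not BITSET_UNKNOWN_CARDINALITY),
 * as bitset_container_read requires the known cardinality.
 */
static inline bool bitset_container_write_read_demo(const bitset_container_t *src,
                                                    bitset_container_t *dst) {
    char *buf = (char *)malloc(BITSET_CONTAINER_SIZE_IN_WORDS * sizeof(uint64_t));
    if (buf == NULL) return false;
    const int32_t written = bitset_container_write(src, buf);
    const int32_t read = bitset_container_read(bitset_container_cardinality(src), dst, buf);
    free(buf);
    return (written == read) && (written == bitset_container_size_in_bytes(src));
}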
2127 
2128 /**
2129  * Return true if the two containers have the same content.
2130  */
2131 bool bitset_container_equals(const bitset_container_t *container1,
2132                              const bitset_container_t *container2);
2133 
2134 /**
2135 * Return true if container1 is a subset of container2.
2136 */
2137 bool bitset_container_is_subset(const bitset_container_t *container1,
2138                                 const bitset_container_t *container2);
2139 
2140 /**
2141  * If the element of given rank is in this container, supposing that the first
2142  * element has rank start_rank, then the function returns true and sets element
2143  * accordingly.
2144  * Otherwise, it returns false and update start_rank.
2145  */
2146 bool bitset_container_select(const bitset_container_t *container,
2147                              uint32_t *start_rank, uint32_t rank,
2148                              uint32_t *element);
2149 
2150 /* Returns the smallest value (assumes not empty) */
2151 uint16_t bitset_container_minimum(const bitset_container_t *container);
2152 
2153 /* Returns the largest value (assumes not empty) */
2154 uint16_t bitset_container_maximum(const bitset_container_t *container);
2155 
2156 /* Returns the number of values equal or smaller than x */
2157 int bitset_container_rank(const bitset_container_t *container, uint16_t x);
2158 
2159 /* Returns the index of the first value equal or larger than x, or -1 */
2160 int bitset_container_index_equalorlarger(const bitset_container_t *container, uint16_t x);
2161 #endif /* INCLUDE_CONTAINERS_BITSET_H_ */
2162 /* end file include/roaring/containers/bitset.h */
2163 /* begin file include/roaring/containers/run.h */
2164 /*
2165  * run.h
2166  *
2167  */
2168 
2169 #ifndef INCLUDE_CONTAINERS_RUN_H_
2170 #define INCLUDE_CONTAINERS_RUN_H_
2171 
2172 #include <assert.h>
2173 #include <stdbool.h>
2174 #include <stdint.h>
2175 #include <string.h>
2176 
2177 
2178 /* struct rle16_s - run length pair
2179  *
2180  * @value:  start position of the run
2181  * @length: length of the run is `length + 1`
2182  *
2183  * An RLE pair {v, l} represents the integers in the closed interval
2184  * [v, v+l], e.g. {3, 2} = [3, 4, 5].
2185  */
2186 struct rle16_s {
2187     uint16_t value;
2188     uint16_t length;
2189 };
2190 
2191 typedef struct rle16_s rle16_t;
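/*
 * Illustrative sketch (not part of the CRoaring API; the helper name is
 * hypothetical): an rle16_t covering the closed interval [10, 14], i.e. the
 * five values 10, 11, 12, 13 and 14 (the run covers length + 1 values).
 */
static inline rle16_t rle16_make_demo(void) {
    rle16_t r;
    r.value = 10;
    r.length = 4;
    return r;
}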
2192 
2193 /* struct run_container_s - run container bitmap
2194  *
2195  * @n_runs:   number of rle_t pairs in `runs`.
2196  * @capacity: capacity in rle_t pairs `runs` can hold.
2197  * @runs:     pairs of rle_t.
2198  *
2199  */
2200 struct run_container_s {
2201     int32_t n_runs;
2202     int32_t capacity;
2203     rle16_t *runs;
2204 };
2205 
2206 typedef struct run_container_s run_container_t;
2207 
2208 /* Create a new run container. Return NULL in case of failure. */
2209 run_container_t *run_container_create(void);
2210 
2211 /* Create a new run container with given capacity. Return NULL in case of
2212  * failure. */
2213 run_container_t *run_container_create_given_capacity(int32_t size);
2214 
2215 /*
2216  * Shrink the capacity to the actual size, return the number of bytes saved.
2217  */
2218 int run_container_shrink_to_fit(run_container_t *src);
2219 
2220 /* Free memory owned by `run'. */
2221 void run_container_free(run_container_t *run);
2222 
2223 /* Duplicate container */
2224 run_container_t *run_container_clone(const run_container_t *src);
2225 
2226 int32_t run_container_serialize(const run_container_t *container,
2227                                 char *buf) WARN_UNUSED;
2228 
2229 uint32_t run_container_serialization_len(const run_container_t *container);
2230 
2231 void *run_container_deserialize(const char *buf, size_t buf_len);
2232 
2233 /*
2234  * Effectively deletes the run at index `index', repacking the data.
2235  */
2236 static inline void recoverRoomAtIndex(run_container_t *run, uint16_t index) {
2237     memmove(run->runs + index, run->runs + (1 + index),
2238             (run->n_runs - index - 1) * sizeof(rle16_t));
2239     run->n_runs--;
2240 }
2241 
2242 /**
2243  * Good old binary search through rle data
2244  */
2245 static inline int32_t interleavedBinarySearch(const rle16_t *array, int32_t lenarray,
2246                                        uint16_t ikey) {
2247     int32_t low = 0;
2248     int32_t high = lenarray - 1;
2249     while (low <= high) {
2250         int32_t middleIndex = (low + high) >> 1;
2251         uint16_t middleValue = array[middleIndex].value;
2252         if (middleValue < ikey) {
2253             low = middleIndex + 1;
2254         } else if (middleValue > ikey) {
2255             high = middleIndex - 1;
2256         } else {
2257             return middleIndex;
2258         }
2259     }
2260     return -(low + 1);
2261 }
2262 
2263 /*
2264  * Returns index of the run which contains $ikey
2265  */
2266 static inline int32_t rle16_find_run(const rle16_t *array, int32_t lenarray,
2267                                      uint16_t ikey) {
2268     int32_t low = 0;
2269     int32_t high = lenarray - 1;
2270     while (low <= high) {
2271         int32_t middleIndex = (low + high) >> 1;
2272         uint16_t min = array[middleIndex].value;
2273         uint16_t max = array[middleIndex].value + array[middleIndex].length;
2274         if (ikey > max) {
2275             low = middleIndex + 1;
2276         } else if (ikey < min) {
2277             high = middleIndex - 1;
2278         } else {
2279             return middleIndex;
2280         }
2281     }
2282     return -(low + 1);
2283 }
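/*
 * Illustrative sketch (not part of the CRoaring API; the helper name is
 * hypothetical): interpreting the return value of rle16_find_run. A negative
 * result encodes the insertion point as -(pos + 1), mirroring the convention
 * of interleavedBinarySearch above.
 */
static inline int32_t rle16_find_run_position_demo(const rle16_t *runs,
                                                   int32_t n_runs, uint16_t key) {
    const int32_t idx = rle16_find_run(runs, n_runs, key);
    return (idx >= 0) ? idx : (-idx - 1);  /* matching run, or where one would go */
}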
2284 
2285 
2286 /**
2287  * Returns the number of runs which cannot be merged with the key because they
2288  * are less than the key.
2289  * Note that [5,6,7,8] can be merged with the key 9 and won't be counted.
2290  */
2291 static inline int32_t rle16_count_less(const rle16_t* array, int32_t lenarray,
2292                                        uint16_t key) {
2293     if (lenarray == 0) return 0;
2294     int32_t low = 0;
2295     int32_t high = lenarray - 1;
2296     while (low <= high) {
2297         int32_t middleIndex = (low + high) >> 1;
2298         uint16_t min_value = array[middleIndex].value;
2299         uint16_t max_value = array[middleIndex].value + array[middleIndex].length;
2300         if (max_value + UINT32_C(1) < key) { // uint32 arithmetic
2301             low = middleIndex + 1;
2302         } else if (key < min_value) {
2303             high = middleIndex - 1;
2304         } else {
2305             return middleIndex;
2306         }
2307     }
2308     return low;
2309 }
2310 
2311 static inline int32_t rle16_count_greater(const rle16_t* array, int32_t lenarray,
2312                                           uint16_t key) {
2313     if (lenarray == 0) return 0;
2314     int32_t low = 0;
2315     int32_t high = lenarray - 1;
2316     while (low <= high) {
2317         int32_t middleIndex = (low + high) >> 1;
2318         uint16_t min_value = array[middleIndex].value;
2319         uint16_t max_value = array[middleIndex].value + array[middleIndex].length;
2320         if (max_value < key) {
2321             low = middleIndex + 1;
2322         } else if (key + UINT32_C(1) < min_value) { // uint32 arithmetic
2323             high = middleIndex - 1;
2324         } else {
2325             return lenarray - (middleIndex + 1);
2326         }
2327     }
2328     return lenarray - low;
2329 }
2330 
2331 /**
2332  * increase capacity to at least min. Whether the
2333  * existing data needs to be copied over depends on copy. If "copy" is false,
2334  * then the new content will be uninitialized, otherwise a copy is made.
2335  */
2336 void run_container_grow(run_container_t *run, int32_t min, bool copy);
2337 
2338 /**
2339  * Moves the data so that we can write data at index
2340  */
2341 static inline void makeRoomAtIndex(run_container_t *run, uint16_t index) {
2342     /* This function calls realloc + memmove sequentially to move by one index.
2343      * Potentially copying twice the array.
2344      */
2345     if (run->n_runs + 1 > run->capacity)
2346         run_container_grow(run, run->n_runs + 1, true);
2347     memmove(run->runs + 1 + index, run->runs + index,
2348             (run->n_runs - index) * sizeof(rle16_t));
2349     run->n_runs++;
2350 }
2351 
2352 /* Add `pos' to `run'. Returns true if `pos' was not present. */
2353 bool run_container_add(run_container_t *run, uint16_t pos);
2354 
2355 /* Remove `pos' from `run'. Returns true if `pos' was present. */
2356 static inline bool run_container_remove(run_container_t *run, uint16_t pos) {
2357     int32_t index = interleavedBinarySearch(run->runs, run->n_runs, pos);
2358     if (index >= 0) {
2359         int32_t le = run->runs[index].length;
2360         if (le == 0) {
2361             recoverRoomAtIndex(run, (uint16_t)index);
2362         } else {
2363             run->runs[index].value++;
2364             run->runs[index].length--;
2365         }
2366         return true;
2367     }
2368     index = -index - 2;  // points to preceding value, possibly -1
2369     if (index >= 0) {    // possible match
2370         int32_t offset = pos - run->runs[index].value;
2371         int32_t le = run->runs[index].length;
2372         if (offset < le) {
2373             // need to break in two
2374             run->runs[index].length = (uint16_t)(offset - 1);
2375             // need to insert
2376             uint16_t newvalue = pos + 1;
2377             int32_t newlength = le - offset - 1;
2378             makeRoomAtIndex(run, (uint16_t)(index + 1));
2379             run->runs[index + 1].value = newvalue;
2380             run->runs[index + 1].length = (uint16_t)newlength;
2381             return true;
2382 
2383         } else if (offset == le) {
2384             run->runs[index].length--;
2385             return true;
2386         }
2387     }
2388     // no match
2389     return false;
2390 }
2391 
2392 /* Check whether `pos' is present in `run'.  */
2393 static inline bool run_container_contains(const run_container_t *run, uint16_t pos) {
2394     int32_t index = interleavedBinarySearch(run->runs, run->n_runs, pos);
2395     if (index >= 0) return true;
2396     index = -index - 2;  // points to preceding value, possibly -1
2397     if (index != -1) {   // possible match
2398         int32_t offset = pos - run->runs[index].value;
2399         int32_t le = run->runs[index].length;
2400         if (offset <= le) return true;
2401     }
2402     return false;
2403 }
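/*
 * Illustrative sketch (not part of the CRoaring API; the helper name is
 * hypothetical): consecutive values added to a run container are expected to
 * collapse into a single run, which is the point of this representation.
 */
static inline bool run_container_consecutive_demo(void) {
    run_container_t *r = run_container_create();
    if (r == NULL) return false;
    for (uint16_t v = 100; v < 110; v++) {
        (void)run_container_add(r, v);
    }
    bool ok = (r->n_runs == 1) && run_container_contains(r, 105);
    run_container_free(r);
    return ok;
}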
2404 
2405 /*
2406 * Check whether all positions in a range of positions from pos_start (included)
2407 * to pos_end (excluded) are present in `run'.
2408 */
2409 static inline bool run_container_contains_range(const run_container_t *run,
2410                                                 uint32_t pos_start, uint32_t pos_end) {
2411     uint32_t count = 0;
2412     int32_t index = interleavedBinarySearch(run->runs, run->n_runs, pos_start);
2413     if (index < 0) {
2414         index = -index - 2;
2415         if ((index == -1) || ((pos_start - run->runs[index].value) > run->runs[index].length)){
2416             return false;
2417         }
2418     }
2419     for (int32_t i = index; i < run->n_runs; ++i) {
2420         const uint32_t stop = run->runs[i].value + run->runs[i].length;
2421         if (run->runs[i].value >= pos_end) break;
2422         if (stop >= pos_end) {
2423             count += (((pos_end - run->runs[i].value) > 0) ? (pos_end - run->runs[i].value) : 0);
2424             break;
2425         }
2426         const uint32_t min = (stop - pos_start) > 0 ? (stop - pos_start) : 0;
2427         count += (min < run->runs[i].length) ? min : run->runs[i].length;
2428     }
2429     return count >= (pos_end - pos_start - 1);
2430 }
2431 
2432 #ifdef USEAVX
2433 
2434 /* Get the cardinality of `run'. Requires an actual computation. */
2435 static inline int run_container_cardinality(const run_container_t *run) {
2436     const int32_t n_runs = run->n_runs;
2437     const rle16_t *runs = run->runs;
2438 
2439     /* by initializing with n_runs, we omit counting the +1 for each pair. */
2440     int sum = n_runs;
2441     int32_t k = 0;
2442     const int32_t step = sizeof(__m256i) / sizeof(rle16_t);
2443     if (n_runs > step) {
2444         __m256i total = _mm256_setzero_si256();
2445         for (; k + step <= n_runs; k += step) {
2446             __m256i ymm1 = _mm256_lddqu_si256((const __m256i *)(runs + k));
2447             __m256i justlengths = _mm256_srli_epi32(ymm1, 16);
2448             total = _mm256_add_epi32(total, justlengths);
2449         }
2450         // a store might be faster than extract?
2451         uint32_t buffer[sizeof(__m256i) / sizeof(rle16_t)];
2452         _mm256_storeu_si256((__m256i *)buffer, total);
2453         sum += (buffer[0] + buffer[1]) + (buffer[2] + buffer[3]) +
2454                (buffer[4] + buffer[5]) + (buffer[6] + buffer[7]);
2455     }
2456     for (; k < n_runs; ++k) {
2457         sum += runs[k].length;
2458     }
2459 
2460     return sum;
2461 }
2462 
2463 #else
2464 
2465 /* Get the cardinality of `run'. Requires an actual computation. */
2466 static inline int run_container_cardinality(const run_container_t *run) {
2467     const int32_t n_runs = run->n_runs;
2468     const rle16_t *runs = run->runs;
2469 
2470     /* by initializing with n_runs, we omit counting the +1 for each pair. */
2471     int sum = n_runs;
2472     for (int k = 0; k < n_runs; ++k) {
2473         sum += runs[k].length;
2474     }
2475 
2476     return sum;
2477 }
2478 #endif
2479 
2480 /* Card > 0?, see run_container_empty for the reverse */
2481 static inline bool run_container_nonzero_cardinality(
2482     const run_container_t *run) {
2483     return run->n_runs > 0;  // runs never empty
2484 }
2485 
2486 /* Card == 0?, see run_container_nonzero_cardinality for the reverse */
2487 static inline bool run_container_empty(
2488     const run_container_t *run) {
2489     return run->n_runs == 0;  // runs never empty
2490 }
2491 
2492 
2493 
2494 /* Copy one container into another. We assume that they are distinct. */
2495 void run_container_copy(const run_container_t *src, run_container_t *dst);
2496 
2497 /* Set the cardinality to zero (does not release memory). */
2498 static inline void run_container_clear(run_container_t *run) {
2499     run->n_runs = 0;
2500 }
2501 
2502 /**
2503  * Append run described by vl to the run container, possibly merging.
2504  * It is assumed that the run would be inserted at the end of the container, no
2505  * check is made.
2506  * It is assumed that the run container has the necessary capacity: caller is
2507  * responsible for checking memory capacity.
2508  *
2509  *
2510  * This is not a safe function, it is meant for performance: use with care.
2511  */
static inline void run_container_append(run_container_t *run, rle16_t vl,
2513                                         rle16_t *previousrl) {
2514     const uint32_t previousend = previousrl->value + previousrl->length;
2515     if (vl.value > previousend + 1) {  // we add a new one
2516         run->runs[run->n_runs] = vl;
2517         run->n_runs++;
2518         *previousrl = vl;
2519     } else {
2520         uint32_t newend = vl.value + vl.length + UINT32_C(1);
2521         if (newend > previousend) {  // we merge
2522             previousrl->length = (uint16_t)(newend - 1 - previousrl->value);
2523             run->runs[run->n_runs - 1] = *previousrl;
2524         }
2525     }
2526 }
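
/*
 * Illustrative sketch (not part of CRoaring): if the last run is {5, 2}
 * (covering 5..7), appending {8, 1} merges because 8 <= previousend + 1,
 * extending the last run to {5, 4} (covering 5..9); appending {12, 0}
 * instead would start a new run since 12 > previousend + 1.
 *
 *     rle16_t prev = run->runs[run->n_runs - 1];   // e.g. {5, 2}
 *     rle16_t next; next.value = 8; next.length = 1;
 *     run_container_append(run, next, &prev);      // last run becomes {5, 4}
 */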
2527 
2528 /**
2529  * Like run_container_append but it is assumed that the content of run is empty.
2530  */
static inline rle16_t run_container_append_first(run_container_t *run,
2532                                                  rle16_t vl) {
2533     run->runs[run->n_runs] = vl;
2534     run->n_runs++;
2535     return vl;
2536 }
2537 
2538 /**
 * Append a single value given by val to the run container, possibly merging.
 * It is assumed that the value would be inserted at the end of the container;
 * no check is made.
2542  * It is assumed that the run container has the necessary capacity: caller is
2543  * responsible for checking memory capacity.
2544  *
2545  * This is not a safe function, it is meant for performance: use with care.
2546  */
static inline void run_container_append_value(run_container_t *run,
2548                                               uint16_t val,
2549                                               rle16_t *previousrl) {
2550     const uint32_t previousend = previousrl->value + previousrl->length;
2551     if (val > previousend + 1) {  // we add a new one
2552         //*previousrl = (rle16_t){.value = val, .length = 0};// requires C99
2553         previousrl->value = val;
2554         previousrl->length = 0;
2555 
2556         run->runs[run->n_runs] = *previousrl;
2557         run->n_runs++;
2558     } else if (val == previousend + 1) {  // we merge
2559         previousrl->length++;
2560         run->runs[run->n_runs - 1] = *previousrl;
2561     }
2562 }
2563 
2564 /**
2565  * Like run_container_append_value but it is assumed that the content of run is
2566  * empty.
2567  */
static inline rle16_t run_container_append_value_first(run_container_t *run,
2569                                                        uint16_t val) {
2570     // rle16_t newrle = (rle16_t){.value = val, .length = 0};// requires C99
2571     rle16_t newrle;
2572     newrle.value = val;
2573     newrle.length = 0;
2574 
2575     run->runs[run->n_runs] = newrle;
2576     run->n_runs++;
2577     return newrle;
2578 }
2579 
2580 /* Check whether the container spans the whole chunk (cardinality = 1<<16).
2581  * This check can be done in constant time (inexpensive). */
static inline bool run_container_is_full(const run_container_t *run) {
2583     rle16_t vl = run->runs[0];
2584     return (run->n_runs == 1) && (vl.value == 0) && (vl.length == 0xFFFF);
2585 }
2586 
2587 /* Compute the union of `src_1' and `src_2' and write the result to `dst'
2588  * It is assumed that `dst' is distinct from both `src_1' and `src_2'. */
2589 void run_container_union(const run_container_t *src_1,
2590                          const run_container_t *src_2, run_container_t *dst);
2591 
2592 /* Compute the union of `src_1' and `src_2' and write the result to `src_1' */
2593 void run_container_union_inplace(run_container_t *src_1,
2594                                  const run_container_t *src_2);
2595 
2596 /* Compute the intersection of src_1 and src_2 and write the result to
2597  * dst. It is assumed that dst is distinct from both src_1 and src_2. */
2598 void run_container_intersection(const run_container_t *src_1,
2599                                 const run_container_t *src_2,
2600                                 run_container_t *dst);
2601 
2602 /* Compute the size of the intersection of src_1 and src_2 . */
2603 int run_container_intersection_cardinality(const run_container_t *src_1,
2604                                            const run_container_t *src_2);
2605 
2606 /* Check whether src_1 and src_2 intersect. */
2607 bool run_container_intersect(const run_container_t *src_1,
2608                                 const run_container_t *src_2);
2609 
2610 /* Compute the symmetric difference of `src_1' and `src_2' and write the result
2611  * to `dst'
2612  * It is assumed that `dst' is distinct from both `src_1' and `src_2'. */
2613 void run_container_xor(const run_container_t *src_1,
2614                        const run_container_t *src_2, run_container_t *dst);
2615 
2616 /*
2617  * Write out the 16-bit integers contained in this container as a list of 32-bit
2618  * integers using base
2619  * as the starting value (it might be expected that base has zeros in its 16
2620  * least significant bits).
2621  * The function returns the number of values written.
2622  * The caller is responsible for allocating enough memory in out.
2623  */
2624 int run_container_to_uint32_array(void *vout, const run_container_t *cont,
2625                                   uint32_t base);
2626 
2627 /*
2628  * Print this container using printf (useful for debugging).
2629  */
2630 void run_container_printf(const run_container_t *v);
2631 
2632 /*
2633  * Print this container using printf as a comma-separated list of 32-bit
2634  * integers starting at base.
2635  */
2636 void run_container_printf_as_uint32_array(const run_container_t *v,
2637                                           uint32_t base);
2638 
2639 /**
2640  * Return the serialized size in bytes of a container having "num_runs" runs.
2641  */
static inline int32_t run_container_serialized_size_in_bytes(int32_t num_runs) {
2643     return sizeof(uint16_t) +
2644            sizeof(rle16_t) * num_runs;  // each run requires 2 2-byte entries.
2645 }
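
/*
 * Worked example (illustrative): a container holding 100 runs serializes to
 * sizeof(uint16_t) + 100 * sizeof(rle16_t) = 2 + 100 * 4 = 402 bytes.
 */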
2646 
2647 bool run_container_iterate(const run_container_t *cont, uint32_t base,
2648                            roaring_iterator iterator, void *ptr);
2649 bool run_container_iterate64(const run_container_t *cont, uint32_t base,
2650                              roaring_iterator64 iterator, uint64_t high_bits,
2651                              void *ptr);
2652 
2653 /**
2654  * Writes the underlying array to buf, outputs how many bytes were written.
2655  * This is meant to be byte-by-byte compatible with the Java and Go versions of
2656  * Roaring.
2657  * The number of bytes written should be run_container_size_in_bytes(container).
2658  */
2659 int32_t run_container_write(const run_container_t *container, char *buf);
2660 
2661 /**
2662  * Reads the instance from buf, outputs how many bytes were read.
2663  * This is meant to be byte-by-byte compatible with the Java and Go versions of
2664  * Roaring.
 * The number of bytes read should be run_container_size_in_bytes(container).
 * The cardinality parameter is provided for consistency with other containers,
 * but it might be effectively ignored.
2669  */
2670 int32_t run_container_read(int32_t cardinality, run_container_t *container,
2671                            const char *buf);
2672 
2673 /**
2674  * Return the serialized size in bytes of a container (see run_container_write).
2675  * This is meant to be compatible with the Java and Go versions of Roaring.
2676  */
static inline int32_t run_container_size_in_bytes(
2678     const run_container_t *container) {
2679     return run_container_serialized_size_in_bytes(container->n_runs);
2680 }
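
/*
 * Serialization round-trip sketch (illustrative, not part of CRoaring),
 * assuming `rc` is a valid run container, `buf` has enough room, and the
 * run_container_create/free declarations earlier in this header:
 *
 *     int32_t nbytes = run_container_write(rc, buf);
 *     // nbytes == run_container_size_in_bytes(rc)
 *     run_container_t *copy = run_container_create();
 *     run_container_read(run_container_cardinality(rc), copy, buf);
 *     // run_container_equals(rc, copy) should now return true
 *     run_container_free(copy);
 */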
2681 
2682 /**
2683  * Return true if the two containers have the same content.
2684  */
static inline bool run_container_equals(const run_container_t *container1,
2686                           const run_container_t *container2) {
2687     if (container1->n_runs != container2->n_runs) {
2688         return false;
2689     }
2690     return memequals(container1->runs, container2->runs,
2691                      container1->n_runs * sizeof(rle16_t));
2692 }
2693 
2694 /**
2695 * Return true if container1 is a subset of container2.
2696 */
2697 bool run_container_is_subset(const run_container_t *container1,
2698                              const run_container_t *container2);
2699 
2700 /**
2701  * Used in a start-finish scan that appends segments, for XOR and NOT
2702  */
2703 
2704 void run_container_smart_append_exclusive(run_container_t *src,
2705                                           const uint16_t start,
2706                                           const uint16_t length);
2707 
2708 /**
2709 * The new container consists of a single run [start,stop).
* It is required that stop > start; the caller is responsible for this check.
* It is required that stop <= (1<<16); the caller is responsible for this check.
2712 * The cardinality of the created container is stop - start.
2713 * Returns NULL on failure
2714 */
static inline run_container_t *run_container_create_range(uint32_t start,
2716                                                           uint32_t stop) {
2717     run_container_t *rc = run_container_create_given_capacity(1);
2718     if (rc) {
2719         rle16_t r;
2720         r.value = (uint16_t)start;
2721         r.length = (uint16_t)(stop - start - 1);
2722         run_container_append_first(rc, r);
2723     }
2724     return rc;
2725 }
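
/*
 * Illustrative example (not part of CRoaring):
 * run_container_create_range(10, 20) produces the single run {10, 9}
 * (covering 10..19) with cardinality 10, while
 * run_container_create_range(0, 1 << 16) produces {0, 0xFFFF}, for which
 * run_container_is_full() returns true.
 */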
2726 
2727 /**
2728  * If the element of given rank is in this container, supposing that the first
2729  * element has rank start_rank, then the function returns true and sets element
2730  * accordingly.
 * Otherwise, it returns false and updates start_rank.
2732  */
2733 bool run_container_select(const run_container_t *container,
2734                           uint32_t *start_rank, uint32_t rank,
2735                           uint32_t *element);
2736 
2737 /* Compute the difference of src_1 and src_2 and write the result to
2738  * dst. It is assumed that dst is distinct from both src_1 and src_2. */
2739 
2740 void run_container_andnot(const run_container_t *src_1,
2741                           const run_container_t *src_2, run_container_t *dst);
2742 
2743 /* Returns the smallest value (assumes not empty) */
static inline uint16_t run_container_minimum(const run_container_t *run) {
2745     if (run->n_runs == 0) return 0;
2746     return run->runs[0].value;
2747 }
2748 
2749 /* Returns the largest value (assumes not empty) */
static inline uint16_t run_container_maximum(const run_container_t *run) {
2751     if (run->n_runs == 0) return 0;
2752     return run->runs[run->n_runs - 1].value + run->runs[run->n_runs - 1].length;
2753 }
2754 
2755 /* Returns the number of values equal or smaller than x */
2756 int run_container_rank(const run_container_t *arr, uint16_t x);
2757 
2758 /* Returns the index of the first run containing a value at least as large as x, or -1 */
static inline int run_container_index_equalorlarger(const run_container_t *arr, uint16_t x) {
2760     int32_t index = interleavedBinarySearch(arr->runs, arr->n_runs, x);
2761     if (index >= 0) return index;
2762     index = -index - 2;  // points to preceding run, possibly -1
2763     if (index != -1) {   // possible match
2764         int32_t offset = x - arr->runs[index].value;
2765         int32_t le = arr->runs[index].length;
2766         if (offset <= le) return index;
2767     }
2768     index += 1;
2769     if(index  < arr->n_runs) {
2770       return index;
2771     }
2772     return -1;
2773 }
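
/*
 * Illustrative example (not part of CRoaring): with runs {2, 3} (covering
 * 2..5) and {10, 0},
 *
 *     run_container_index_equalorlarger(arr, 4);   // 0: 4 lies in run 0
 *     run_container_index_equalorlarger(arr, 7);   // 1: next run starts at 10
 *     run_container_index_equalorlarger(arr, 11);  // -1: no value >= 11
 */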
2774 
2775 /*
2776  * Add all values in range [min, max] using hint.
2777  */
static inline void run_container_add_range_nruns(run_container_t* run,
2779                                                  uint32_t min, uint32_t max,
2780                                                  int32_t nruns_less,
2781                                                  int32_t nruns_greater) {
2782     int32_t nruns_common = run->n_runs - nruns_less - nruns_greater;
2783     if (nruns_common == 0) {
2784         makeRoomAtIndex(run, nruns_less);
2785         run->runs[nruns_less].value = min;
2786         run->runs[nruns_less].length = max - min;
2787     } else {
2788         uint32_t common_min = run->runs[nruns_less].value;
2789         uint32_t common_max = run->runs[nruns_less + nruns_common - 1].value +
2790                               run->runs[nruns_less + nruns_common - 1].length;
2791         uint32_t result_min = (common_min < min) ? common_min : min;
2792         uint32_t result_max = (common_max > max) ? common_max : max;
2793 
2794         run->runs[nruns_less].value = result_min;
2795         run->runs[nruns_less].length = result_max - result_min;
2796 
2797         memmove(&(run->runs[nruns_less + 1]),
2798                 &(run->runs[run->n_runs - nruns_greater]),
2799                 nruns_greater*sizeof(rle16_t));
2800         run->n_runs = nruns_less + 1 + nruns_greater;
2801     }
2802 }
2803 
2804 /**
2805  * Add all values in range [min, max]
2806  */
static inline void run_container_add_range(run_container_t* run,
2808                                            uint32_t min, uint32_t max) {
2809     int32_t nruns_greater = rle16_count_greater(run->runs, run->n_runs, max);
2810     int32_t nruns_less = rle16_count_less(run->runs, run->n_runs - nruns_greater, min);
2811     run_container_add_range_nruns(run, min, max, nruns_less, nruns_greater);
2812 }
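
/*
 * Illustrative example (not part of CRoaring): with runs {1, 2} (covering
 * 1..3) and {10, 2} (covering 10..12), run_container_add_range(run, 4, 9)
 * finds no run strictly below or above the (mergeable) range, so both runs
 * fall in the common region and the container collapses to the single run
 * {1, 11}, covering 1..12.
 */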
2813 
2814 /**
2815  * Shifts last $count elements either left (distance < 0) or right (distance > 0)
2816  */
static inline void run_container_shift_tail(run_container_t* run,
2818                                             int32_t count, int32_t distance) {
2819     if (distance > 0) {
2820         if (run->capacity < count+distance) {
2821             run_container_grow(run, count+distance, true);
2822         }
2823     }
2824     int32_t srcpos = run->n_runs - count;
2825     int32_t dstpos = srcpos + distance;
2826     memmove(&(run->runs[dstpos]), &(run->runs[srcpos]), sizeof(rle16_t) * count);
2827     run->n_runs += distance;
2828 }
2829 
2830 /**
2831  * Remove all elements in range [min, max]
2832  */
static inline void run_container_remove_range(run_container_t *run, uint32_t min, uint32_t max) {
2834     int32_t first = rle16_find_run(run->runs, run->n_runs, min);
2835     int32_t last = rle16_find_run(run->runs, run->n_runs, max);
2836 
2837     if (first >= 0 && min > run->runs[first].value &&
2838         max < ((uint32_t)run->runs[first].value + (uint32_t)run->runs[first].length)) {
2839         // split this run into two adjacent runs
2840 
2841         // right subinterval
2842         makeRoomAtIndex(run, first+1);
2843         run->runs[first+1].value = max + 1;
2844         run->runs[first+1].length = (run->runs[first].value + run->runs[first].length) - (max + 1);
2845 
2846         // left subinterval
2847         run->runs[first].length = (min - 1) - run->runs[first].value;
2848 
2849         return;
2850     }
2851 
2852     // update left-most partial run
2853     if (first >= 0) {
2854         if (min > run->runs[first].value) {
2855             run->runs[first].length = (min - 1) - run->runs[first].value;
2856             first++;
2857         }
2858     } else {
2859         first = -first-1;
2860     }
2861 
2862     // update right-most run
2863     if (last >= 0) {
2864         uint16_t run_max = run->runs[last].value + run->runs[last].length;
2865         if (run_max > max) {
2866             run->runs[last].value = max + 1;
2867             run->runs[last].length = run_max - (max + 1);
2868             last--;
2869         }
2870     } else {
2871         last = (-last-1) - 1;
2872     }
2873 
2874     // remove intermediate runs
2875     if (first <= last) {
2876         run_container_shift_tail(run, run->n_runs - (last+1), -(last-first+1));
2877     }
2878 }
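
/*
 * Illustrative example (not part of CRoaring): removing [5, 10] from the
 * single run {0, 20} (covering 0..20) takes the split branch above and
 * leaves two runs, {0, 4} covering 0..4 and {11, 9} covering 11..20.
 */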
2879 
2880 
2881 #endif /* INCLUDE_CONTAINERS_RUN_H_ */
2882 /* end file include/roaring/containers/run.h */
2883 /* begin file include/roaring/containers/convert.h */
2884 /*
2885  * convert.h
2886  *
2887  */
2888 
2889 #ifndef INCLUDE_CONTAINERS_CONVERT_H_
2890 #define INCLUDE_CONTAINERS_CONVERT_H_
2891 
2892 
2893 /* Convert an array into a bitset. The input container is not freed or modified.
2894  */
2895 bitset_container_t *bitset_container_from_array(const array_container_t *arr);
2896 
2897 /* Convert a run into a bitset. The input container is not freed or modified. */
2898 bitset_container_t *bitset_container_from_run(const run_container_t *arr);
2899 
2900 /* Convert a run into an array. The input container is not freed or modified. */
2901 array_container_t *array_container_from_run(const run_container_t *arr);
2902 
2903 /* Convert a bitset into an array. The input container is not freed or modified.
2904  */
2905 array_container_t *array_container_from_bitset(const bitset_container_t *bits);
2906 
2907 /* Convert an array into a run. The input container is not freed or modified.
2908  */
2909 run_container_t *run_container_from_array(const array_container_t *c);
2910 
2911 /* convert a run into either an array or a bitset
2912  * might free the container. This does not free the input run container. */
2913 void *convert_to_bitset_or_array_container(run_container_t *r, int32_t card,
2914                                            uint8_t *resulttype);
2915 
2916 /* convert containers to and from runcontainers, as is most space efficient.
2917  * The container might be freed. */
2918 void *convert_run_optimize(void *c, uint8_t typecode_original,
2919                            uint8_t *typecode_after);
2920 
2921 /* converts a run container to either an array or a bitset, IF it saves space.
2922  */
/* If a conversion occurs, the caller is responsible for freeing the original
 * container and becomes responsible for freeing the new one. */
2926 void *convert_run_to_efficient_container(run_container_t *c,
2927                                          uint8_t *typecode_after);
2928 // like convert_run_to_efficient_container but frees the old result if needed
2929 void *convert_run_to_efficient_container_and_free(run_container_t *c,
2930                                                   uint8_t *typecode_after);
2931 
2932 /**
2933  * Create new bitset container which is a union of run container and
2934  * range [min, max]. Caller is responsible for freeing run container.
2935  */
2936 bitset_container_t *bitset_container_from_run_range(const run_container_t *run,
2937                                                     uint32_t min, uint32_t max);
2938 
2939 #endif /* INCLUDE_CONTAINERS_CONVERT_H_ */
2940 /* end file include/roaring/containers/convert.h */
2941 /* begin file include/roaring/containers/mixed_equal.h */
2942 /*
2943  * mixed_equal.h
2944  *
2945  */
2946 
2947 #ifndef CONTAINERS_MIXED_EQUAL_H_
2948 #define CONTAINERS_MIXED_EQUAL_H_
2949 
2950 
2951 /**
2952  * Return true if the two containers have the same content.
2953  */
2954 bool array_container_equal_bitset(const array_container_t* container1,
2955                                   const bitset_container_t* container2);
2956 
2957 /**
2958  * Return true if the two containers have the same content.
2959  */
2960 bool run_container_equals_array(const run_container_t* container1,
2961                                 const array_container_t* container2);
2962 /**
2963  * Return true if the two containers have the same content.
2964  */
2965 bool run_container_equals_bitset(const run_container_t* container1,
2966                                  const bitset_container_t* container2);
2967 
2968 #endif /* CONTAINERS_MIXED_EQUAL_H_ */
2969 /* end file include/roaring/containers/mixed_equal.h */
2970 /* begin file include/roaring/containers/mixed_subset.h */
2971 /*
2972  * mixed_subset.h
2973  *
2974  */
2975 
2976 #ifndef CONTAINERS_MIXED_SUBSET_H_
2977 #define CONTAINERS_MIXED_SUBSET_H_
2978 
2979 
2980 /**
2981  * Return true if container1 is a subset of container2.
2982  */
2983 bool array_container_is_subset_bitset(const array_container_t* container1,
2984                                       const bitset_container_t* container2);
2985 
2986 /**
2987 * Return true if container1 is a subset of container2.
2988  */
2989 bool run_container_is_subset_array(const run_container_t* container1,
2990                                    const array_container_t* container2);
2991 
2992 /**
2993 * Return true if container1 is a subset of container2.
2994  */
2995 bool array_container_is_subset_run(const array_container_t* container1,
2996                                    const run_container_t* container2);
2997 
2998 /**
2999 * Return true if container1 is a subset of container2.
3000  */
3001 bool run_container_is_subset_bitset(const run_container_t* container1,
3002                                     const bitset_container_t* container2);
3003 
3004 /**
3005 * Return true if container1 is a subset of container2.
3006 */
3007 bool bitset_container_is_subset_run(const bitset_container_t* container1,
3008                                     const run_container_t* container2);
3009 
3010 #endif /* CONTAINERS_MIXED_SUBSET_H_ */
3011 /* end file include/roaring/containers/mixed_subset.h */
3012 /* begin file include/roaring/containers/mixed_andnot.h */
3013 /*
3014  * mixed_andnot.h
3015  */
3016 #ifndef INCLUDE_CONTAINERS_MIXED_ANDNOT_H_
3017 #define INCLUDE_CONTAINERS_MIXED_ANDNOT_H_
3018 
3019 
3020 /* Compute the andnot of src_1 and src_2 and write the result to
 * dst, a valid array container that could be the same as src_1. */
3022 void array_bitset_container_andnot(const array_container_t *src_1,
3023                                    const bitset_container_t *src_2,
3024                                    array_container_t *dst);
3025 
3026 /* Compute the andnot of src_1 and src_2 and write the result to
3027  * src_1 */
3028 
3029 void array_bitset_container_iandnot(array_container_t *src_1,
3030                                     const bitset_container_t *src_2);
3031 
3032 /* Compute the andnot of src_1 and src_2 and write the result to
3033  * dst, which does not initially have a valid container.
3034  * Return true for a bitset result; false for array
3035  */
3036 
3037 bool bitset_array_container_andnot(const bitset_container_t *src_1,
3038                                    const array_container_t *src_2, void **dst);
3039 
3040 /* Compute the andnot of src_1 and src_2 and write the result to
3041  * dst (which has no container initially).  It will modify src_1
3042  * to be dst if the result is a bitset.  Otherwise, it will
3043  * free src_1 and dst will be a new array container.  In both
3044  * cases, the caller is responsible for deallocating dst.
3045  * Returns true iff dst is a bitset  */
3046 
3047 bool bitset_array_container_iandnot(bitset_container_t *src_1,
3048                                     const array_container_t *src_2, void **dst);
3049 
3050 /* Compute the andnot of src_1 and src_2 and write the result to
3051  * dst. Result may be either a bitset or an array container
3052  * (returns "result is bitset"). dst does not initially have
3053  * any container, but becomes either a bitset container (return
3054  * result true) or an array container.
3055  */
3056 
3057 bool run_bitset_container_andnot(const run_container_t *src_1,
3058                                  const bitset_container_t *src_2, void **dst);
3059 
3060 /* Compute the andnot of src_1 and src_2 and write the result to
3061  * dst. Result may be either a bitset or an array container
3062  * (returns "result is bitset"). dst does not initially have
3063  * any container, but becomes either a bitset container (return
3064  * result true) or an array container.
3065  */
3066 
3067 bool run_bitset_container_iandnot(run_container_t *src_1,
3068                                   const bitset_container_t *src_2, void **dst);
3069 
3070 /* Compute the andnot of src_1 and src_2 and write the result to
3071  * dst. Result may be either a bitset or an array container
3072  * (returns "result is bitset").  dst does not initially have
3073  * any container, but becomes either a bitset container (return
3074  * result true) or an array container.
3075  */
3076 
3077 bool bitset_run_container_andnot(const bitset_container_t *src_1,
3078                                  const run_container_t *src_2, void **dst);
3079 
3080 /* Compute the andnot of src_1 and src_2 and write the result to
3081  * dst (which has no container initially).  It will modify src_1
3082  * to be dst if the result is a bitset.  Otherwise, it will
3083  * free src_1 and dst will be a new array container.  In both
3084  * cases, the caller is responsible for deallocating dst.
3085  * Returns true iff dst is a bitset  */
3086 
3087 bool bitset_run_container_iandnot(bitset_container_t *src_1,
3088                                   const run_container_t *src_2, void **dst);
3089 
3090 /* dst does not indicate a valid container initially.  Eventually it
3091  * can become any type of container.
3092  */
3093 
3094 int run_array_container_andnot(const run_container_t *src_1,
3095                                const array_container_t *src_2, void **dst);
3096 
3097 /* Compute the andnot of src_1 and src_2 and write the result to
3098  * dst (which has no container initially).  It will modify src_1
3099  * to be dst if the result is a bitset.  Otherwise, it will
3100  * free src_1 and dst will be a new array container.  In both
3101  * cases, the caller is responsible for deallocating dst.
3102  * Returns true iff dst is a bitset  */
3103 
3104 int run_array_container_iandnot(run_container_t *src_1,
3105                                 const array_container_t *src_2, void **dst);
3106 
3107 /* dst must be a valid array container, allowed to be src_1 */
3108 
3109 void array_run_container_andnot(const array_container_t *src_1,
3110                                 const run_container_t *src_2,
3111                                 array_container_t *dst);
3112 
/* In-place version: src_1 is modified to contain the result of the andnot,
 * which remains an array container.
 */
3116 
3117 void array_run_container_iandnot(array_container_t *src_1,
3118                                  const run_container_t *src_2);
3119 
3120 /* dst does not indicate a valid container initially.  Eventually it
3121  * can become any kind of container.
3122  */
3123 
3124 int run_run_container_andnot(const run_container_t *src_1,
3125                              const run_container_t *src_2, void **dst);
3126 
3127 /* Compute the andnot of src_1 and src_2 and write the result to
3128  * dst (which has no container initially).  It will modify src_1
3129  * to be dst if the result is a bitset.  Otherwise, it will
3130  * free src_1 and dst will be a new array container.  In both
3131  * cases, the caller is responsible for deallocating dst.
3132  * Returns true iff dst is a bitset  */
3133 
3134 int run_run_container_iandnot(run_container_t *src_1,
3135                               const run_container_t *src_2, void **dst);
3136 
3137 /*
3138  * dst is a valid array container and may be the same as src_1
3139  */
3140 
3141 void array_array_container_andnot(const array_container_t *src_1,
3142                                   const array_container_t *src_2,
3143                                   array_container_t *dst);
3144 
3145 /* inplace array-array andnot will always be able to reuse the space of
3146  * src_1 */
3147 void array_array_container_iandnot(array_container_t *src_1,
3148                                    const array_container_t *src_2);
3149 
3150 /* Compute the andnot of src_1 and src_2 and write the result to
3151  * dst (which has no container initially). Return value is
3152  * "dst is a bitset"
3153  */
3154 
3155 bool bitset_bitset_container_andnot(const bitset_container_t *src_1,
3156                                     const bitset_container_t *src_2,
3157                                     void **dst);
3158 
3159 /* Compute the andnot of src_1 and src_2 and write the result to
3160  * dst (which has no container initially).  It will modify src_1
3161  * to be dst if the result is a bitset.  Otherwise, it will
3162  * free src_1 and dst will be a new array container.  In both
3163  * cases, the caller is responsible for deallocating dst.
3164  * Returns true iff dst is a bitset  */
3165 
3166 bool bitset_bitset_container_iandnot(bitset_container_t *src_1,
3167                                      const bitset_container_t *src_2,
3168                                      void **dst);
3169 #endif
3170 /* end file include/roaring/containers/mixed_andnot.h */
3171 /* begin file include/roaring/containers/mixed_intersection.h */
3172 /*
3173  * mixed_intersection.h
3174  *
3175  */
3176 
3177 #ifndef INCLUDE_CONTAINERS_MIXED_INTERSECTION_H_
3178 #define INCLUDE_CONTAINERS_MIXED_INTERSECTION_H_
3179 
3180 /* These functions appear to exclude cases where the
3181  * inputs have the same type and the output is guaranteed
3182  * to have the same type as the inputs.  Eg, array intersection
3183  */
3184 
3185 
3186 /* Compute the intersection of src_1 and src_2 and write the result to
3187  * dst. It is allowed for dst to be equal to src_1. We assume that dst is a
3188  * valid container. */
3189 void array_bitset_container_intersection(const array_container_t *src_1,
3190                                          const bitset_container_t *src_2,
3191                                          array_container_t *dst);
3192 
3193 /* Compute the size of the intersection of src_1 and src_2. */
3194 int array_bitset_container_intersection_cardinality(
3195     const array_container_t *src_1, const bitset_container_t *src_2);
3196 
3197 
3198 
3199 /* Checking whether src_1 and src_2 intersect. */
3200 bool array_bitset_container_intersect(const array_container_t *src_1,
3201                                          const bitset_container_t *src_2);
3202 
3203 /*
3204  * Compute the intersection between src_1 and src_2 and write the result
 * to *dst. If the return value is true, the result is a bitset_container_t,
 * otherwise it is an array_container_t. We assume that dst is not pre-allocated. In
3207  * case of failure, *dst will be NULL.
3208  */
3209 bool bitset_bitset_container_intersection(const bitset_container_t *src_1,
3210                                           const bitset_container_t *src_2,
3211                                           void **dst);
3212 
3213 /* Compute the intersection between src_1 and src_2 and write the result to
3214  * dst. It is allowed for dst to be equal to src_1. We assume that dst is a
3215  * valid container. */
3216 void array_run_container_intersection(const array_container_t *src_1,
3217                                       const run_container_t *src_2,
3218                                       array_container_t *dst);
3219 
3220 /* Compute the intersection between src_1 and src_2 and write the result to
 * *dst. If the return value is true, the result is a bitset_container_t,
 * otherwise it is an array_container_t.
3223  * If *dst == src_2, then an in-place intersection is attempted
3224  **/
3225 bool run_bitset_container_intersection(const run_container_t *src_1,
3226                                        const bitset_container_t *src_2,
3227                                        void **dst);
3228 
3229 /* Compute the size of the intersection between src_1 and src_2 . */
3230 int array_run_container_intersection_cardinality(const array_container_t *src_1,
3231                                                  const run_container_t *src_2);
3232 
3233 /* Compute the size of the intersection  between src_1 and src_2
3234  **/
3235 int run_bitset_container_intersection_cardinality(const run_container_t *src_1,
3236                                        const bitset_container_t *src_2);
3237 
3238 
3239 /* Check that src_1 and src_2 intersect. */
3240 bool array_run_container_intersect(const array_container_t *src_1,
3241                                       const run_container_t *src_2);
3242 
3243 /* Check that src_1 and src_2 intersect.
3244  **/
3245 bool run_bitset_container_intersect(const run_container_t *src_1,
3246                                        const bitset_container_t *src_2);
3247 
3248 /*
3249  * Same as bitset_bitset_container_intersection except that if the output is to
3250  * be a
3251  * bitset_container_t, then src_1 is modified and no allocation is made.
3252  * If the output is to be an array_container_t, then caller is responsible
3253  * to free the container.
3254  * In all cases, the result is in *dst.
3255  */
3256 bool bitset_bitset_container_intersection_inplace(
3257     bitset_container_t *src_1, const bitset_container_t *src_2, void **dst);
3258 
3259 #endif /* INCLUDE_CONTAINERS_MIXED_INTERSECTION_H_ */
3260 /* end file include/roaring/containers/mixed_intersection.h */
3261 /* begin file include/roaring/containers/mixed_negation.h */
3262 /*
3263  * mixed_negation.h
3264  *
3265  */
3266 
3267 #ifndef INCLUDE_CONTAINERS_MIXED_NEGATION_H_
3268 #define INCLUDE_CONTAINERS_MIXED_NEGATION_H_
3269 
3270 
3271 /* Negation across the entire range of the container.
3272  * Compute the  negation of src  and write the result
3273  * to *dst. The complement of a
 * sufficiently sparse set will always be dense and hence a bitmap.
3275  * We assume that dst is pre-allocated and a valid bitset container
3276  * There can be no in-place version.
3277  */
3278 void array_container_negation(const array_container_t *src,
3279                               bitset_container_t *dst);
3280 
3281 /* Negation across the entire range of the container
3282  * Compute the  negation of src  and write the result
3283  * to *dst.  A true return value indicates a bitset result,
3284  * otherwise the result is an array container.
3285  *  We assume that dst is not pre-allocated. In
3286  * case of failure, *dst will be NULL.
3287  */
3288 bool bitset_container_negation(const bitset_container_t *src, void **dst);
3289 
3290 /* inplace version */
3291 /*
3292  * Same as bitset_container_negation except that if the output is to
3293  * be a
3294  * bitset_container_t, then src is modified and no allocation is made.
3295  * If the output is to be an array_container_t, then caller is responsible
3296  * to free the container.
3297  * In all cases, the result is in *dst.
3298  */
3299 bool bitset_container_negation_inplace(bitset_container_t *src, void **dst);
3300 
3301 /* Negation across the entire range of container
3302  * Compute the  negation of src  and write the result
3303  * to *dst.
 * Return values are the *_TYPECODES as defined in containers.h.
3305  *  We assume that dst is not pre-allocated. In
3306  * case of failure, *dst will be NULL.
3307  */
3308 int run_container_negation(const run_container_t *src, void **dst);
3309 
3310 /*
3311  * Same as run_container_negation except that if the output is to
3312  * be a
3313  * run_container_t, and has the capacity to hold the result,
3314  * then src is modified and no allocation is made.
3315  * In all cases, the result is in *dst.
3316  */
3317 int run_container_negation_inplace(run_container_t *src, void **dst);
3318 
3319 /* Negation across a range of the container.
3320  * Compute the  negation of src  and write the result
3321  * to *dst. Returns true if the result is a bitset container
3322  * and false for an array container.  *dst is not preallocated.
3323  */
3324 bool array_container_negation_range(const array_container_t *src,
3325                                     const int range_start, const int range_end,
3326                                     void **dst);
3327 
3328 /* Even when the result would fit, it is unclear how to make an
3329  * inplace version without inefficient copying.  Thus this routine
3330  * may be a wrapper for the non-in-place version
3331  */
3332 bool array_container_negation_range_inplace(array_container_t *src,
3333                                             const int range_start,
3334                                             const int range_end, void **dst);
3335 
3336 /* Negation across a range of the container
3337  * Compute the  negation of src  and write the result
3338  * to *dst.  A true return value indicates a bitset result,
3339  * otherwise the result is an array container.
3340  *  We assume that dst is not pre-allocated. In
3341  * case of failure, *dst will be NULL.
3342  */
3343 bool bitset_container_negation_range(const bitset_container_t *src,
3344                                      const int range_start, const int range_end,
3345                                      void **dst);
3346 
3347 /* inplace version */
3348 /*
3349  * Same as bitset_container_negation except that if the output is to
3350  * be a
3351  * bitset_container_t, then src is modified and no allocation is made.
3352  * If the output is to be an array_container_t, then caller is responsible
3353  * to free the container.
3354  * In all cases, the result is in *dst.
3355  */
3356 bool bitset_container_negation_range_inplace(bitset_container_t *src,
3357                                              const int range_start,
3358                                              const int range_end, void **dst);
3359 
3360 /* Negation across a range of container
3361  * Compute the  negation of src  and write the result
 * to *dst.  Return values are the *_TYPECODES as defined in containers.h.
3363  *  We assume that dst is not pre-allocated. In
3364  * case of failure, *dst will be NULL.
3365  */
3366 int run_container_negation_range(const run_container_t *src,
3367                                  const int range_start, const int range_end,
3368                                  void **dst);
3369 
3370 /*
3371  * Same as run_container_negation except that if the output is to
3372  * be a
3373  * run_container_t, and has the capacity to hold the result,
3374  * then src is modified and no allocation is made.
3375  * In all cases, the result is in *dst.
3376  */
3377 int run_container_negation_range_inplace(run_container_t *src,
3378                                          const int range_start,
3379                                          const int range_end, void **dst);
3380 
3381 #endif /* INCLUDE_CONTAINERS_MIXED_NEGATION_H_ */
3382 /* end file include/roaring/containers/mixed_negation.h */
3383 /* begin file include/roaring/containers/mixed_union.h */
3384 /*
 * mixed_union.h
3386  *
3387  */
3388 
3389 #ifndef INCLUDE_CONTAINERS_MIXED_UNION_H_
3390 #define INCLUDE_CONTAINERS_MIXED_UNION_H_
3391 
3392 /* These functions appear to exclude cases where the
3393  * inputs have the same type and the output is guaranteed
3394  * to have the same type as the inputs.  Eg, bitset unions
3395  */
3396 
3397 
3398 /* Compute the union of src_1 and src_2 and write the result to
3399  * dst. It is allowed for src_2 to be dst.   */
3400 void array_bitset_container_union(const array_container_t *src_1,
3401                                   const bitset_container_t *src_2,
3402                                   bitset_container_t *dst);
3403 
3404 /* Compute the union of src_1 and src_2 and write the result to
3405  * dst. It is allowed for src_2 to be dst.  This version does not
3406  * update the cardinality of dst (it is set to BITSET_UNKNOWN_CARDINALITY). */
3407 void array_bitset_container_lazy_union(const array_container_t *src_1,
3408                                        const bitset_container_t *src_2,
3409                                        bitset_container_t *dst);
3410 
3411 /*
3412  * Compute the union between src_1 and src_2 and write the result
 * to *dst. If the return value is true, the result is a bitset_container_t,
 * otherwise it is an array_container_t. We assume that dst is not pre-allocated. In
3415  * case of failure, *dst will be NULL.
3416  */
3417 bool array_array_container_union(const array_container_t *src_1,
3418                                  const array_container_t *src_2, void **dst);
3419 
3420 /*
3421  * Compute the union between src_1 and src_2 and write the result
 * to *dst if it cannot be written to src_1. If the return value is true,
 * the result is a bitset_container_t,
 * otherwise it is an array_container_t. When the result is an array_container_t,
 * it is either written to src_1 (if *dst is null) or to *dst.
3426  * If the result is a bitset_container_t and *dst is null, then there was a failure.
3427  */
3428 bool array_array_container_inplace_union(array_container_t *src_1,
3429                                  const array_container_t *src_2, void **dst);
3430 
3431 /*
3432  * Same as array_array_container_union except that it will more eagerly produce
3433  * a bitset.
3434  */
3435 bool array_array_container_lazy_union(const array_container_t *src_1,
3436                                       const array_container_t *src_2,
3437                                       void **dst);
3438 
3439 /*
3440  * Same as array_array_container_inplace_union except that it will more eagerly produce
3441  * a bitset.
3442  */
3443 bool array_array_container_lazy_inplace_union(array_container_t *src_1,
3444                                       const array_container_t *src_2,
3445                                       void **dst);
3446 
3447 /* Compute the union of src_1 and src_2 and write the result to
3448  * dst. We assume that dst is a
3449  * valid container. The result might need to be further converted to array or
3450  * bitset container,
3451  * the caller is responsible for the eventual conversion. */
3452 void array_run_container_union(const array_container_t *src_1,
3453                                const run_container_t *src_2,
3454                                run_container_t *dst);
3455 
3456 /* Compute the union of src_1 and src_2 and write the result to
3457  * src2. The result might need to be further converted to array or
3458  * bitset container,
3459  * the caller is responsible for the eventual conversion. */
3460 void array_run_container_inplace_union(const array_container_t *src_1,
3461                                        run_container_t *src_2);
3462 
3463 /* Compute the union of src_1 and src_2 and write the result to
3464  * dst. It is allowed for dst to be src_2.
3465  * If run_container_is_full(src_1) is true, you must not be calling this
3466  *function.
3467  **/
3468 void run_bitset_container_union(const run_container_t *src_1,
3469                                 const bitset_container_t *src_2,
3470                                 bitset_container_t *dst);
3471 
3472 /* Compute the union of src_1 and src_2 and write the result to
3473  * dst. It is allowed for dst to be src_2.  This version does not
3474  * update the cardinality of dst (it is set to BITSET_UNKNOWN_CARDINALITY).
3475  * If run_container_is_full(src_1) is true, you must not be calling this
3476  * function.
3477  * */
3478 void run_bitset_container_lazy_union(const run_container_t *src_1,
3479                                      const bitset_container_t *src_2,
3480                                      bitset_container_t *dst);
3481 
3482 #endif /* INCLUDE_CONTAINERS_MIXED_UNION_H_ */
3483 /* end file include/roaring/containers/mixed_union.h */
3484 /* begin file include/roaring/containers/mixed_xor.h */
3485 /*
3486  * mixed_xor.h
3487  *
3488  */
3489 
3490 #ifndef INCLUDE_CONTAINERS_MIXED_XOR_H_
3491 #define INCLUDE_CONTAINERS_MIXED_XOR_H_
3492 
3493 /* These functions appear to exclude cases where the
3494  * inputs have the same type and the output is guaranteed
3495  * to have the same type as the inputs.  Eg, bitset unions
3496  */
3497 
3498 /*
3499  * Java implementation (as of May 2016) for array_run, run_run
3500  * and  bitset_run don't do anything different for inplace.
3501  * (They are not truly in place.)
3502  */
3503 
3504 
3505 
3506 /* Compute the xor of src_1 and src_2 and write the result to
3507  * dst (which has no container initially).
3508  * Result is true iff dst is a bitset  */
3509 bool array_bitset_container_xor(const array_container_t *src_1,
3510                                 const bitset_container_t *src_2, void **dst);
3511 
3512 /* Compute the xor of src_1 and src_2 and write the result to
3513  * dst. It is allowed for src_2 to be dst.  This version does not
3514  * update the cardinality of dst (it is set to BITSET_UNKNOWN_CARDINALITY).
3515  */
3516 
3517 void array_bitset_container_lazy_xor(const array_container_t *src_1,
3518                                      const bitset_container_t *src_2,
3519                                      bitset_container_t *dst);
3520 /* Compute the xor of src_1 and src_2 and write the result to
3521  * dst (which has no container initially). Return value is
3522  * "dst is a bitset"
3523  */
3524 
3525 bool bitset_bitset_container_xor(const bitset_container_t *src_1,
3526                                  const bitset_container_t *src_2, void **dst);
3527 
3528 /* Compute the xor of src_1 and src_2 and write the result to
3529  * dst. Result may be either a bitset or an array container
3530  * (returns "result is bitset"). dst does not initially have
3531  * any container, but becomes either a bitset container (return
3532  * result true) or an array container.
3533  */
3534 
3535 bool run_bitset_container_xor(const run_container_t *src_1,
3536                               const bitset_container_t *src_2, void **dst);
3537 
3538 /* lazy xor.  Dst is initialized and may be equal to src_2.
3539  *  Result is left as a bitset container, even if actual
3540  *  cardinality would dictate an array container.
3541  */
3542 
3543 void run_bitset_container_lazy_xor(const run_container_t *src_1,
3544                                    const bitset_container_t *src_2,
3545                                    bitset_container_t *dst);
3546 
3547 /* dst does not indicate a valid container initially.  Eventually it
3548  * can become any kind of container.
3549  */
3550 
3551 int array_run_container_xor(const array_container_t *src_1,
3552                             const run_container_t *src_2, void **dst);
3553 
3554 /* dst does not initially have a valid container.  Creates either
3555  * an array or a bitset container, indicated by return code
3556  */
3557 
3558 bool array_array_container_xor(const array_container_t *src_1,
3559                                const array_container_t *src_2, void **dst);
3560 
3561 /* dst does not initially have a valid container.  Creates either
3562  * an array or a bitset container, indicated by return code.
3563  * A bitset container will not have a valid cardinality and the
3564  * container type might not be correct for the actual cardinality
3565  */
3566 
3567 bool array_array_container_lazy_xor(const array_container_t *src_1,
3568                                     const array_container_t *src_2, void **dst);
3569 
3570 /* Dst is a valid run container. (Can it be src_2? Let's say not.)
3571  * Leaves result as run container, even if other options are
3572  * smaller.
3573  */
3574 
3575 void array_run_container_lazy_xor(const array_container_t *src_1,
3576                                   const run_container_t *src_2,
3577                                   run_container_t *dst);
3578 
3579 /* dst does not indicate a valid container initially.  Eventually it
3580  * can become any kind of container.
3581  */
3582 
3583 int run_run_container_xor(const run_container_t *src_1,
3584                           const run_container_t *src_2, void **dst);
3585 
/* INPLACE versions (the initial implementation may not exploit all in-place
 * opportunities, if any).
3588  */
3589 
3590 /* Compute the xor of src_1 and src_2 and write the result to
3591  * dst (which has no container initially).  It will modify src_1
3592  * to be dst if the result is a bitset.  Otherwise, it will
3593  * free src_1 and dst will be a new array container.  In both
3594  * cases, the caller is responsible for deallocating dst.
3595  * Returns true iff dst is a bitset  */
3596 
3597 bool bitset_array_container_ixor(bitset_container_t *src_1,
3598                                  const array_container_t *src_2, void **dst);
3599 
3600 bool bitset_bitset_container_ixor(bitset_container_t *src_1,
3601                                   const bitset_container_t *src_2, void **dst);
3602 
3603 bool array_bitset_container_ixor(array_container_t *src_1,
3604                                  const bitset_container_t *src_2, void **dst);
3605 
3606 /* Compute the xor of src_1 and src_2 and write the result to
3607  * dst. Result may be either a bitset or an array container
3608  * (returns "result is bitset"). dst does not initially have
3609  * any container, but becomes either a bitset container (return
3610  * result true) or an array container.
3611  */
3612 
3613 bool run_bitset_container_ixor(run_container_t *src_1,
3614                                const bitset_container_t *src_2, void **dst);
3615 
3616 bool bitset_run_container_ixor(bitset_container_t *src_1,
3617                                const run_container_t *src_2, void **dst);
3618 
3619 /* dst does not indicate a valid container initially.  Eventually it
3620  * can become any kind of container.
3621  */
3622 
3623 int array_run_container_ixor(array_container_t *src_1,
3624                              const run_container_t *src_2, void **dst);
3625 
3626 int run_array_container_ixor(run_container_t *src_1,
3627                              const array_container_t *src_2, void **dst);
3628 
3629 bool array_array_container_ixor(array_container_t *src_1,
3630                                 const array_container_t *src_2, void **dst);
3631 
3632 int run_run_container_ixor(run_container_t *src_1, const run_container_t *src_2,
3633                            void **dst);
3634 #endif
3635 /* end file include/roaring/containers/mixed_xor.h */
3636 /* begin file include/roaring/containers/containers.h */
3637 #ifndef CONTAINERS_CONTAINERS_H
3638 #define CONTAINERS_CONTAINERS_H
3639 
3640 #include <assert.h>
3641 #include <stdbool.h>
3642 #include <stdio.h>
3643 
3644 
3645 // would enum be possible or better?
3646 
3647 /**
3648  * The switch case statements follow
3649  * BITSET_CONTAINER_TYPE_CODE -- ARRAY_CONTAINER_TYPE_CODE --
3650  * RUN_CONTAINER_TYPE_CODE
3651  * so it makes more sense to number them 1, 2, 3 (in the vague hope that the
3652  * compiler might exploit this ordering).
3653  */
3654 
3655 #define BITSET_CONTAINER_TYPE_CODE 1
3656 #define ARRAY_CONTAINER_TYPE_CODE 2
3657 #define RUN_CONTAINER_TYPE_CODE 3
3658 #define SHARED_CONTAINER_TYPE_CODE 4
3659 
3660 // macro for pairing container type codes
3661 #define CONTAINER_PAIR(c1, c2) (4 * (c1) + (c2))
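
/*
 * Illustrative sketch (not part of CRoaring): CONTAINER_PAIR packs two type
 * codes into one small integer so that binary operations can dispatch on a
 * single switch, e.g.
 *
 *     switch (CONTAINER_PAIR(type1, type2)) {
 *         case CONTAINER_PAIR(BITSET_CONTAINER_TYPE_CODE,
 *                             ARRAY_CONTAINER_TYPE_CODE):  // 4*1 + 2 == 6
 *             // handle bitset-vs-array here
 *             break;
 *         // ... other pairs ...
 *     }
 */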
3662 
3663 /**
3664  * A shared container is a wrapper around a container
3665  * with reference counting.
3666  */
3667 
3668 struct shared_container_s {
3669     void *container;
3670     uint8_t typecode;
3671     uint32_t counter;  // to be managed atomically
3672 };
3673 
3674 typedef struct shared_container_s shared_container_t;
3675 
3676 /*
 * With copy_on_write = true:
 *  create a new shared container if the typecode is not
 *  SHARED_CONTAINER_TYPE_CODE, otherwise increase the reference count.
 * If copy_on_write = false, then clone.
3681  * Return NULL in case of failure.
3682  **/
3683 void *get_copy_of_container(void *container, uint8_t *typecode,
3684                             bool copy_on_write);
3685 
3686 /* Frees a shared container (actually decrement its counter and only frees when
3687  * the counter falls to zero). */
3688 void shared_container_free(shared_container_t *container);
3689 
/* Extract a copy from the shared container, freeing the shared container if
 * there is just one instance left;
 * clone the container when the counter is higher than one.
 */
3694 void *shared_container_extract_copy(shared_container_t *container,
3695                                     uint8_t *typecode);
3696 
3697 /* access to container underneath */
static inline const void *container_unwrap_shared(
3699     const void *candidate_shared_container, uint8_t *type) {
3700     if (*type == SHARED_CONTAINER_TYPE_CODE) {
3701         *type =
3702             ((const shared_container_t *)candidate_shared_container)->typecode;
3703         assert(*type != SHARED_CONTAINER_TYPE_CODE);
3704         return ((const shared_container_t *)candidate_shared_container)->container;
3705     } else {
3706         return candidate_shared_container;
3707     }
3708 }
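
/*
 * Illustrative sketch (not part of CRoaring): read-only code first unwraps a
 * possibly shared container, then dispatches on the concrete type, as
 * container_get_cardinality() does further below:
 *
 *     uint8_t type = typecode;
 *     const void *c = container_unwrap_shared(container, &type);
 *     switch (type) {
 *         case BITSET_CONTAINER_TYPE_CODE: // ...
 *         case ARRAY_CONTAINER_TYPE_CODE:  // ...
 *         case RUN_CONTAINER_TYPE_CODE:    // ...
 *     }
 */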
3709 
3710 
3711 /* access to container underneath */
static inline void *container_mutable_unwrap_shared(
3713     void *candidate_shared_container, uint8_t *type) {
3714     if (*type == SHARED_CONTAINER_TYPE_CODE) {
3715         *type =
3716             ((shared_container_t *)candidate_shared_container)->typecode;
3717         assert(*type != SHARED_CONTAINER_TYPE_CODE);
3718         return ((shared_container_t *)candidate_shared_container)->container;
3719     } else {
3720         return candidate_shared_container;
3721     }
3722 }
3723 
3724 /* access to container underneath and queries its type */
static inline uint8_t get_container_type(const void *container, uint8_t type) {
3726     if (type == SHARED_CONTAINER_TYPE_CODE) {
3727         return ((const shared_container_t *)container)->typecode;
3728     } else {
3729         return type;
3730     }
3731 }
3732 
3733 /**
3734  * Copies a container, requires a typecode. This allocates new memory, caller
3735  * is responsible for deallocation. If the container is not shared, then it is
3736  * physically cloned. Shareable containers are not clonable.
3737  */
3738 void *container_clone(const void *container, uint8_t typecode);
3739 
3740 /* access to container underneath, cloning it if needed */
static inline void *get_writable_copy_if_shared(
3742     void *candidate_shared_container, uint8_t *type) {
3743     if (*type == SHARED_CONTAINER_TYPE_CODE) {
3744         return shared_container_extract_copy(
3745             (shared_container_t *)candidate_shared_container, type);
3746     } else {
3747         return candidate_shared_container;
3748     }
3749 }
3750 
3751 /**
3752  * End of shared container code
3753  */
3754 
3755 static const char *container_names[] = {"bitset", "array", "run", "shared"};
3756 static const char *shared_container_names[] = {
3757     "bitset (shared)", "array (shared)", "run (shared)"};
3758 
3759 // no matter what the initial container was, convert it to a bitset
3760 // if a new container is produced, caller responsible for freeing the previous
3761 // one
3762 // container should not be a shared container
static inline void *container_to_bitset(void *container, uint8_t typecode) {
3764     bitset_container_t *result = NULL;
3765     switch (typecode) {
3766         case BITSET_CONTAINER_TYPE_CODE:
3767             return container;  // nothing to do
3768         case ARRAY_CONTAINER_TYPE_CODE:
3769             result =
3770                 bitset_container_from_array((array_container_t *)container);
3771             return result;
3772         case RUN_CONTAINER_TYPE_CODE:
3773             result = bitset_container_from_run((run_container_t *)container);
3774             return result;
3775         case SHARED_CONTAINER_TYPE_CODE:
3776         default:
3777             assert(false);
3778             __builtin_unreachable();
3779             return 0;  // unreached
3780     }
3781 }
3782 
3783 /**
3784  * Get the container name from the typecode
3785  */
static inline const char *get_container_name(uint8_t typecode) {
3787     switch (typecode) {
3788         case BITSET_CONTAINER_TYPE_CODE:
3789             return container_names[0];
3790         case ARRAY_CONTAINER_TYPE_CODE:
3791             return container_names[1];
3792         case RUN_CONTAINER_TYPE_CODE:
3793             return container_names[2];
3794         case SHARED_CONTAINER_TYPE_CODE:
3795             return container_names[3];
3796         default:
3797             assert(false);
3798             __builtin_unreachable();
3799             return "unknown";
3800     }
3801 }
3802 
static inline const char *get_full_container_name(const void *container,
3804                                                   uint8_t typecode) {
3805     switch (typecode) {
3806         case BITSET_CONTAINER_TYPE_CODE:
3807             return container_names[0];
3808         case ARRAY_CONTAINER_TYPE_CODE:
3809             return container_names[1];
3810         case RUN_CONTAINER_TYPE_CODE:
3811             return container_names[2];
3812         case SHARED_CONTAINER_TYPE_CODE:
3813             switch (((const shared_container_t *)container)->typecode) {
3814                 case BITSET_CONTAINER_TYPE_CODE:
3815                     return shared_container_names[0];
3816                 case ARRAY_CONTAINER_TYPE_CODE:
3817                     return shared_container_names[1];
3818                 case RUN_CONTAINER_TYPE_CODE:
3819                     return shared_container_names[2];
3820                 default:
3821                     assert(false);
3822                     __builtin_unreachable();
3823                     return "unknown";
3824             }
3825             break;
3826         default:
3827             assert(false);
3828             __builtin_unreachable();
3829             return "unknown";
3830     }
3831     __builtin_unreachable();
3832     return NULL;
3833 }
3834 
3835 /**
 * Get the container cardinality (number of elements), requires a typecode
3837  */
static inline int container_get_cardinality(const void *container,
3839                                             uint8_t typecode) {
3840     container = container_unwrap_shared(container, &typecode);
3841     switch (typecode) {
3842         case BITSET_CONTAINER_TYPE_CODE:
3843             return bitset_container_cardinality(
3844                 (const bitset_container_t *)container);
3845         case ARRAY_CONTAINER_TYPE_CODE:
3846             return array_container_cardinality(
3847                 (const array_container_t *)container);
3848         case RUN_CONTAINER_TYPE_CODE:
3849             return run_container_cardinality(
3850                 (const run_container_t *)container);
3851         case SHARED_CONTAINER_TYPE_CODE:
3852         default:
3853             assert(false);
3854             __builtin_unreachable();
3855             return 0;  // unreached
3856     }
3857 }
3858 
3859 
3860 
// Returns true if a container is known to be full. Note that a lazy bitset
// container might be full without us knowing.
static inline bool container_is_full(const void *container, uint8_t typecode) {
3865     container = container_unwrap_shared(container, &typecode);
3866     switch (typecode) {
3867         case BITSET_CONTAINER_TYPE_CODE:
3868             return bitset_container_cardinality(
3869                        (const bitset_container_t *)container) == (1 << 16);
3870         case ARRAY_CONTAINER_TYPE_CODE:
3871             return array_container_cardinality(
3872                        (const array_container_t *)container) == (1 << 16);
3873         case RUN_CONTAINER_TYPE_CODE:
3874             return run_container_is_full((const run_container_t *)container);
3875         case SHARED_CONTAINER_TYPE_CODE:
3876         default:
3877             assert(false);
3878             __builtin_unreachable();
3879             return 0;  // unreached
3880     }
3881 }
3882 
static inline int container_shrink_to_fit(void *container, uint8_t typecode) {
3884     container = container_mutable_unwrap_shared(container, &typecode);
3885     switch (typecode) {
3886         case BITSET_CONTAINER_TYPE_CODE:
3887             return 0;  // no shrinking possible
3888         case ARRAY_CONTAINER_TYPE_CODE:
3889             return array_container_shrink_to_fit(
3890                 (array_container_t *)container);
3891         case RUN_CONTAINER_TYPE_CODE:
3892             return run_container_shrink_to_fit((run_container_t *)container);
3893         case SHARED_CONTAINER_TYPE_CODE:
3894         default:
3895             assert(false);
3896             __builtin_unreachable();
3897             return 0;  // unreached
3898     }
3899 }
3900 
3901 
/**
 * Make a container with a run of ones covering range_start to range_end.
 * A run container is used unless the range is very small (cardinality of at
 * most 2), in which case an array container is created instead.
 */
static inline void *container_range_of_ones(uint32_t range_start,
3909                                             uint32_t range_end,
3910                                             uint8_t *result_type) {
3911     assert(range_end >= range_start);
3912     uint64_t cardinality =  range_end - range_start + 1;
3913     if(cardinality <= 2) {
3914       *result_type = ARRAY_CONTAINER_TYPE_CODE;
3915       return array_container_create_range(range_start, range_end);
3916     } else {
3917       *result_type = RUN_CONTAINER_TYPE_CODE;
3918       return run_container_create_range(range_start, range_end);
3919     }
3920 }
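
/*
 * Example (an illustrative sketch, not part of the library). The container
 * type follows directly from the threshold above: a cardinality of at most 2
 * yields an array container, anything larger yields a run container:
 *
 *     uint8_t type;
 *     void *tiny = container_range_of_ones(5, 6, &type);
 *     assert(type == ARRAY_CONTAINER_TYPE_CODE);   // cardinality <= 2
 *     container_free(tiny, type);                  // declared further below
 *
 *     void *wide = container_range_of_ones(5, 600, &type);
 *     assert(type == RUN_CONTAINER_TYPE_CODE);     // one long run of ones
 *     container_free(wide, type);
 */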
3921 
3922 
/* Create a container with all the values in [min,max) that lie at a
   distance k*step from min (k = 0, 1, 2, ...). */
static inline void *container_from_range(uint8_t *type, uint32_t min,
3926                                          uint32_t max, uint16_t step) {
3927     if (step == 0) return NULL;  // being paranoid
3928     if (step == 1) {
3929         return container_range_of_ones(min,max,type);
3930         // Note: the result is not always a run (need to check the cardinality)
3931         //*type = RUN_CONTAINER_TYPE_CODE;
3932         //return run_container_create_range(min, max);
3933     }
3934     int size = (max - min + step - 1) / step;
3935     if (size <= DEFAULT_MAX_SIZE) {  // array container
3936         *type = ARRAY_CONTAINER_TYPE_CODE;
3937         array_container_t *array = array_container_create_given_capacity(size);
3938         array_container_add_from_range(array, min, max, step);
3939         assert(array->cardinality == size);
3940         return array;
3941     } else {  // bitset container
3942         *type = BITSET_CONTAINER_TYPE_CODE;
3943         bitset_container_t *bitset = bitset_container_create();
3944         bitset_container_add_from_range(bitset, min, max, step);
3945         assert(bitset->cardinality == size);
3946         return bitset;
3947     }
3948 }
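
/*
 * Example (an illustrative sketch, not part of the library). For min = 0,
 * max = 10000 and step = 5 the container holds 0, 5, 10, ..., 9995, i.e.
 * 2000 values; since that is at most DEFAULT_MAX_SIZE, an array container
 * is selected:
 *
 *     uint8_t type;
 *     void *c = container_from_range(&type, 0, 10000, 5);
 *     assert(container_get_cardinality(c, type) == 2000);
 *     assert(container_contains(c, 25, type));     // 25 = 5 * 5
 *     assert(!container_contains(c, 26, type));    // not on the step grid
 *     container_free(c, type);
 */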
3949 
3950 /**
3951  * "repair" the container after lazy operations.
3952  */
static inline void *container_repair_after_lazy(void *container,
3954                                                 uint8_t *typecode) {
3955     container = get_writable_copy_if_shared(
3956         container, typecode);  // TODO: this introduces unnecessary cloning
3957     void *result = NULL;
3958     switch (*typecode) {
3959         case BITSET_CONTAINER_TYPE_CODE:
3960             ((bitset_container_t *)container)->cardinality =
3961                 bitset_container_compute_cardinality(
3962                     (bitset_container_t *)container);
3963             if (((bitset_container_t *)container)->cardinality <=
3964                 DEFAULT_MAX_SIZE) {
3965                 result = array_container_from_bitset(
3966                     (const bitset_container_t *)container);
3967                 bitset_container_free((bitset_container_t *)container);
3968                 *typecode = ARRAY_CONTAINER_TYPE_CODE;
3969                 return result;
3970             }
3971             return container;
3972         case ARRAY_CONTAINER_TYPE_CODE:
3973             return container;  // nothing to do
3974         case RUN_CONTAINER_TYPE_CODE:
3975             return convert_run_to_efficient_container_and_free(
3976                 (run_container_t *)container, typecode);
3977         case SHARED_CONTAINER_TYPE_CODE:
3978         default:
3979             assert(false);
3980             __builtin_unreachable();
3981             return 0;  // unreached
3982     }
3983 }
3984 
3985 /**
3986  * Writes the underlying array to buf, outputs how many bytes were written.
3987  * This is meant to be byte-by-byte compatible with the Java and Go versions of
3988  * Roaring.
 * The number of bytes written should match
 * container_size_in_bytes(container, typecode).
 */
static inline int32_t container_write(const void *container, uint8_t typecode,
3994                                       char *buf) {
3995     container = container_unwrap_shared(container, &typecode);
3996     switch (typecode) {
3997         case BITSET_CONTAINER_TYPE_CODE:
3998             return bitset_container_write((const bitset_container_t *)container, buf);
3999         case ARRAY_CONTAINER_TYPE_CODE:
4000             return array_container_write((const array_container_t *)container, buf);
4001         case RUN_CONTAINER_TYPE_CODE:
4002             return run_container_write((const run_container_t *)container, buf);
4003         case SHARED_CONTAINER_TYPE_CODE:
4004         default:
4005             assert(false);
4006             __builtin_unreachable();
4007             return 0;  // unreached
4008     }
4009 }
4010 
4011 /**
4012  * Get the container size in bytes under portable serialization (see
 * container_write), requires a typecode.
4015  */
static inline int32_t container_size_in_bytes(const void *container,
4017                                               uint8_t typecode) {
4018     container = container_unwrap_shared(container, &typecode);
4019     switch (typecode) {
4020         case BITSET_CONTAINER_TYPE_CODE:
4021             return bitset_container_size_in_bytes(
4022                 (const bitset_container_t *)container);
4023         case ARRAY_CONTAINER_TYPE_CODE:
4024             return array_container_size_in_bytes(
4025                 (const array_container_t *)container);
4026         case RUN_CONTAINER_TYPE_CODE:
4027             return run_container_size_in_bytes((const run_container_t *)container);
4028         case SHARED_CONTAINER_TYPE_CODE:
4029         default:
4030             assert(false);
4031             __builtin_unreachable();
4032             return 0;  // unreached
4033     }
4034 }
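
/*
 * Example (an illustrative sketch, not part of the library) of the intended
 * pairing of container_size_in_bytes and container_write: size the buffer
 * first, then serialize into it. Here c and type stand for a container and
 * its typecode obtained elsewhere:
 *
 *     int32_t expected = container_size_in_bytes(c, type);
 *     char *buf = (char *)malloc((size_t)expected);
 *     if (buf != NULL) {
 *         int32_t written = container_write(c, type, buf);
 *         assert(written == expected);
 *         // ... store or transmit the first `written` bytes of buf ...
 *         free(buf);
 *     }
 */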
4035 
4036 /**
 * print the container (useful for debugging), requires a typecode
4038  */
4039 void container_printf(const void *container, uint8_t typecode);
4040 
4041 /**
4042  * print the content of the container as a comma-separated list of 32-bit values
 * starting at base, requires a typecode
4044  */
4045 void container_printf_as_uint32_array(const void *container, uint8_t typecode,
4046                                       uint32_t base);
4047 
4048 /**
 * Checks whether a container is not empty, requires a typecode
4050  */
static inline bool container_nonzero_cardinality(const void *container,
4052                                                  uint8_t typecode) {
4053     container = container_unwrap_shared(container, &typecode);
4054     switch (typecode) {
4055         case BITSET_CONTAINER_TYPE_CODE:
4056             return bitset_container_const_nonzero_cardinality(
4057                 (const bitset_container_t *)container);
4058         case ARRAY_CONTAINER_TYPE_CODE:
4059             return array_container_nonzero_cardinality(
4060                 (const array_container_t *)container);
4061         case RUN_CONTAINER_TYPE_CODE:
4062             return run_container_nonzero_cardinality(
4063                 (const run_container_t *)container);
4064         case SHARED_CONTAINER_TYPE_CODE:
4065         default:
4066             assert(false);
4067             __builtin_unreachable();
4068             return 0;  // unreached
4069     }
4070 }
4071 
4072 /**
 * Recover memory from a container, requires a typecode
4074  */
4075 void container_free(void *container, uint8_t typecode);
4076 
4077 /**
 * Convert a container to an array of values, requires a typecode as well as a
 * "base" (the most significant 16 bits of each output value).
 * Returns the number of values added.
4081  */
static inline int container_to_uint32_array(uint32_t *output,
4083                                             const void *container,
4084                                             uint8_t typecode, uint32_t base) {
4085     container = container_unwrap_shared(container, &typecode);
4086     switch (typecode) {
4087         case BITSET_CONTAINER_TYPE_CODE:
4088             return bitset_container_to_uint32_array(
4089                 output, (const bitset_container_t *)container, base);
4090         case ARRAY_CONTAINER_TYPE_CODE:
4091             return array_container_to_uint32_array(
4092                 output, (const array_container_t *)container, base);
4093         case RUN_CONTAINER_TYPE_CODE:
4094             return run_container_to_uint32_array(
4095                 output, (const run_container_t *)container, base);
4096         case SHARED_CONTAINER_TYPE_CODE:
4097         default:
4098             assert(false);
4099             __builtin_unreachable();
4100             return 0;  // unreached
4101     }
4102 }
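
/*
 * Example (an illustrative sketch, not part of the library). The "base"
 * argument supplies the high bits of every emitted value; roaring bitmaps
 * typically pass the container's 16-bit key shifted left by 16. The output
 * buffer must have room for the container's cardinality. Here c, type and
 * key stand for values obtained elsewhere:
 *
 *     int card = container_get_cardinality(c, type);
 *     uint32_t *values = (uint32_t *)malloc(sizeof(uint32_t) * (size_t)card);
 *     if (values != NULL) {
 *         int n = container_to_uint32_array(values, c, type,
 *                                           ((uint32_t)key) << 16);
 *         assert(n == card);
 *         free(values);
 *     }
 */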
4103 
4104 /**
 * Add a value to a container, requires a typecode, fills in new_typecode and
 * returns a (possibly different) container.
 * This function may allocate a new container; the caller is responsible for
 * memory deallocation.
4109  */
static inline void *container_add(void *container, uint16_t val,
4111                                   uint8_t typecode, uint8_t *new_typecode) {
4112     container = get_writable_copy_if_shared(container, &typecode);
4113     switch (typecode) {
4114         case BITSET_CONTAINER_TYPE_CODE:
4115             bitset_container_set((bitset_container_t *)container, val);
4116             *new_typecode = BITSET_CONTAINER_TYPE_CODE;
4117             return container;
4118         case ARRAY_CONTAINER_TYPE_CODE: {
4119             array_container_t *ac = (array_container_t *)container;
4120             if (array_container_try_add(ac, val, DEFAULT_MAX_SIZE) != -1) {
4121                 *new_typecode = ARRAY_CONTAINER_TYPE_CODE;
4122                 return ac;
4123             } else {
4124                 bitset_container_t* bitset = bitset_container_from_array(ac);
4125                 bitset_container_add(bitset, val);
4126                 *new_typecode = BITSET_CONTAINER_TYPE_CODE;
4127                 return bitset;
4128             }
4129         } break;
4130         case RUN_CONTAINER_TYPE_CODE:
4131             // per Java, no container type adjustments are done (revisit?)
4132             run_container_add((run_container_t *)container, val);
4133             *new_typecode = RUN_CONTAINER_TYPE_CODE;
4134             return container;
4135         case SHARED_CONTAINER_TYPE_CODE:
4136         default:
4137             assert(false);
4138             __builtin_unreachable();
4139             return NULL;
4140     }
4141 }
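
/*
 * Example (an illustrative sketch, not part of the library) of the ownership
 * rule described above: when an array container overflows DEFAULT_MAX_SIZE,
 * container_add returns a freshly allocated bitset container and the caller
 * must free the old one. Here c, type and value stand for state obtained
 * elsewhere:
 *
 *     uint8_t new_type;
 *     void *updated = container_add(c, value, type, &new_type);
 *     if (updated != c) {
 *         container_free(c, type);   // the old container was replaced
 *     }
 *     c = updated;
 *     type = new_type;
 */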
4142 
4143 /**
 * Remove a value from a container, requires a typecode, fills in new_typecode
 * and returns a (possibly different) container.
 * This function may allocate a new container; the caller is responsible for
 * memory deallocation.
4149  */
static inline void *container_remove(void *container, uint16_t val,
4151                                      uint8_t typecode, uint8_t *new_typecode) {
4152     container = get_writable_copy_if_shared(container, &typecode);
4153     switch (typecode) {
4154         case BITSET_CONTAINER_TYPE_CODE:
4155             if (bitset_container_remove((bitset_container_t *)container, val)) {
4156                 if (bitset_container_cardinality(
4157                         (bitset_container_t *)container) <= DEFAULT_MAX_SIZE) {
4158                     *new_typecode = ARRAY_CONTAINER_TYPE_CODE;
4159                     return array_container_from_bitset(
4160                         (bitset_container_t *)container);
4161                 }
4162             }
4163             *new_typecode = typecode;
4164             return container;
4165         case ARRAY_CONTAINER_TYPE_CODE:
4166             *new_typecode = typecode;
4167             array_container_remove((array_container_t *)container, val);
4168             return container;
4169         case RUN_CONTAINER_TYPE_CODE:
4170             // per Java, no container type adjustments are done (revisit?)
4171             run_container_remove((run_container_t *)container, val);
4172             *new_typecode = RUN_CONTAINER_TYPE_CODE;
4173             return container;
4174         case SHARED_CONTAINER_TYPE_CODE:
4175         default:
4176             assert(false);
4177             __builtin_unreachable();
4178             return NULL;
4179     }
4180 }
4181 
4182 /**
 * Check whether a value is in a container, requires a typecode
4184  */
static inline bool container_contains(const void *container, uint16_t val,
4186                                uint8_t typecode) {
4187     container = container_unwrap_shared(container, &typecode);
4188     switch (typecode) {
4189         case BITSET_CONTAINER_TYPE_CODE:
4190             return bitset_container_get((const bitset_container_t *)container,
4191                                         val);
4192         case ARRAY_CONTAINER_TYPE_CODE:
4193             return array_container_contains(
4194                 (const array_container_t *)container, val);
4195         case RUN_CONTAINER_TYPE_CODE:
4196             return run_container_contains((const run_container_t *)container,
4197                                           val);
4198         case SHARED_CONTAINER_TYPE_CODE:
4199         default:
4200             assert(false);
4201             __builtin_unreachable();
4202             return false;
4203     }
4204 }
4205 
4206 /**
4207  * Check whether a range of values from range_start (included) to range_end (excluded)
4208  * is in a container, requires a typecode
4209  */
static inline bool container_contains_range(const void *container, uint32_t range_start,
4211 					uint32_t range_end, uint8_t typecode) {
4212     container = container_unwrap_shared(container, &typecode);
4213     switch (typecode) {
4214         case BITSET_CONTAINER_TYPE_CODE:
4215             return bitset_container_get_range((const bitset_container_t *)container,
4216                                                 range_start, range_end);
4217         case ARRAY_CONTAINER_TYPE_CODE:
4218             return array_container_contains_range((const array_container_t *)container,
4219                                                     range_start, range_end);
4220         case RUN_CONTAINER_TYPE_CODE:
4221             return run_container_contains_range((const run_container_t *)container,
4222                                                     range_start, range_end);
4223         case SHARED_CONTAINER_TYPE_CODE:
4224         default:
4225             assert(false);
4226             __builtin_unreachable();
4227             return false;
4228     }
4229 }
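
/*
 * Example (an illustrative sketch, not part of the library). Both helpers
 * dispatch on the (possibly shared) typecode; note that the range check is
 * half-open, with range_start included and range_end excluded. Here c and
 * type stand for a container/typecode pair obtained elsewhere:
 *
 *     bool has_single = container_contains(c, 7, type);
 *     bool has_range  = container_contains_range(c, 5, 10, type); // {5..9}
 */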
4230 
4231 int32_t container_serialize(const void *container, uint8_t typecode,
4232                             char *buf) WARN_UNUSED;
4233 
4234 uint32_t container_serialization_len(const void *container, uint8_t typecode);
4235 
4236 void *container_deserialize(uint8_t typecode, const char *buf, size_t buf_len);
4237 
4238 /**
4239  * Returns true if the two containers have the same content. Note that
4240  * two containers having different types can be "equal" in this sense.
4241  */
static inline bool container_equals(const void *c1, uint8_t type1,
4243                                     const void *c2, uint8_t type2) {
4244     c1 = container_unwrap_shared(c1, &type1);
4245     c2 = container_unwrap_shared(c2, &type2);
4246     switch (CONTAINER_PAIR(type1, type2)) {
4247         case CONTAINER_PAIR(BITSET_CONTAINER_TYPE_CODE,
4248                             BITSET_CONTAINER_TYPE_CODE):
4249             return bitset_container_equals((const bitset_container_t *)c1,
4250                                            (const bitset_container_t *)c2);
4251         case CONTAINER_PAIR(BITSET_CONTAINER_TYPE_CODE,
4252                             RUN_CONTAINER_TYPE_CODE):
4253             return run_container_equals_bitset((const run_container_t *)c2,
4254                                                (const bitset_container_t *)c1);
4255         case CONTAINER_PAIR(RUN_CONTAINER_TYPE_CODE,
4256                             BITSET_CONTAINER_TYPE_CODE):
4257             return run_container_equals_bitset((const run_container_t *)c1,
4258                                                (const bitset_container_t *)c2);
4259         case CONTAINER_PAIR(BITSET_CONTAINER_TYPE_CODE,
4260                             ARRAY_CONTAINER_TYPE_CODE):
4261             // java would always return false?
4262             return array_container_equal_bitset((const array_container_t *)c2,
4263                                                 (const bitset_container_t *)c1);
4264         case CONTAINER_PAIR(ARRAY_CONTAINER_TYPE_CODE,
4265                             BITSET_CONTAINER_TYPE_CODE):
4266             // java would always return false?
4267             return array_container_equal_bitset((const array_container_t *)c1,
4268                                                 (const bitset_container_t *)c2);
4269         case CONTAINER_PAIR(ARRAY_CONTAINER_TYPE_CODE, RUN_CONTAINER_TYPE_CODE):
4270             return run_container_equals_array((const run_container_t *)c2,
4271                                               (const array_container_t *)c1);
4272         case CONTAINER_PAIR(RUN_CONTAINER_TYPE_CODE, ARRAY_CONTAINER_TYPE_CODE):
4273             return run_container_equals_array((const run_container_t *)c1,
4274                                               (const array_container_t *)c2);
4275         case CONTAINER_PAIR(ARRAY_CONTAINER_TYPE_CODE,
4276                             ARRAY_CONTAINER_TYPE_CODE):
4277             return array_container_equals((const array_container_t *)c1,
4278                                           (const array_container_t *)c2);
4279         case CONTAINER_PAIR(RUN_CONTAINER_TYPE_CODE, RUN_CONTAINER_TYPE_CODE):
4280             return run_container_equals((const run_container_t *)c1,
4281                                         (const run_container_t *)c2);
4282         default:
4283             assert(false);
4284             __builtin_unreachable();
4285             return false;
4286     }
4287 }
4288 
4289 /**
4290  * Returns true if the container c1 is a subset of the container c2. Note that
4291  * c1 can be a subset of c2 even if they have a different type.
4292  */
static inline bool container_is_subset(const void *c1, uint8_t type1,
4294                                        const void *c2, uint8_t type2) {
4295     c1 = container_unwrap_shared(c1, &type1);
4296     c2 = container_unwrap_shared(c2, &type2);
4297     switch (CONTAINER_PAIR(type1, type2)) {
4298         case CONTAINER_PAIR(BITSET_CONTAINER_TYPE_CODE,
4299                             BITSET_CONTAINER_TYPE_CODE):
4300             return bitset_container_is_subset((const bitset_container_t *)c1,
4301                                               (const bitset_container_t *)c2);
4302         case CONTAINER_PAIR(BITSET_CONTAINER_TYPE_CODE,
4303                             RUN_CONTAINER_TYPE_CODE):
4304             return bitset_container_is_subset_run((const bitset_container_t *)c1,
4305                                                   (const run_container_t *)c2);
4306         case CONTAINER_PAIR(RUN_CONTAINER_TYPE_CODE,
4307                             BITSET_CONTAINER_TYPE_CODE):
4308             return run_container_is_subset_bitset((const run_container_t *)c1,
4309                                                   (const bitset_container_t *)c2);
4310         case CONTAINER_PAIR(BITSET_CONTAINER_TYPE_CODE,
4311                             ARRAY_CONTAINER_TYPE_CODE):
4312             return false;  // by construction, size(c1) > size(c2)
4313         case CONTAINER_PAIR(ARRAY_CONTAINER_TYPE_CODE,
4314                             BITSET_CONTAINER_TYPE_CODE):
4315             return array_container_is_subset_bitset((const array_container_t *)c1,
4316                                                     (const bitset_container_t *)c2);
4317         case CONTAINER_PAIR(ARRAY_CONTAINER_TYPE_CODE, RUN_CONTAINER_TYPE_CODE):
4318             return array_container_is_subset_run((const array_container_t *)c1,
4319                                                  (const run_container_t *)c2);
4320         case CONTAINER_PAIR(RUN_CONTAINER_TYPE_CODE, ARRAY_CONTAINER_TYPE_CODE):
4321             return run_container_is_subset_array((const run_container_t *)c1,
4322                                                  (const array_container_t *)c2);
4323         case CONTAINER_PAIR(ARRAY_CONTAINER_TYPE_CODE,
4324                             ARRAY_CONTAINER_TYPE_CODE):
4325             return array_container_is_subset((const array_container_t *)c1,
4326                                              (const array_container_t *)c2);
4327         case CONTAINER_PAIR(RUN_CONTAINER_TYPE_CODE, RUN_CONTAINER_TYPE_CODE):
4328             return run_container_is_subset((const run_container_t *)c1,
4329                                            (const run_container_t *)c2);
4330         default:
4331             assert(false);
4332             __builtin_unreachable();
4333             return false;
4334     }
4335 }
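
/*
 * Example (an illustrative sketch, not part of the library). As the dispatch
 * above shows, subset tests work across container types:
 *
 *     uint8_t small_type, big_type;
 *     void *small = container_range_of_ones(100, 101, &small_type); // array
 *     void *big   = container_range_of_ones(0, 1000, &big_type);    // run
 *     assert(container_is_subset(small, small_type, big, big_type));
 *     container_free(small, small_type);
 *     container_free(big, big_type);
 */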
4336 
4337 // macro-izations possibilities for generic non-inplace binary-op dispatch
4338 
4339 /**
 * Compute the intersection of two containers, generating a new container
 * (having type result_type), requires a typecode. This allocates new memory;
 * the caller is responsible for deallocation.
4343  */
static inline void *container_and(const void *c1, uint8_t type1, const void *c2,
4345                                   uint8_t type2, uint8_t *result_type) {
4346     c1 = container_unwrap_shared(c1, &type1);
4347     c2 = container_unwrap_shared(c2, &type2);
4348     void *result = NULL;
4349     switch (CONTAINER_PAIR(type1, type2)) {
4350         case CONTAINER_PAIR(BITSET_CONTAINER_TYPE_CODE,
4351                             BITSET_CONTAINER_TYPE_CODE):
4352             *result_type = bitset_bitset_container_intersection(
4353                                (const bitset_container_t *)c1,
4354                                (const bitset_container_t *)c2, &result)
4355                                ? BITSET_CONTAINER_TYPE_CODE
4356                                : ARRAY_CONTAINER_TYPE_CODE;
4357             return result;
4358         case CONTAINER_PAIR(ARRAY_CONTAINER_TYPE_CODE,
4359                             ARRAY_CONTAINER_TYPE_CODE):
4360             result = array_container_create();
4361             array_container_intersection((const array_container_t *)c1,
4362                                          (const array_container_t *)c2,
4363                                          (array_container_t *)result);
4364             *result_type = ARRAY_CONTAINER_TYPE_CODE;  // never bitset
4365             return result;
4366         case CONTAINER_PAIR(RUN_CONTAINER_TYPE_CODE, RUN_CONTAINER_TYPE_CODE):
4367             result = run_container_create();
4368             run_container_intersection((const run_container_t *)c1,
4369                                        (const run_container_t *)c2,
4370                                        (run_container_t *)result);
4371             return convert_run_to_efficient_container_and_free(
4372                 (run_container_t *)result, result_type);
4373         case CONTAINER_PAIR(BITSET_CONTAINER_TYPE_CODE,
4374                             ARRAY_CONTAINER_TYPE_CODE):
4375             result = array_container_create();
4376             array_bitset_container_intersection((const array_container_t *)c2,
4377                                                 (const bitset_container_t *)c1,
4378                                                 (array_container_t *)result);
4379             *result_type = ARRAY_CONTAINER_TYPE_CODE;  // never bitset
4380             return result;
4381         case CONTAINER_PAIR(ARRAY_CONTAINER_TYPE_CODE,
4382                             BITSET_CONTAINER_TYPE_CODE):
4383             result = array_container_create();
4384             *result_type = ARRAY_CONTAINER_TYPE_CODE;  // never bitset
4385             array_bitset_container_intersection((const array_container_t *)c1,
4386                                                 (const bitset_container_t *)c2,
4387                                                 (array_container_t *)result);
4388             return result;
4389 
4390         case CONTAINER_PAIR(BITSET_CONTAINER_TYPE_CODE,
4391                             RUN_CONTAINER_TYPE_CODE):
4392             *result_type = run_bitset_container_intersection(
4393                                (const run_container_t *)c2,
4394                                (const bitset_container_t *)c1, &result)
4395                                ? BITSET_CONTAINER_TYPE_CODE
4396                                : ARRAY_CONTAINER_TYPE_CODE;
4397             return result;
4398         case CONTAINER_PAIR(RUN_CONTAINER_TYPE_CODE,
4399                             BITSET_CONTAINER_TYPE_CODE):
4400             *result_type = run_bitset_container_intersection(
4401                                (const run_container_t *)c1,
4402                                (const bitset_container_t *)c2, &result)
4403                                ? BITSET_CONTAINER_TYPE_CODE
4404                                : ARRAY_CONTAINER_TYPE_CODE;
4405             return result;
4406         case CONTAINER_PAIR(ARRAY_CONTAINER_TYPE_CODE, RUN_CONTAINER_TYPE_CODE):
4407             result = array_container_create();
4408             *result_type = ARRAY_CONTAINER_TYPE_CODE;  // never bitset
4409             array_run_container_intersection((const array_container_t *)c1,
4410                                              (const run_container_t *)c2,
4411                                              (array_container_t *)result);
4412             return result;
4413 
4414         case CONTAINER_PAIR(RUN_CONTAINER_TYPE_CODE, ARRAY_CONTAINER_TYPE_CODE):
4415             result = array_container_create();
4416             *result_type = ARRAY_CONTAINER_TYPE_CODE;  // never bitset
4417             array_run_container_intersection((const array_container_t *)c2,
4418                                              (const run_container_t *)c1,
4419                                              (array_container_t *)result);
4420             return result;
4421         default:
4422             assert(false);
4423             __builtin_unreachable();
4424             return NULL;
4425     }
4426 }
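
/*
 * Example (an illustrative sketch, not part of the library). The result type
 * is chosen by the dispatch above, so callers should read it back through
 * result_type rather than assuming one:
 *
 *     uint8_t ta, tb, tr;
 *     void *a = container_range_of_ones(0, 1000, &ta);
 *     void *b = container_range_of_ones(500, 2000, &tb);
 *     void *r = container_and(a, ta, b, tb, &tr);
 *     printf("intersection is a %s container with %d values\n",
 *            get_container_name(tr), container_get_cardinality(r, tr));
 *     container_free(a, ta);
 *     container_free(b, tb);
 *     container_free(r, tr);
 */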
4427 
4428 /**
4429  * Compute the size of the intersection between two containers.
4430  */
static inline int container_and_cardinality(const void *c1, uint8_t type1,
4432                                             const void *c2, uint8_t type2) {
4433     c1 = container_unwrap_shared(c1, &type1);
4434     c2 = container_unwrap_shared(c2, &type2);
4435     switch (CONTAINER_PAIR(type1, type2)) {
4436         case CONTAINER_PAIR(BITSET_CONTAINER_TYPE_CODE,
4437                             BITSET_CONTAINER_TYPE_CODE):
4438             return bitset_container_and_justcard(
4439                 (const bitset_container_t *)c1, (const bitset_container_t *)c2);
4440         case CONTAINER_PAIR(ARRAY_CONTAINER_TYPE_CODE,
4441                             ARRAY_CONTAINER_TYPE_CODE):
4442             return array_container_intersection_cardinality(
4443                 (const array_container_t *)c1, (const array_container_t *)c2);
4444         case CONTAINER_PAIR(RUN_CONTAINER_TYPE_CODE, RUN_CONTAINER_TYPE_CODE):
4445             return run_container_intersection_cardinality(
4446                 (const run_container_t *)c1, (const run_container_t *)c2);
4447         case CONTAINER_PAIR(BITSET_CONTAINER_TYPE_CODE,
4448                             ARRAY_CONTAINER_TYPE_CODE):
4449             return array_bitset_container_intersection_cardinality(
4450                 (const array_container_t *)c2, (const bitset_container_t *)c1);
4451         case CONTAINER_PAIR(ARRAY_CONTAINER_TYPE_CODE,
4452                             BITSET_CONTAINER_TYPE_CODE):
4453             return array_bitset_container_intersection_cardinality(
4454                 (const array_container_t *)c1, (const bitset_container_t *)c2);
4455         case CONTAINER_PAIR(BITSET_CONTAINER_TYPE_CODE,
4456                             RUN_CONTAINER_TYPE_CODE):
4457             return run_bitset_container_intersection_cardinality(
4458                 (const run_container_t *)c2, (const bitset_container_t *)c1);
4459         case CONTAINER_PAIR(RUN_CONTAINER_TYPE_CODE,
4460                             BITSET_CONTAINER_TYPE_CODE):
4461             return run_bitset_container_intersection_cardinality(
4462                 (const run_container_t *)c1, (const bitset_container_t *)c2);
4463         case CONTAINER_PAIR(ARRAY_CONTAINER_TYPE_CODE, RUN_CONTAINER_TYPE_CODE):
4464             return array_run_container_intersection_cardinality(
4465                 (const array_container_t *)c1, (const run_container_t *)c2);
4466         case CONTAINER_PAIR(RUN_CONTAINER_TYPE_CODE, ARRAY_CONTAINER_TYPE_CODE):
4467             return array_run_container_intersection_cardinality(
4468                 (const array_container_t *)c2, (const run_container_t *)c1);
4469         default:
4470             assert(false);
4471             __builtin_unreachable();
4472             return 0;
4473     }
4474 }
4475 
4476 /**
4477  * Check whether two containers intersect.
4478  */
static inline bool container_intersect(const void *c1, uint8_t type1, const void *c2,
4480                                   uint8_t type2) {
4481     c1 = container_unwrap_shared(c1, &type1);
4482     c2 = container_unwrap_shared(c2, &type2);
4483     switch (CONTAINER_PAIR(type1, type2)) {
4484         case CONTAINER_PAIR(BITSET_CONTAINER_TYPE_CODE,
4485                             BITSET_CONTAINER_TYPE_CODE):
4486             return bitset_container_intersect(
4487                                (const bitset_container_t *)c1,
4488                                (const bitset_container_t *)c2);
4489         case CONTAINER_PAIR(ARRAY_CONTAINER_TYPE_CODE,
4490                             ARRAY_CONTAINER_TYPE_CODE):
4491             return array_container_intersect((const array_container_t *)c1,
4492                                          (const array_container_t *)c2);
4493         case CONTAINER_PAIR(RUN_CONTAINER_TYPE_CODE, RUN_CONTAINER_TYPE_CODE):
4494             return run_container_intersect((const run_container_t *)c1,
4495                                        (const run_container_t *)c2);
4496         case CONTAINER_PAIR(BITSET_CONTAINER_TYPE_CODE,
4497                             ARRAY_CONTAINER_TYPE_CODE):
4498             return array_bitset_container_intersect((const array_container_t *)c2,
4499                                                 (const bitset_container_t *)c1);
4500         case CONTAINER_PAIR(ARRAY_CONTAINER_TYPE_CODE,
4501                             BITSET_CONTAINER_TYPE_CODE):
4502             return array_bitset_container_intersect((const array_container_t *)c1,
4503                                                 (const bitset_container_t *)c2);
4504         case CONTAINER_PAIR(BITSET_CONTAINER_TYPE_CODE,
4505                             RUN_CONTAINER_TYPE_CODE):
4506             return run_bitset_container_intersect(
4507                                (const run_container_t *)c2,
4508                                (const bitset_container_t *)c1);
4509         case CONTAINER_PAIR(RUN_CONTAINER_TYPE_CODE,
4510                             BITSET_CONTAINER_TYPE_CODE):
4511             return run_bitset_container_intersect(
4512                                (const run_container_t *)c1,
4513                                (const bitset_container_t *)c2);
4514         case CONTAINER_PAIR(ARRAY_CONTAINER_TYPE_CODE, RUN_CONTAINER_TYPE_CODE):
4515             return array_run_container_intersect((const array_container_t *)c1,
4516                                              (const run_container_t *)c2);
4517         case CONTAINER_PAIR(RUN_CONTAINER_TYPE_CODE, ARRAY_CONTAINER_TYPE_CODE):
4518             return array_run_container_intersect((const array_container_t *)c2,
4519                                              (const run_container_t *)c1);
4520         default:
4521             assert(false);
4522             __builtin_unreachable();
4523             return 0;
4524     }
4525 }
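
/*
 * Example (an illustrative sketch, not part of the library). When only a
 * yes/no answer is needed, container_intersect avoids materializing the
 * intersection, and container_and_cardinality avoids it when only the size
 * matters. Here a/ta and b/tb stand for containers obtained elsewhere:
 *
 *     if (container_intersect(a, ta, b, tb)) {
 *         int shared = container_and_cardinality(a, ta, b, tb);
 *         // ... use shared ...
 *     }
 */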
4526 
/**
 * Compute the intersection of two containers, with the result in the first
 * container if possible. If the returned pointer is identical to c1, then
 * the container has been modified. If the returned pointer is different from
 * c1, then a new container has been created and the caller is responsible
 * for freeing it.
 * The type of the first container may change. Returns the modified
 * (and possibly new) container.
 */
static inline void *container_iand(void *c1, uint8_t type1, const void *c2,
4537                                    uint8_t type2, uint8_t *result_type) {
4538     c1 = get_writable_copy_if_shared(c1, &type1);
4539     c2 = container_unwrap_shared(c2, &type2);
4540     void *result = NULL;
4541     switch (CONTAINER_PAIR(type1, type2)) {
4542         case CONTAINER_PAIR(BITSET_CONTAINER_TYPE_CODE,
4543                             BITSET_CONTAINER_TYPE_CODE):
4544             *result_type =
4545                 bitset_bitset_container_intersection_inplace(
4546                     (bitset_container_t *)c1, (const bitset_container_t *)c2, &result)
4547                     ? BITSET_CONTAINER_TYPE_CODE
4548                     : ARRAY_CONTAINER_TYPE_CODE;
4549             return result;
4550         case CONTAINER_PAIR(ARRAY_CONTAINER_TYPE_CODE,
4551                             ARRAY_CONTAINER_TYPE_CODE):
4552             array_container_intersection_inplace((array_container_t *)c1,
4553                                                  (const array_container_t *)c2);
4554             *result_type = ARRAY_CONTAINER_TYPE_CODE;
4555             return c1;
4556         case CONTAINER_PAIR(RUN_CONTAINER_TYPE_CODE, RUN_CONTAINER_TYPE_CODE):
4557             result = run_container_create();
4558             run_container_intersection((const run_container_t *)c1,
4559                                        (const run_container_t *)c2,
4560                                        (run_container_t *)result);
4561             // as of January 2016, Java code used non-in-place intersection for
4562             // two runcontainers
4563             return convert_run_to_efficient_container_and_free(
4564                 (run_container_t *)result, result_type);
4565         case CONTAINER_PAIR(BITSET_CONTAINER_TYPE_CODE,
4566                             ARRAY_CONTAINER_TYPE_CODE):
4567             // c1 is a bitmap so no inplace possible
4568             result = array_container_create();
4569             array_bitset_container_intersection((const array_container_t *)c2,
4570                                                 (const bitset_container_t *)c1,
4571                                                 (array_container_t *)result);
4572             *result_type = ARRAY_CONTAINER_TYPE_CODE;  // never bitset
4573             return result;
4574         case CONTAINER_PAIR(ARRAY_CONTAINER_TYPE_CODE,
4575                             BITSET_CONTAINER_TYPE_CODE):
4576             *result_type = ARRAY_CONTAINER_TYPE_CODE;  // never bitset
4577             array_bitset_container_intersection(
4578                 (const array_container_t *)c1, (const bitset_container_t *)c2,
4579                 (array_container_t *)c1);  // allowed
4580             return c1;
4581 
4582         case CONTAINER_PAIR(BITSET_CONTAINER_TYPE_CODE,
4583                             RUN_CONTAINER_TYPE_CODE):
4584             // will attempt in-place computation
4585             *result_type = run_bitset_container_intersection(
4586                                (const run_container_t *)c2,
4587                                (const bitset_container_t *)c1, &c1)
4588                                ? BITSET_CONTAINER_TYPE_CODE
4589                                : ARRAY_CONTAINER_TYPE_CODE;
4590             return c1;
4591         case CONTAINER_PAIR(RUN_CONTAINER_TYPE_CODE,
4592                             BITSET_CONTAINER_TYPE_CODE):
4593             *result_type = run_bitset_container_intersection(
4594                                (const run_container_t *)c1,
4595                                (const bitset_container_t *)c2, &result)
4596                                ? BITSET_CONTAINER_TYPE_CODE
4597                                : ARRAY_CONTAINER_TYPE_CODE;
4598             return result;
4599         case CONTAINER_PAIR(ARRAY_CONTAINER_TYPE_CODE, RUN_CONTAINER_TYPE_CODE):
4600             result = array_container_create();
4601             *result_type = ARRAY_CONTAINER_TYPE_CODE;  // never bitset
4602             array_run_container_intersection((const array_container_t *)c1,
4603                                              (const run_container_t *)c2,
4604                                              (array_container_t *)result);
4605             return result;
4606 
4607         case CONTAINER_PAIR(RUN_CONTAINER_TYPE_CODE, ARRAY_CONTAINER_TYPE_CODE):
4608             result = array_container_create();
4609             *result_type = ARRAY_CONTAINER_TYPE_CODE;  // never bitset
4610             array_run_container_intersection((const array_container_t *)c2,
4611                                              (const run_container_t *)c1,
4612                                              (array_container_t *)result);
4613             return result;
4614         default:
4615             assert(false);
4616             __builtin_unreachable();
4617             return NULL;
4618     }
4619 }
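
/*
 * Example (an illustrative sketch, not part of the library) of the in-place
 * contract documented above: if the returned pointer differs from c1, the
 * original container must be freed by the caller. Here c1/type1 and c2/type2
 * stand for containers obtained elsewhere:
 *
 *     uint8_t result_type;
 *     void *r = container_iand(c1, type1, c2, type2, &result_type);
 *     if (r != c1) {
 *         container_free(c1, type1);   // c1 was replaced, not modified
 *     }
 *     c1 = r;
 *     type1 = result_type;
 */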
4620 
4621 /**
 * Compute the union of two containers, generating a new container (having
 * type result_type), requires a typecode. This allocates new memory; the
 * caller is responsible for deallocation.
4625  */
static inline void *container_or(const void *c1, uint8_t type1, const void *c2,
4627                                  uint8_t type2, uint8_t *result_type) {
4628     c1 = container_unwrap_shared(c1, &type1);
4629     c2 = container_unwrap_shared(c2, &type2);
4630     void *result = NULL;
4631     switch (CONTAINER_PAIR(type1, type2)) {
4632         case CONTAINER_PAIR(BITSET_CONTAINER_TYPE_CODE,
4633                             BITSET_CONTAINER_TYPE_CODE):
4634             result = bitset_container_create();
4635             bitset_container_or((const bitset_container_t *)c1,
4636                                 (const bitset_container_t *)c2,
4637                                 (bitset_container_t *)result);
4638             *result_type = BITSET_CONTAINER_TYPE_CODE;
4639             return result;
4640         case CONTAINER_PAIR(ARRAY_CONTAINER_TYPE_CODE,
4641                             ARRAY_CONTAINER_TYPE_CODE):
4642             *result_type = array_array_container_union(
4643                                (const array_container_t *)c1,
4644                                (const array_container_t *)c2, &result)
4645                                ? BITSET_CONTAINER_TYPE_CODE
4646                                : ARRAY_CONTAINER_TYPE_CODE;
4647             return result;
4648         case CONTAINER_PAIR(RUN_CONTAINER_TYPE_CODE, RUN_CONTAINER_TYPE_CODE):
4649             result = run_container_create();
4650             run_container_union((const run_container_t *)c1,
4651                                 (const run_container_t *)c2,
4652                                 (run_container_t *)result);
4653             *result_type = RUN_CONTAINER_TYPE_CODE;
4654             // todo: could be optimized since will never convert to array
4655             result = convert_run_to_efficient_container_and_free(
4656                 (run_container_t *)result, (uint8_t *)result_type);
4657             return result;
4658         case CONTAINER_PAIR(BITSET_CONTAINER_TYPE_CODE,
4659                             ARRAY_CONTAINER_TYPE_CODE):
4660             result = bitset_container_create();
4661             array_bitset_container_union((const array_container_t *)c2,
4662                                          (const bitset_container_t *)c1,
4663                                          (bitset_container_t *)result);
4664             *result_type = BITSET_CONTAINER_TYPE_CODE;
4665             return result;
4666         case CONTAINER_PAIR(ARRAY_CONTAINER_TYPE_CODE,
4667                             BITSET_CONTAINER_TYPE_CODE):
4668             result = bitset_container_create();
4669             array_bitset_container_union((const array_container_t *)c1,
4670                                          (const bitset_container_t *)c2,
4671                                          (bitset_container_t *)result);
4672             *result_type = BITSET_CONTAINER_TYPE_CODE;
4673             return result;
4674         case CONTAINER_PAIR(BITSET_CONTAINER_TYPE_CODE,
4675                             RUN_CONTAINER_TYPE_CODE):
4676             if (run_container_is_full((const run_container_t *)c2)) {
4677                 result = run_container_create();
4678                 *result_type = RUN_CONTAINER_TYPE_CODE;
4679                 run_container_copy((const run_container_t *)c2,
4680                                    (run_container_t *)result);
4681                 return result;
4682             }
4683             result = bitset_container_create();
4684             run_bitset_container_union((const run_container_t *)c2,
4685                                        (const bitset_container_t *)c1,
4686                                        (bitset_container_t *)result);
4687             *result_type = BITSET_CONTAINER_TYPE_CODE;
4688             return result;
4689         case CONTAINER_PAIR(RUN_CONTAINER_TYPE_CODE,
4690                             BITSET_CONTAINER_TYPE_CODE):
4691             if (run_container_is_full((const run_container_t *)c1)) {
4692                 result = run_container_create();
4693                 *result_type = RUN_CONTAINER_TYPE_CODE;
4694                 run_container_copy((const run_container_t *)c1,
4695                                    (run_container_t *)result);
4696                 return result;
4697             }
4698             result = bitset_container_create();
4699             run_bitset_container_union((const run_container_t *)c1,
4700                                        (const bitset_container_t *)c2,
4701                                        (bitset_container_t *)result);
4702             *result_type = BITSET_CONTAINER_TYPE_CODE;
4703             return result;
4704         case CONTAINER_PAIR(ARRAY_CONTAINER_TYPE_CODE, RUN_CONTAINER_TYPE_CODE):
4705             result = run_container_create();
4706             array_run_container_union((const array_container_t *)c1,
4707                                       (const run_container_t *)c2,
4708                                       (run_container_t *)result);
4709             result = convert_run_to_efficient_container_and_free(
4710                 (run_container_t *)result, (uint8_t *)result_type);
4711             return result;
4712         case CONTAINER_PAIR(RUN_CONTAINER_TYPE_CODE, ARRAY_CONTAINER_TYPE_CODE):
4713             result = run_container_create();
4714             array_run_container_union((const array_container_t *)c2,
4715                                       (const run_container_t *)c1,
4716                                       (run_container_t *)result);
4717             result = convert_run_to_efficient_container_and_free(
4718                 (run_container_t *)result, (uint8_t *)result_type);
4719             return result;
4720         default:
4721             assert(false);
4722             __builtin_unreachable();
4723             return NULL;  // unreached
4724     }
4725 }
4726 
4727 /**
 * Compute the union of two containers, generating a new container (having
 * type result_type), requires a typecode. This allocates new memory; the
 * caller is responsible for deallocation.
 *
 * This lazy version delays some operations, such as maintenance of the
 * cardinality. The generated containers must be repaired later with
 * container_repair_after_lazy.
4734  */
static inline void *container_lazy_or(const void *c1, uint8_t type1,
4736                                       const void *c2, uint8_t type2,
4737                                       uint8_t *result_type) {
4738     c1 = container_unwrap_shared(c1, &type1);
4739     c2 = container_unwrap_shared(c2, &type2);
4740     void *result = NULL;
4741     switch (CONTAINER_PAIR(type1, type2)) {
4742         case CONTAINER_PAIR(BITSET_CONTAINER_TYPE_CODE,
4743                             BITSET_CONTAINER_TYPE_CODE):
4744             result = bitset_container_create();
4745             bitset_container_or_nocard(
4746                 (const bitset_container_t *)c1, (const bitset_container_t *)c2,
4747                 (bitset_container_t *)result);  // is lazy
4748             *result_type = BITSET_CONTAINER_TYPE_CODE;
4749             return result;
4750         case CONTAINER_PAIR(ARRAY_CONTAINER_TYPE_CODE,
4751                             ARRAY_CONTAINER_TYPE_CODE):
4752             *result_type = array_array_container_lazy_union(
4753                                (const array_container_t *)c1,
4754                                (const array_container_t *)c2, &result)
4755                                ? BITSET_CONTAINER_TYPE_CODE
4756                                : ARRAY_CONTAINER_TYPE_CODE;
4757             return result;
4758         case CONTAINER_PAIR(RUN_CONTAINER_TYPE_CODE, RUN_CONTAINER_TYPE_CODE):
4759             result = run_container_create();
4760             run_container_union((const run_container_t *)c1,
4761                                 (const run_container_t *)c2,
4762                                 (run_container_t *)result);
4763             *result_type = RUN_CONTAINER_TYPE_CODE;
4764             // we are being lazy
4765             result = convert_run_to_efficient_container(
4766                 (run_container_t *)result, result_type);
4767             return result;
4768         case CONTAINER_PAIR(BITSET_CONTAINER_TYPE_CODE,
4769                             ARRAY_CONTAINER_TYPE_CODE):
4770             result = bitset_container_create();
4771             array_bitset_container_lazy_union(
4772                 (const array_container_t *)c2, (const bitset_container_t *)c1,
4773                 (bitset_container_t *)result);  // is lazy
4774             *result_type = BITSET_CONTAINER_TYPE_CODE;
4775             return result;
4776         case CONTAINER_PAIR(ARRAY_CONTAINER_TYPE_CODE,
4777                             BITSET_CONTAINER_TYPE_CODE):
4778             result = bitset_container_create();
4779             array_bitset_container_lazy_union(
4780                 (const array_container_t *)c1, (const bitset_container_t *)c2,
4781                 (bitset_container_t *)result);  // is lazy
4782             *result_type = BITSET_CONTAINER_TYPE_CODE;
4783             return result;
4784         case CONTAINER_PAIR(BITSET_CONTAINER_TYPE_CODE,
4785                             RUN_CONTAINER_TYPE_CODE):
4786             if (run_container_is_full((const run_container_t *)c2)) {
4787                 result = run_container_create();
4788                 *result_type = RUN_CONTAINER_TYPE_CODE;
4789                 run_container_copy((const run_container_t *)c2,
4790                                    (run_container_t *)result);
4791                 return result;
4792             }
4793             result = bitset_container_create();
4794             run_bitset_container_lazy_union(
4795                 (const run_container_t *)c2, (const bitset_container_t *)c1,
4796                 (bitset_container_t *)result);  // is lazy
4797             *result_type = BITSET_CONTAINER_TYPE_CODE;
4798             return result;
4799         case CONTAINER_PAIR(RUN_CONTAINER_TYPE_CODE,
4800                             BITSET_CONTAINER_TYPE_CODE):
4801             if (run_container_is_full((const run_container_t *)c1)) {
4802                 result = run_container_create();
4803                 *result_type = RUN_CONTAINER_TYPE_CODE;
4804                 run_container_copy((const run_container_t *)c1,
4805                                    (run_container_t *)result);
4806                 return result;
4807             }
4808             result = bitset_container_create();
4809             run_bitset_container_lazy_union(
4810                 (const run_container_t *)c1, (const bitset_container_t *)c2,
4811                 (bitset_container_t *)result);  // is lazy
4812             *result_type = BITSET_CONTAINER_TYPE_CODE;
4813             return result;
4814         case CONTAINER_PAIR(ARRAY_CONTAINER_TYPE_CODE, RUN_CONTAINER_TYPE_CODE):
4815             result = run_container_create();
4816             array_run_container_union((const array_container_t *)c1,
4817                                       (const run_container_t *)c2,
4818                                       (run_container_t *)result);
4819             *result_type = RUN_CONTAINER_TYPE_CODE;
4820             // next line skipped since we are lazy
4821             // result = convert_run_to_efficient_container(result, result_type);
4822             return result;
4823         case CONTAINER_PAIR(RUN_CONTAINER_TYPE_CODE, ARRAY_CONTAINER_TYPE_CODE):
4824             result = run_container_create();
4825             array_run_container_union(
4826                 (const array_container_t *)c2, (const run_container_t *)c1,
4827                 (run_container_t *)result);  // TODO make lazy
4828             *result_type = RUN_CONTAINER_TYPE_CODE;
4829             // next line skipped since we are lazy
4830             // result = convert_run_to_efficient_container(result, result_type);
4831             return result;
4832         default:
4833             assert(false);
4834             __builtin_unreachable();
4835             return NULL;  // unreached
4836     }
4837 }
4838 
4839 /**
4840  * Compute the union between two containers, with result in the first container.
4841  * If the returned pointer is identical to c1, then the container has been
4842  * modified.
4843  * If the returned pointer is different from c1, then a new container has been
4844  * created and the caller is responsible for freeing it.
4845  * The type of the first container may change. Returns the modified
4846  * (and possibly new) container. A usage sketch follows the function below.
4847 */
4848 static inline void *container_ior(void *c1, uint8_t type1, const void *c2,
4849                                   uint8_t type2, uint8_t *result_type) {
4850     c1 = get_writable_copy_if_shared(c1, &type1);
4851     c2 = container_unwrap_shared(c2, &type2);
4852     void *result = NULL;
4853     switch (CONTAINER_PAIR(type1, type2)) {
4854         case CONTAINER_PAIR(BITSET_CONTAINER_TYPE_CODE,
4855                             BITSET_CONTAINER_TYPE_CODE):
4856             bitset_container_or((const bitset_container_t *)c1,
4857                                 (const bitset_container_t *)c2,
4858                                 (bitset_container_t *)c1);
4859 #ifdef OR_BITSET_CONVERSION_TO_FULL
4860             if (((bitset_container_t *)c1)->cardinality ==
4861                 (1 << 16)) {  // we convert
4862                 result = run_container_create_range(0, (1 << 16));
4863                 *result_type = RUN_CONTAINER_TYPE_CODE;
4864                 return result;
4865             }
4866 #endif
4867             *result_type = BITSET_CONTAINER_TYPE_CODE;
4868             return c1;
4869         case CONTAINER_PAIR(ARRAY_CONTAINER_TYPE_CODE,
4870                             ARRAY_CONTAINER_TYPE_CODE):
4871             *result_type = array_array_container_inplace_union(
4872                                (array_container_t *)c1,
4873                                (const array_container_t *)c2, &result)
4874                                ? BITSET_CONTAINER_TYPE_CODE
4875                                : ARRAY_CONTAINER_TYPE_CODE;
4876             if((result == NULL)
4877                && (*result_type == ARRAY_CONTAINER_TYPE_CODE)) {
4878                  return c1; // the computation was done in-place!
4879             }
4880             return result;
4881         case CONTAINER_PAIR(RUN_CONTAINER_TYPE_CODE, RUN_CONTAINER_TYPE_CODE):
4882             run_container_union_inplace((run_container_t *)c1,
4883                                         (const run_container_t *)c2);
4884             return convert_run_to_efficient_container((run_container_t *)c1,
4885                                                       result_type);
4886         case CONTAINER_PAIR(BITSET_CONTAINER_TYPE_CODE,
4887                             ARRAY_CONTAINER_TYPE_CODE):
4888             array_bitset_container_union((const array_container_t *)c2,
4889                                          (const bitset_container_t *)c1,
4890                                          (bitset_container_t *)c1);
4891             *result_type = BITSET_CONTAINER_TYPE_CODE;  // never array
4892             return c1;
4893         case CONTAINER_PAIR(ARRAY_CONTAINER_TYPE_CODE,
4894                             BITSET_CONTAINER_TYPE_CODE):
4895             // c1 is an array, so no in-place possible
4896             result = bitset_container_create();
4897             *result_type = BITSET_CONTAINER_TYPE_CODE;
4898             array_bitset_container_union((const array_container_t *)c1,
4899                                          (const bitset_container_t *)c2,
4900                                          (bitset_container_t *)result);
4901             return result;
4902         case CONTAINER_PAIR(BITSET_CONTAINER_TYPE_CODE,
4903                             RUN_CONTAINER_TYPE_CODE):
4904             if (run_container_is_full((const run_container_t *)c2)) {
4905                 result = run_container_create();
4906                 *result_type = RUN_CONTAINER_TYPE_CODE;
4907                 run_container_copy((const run_container_t *)c2,
4908                                    (run_container_t *)result);
4909                 return result;
4910             }
4911             run_bitset_container_union((const run_container_t *)c2,
4912                                        (const bitset_container_t *)c1,
4913                                        (bitset_container_t *)c1);  // allowed
4914             *result_type = BITSET_CONTAINER_TYPE_CODE;
4915             return c1;
4916         case CONTAINER_PAIR(RUN_CONTAINER_TYPE_CODE,
4917                             BITSET_CONTAINER_TYPE_CODE):
4918             if (run_container_is_full((const run_container_t *)c1)) {
4919                 *result_type = RUN_CONTAINER_TYPE_CODE;
4920 
4921                 return c1;
4922             }
4923             result = bitset_container_create();
4924             run_bitset_container_union((const run_container_t *)c1,
4925                                        (const bitset_container_t *)c2,
4926                                        (bitset_container_t *)result);
4927             *result_type = BITSET_CONTAINER_TYPE_CODE;
4928             return result;
4929         case CONTAINER_PAIR(ARRAY_CONTAINER_TYPE_CODE, RUN_CONTAINER_TYPE_CODE):
4930             result = run_container_create();
4931             array_run_container_union((const array_container_t *)c1,
4932                                       (const run_container_t *)c2,
4933                                       (run_container_t *)result);
4934             result = convert_run_to_efficient_container_and_free(
4935                 (run_container_t *)result, result_type);
4936             return result;
4937         case CONTAINER_PAIR(RUN_CONTAINER_TYPE_CODE, ARRAY_CONTAINER_TYPE_CODE):
4938             array_run_container_inplace_union((const array_container_t *)c2,
4939                                               (run_container_t *)c1);
4940             c1 = convert_run_to_efficient_container((run_container_t *)c1,
4941                                                     result_type);
4942             return c1;
4943         default:
4944             assert(false);
4945             __builtin_unreachable();
4946             return NULL;
4947     }
4948 }
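
/*
 * Illustrative sketch (kept under `#if 0`, so it is not compiled): one way a
 * caller can honour the contract above. The helper name `example_inplace_union`
 * is hypothetical; `container_free` is the deallocation helper declared earlier
 * in this file. This matches the pattern used by the bitmap-level in-place
 * operations: when a different pointer comes back, release the original
 * container and adopt the new pointer and typecode.
 */
#if 0
static void example_inplace_union(void **pc1, uint8_t *ptype1,
                                  const void *c2, uint8_t type2) {
    uint8_t result_type = 0;
    void *c = container_ior(*pc1, *ptype1, c2, type2, &result_type);
    if (c != *pc1) {
        /* a replacement container was allocated; drop the original */
        container_free(*pc1, *ptype1);
    }
    *pc1 = c;               /* adopt the (possibly new) container ...   */
    *ptype1 = result_type;  /* ... and its (possibly new) typecode      */
}
#endif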
4949 
4950 /**
4951  * Compute the union between two containers, with result in the first container.
4952  * If the returned pointer is identical to c1, then the container has been
4953  * modified.
4954  * If the returned pointer is different from c1, then a new container has been
4955  * created and the caller is responsible for freeing it.
4956  * The type of the first container may change. Returns the modified
4957  * (and possibly new) container
4958  *
4959  * This lazy version delays some operations, such as maintaining bitset
4960  * cardinalities. The generated containers require a later repair pass
4961  * (a sketch follows the function below). */
4962 static inline void *container_lazy_ior(void *c1, uint8_t type1, const void *c2,
4963                                        uint8_t type2, uint8_t *result_type) {
4964     assert(type1 != SHARED_CONTAINER_TYPE_CODE);
4965     // c1 = get_writable_copy_if_shared(c1,&type1);
4966     c2 = container_unwrap_shared(c2, &type2);
4967     void *result = NULL;
4968     switch (CONTAINER_PAIR(type1, type2)) {
4969         case CONTAINER_PAIR(BITSET_CONTAINER_TYPE_CODE,
4970                             BITSET_CONTAINER_TYPE_CODE):
4971 #ifdef LAZY_OR_BITSET_CONVERSION_TO_FULL
4972             // if we have two bitsets, we might as well compute the cardinality
4973             bitset_container_or((const bitset_container_t *)c1,
4974                                 (const bitset_container_t *)c2,
4975                                 (bitset_container_t *)c1);
4976             // it is possible that two bitsets can lead to a full container
4977             if (((bitset_container_t *)c1)->cardinality ==
4978                 (1 << 16)) {  // we convert
4979                 result = run_container_create_range(0, (1 << 16));
4980                 *result_type = RUN_CONTAINER_TYPE_CODE;
4981                 return result;
4982             }
4983 #else
4984             bitset_container_or_nocard((const bitset_container_t *)c1,
4985                                        (const bitset_container_t *)c2,
4986                                        (bitset_container_t *)c1);
4987 
4988 #endif
4989             *result_type = BITSET_CONTAINER_TYPE_CODE;
4990             return c1;
4991         case CONTAINER_PAIR(ARRAY_CONTAINER_TYPE_CODE,
4992                             ARRAY_CONTAINER_TYPE_CODE):
4993             *result_type = array_array_container_lazy_inplace_union(
4994                                (array_container_t *)c1,
4995                                (const array_container_t *)c2, &result)
4996                                ? BITSET_CONTAINER_TYPE_CODE
4997                                : ARRAY_CONTAINER_TYPE_CODE;
4998             if((result == NULL)
4999                && (*result_type == ARRAY_CONTAINER_TYPE_CODE)) {
5000                  return c1; // the computation was done in-place!
5001             }
5002             return result;
5003         case CONTAINER_PAIR(RUN_CONTAINER_TYPE_CODE, RUN_CONTAINER_TYPE_CODE):
5004             run_container_union_inplace((run_container_t *)c1,
5005                                         (const run_container_t *)c2);
5006             *result_type = RUN_CONTAINER_TYPE_CODE;
5007             return convert_run_to_efficient_container((run_container_t *)c1,
5008                                                       result_type);
5009         case CONTAINER_PAIR(BITSET_CONTAINER_TYPE_CODE,
5010                             ARRAY_CONTAINER_TYPE_CODE):
5011             array_bitset_container_lazy_union(
5012                 (const array_container_t *)c2, (const bitset_container_t *)c1,
5013                 (bitset_container_t *)c1);              // is lazy
5014             *result_type = BITSET_CONTAINER_TYPE_CODE;  // never array
5015             return c1;
5016         case CONTAINER_PAIR(ARRAY_CONTAINER_TYPE_CODE,
5017                             BITSET_CONTAINER_TYPE_CODE):
5018             // c1 is an array, so no in-place possible
5019             result = bitset_container_create();
5020             *result_type = BITSET_CONTAINER_TYPE_CODE;
5021             array_bitset_container_lazy_union(
5022                 (const array_container_t *)c1, (const bitset_container_t *)c2,
5023                 (bitset_container_t *)result);  // is lazy
5024             return result;
5025         case CONTAINER_PAIR(BITSET_CONTAINER_TYPE_CODE,
5026                             RUN_CONTAINER_TYPE_CODE):
5027             if (run_container_is_full((const run_container_t *)c2)) {
5028                 result = run_container_create();
5029                 *result_type = RUN_CONTAINER_TYPE_CODE;
5030                 run_container_copy((const run_container_t *)c2,
5031                                    (run_container_t *)result);
5032                 return result;
5033             }
5034             run_bitset_container_lazy_union(
5035                 (const run_container_t *)c2, (const bitset_container_t *)c1,
5036                 (bitset_container_t *)c1);  // in-place allowed; lazy
5037             *result_type = BITSET_CONTAINER_TYPE_CODE;
5038             return c1;
5039         case CONTAINER_PAIR(RUN_CONTAINER_TYPE_CODE,
5040                             BITSET_CONTAINER_TYPE_CODE):
5041             if (run_container_is_full((const run_container_t *)c1)) {
5042                 *result_type = RUN_CONTAINER_TYPE_CODE;
5043                 return c1;
5044             }
5045             result = bitset_container_create();
5046             run_bitset_container_lazy_union(
5047                 (const run_container_t *)c1, (const bitset_container_t *)c2,
5048                 (bitset_container_t *)result);  //  lazy
5049             *result_type = BITSET_CONTAINER_TYPE_CODE;
5050             return result;
5051         case CONTAINER_PAIR(ARRAY_CONTAINER_TYPE_CODE, RUN_CONTAINER_TYPE_CODE):
5052             result = run_container_create();
5053             array_run_container_union((const array_container_t *)c1,
5054                                       (const run_container_t *)c2,
5055                                       (run_container_t *)result);
5056             *result_type = RUN_CONTAINER_TYPE_CODE;
5057             // next line skipped since we are lazy
5058             // result = convert_run_to_efficient_container_and_free(result,
5059             // result_type);
5060             return result;
5061         case CONTAINER_PAIR(RUN_CONTAINER_TYPE_CODE, ARRAY_CONTAINER_TYPE_CODE):
5062             array_run_container_inplace_union((const array_container_t *)c2,
5063                                               (run_container_t *)c1);
5064             *result_type = RUN_CONTAINER_TYPE_CODE;
5065             // next line skipped since we are lazy
5066             // result = convert_run_to_efficient_container_and_free(result,
5067             // result_type);
5068             return c1;
5069         default:
5070             assert(false);
5071             __builtin_unreachable();
5072             return NULL;
5073     }
5074 }
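
/*
 * Illustrative sketch (kept under `#if 0`, so it is not compiled): the repair
 * step that the lazy operations above call for. The helper name
 * `example_repair_dirty_bitset` is hypothetical. After a chain of
 * container_lazy_ior() calls, a bitset container may carry
 * BITSET_UNKNOWN_CARDINALITY; recomputing the cardinality restores the
 * invariants the non-lazy operations rely on. This is the same fix-up that
 * container_lazy_ixor() performs below before delegating to container_ixor().
 */
#if 0
static void example_repair_dirty_bitset(void *c, uint8_t type) {
    if (type == BITSET_CONTAINER_TYPE_CODE) {
        bitset_container_t *b = (bitset_container_t *)c;
        if (b->cardinality == BITSET_UNKNOWN_CARDINALITY) {
            /* the lazy unions above used the _nocard variants */
            b->cardinality = bitset_container_compute_cardinality(b);
        }
    }
}
#endif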
5075 
5076 /**
5077  * Compute the symmetric difference (xor) between two containers and generate
5078  * a new container whose typecode is written to result_type. This allocates new
5079  * memory; the caller is responsible for deallocation (see the sketch below).
5080  */
5081 static inline void *container_xor(const void *c1, uint8_t type1, const void *c2,
5082                                   uint8_t type2, uint8_t *result_type) {
5083     c1 = container_unwrap_shared(c1, &type1);
5084     c2 = container_unwrap_shared(c2, &type2);
5085     void *result = NULL;
5086     switch (CONTAINER_PAIR(type1, type2)) {
5087         case CONTAINER_PAIR(BITSET_CONTAINER_TYPE_CODE,
5088                             BITSET_CONTAINER_TYPE_CODE):
5089             *result_type = bitset_bitset_container_xor(
5090                                (const bitset_container_t *)c1,
5091                                (const bitset_container_t *)c2, &result)
5092                                ? BITSET_CONTAINER_TYPE_CODE
5093                                : ARRAY_CONTAINER_TYPE_CODE;
5094             return result;
5095         case CONTAINER_PAIR(ARRAY_CONTAINER_TYPE_CODE,
5096                             ARRAY_CONTAINER_TYPE_CODE):
5097             *result_type = array_array_container_xor(
5098                                (const array_container_t *)c1,
5099                                (const array_container_t *)c2, &result)
5100                                ? BITSET_CONTAINER_TYPE_CODE
5101                                : ARRAY_CONTAINER_TYPE_CODE;
5102             return result;
5103         case CONTAINER_PAIR(RUN_CONTAINER_TYPE_CODE, RUN_CONTAINER_TYPE_CODE):
5104             *result_type =
5105                 run_run_container_xor((const run_container_t *)c1,
5106                                       (const run_container_t *)c2, &result);
5107             return result;
5108 
5109         case CONTAINER_PAIR(BITSET_CONTAINER_TYPE_CODE,
5110                             ARRAY_CONTAINER_TYPE_CODE):
5111             *result_type = array_bitset_container_xor(
5112                                (const array_container_t *)c2,
5113                                (const bitset_container_t *)c1, &result)
5114                                ? BITSET_CONTAINER_TYPE_CODE
5115                                : ARRAY_CONTAINER_TYPE_CODE;
5116             return result;
5117         case CONTAINER_PAIR(ARRAY_CONTAINER_TYPE_CODE,
5118                             BITSET_CONTAINER_TYPE_CODE):
5119             *result_type = array_bitset_container_xor(
5120                                (const array_container_t *)c1,
5121                                (const bitset_container_t *)c2, &result)
5122                                ? BITSET_CONTAINER_TYPE_CODE
5123                                : ARRAY_CONTAINER_TYPE_CODE;
5124             return result;
5125         case CONTAINER_PAIR(BITSET_CONTAINER_TYPE_CODE,
5126                             RUN_CONTAINER_TYPE_CODE):
5127             *result_type = run_bitset_container_xor(
5128                                (const run_container_t *)c2,
5129                                (const bitset_container_t *)c1, &result)
5130                                ? BITSET_CONTAINER_TYPE_CODE
5131                                : ARRAY_CONTAINER_TYPE_CODE;
5132             return result;
5133 
5134         case CONTAINER_PAIR(RUN_CONTAINER_TYPE_CODE,
5135                             BITSET_CONTAINER_TYPE_CODE):
5136 
5137             *result_type = run_bitset_container_xor(
5138                                (const run_container_t *)c1,
5139                                (const bitset_container_t *)c2, &result)
5140                                ? BITSET_CONTAINER_TYPE_CODE
5141                                : ARRAY_CONTAINER_TYPE_CODE;
5142             return result;
5143 
5144         case CONTAINER_PAIR(ARRAY_CONTAINER_TYPE_CODE, RUN_CONTAINER_TYPE_CODE):
5145             *result_type =
5146                 array_run_container_xor((const array_container_t *)c1,
5147                                         (const run_container_t *)c2, &result);
5148             return result;
5149 
5150         case CONTAINER_PAIR(RUN_CONTAINER_TYPE_CODE, ARRAY_CONTAINER_TYPE_CODE):
5151             *result_type =
5152                 array_run_container_xor((const array_container_t *)c2,
5153                                         (const run_container_t *)c1, &result);
5154             return result;
5155 
5156         default:
5157             assert(false);
5158             __builtin_unreachable();
5159             return NULL;  // unreached
5160     }
5161 }
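
/*
 * Illustrative sketch (kept under `#if 0`, so it is not compiled): the result
 * of container_xor() is a freshly allocated container that the caller owns.
 * The helper name `example_xor_cardinality` is hypothetical; container_free and
 * container_get_cardinality are assumed to be the helpers declared earlier in
 * this file.
 */
#if 0
static int example_xor_cardinality(const void *c1, uint8_t t1,
                                   const void *c2, uint8_t t2) {
    uint8_t result_type = 0;
    void *r = container_xor(c1, t1, c2, t2, &result_type);
    int card = container_get_cardinality(r, result_type);
    container_free(r, result_type);  /* the caller owns the result */
    return card;
}
#endif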
5162 
5163 /**
5164  * Compute the xor between two containers and generate a new container whose
5165  * typecode is written to result_type. This allocates new memory; the caller
5166  * is responsible for deallocation.
5167  *
5168  * This lazy version delays some operations, such as maintaining bitset
5169  * cardinalities; the generated containers require a later repair pass.
5170  */
5171 static inline void *container_lazy_xor(const void *c1, uint8_t type1,
5172                                        const void *c2, uint8_t type2,
5173                                        uint8_t *result_type) {
5174     c1 = container_unwrap_shared(c1, &type1);
5175     c2 = container_unwrap_shared(c2, &type2);
5176     void *result = NULL;
5177     switch (CONTAINER_PAIR(type1, type2)) {
5178         case CONTAINER_PAIR(BITSET_CONTAINER_TYPE_CODE,
5179                             BITSET_CONTAINER_TYPE_CODE):
5180             result = bitset_container_create();
5181             bitset_container_xor_nocard(
5182                 (const bitset_container_t *)c1, (const bitset_container_t *)c2,
5183                 (bitset_container_t *)result);  // is lazy
5184             *result_type = BITSET_CONTAINER_TYPE_CODE;
5185             return result;
5186         case CONTAINER_PAIR(ARRAY_CONTAINER_TYPE_CODE,
5187                             ARRAY_CONTAINER_TYPE_CODE):
5188             *result_type = array_array_container_lazy_xor(
5189                                (const array_container_t *)c1,
5190                                (const array_container_t *)c2, &result)
5191                                ? BITSET_CONTAINER_TYPE_CODE
5192                                : ARRAY_CONTAINER_TYPE_CODE;
5193             return result;
5194         case CONTAINER_PAIR(RUN_CONTAINER_TYPE_CODE, RUN_CONTAINER_TYPE_CODE):
5195             // nothing special done yet.
5196             *result_type =
5197                 run_run_container_xor((const run_container_t *)c1,
5198                                       (const run_container_t *)c2, &result);
5199             return result;
5200         case CONTAINER_PAIR(BITSET_CONTAINER_TYPE_CODE,
5201                             ARRAY_CONTAINER_TYPE_CODE):
5202             result = bitset_container_create();
5203             *result_type = BITSET_CONTAINER_TYPE_CODE;
5204             array_bitset_container_lazy_xor((const array_container_t *)c2,
5205                                             (const bitset_container_t *)c1,
5206                                             (bitset_container_t *)result);
5207             return result;
5208         case CONTAINER_PAIR(ARRAY_CONTAINER_TYPE_CODE,
5209                             BITSET_CONTAINER_TYPE_CODE):
5210             result = bitset_container_create();
5211             *result_type = BITSET_CONTAINER_TYPE_CODE;
5212             array_bitset_container_lazy_xor((const array_container_t *)c1,
5213                                             (const bitset_container_t *)c2,
5214                                             (bitset_container_t *)result);
5215             return result;
5216         case CONTAINER_PAIR(BITSET_CONTAINER_TYPE_CODE,
5217                             RUN_CONTAINER_TYPE_CODE):
5218             result = bitset_container_create();
5219             run_bitset_container_lazy_xor((const run_container_t *)c2,
5220                                           (const bitset_container_t *)c1,
5221                                           (bitset_container_t *)result);
5222             *result_type = BITSET_CONTAINER_TYPE_CODE;
5223             return result;
5224         case CONTAINER_PAIR(RUN_CONTAINER_TYPE_CODE,
5225                             BITSET_CONTAINER_TYPE_CODE):
5226             result = bitset_container_create();
5227             run_bitset_container_lazy_xor((const run_container_t *)c1,
5228                                           (const bitset_container_t *)c2,
5229                                           (bitset_container_t *)result);
5230             *result_type = BITSET_CONTAINER_TYPE_CODE;
5231             return result;
5232 
5233         case CONTAINER_PAIR(ARRAY_CONTAINER_TYPE_CODE, RUN_CONTAINER_TYPE_CODE):
5234             result = run_container_create();
5235             array_run_container_lazy_xor((const array_container_t *)c1,
5236                                          (const run_container_t *)c2,
5237                                          (run_container_t *)result);
5238             *result_type = RUN_CONTAINER_TYPE_CODE;
5239             // next line skipped since we are lazy
5240             // result = convert_run_to_efficient_container(result, result_type);
5241             return result;
5242         case CONTAINER_PAIR(RUN_CONTAINER_TYPE_CODE, ARRAY_CONTAINER_TYPE_CODE):
5243             result = run_container_create();
5244             array_run_container_lazy_xor((const array_container_t *)c2,
5245                                          (const run_container_t *)c1,
5246                                          (run_container_t *)result);
5247             *result_type = RUN_CONTAINER_TYPE_CODE;
5248             // next line skipped since we are lazy
5249             // result = convert_run_to_efficient_container(result, result_type);
5250             return result;
5251         default:
5252             assert(false);
5253             __builtin_unreachable();
5254             return NULL;  // unreached
5255     }
5256 }
5257 
5258 /**
5259  * Compute the xor between two containers, with result in the first container.
5260  * If the returned pointer is identical to c1, then the container has been
5261  * modified.
5262  * If the returned pointer is different from c1, then a new container has been
5263  * created and the caller is responsible for freeing it.
5264  * The type of the first container may change. Returns the modified
5265  * (and possibly new) container
5266 */
5267 static inline void *container_ixor(void *c1, uint8_t type1, const void *c2,
5268                                    uint8_t type2, uint8_t *result_type) {
5269     c1 = get_writable_copy_if_shared(c1, &type1);
5270     c2 = container_unwrap_shared(c2, &type2);
5271     void *result = NULL;
5272     switch (CONTAINER_PAIR(type1, type2)) {
5273         case CONTAINER_PAIR(BITSET_CONTAINER_TYPE_CODE,
5274                             BITSET_CONTAINER_TYPE_CODE):
5275             *result_type = bitset_bitset_container_ixor(
5276                                (bitset_container_t *)c1,
5277                                (const bitset_container_t *)c2, &result)
5278                                ? BITSET_CONTAINER_TYPE_CODE
5279                                : ARRAY_CONTAINER_TYPE_CODE;
5280             return result;
5281         case CONTAINER_PAIR(ARRAY_CONTAINER_TYPE_CODE,
5282                             ARRAY_CONTAINER_TYPE_CODE):
5283             *result_type = array_array_container_ixor(
5284                                (array_container_t *)c1,
5285                                (const array_container_t *)c2, &result)
5286                                ? BITSET_CONTAINER_TYPE_CODE
5287                                : ARRAY_CONTAINER_TYPE_CODE;
5288             return result;
5289 
5290         case CONTAINER_PAIR(RUN_CONTAINER_TYPE_CODE, RUN_CONTAINER_TYPE_CODE):
5291             *result_type = run_run_container_ixor(
5292                 (run_container_t *)c1, (const run_container_t *)c2, &result);
5293             return result;
5294 
5295         case CONTAINER_PAIR(BITSET_CONTAINER_TYPE_CODE,
5296                             ARRAY_CONTAINER_TYPE_CODE):
5297             *result_type = bitset_array_container_ixor(
5298                                (bitset_container_t *)c1,
5299                                (const array_container_t *)c2, &result)
5300                                ? BITSET_CONTAINER_TYPE_CODE
5301                                : ARRAY_CONTAINER_TYPE_CODE;
5302             return result;
5303         case CONTAINER_PAIR(ARRAY_CONTAINER_TYPE_CODE,
5304                             BITSET_CONTAINER_TYPE_CODE):
5305             *result_type = array_bitset_container_ixor(
5306                                (array_container_t *)c1,
5307                                (const bitset_container_t *)c2, &result)
5308                                ? BITSET_CONTAINER_TYPE_CODE
5309                                : ARRAY_CONTAINER_TYPE_CODE;
5310 
5311             return result;
5312 
5313         case CONTAINER_PAIR(BITSET_CONTAINER_TYPE_CODE,
5314                             RUN_CONTAINER_TYPE_CODE):
5315             *result_type =
5316                 bitset_run_container_ixor((bitset_container_t *)c1,
5317                                           (const run_container_t *)c2, &result)
5318                     ? BITSET_CONTAINER_TYPE_CODE
5319                     : ARRAY_CONTAINER_TYPE_CODE;
5320 
5321             return result;
5322 
5323         case CONTAINER_PAIR(RUN_CONTAINER_TYPE_CODE,
5324                             BITSET_CONTAINER_TYPE_CODE):
5325             *result_type = run_bitset_container_ixor(
5326                                (run_container_t *)c1,
5327                                (const bitset_container_t *)c2, &result)
5328                                ? BITSET_CONTAINER_TYPE_CODE
5329                                : ARRAY_CONTAINER_TYPE_CODE;
5330 
5331             return result;
5332 
5333         case CONTAINER_PAIR(ARRAY_CONTAINER_TYPE_CODE, RUN_CONTAINER_TYPE_CODE):
5334             *result_type = array_run_container_ixor(
5335                 (array_container_t *)c1, (const run_container_t *)c2, &result);
5336             return result;
5337         case CONTAINER_PAIR(RUN_CONTAINER_TYPE_CODE, ARRAY_CONTAINER_TYPE_CODE):
5338             *result_type = run_array_container_ixor(
5339                 (run_container_t *)c1, (const array_container_t *)c2, &result);
5340             return result;
5341         default:
5342             assert(false);
5343             __builtin_unreachable();
5344             return NULL;
5345     }
5346 }
5347 
5348 /**
5349  * Compute the xor between two containers, with result in the first container.
5350  * If the returned pointer is identical to c1, then the container has been
5351  * modified.
5352  * If the returned pointer is different from c1, then a new container has been
5353  * created and the caller is responsible for freeing it.
5354  * The type of the first container may change. Returns the modified
5355  * (and possibly new) container
5356  *
5357  * This lazy version delays some operations, such as maintaining bitset
5358  * cardinalities; the generated containers require a later repair pass.
5359 */
5360 static inline void *container_lazy_ixor(void *c1, uint8_t type1, const void *c2,
5361                                         uint8_t type2, uint8_t *result_type) {
5362     assert(type1 != SHARED_CONTAINER_TYPE_CODE);
5363     // c1 = get_writable_copy_if_shared(c1,&type1);
5364     c2 = container_unwrap_shared(c2, &type2);
5365     switch (CONTAINER_PAIR(type1, type2)) {
5366         case CONTAINER_PAIR(BITSET_CONTAINER_TYPE_CODE,
5367                             BITSET_CONTAINER_TYPE_CODE):
5368             bitset_container_xor_nocard((bitset_container_t *)c1,
5369                                         (const bitset_container_t *)c2,
5370                                         (bitset_container_t *)c1);  // is lazy
5371             *result_type = BITSET_CONTAINER_TYPE_CODE;
5372             return c1;
5373         // TODO: make the other cases lazy as well, especially when an
5374         // in-place result is unlikely; see the corresponding union code above.
5375         default:
5376             // we may have a dirty bitset (without a precomputed cardinality) and
5377             // calling container_ixor on it might be unsafe.
5378             if( (type1 == BITSET_CONTAINER_TYPE_CODE)
5379               && (((const bitset_container_t *)c1)->cardinality == BITSET_UNKNOWN_CARDINALITY)) {
5380                 ((bitset_container_t *)c1)->cardinality = bitset_container_compute_cardinality((bitset_container_t *)c1);
5381             }
5382             return container_ixor(c1, type1, c2, type2, result_type);
5383     }
5384 }
5385 
5386 /**
5387  * Compute the difference (andnot) between two containers and generate a new
5388  * container whose typecode is written to result_type. This allocates new
5389  * memory; the caller is responsible for deallocation.
5390  */
5391 static inline void *container_andnot(const void *c1, uint8_t type1,
5392                                      const void *c2, uint8_t type2,
5393                                      uint8_t *result_type) {
5394     c1 = container_unwrap_shared(c1, &type1);
5395     c2 = container_unwrap_shared(c2, &type2);
5396     void *result = NULL;
5397     switch (CONTAINER_PAIR(type1, type2)) {
5398         case CONTAINER_PAIR(BITSET_CONTAINER_TYPE_CODE,
5399                             BITSET_CONTAINER_TYPE_CODE):
5400             *result_type = bitset_bitset_container_andnot(
5401                                (const bitset_container_t *)c1,
5402                                (const bitset_container_t *)c2, &result)
5403                                ? BITSET_CONTAINER_TYPE_CODE
5404                                : ARRAY_CONTAINER_TYPE_CODE;
5405             return result;
5406         case CONTAINER_PAIR(ARRAY_CONTAINER_TYPE_CODE,
5407                             ARRAY_CONTAINER_TYPE_CODE):
5408             result = array_container_create();
5409             array_array_container_andnot((const array_container_t *)c1,
5410                                          (const array_container_t *)c2,
5411                                          (array_container_t *)result);
5412             *result_type = ARRAY_CONTAINER_TYPE_CODE;
5413             return result;
5414         case CONTAINER_PAIR(RUN_CONTAINER_TYPE_CODE, RUN_CONTAINER_TYPE_CODE):
5415             if (run_container_is_full((const run_container_t *)c2)) {
5416                 result = array_container_create();
5417                 *result_type = ARRAY_CONTAINER_TYPE_CODE;
5418                 return result;
5419             }
5420             *result_type =
5421                 run_run_container_andnot((const run_container_t *)c1,
5422                                          (const run_container_t *)c2, &result);
5423             return result;
5424 
5425         case CONTAINER_PAIR(BITSET_CONTAINER_TYPE_CODE,
5426                             ARRAY_CONTAINER_TYPE_CODE):
5427             *result_type = bitset_array_container_andnot(
5428                                (const bitset_container_t *)c1,
5429                                (const array_container_t *)c2, &result)
5430                                ? BITSET_CONTAINER_TYPE_CODE
5431                                : ARRAY_CONTAINER_TYPE_CODE;
5432             return result;
5433         case CONTAINER_PAIR(ARRAY_CONTAINER_TYPE_CODE,
5434                             BITSET_CONTAINER_TYPE_CODE):
5435             result = array_container_create();
5436             array_bitset_container_andnot((const array_container_t *)c1,
5437                                           (const bitset_container_t *)c2,
5438                                           (array_container_t *)result);
5439             *result_type = ARRAY_CONTAINER_TYPE_CODE;
5440             return result;
5441         case CONTAINER_PAIR(BITSET_CONTAINER_TYPE_CODE,
5442                             RUN_CONTAINER_TYPE_CODE):
5443             if (run_container_is_full((const run_container_t *)c2)) {
5444                 result = array_container_create();
5445                 *result_type = ARRAY_CONTAINER_TYPE_CODE;
5446                 return result;
5447             }
5448             *result_type = bitset_run_container_andnot(
5449                                (const bitset_container_t *)c1,
5450                                (const run_container_t *)c2, &result)
5451                                ? BITSET_CONTAINER_TYPE_CODE
5452                                : ARRAY_CONTAINER_TYPE_CODE;
5453             return result;
5454         case CONTAINER_PAIR(RUN_CONTAINER_TYPE_CODE,
5455                             BITSET_CONTAINER_TYPE_CODE):
5456 
5457             *result_type = run_bitset_container_andnot(
5458                                (const run_container_t *)c1,
5459                                (const bitset_container_t *)c2, &result)
5460                                ? BITSET_CONTAINER_TYPE_CODE
5461                                : ARRAY_CONTAINER_TYPE_CODE;
5462             return result;
5463 
5464         case CONTAINER_PAIR(ARRAY_CONTAINER_TYPE_CODE, RUN_CONTAINER_TYPE_CODE):
5465             if (run_container_is_full((const run_container_t *)c2)) {
5466                 result = array_container_create();
5467                 *result_type = ARRAY_CONTAINER_TYPE_CODE;
5468                 return result;
5469             }
5470             result = array_container_create();
5471             array_run_container_andnot((const array_container_t *)c1,
5472                                        (const run_container_t *)c2,
5473                                        (array_container_t *)result);
5474             *result_type = ARRAY_CONTAINER_TYPE_CODE;
5475             return result;
5476 
5477         case CONTAINER_PAIR(RUN_CONTAINER_TYPE_CODE, ARRAY_CONTAINER_TYPE_CODE):
5478             *result_type = run_array_container_andnot(
5479                 (const run_container_t *)c1, (const array_container_t *)c2,
5480                 &result);
5481             return result;
5482 
5483         default:
5484             assert(false);
5485             __builtin_unreachable();
5486             return NULL;  // unreached
5487     }
5488 }
5489 
5490 /**
5491  * Compute the andnot between two containers, with result in the first
5492  * container.
5493  * If the returned pointer is identical to c1, then the container has been
5494  * modified.
5495  * If the returned pointer is different from c1, then a new container has been
5496  * created and the caller is responsible for freeing it.
5497  * The type of the first container may change. Returns the modified
5498  * (and possibly new) container
5499 */
5500 static inline void *container_iandnot(void *c1, uint8_t type1, const void *c2,
5501                                       uint8_t type2, uint8_t *result_type) {
5502     c1 = get_writable_copy_if_shared(c1, &type1);
5503     c2 = container_unwrap_shared(c2, &type2);
5504     void *result = NULL;
5505     switch (CONTAINER_PAIR(type1, type2)) {
5506         case CONTAINER_PAIR(BITSET_CONTAINER_TYPE_CODE,
5507                             BITSET_CONTAINER_TYPE_CODE):
5508             *result_type = bitset_bitset_container_iandnot(
5509                                (bitset_container_t *)c1,
5510                                (const bitset_container_t *)c2, &result)
5511                                ? BITSET_CONTAINER_TYPE_CODE
5512                                : ARRAY_CONTAINER_TYPE_CODE;
5513             return result;
5514         case CONTAINER_PAIR(ARRAY_CONTAINER_TYPE_CODE,
5515                             ARRAY_CONTAINER_TYPE_CODE):
5516             array_array_container_iandnot((array_container_t *)c1,
5517                                           (const array_container_t *)c2);
5518             *result_type = ARRAY_CONTAINER_TYPE_CODE;
5519             return c1;
5520 
5521         case CONTAINER_PAIR(RUN_CONTAINER_TYPE_CODE, RUN_CONTAINER_TYPE_CODE):
5522             *result_type = run_run_container_iandnot(
5523                 (run_container_t *)c1, (const run_container_t *)c2, &result);
5524             return result;
5525 
5526         case CONTAINER_PAIR(BITSET_CONTAINER_TYPE_CODE,
5527                             ARRAY_CONTAINER_TYPE_CODE):
5528             *result_type = bitset_array_container_iandnot(
5529                                (bitset_container_t *)c1,
5530                                (const array_container_t *)c2, &result)
5531                                ? BITSET_CONTAINER_TYPE_CODE
5532                                : ARRAY_CONTAINER_TYPE_CODE;
5533             return result;
5534         case CONTAINER_PAIR(ARRAY_CONTAINER_TYPE_CODE,
5535                             BITSET_CONTAINER_TYPE_CODE):
5536             *result_type = ARRAY_CONTAINER_TYPE_CODE;
5537 
5538             array_bitset_container_iandnot((array_container_t *)c1,
5539                                            (const bitset_container_t *)c2);
5540             return c1;
5541 
5542         case CONTAINER_PAIR(BITSET_CONTAINER_TYPE_CODE,
5543                             RUN_CONTAINER_TYPE_CODE):
5544             *result_type = bitset_run_container_iandnot(
5545                                (bitset_container_t *)c1,
5546                                (const run_container_t *)c2, &result)
5547                                ? BITSET_CONTAINER_TYPE_CODE
5548                                : ARRAY_CONTAINER_TYPE_CODE;
5549 
5550             return result;
5551 
5552         case CONTAINER_PAIR(RUN_CONTAINER_TYPE_CODE,
5553                             BITSET_CONTAINER_TYPE_CODE):
5554             *result_type = run_bitset_container_iandnot(
5555                                (run_container_t *)c1,
5556                                (const bitset_container_t *)c2, &result)
5557                                ? BITSET_CONTAINER_TYPE_CODE
5558                                : ARRAY_CONTAINER_TYPE_CODE;
5559 
5560             return result;
5561 
5562         case CONTAINER_PAIR(ARRAY_CONTAINER_TYPE_CODE, RUN_CONTAINER_TYPE_CODE):
5563             *result_type = ARRAY_CONTAINER_TYPE_CODE;
5564             array_run_container_iandnot((array_container_t *)c1,
5565                                         (const run_container_t *)c2);
5566             return c1;
5567         case CONTAINER_PAIR(RUN_CONTAINER_TYPE_CODE, ARRAY_CONTAINER_TYPE_CODE):
5568             *result_type = run_array_container_iandnot(
5569                 (run_container_t *)c1, (const array_container_t *)c2, &result);
5570             return result;
5571         default:
5572             assert(false);
5573             __builtin_unreachable();
5574             return NULL;
5575     }
5576 }
5577 
5578 /**
5579  * Visit all values x of the container once, passing (base+x, ptr)
5580  * to the iterator. You need to specify a container and its type.
5581  * Returns false if the iterator stopped the iteration early (by returning
5582  * false); true otherwise. A callback sketch follows the function below. */
5583 static inline bool container_iterate(const void *container, uint8_t typecode,
5584                                      uint32_t base, roaring_iterator iterator,
5585                                      void *ptr) {
5586     container = container_unwrap_shared(container, &typecode);
5587     switch (typecode) {
5588         case BITSET_CONTAINER_TYPE_CODE:
5589             return bitset_container_iterate(
5590                 (const bitset_container_t *)container, base, iterator, ptr);
5591         case ARRAY_CONTAINER_TYPE_CODE:
5592             return array_container_iterate((const array_container_t *)container,
5593                                            base, iterator, ptr);
5594         case RUN_CONTAINER_TYPE_CODE:
5595             return run_container_iterate((const run_container_t *)container,
5596                                          base, iterator, ptr);
5597         case SHARED_CONTAINER_TYPE_CODE:
5598         default:
5599             assert(false);
5600             __builtin_unreachable();
5601             return false;
5602     }
5603 }
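
/*
 * Illustrative sketch (kept under `#if 0`, so it is not compiled): a
 * roaring_iterator callback receives each value plus an opaque parameter and
 * returns false to stop the visit early. The `example_*` names are
 * hypothetical.
 */
#if 0
static bool example_sum_callback(uint32_t value, void *param) {
    *(uint64_t *)param += value;
    return true;  /* keep iterating */
}

static uint64_t example_sum_container(const void *container, uint8_t typecode,
                                      uint32_t base) {
    uint64_t sum = 0;
    container_iterate(container, typecode, base, example_sum_callback, &sum);
    return sum;
}
#endif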
5604 
5605 static inline bool container_iterate64(const void *container, uint8_t typecode,
5606                                        uint32_t base,
5607                                        roaring_iterator64 iterator,
5608                                        uint64_t high_bits, void *ptr) {
5609     container = container_unwrap_shared(container, &typecode);
5610     switch (typecode) {
5611         case BITSET_CONTAINER_TYPE_CODE:
5612             return bitset_container_iterate64(
5613                 (const bitset_container_t *)container, base, iterator,
5614                 high_bits, ptr);
5615         case ARRAY_CONTAINER_TYPE_CODE:
5616             return array_container_iterate64(
5617                 (const array_container_t *)container, base, iterator, high_bits,
5618                 ptr);
5619         case RUN_CONTAINER_TYPE_CODE:
5620             return run_container_iterate64((const run_container_t *)container,
5621                                            base, iterator, high_bits, ptr);
5622         case SHARED_CONTAINER_TYPE_CODE:
5623         default:
5624             assert(false);
5625             __builtin_unreachable();
5626             return false;
5627     }
5628 }
5629 
5630 static inline void *container_not(const void *c, uint8_t typ,
5631                                   uint8_t *result_type) {
5632     c = container_unwrap_shared(c, &typ);
5633     void *result = NULL;
5634     switch (typ) {
5635         case BITSET_CONTAINER_TYPE_CODE:
5636             *result_type = bitset_container_negation(
5637                                (const bitset_container_t *)c, &result)
5638                                ? BITSET_CONTAINER_TYPE_CODE
5639                                : ARRAY_CONTAINER_TYPE_CODE;
5640             return result;
5641         case ARRAY_CONTAINER_TYPE_CODE:
5642             result = bitset_container_create();
5643             *result_type = BITSET_CONTAINER_TYPE_CODE;
5644             array_container_negation((const array_container_t *)c,
5645                                      (bitset_container_t *)result);
5646             return result;
5647         case RUN_CONTAINER_TYPE_CODE:
5648             *result_type =
5649                 run_container_negation((const run_container_t *)c, &result);
5650             return result;
5651 
5652         case SHARED_CONTAINER_TYPE_CODE:
5653         default:
5654             assert(false);
5655             __builtin_unreachable();
5656             return NULL;
5657     }
5658 }
5659 
5660 static inline void *container_not_range(const void *c, uint8_t typ,
5661                                         uint32_t range_start,
5662                                         uint32_t range_end,
5663                                         uint8_t *result_type) {
5664     c = container_unwrap_shared(c, &typ);
5665     void *result = NULL;
5666     switch (typ) {
5667         case BITSET_CONTAINER_TYPE_CODE:
5668             *result_type =
5669                 bitset_container_negation_range((const bitset_container_t *)c,
5670                                                 range_start, range_end, &result)
5671                     ? BITSET_CONTAINER_TYPE_CODE
5672                     : ARRAY_CONTAINER_TYPE_CODE;
5673             return result;
5674         case ARRAY_CONTAINER_TYPE_CODE:
5675             *result_type =
5676                 array_container_negation_range((const array_container_t *)c,
5677                                                range_start, range_end, &result)
5678                     ? BITSET_CONTAINER_TYPE_CODE
5679                     : ARRAY_CONTAINER_TYPE_CODE;
5680             return result;
5681         case RUN_CONTAINER_TYPE_CODE:
5682             *result_type = run_container_negation_range(
5683                 (const run_container_t *)c, range_start, range_end, &result);
5684             return result;
5685 
5686         case SHARED_CONTAINER_TYPE_CODE:
5687         default:
5688             assert(false);
5689             __builtin_unreachable();
5690             return NULL;
5691     }
5692 }
5693 
5694 static inline void *container_inot(void *c, uint8_t typ, uint8_t *result_type) {
5695     c = get_writable_copy_if_shared(c, &typ);
5696     void *result = NULL;
5697     switch (typ) {
5698         case BITSET_CONTAINER_TYPE_CODE:
5699             *result_type = bitset_container_negation_inplace(
5700                                (bitset_container_t *)c, &result)
5701                                ? BITSET_CONTAINER_TYPE_CODE
5702                                : ARRAY_CONTAINER_TYPE_CODE;
5703             return result;
5704         case ARRAY_CONTAINER_TYPE_CODE:
5705             // will never be inplace
5706             result = bitset_container_create();
5707             *result_type = BITSET_CONTAINER_TYPE_CODE;
5708             array_container_negation((array_container_t *)c,
5709                                      (bitset_container_t *)result);
5710             array_container_free((array_container_t *)c);
5711             return result;
5712         case RUN_CONTAINER_TYPE_CODE:
5713             *result_type =
5714                 run_container_negation_inplace((run_container_t *)c, &result);
5715             return result;
5716 
5717         case SHARED_CONTAINER_TYPE_CODE:
5718         default:
5719             assert(false);
5720             __builtin_unreachable();
5721             return NULL;
5722     }
5723 }
5724 
5725 static inline void *container_inot_range(void *c, uint8_t typ,
5726                                          uint32_t range_start,
5727                                          uint32_t range_end,
5728                                          uint8_t *result_type) {
5729     c = get_writable_copy_if_shared(c, &typ);
5730     void *result = NULL;
5731     switch (typ) {
5732         case BITSET_CONTAINER_TYPE_CODE:
5733             *result_type =
5734                 bitset_container_negation_range_inplace(
5735                     (bitset_container_t *)c, range_start, range_end, &result)
5736                     ? BITSET_CONTAINER_TYPE_CODE
5737                     : ARRAY_CONTAINER_TYPE_CODE;
5738             return result;
5739         case ARRAY_CONTAINER_TYPE_CODE:
5740             *result_type =
5741                 array_container_negation_range_inplace(
5742                     (array_container_t *)c, range_start, range_end, &result)
5743                     ? BITSET_CONTAINER_TYPE_CODE
5744                     : ARRAY_CONTAINER_TYPE_CODE;
5745             return result;
5746         case RUN_CONTAINER_TYPE_CODE:
5747             *result_type = run_container_negation_range_inplace(
5748                 (run_container_t *)c, range_start, range_end, &result);
5749             return result;
5750 
5751         case SHARED_CONTAINER_TYPE_CODE:
5752         default:
5753             assert(false);
5754             __builtin_unreachable();
5755             return NULL;
5756     }
5757 }
5758 
5759 /**
5760  * If the element of the given rank is in this container, supposing that the
5761  * first element has rank start_rank, then the function returns true and sets
5762  * element accordingly.
5763  * Otherwise, it returns false and updates start_rank so that the search can
5764  * continue in the next container. A sketch of this pattern follows the
5765  * function below.
5766  */
5767 static inline bool container_select(const void *container, uint8_t typecode,
5768                                     uint32_t *start_rank, uint32_t rank,
5769                                     uint32_t *element) {
5770     container = container_unwrap_shared(container, &typecode);
5771     switch (typecode) {
5772         case BITSET_CONTAINER_TYPE_CODE:
5773             return bitset_container_select((const bitset_container_t *)container,
5774                                            start_rank, rank, element);
5775         case ARRAY_CONTAINER_TYPE_CODE:
5776             return array_container_select((const array_container_t *)container,
5777                                           start_rank, rank, element);
5778         case RUN_CONTAINER_TYPE_CODE:
5779             return run_container_select((const run_container_t *)container,
5780                                         start_rank, rank, element);
5781         case SHARED_CONTAINER_TYPE_CODE:
5782         default:
5783             assert(false);
5784             __builtin_unreachable();
5785             return false;
5786     }
5787 }
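
/*
 * Illustrative sketch (kept under `#if 0`, so it is not compiled): how
 * start_rank lets a caller chain container_select() across several containers,
 * in the spirit of the bitmap-level select. The `example_select` name and the
 * containers[]/typecodes[]/keys[] arrays are hypothetical; keys[] is assumed to
 * hold each container's high 16 bits.
 */
#if 0
static bool example_select(void **containers, uint8_t *typecodes,
                           const uint16_t *keys, int n, uint32_t rank,
                           uint32_t *element) {
    uint32_t start_rank = 0;  /* advanced by each container that misses */
    for (int i = 0; i < n; i++) {
        if (container_select(containers[i], typecodes[i], &start_rank, rank,
                             element)) {
            *element |= ((uint32_t)keys[i]) << 16;  /* restore the high bits */
            return true;
        }
    }
    return false;  /* rank exceeds the total number of stored values */
}
#endif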
5788 
5789 static inline uint16_t container_maximum(const void *container,
5790                                          uint8_t typecode) {
5791     container = container_unwrap_shared(container, &typecode);
5792     switch (typecode) {
5793         case BITSET_CONTAINER_TYPE_CODE:
5794             return bitset_container_maximum((const bitset_container_t *)container);
5795         case ARRAY_CONTAINER_TYPE_CODE:
5796             return array_container_maximum((const array_container_t *)container);
5797         case RUN_CONTAINER_TYPE_CODE:
5798             return run_container_maximum((const run_container_t *)container);
5799         case SHARED_CONTAINER_TYPE_CODE:
5800         default:
5801             assert(false);
5802             __builtin_unreachable();
5803             return false;
5804     }
5805 }
5806 
5807 static inline uint16_t container_minimum(const void *container,
5808                                          uint8_t typecode) {
5809     container = container_unwrap_shared(container, &typecode);
5810     switch (typecode) {
5811         case BITSET_CONTAINER_TYPE_CODE:
5812             return bitset_container_minimum((const bitset_container_t *)container);
5813         case ARRAY_CONTAINER_TYPE_CODE:
5814             return array_container_minimum((const array_container_t *)container);
5815         case RUN_CONTAINER_TYPE_CODE:
5816             return run_container_minimum((const run_container_t *)container);
5817         case SHARED_CONTAINER_TYPE_CODE:
5818         default:
5819             assert(false);
5820             __builtin_unreachable();
5821             return false;
5822     }
5823 }
5824 
5825 // number of values smaller than or equal to x (see the sketch after this function)
5826 static inline int container_rank(const void *container, uint8_t typecode,
5827                                  uint16_t x) {
5828     container = container_unwrap_shared(container, &typecode);
5829     switch (typecode) {
5830         case BITSET_CONTAINER_TYPE_CODE:
5831             return bitset_container_rank((const bitset_container_t *)container, x);
5832         case ARRAY_CONTAINER_TYPE_CODE:
5833             return array_container_rank((const array_container_t *)container, x);
5834         case RUN_CONTAINER_TYPE_CODE:
5835             return run_container_rank((const run_container_t *)container, x);
5836         case SHARED_CONTAINER_TYPE_CODE:
5837         default:
5838             assert(false);
5839             __builtin_unreachable();
5840             return false;
5841     }
5842 }
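
/*
 * Illustrative sketch (kept under `#if 0`, so it is not compiled): using the
 * rank to recover the 0-based position of a stored value. The helper name
 * `example_index_of` is hypothetical, and container_contains is assumed to be
 * the membership helper declared earlier in this file.
 */
#if 0
static int example_index_of(const void *container, uint8_t typecode,
                            uint16_t x) {
    if (!container_contains(container, x, typecode)) {
        return -1;  /* not present */
    }
    /* container_rank counts the values <= x, so the position is rank - 1 */
    return container_rank(container, typecode, x) - 1;
}
#endif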
5843 
5844 /**
5845  * Add all values in range [min, max] to a given container.
5846  *
5847  * If the returned pointer is different from $container, then a new container
5848  * has been created and the caller is responsible for freeing the original
5849  * container. The type of the container may change. Returns the modified
5850  * (and possibly new) container; see the sketch after this function.
5851  */
5852 static inline void *container_add_range(void *container, uint8_t type,
5853                                         uint32_t min, uint32_t max,
5854                                         uint8_t *result_type) {
5855     // NB: when selecting new container type, we perform only inexpensive checks
5856     switch (type) {
5857         case BITSET_CONTAINER_TYPE_CODE: {
5858             bitset_container_t *bitset = (bitset_container_t *) container;
5859 
5860             int32_t union_cardinality = 0;
5861             union_cardinality += bitset->cardinality;
5862             union_cardinality += max - min + 1;
5863             union_cardinality -= bitset_lenrange_cardinality(bitset->array, min, max-min);
5864 
5865             if (union_cardinality == INT32_C(0x10000)) {
5866                 *result_type = RUN_CONTAINER_TYPE_CODE;
5867                 return run_container_create_range(0, INT32_C(0x10000));
5868             } else {
5869                 *result_type = BITSET_CONTAINER_TYPE_CODE;
5870                 bitset_set_lenrange(bitset->array, min, max - min);
5871                 bitset->cardinality = union_cardinality;
5872                 return bitset;
5873             }
5874         }
5875         case ARRAY_CONTAINER_TYPE_CODE: {
5876             array_container_t *array = (array_container_t *) container;
5877 
5878             int32_t nvals_greater = count_greater(array->array, array->cardinality, max);
5879             int32_t nvals_less = count_less(array->array, array->cardinality - nvals_greater, min);
5880             int32_t union_cardinality = nvals_less + (max - min + 1) + nvals_greater;
5881 
5882             if (union_cardinality == INT32_C(0x10000)) {
5883                 *result_type = RUN_CONTAINER_TYPE_CODE;
5884                 return run_container_create_range(0, INT32_C(0x10000));
5885             } else if (union_cardinality <= DEFAULT_MAX_SIZE) {
5886                 *result_type = ARRAY_CONTAINER_TYPE_CODE;
5887                 array_container_add_range_nvals(array, min, max, nvals_less, nvals_greater);
5888                 return array;
5889             } else {
5890                 *result_type = BITSET_CONTAINER_TYPE_CODE;
5891                 bitset_container_t *bitset = bitset_container_from_array(array);
5892                 bitset_set_lenrange(bitset->array, min, max - min);
5893                 bitset->cardinality = union_cardinality;
5894                 return bitset;
5895             }
5896         }
5897         case RUN_CONTAINER_TYPE_CODE: {
5898             run_container_t *run = (run_container_t *) container;
5899 
5900             int32_t nruns_greater = rle16_count_greater(run->runs, run->n_runs, max);
5901             int32_t nruns_less = rle16_count_less(run->runs, run->n_runs - nruns_greater, min);
5902 
5903             int32_t run_size_bytes = (nruns_less + 1 + nruns_greater) * sizeof(rle16_t);
5904             int32_t bitset_size_bytes = BITSET_CONTAINER_SIZE_IN_WORDS * sizeof(uint64_t);
5905 
5906             if (run_size_bytes <= bitset_size_bytes) {
5907                 run_container_add_range_nruns(run, min, max, nruns_less, nruns_greater);
5908                 *result_type = RUN_CONTAINER_TYPE_CODE;
5909                 return run;
5910             } else {
5911                 *result_type = BITSET_CONTAINER_TYPE_CODE;
5912                 return bitset_container_from_run_range(run, min, max);
5913             }
5914         }
5915         case SHARED_CONTAINER_TYPE_CODE:
5916         default:
5917             __builtin_unreachable();
5918     }
5919 }
5920 
5921 /*
5922  * Removes all elements in range [min, max].
5923  * Returns one of:
5924  *   - NULL if no elements left
5925  *   - pointer to the original container
5926  *   - pointer to a newly-allocated container (if it is more efficient)
5927  *
5928  * If the returned pointer is different from $container, then a new container
5929  * has been created and the caller is responsible for freeing the original container.
5930  */
5931 static inline void *container_remove_range(void *container, uint8_t type,
5932                                            uint32_t min, uint32_t max,
5933                                            uint8_t *result_type) {
5934      switch (type) {
5935         case BITSET_CONTAINER_TYPE_CODE: {
5936             bitset_container_t *bitset = (bitset_container_t *) container;
5937 
5938             int32_t result_cardinality = bitset->cardinality -
5939                 bitset_lenrange_cardinality(bitset->array, min, max-min);
5940 
5941             if (result_cardinality == 0) {
5942                 return NULL;
5943             } else if (result_cardinality < DEFAULT_MAX_SIZE) {
5944                 *result_type = ARRAY_CONTAINER_TYPE_CODE;
5945                 bitset_reset_range(bitset->array, min, max+1);
5946                 bitset->cardinality = result_cardinality;
5947                 return array_container_from_bitset(bitset);
5948             } else {
5949                 *result_type = BITSET_CONTAINER_TYPE_CODE;
5950                 bitset_reset_range(bitset->array, min, max+1);
5951                 bitset->cardinality = result_cardinality;
5952                 return bitset;
5953             }
5954         }
5955         case ARRAY_CONTAINER_TYPE_CODE: {
5956             array_container_t *array = (array_container_t *) container;
5957 
5958             int32_t nvals_greater = count_greater(array->array, array->cardinality, max);
5959             int32_t nvals_less = count_less(array->array, array->cardinality - nvals_greater, min);
5960             int32_t result_cardinality = nvals_less + nvals_greater;
5961 
5962             if (result_cardinality == 0) {
5963                 return NULL;
5964             } else {
5965                 *result_type = ARRAY_CONTAINER_TYPE_CODE;
5966                 array_container_remove_range(array, nvals_less,
5967                     array->cardinality - result_cardinality);
5968                 return array;
5969             }
5970         }
5971         case RUN_CONTAINER_TYPE_CODE: {
5972             run_container_t *run = (run_container_t *) container;
5973 
5974             if (run->n_runs == 0) {
5975                 return NULL;
5976             }
5977             if (min <= run_container_minimum(run) && max >= run_container_maximum(run)) {
5978                 return NULL;
5979             }
5980 
5981             run_container_remove_range(run, min, max);
5982 
5983             if (run_container_serialized_size_in_bytes(run->n_runs) <=
5984                     bitset_container_serialized_size_in_bytes()) {
5985                 *result_type = RUN_CONTAINER_TYPE_CODE;
5986                 return run;
5987             } else {
5988                 *result_type = BITSET_CONTAINER_TYPE_CODE;
5989                 return bitset_container_from_run(run);
5990             }
5991         }
5992         case SHARED_CONTAINER_TYPE_CODE:
5993         default:
5994             __builtin_unreachable();
5995      }
5996 }
5997 
5998 #endif
5999 /* end file include/roaring/containers/containers.h */
6000 /* begin file include/roaring/roaring_array.h */
6001 #ifndef INCLUDE_ROARING_ARRAY_H
6002 #define INCLUDE_ROARING_ARRAY_H
6003 #ifdef __cplusplus
6004 extern "C" {
6005 #endif
6006 
6007 #include <assert.h>
6008 #include <stdbool.h>
6009 #include <stdint.h>
6010 
6011 #define MAX_CONTAINERS 65536
6012 
6013 #define SERIALIZATION_ARRAY_UINT32 1
6014 #define SERIALIZATION_CONTAINER 2
6015 
6016 #define ROARING_FLAG_COW UINT8_C(0x1)
6017 #define ROARING_FLAG_FROZEN UINT8_C(0x2)
6018 
6019 enum {
6020     SERIAL_COOKIE_NO_RUNCONTAINER = 12346,
6021     SERIAL_COOKIE = 12347,
6022     FROZEN_COOKIE = 13766,
6023     NO_OFFSET_THRESHOLD = 4
6024 };
6025 
6026 /**
6027  * Roaring arrays are array-based key-value pairs having containers as values
6028  * and 16-bit integer keys. A roaring bitmap  might be implemented as such.
6029  */
6030 
6031 // Parallel arrays; the element sizes are quite different.
6032 // The alternative is an array of structs.
6033 // Which layout would have better cache performance
6034 // for binary searches?
6035 
6036 typedef struct roaring_array_s {
6037     int32_t size;
6038     int32_t allocation_size;
6039     void **containers;
6040     uint16_t *keys;
6041     uint8_t *typecodes;
6042     uint8_t flags;
6043 } roaring_array_t;
6044 
6045 /**
6046  * Create a new roaring array
6047  */
6048 roaring_array_t *ra_create(void);
6049 
6050 /**
6051  * Initialize an existing roaring array with the specified capacity (in number
6052  * of containers)
6053  */
6054 bool ra_init_with_capacity(roaring_array_t *new_ra, uint32_t cap);
6055 
6056 /**
6057  * Initialize with zero capacity
6058  */
6059 void ra_init(roaring_array_t *t);
6060 
6061 /**
6062  * Copies this roaring array; we assume that dest is not initialized
6063  */
6064 bool ra_copy(const roaring_array_t *source, roaring_array_t *dest,
6065              bool copy_on_write);
6066 
6067 /*
6068  * Shrinks the capacity, returns the number of bytes saved.
6069  */
6070 int ra_shrink_to_fit(roaring_array_t *ra);
6071 
6072 /**
6073  * Copies this roaring array; we assume that dest is initialized
6074  */
6075 bool ra_overwrite(const roaring_array_t *source, roaring_array_t *dest,
6076                   bool copy_on_write);
6077 
6078 /**
6079  * Frees the memory used by a roaring array
6080  */
6081 void ra_clear(roaring_array_t *r);
6082 
6083 /**
6084  * Frees the memory used by a roaring array, but does not free the containers
6085  */
6086 void ra_clear_without_containers(roaring_array_t *r);
6087 
6088 /**
6089  * Frees just the containers
6090  */
6091 void ra_clear_containers(roaring_array_t *ra);
6092 
6093 /**
6094  * Get the index corresponding to a 16-bit key
6095  */
6096 static inline int32_t ra_get_index(const roaring_array_t *ra, uint16_t x) {
6097     if ((ra->size == 0) || ra->keys[ra->size - 1] == x) return ra->size - 1;
6098     return binarySearch(ra->keys, (int32_t)ra->size, x);
6099 }
6100 
6101 /**
6102  * Retrieves the container at index i, filling in the typecode
6103  */
6104 static inline void *ra_get_container_at_index(const roaring_array_t *ra, uint16_t i,
6105                                        uint8_t *typecode) {
6106     *typecode = ra->typecodes[i];
6107     return ra->containers[i];
6108 }
6109 
6110 /**
6111  * Retrieves the key at index i
6112  */
6113 uint16_t ra_get_key_at_index(const roaring_array_t *ra, uint16_t i);
6114 
6115 /**
6116  * Add a new key-value pair at index i
6117  */
6118 void ra_insert_new_key_value_at(roaring_array_t *ra, int32_t i, uint16_t key,
6119                                 void *container, uint8_t typecode);
6120 
6121 /**
6122  * Append a new key-value pair
6123  */
6124 void ra_append(roaring_array_t *ra, uint16_t s, void *c, uint8_t typecode);
6125 
6126 /**
6127  * Append a new key-value pair to ra, cloning (in COW sense) a value from sa
6128  * at index index
6129  */
6130 void ra_append_copy(roaring_array_t *ra, const roaring_array_t *sa,
6131                     uint16_t index, bool copy_on_write);
6132 
6133 /**
6134  * Append new key-value pairs to ra, cloning (in COW sense)  values from sa
6135  * at indexes
6136  * [start_index, end_index)
6137  */
6138 void ra_append_copy_range(roaring_array_t *ra, const roaring_array_t *sa,
6139                           int32_t start_index, int32_t end_index,
6140                           bool copy_on_write);
6141 
6142 /** appends from sa to ra, ending with the greatest key that is
6143  * less than or equal to stopping_key
6144  */
6145 void ra_append_copies_until(roaring_array_t *ra, const roaring_array_t *sa,
6146                             uint16_t stopping_key, bool copy_on_write);
6147 
6148 /** appends from sa to ra, starting with the smallest key that is
6149  * strictly greater than before_start
6150  */
6151 
6152 void ra_append_copies_after(roaring_array_t *ra, const roaring_array_t *sa,
6153                             uint16_t before_start, bool copy_on_write);
6154 
6155 /**
6156  * Move the key-value pairs to ra from sa at indexes
6157  * [start_index, end_index), old array should not be freed
6158  * (use ra_clear_without_containers)
6159  **/
6160 void ra_append_move_range(roaring_array_t *ra, roaring_array_t *sa,
6161                           int32_t start_index, int32_t end_index);
6162 /**
6163  * Append new key-value pairs to ra,  from sa at indexes
6164  * [start_index, end_index)
6165  */
6166 void ra_append_range(roaring_array_t *ra, roaring_array_t *sa,
6167                      int32_t start_index, int32_t end_index,
6168                      bool copy_on_write);
6169 
6170 /**
6171  * Set the container at the corresponding index using the specified
6172  * typecode.
6173  */
6174 static inline void ra_set_container_at_index(const roaring_array_t *ra, int32_t i,
6175                                       void *c, uint8_t typecode) {
6176     assert(i < ra->size);
6177     ra->containers[i] = c;
6178     ra->typecodes[i] = typecode;
6179 }
6180 
6181 /**
6182  * If needed, increase the capacity of the array so that it can fit at least
6183  * k values.
6185  */
6186 bool extend_array(roaring_array_t *ra, int32_t k);
6187 
6188 static inline int32_t ra_get_size(const roaring_array_t *ra) { return ra->size; }
6189 
6190 static inline int32_t ra_advance_until(const roaring_array_t *ra, uint16_t x,
6191                                        int32_t pos) {
6192     return advanceUntil(ra->keys, pos, ra->size, x);
6193 }
6194 
6195 int32_t ra_advance_until_freeing(roaring_array_t *ra, uint16_t x, int32_t pos);
6196 
6197 void ra_downsize(roaring_array_t *ra, int32_t new_length);
6198 
6199 static inline void ra_replace_key_and_container_at_index(roaring_array_t *ra,
6200                                                   int32_t i, uint16_t key,
6201                                                   void *c, uint8_t typecode) {
6202     assert(i < ra->size);
6203 
6204     ra->keys[i] = key;
6205     ra->containers[i] = c;
6206     ra->typecodes[i] = typecode;
6207 }
6208 
6209 // write set bits to an array
6210 void ra_to_uint32_array(const roaring_array_t *ra, uint32_t *ans);
6211 
6212 bool ra_range_uint32_array(const roaring_array_t *ra, size_t offset, size_t limit, uint32_t *ans);
6213 
6214 /**
6215  * write a bitmap to a buffer. This is meant to be compatible with
6216  * the
6217  * Java and Go versions. Return the size in bytes of the serialized
6218  * output (which should be ra_portable_size_in_bytes(ra)).
6219  */
6220 size_t ra_portable_serialize(const roaring_array_t *ra, char *buf);
6221 
6222 /**
6223  * read a bitmap from a serialized version. This is meant to be compatible
6224  * with the Java and Go versions.
6225  * maxbytes indicates how many bytes are available in buf.
6226  * When the function returns true, roaring_array_t is populated with the data
6227  * and *readbytes indicates how many bytes were read. In all cases, if the function
6228  * returns true, then maxbytes >= *readbytes.
6229  */
6230 bool ra_portable_deserialize(roaring_array_t *ra, const char *buf, const size_t maxbytes, size_t * readbytes);
6231 
6232 /**
6233  * Quickly checks whether there is a serialized bitmap at the pointer,
6234  * not exceeding size "maxbytes" in bytes. This function does not allocate
6235  * memory dynamically.
6236  *
6237  * This function returns 0 if and only if no valid bitmap is found.
6238  * Otherwise, it returns how many bytes are occupied by the bitmap data.
6239  */
6240 size_t ra_portable_deserialize_size(const char *buf, const size_t maxbytes);
6241 
6242 /**
6243  * How many bytes are required to serialize this bitmap (meant to be
6244  * compatible
6245  * with Java and Go versions)
6246  */
6247 size_t ra_portable_size_in_bytes(const roaring_array_t *ra);
6248 
6249 /**
6250  * return true if it contains at least one run container.
6251  */
6252 bool ra_has_run_container(const roaring_array_t *ra);
6253 
6254 /**
6255  * Size of the header when serializing (meant to be compatible
6256  * with Java and Go versions)
6257  */
6258 uint32_t ra_portable_header_size(const roaring_array_t *ra);
6259 
6260 /**
6261  * If the container at index i is shared, unshare it (creating a local
6262  * copy if needed).
6263  */
6264 static inline void ra_unshare_container_at_index(roaring_array_t *ra,
6265                                                  uint16_t i) {
6266     assert(i < ra->size);
6267     ra->containers[i] =
6268         get_writable_copy_if_shared(ra->containers[i], &ra->typecodes[i]);
6269 }
6270 
6271 /**
6272  * remove at index i, sliding over all entries after i
6273  */
6274 void ra_remove_at_index(roaring_array_t *ra, int32_t i);
6275 
6276 
6277 /**
6278 * clears all containers, sets the size to 0 and shrinks the memory usage.
6279 */
6280 void ra_reset(roaring_array_t *ra);
6281 
6282 /**
6283  * remove at index i, sliding over all entries after i. Free removed container.
6284  */
6285 void ra_remove_at_index_and_free(roaring_array_t *ra, int32_t i);
6286 
6287 /**
6288  * remove a chunk of indices, sliding over entries after it
6289  */
6290 // void ra_remove_index_range(roaring_array_t *ra, int32_t begin, int32_t end);
6291 
6292 // used in inplace andNot only, to slide left the containers from
6293 // the mutated RoaringBitmap that are after the largest container of
6294 // the argument RoaringBitmap.  It is followed by a call to resize.
6295 //
6296 void ra_copy_range(roaring_array_t *ra, uint32_t begin, uint32_t end,
6297                    uint32_t new_begin);
6298 
6299 /**
6300  * Shifts rightmost $count containers to the left (distance < 0) or
6301  * to the right (distance > 0).
6302  * Allocates memory if necessary.
6303  * This function doesn't free or create new containers.
6304  * Caller is responsible for that.
6305  */
6306 void ra_shift_tail(roaring_array_t *ra, int32_t count, int32_t distance);
6307 
6308 #ifdef __cplusplus
6309 }
6310 #endif
6311 
6312 #endif
6313 /* end file include/roaring/roaring_array.h */
6314 /* begin file include/roaring/roaring.h */
6315 /*
6316 An implementation of Roaring Bitmaps in C.
6317 */
6318 
6319 #ifndef ROARING_H
6320 #define ROARING_H
6321 #ifdef __cplusplus
6322 extern "C" {
6323 #endif
6324 
6325 #include <stdbool.h>
6326 
6327 typedef struct roaring_bitmap_s {
6328     roaring_array_t high_low_container;
6329 } roaring_bitmap_t;
6330 
6331 /**
6332  * Creates a new bitmap (initially empty)
6333  */
6334 roaring_bitmap_t *roaring_bitmap_create(void);
6335 
6336 /**
6337  * Add all the values between min (included) and max (excluded) that are at a
6338  * distance k*step from min.
6339 */
6340 roaring_bitmap_t *roaring_bitmap_from_range(uint64_t min, uint64_t max,
6341                                             uint32_t step);
6342 
6343 /**
6344  * Creates a new bitmap (initially empty) with a provided
6345  * container-storage capacity (it is a performance hint).
6346  */
6347 roaring_bitmap_t *roaring_bitmap_create_with_capacity(uint32_t cap);
6348 
6349 /**
6350  * Creates a new bitmap from a pointer of uint32_t integers
6351  */
6352 roaring_bitmap_t *roaring_bitmap_of_ptr(size_t n_args, const uint32_t *vals);
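/*
 * Illustrative usage sketch (not part of the upstream documentation): building
 * a bitmap from an existing array of values. The names "vals" and "r" are
 * placeholders.
 *
 *   uint32_t vals[] = {1, 5, 100, 1000000};
 *   roaring_bitmap_t *r = roaring_bitmap_of_ptr(4, vals);
 *   // ... use r ...
 *   roaring_bitmap_free(r);
 */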
6353 
6354 /*
6355  * Whether you want to use copy-on-write.
6356  * Saves memory and avoids copies but needs more care in a threaded context.
6357  * Most users should ignore this flag.
6358  * Note: if you do turn this flag to 'true', enabling COW,
6359  * then ensure that you do so for all of your bitmaps since
6360  * interactions between bitmaps with and without COW are unsafe.
6361  */
6362 static inline bool roaring_bitmap_get_copy_on_write(const roaring_bitmap_t* r) {
6363     return r->high_low_container.flags & ROARING_FLAG_COW;
6364 }
6365 static inline void roaring_bitmap_set_copy_on_write(roaring_bitmap_t* r, bool cow) {
6366     if (cow) {
6367         r->high_low_container.flags |= ROARING_FLAG_COW;
6368     } else {
6369         r->high_low_container.flags &= ~ROARING_FLAG_COW;
6370     }
6371 }
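/*
 * Illustrative sketch (r1 is an existing bitmap; names are placeholders):
 * enabling copy-on-write before copying, so that the copy can share unchanged
 * containers with the original. Remember to enable COW consistently.
 *
 *   roaring_bitmap_set_copy_on_write(r1, true);
 *   roaring_bitmap_t *r2 = roaring_bitmap_copy(r1);  // may share containers
 *   // ... both r1 and r2 have COW enabled from here on ...
 *   roaring_bitmap_free(r2);
 */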
6372 
6373 /**
6374  * Describe the inner structure of the bitmap.
6375  */
6376 void roaring_bitmap_printf_describe(const roaring_bitmap_t *ra);
6377 
6378 /**
6379  * Creates a new bitmap from a list of uint32_t integers
6380  */
6381 roaring_bitmap_t *roaring_bitmap_of(size_t n, ...);
6382 
6383 /**
6384  * Copies a  bitmap. This does memory allocation. The caller is responsible for
6385  * memory management.
6386  *
6387  */
6388 roaring_bitmap_t *roaring_bitmap_copy(const roaring_bitmap_t *r);
6389 
6390 
6391 /**
6392  * Copies a  bitmap from src to dest. It is assumed that the pointer dest
6393  * is to an already allocated bitmap. The content of the dest bitmap is
6394  * freed/deleted.
6395  *
6396  * It might be preferable and simpler to call roaring_bitmap_copy except
6397  * that roaring_bitmap_overwrite can save on memory allocations.
6398  *
6399  */
6400 bool roaring_bitmap_overwrite(roaring_bitmap_t *dest,
6401                                      const roaring_bitmap_t *src);
6402 
6403 /**
6404  * Print the content of the bitmap.
6405  */
6406 void roaring_bitmap_printf(const roaring_bitmap_t *ra);
6407 
6408 /**
6409  * Computes the intersection between two bitmaps and returns new bitmap. The
6410  * caller is
6411  * responsible for memory management.
6412  *
6413  */
6414 roaring_bitmap_t *roaring_bitmap_and(const roaring_bitmap_t *x1,
6415                                      const roaring_bitmap_t *x2);
6416 
6417 /**
6418  * Computes the size of the intersection between two bitmaps.
6419  *
6420  */
6421 uint64_t roaring_bitmap_and_cardinality(const roaring_bitmap_t *x1,
6422                                         const roaring_bitmap_t *x2);
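/*
 * Illustrative sketch (x1 and x2 are existing bitmaps): when only the size of
 * the intersection matters, roaring_bitmap_and_cardinality avoids
 * materializing a result bitmap.
 *
 *   roaring_bitmap_t *both = roaring_bitmap_and(x1, x2);
 *   uint64_t n = roaring_bitmap_and_cardinality(x1, x2);
 *   // n == roaring_bitmap_get_cardinality(both)
 *   roaring_bitmap_free(both);
 */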
6423 
6424 
6425 /**
6426  * Check whether two bitmaps intersect.
6427  *
6428  */
6429 bool roaring_bitmap_intersect(const roaring_bitmap_t *x1,
6430                                      const roaring_bitmap_t *x2);
6431 
6432 /**
6433  * Computes the Jaccard index between two bitmaps. (Also known as the Tanimoto
6434  * distance,
6435  * or the Jaccard similarity coefficient)
6436  *
6437  * The Jaccard index is undefined if both bitmaps are empty.
6438  *
6439  */
6440 double roaring_bitmap_jaccard_index(const roaring_bitmap_t *x1,
6441                                     const roaring_bitmap_t *x2);
6442 
6443 /**
6444  * Computes the size of the union between two bitmaps.
6445  *
6446  */
6447 uint64_t roaring_bitmap_or_cardinality(const roaring_bitmap_t *x1,
6448                                        const roaring_bitmap_t *x2);
6449 
6450 /**
6451  * Computes the size of the difference (andnot) between two bitmaps.
6452  *
6453  */
6454 uint64_t roaring_bitmap_andnot_cardinality(const roaring_bitmap_t *x1,
6455                                            const roaring_bitmap_t *x2);
6456 
6457 /**
6458  * Computes the size of the symmetric difference (andnot) between two bitmaps.
6459  *
6460  */
6461 uint64_t roaring_bitmap_xor_cardinality(const roaring_bitmap_t *x1,
6462                                         const roaring_bitmap_t *x2);
6463 
6464 /**
6465  * Inplace version modifies x1, x1 == x2 is allowed
6466  */
6467 void roaring_bitmap_and_inplace(roaring_bitmap_t *x1,
6468                                 const roaring_bitmap_t *x2);
6469 
6470 /**
6471  * Computes the union between two bitmaps and returns new bitmap. The caller is
6472  * responsible for memory management.
6473  */
6474 roaring_bitmap_t *roaring_bitmap_or(const roaring_bitmap_t *x1,
6475                                     const roaring_bitmap_t *x2);
6476 
6477 /**
6478  * Inplace version of roaring_bitmap_or, modifies x1.
6479  * TODO: decide whether x1 == x2 is ok.
6480  *
6481  */
6482 void roaring_bitmap_or_inplace(roaring_bitmap_t *x1,
6483                                const roaring_bitmap_t *x2);
6484 
6485 /**
6486  * Compute the union of 'number' bitmaps. See also roaring_bitmap_or_many_heap.
6487  * Caller is responsible for freeing the
6488  * result.
6489  *
6490  */
6491 roaring_bitmap_t *roaring_bitmap_or_many(size_t number,
6492                                          const roaring_bitmap_t **x);
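/*
 * Illustrative sketch (a, b and c are existing bitmaps; names are
 * placeholders): unioning several bitmaps in one call.
 *
 *   const roaring_bitmap_t *inputs[] = {a, b, c};
 *   roaring_bitmap_t *u = roaring_bitmap_or_many(3, inputs);
 *   // ... use u ...
 *   roaring_bitmap_free(u);
 */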
6493 
6494 /**
6495  * Compute the union of 'number' bitmaps using a heap. This can
6496  * sometimes be faster than roaring_bitmap_or_many which uses
6497  * a naive algorithm. Caller is responsible for freeing the
6498  * result.
6499  *
6500  */
6501 roaring_bitmap_t *roaring_bitmap_or_many_heap(uint32_t number,
6502                                               const roaring_bitmap_t **x);
6503 
6504 /**
6505  * Computes the symmetric difference (xor) between two bitmaps
6506  * and returns new bitmap. The caller is responsible for memory management.
6507  */
6508 roaring_bitmap_t *roaring_bitmap_xor(const roaring_bitmap_t *x1,
6509                                      const roaring_bitmap_t *x2);
6510 
6511 /**
6512  * Inplace version of roaring_bitmap_xor, modifies x1. x1 != x2.
6513  *
6514  */
6515 void roaring_bitmap_xor_inplace(roaring_bitmap_t *x1,
6516                                 const roaring_bitmap_t *x2);
6517 
6518 /**
6519  * Compute the xor of 'number' bitmaps.
6520  * Caller is responsible for freeing the
6521  * result.
6522  *
6523  */
6524 roaring_bitmap_t *roaring_bitmap_xor_many(size_t number,
6525                                           const roaring_bitmap_t **x);
6526 
6527 /**
6528  * Computes the  difference (andnot) between two bitmaps
6529  * and returns new bitmap. The caller is responsible for memory management.
6530  */
6531 roaring_bitmap_t *roaring_bitmap_andnot(const roaring_bitmap_t *x1,
6532                                         const roaring_bitmap_t *x2);
6533 
6534 /**
6535  * Inplace version of roaring_bitmap_andnot, modifies x1. x1 != x2.
6536  *
6537  */
6538 void roaring_bitmap_andnot_inplace(roaring_bitmap_t *x1,
6539                                    const roaring_bitmap_t *x2);
6540 
6541 /**
6542  * TODO: consider implementing:
6543  * Compute the xor of 'number' bitmaps using a heap. This can
6544  * sometimes be faster than roaring_bitmap_xor_many which uses
6545  * a naive algorithm. Caller is responsible for freeing the
6546  * result.
6547  *
6548  * roaring_bitmap_t *roaring_bitmap_xor_many_heap(uint32_t number,
6549  *                                              const roaring_bitmap_t **x);
6550  */
6551 
6552 /**
6553  * Frees the memory.
6554  */
6555 void roaring_bitmap_free(const roaring_bitmap_t *r);
6556 
6557 /**
6558  * Add n_args values from pointer vals; faster than repeatedly calling
6559  * roaring_bitmap_add
6560  *
6561  */
6562 void roaring_bitmap_add_many(roaring_bitmap_t *r, size_t n_args,
6563                              const uint32_t *vals);
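/*
 * Illustrative sketch: bulk insertion of an array of values.
 *
 *   uint32_t values[] = {10, 11, 12, 500000};
 *   roaring_bitmap_t *r = roaring_bitmap_create();
 *   roaring_bitmap_add_many(r, 4, values);
 *   // ... use r ...
 *   roaring_bitmap_free(r);
 */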
6564 
6565 /**
6566  * Add value x
6567  *
6568  */
6569 void roaring_bitmap_add(roaring_bitmap_t *r, uint32_t x);
6570 
6571 /**
6572  * Add value x
6573  * Returns true if a new value was added, false if the value already existed.
6574  */
6575 bool roaring_bitmap_add_checked(roaring_bitmap_t *r, uint32_t x);
6576 
6577 /**
6578  * Add all values in range [min, max]
6579  */
6580 void roaring_bitmap_add_range_closed(roaring_bitmap_t *ra, uint32_t min, uint32_t max);
6581 
6582 /**
6583  * Add all values in range [min, max)
6584  */
6585 static inline void roaring_bitmap_add_range(roaring_bitmap_t *ra, uint64_t min, uint64_t max) {
6586   if(max == min) return;
6587   roaring_bitmap_add_range_closed(ra, (uint32_t)min, (uint32_t)(max - 1));
6588 }
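/*
 * Illustrative sketch (r is an existing bitmap): the two range helpers differ
 * only in whether the upper bound is included.
 *
 *   roaring_bitmap_add_range(r, 10, 20);         // adds 10..19 (max excluded)
 *   roaring_bitmap_add_range_closed(r, 10, 20);  // adds 10..20 (max included)
 */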
6589 
6590 /**
6591  * Remove value x
6592  *
6593  */
6594 void roaring_bitmap_remove(roaring_bitmap_t *r, uint32_t x);
6595 
6596 /** Remove all values in range [min, max] */
6597 void roaring_bitmap_remove_range_closed(roaring_bitmap_t *ra, uint32_t min, uint32_t max);
6598 
6599 /** Remove all values in range [min, max) */
6600 static inline void roaring_bitmap_remove_range(roaring_bitmap_t *ra, uint64_t min, uint64_t max) {
6601     if(max == min) return;
6602     roaring_bitmap_remove_range_closed(ra, (uint32_t)min, (uint32_t)(max - 1));
6603 }
6604 
6605 /** Remove multiple values */
6606 void roaring_bitmap_remove_many(roaring_bitmap_t *r, size_t n_args,
6607                                 const uint32_t *vals);
6608 
6609 /**
6610  * Remove value x
6611  * Returns true if the value was removed, false if it was not present.
6612  */
6613 bool roaring_bitmap_remove_checked(roaring_bitmap_t *r, uint32_t x);
6614 
6615 /**
6616  * Check if value x is present
6617  */
6618 static inline bool roaring_bitmap_contains(const roaring_bitmap_t *r, uint32_t val) {
6619     const uint16_t hb = val >> 16;
6620     /*
6621      * the next function call involves a binary search and lots of branching.
6622      */
6623     int32_t i = ra_get_index(&r->high_low_container, hb);
6624     if (i < 0) return false;
6625 
6626     uint8_t typecode;
6627     // next call ought to be cheap
6628     void *container =
6629         ra_get_container_at_index(&r->high_low_container, i, &typecode);
6630     // rest might be a tad expensive, possibly involving another round of binary search
6631     return container_contains(container, val & 0xFFFF, typecode);
6632 }
6633 
6634 /**
6635  * Check whether a range of values from range_start (included) to range_end (excluded) is present
6636  */
6637 bool roaring_bitmap_contains_range(const roaring_bitmap_t *r, uint64_t range_start, uint64_t range_end);
6638 
6639 /**
6640  * Get the cardinality of the bitmap (number of elements).
6641  */
6642 uint64_t roaring_bitmap_get_cardinality(const roaring_bitmap_t *ra);
6643 
6644 /**
6645  * Returns the number of elements in the range [range_start, range_end).
6646  */
6647 uint64_t roaring_bitmap_range_cardinality(const roaring_bitmap_t *ra,
6648                                           uint64_t range_start, uint64_t range_end);
6649 
6650 /**
6651 * Returns true if the bitmap is empty (cardinality is zero).
6652 */
6653 bool roaring_bitmap_is_empty(const roaring_bitmap_t *ra);
6654 
6655 
6656 /**
6657 * Empties the bitmap
6658 */
6659 void roaring_bitmap_clear(roaring_bitmap_t *ra);
6660 
6661 /**
6662  * Convert the bitmap to an array. Write the output to "ans";
6663  * the caller is responsible for ensuring that there is enough memory
6664  * allocated, e.g.,
6665  * ans = malloc(roaring_bitmap_get_cardinality(mybitmap)
6666  *       * sizeof(uint32_t)).
6667  */
6668 void roaring_bitmap_to_uint32_array(const roaring_bitmap_t *ra, uint32_t *ans);
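/*
 * Illustrative sketch (r is an existing bitmap): exporting all values,
 * sizing the buffer from the cardinality as described above.
 *
 *   uint64_t card = roaring_bitmap_get_cardinality(r);
 *   uint32_t *ans = (uint32_t *)malloc(card * sizeof(uint32_t));
 *   if (ans != NULL) {
 *       roaring_bitmap_to_uint32_array(r, ans);
 *       // ... use ans[0] .. ans[card - 1] ...
 *       free(ans);
 *   }
 */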
6669 
6670 
6671 /**
6672  * Convert the bitmap to an array, writing up to "limit" values starting at
6673  * position "offset" to "ans"; this allows paging through the data.
6674  * The caller is responsible for ensuring that there is enough memory
6675  * allocated, e.g.,
6676  * ans = malloc(limit * sizeof(uint32_t)).
6677  *
6678  * Returns false in case of failure (e.g., insufficient memory)
6679  */
6680 bool roaring_bitmap_range_uint32_array(const roaring_bitmap_t *ra, size_t offset, size_t limit, uint32_t *ans);
6681 
6682 /**
6683  *  Remove run-length encoding even when it is more space efficient
6684  *  return whether a change was applied
6685  */
6686 bool roaring_bitmap_remove_run_compression(roaring_bitmap_t *r);
6687 
6688 /** convert array and bitmap containers to run containers when it is more
6689  * efficient;
6690  * also convert from run containers when more space efficient.  Returns
6691  * true if the result has at least one run container.
6692  * Additional savings might be possible by calling roaring_bitmap_shrink_to_fit().
6693  */
6694 bool roaring_bitmap_run_optimize(roaring_bitmap_t *r);
6695 
6696 /**
6697  * If needed, reallocate memory to shrink the memory usage. Returns
6698  * the number of bytes saved.
6699 */
6700 size_t roaring_bitmap_shrink_to_fit(roaring_bitmap_t *r);
6701 
6702 /**
6703 * write the bitmap to an output pointer; this output buffer should refer to
6704 * at least roaring_bitmap_size_in_bytes(ra) allocated bytes.
6705 *
6706 * see roaring_bitmap_portable_serialize if you want a format that's compatible
6707 * with Java and Go implementations
6708 *
6709 * this format has the benefit of being sometimes more space efficient than
6710 * roaring_bitmap_portable_serialize
6711 * e.g., when the data is sparse.
6712 *
6713 * Returns how many bytes were written which should be
6714 * roaring_bitmap_size_in_bytes(ra).
6715 */
6716 size_t roaring_bitmap_serialize(const roaring_bitmap_t *ra, char *buf);
6717 
6718 /**  use with roaring_bitmap_serialize
6719 * see roaring_bitmap_portable_deserialize if you want a format that's
6720 * compatible with Java and Go implementations
6721 */
6722 roaring_bitmap_t *roaring_bitmap_deserialize(const void *buf);
6723 
6724 /**
6725  * How many bytes are required to serialize this bitmap (NOT compatible
6726  * with Java and Go versions)
6727  */
6728 size_t roaring_bitmap_size_in_bytes(const roaring_bitmap_t *ra);
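/*
 * Illustrative sketch (r is an existing bitmap): round trip through the
 * native (non-portable) format.
 *
 *   size_t expected = roaring_bitmap_size_in_bytes(r);
 *   char *buf = (char *)malloc(expected);
 *   size_t written = roaring_bitmap_serialize(r, buf);  // written == expected
 *   roaring_bitmap_t *copy = roaring_bitmap_deserialize(buf);
 *   free(buf);
 *   roaring_bitmap_free(copy);
 */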
6729 
6730 /**
6731  * read a bitmap from a serialized version. This is meant to be compatible with
6732  * the Java and Go versions. See format specification at
6733  * https://github.com/RoaringBitmap/RoaringFormatSpec
6734  * In case of failure, a null pointer is returned.
6735  * This function is unsafe in the sense that if there is no valid serialized
6736  * bitmap at the pointer, then many bytes could be read, possibly causing a buffer
6737  * overflow. For a safer approach,
6738  * call roaring_bitmap_portable_deserialize_safe.
6739  */
6740 roaring_bitmap_t *roaring_bitmap_portable_deserialize(const char *buf);
6741 
6742 /**
6743  * read a bitmap from a serialized version in a safe manner (reading up to maxbytes).
6744  * This is meant to be compatible with
6745  * the Java and Go versions. See format specification at
6746  * https://github.com/RoaringBitmap/RoaringFormatSpec
6747  * In case of failure, a null pointer is returned.
6748  */
6749 roaring_bitmap_t *roaring_bitmap_portable_deserialize_safe(const char *buf, size_t maxbytes);
6750 
6751 /**
6752  * Check how many bytes would be read (up to maxbytes) at this pointer if there
6753  * is a bitmap, returns zero if there is no valid bitmap.
6754  * This is meant to be compatible with
6755  * the Java and Go versions. See format specification at
6756  * https://github.com/RoaringBitmap/RoaringFormatSpec
6757  */
6758 size_t roaring_bitmap_portable_deserialize_size(const char *buf, size_t maxbytes);
6759 
6760 
6761 /**
6762  * How many bytes are required to serialize this bitmap (meant to be compatible
6763  * with Java and Go versions).  See format specification at
6764  * https://github.com/RoaringBitmap/RoaringFormatSpec
6765  */
6766 size_t roaring_bitmap_portable_size_in_bytes(const roaring_bitmap_t *ra);
6767 
6768 /**
6769  * write a bitmap to a char buffer.  The output buffer should refer to at least
6770  *  roaring_bitmap_portable_size_in_bytes(ra) bytes of allocated memory.
6771  * This is meant to be compatible with
6772  * the
6773  * Java and Go versions. Returns how many bytes were written which should be
6774  * roaring_bitmap_portable_size_in_bytes(ra).  See format specification at
6775  * https://github.com/RoaringBitmap/RoaringFormatSpec
6776  */
6777 size_t roaring_bitmap_portable_serialize(const roaring_bitmap_t *ra, char *buf);
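/*
 * Illustrative sketch (r is an existing bitmap): portable round trip using
 * the bounded deserializer.
 *
 *   size_t nbytes = roaring_bitmap_portable_size_in_bytes(r);
 *   char *buf = (char *)malloc(nbytes);
 *   roaring_bitmap_portable_serialize(r, buf);
 *   roaring_bitmap_t *copy = roaring_bitmap_portable_deserialize_safe(buf, nbytes);
 *   free(buf);
 *   if (copy != NULL) roaring_bitmap_free(copy);
 */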
6778 
6779 /*
6780  * "Frozen" serialization format imitates memory layout of roaring_bitmap_t.
6781  * The deserialized bitmap is a constant view of the underlying buffer.
6782  * This significantly reduces the amount of allocation and copying required
6783  * during deserialization.
6784  * It can be used with memory mapped files.
6785  * Example can be found in benchmarks/frozen_benchmark.c
6786  *
6787  *         [#####] const roaring_bitmap_t *
6788  *          | | |
6789  *     +----+ | +-+
6790  *     |      |   |
6791  * [#####################################] underlying buffer
6792  *
6793  * Note that because the frozen serialization format imitates the C memory
6794  * layout of roaring_bitmap_t, it is not fixed. It differs between big- and
6795  * little-endian platforms and may change in the future.
6796  */
6797 
6798 /**
6799  * Returns number of bytes required to serialize bitmap using frozen format.
6800  */
6801 size_t roaring_bitmap_frozen_size_in_bytes(const roaring_bitmap_t *ra);
6802 
6803 /**
6804  * Serializes bitmap using frozen format.
6805  * Buffer size must be at least roaring_bitmap_frozen_size_in_bytes().
6806  */
6807 void roaring_bitmap_frozen_serialize(const roaring_bitmap_t *ra, char *buf);
6808 
6809 /**
6810  * Creates a constant bitmap that is a view of a given buffer.
6811  * The buffer must contain data previously written by roaring_bitmap_frozen_serialize(),
6812  * and additionally its beginning must be aligned to 32 bytes.
6813  * The length must be exactly equal to roaring_bitmap_frozen_size_in_bytes().
6814  *
6815  * On error, NULL is returned.
6816  *
6817  * Bitmap returned by this function can be used in all readonly contexts.
6818  * Bitmap must be freed as usual, by calling roaring_bitmap_free().
6819  * Underlying buffer must not be freed or modified while it backs any bitmaps.
6820  */
6821 const roaring_bitmap_t *roaring_bitmap_frozen_view(const char *buf, size_t length);
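/*
 * Illustrative sketch (r is an existing bitmap): writing the frozen format
 * into a 32-byte-aligned buffer; posix_memalign is just one way to obtain
 * such a buffer.
 *
 *   size_t nbytes = roaring_bitmap_frozen_size_in_bytes(r);
 *   void *buf = NULL;
 *   if (posix_memalign(&buf, 32, nbytes) == 0) {
 *       roaring_bitmap_frozen_serialize(r, (char *)buf);
 *       const roaring_bitmap_t *view =
 *           roaring_bitmap_frozen_view((const char *)buf, nbytes);
 *       if (view != NULL) {
 *           // ... read-only use of view ...
 *           roaring_bitmap_free(view);
 *       }
 *       free(buf);  // only after every view of buf has been freed
 *   }
 */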
6822 
6823 
6824 /**
6825  * Iterate over the bitmap elements. The function iterator is called once for
6826  *  each value, with ptr (can be NULL) as the second parameter of each call.
6827  *
6828  *  roaring_iterator is simply a pointer to a function that returns bool
6829  *  (true means that the iteration should continue while false means that it
6830  * should stop),
6831  *  and takes (uint32_t,void*) as inputs.
6832  *
6833  *  Returns true if the roaring_iterator returned true throughout (so that
6834  *  all data points were necessarily visited).
6835  */
6836 bool roaring_iterate(const roaring_bitmap_t *ra, roaring_iterator iterator,
6837                      void *ptr);
6838 
6839 bool roaring_iterate64(const roaring_bitmap_t *ra, roaring_iterator64 iterator,
6840                        uint64_t high_bits, void *ptr);
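/*
 * Illustrative sketch (r is an existing bitmap): a callback matching the
 * roaring_iterator shape described above, printing every value.
 *
 *   bool print_value(uint32_t value, void *param) {
 *       (void)param;
 *       printf("%u\n", value);
 *       return true;  // keep iterating
 *   }
 *
 *   // elsewhere:
 *   roaring_iterate(r, print_value, NULL);
 */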
6841 
6842 /**
6843  * Return true if the two bitmaps contain the same elements.
6844  */
6845 bool roaring_bitmap_equals(const roaring_bitmap_t *ra1,
6846                            const roaring_bitmap_t *ra2);
6847 
6848 /**
6849  * Return true if all the elements of ra1 are also in ra2.
6850  */
6851 bool roaring_bitmap_is_subset(const roaring_bitmap_t *ra1,
6852                               const roaring_bitmap_t *ra2);
6853 
6854 /**
6855  * Return true if all the elements of ra1 are also in ra2 and ra2 is strictly
6856  * greater
6857  * than ra1.
6858  */
6859 bool roaring_bitmap_is_strict_subset(const roaring_bitmap_t *ra1,
6860                                             const roaring_bitmap_t *ra2);
6861 
6862 /**
6863  * (For expert users who seek high performance.)
6864  *
6865  * Computes the union between two bitmaps and returns new bitmap. The caller is
6866  * responsible for memory management.
6867  *
6868  * The lazy version defers some computations such as the maintenance of the
6869  * cardinality counts. Thus you need
6870  * to call roaring_bitmap_repair_after_lazy after executing "lazy" computations.
6871  * It is safe to repeatedly call roaring_bitmap_lazy_or_inplace on the result.
6872  * The bitsetconversion parameter is a flag which determines
6873  * whether container-container operations force a bitset conversion.
6874  **/
6875 roaring_bitmap_t *roaring_bitmap_lazy_or(const roaring_bitmap_t *x1,
6876                                          const roaring_bitmap_t *x2,
6877                                          const bool bitsetconversion);
6878 
6879 /**
6880  * (For expert users who seek high performance.)
6881  * Inplace version of roaring_bitmap_lazy_or, modifies x1
6882  * The bitsetconversion parameter is a flag which determines
6883  * whether container-container operations force a bitset conversion.
6884  */
6885 void roaring_bitmap_lazy_or_inplace(roaring_bitmap_t *x1,
6886                                     const roaring_bitmap_t *x2,
6887                                     const bool bitsetconversion);
6888 
6889 /**
6890  * (For expert users who seek high performance.)
6891  *
6892  * Execute maintenance operations on a bitmap created from
6893  * roaring_bitmap_lazy_or
6894  * or modified with roaring_bitmap_lazy_or_inplace.
6895  */
6896 void roaring_bitmap_repair_after_lazy(roaring_bitmap_t *x1);
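/*
 * Illustrative sketch (inputs[0..n-1] are existing bitmaps; names are
 * placeholders): folding many bitmaps into an accumulator with lazy unions,
 * repairing once at the end.
 *
 *   roaring_bitmap_t *acc = roaring_bitmap_copy(inputs[0]);
 *   for (size_t i = 1; i < n; i++) {
 *       roaring_bitmap_lazy_or_inplace(acc, inputs[i], false);
 *   }
 *   roaring_bitmap_repair_after_lazy(acc);
 *   // ... use acc, then roaring_bitmap_free(acc) ...
 */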
6897 
6898 /**
6899  * Computes the symmetric difference between two bitmaps and returns new bitmap.
6900  *The caller is
6901  * responsible for memory management.
6902  *
6903  * The lazy version defers some computations such as the maintenance of the
6904  * cardinality counts. Thus you need
6905  * to call roaring_bitmap_repair_after_lazy after executing "lazy" computations.
6906  * It is safe to repeatedly call roaring_bitmap_lazy_xor_inplace on the result.
6907  *
6908  */
6909 roaring_bitmap_t *roaring_bitmap_lazy_xor(const roaring_bitmap_t *x1,
6910                                           const roaring_bitmap_t *x2);
6911 
6912 /**
6913  * (For expert users who seek high performance.)
6914  * Inplace version of roaring_bitmap_lazy_xor, modifies x1. x1 != x2
6915  *
6916  */
6917 void roaring_bitmap_lazy_xor_inplace(roaring_bitmap_t *x1,
6918                                      const roaring_bitmap_t *x2);
6919 
6920 /**
6921  * compute the negation of the roaring bitmap within a specified
6922  * interval: [range_start, range_end). The number of negated values is
6923  * range_end - range_start.
6924  * Areas outside the range are passed through unchanged.
6925  */
6926 
6927 roaring_bitmap_t *roaring_bitmap_flip(const roaring_bitmap_t *x1,
6928                                       uint64_t range_start, uint64_t range_end);
6929 
6930 /**
6931  * compute (in place) the negation of the roaring bitmap within a specified
6932  * interval: [range_start, range_end). The number of negated values is
6933  * range_end - range_start.
6934  * Areas outside the range are passed through unchanged.
6935  */
6936 
6937 void roaring_bitmap_flip_inplace(roaring_bitmap_t *x1, uint64_t range_start,
6938                                  uint64_t range_end);
6939 
6940 /**
6941  * Selects the element at index 'rank' where the smallest element is at index 0.
6942  * If the size of the roaring bitmap is strictly greater than rank, then this
6943  * function returns true and sets element to the element of given rank.
6944  * Otherwise, it returns false.
6945  */
6946 bool roaring_bitmap_select(const roaring_bitmap_t *ra, uint32_t rank,
6947                            uint32_t *element);
6948 /**
6949 * roaring_bitmap_rank returns the number of integers that are smaller or equal
6950 * to x. Thus if x is the first element, this function will return 1. If
6951 * x is smaller than the smallest element, this function will return 0.
6952 *
6953 * The indexing convention differs between roaring_bitmap_select and
6954 * roaring_bitmap_rank: roaring_bitmap_select refers to the smallest value
6955 * as having index 0, whereas roaring_bitmap_rank returns 1 when ranking
6956 * the smallest value.
6957 */
6958 uint64_t roaring_bitmap_rank(const roaring_bitmap_t *bm, uint32_t x);
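/*
 * Illustrative sketch of the two indexing conventions, assuming r holds the
 * set {10, 20, 30}:
 *
 *   uint32_t element;
 *   roaring_bitmap_select(r, 0, &element);        // element == 10 (index 0)
 *   uint64_t rank = roaring_bitmap_rank(r, 10);   // rank == 1
 */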
6959 
6960 /**
6961 * roaring_bitmap_minimum returns the smallest value in the set.
6962 * Returns UINT32_MAX if the set is empty.
6963 */
6964 uint32_t roaring_bitmap_minimum(const roaring_bitmap_t *bm);
6965 
6966 /**
6967 * roaring_bitmap_maximum returns the greatest value in the set.
6968 * Returns 0 if the set is empty.
6969 */
6970 uint32_t roaring_bitmap_maximum(const roaring_bitmap_t *bm);
6971 
6972 /**
6973 *  (For advanced users.)
6974 * Collect statistics about the bitmap, see roaring_types.h for
6975 * a description of roaring_statistics_t
6976 */
6977 void roaring_bitmap_statistics(const roaring_bitmap_t *ra,
6978                                roaring_statistics_t *stat);
6979 
6980 /*********************
6981 * What follows is code used to iterate through values in a roaring bitmap
6982 
6983 roaring_bitmap_t *ra =...
6984 roaring_uint32_iterator_t   i;
6985 roaring_create_iterator(ra, &i);
6986 while(i.has_value) {
6987   printf("value = %d\n", i.current_value);
6988   roaring_advance_uint32_iterator(&i);
6989 }
6990 
6991 Obviously, if you modify the underlying bitmap, the iterator
6992 becomes invalid. So don't.
6993 */
6994 
6995 typedef struct roaring_uint32_iterator_s {
6996     const roaring_bitmap_t *parent;  // owner
6997     int32_t container_index;         // points to the current container index
6998     int32_t in_container_index;  // for bitset and array containers, this is
6999                                  // the index within the container
7000     int32_t run_index;           // for run containers, this points at the run
7001 
7002     uint32_t current_value;
7003     bool has_value;
7004 
7005     const void
7006         *container;  // should be:
7007                      // parent->high_low_container.containers[container_index];
7008     uint8_t typecode;  // should be:
7009                        // parent->high_low_container.typecodes[container_index];
7010     uint32_t highbits;  // should be:
7011                         // parent->high_low_container.keys[container_index]) <<
7012                         // 16;
7013 
7014 } roaring_uint32_iterator_t;
7015 
7016 /**
7017 * Initialize an iterator object that can be used to iterate through the
7018 * values. If there is a  value, then this iterator points to the first value
7019 * and it->has_value is true. The value is in it->current_value.
7020 */
7021 void roaring_init_iterator(const roaring_bitmap_t *ra,
7022                            roaring_uint32_iterator_t *newit);
7023 
7024 /**
7025 * Initialize an iterator object that can be used to iterate through the
7026 * values. If there is a value, then this iterator points to the last value
7027 * and it->has_value is true. The value is in it->current_value.
7028 */
7029 void roaring_init_iterator_last(const roaring_bitmap_t *ra,
7030                                 roaring_uint32_iterator_t *newit);
7031 
7032 /**
7033 * Create an iterator object that can be used to iterate through the
7034 * values. Caller is responsible for calling roaring_free_iterator.
7035 * The iterator is initialized. If there is a  value, then this iterator
7036 * points to the first value and it->has_value is true.
7037 * The value is in it->current_value.
7038 *
7039 * This function calls roaring_init_iterator.
7040 */
7041 roaring_uint32_iterator_t *roaring_create_iterator(const roaring_bitmap_t *ra);
7042 
7043 /**
7044 * Advance the iterator. If there is a new value, then it->has_value is true.
7045 * The new value is in it->current_value. Values are traversed in increasing
7046 * orders. For convenience, returns it->has_value.
7047 */
7048 bool roaring_advance_uint32_iterator(roaring_uint32_iterator_t *it);
7049 
7050 /**
7051 * Decrement the iterator. If there is a new value, then it->has_value is true.
7052 * The new value is in it->current_value. Values are traversed in decreasing
7053 * orders. For convenience, returns it->has_value.
7054 */
7055 bool roaring_previous_uint32_iterator(roaring_uint32_iterator_t *it);
7056 
7057 /**
7058 * Move the iterator to the first value >= val. If there is such a value, then it->has_value is true.
7059 * The new value is in it->current_value. For convenience, returns it->has_value.
7060 */
7061 bool roaring_move_uint32_iterator_equalorlarger(roaring_uint32_iterator_t *it, uint32_t val);
7062 /**
7063 * Creates a copy of an iterator.
7064 * Caller must free it.
7065 */
7066 roaring_uint32_iterator_t *roaring_copy_uint32_iterator(
7067     const roaring_uint32_iterator_t *it);
7068 
7069 /**
7070 * Free memory following roaring_create_iterator
7071 */
7072 void roaring_free_uint32_iterator(roaring_uint32_iterator_t *it);
7073 
7074 /*
7075  * Reads next ${count} values from iterator into user-supplied ${buf}.
7076  * Returns the number of elements read.
7077  * This number can be smaller than ${count}, which means that the iterator is drained.
7078  *
7079  * This function satisfies semantics of iteration and can be used together with
7080  * other iterator functions.
7081  *  - first value is copied from ${it}->current_value
7082  *  - after function returns, iterator is positioned at the next element
7083  */
7084 uint32_t roaring_read_uint32_iterator(roaring_uint32_iterator_t *it, uint32_t* buf, uint32_t count);
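/*
 * Illustrative sketch (r is an existing bitmap): draining an iterator in
 * fixed-size batches.
 *
 *   roaring_uint32_iterator_t *it = roaring_create_iterator(r);
 *   uint32_t buf[256];
 *   uint32_t n;
 *   while ((n = roaring_read_uint32_iterator(it, buf, 256)) > 0) {
 *       // ... process buf[0] .. buf[n - 1] ...
 *   }
 *   roaring_free_uint32_iterator(it);
 */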
7085 
7086 #ifdef __cplusplus
7087 }
7088 #endif
7089 
7090 #endif
7091 /* end file include/roaring/roaring.h */
7092