1 /*-------------------------------------------------------------------------
2  *
3  * bitmapset.c
4  *	  PostgreSQL generic bitmap set package
5  *
6  * A bitmap set can represent any set of nonnegative integers, although
7  * it is mainly intended for sets where the maximum value is not large,
8  * say at most a few hundred.  By convention, a NULL pointer is always
9  * accepted by all operations to represent the empty set.  (But beware
10  * that this is not the only representation of the empty set.  Use
11  * bms_is_empty() in preference to testing for NULL.)
12  *
13  *
14  * Copyright (c) 2003-2019, PostgreSQL Global Development Group
15  *
16  * IDENTIFICATION
17  *	  src/backend/nodes/bitmapset.c
18  *
19  *-------------------------------------------------------------------------
20  */
21 #include "postgres.h"
22 
23 #include "nodes/bitmapset.h"
24 #include "nodes/pg_list.h"
25 #include "port/pg_bitutils.h"
26 #include "utils/hashutils.h"
27 
28 
29 #define WORDNUM(x)	((x) / BITS_PER_BITMAPWORD)
30 #define BITNUM(x)	((x) % BITS_PER_BITMAPWORD)
31 
32 #define BITMAPSET_SIZE(nwords)	\
33 	(offsetof(Bitmapset, words) + (nwords) * sizeof(bitmapword))
34 
35 /*----------
36  * This is a well-known cute trick for isolating the rightmost one-bit
37  * in a word.  It assumes two's complement arithmetic.  Consider any
38  * nonzero value, and focus attention on the rightmost one.  The value is
39  * then something like
40  *				xxxxxx10000
41  * where x's are unspecified bits.  The two's complement negative is formed
42  * by inverting all the bits and adding one.  Inversion gives
43  *				yyyyyy01111
44  * where each y is the inverse of the corresponding x.  Incrementing gives
45  *				yyyyyy10000
46  * and then ANDing with the original value gives
47  *				00000010000
48  * This works for all cases except original value = zero, where of course
49  * we get zero.
50  *----------
51  */
52 #define RIGHTMOST_ONE(x) ((signedbitmapword) (x) & -((signedbitmapword) (x)))
53 
54 #define HAS_MULTIPLE_ONES(x)	((bitmapword) RIGHTMOST_ONE(x) != (x))
55 
56 /* Select appropriate bit-twiddling functions for bitmap word size */
57 #if BITS_PER_BITMAPWORD == 32
58 #define bmw_leftmost_one_pos(w)		pg_leftmost_one_pos32(w)
59 #define bmw_rightmost_one_pos(w)	pg_rightmost_one_pos32(w)
60 #define bmw_popcount(w)				pg_popcount32(w)
61 #elif BITS_PER_BITMAPWORD == 64
62 #define bmw_leftmost_one_pos(w)		pg_leftmost_one_pos64(w)
63 #define bmw_rightmost_one_pos(w)	pg_rightmost_one_pos64(w)
64 #define bmw_popcount(w)				pg_popcount64(w)
65 #else
66 #error "invalid BITS_PER_BITMAPWORD"
67 #endif
68 
69 
70 /*
71  * bms_copy - make a palloc'd copy of a bitmapset
72  */
73 Bitmapset *
bms_copy(const Bitmapset * a)74 bms_copy(const Bitmapset *a)
75 {
76 	Bitmapset  *result;
77 	size_t		size;
78 
79 	if (a == NULL)
80 		return NULL;
81 	size = BITMAPSET_SIZE(a->nwords);
82 	result = (Bitmapset *) palloc(size);
83 	memcpy(result, a, size);
84 	return result;
85 }
86 
87 /*
88  * bms_equal - are two bitmapsets equal?
89  *
90  * This is logical not physical equality; in particular, a NULL pointer will
91  * be reported as equal to a palloc'd value containing no members.
92  */
93 bool
bms_equal(const Bitmapset * a,const Bitmapset * b)94 bms_equal(const Bitmapset *a, const Bitmapset *b)
95 {
96 	const Bitmapset *shorter;
97 	const Bitmapset *longer;
98 	int			shortlen;
99 	int			longlen;
100 	int			i;
101 
102 	/* Handle cases where either input is NULL */
103 	if (a == NULL)
104 	{
105 		if (b == NULL)
106 			return true;
107 		return bms_is_empty(b);
108 	}
109 	else if (b == NULL)
110 		return bms_is_empty(a);
111 	/* Identify shorter and longer input */
112 	if (a->nwords <= b->nwords)
113 	{
114 		shorter = a;
115 		longer = b;
116 	}
117 	else
118 	{
119 		shorter = b;
120 		longer = a;
121 	}
122 	/* And process */
123 	shortlen = shorter->nwords;
124 	for (i = 0; i < shortlen; i++)
125 	{
126 		if (shorter->words[i] != longer->words[i])
127 			return false;
128 	}
129 	longlen = longer->nwords;
130 	for (; i < longlen; i++)
131 	{
132 		if (longer->words[i] != 0)
133 			return false;
134 	}
135 	return true;
136 }
137 
138 /*
139  * bms_compare - qsort-style comparator for bitmapsets
140  *
141  * This guarantees to report values as equal iff bms_equal would say they are
142  * equal.  Otherwise, the highest-numbered bit that is set in one value but
143  * not the other determines the result.  (This rule means that, for example,
144  * {6} is greater than {5}, which seems plausible.)
145  */
146 int
bms_compare(const Bitmapset * a,const Bitmapset * b)147 bms_compare(const Bitmapset *a, const Bitmapset *b)
148 {
149 	int			shortlen;
150 	int			i;
151 
152 	/* Handle cases where either input is NULL */
153 	if (a == NULL)
154 		return bms_is_empty(b) ? 0 : -1;
155 	else if (b == NULL)
156 		return bms_is_empty(a) ? 0 : +1;
157 	/* Handle cases where one input is longer than the other */
158 	shortlen = Min(a->nwords, b->nwords);
159 	for (i = shortlen; i < a->nwords; i++)
160 	{
161 		if (a->words[i] != 0)
162 			return +1;
163 	}
164 	for (i = shortlen; i < b->nwords; i++)
165 	{
166 		if (b->words[i] != 0)
167 			return -1;
168 	}
169 	/* Process words in common */
170 	i = shortlen;
171 	while (--i >= 0)
172 	{
173 		bitmapword	aw = a->words[i];
174 		bitmapword	bw = b->words[i];
175 
176 		if (aw != bw)
177 			return (aw > bw) ? +1 : -1;
178 	}
179 	return 0;
180 }
181 
182 /*
183  * bms_make_singleton - build a bitmapset containing a single member
184  */
185 Bitmapset *
bms_make_singleton(int x)186 bms_make_singleton(int x)
187 {
188 	Bitmapset  *result;
189 	int			wordnum,
190 				bitnum;
191 
192 	if (x < 0)
193 		elog(ERROR, "negative bitmapset member not allowed");
194 	wordnum = WORDNUM(x);
195 	bitnum = BITNUM(x);
196 	result = (Bitmapset *) palloc0(BITMAPSET_SIZE(wordnum + 1));
197 	result->nwords = wordnum + 1;
198 	result->words[wordnum] = ((bitmapword) 1 << bitnum);
199 	return result;
200 }
201 
202 /*
203  * bms_free - free a bitmapset
204  *
205  * Same as pfree except for allowing NULL input
206  */
207 void
bms_free(Bitmapset * a)208 bms_free(Bitmapset *a)
209 {
210 	if (a)
211 		pfree(a);
212 }
213 
214 
215 /*
216  * These operations all make a freshly palloc'd result,
217  * leaving their inputs untouched
218  */
219 
220 
221 /*
222  * bms_union - set union
223  */
224 Bitmapset *
bms_union(const Bitmapset * a,const Bitmapset * b)225 bms_union(const Bitmapset *a, const Bitmapset *b)
226 {
227 	Bitmapset  *result;
228 	const Bitmapset *other;
229 	int			otherlen;
230 	int			i;
231 
232 	/* Handle cases where either input is NULL */
233 	if (a == NULL)
234 		return bms_copy(b);
235 	if (b == NULL)
236 		return bms_copy(a);
237 	/* Identify shorter and longer input; copy the longer one */
238 	if (a->nwords <= b->nwords)
239 	{
240 		result = bms_copy(b);
241 		other = a;
242 	}
243 	else
244 	{
245 		result = bms_copy(a);
246 		other = b;
247 	}
248 	/* And union the shorter input into the result */
249 	otherlen = other->nwords;
250 	for (i = 0; i < otherlen; i++)
251 		result->words[i] |= other->words[i];
252 	return result;
253 }
254 
255 /*
256  * bms_intersect - set intersection
257  */
258 Bitmapset *
bms_intersect(const Bitmapset * a,const Bitmapset * b)259 bms_intersect(const Bitmapset *a, const Bitmapset *b)
260 {
261 	Bitmapset  *result;
262 	const Bitmapset *other;
263 	int			resultlen;
264 	int			i;
265 
266 	/* Handle cases where either input is NULL */
267 	if (a == NULL || b == NULL)
268 		return NULL;
269 	/* Identify shorter and longer input; copy the shorter one */
270 	if (a->nwords <= b->nwords)
271 	{
272 		result = bms_copy(a);
273 		other = b;
274 	}
275 	else
276 	{
277 		result = bms_copy(b);
278 		other = a;
279 	}
280 	/* And intersect the longer input with the result */
281 	resultlen = result->nwords;
282 	for (i = 0; i < resultlen; i++)
283 		result->words[i] &= other->words[i];
284 	return result;
285 }
286 
287 /*
288  * bms_difference - set difference (ie, A without members of B)
289  */
290 Bitmapset *
bms_difference(const Bitmapset * a,const Bitmapset * b)291 bms_difference(const Bitmapset *a, const Bitmapset *b)
292 {
293 	Bitmapset  *result;
294 	int			shortlen;
295 	int			i;
296 
297 	/* Handle cases where either input is NULL */
298 	if (a == NULL)
299 		return NULL;
300 	if (b == NULL)
301 		return bms_copy(a);
302 	/* Copy the left input */
303 	result = bms_copy(a);
304 	/* And remove b's bits from result */
305 	shortlen = Min(a->nwords, b->nwords);
306 	for (i = 0; i < shortlen; i++)
307 		result->words[i] &= ~b->words[i];
308 	return result;
309 }
310 
311 /*
312  * bms_is_subset - is A a subset of B?
313  */
314 bool
bms_is_subset(const Bitmapset * a,const Bitmapset * b)315 bms_is_subset(const Bitmapset *a, const Bitmapset *b)
316 {
317 	int			shortlen;
318 	int			longlen;
319 	int			i;
320 
321 	/* Handle cases where either input is NULL */
322 	if (a == NULL)
323 		return true;			/* empty set is a subset of anything */
324 	if (b == NULL)
325 		return bms_is_empty(a);
326 	/* Check common words */
327 	shortlen = Min(a->nwords, b->nwords);
328 	for (i = 0; i < shortlen; i++)
329 	{
330 		if ((a->words[i] & ~b->words[i]) != 0)
331 			return false;
332 	}
333 	/* Check extra words */
334 	if (a->nwords > b->nwords)
335 	{
336 		longlen = a->nwords;
337 		for (; i < longlen; i++)
338 		{
339 			if (a->words[i] != 0)
340 				return false;
341 		}
342 	}
343 	return true;
344 }
345 
346 /*
347  * bms_subset_compare - compare A and B for equality/subset relationships
348  *
349  * This is more efficient than testing bms_is_subset in both directions.
350  */
351 BMS_Comparison
bms_subset_compare(const Bitmapset * a,const Bitmapset * b)352 bms_subset_compare(const Bitmapset *a, const Bitmapset *b)
353 {
354 	BMS_Comparison result;
355 	int			shortlen;
356 	int			longlen;
357 	int			i;
358 
359 	/* Handle cases where either input is NULL */
360 	if (a == NULL)
361 	{
362 		if (b == NULL)
363 			return BMS_EQUAL;
364 		return bms_is_empty(b) ? BMS_EQUAL : BMS_SUBSET1;
365 	}
366 	if (b == NULL)
367 		return bms_is_empty(a) ? BMS_EQUAL : BMS_SUBSET2;
368 	/* Check common words */
369 	result = BMS_EQUAL;			/* status so far */
370 	shortlen = Min(a->nwords, b->nwords);
371 	for (i = 0; i < shortlen; i++)
372 	{
373 		bitmapword	aword = a->words[i];
374 		bitmapword	bword = b->words[i];
375 
376 		if ((aword & ~bword) != 0)
377 		{
378 			/* a is not a subset of b */
379 			if (result == BMS_SUBSET1)
380 				return BMS_DIFFERENT;
381 			result = BMS_SUBSET2;
382 		}
383 		if ((bword & ~aword) != 0)
384 		{
385 			/* b is not a subset of a */
386 			if (result == BMS_SUBSET2)
387 				return BMS_DIFFERENT;
388 			result = BMS_SUBSET1;
389 		}
390 	}
391 	/* Check extra words */
392 	if (a->nwords > b->nwords)
393 	{
394 		longlen = a->nwords;
395 		for (; i < longlen; i++)
396 		{
397 			if (a->words[i] != 0)
398 			{
399 				/* a is not a subset of b */
400 				if (result == BMS_SUBSET1)
401 					return BMS_DIFFERENT;
402 				result = BMS_SUBSET2;
403 			}
404 		}
405 	}
406 	else if (a->nwords < b->nwords)
407 	{
408 		longlen = b->nwords;
409 		for (; i < longlen; i++)
410 		{
411 			if (b->words[i] != 0)
412 			{
413 				/* b is not a subset of a */
414 				if (result == BMS_SUBSET2)
415 					return BMS_DIFFERENT;
416 				result = BMS_SUBSET1;
417 			}
418 		}
419 	}
420 	return result;
421 }
422 
423 /*
424  * bms_is_member - is X a member of A?
425  */
426 bool
bms_is_member(int x,const Bitmapset * a)427 bms_is_member(int x, const Bitmapset *a)
428 {
429 	int			wordnum,
430 				bitnum;
431 
432 	/* XXX better to just return false for x<0 ? */
433 	if (x < 0)
434 		elog(ERROR, "negative bitmapset member not allowed");
435 	if (a == NULL)
436 		return false;
437 	wordnum = WORDNUM(x);
438 	bitnum = BITNUM(x);
439 	if (wordnum >= a->nwords)
440 		return false;
441 	if ((a->words[wordnum] & ((bitmapword) 1 << bitnum)) != 0)
442 		return true;
443 	return false;
444 }
445 
446 /*
447  * bms_member_index
448  *		determine 0-based index of member x in the bitmap
449  *
450  * Returns (-1) when x is not a member.
451  */
452 int
bms_member_index(Bitmapset * a,int x)453 bms_member_index(Bitmapset *a, int x)
454 {
455 	int			i;
456 	int			bitnum;
457 	int			wordnum;
458 	int			result = 0;
459 	bitmapword	mask;
460 
461 	/* return -1 if not a member of the bitmap */
462 	if (!bms_is_member(x, a))
463 		return -1;
464 
465 	wordnum = WORDNUM(x);
466 	bitnum = BITNUM(x);
467 
468 	/* count bits in preceding words */
469 	for (i = 0; i < wordnum; i++)
470 	{
471 		bitmapword	w = a->words[i];
472 
473 		/* No need to count the bits in a zero word */
474 		if (w != 0)
475 			result += bmw_popcount(w);
476 	}
477 
478 	/*
479 	 * Now add bits of the last word, but only those before the item. We can
480 	 * do that by applying a mask and then using popcount again. To get
481 	 * 0-based index, we want to count only preceding bits, not the item
482 	 * itself, so we subtract 1.
483 	 */
484 	mask = ((bitmapword) 1 << bitnum) - 1;
485 	result += bmw_popcount(a->words[wordnum] & mask);
486 
487 	return result;
488 }
489 
490 /*
491  * bms_overlap - do sets overlap (ie, have a nonempty intersection)?
492  */
493 bool
bms_overlap(const Bitmapset * a,const Bitmapset * b)494 bms_overlap(const Bitmapset *a, const Bitmapset *b)
495 {
496 	int			shortlen;
497 	int			i;
498 
499 	/* Handle cases where either input is NULL */
500 	if (a == NULL || b == NULL)
501 		return false;
502 	/* Check words in common */
503 	shortlen = Min(a->nwords, b->nwords);
504 	for (i = 0; i < shortlen; i++)
505 	{
506 		if ((a->words[i] & b->words[i]) != 0)
507 			return true;
508 	}
509 	return false;
510 }
511 
512 /*
513  * bms_overlap_list - does a set overlap an integer list?
514  */
515 bool
bms_overlap_list(const Bitmapset * a,const List * b)516 bms_overlap_list(const Bitmapset *a, const List *b)
517 {
518 	ListCell   *lc;
519 	int			wordnum,
520 				bitnum;
521 
522 	if (a == NULL || b == NIL)
523 		return false;
524 
525 	foreach(lc, b)
526 	{
527 		int			x = lfirst_int(lc);
528 
529 		if (x < 0)
530 			elog(ERROR, "negative bitmapset member not allowed");
531 		wordnum = WORDNUM(x);
532 		bitnum = BITNUM(x);
533 		if (wordnum < a->nwords)
534 			if ((a->words[wordnum] & ((bitmapword) 1 << bitnum)) != 0)
535 				return true;
536 	}
537 
538 	return false;
539 }
540 
541 /*
542  * bms_nonempty_difference - do sets have a nonempty difference?
543  */
544 bool
bms_nonempty_difference(const Bitmapset * a,const Bitmapset * b)545 bms_nonempty_difference(const Bitmapset *a, const Bitmapset *b)
546 {
547 	int			shortlen;
548 	int			i;
549 
550 	/* Handle cases where either input is NULL */
551 	if (a == NULL)
552 		return false;
553 	if (b == NULL)
554 		return !bms_is_empty(a);
555 	/* Check words in common */
556 	shortlen = Min(a->nwords, b->nwords);
557 	for (i = 0; i < shortlen; i++)
558 	{
559 		if ((a->words[i] & ~b->words[i]) != 0)
560 			return true;
561 	}
562 	/* Check extra words in a */
563 	for (; i < a->nwords; i++)
564 	{
565 		if (a->words[i] != 0)
566 			return true;
567 	}
568 	return false;
569 }
570 
571 /*
572  * bms_singleton_member - return the sole integer member of set
573  *
574  * Raises error if |a| is not 1.
575  */
576 int
bms_singleton_member(const Bitmapset * a)577 bms_singleton_member(const Bitmapset *a)
578 {
579 	int			result = -1;
580 	int			nwords;
581 	int			wordnum;
582 
583 	if (a == NULL)
584 		elog(ERROR, "bitmapset is empty");
585 	nwords = a->nwords;
586 	for (wordnum = 0; wordnum < nwords; wordnum++)
587 	{
588 		bitmapword	w = a->words[wordnum];
589 
590 		if (w != 0)
591 		{
592 			if (result >= 0 || HAS_MULTIPLE_ONES(w))
593 				elog(ERROR, "bitmapset has multiple members");
594 			result = wordnum * BITS_PER_BITMAPWORD;
595 			result += bmw_rightmost_one_pos(w);
596 		}
597 	}
598 	if (result < 0)
599 		elog(ERROR, "bitmapset is empty");
600 	return result;
601 }
602 
603 /*
604  * bms_get_singleton_member
605  *
606  * Test whether the given set is a singleton.
607  * If so, set *member to the value of its sole member, and return true.
608  * If not, return false, without changing *member.
609  *
610  * This is more convenient and faster than calling bms_membership() and then
611  * bms_singleton_member(), if we don't care about distinguishing empty sets
612  * from multiple-member sets.
613  */
614 bool
bms_get_singleton_member(const Bitmapset * a,int * member)615 bms_get_singleton_member(const Bitmapset *a, int *member)
616 {
617 	int			result = -1;
618 	int			nwords;
619 	int			wordnum;
620 
621 	if (a == NULL)
622 		return false;
623 	nwords = a->nwords;
624 	for (wordnum = 0; wordnum < nwords; wordnum++)
625 	{
626 		bitmapword	w = a->words[wordnum];
627 
628 		if (w != 0)
629 		{
630 			if (result >= 0 || HAS_MULTIPLE_ONES(w))
631 				return false;
632 			result = wordnum * BITS_PER_BITMAPWORD;
633 			result += bmw_rightmost_one_pos(w);
634 		}
635 	}
636 	if (result < 0)
637 		return false;
638 	*member = result;
639 	return true;
640 }
641 
642 /*
643  * bms_num_members - count members of set
644  */
645 int
bms_num_members(const Bitmapset * a)646 bms_num_members(const Bitmapset *a)
647 {
648 	int			result = 0;
649 	int			nwords;
650 	int			wordnum;
651 
652 	if (a == NULL)
653 		return 0;
654 	nwords = a->nwords;
655 	for (wordnum = 0; wordnum < nwords; wordnum++)
656 	{
657 		bitmapword	w = a->words[wordnum];
658 
659 		/* No need to count the bits in a zero word */
660 		if (w != 0)
661 			result += bmw_popcount(w);
662 	}
663 	return result;
664 }
665 
666 /*
667  * bms_membership - does a set have zero, one, or multiple members?
668  *
669  * This is faster than making an exact count with bms_num_members().
670  */
671 BMS_Membership
bms_membership(const Bitmapset * a)672 bms_membership(const Bitmapset *a)
673 {
674 	BMS_Membership result = BMS_EMPTY_SET;
675 	int			nwords;
676 	int			wordnum;
677 
678 	if (a == NULL)
679 		return BMS_EMPTY_SET;
680 	nwords = a->nwords;
681 	for (wordnum = 0; wordnum < nwords; wordnum++)
682 	{
683 		bitmapword	w = a->words[wordnum];
684 
685 		if (w != 0)
686 		{
687 			if (result != BMS_EMPTY_SET || HAS_MULTIPLE_ONES(w))
688 				return BMS_MULTIPLE;
689 			result = BMS_SINGLETON;
690 		}
691 	}
692 	return result;
693 }
694 
695 /*
696  * bms_is_empty - is a set empty?
697  *
698  * This is even faster than bms_membership().
699  */
700 bool
bms_is_empty(const Bitmapset * a)701 bms_is_empty(const Bitmapset *a)
702 {
703 	int			nwords;
704 	int			wordnum;
705 
706 	if (a == NULL)
707 		return true;
708 	nwords = a->nwords;
709 	for (wordnum = 0; wordnum < nwords; wordnum++)
710 	{
711 		bitmapword	w = a->words[wordnum];
712 
713 		if (w != 0)
714 			return false;
715 	}
716 	return true;
717 }
718 
719 
720 /*
721  * These operations all "recycle" their non-const inputs, ie, either
722  * return the modified input or pfree it if it can't hold the result.
723  *
724  * These should generally be used in the style
725  *
726  *		foo = bms_add_member(foo, x);
727  */
728 
729 
730 /*
731  * bms_add_member - add a specified member to set
732  *
733  * Input set is modified or recycled!
734  */
735 Bitmapset *
bms_add_member(Bitmapset * a,int x)736 bms_add_member(Bitmapset *a, int x)
737 {
738 	int			wordnum,
739 				bitnum;
740 
741 	if (x < 0)
742 		elog(ERROR, "negative bitmapset member not allowed");
743 	if (a == NULL)
744 		return bms_make_singleton(x);
745 	wordnum = WORDNUM(x);
746 	bitnum = BITNUM(x);
747 
748 	/* enlarge the set if necessary */
749 	if (wordnum >= a->nwords)
750 	{
751 		int			oldnwords = a->nwords;
752 		int			i;
753 
754 		a = (Bitmapset *) repalloc(a, BITMAPSET_SIZE(wordnum + 1));
755 		a->nwords = wordnum + 1;
756 		/* zero out the enlarged portion */
757 		for (i = oldnwords; i < a->nwords; i++)
758 			a->words[i] = 0;
759 	}
760 
761 	a->words[wordnum] |= ((bitmapword) 1 << bitnum);
762 	return a;
763 }
764 
765 /*
766  * bms_del_member - remove a specified member from set
767  *
768  * No error if x is not currently a member of set
769  *
770  * Input set is modified in-place!
771  */
772 Bitmapset *
bms_del_member(Bitmapset * a,int x)773 bms_del_member(Bitmapset *a, int x)
774 {
775 	int			wordnum,
776 				bitnum;
777 
778 	if (x < 0)
779 		elog(ERROR, "negative bitmapset member not allowed");
780 	if (a == NULL)
781 		return NULL;
782 	wordnum = WORDNUM(x);
783 	bitnum = BITNUM(x);
784 	if (wordnum < a->nwords)
785 		a->words[wordnum] &= ~((bitmapword) 1 << bitnum);
786 	return a;
787 }
788 
789 /*
790  * bms_add_members - like bms_union, but left input is recycled
791  */
792 Bitmapset *
bms_add_members(Bitmapset * a,const Bitmapset * b)793 bms_add_members(Bitmapset *a, const Bitmapset *b)
794 {
795 	Bitmapset  *result;
796 	const Bitmapset *other;
797 	int			otherlen;
798 	int			i;
799 
800 	/* Handle cases where either input is NULL */
801 	if (a == NULL)
802 		return bms_copy(b);
803 	if (b == NULL)
804 		return a;
805 	/* Identify shorter and longer input; copy the longer one if needed */
806 	if (a->nwords < b->nwords)
807 	{
808 		result = bms_copy(b);
809 		other = a;
810 	}
811 	else
812 	{
813 		result = a;
814 		other = b;
815 	}
816 	/* And union the shorter input into the result */
817 	otherlen = other->nwords;
818 	for (i = 0; i < otherlen; i++)
819 		result->words[i] |= other->words[i];
820 	if (result != a)
821 		pfree(a);
822 	return result;
823 }
824 
825 /*
826  * bms_add_range
827  *		Add members in the range of 'lower' to 'upper' to the set.
828  *
829  * Note this could also be done by calling bms_add_member in a loop, however,
830  * using this function will be faster when the range is large as we work at
831  * the bitmapword level rather than at bit level.
832  */
833 Bitmapset *
bms_add_range(Bitmapset * a,int lower,int upper)834 bms_add_range(Bitmapset *a, int lower, int upper)
835 {
836 	int			lwordnum,
837 				lbitnum,
838 				uwordnum,
839 				ushiftbits,
840 				wordnum;
841 
842 	/* do nothing if nothing is called for, without further checking */
843 	if (upper < lower)
844 		return a;
845 
846 	if (lower < 0)
847 		elog(ERROR, "negative bitmapset member not allowed");
848 	uwordnum = WORDNUM(upper);
849 
850 	if (a == NULL)
851 	{
852 		a = (Bitmapset *) palloc0(BITMAPSET_SIZE(uwordnum + 1));
853 		a->nwords = uwordnum + 1;
854 	}
855 	else if (uwordnum >= a->nwords)
856 	{
857 		int			oldnwords = a->nwords;
858 		int			i;
859 
860 		/* ensure we have enough words to store the upper bit */
861 		a = (Bitmapset *) repalloc(a, BITMAPSET_SIZE(uwordnum + 1));
862 		a->nwords = uwordnum + 1;
863 		/* zero out the enlarged portion */
864 		for (i = oldnwords; i < a->nwords; i++)
865 			a->words[i] = 0;
866 	}
867 
868 	wordnum = lwordnum = WORDNUM(lower);
869 
870 	lbitnum = BITNUM(lower);
871 	ushiftbits = BITS_PER_BITMAPWORD - (BITNUM(upper) + 1);
872 
873 	/*
874 	 * Special case when lwordnum is the same as uwordnum we must perform the
875 	 * upper and lower masking on the word.
876 	 */
877 	if (lwordnum == uwordnum)
878 	{
879 		a->words[lwordnum] |= ~(bitmapword) (((bitmapword) 1 << lbitnum) - 1)
880 			& (~(bitmapword) 0) >> ushiftbits;
881 	}
882 	else
883 	{
884 		/* turn on lbitnum and all bits left of it */
885 		a->words[wordnum++] |= ~(bitmapword) (((bitmapword) 1 << lbitnum) - 1);
886 
887 		/* turn on all bits for any intermediate words */
888 		while (wordnum < uwordnum)
889 			a->words[wordnum++] = ~(bitmapword) 0;
890 
891 		/* turn on upper's bit and all bits right of it. */
892 		a->words[uwordnum] |= (~(bitmapword) 0) >> ushiftbits;
893 	}
894 
895 	return a;
896 }
897 
898 /*
899  * bms_int_members - like bms_intersect, but left input is recycled
900  */
901 Bitmapset *
bms_int_members(Bitmapset * a,const Bitmapset * b)902 bms_int_members(Bitmapset *a, const Bitmapset *b)
903 {
904 	int			shortlen;
905 	int			i;
906 
907 	/* Handle cases where either input is NULL */
908 	if (a == NULL)
909 		return NULL;
910 	if (b == NULL)
911 	{
912 		pfree(a);
913 		return NULL;
914 	}
915 	/* Intersect b into a; we need never copy */
916 	shortlen = Min(a->nwords, b->nwords);
917 	for (i = 0; i < shortlen; i++)
918 		a->words[i] &= b->words[i];
919 	for (; i < a->nwords; i++)
920 		a->words[i] = 0;
921 	return a;
922 }
923 
924 /*
925  * bms_del_members - like bms_difference, but left input is recycled
926  */
927 Bitmapset *
bms_del_members(Bitmapset * a,const Bitmapset * b)928 bms_del_members(Bitmapset *a, const Bitmapset *b)
929 {
930 	int			shortlen;
931 	int			i;
932 
933 	/* Handle cases where either input is NULL */
934 	if (a == NULL)
935 		return NULL;
936 	if (b == NULL)
937 		return a;
938 	/* Remove b's bits from a; we need never copy */
939 	shortlen = Min(a->nwords, b->nwords);
940 	for (i = 0; i < shortlen; i++)
941 		a->words[i] &= ~b->words[i];
942 	return a;
943 }
944 
945 /*
946  * bms_join - like bms_union, but *both* inputs are recycled
947  */
948 Bitmapset *
bms_join(Bitmapset * a,Bitmapset * b)949 bms_join(Bitmapset *a, Bitmapset *b)
950 {
951 	Bitmapset  *result;
952 	Bitmapset  *other;
953 	int			otherlen;
954 	int			i;
955 
956 	/* Handle cases where either input is NULL */
957 	if (a == NULL)
958 		return b;
959 	if (b == NULL)
960 		return a;
961 	/* Identify shorter and longer input; use longer one as result */
962 	if (a->nwords < b->nwords)
963 	{
964 		result = b;
965 		other = a;
966 	}
967 	else
968 	{
969 		result = a;
970 		other = b;
971 	}
972 	/* And union the shorter input into the result */
973 	otherlen = other->nwords;
974 	for (i = 0; i < otherlen; i++)
975 		result->words[i] |= other->words[i];
976 	if (other != result)		/* pure paranoia */
977 		pfree(other);
978 	return result;
979 }
980 
981 /*
982  * bms_first_member - find and remove first member of a set
983  *
984  * Returns -1 if set is empty.  NB: set is destructively modified!
985  *
986  * This is intended as support for iterating through the members of a set.
987  * The typical pattern is
988  *
989  *			while ((x = bms_first_member(inputset)) >= 0)
990  *				process member x;
991  *
992  * CAUTION: this destroys the content of "inputset".  If the set must
993  * not be modified, use bms_next_member instead.
994  */
995 int
bms_first_member(Bitmapset * a)996 bms_first_member(Bitmapset *a)
997 {
998 	int			nwords;
999 	int			wordnum;
1000 
1001 	if (a == NULL)
1002 		return -1;
1003 	nwords = a->nwords;
1004 	for (wordnum = 0; wordnum < nwords; wordnum++)
1005 	{
1006 		bitmapword	w = a->words[wordnum];
1007 
1008 		if (w != 0)
1009 		{
1010 			int			result;
1011 
1012 			w = RIGHTMOST_ONE(w);
1013 			a->words[wordnum] &= ~w;
1014 
1015 			result = wordnum * BITS_PER_BITMAPWORD;
1016 			result += bmw_rightmost_one_pos(w);
1017 			return result;
1018 		}
1019 	}
1020 	return -1;
1021 }
1022 
1023 /*
1024  * bms_next_member - find next member of a set
1025  *
1026  * Returns smallest member greater than "prevbit", or -2 if there is none.
1027  * "prevbit" must NOT be less than -1, or the behavior is unpredictable.
1028  *
1029  * This is intended as support for iterating through the members of a set.
1030  * The typical pattern is
1031  *
1032  *			x = -1;
1033  *			while ((x = bms_next_member(inputset, x)) >= 0)
1034  *				process member x;
1035  *
1036  * Notice that when there are no more members, we return -2, not -1 as you
1037  * might expect.  The rationale for that is to allow distinguishing the
1038  * loop-not-started state (x == -1) from the loop-completed state (x == -2).
1039  * It makes no difference in simple loop usage, but complex iteration logic
1040  * might need such an ability.
1041  */
1042 int
bms_next_member(const Bitmapset * a,int prevbit)1043 bms_next_member(const Bitmapset *a, int prevbit)
1044 {
1045 	int			nwords;
1046 	int			wordnum;
1047 	bitmapword	mask;
1048 
1049 	if (a == NULL)
1050 		return -2;
1051 	nwords = a->nwords;
1052 	prevbit++;
1053 	mask = (~(bitmapword) 0) << BITNUM(prevbit);
1054 	for (wordnum = WORDNUM(prevbit); wordnum < nwords; wordnum++)
1055 	{
1056 		bitmapword	w = a->words[wordnum];
1057 
1058 		/* ignore bits before prevbit */
1059 		w &= mask;
1060 
1061 		if (w != 0)
1062 		{
1063 			int			result;
1064 
1065 			result = wordnum * BITS_PER_BITMAPWORD;
1066 			result += bmw_rightmost_one_pos(w);
1067 			return result;
1068 		}
1069 
1070 		/* in subsequent words, consider all bits */
1071 		mask = (~(bitmapword) 0);
1072 	}
1073 	return -2;
1074 }
1075 
1076 /*
1077  * bms_prev_member - find prev member of a set
1078  *
1079  * Returns largest member less than "prevbit", or -2 if there is none.
1080  * "prevbit" must NOT be more than one above the highest possible bit that can
1081  * be set at the Bitmapset at its current size.
1082  *
1083  * To ease finding the highest set bit for the initial loop, the special
1084  * prevbit value of -1 can be passed to have the function find the highest
1085  * valued member in the set.
1086  *
1087  * This is intended as support for iterating through the members of a set in
1088  * reverse.  The typical pattern is
1089  *
1090  *			x = -1;
1091  *			while ((x = bms_prev_member(inputset, x)) >= 0)
1092  *				process member x;
1093  *
1094  * Notice that when there are no more members, we return -2, not -1 as you
1095  * might expect.  The rationale for that is to allow distinguishing the
1096  * loop-not-started state (x == -1) from the loop-completed state (x == -2).
1097  * It makes no difference in simple loop usage, but complex iteration logic
1098  * might need such an ability.
1099  */
1100 
1101 int
bms_prev_member(const Bitmapset * a,int prevbit)1102 bms_prev_member(const Bitmapset *a, int prevbit)
1103 {
1104 	int			wordnum;
1105 	int			ushiftbits;
1106 	bitmapword	mask;
1107 
1108 	/*
1109 	 * If set is NULL or if there are no more bits to the right then we've
1110 	 * nothing to do.
1111 	 */
1112 	if (a == NULL || prevbit == 0)
1113 		return -2;
1114 
1115 	/* transform -1 to the highest possible bit we could have set */
1116 	if (prevbit == -1)
1117 		prevbit = a->nwords * BITS_PER_BITMAPWORD - 1;
1118 	else
1119 		prevbit--;
1120 
1121 	ushiftbits = BITS_PER_BITMAPWORD - (BITNUM(prevbit) + 1);
1122 	mask = (~(bitmapword) 0) >> ushiftbits;
1123 	for (wordnum = WORDNUM(prevbit); wordnum >= 0; wordnum--)
1124 	{
1125 		bitmapword	w = a->words[wordnum];
1126 
1127 		/* mask out bits left of prevbit */
1128 		w &= mask;
1129 
1130 		if (w != 0)
1131 		{
1132 			int			result;
1133 
1134 			result = wordnum * BITS_PER_BITMAPWORD;
1135 			result += bmw_leftmost_one_pos(w);
1136 			return result;
1137 		}
1138 
1139 		/* in subsequent words, consider all bits */
1140 		mask = (~(bitmapword) 0);
1141 	}
1142 	return -2;
1143 }
1144 
1145 /*
1146  * bms_hash_value - compute a hash key for a Bitmapset
1147  *
1148  * Note: we must ensure that any two bitmapsets that are bms_equal() will
1149  * hash to the same value; in practice this means that trailing all-zero
1150  * words must not affect the result.  Hence we strip those before applying
1151  * hash_any().
1152  */
1153 uint32
bms_hash_value(const Bitmapset * a)1154 bms_hash_value(const Bitmapset *a)
1155 {
1156 	int			lastword;
1157 
1158 	if (a == NULL)
1159 		return 0;				/* All empty sets hash to 0 */
1160 	for (lastword = a->nwords; --lastword >= 0;)
1161 	{
1162 		if (a->words[lastword] != 0)
1163 			break;
1164 	}
1165 	if (lastword < 0)
1166 		return 0;				/* All empty sets hash to 0 */
1167 	return DatumGetUInt32(hash_any((const unsigned char *) a->words,
1168 								   (lastword + 1) * sizeof(bitmapword)));
1169 }
1170