1 /*--------------------------------------------------------------------
2  * Symbols referenced in this file:
3  * - bms_copy
4  * - bms_equal
5  * - bms_is_empty
6  * - bms_add_member
7  * - bms_make_singleton
8  * - bms_first_member
9  * - rightmost_one_pos
10  * - bms_free
11  *--------------------------------------------------------------------
12  */
13 
14 /*-------------------------------------------------------------------------
15  *
16  * bitmapset.c
17  *	  PostgreSQL generic bitmap set package
18  *
19  * A bitmap set can represent any set of nonnegative integers, although
20  * it is mainly intended for sets where the maximum value is not large,
21  * say at most a few hundred.  By convention, a NULL pointer is always
22  * accepted by all operations to represent the empty set.  (But beware
23  * that this is not the only representation of the empty set.  Use
24  * bms_is_empty() in preference to testing for NULL.)
25  *
26  *
27  * Copyright (c) 2003-2017, PostgreSQL Global Development Group
28  *
29  * IDENTIFICATION
30  *	  src/backend/nodes/bitmapset.c
31  *
32  *-------------------------------------------------------------------------
33  */
34 #include "postgres.h"
35 
36 #include "access/hash.h"
37 #include "nodes/pg_list.h"
38 
39 
40 #define WORDNUM(x)	((x) / BITS_PER_BITMAPWORD)
41 #define BITNUM(x)	((x) % BITS_PER_BITMAPWORD)
42 
43 #define BITMAPSET_SIZE(nwords)	\
44 	(offsetof(Bitmapset, words) + (nwords) * sizeof(bitmapword))
45 
46 /*----------
Art_create()47  * This is a well-known cute trick for isolating the rightmost one-bit
48  * in a word.  It assumes two's complement arithmetic.  Consider any
49  * nonzero value, and focus attention on the rightmost one.  The value is
50  * then something like
51  *				xxxxxx10000
52  * where x's are unspecified bits.  The two's complement negative is formed
53  * by inverting all the bits and adding one.  Inversion gives
54  *				yyyyyy01111
55  * where each y is the inverse of the corresponding x.  Incrementing gives
56  *				yyyyyy10000
57  * and then ANDing with the original value gives
58  *				00000010000
59  * This works for all cases except original value = zero, where of course
60  * we get zero.
61  *----------
62  */
63 #define RIGHTMOST_ONE(x) ((signedbitmapword) (x) & -((signedbitmapword) (x)))
64 
65 #define HAS_MULTIPLE_ONES(x)	((bitmapword) RIGHTMOST_ONE(x) != (x))
66 
67 
68 /*
69  * Lookup tables to avoid need for bit-by-bit groveling
70  *
71  * rightmost_one_pos[x] gives the bit number (0-7) of the rightmost one bit
72  * in a nonzero byte value x.  The entry for x=0 is never used.
73  *
74  * number_of_ones[x] gives the number of one-bits (0-8) in a byte value x.
75  *
76  * We could make these tables larger and reduce the number of iterations
77  * in the functions that use them, but bytewise shifts and masks are
78  * especially fast on many machines, so working a byte at a time seems best.
79  */
80 
81 static const uint8 rightmost_one_pos[256] = {
82 	0, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
83 	4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
84 	5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
85 	4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
86 	6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
87 	4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
88 	5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
89 	4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
90 	7, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
91 	4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
92 	5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
93 	4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
94 	6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
95 	4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
96 	5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
97 	4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0
98 };
99 
100 
101 
102 
103 /*
104  * bms_copy - make a palloc'd copy of a bitmapset
105  */
106 Bitmapset *
107 bms_copy(const Bitmapset *a)
108 {
109 	Bitmapset  *result;
110 	size_t		size;
111 
112 	if (a == NULL)
113 		return NULL;
114 	size = BITMAPSET_SIZE(a->nwords);
115 	result = (Bitmapset *) palloc(size);
116 	memcpy(result, a, size);
117 	return result;
118 }
119 
120 /*
121  * bms_equal - are two bitmapsets equal?
122  *
123  * This is logical not physical equality; in particular, a NULL pointer will
124  * be reported as equal to a palloc'd value containing no members.
125  */
126 bool
127 bms_equal(const Bitmapset *a, const Bitmapset *b)
128 {
129 	const Bitmapset *shorter;
130 	const Bitmapset *longer;
131 	int			shortlen;
132 	int			longlen;
133 	int			i;
134 
135 	/* Handle cases where either input is NULL */
136 	if (a == NULL)
137 	{
138 		if (b == NULL)
139 			return true;
140 		return bms_is_empty(b);
141 	}
142 	else if (b == NULL)
143 		return bms_is_empty(a);
144 	/* Identify shorter and longer input */
145 	if (a->nwords <= b->nwords)
146 	{
147 		shorter = a;
148 		longer = b;
149 	}
150 	else
151 	{
152 		shorter = b;
153 		longer = a;
154 	}
155 	/* And process */
156 	shortlen = shorter->nwords;
157 	for (i = 0; i < shortlen; i++)
158 	{
159 		if (shorter->words[i] != longer->words[i])
160 			return false;
161 	}
162 	longlen = longer->nwords;
163 	for (; i < longlen; i++)
164 	{
165 		if (longer->words[i] != 0)
166 			return false;
167 	}
168 	return true;
169 }
170 
171 /*
172  * bms_make_singleton - build a bitmapset containing a single member
173  */
174 Bitmapset *
175 bms_make_singleton(int x)
176 {
177 	Bitmapset  *result;
178 	int			wordnum,
179 				bitnum;
180 
181 	if (x < 0)
182 		elog(ERROR, "negative bitmapset member not allowed");
183 	wordnum = WORDNUM(x);
184 	bitnum = BITNUM(x);
185 	result = (Bitmapset *) palloc0(BITMAPSET_SIZE(wordnum + 1));
186 	result->nwords = wordnum + 1;
187 	result->words[wordnum] = ((bitmapword) 1 << bitnum);
188 	return result;
189 }
190 
191 /*
192  * bms_free - free a bitmapset
193  *
194  * Same as pfree except for allowing NULL input
195  */
196 void
197 bms_free(Bitmapset *a)
198 {
199 	if (a)
200 		pfree(a);
201 }
202 
203 
204 /*
205  * These operations all make a freshly palloc'd result,
206  * leaving their inputs untouched
207  */
208 
209 
210 /*
211  * bms_union - set union
212  */
213 
214 
215 /*
216  * bms_intersect - set intersection
217  */
218 
219 
220 /*
221  * bms_difference - set difference (ie, A without members of B)
222  */
223 
224 
225 /*
226  * bms_is_subset - is A a subset of B?
227  */
228 
229 
230 /*
231  * bms_subset_compare - compare A and B for equality/subset relationships
232  *
233  * This is more efficient than testing bms_is_subset in both directions.
234  */
235 
236 
237 /*
238  * bms_is_member - is X a member of A?
239  */
240 
241 
242 /*
243  * bms_overlap - do sets overlap (ie, have a nonempty intersection)?
244  */
245 
246 
247 /*
248  * bms_overlap_list - does a set overlap an integer list?
249  */
250 
251 
252 /*
253  * bms_nonempty_difference - do sets have a nonempty difference?
254  */
255 
256 
257 /*
258  * bms_singleton_member - return the sole integer member of set
259  *
260  * Raises error if |a| is not 1.
261  */
262 
263 
264 /*
265  * bms_get_singleton_member
266  *
267  * Test whether the given set is a singleton.
268  * If so, set *member to the value of its sole member, and return TRUE.
269  * If not, return FALSE, without changing *member.
270  *
271  * This is more convenient and faster than calling bms_membership() and then
272  * bms_singleton_member(), if we don't care about distinguishing empty sets
273  * from multiple-member sets.
274  */
275 
276 
277 /*
278  * bms_num_members - count members of set
279  */
280 
281 
282 /*
283  * bms_membership - does a set have zero, one, or multiple members?
284  *
285  * This is faster than making an exact count with bms_num_members().
286  */
287 
288 
289 /*
290  * bms_is_empty - is a set empty?
291  *
292  * This is even faster than bms_membership().
293  */
294 bool
295 bms_is_empty(const Bitmapset *a)
296 {
297 	int			nwords;
298 	int			wordnum;
299 
300 	if (a == NULL)
301 		return true;
302 	nwords = a->nwords;
303 	for (wordnum = 0; wordnum < nwords; wordnum++)
304 	{
305 		bitmapword	w = a->words[wordnum];
306 
307 		if (w != 0)
308 			return false;
309 	}
310 	return true;
311 }
312 
313 
314 /*
315  * These operations all "recycle" their non-const inputs, ie, either
316  * return the modified input or pfree it if it can't hold the result.
317  *
318  * These should generally be used in the style
319  *
320  *		foo = bms_add_member(foo, x);
321  */
322 
323 
324 /*
325  * bms_add_member - add a specified member to set
326  *
327  * Input set is modified or recycled!
328  */
329 Bitmapset *
330 bms_add_member(Bitmapset *a, int x)
331 {
332 	int			wordnum,
333 				bitnum;
334 
335 	if (x < 0)
336 		elog(ERROR, "negative bitmapset member not allowed");
337 	if (a == NULL)
338 		return bms_make_singleton(x);
339 	wordnum = WORDNUM(x);
340 	bitnum = BITNUM(x);
341 
342 	/* enlarge the set if necessary */
343 	if (wordnum >= a->nwords)
344 	{
345 		int			oldnwords = a->nwords;
346 		int			i;
347 
348 		a = (Bitmapset *) repalloc(a, BITMAPSET_SIZE(wordnum + 1));
349 		a->nwords = wordnum + 1;
350 		/* zero out the enlarged portion */
351 		for (i = oldnwords; i < a->nwords; i++)
352 			a->words[i] = 0;
353 	}
354 
355 	a->words[wordnum] |= ((bitmapword) 1 << bitnum);
356 	return a;
357 }
358 
359 /*
360  * bms_del_member - remove a specified member from set
361  *
362  * No error if x is not currently a member of set
363  *
364  * Input set is modified in-place!
365  */
366 
367 
368 /*
369  * bms_add_members - like bms_union, but left input is recycled
370  */
371 
372 
373 /*
374  * bms_int_members - like bms_intersect, but left input is recycled
375  */
376 
377 
378 /*
379  * bms_del_members - like bms_difference, but left input is recycled
380  */
381 
382 
383 /*
384  * bms_join - like bms_union, but *both* inputs are recycled
385  */
386 
387 
388 /*
389  * bms_first_member - find and remove first member of a set
390  *
391  * Returns -1 if set is empty.  NB: set is destructively modified!
392  *
393  * This is intended as support for iterating through the members of a set.
394  * The typical pattern is
395  *
396  *			while ((x = bms_first_member(inputset)) >= 0)
397  *				process member x;
398  *
399  * CAUTION: this destroys the content of "inputset".  If the set must
400  * not be modified, use bms_next_member instead.
401  */
402 int
403 bms_first_member(Bitmapset *a)
404 {
405 	int			nwords;
406 	int			wordnum;
407 
408 	if (a == NULL)
409 		return -1;
410 	nwords = a->nwords;
411 	for (wordnum = 0; wordnum < nwords; wordnum++)
412 	{
413 		bitmapword	w = a->words[wordnum];
414 
415 		if (w != 0)
416 		{
417 			int			result;
418 
419 			w = RIGHTMOST_ONE(w);
420 			a->words[wordnum] &= ~w;
421 
422 			result = wordnum * BITS_PER_BITMAPWORD;
423 			while ((w & 255) == 0)
424 			{
425 				w >>= 8;
426 				result += 8;
427 			}
428 			result += rightmost_one_pos[w & 255];
429 			return result;
430 		}
431 	}
432 	return -1;
433 }
434 
435 /*
436  * bms_next_member - find next member of a set
437  *
438  * Returns smallest member greater than "prevbit", or -2 if there is none.
439  * "prevbit" must NOT be less than -1, or the behavior is unpredictable.
440  *
441  * This is intended as support for iterating through the members of a set.
442  * The typical pattern is
443  *
444  *			x = -1;
445  *			while ((x = bms_next_member(inputset, x)) >= 0)
446  *				process member x;
447  *
448  * Notice that when there are no more members, we return -2, not -1 as you
449  * might expect.  The rationale for that is to allow distinguishing the
450  * loop-not-started state (x == -1) from the loop-completed state (x == -2).
451  * It makes no difference in simple loop usage, but complex iteration logic
452  * might need such an ability.
453  */
454 
455 
456 /*
457  * bms_hash_value - compute a hash key for a Bitmapset
458  *
459  * Note: we must ensure that any two bitmapsets that are bms_equal() will
460  * hash to the same value; in practice this means that trailing all-zero
461  * words must not affect the result.  Hence we strip those before applying
462  * hash_any().
463  */
464 
465