1 /*-------------------------------------------------------------------- 2 * Symbols referenced in this file: 3 * - bms_copy 4 * - bms_equal 5 * - bms_is_empty 6 * - bms_add_member 7 * - bms_make_singleton 8 * - bms_first_member 9 * - rightmost_one_pos 10 * - bms_free 11 *-------------------------------------------------------------------- 12 */ 13 14 /*------------------------------------------------------------------------- 15 * 16 * bitmapset.c 17 * PostgreSQL generic bitmap set package 18 * 19 * A bitmap set can represent any set of nonnegative integers, although 20 * it is mainly intended for sets where the maximum value is not large, 21 * say at most a few hundred. By convention, a NULL pointer is always 22 * accepted by all operations to represent the empty set. (But beware 23 * that this is not the only representation of the empty set. Use 24 * bms_is_empty() in preference to testing for NULL.) 25 * 26 * 27 * Copyright (c) 2003-2017, PostgreSQL Global Development Group 28 * 29 * IDENTIFICATION 30 * src/backend/nodes/bitmapset.c 31 * 32 *------------------------------------------------------------------------- 33 */ 34 #include "postgres.h" 35 36 #include "access/hash.h" 37 #include "nodes/pg_list.h" 38 39 40 #define WORDNUM(x) ((x) / BITS_PER_BITMAPWORD) 41 #define BITNUM(x) ((x) % BITS_PER_BITMAPWORD) 42 43 #define BITMAPSET_SIZE(nwords) \ 44 (offsetof(Bitmapset, words) + (nwords) * sizeof(bitmapword)) 45 46 /*---------- Art_create()47 * This is a well-known cute trick for isolating the rightmost one-bit 48 * in a word. It assumes two's complement arithmetic. Consider any 49 * nonzero value, and focus attention on the rightmost one. The value is 50 * then something like 51 * xxxxxx10000 52 * where x's are unspecified bits. The two's complement negative is formed 53 * by inverting all the bits and adding one. Inversion gives 54 * yyyyyy01111 55 * where each y is the inverse of the corresponding x. Incrementing gives 56 * yyyyyy10000 57 * and then ANDing with the original value gives 58 * 00000010000 59 * This works for all cases except original value = zero, where of course 60 * we get zero. 61 *---------- 62 */ 63 #define RIGHTMOST_ONE(x) ((signedbitmapword) (x) & -((signedbitmapword) (x))) 64 65 #define HAS_MULTIPLE_ONES(x) ((bitmapword) RIGHTMOST_ONE(x) != (x)) 66 67 68 /* 69 * Lookup tables to avoid need for bit-by-bit groveling 70 * 71 * rightmost_one_pos[x] gives the bit number (0-7) of the rightmost one bit 72 * in a nonzero byte value x. The entry for x=0 is never used. 73 * 74 * number_of_ones[x] gives the number of one-bits (0-8) in a byte value x. 75 * 76 * We could make these tables larger and reduce the number of iterations 77 * in the functions that use them, but bytewise shifts and masks are 78 * especially fast on many machines, so working a byte at a time seems best. 79 */ 80 81 static const uint8 rightmost_one_pos[256] = { 82 0, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 83 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 84 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 85 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 86 6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 87 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 88 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 89 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 90 7, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 91 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 92 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 93 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 94 6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 95 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 96 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 97 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0 98 }; 99 100 101 102 103 /* 104 * bms_copy - make a palloc'd copy of a bitmapset 105 */ 106 Bitmapset * 107 bms_copy(const Bitmapset *a) 108 { 109 Bitmapset *result; 110 size_t size; 111 112 if (a == NULL) 113 return NULL; 114 size = BITMAPSET_SIZE(a->nwords); 115 result = (Bitmapset *) palloc(size); 116 memcpy(result, a, size); 117 return result; 118 } 119 120 /* 121 * bms_equal - are two bitmapsets equal? 122 * 123 * This is logical not physical equality; in particular, a NULL pointer will 124 * be reported as equal to a palloc'd value containing no members. 125 */ 126 bool 127 bms_equal(const Bitmapset *a, const Bitmapset *b) 128 { 129 const Bitmapset *shorter; 130 const Bitmapset *longer; 131 int shortlen; 132 int longlen; 133 int i; 134 135 /* Handle cases where either input is NULL */ 136 if (a == NULL) 137 { 138 if (b == NULL) 139 return true; 140 return bms_is_empty(b); 141 } 142 else if (b == NULL) 143 return bms_is_empty(a); 144 /* Identify shorter and longer input */ 145 if (a->nwords <= b->nwords) 146 { 147 shorter = a; 148 longer = b; 149 } 150 else 151 { 152 shorter = b; 153 longer = a; 154 } 155 /* And process */ 156 shortlen = shorter->nwords; 157 for (i = 0; i < shortlen; i++) 158 { 159 if (shorter->words[i] != longer->words[i]) 160 return false; 161 } 162 longlen = longer->nwords; 163 for (; i < longlen; i++) 164 { 165 if (longer->words[i] != 0) 166 return false; 167 } 168 return true; 169 } 170 171 /* 172 * bms_make_singleton - build a bitmapset containing a single member 173 */ 174 Bitmapset * 175 bms_make_singleton(int x) 176 { 177 Bitmapset *result; 178 int wordnum, 179 bitnum; 180 181 if (x < 0) 182 elog(ERROR, "negative bitmapset member not allowed"); 183 wordnum = WORDNUM(x); 184 bitnum = BITNUM(x); 185 result = (Bitmapset *) palloc0(BITMAPSET_SIZE(wordnum + 1)); 186 result->nwords = wordnum + 1; 187 result->words[wordnum] = ((bitmapword) 1 << bitnum); 188 return result; 189 } 190 191 /* 192 * bms_free - free a bitmapset 193 * 194 * Same as pfree except for allowing NULL input 195 */ 196 void 197 bms_free(Bitmapset *a) 198 { 199 if (a) 200 pfree(a); 201 } 202 203 204 /* 205 * These operations all make a freshly palloc'd result, 206 * leaving their inputs untouched 207 */ 208 209 210 /* 211 * bms_union - set union 212 */ 213 214 215 /* 216 * bms_intersect - set intersection 217 */ 218 219 220 /* 221 * bms_difference - set difference (ie, A without members of B) 222 */ 223 224 225 /* 226 * bms_is_subset - is A a subset of B? 227 */ 228 229 230 /* 231 * bms_subset_compare - compare A and B for equality/subset relationships 232 * 233 * This is more efficient than testing bms_is_subset in both directions. 234 */ 235 236 237 /* 238 * bms_is_member - is X a member of A? 239 */ 240 241 242 /* 243 * bms_overlap - do sets overlap (ie, have a nonempty intersection)? 244 */ 245 246 247 /* 248 * bms_overlap_list - does a set overlap an integer list? 249 */ 250 251 252 /* 253 * bms_nonempty_difference - do sets have a nonempty difference? 254 */ 255 256 257 /* 258 * bms_singleton_member - return the sole integer member of set 259 * 260 * Raises error if |a| is not 1. 261 */ 262 263 264 /* 265 * bms_get_singleton_member 266 * 267 * Test whether the given set is a singleton. 268 * If so, set *member to the value of its sole member, and return TRUE. 269 * If not, return FALSE, without changing *member. 270 * 271 * This is more convenient and faster than calling bms_membership() and then 272 * bms_singleton_member(), if we don't care about distinguishing empty sets 273 * from multiple-member sets. 274 */ 275 276 277 /* 278 * bms_num_members - count members of set 279 */ 280 281 282 /* 283 * bms_membership - does a set have zero, one, or multiple members? 284 * 285 * This is faster than making an exact count with bms_num_members(). 286 */ 287 288 289 /* 290 * bms_is_empty - is a set empty? 291 * 292 * This is even faster than bms_membership(). 293 */ 294 bool 295 bms_is_empty(const Bitmapset *a) 296 { 297 int nwords; 298 int wordnum; 299 300 if (a == NULL) 301 return true; 302 nwords = a->nwords; 303 for (wordnum = 0; wordnum < nwords; wordnum++) 304 { 305 bitmapword w = a->words[wordnum]; 306 307 if (w != 0) 308 return false; 309 } 310 return true; 311 } 312 313 314 /* 315 * These operations all "recycle" their non-const inputs, ie, either 316 * return the modified input or pfree it if it can't hold the result. 317 * 318 * These should generally be used in the style 319 * 320 * foo = bms_add_member(foo, x); 321 */ 322 323 324 /* 325 * bms_add_member - add a specified member to set 326 * 327 * Input set is modified or recycled! 328 */ 329 Bitmapset * 330 bms_add_member(Bitmapset *a, int x) 331 { 332 int wordnum, 333 bitnum; 334 335 if (x < 0) 336 elog(ERROR, "negative bitmapset member not allowed"); 337 if (a == NULL) 338 return bms_make_singleton(x); 339 wordnum = WORDNUM(x); 340 bitnum = BITNUM(x); 341 342 /* enlarge the set if necessary */ 343 if (wordnum >= a->nwords) 344 { 345 int oldnwords = a->nwords; 346 int i; 347 348 a = (Bitmapset *) repalloc(a, BITMAPSET_SIZE(wordnum + 1)); 349 a->nwords = wordnum + 1; 350 /* zero out the enlarged portion */ 351 for (i = oldnwords; i < a->nwords; i++) 352 a->words[i] = 0; 353 } 354 355 a->words[wordnum] |= ((bitmapword) 1 << bitnum); 356 return a; 357 } 358 359 /* 360 * bms_del_member - remove a specified member from set 361 * 362 * No error if x is not currently a member of set 363 * 364 * Input set is modified in-place! 365 */ 366 367 368 /* 369 * bms_add_members - like bms_union, but left input is recycled 370 */ 371 372 373 /* 374 * bms_int_members - like bms_intersect, but left input is recycled 375 */ 376 377 378 /* 379 * bms_del_members - like bms_difference, but left input is recycled 380 */ 381 382 383 /* 384 * bms_join - like bms_union, but *both* inputs are recycled 385 */ 386 387 388 /* 389 * bms_first_member - find and remove first member of a set 390 * 391 * Returns -1 if set is empty. NB: set is destructively modified! 392 * 393 * This is intended as support for iterating through the members of a set. 394 * The typical pattern is 395 * 396 * while ((x = bms_first_member(inputset)) >= 0) 397 * process member x; 398 * 399 * CAUTION: this destroys the content of "inputset". If the set must 400 * not be modified, use bms_next_member instead. 401 */ 402 int 403 bms_first_member(Bitmapset *a) 404 { 405 int nwords; 406 int wordnum; 407 408 if (a == NULL) 409 return -1; 410 nwords = a->nwords; 411 for (wordnum = 0; wordnum < nwords; wordnum++) 412 { 413 bitmapword w = a->words[wordnum]; 414 415 if (w != 0) 416 { 417 int result; 418 419 w = RIGHTMOST_ONE(w); 420 a->words[wordnum] &= ~w; 421 422 result = wordnum * BITS_PER_BITMAPWORD; 423 while ((w & 255) == 0) 424 { 425 w >>= 8; 426 result += 8; 427 } 428 result += rightmost_one_pos[w & 255]; 429 return result; 430 } 431 } 432 return -1; 433 } 434 435 /* 436 * bms_next_member - find next member of a set 437 * 438 * Returns smallest member greater than "prevbit", or -2 if there is none. 439 * "prevbit" must NOT be less than -1, or the behavior is unpredictable. 440 * 441 * This is intended as support for iterating through the members of a set. 442 * The typical pattern is 443 * 444 * x = -1; 445 * while ((x = bms_next_member(inputset, x)) >= 0) 446 * process member x; 447 * 448 * Notice that when there are no more members, we return -2, not -1 as you 449 * might expect. The rationale for that is to allow distinguishing the 450 * loop-not-started state (x == -1) from the loop-completed state (x == -2). 451 * It makes no difference in simple loop usage, but complex iteration logic 452 * might need such an ability. 453 */ 454 455 456 /* 457 * bms_hash_value - compute a hash key for a Bitmapset 458 * 459 * Note: we must ensure that any two bitmapsets that are bms_equal() will 460 * hash to the same value; in practice this means that trailing all-zero 461 * words must not affect the result. Hence we strip those before applying 462 * hash_any(). 463 */ 464 465