1 /*--------------------------------------------------------------------------
2 *
3 * test_integerset.c
4 * Test integer set data structure.
5 *
6 * Copyright (c) 2019, PostgreSQL Global Development Group
7 *
8 * IDENTIFICATION
9 * src/test/modules/test_integerset/test_integerset.c
10 *
11 * -------------------------------------------------------------------------
12 */
13 #include "postgres.h"
14
15 #include "fmgr.h"
16 #include "lib/integerset.h"
17 #include "nodes/bitmapset.h"
18 #include "utils/memutils.h"
19 #include "utils/timestamp.h"
20 #include "storage/block.h"
21 #include "storage/itemptr.h"
22 #include "miscadmin.h"
23
24 /*
25 * If you enable this, the "pattern" tests will print information about
26 * how long populating, probing, and iterating the test set takes, and
27 * how much memory the test set consumed. That can be used as
28 * micro-benchmark of various operations and input patterns (you might
29 * want to increase the number of values used in each of the test, if
30 * you do that, to reduce noise).
31 *
32 * The information is printed to the server's stderr, mostly because
33 * that's where MemoryContextStats() output goes.
34 */
35 static const bool intset_test_stats = false;
36
37 PG_MODULE_MAGIC;
38
39 PG_FUNCTION_INFO_V1(test_integerset);
40
41 /*
42 * A struct to define a pattern of integers, for use with the test_pattern()
43 * function.
44 */
45 typedef struct
46 {
47 char *test_name; /* short name of the test, for humans */
48 char *pattern_str; /* a bit pattern */
49 uint64 spacing; /* pattern repeats at this interval */
50 uint64 num_values; /* number of integers to set in total */
51 } test_spec;
52
53 static const test_spec test_specs[] = {
54 {
55 "all ones", "1111111111",
56 10, 10000000
57 },
58 {
59 "alternating bits", "0101010101",
60 10, 10000000
61 },
62 {
63 "clusters of ten", "1111111111",
64 10000, 10000000
65 },
66 {
67 "clusters of hundred",
68 "1111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111",
69 10000, 100000000
70 },
71 {
72 "one-every-64k", "1",
73 65536, 10000000
74 },
75 {
76 "sparse", "100000000000000000000000000000001",
77 10000000, 10000000
78 },
79 {
80 "single values, distance > 2^32", "1",
81 UINT64CONST(10000000000), 1000000
82 },
83 {
84 "clusters, distance > 2^32", "10101010",
85 UINT64CONST(10000000000), 10000000
86 },
87 {
88 "clusters, distance > 2^60", "10101010",
89 UINT64CONST(2000000000000000000),
90 23 /* can't be much higher than this, or we
91 * overflow uint64 */
92 }
93 };
94
95 static void test_pattern(const test_spec *spec);
96 static void test_empty(void);
97 static void test_single_value(uint64 value);
98 static void check_with_filler(IntegerSet *intset, uint64 x, uint64 value, uint64 filler_min, uint64 filler_max);
99 static void test_single_value_and_filler(uint64 value, uint64 filler_min, uint64 filler_max);
100 static void test_huge_distances(void);
101
102 /*
103 * SQL-callable entry point to perform all tests.
104 */
105 Datum
test_integerset(PG_FUNCTION_ARGS)106 test_integerset(PG_FUNCTION_ARGS)
107 {
108 /* Tests for various corner cases */
109 test_empty();
110 test_huge_distances();
111 test_single_value(0);
112 test_single_value(1);
113 test_single_value(PG_UINT64_MAX - 1);
114 test_single_value(PG_UINT64_MAX);
115 test_single_value_and_filler(0, 1000, 2000);
116 test_single_value_and_filler(1, 1000, 2000);
117 test_single_value_and_filler(1, 1000, 2000000);
118 test_single_value_and_filler(PG_UINT64_MAX - 1, 1000, 2000);
119 test_single_value_and_filler(PG_UINT64_MAX, 1000, 2000);
120
121 /* Test different test patterns, with lots of entries */
122 for (int i = 0; i < lengthof(test_specs); i++)
123 {
124 test_pattern(&test_specs[i]);
125 }
126
127 PG_RETURN_VOID();
128 }
129
130 /*
131 * Test with a repeating pattern, defined by the 'spec'.
132 */
133 static void
test_pattern(const test_spec * spec)134 test_pattern(const test_spec *spec)
135 {
136 IntegerSet *intset;
137 MemoryContext intset_ctx;
138 MemoryContext old_ctx;
139 TimestampTz starttime;
140 TimestampTz endtime;
141 uint64 n;
142 uint64 last_int;
143 int patternlen;
144 uint64 *pattern_values;
145 uint64 pattern_num_values;
146
147 elog(NOTICE, "testing intset with pattern \"%s\"", spec->test_name);
148 if (intset_test_stats)
149 fprintf(stderr, "-----\ntesting intset with pattern \"%s\"\n", spec->test_name);
150
151 /* Pre-process the pattern, creating an array of integers from it. */
152 patternlen = strlen(spec->pattern_str);
153 pattern_values = palloc(patternlen * sizeof(uint64));
154 pattern_num_values = 0;
155 for (int i = 0; i < patternlen; i++)
156 {
157 if (spec->pattern_str[i] == '1')
158 pattern_values[pattern_num_values++] = i;
159 }
160
161 /*
162 * Allocate the integer set.
163 *
164 * Allocate it in a separate memory context, so that we can print its
165 * memory usage easily. (intset_create() creates a memory context of its
166 * own, too, but we don't have direct access to it, so we cannot call
167 * MemoryContextStats() on it directly).
168 */
169 intset_ctx = AllocSetContextCreate(CurrentMemoryContext,
170 "intset test",
171 ALLOCSET_SMALL_SIZES);
172 MemoryContextSetIdentifier(intset_ctx, spec->test_name);
173 old_ctx = MemoryContextSwitchTo(intset_ctx);
174 intset = intset_create();
175 MemoryContextSwitchTo(old_ctx);
176
177 /*
178 * Add values to the set.
179 */
180 starttime = GetCurrentTimestamp();
181
182 n = 0;
183 last_int = 0;
184 while (n < spec->num_values)
185 {
186 uint64 x = 0;
187
188 for (int i = 0; i < pattern_num_values && n < spec->num_values; i++)
189 {
190 x = last_int + pattern_values[i];
191
192 intset_add_member(intset, x);
193 n++;
194 }
195 last_int += spec->spacing;
196 }
197
198 endtime = GetCurrentTimestamp();
199
200 if (intset_test_stats)
201 fprintf(stderr, "added " UINT64_FORMAT " values in %d ms\n",
202 spec->num_values, (int) (endtime - starttime) / 1000);
203
204 /*
205 * Print stats on the amount of memory used.
206 *
207 * We print the usage reported by intset_memory_usage(), as well as the
208 * stats from the memory context. They should be in the same ballpark,
209 * but it's hard to automate testing that, so if you're making changes to
210 * the implementation, just observe that manually.
211 */
212 if (intset_test_stats)
213 {
214 uint64 mem_usage;
215
216 /*
217 * Also print memory usage as reported by intset_memory_usage(). It
218 * should be in the same ballpark as the usage reported by
219 * MemoryContextStats().
220 */
221 mem_usage = intset_memory_usage(intset);
222 fprintf(stderr, "intset_memory_usage() reported " UINT64_FORMAT " (%0.2f bytes / integer)\n",
223 mem_usage, (double) mem_usage / spec->num_values);
224
225 MemoryContextStats(intset_ctx);
226 }
227
228 /* Check that intset_get_num_entries works */
229 n = intset_num_entries(intset);
230 if (n != spec->num_values)
231 elog(ERROR, "intset_num_entries returned " UINT64_FORMAT ", expected " UINT64_FORMAT, n, spec->num_values);
232
233 /*
234 * Test random-access probes with intset_is_member()
235 */
236 starttime = GetCurrentTimestamp();
237
238 for (n = 0; n < 100000; n++)
239 {
240 bool b;
241 bool expected;
242 uint64 x;
243
244 /*
245 * Pick next value to probe at random. We limit the probes to the
246 * last integer that we added to the set, plus an arbitrary constant
247 * (1000). There's no point in probing the whole 0 - 2^64 range, if
248 * only a small part of the integer space is used. We would very
249 * rarely hit values that are actually in the set.
250 */
251 x = (pg_lrand48() << 31) | pg_lrand48();
252 x = x % (last_int + 1000);
253
254 /* Do we expect this value to be present in the set? */
255 if (x >= last_int)
256 expected = false;
257 else
258 {
259 uint64 idx = x % spec->spacing;
260
261 if (idx >= patternlen)
262 expected = false;
263 else if (spec->pattern_str[idx] == '1')
264 expected = true;
265 else
266 expected = false;
267 }
268
269 /* Is it present according to intset_is_member() ? */
270 b = intset_is_member(intset, x);
271
272 if (b != expected)
273 elog(ERROR, "mismatch at " UINT64_FORMAT ": %d vs %d", x, b, expected);
274 }
275 endtime = GetCurrentTimestamp();
276 if (intset_test_stats)
277 fprintf(stderr, "probed " UINT64_FORMAT " values in %d ms\n",
278 n, (int) (endtime - starttime) / 1000);
279
280 /*
281 * Test iterator
282 */
283 starttime = GetCurrentTimestamp();
284
285 intset_begin_iterate(intset);
286 n = 0;
287 last_int = 0;
288 while (n < spec->num_values)
289 {
290 for (int i = 0; i < pattern_num_values && n < spec->num_values; i++)
291 {
292 uint64 expected = last_int + pattern_values[i];
293 uint64 x;
294
295 if (!intset_iterate_next(intset, &x))
296 break;
297
298 if (x != expected)
299 elog(ERROR, "iterate returned wrong value; got " UINT64_FORMAT ", expected " UINT64_FORMAT, x, expected);
300 n++;
301 }
302 last_int += spec->spacing;
303 }
304 endtime = GetCurrentTimestamp();
305 if (intset_test_stats)
306 fprintf(stderr, "iterated " UINT64_FORMAT " values in %d ms\n",
307 n, (int) (endtime - starttime) / 1000);
308
309 if (n < spec->num_values)
310 elog(ERROR, "iterator stopped short after " UINT64_FORMAT " entries, expected " UINT64_FORMAT, n, spec->num_values);
311 if (n > spec->num_values)
312 elog(ERROR, "iterator returned " UINT64_FORMAT " entries, " UINT64_FORMAT " was expected", n, spec->num_values);
313
314 MemoryContextDelete(intset_ctx);
315 }
316
317 /*
318 * Test with a set containing a single integer.
319 */
320 static void
test_single_value(uint64 value)321 test_single_value(uint64 value)
322 {
323 IntegerSet *intset;
324 uint64 x;
325 uint64 num_entries;
326 bool found;
327
328 elog(NOTICE, "testing intset with single value " UINT64_FORMAT, value);
329
330 /* Create the set. */
331 intset = intset_create();
332 intset_add_member(intset, value);
333
334 /* Test intset_get_num_entries() */
335 num_entries = intset_num_entries(intset);
336 if (num_entries != 1)
337 elog(ERROR, "intset_num_entries returned " UINT64_FORMAT ", expected 1", num_entries);
338
339 /*
340 * Test intset_is_member() at various special values, like 0 and maximum
341 * possible 64-bit integer, as well as the value itself.
342 */
343 if (intset_is_member(intset, 0) != (value == 0))
344 elog(ERROR, "intset_is_member failed for 0");
345 if (intset_is_member(intset, 1) != (value == 1))
346 elog(ERROR, "intset_is_member failed for 1");
347 if (intset_is_member(intset, PG_UINT64_MAX) != (value == PG_UINT64_MAX))
348 elog(ERROR, "intset_is_member failed for PG_UINT64_MAX");
349 if (intset_is_member(intset, value) != true)
350 elog(ERROR, "intset_is_member failed for the tested value");
351
352 /*
353 * Test iterator
354 */
355 intset_begin_iterate(intset);
356 found = intset_iterate_next(intset, &x);
357 if (!found || x != value)
358 elog(ERROR, "intset_iterate_next failed for " UINT64_FORMAT, x);
359
360 found = intset_iterate_next(intset, &x);
361 if (found)
362 elog(ERROR, "intset_iterate_next failed " UINT64_FORMAT, x);
363 }
364
365 /*
366 * Test with an integer set that contains:
367 *
368 * - a given single 'value', and
369 * - all integers between 'filler_min' and 'filler_max'.
370 *
371 * This exercises different codepaths than testing just with a single value,
372 * because the implementation buffers newly-added values. If we add just a
373 * single value to the set, we won't test the internal B-tree code at all,
374 * just the code that deals with the buffer.
375 */
376 static void
test_single_value_and_filler(uint64 value,uint64 filler_min,uint64 filler_max)377 test_single_value_and_filler(uint64 value, uint64 filler_min, uint64 filler_max)
378 {
379 IntegerSet *intset;
380 uint64 x;
381 bool found;
382 uint64 *iter_expected;
383 uint64 n = 0;
384 uint64 num_entries = 0;
385 uint64 mem_usage;
386
387 elog(NOTICE, "testing intset with value " UINT64_FORMAT ", and all between " UINT64_FORMAT " and " UINT64_FORMAT,
388 value, filler_min, filler_max);
389
390 intset = intset_create();
391
392 iter_expected = palloc(sizeof(uint64) * (filler_max - filler_min + 1));
393 if (value < filler_min)
394 {
395 intset_add_member(intset, value);
396 iter_expected[n++] = value;
397 }
398
399 for (x = filler_min; x < filler_max; x++)
400 {
401 intset_add_member(intset, x);
402 iter_expected[n++] = x;
403 }
404
405 if (value >= filler_max)
406 {
407 intset_add_member(intset, value);
408 iter_expected[n++] = value;
409 }
410
411 /* Test intset_get_num_entries() */
412 num_entries = intset_num_entries(intset);
413 if (num_entries != n)
414 elog(ERROR, "intset_num_entries returned " UINT64_FORMAT ", expected " UINT64_FORMAT, num_entries, n);
415
416 /*
417 * Test intset_is_member() at various spots, at and around the values that
418 * we expect to be set, as well as 0 and the maximum possible value.
419 */
420 check_with_filler(intset, 0,
421 value, filler_min, filler_max);
422 check_with_filler(intset, 1,
423 value, filler_min, filler_max);
424 check_with_filler(intset, filler_min - 1,
425 value, filler_min, filler_max);
426 check_with_filler(intset, filler_min,
427 value, filler_min, filler_max);
428 check_with_filler(intset, filler_min + 1,
429 value, filler_min, filler_max);
430 check_with_filler(intset, value - 1,
431 value, filler_min, filler_max);
432 check_with_filler(intset, value,
433 value, filler_min, filler_max);
434 check_with_filler(intset, value + 1,
435 value, filler_min, filler_max);
436 check_with_filler(intset, filler_max - 1,
437 value, filler_min, filler_max);
438 check_with_filler(intset, filler_max,
439 value, filler_min, filler_max);
440 check_with_filler(intset, filler_max + 1,
441 value, filler_min, filler_max);
442 check_with_filler(intset, PG_UINT64_MAX - 1,
443 value, filler_min, filler_max);
444 check_with_filler(intset, PG_UINT64_MAX,
445 value, filler_min, filler_max);
446
447 intset_begin_iterate(intset);
448 for (uint64 i = 0; i < n; i++)
449 {
450 found = intset_iterate_next(intset, &x);
451 if (!found || x != iter_expected[i])
452 elog(ERROR, "intset_iterate_next failed for " UINT64_FORMAT, x);
453 }
454 found = intset_iterate_next(intset, &x);
455 if (found)
456 elog(ERROR, "intset_iterate_next failed " UINT64_FORMAT, x);
457
458 mem_usage = intset_memory_usage(intset);
459 if (mem_usage < 5000 || mem_usage > 500000000)
460 elog(ERROR, "intset_memory_usage() reported suspicious value: " UINT64_FORMAT, mem_usage);
461 }
462
463 /*
464 * Helper function for test_single_value_and_filler.
465 *
466 * Calls intset_is_member() for value 'x', and checks that the result is what
467 * we expect.
468 */
469 static void
check_with_filler(IntegerSet * intset,uint64 x,uint64 value,uint64 filler_min,uint64 filler_max)470 check_with_filler(IntegerSet *intset, uint64 x,
471 uint64 value, uint64 filler_min, uint64 filler_max)
472 {
473 bool expected;
474 bool actual;
475
476 expected = (x == value || (filler_min <= x && x < filler_max));
477
478 actual = intset_is_member(intset, x);
479
480 if (actual != expected)
481 elog(ERROR, "intset_is_member failed for " UINT64_FORMAT, x);
482 }
483
484 /*
485 * Test empty set
486 */
487 static void
test_empty(void)488 test_empty(void)
489 {
490 IntegerSet *intset;
491 uint64 x;
492
493 elog(NOTICE, "testing intset with empty set");
494
495 intset = intset_create();
496
497 /* Test intset_is_member() */
498 if (intset_is_member(intset, 0) != false)
499 elog(ERROR, "intset_is_member on empty set returned true");
500 if (intset_is_member(intset, 1) != false)
501 elog(ERROR, "intset_is_member on empty set returned true");
502 if (intset_is_member(intset, PG_UINT64_MAX) != false)
503 elog(ERROR, "intset_is_member on empty set returned true");
504
505 /* Test iterator */
506 intset_begin_iterate(intset);
507 if (intset_iterate_next(intset, &x))
508 elog(ERROR, "intset_iterate_next on empty set returned a value (" UINT64_FORMAT ")", x);
509 }
510
511 /*
512 * Test with integers that are more than 2^60 apart.
513 *
514 * The Simple-8b encoding used by the set implementation can only encode
515 * values up to 2^60. That makes large differences like this interesting
516 * to test.
517 */
518 static void
test_huge_distances(void)519 test_huge_distances(void)
520 {
521 IntegerSet *intset;
522 uint64 values[1000];
523 int num_values = 0;
524 uint64 val = 0;
525 bool found;
526 uint64 x;
527
528 elog(NOTICE, "testing intset with distances > 2^60 between values");
529
530 val = 0;
531 values[num_values++] = val;
532
533 /* Test differences on both sides of the 2^60 boundary. */
534 val += UINT64CONST(1152921504606846976) - 1; /* 2^60 - 1 */
535 values[num_values++] = val;
536
537 val += UINT64CONST(1152921504606846976) - 1; /* 2^60 - 1 */
538 values[num_values++] = val;
539
540 val += UINT64CONST(1152921504606846976); /* 2^60 */
541 values[num_values++] = val;
542
543 val += UINT64CONST(1152921504606846976); /* 2^60 */
544 values[num_values++] = val;
545
546 val += UINT64CONST(1152921504606846976); /* 2^60 */
547 values[num_values++] = val;
548
549 val += UINT64CONST(1152921504606846976) + 1; /* 2^60 + 1 */
550 values[num_values++] = val;
551
552 val += UINT64CONST(1152921504606846976) + 1; /* 2^60 + 1 */
553 values[num_values++] = val;
554
555 val += UINT64CONST(1152921504606846976) + 1; /* 2^60 + 1 */
556 values[num_values++] = val;
557
558 val += UINT64CONST(1152921504606846976) + 2; /* 2^60 + 2 */
559 values[num_values++] = val;
560
561 val += UINT64CONST(1152921504606846976) + 2; /* 2^60 + 2 */
562 values[num_values++] = val;
563
564 val += UINT64CONST(1152921504606846976); /* 2^60 */
565 values[num_values++] = val;
566
567 /*
568 * We're now very close to 2^64, so can't add large values anymore. But
569 * add more smaller values to the end, to make sure that all the above
570 * values get flushed and packed into the tree structure.
571 */
572 while (num_values < 1000)
573 {
574 val += pg_lrand48();
575 values[num_values++] = val;
576 }
577
578 /* Create an IntegerSet using these values */
579 intset = intset_create();
580 for (int i = 0; i < num_values; i++)
581 intset_add_member(intset, values[i]);
582
583 /*
584 * Test intset_is_member() around each of these values
585 */
586 for (int i = 0; i < num_values; i++)
587 {
588 uint64 x = values[i];
589 bool expected;
590 bool result;
591
592 if (x > 0)
593 {
594 expected = (values[i - 1] == x - 1);
595 result = intset_is_member(intset, x - 1);
596 if (result != expected)
597 elog(ERROR, "intset_is_member failed for " UINT64_FORMAT, x - 1);
598 }
599
600 result = intset_is_member(intset, x);
601 if (result != true)
602 elog(ERROR, "intset_is_member failed for " UINT64_FORMAT, x);
603
604 expected = (i != num_values - 1) ? (values[i + 1] == x + 1) : false;
605 result = intset_is_member(intset, x + 1);
606 if (result != expected)
607 elog(ERROR, "intset_is_member failed for " UINT64_FORMAT, x + 1);
608 }
609
610 /*
611 * Test iterator
612 */
613 intset_begin_iterate(intset);
614 for (int i = 0; i < num_values; i++)
615 {
616 found = intset_iterate_next(intset, &x);
617 if (!found || x != values[i])
618 elog(ERROR, "intset_iterate_next failed for " UINT64_FORMAT, x);
619 }
620 found = intset_iterate_next(intset, &x);
621 if (found)
622 elog(ERROR, "intset_iterate_next failed " UINT64_FORMAT, x);
623 }
624