1 /*--------------------------------------------------------------------------
2  *
3  * test_integerset.c
4  *		Test integer set data structure.
5  *
6  * Copyright (c) 2019, PostgreSQL Global Development Group
7  *
8  * IDENTIFICATION
9  *		src/test/modules/test_integerset/test_integerset.c
10  *
11  * -------------------------------------------------------------------------
12  */
13 #include "postgres.h"
14 
15 #include "fmgr.h"
16 #include "lib/integerset.h"
17 #include "nodes/bitmapset.h"
18 #include "utils/memutils.h"
19 #include "utils/timestamp.h"
20 #include "storage/block.h"
21 #include "storage/itemptr.h"
22 #include "miscadmin.h"
23 
24 /*
25  * If you enable this, the "pattern" tests will print information about
26  * how long populating, probing, and iterating the test set takes, and
27  * how much memory the test set consumed.  That can be used as
28  * micro-benchmark of various operations and input patterns (you might
29  * want to increase the number of values used in each of the test, if
30  * you do that, to reduce noise).
31  *
32  * The information is printed to the server's stderr, mostly because
33  * that's where MemoryContextStats() output goes.
34  */
35 static const bool intset_test_stats = false;
36 
37 PG_MODULE_MAGIC;
38 
39 PG_FUNCTION_INFO_V1(test_integerset);
40 
41 /*
42  * A struct to define a pattern of integers, for use with the test_pattern()
43  * function.
44  */
45 typedef struct
46 {
47 	char	   *test_name;		/* short name of the test, for humans */
48 	char	   *pattern_str;	/* a bit pattern */
49 	uint64		spacing;		/* pattern repeats at this interval */
50 	uint64		num_values;		/* number of integers to set in total */
51 } test_spec;
52 
53 static const test_spec test_specs[] = {
54 	{
55 		"all ones", "1111111111",
56 		10, 10000000
57 	},
58 	{
59 		"alternating bits", "0101010101",
60 		10, 10000000
61 	},
62 	{
63 		"clusters of ten", "1111111111",
64 		10000, 10000000
65 	},
66 	{
67 		"clusters of hundred",
68 		"1111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111",
69 		10000, 100000000
70 	},
71 	{
72 		"one-every-64k", "1",
73 		65536, 10000000
74 	},
75 	{
76 		"sparse", "100000000000000000000000000000001",
77 		10000000, 10000000
78 	},
79 	{
80 		"single values, distance > 2^32", "1",
81 		UINT64CONST(10000000000), 1000000
82 	},
83 	{
84 		"clusters, distance > 2^32", "10101010",
85 		UINT64CONST(10000000000), 10000000
86 	},
87 	{
88 		"clusters, distance > 2^60", "10101010",
89 		UINT64CONST(2000000000000000000),
90 		23						/* can't be much higher than this, or we
91 								 * overflow uint64 */
92 	}
93 };
94 
95 static void test_pattern(const test_spec *spec);
96 static void test_empty(void);
97 static void test_single_value(uint64 value);
98 static void check_with_filler(IntegerSet *intset, uint64 x, uint64 value, uint64 filler_min, uint64 filler_max);
99 static void test_single_value_and_filler(uint64 value, uint64 filler_min, uint64 filler_max);
100 static void test_huge_distances(void);
101 
102 /*
103  * SQL-callable entry point to perform all tests.
104  */
105 Datum
test_integerset(PG_FUNCTION_ARGS)106 test_integerset(PG_FUNCTION_ARGS)
107 {
108 	/* Tests for various corner cases */
109 	test_empty();
110 	test_huge_distances();
111 	test_single_value(0);
112 	test_single_value(1);
113 	test_single_value(PG_UINT64_MAX - 1);
114 	test_single_value(PG_UINT64_MAX);
115 	test_single_value_and_filler(0, 1000, 2000);
116 	test_single_value_and_filler(1, 1000, 2000);
117 	test_single_value_and_filler(1, 1000, 2000000);
118 	test_single_value_and_filler(PG_UINT64_MAX - 1, 1000, 2000);
119 	test_single_value_and_filler(PG_UINT64_MAX, 1000, 2000);
120 
121 	/* Test different test patterns, with lots of entries */
122 	for (int i = 0; i < lengthof(test_specs); i++)
123 	{
124 		test_pattern(&test_specs[i]);
125 	}
126 
127 	PG_RETURN_VOID();
128 }
129 
130 /*
131  * Test with a repeating pattern, defined by the 'spec'.
132  */
133 static void
test_pattern(const test_spec * spec)134 test_pattern(const test_spec *spec)
135 {
136 	IntegerSet *intset;
137 	MemoryContext intset_ctx;
138 	MemoryContext old_ctx;
139 	TimestampTz starttime;
140 	TimestampTz endtime;
141 	uint64		n;
142 	uint64		last_int;
143 	int			patternlen;
144 	uint64	   *pattern_values;
145 	uint64		pattern_num_values;
146 
147 	elog(NOTICE, "testing intset with pattern \"%s\"", spec->test_name);
148 	if (intset_test_stats)
149 		fprintf(stderr, "-----\ntesting intset with pattern \"%s\"\n", spec->test_name);
150 
151 	/* Pre-process the pattern, creating an array of integers from it. */
152 	patternlen = strlen(spec->pattern_str);
153 	pattern_values = palloc(patternlen * sizeof(uint64));
154 	pattern_num_values = 0;
155 	for (int i = 0; i < patternlen; i++)
156 	{
157 		if (spec->pattern_str[i] == '1')
158 			pattern_values[pattern_num_values++] = i;
159 	}
160 
161 	/*
162 	 * Allocate the integer set.
163 	 *
164 	 * Allocate it in a separate memory context, so that we can print its
165 	 * memory usage easily.  (intset_create() creates a memory context of its
166 	 * own, too, but we don't have direct access to it, so we cannot call
167 	 * MemoryContextStats() on it directly).
168 	 */
169 	intset_ctx = AllocSetContextCreate(CurrentMemoryContext,
170 									   "intset test",
171 									   ALLOCSET_SMALL_SIZES);
172 	MemoryContextSetIdentifier(intset_ctx, spec->test_name);
173 	old_ctx = MemoryContextSwitchTo(intset_ctx);
174 	intset = intset_create();
175 	MemoryContextSwitchTo(old_ctx);
176 
177 	/*
178 	 * Add values to the set.
179 	 */
180 	starttime = GetCurrentTimestamp();
181 
182 	n = 0;
183 	last_int = 0;
184 	while (n < spec->num_values)
185 	{
186 		uint64		x = 0;
187 
188 		for (int i = 0; i < pattern_num_values && n < spec->num_values; i++)
189 		{
190 			x = last_int + pattern_values[i];
191 
192 			intset_add_member(intset, x);
193 			n++;
194 		}
195 		last_int += spec->spacing;
196 	}
197 
198 	endtime = GetCurrentTimestamp();
199 
200 	if (intset_test_stats)
201 		fprintf(stderr, "added " UINT64_FORMAT " values in %d ms\n",
202 				spec->num_values, (int) (endtime - starttime) / 1000);
203 
204 	/*
205 	 * Print stats on the amount of memory used.
206 	 *
207 	 * We print the usage reported by intset_memory_usage(), as well as the
208 	 * stats from the memory context.  They should be in the same ballpark,
209 	 * but it's hard to automate testing that, so if you're making changes to
210 	 * the implementation, just observe that manually.
211 	 */
212 	if (intset_test_stats)
213 	{
214 		uint64		mem_usage;
215 
216 		/*
217 		 * Also print memory usage as reported by intset_memory_usage().  It
218 		 * should be in the same ballpark as the usage reported by
219 		 * MemoryContextStats().
220 		 */
221 		mem_usage = intset_memory_usage(intset);
222 		fprintf(stderr, "intset_memory_usage() reported " UINT64_FORMAT " (%0.2f bytes / integer)\n",
223 				mem_usage, (double) mem_usage / spec->num_values);
224 
225 		MemoryContextStats(intset_ctx);
226 	}
227 
228 	/* Check that intset_get_num_entries works */
229 	n = intset_num_entries(intset);
230 	if (n != spec->num_values)
231 		elog(ERROR, "intset_num_entries returned " UINT64_FORMAT ", expected " UINT64_FORMAT, n, spec->num_values);
232 
233 	/*
234 	 * Test random-access probes with intset_is_member()
235 	 */
236 	starttime = GetCurrentTimestamp();
237 
238 	for (n = 0; n < 100000; n++)
239 	{
240 		bool		b;
241 		bool		expected;
242 		uint64		x;
243 
244 		/*
245 		 * Pick next value to probe at random.  We limit the probes to the
246 		 * last integer that we added to the set, plus an arbitrary constant
247 		 * (1000).  There's no point in probing the whole 0 - 2^64 range, if
248 		 * only a small part of the integer space is used.  We would very
249 		 * rarely hit values that are actually in the set.
250 		 */
251 		x = (pg_lrand48() << 31) | pg_lrand48();
252 		x = x % (last_int + 1000);
253 
254 		/* Do we expect this value to be present in the set? */
255 		if (x >= last_int)
256 			expected = false;
257 		else
258 		{
259 			uint64		idx = x % spec->spacing;
260 
261 			if (idx >= patternlen)
262 				expected = false;
263 			else if (spec->pattern_str[idx] == '1')
264 				expected = true;
265 			else
266 				expected = false;
267 		}
268 
269 		/* Is it present according to intset_is_member() ? */
270 		b = intset_is_member(intset, x);
271 
272 		if (b != expected)
273 			elog(ERROR, "mismatch at " UINT64_FORMAT ": %d vs %d", x, b, expected);
274 	}
275 	endtime = GetCurrentTimestamp();
276 	if (intset_test_stats)
277 		fprintf(stderr, "probed " UINT64_FORMAT " values in %d ms\n",
278 				n, (int) (endtime - starttime) / 1000);
279 
280 	/*
281 	 * Test iterator
282 	 */
283 	starttime = GetCurrentTimestamp();
284 
285 	intset_begin_iterate(intset);
286 	n = 0;
287 	last_int = 0;
288 	while (n < spec->num_values)
289 	{
290 		for (int i = 0; i < pattern_num_values && n < spec->num_values; i++)
291 		{
292 			uint64		expected = last_int + pattern_values[i];
293 			uint64		x;
294 
295 			if (!intset_iterate_next(intset, &x))
296 				break;
297 
298 			if (x != expected)
299 				elog(ERROR, "iterate returned wrong value; got " UINT64_FORMAT ", expected " UINT64_FORMAT, x, expected);
300 			n++;
301 		}
302 		last_int += spec->spacing;
303 	}
304 	endtime = GetCurrentTimestamp();
305 	if (intset_test_stats)
306 		fprintf(stderr, "iterated " UINT64_FORMAT " values in %d ms\n",
307 				n, (int) (endtime - starttime) / 1000);
308 
309 	if (n < spec->num_values)
310 		elog(ERROR, "iterator stopped short after " UINT64_FORMAT " entries, expected " UINT64_FORMAT, n, spec->num_values);
311 	if (n > spec->num_values)
312 		elog(ERROR, "iterator returned " UINT64_FORMAT " entries, " UINT64_FORMAT " was expected", n, spec->num_values);
313 
314 	MemoryContextDelete(intset_ctx);
315 }
316 
317 /*
318  * Test with a set containing a single integer.
319  */
320 static void
test_single_value(uint64 value)321 test_single_value(uint64 value)
322 {
323 	IntegerSet *intset;
324 	uint64		x;
325 	uint64		num_entries;
326 	bool		found;
327 
328 	elog(NOTICE, "testing intset with single value " UINT64_FORMAT, value);
329 
330 	/* Create the set. */
331 	intset = intset_create();
332 	intset_add_member(intset, value);
333 
334 	/* Test intset_get_num_entries() */
335 	num_entries = intset_num_entries(intset);
336 	if (num_entries != 1)
337 		elog(ERROR, "intset_num_entries returned " UINT64_FORMAT ", expected 1", num_entries);
338 
339 	/*
340 	 * Test intset_is_member() at various special values, like 0 and maximum
341 	 * possible 64-bit integer, as well as the value itself.
342 	 */
343 	if (intset_is_member(intset, 0) != (value == 0))
344 		elog(ERROR, "intset_is_member failed for 0");
345 	if (intset_is_member(intset, 1) != (value == 1))
346 		elog(ERROR, "intset_is_member failed for 1");
347 	if (intset_is_member(intset, PG_UINT64_MAX) != (value == PG_UINT64_MAX))
348 		elog(ERROR, "intset_is_member failed for PG_UINT64_MAX");
349 	if (intset_is_member(intset, value) != true)
350 		elog(ERROR, "intset_is_member failed for the tested value");
351 
352 	/*
353 	 * Test iterator
354 	 */
355 	intset_begin_iterate(intset);
356 	found = intset_iterate_next(intset, &x);
357 	if (!found || x != value)
358 		elog(ERROR, "intset_iterate_next failed for " UINT64_FORMAT, x);
359 
360 	found = intset_iterate_next(intset, &x);
361 	if (found)
362 		elog(ERROR, "intset_iterate_next failed " UINT64_FORMAT, x);
363 }
364 
365 /*
366  * Test with an integer set that contains:
367  *
368  * - a given single 'value', and
369  * - all integers between 'filler_min' and 'filler_max'.
370  *
371  * This exercises different codepaths than testing just with a single value,
372  * because the implementation buffers newly-added values.  If we add just a
373  * single value to the set, we won't test the internal B-tree code at all,
374  * just the code that deals with the buffer.
375  */
376 static void
test_single_value_and_filler(uint64 value,uint64 filler_min,uint64 filler_max)377 test_single_value_and_filler(uint64 value, uint64 filler_min, uint64 filler_max)
378 {
379 	IntegerSet *intset;
380 	uint64		x;
381 	bool		found;
382 	uint64	   *iter_expected;
383 	uint64		n = 0;
384 	uint64		num_entries = 0;
385 	uint64		mem_usage;
386 
387 	elog(NOTICE, "testing intset with value " UINT64_FORMAT ", and all between " UINT64_FORMAT " and " UINT64_FORMAT,
388 		 value, filler_min, filler_max);
389 
390 	intset = intset_create();
391 
392 	iter_expected = palloc(sizeof(uint64) * (filler_max - filler_min + 1));
393 	if (value < filler_min)
394 	{
395 		intset_add_member(intset, value);
396 		iter_expected[n++] = value;
397 	}
398 
399 	for (x = filler_min; x < filler_max; x++)
400 	{
401 		intset_add_member(intset, x);
402 		iter_expected[n++] = x;
403 	}
404 
405 	if (value >= filler_max)
406 	{
407 		intset_add_member(intset, value);
408 		iter_expected[n++] = value;
409 	}
410 
411 	/* Test intset_get_num_entries() */
412 	num_entries = intset_num_entries(intset);
413 	if (num_entries != n)
414 		elog(ERROR, "intset_num_entries returned " UINT64_FORMAT ", expected " UINT64_FORMAT, num_entries, n);
415 
416 	/*
417 	 * Test intset_is_member() at various spots, at and around the values that
418 	 * we expect to be set, as well as 0 and the maximum possible value.
419 	 */
420 	check_with_filler(intset, 0,
421 					  value, filler_min, filler_max);
422 	check_with_filler(intset, 1,
423 					  value, filler_min, filler_max);
424 	check_with_filler(intset, filler_min - 1,
425 					  value, filler_min, filler_max);
426 	check_with_filler(intset, filler_min,
427 					  value, filler_min, filler_max);
428 	check_with_filler(intset, filler_min + 1,
429 					  value, filler_min, filler_max);
430 	check_with_filler(intset, value - 1,
431 					  value, filler_min, filler_max);
432 	check_with_filler(intset, value,
433 					  value, filler_min, filler_max);
434 	check_with_filler(intset, value + 1,
435 					  value, filler_min, filler_max);
436 	check_with_filler(intset, filler_max - 1,
437 					  value, filler_min, filler_max);
438 	check_with_filler(intset, filler_max,
439 					  value, filler_min, filler_max);
440 	check_with_filler(intset, filler_max + 1,
441 					  value, filler_min, filler_max);
442 	check_with_filler(intset, PG_UINT64_MAX - 1,
443 					  value, filler_min, filler_max);
444 	check_with_filler(intset, PG_UINT64_MAX,
445 					  value, filler_min, filler_max);
446 
447 	intset_begin_iterate(intset);
448 	for (uint64 i = 0; i < n; i++)
449 	{
450 		found = intset_iterate_next(intset, &x);
451 		if (!found || x != iter_expected[i])
452 			elog(ERROR, "intset_iterate_next failed for " UINT64_FORMAT, x);
453 	}
454 	found = intset_iterate_next(intset, &x);
455 	if (found)
456 		elog(ERROR, "intset_iterate_next failed " UINT64_FORMAT, x);
457 
458 	mem_usage = intset_memory_usage(intset);
459 	if (mem_usage < 5000 || mem_usage > 500000000)
460 		elog(ERROR, "intset_memory_usage() reported suspicious value: " UINT64_FORMAT, mem_usage);
461 }
462 
463 /*
464  * Helper function for test_single_value_and_filler.
465  *
466  * Calls intset_is_member() for value 'x', and checks that the result is what
467  * we expect.
468  */
469 static void
check_with_filler(IntegerSet * intset,uint64 x,uint64 value,uint64 filler_min,uint64 filler_max)470 check_with_filler(IntegerSet *intset, uint64 x,
471 				  uint64 value, uint64 filler_min, uint64 filler_max)
472 {
473 	bool		expected;
474 	bool		actual;
475 
476 	expected = (x == value || (filler_min <= x && x < filler_max));
477 
478 	actual = intset_is_member(intset, x);
479 
480 	if (actual != expected)
481 		elog(ERROR, "intset_is_member failed for " UINT64_FORMAT, x);
482 }
483 
484 /*
485  * Test empty set
486  */
487 static void
test_empty(void)488 test_empty(void)
489 {
490 	IntegerSet *intset;
491 	uint64		x;
492 
493 	elog(NOTICE, "testing intset with empty set");
494 
495 	intset = intset_create();
496 
497 	/* Test intset_is_member() */
498 	if (intset_is_member(intset, 0) != false)
499 		elog(ERROR, "intset_is_member on empty set returned true");
500 	if (intset_is_member(intset, 1) != false)
501 		elog(ERROR, "intset_is_member on empty set returned true");
502 	if (intset_is_member(intset, PG_UINT64_MAX) != false)
503 		elog(ERROR, "intset_is_member on empty set returned true");
504 
505 	/* Test iterator */
506 	intset_begin_iterate(intset);
507 	if (intset_iterate_next(intset, &x))
508 		elog(ERROR, "intset_iterate_next on empty set returned a value (" UINT64_FORMAT ")", x);
509 }
510 
511 /*
512  * Test with integers that are more than 2^60 apart.
513  *
514  * The Simple-8b encoding used by the set implementation can only encode
515  * values up to 2^60.  That makes large differences like this interesting
516  * to test.
517  */
518 static void
test_huge_distances(void)519 test_huge_distances(void)
520 {
521 	IntegerSet *intset;
522 	uint64		values[1000];
523 	int			num_values = 0;
524 	uint64		val = 0;
525 	bool		found;
526 	uint64		x;
527 
528 	elog(NOTICE, "testing intset with distances > 2^60 between values");
529 
530 	val = 0;
531 	values[num_values++] = val;
532 
533 	/* Test differences on both sides of the 2^60 boundary. */
534 	val += UINT64CONST(1152921504606846976) - 1;	/* 2^60 - 1 */
535 	values[num_values++] = val;
536 
537 	val += UINT64CONST(1152921504606846976) - 1;	/* 2^60 - 1 */
538 	values[num_values++] = val;
539 
540 	val += UINT64CONST(1152921504606846976);	/* 2^60 */
541 	values[num_values++] = val;
542 
543 	val += UINT64CONST(1152921504606846976);	/* 2^60 */
544 	values[num_values++] = val;
545 
546 	val += UINT64CONST(1152921504606846976);	/* 2^60 */
547 	values[num_values++] = val;
548 
549 	val += UINT64CONST(1152921504606846976) + 1;	/* 2^60 + 1 */
550 	values[num_values++] = val;
551 
552 	val += UINT64CONST(1152921504606846976) + 1;	/* 2^60 + 1 */
553 	values[num_values++] = val;
554 
555 	val += UINT64CONST(1152921504606846976) + 1;	/* 2^60 + 1 */
556 	values[num_values++] = val;
557 
558 	val += UINT64CONST(1152921504606846976) + 2;	/* 2^60 + 2 */
559 	values[num_values++] = val;
560 
561 	val += UINT64CONST(1152921504606846976) + 2;	/* 2^60 + 2 */
562 	values[num_values++] = val;
563 
564 	val += UINT64CONST(1152921504606846976);	/* 2^60 */
565 	values[num_values++] = val;
566 
567 	/*
568 	 * We're now very close to 2^64, so can't add large values anymore.  But
569 	 * add more smaller values to the end, to make sure that all the above
570 	 * values get flushed and packed into the tree structure.
571 	 */
572 	while (num_values < 1000)
573 	{
574 		val += pg_lrand48();
575 		values[num_values++] = val;
576 	}
577 
578 	/* Create an IntegerSet using these values */
579 	intset = intset_create();
580 	for (int i = 0; i < num_values; i++)
581 		intset_add_member(intset, values[i]);
582 
583 	/*
584 	 * Test intset_is_member() around each of these values
585 	 */
586 	for (int i = 0; i < num_values; i++)
587 	{
588 		uint64		x = values[i];
589 		bool		expected;
590 		bool		result;
591 
592 		if (x > 0)
593 		{
594 			expected = (values[i - 1] == x - 1);
595 			result = intset_is_member(intset, x - 1);
596 			if (result != expected)
597 				elog(ERROR, "intset_is_member failed for " UINT64_FORMAT, x - 1);
598 		}
599 
600 		result = intset_is_member(intset, x);
601 		if (result != true)
602 			elog(ERROR, "intset_is_member failed for " UINT64_FORMAT, x);
603 
604 		expected = (i != num_values - 1) ? (values[i + 1] == x + 1) : false;
605 		result = intset_is_member(intset, x + 1);
606 		if (result != expected)
607 			elog(ERROR, "intset_is_member failed for " UINT64_FORMAT, x + 1);
608 	}
609 
610 	/*
611 	 * Test iterator
612 	 */
613 	intset_begin_iterate(intset);
614 	for (int i = 0; i < num_values; i++)
615 	{
616 		found = intset_iterate_next(intset, &x);
617 		if (!found || x != values[i])
618 			elog(ERROR, "intset_iterate_next failed for " UINT64_FORMAT, x);
619 	}
620 	found = intset_iterate_next(intset, &x);
621 	if (found)
622 		elog(ERROR, "intset_iterate_next failed " UINT64_FORMAT, x);
623 }
624