1 /*-
2  * Public Domain 2014-2018 MongoDB, Inc.
3  * Public Domain 2008-2014 WiredTiger, Inc.
4  *
5  * This is free and unencumbered software released into the public domain.
6  *
7  * Anyone is free to copy, modify, publish, use, compile, sell, or
8  * distribute this software, either in source code form or as a compiled
9  * binary, for any purpose, commercial or non-commercial, and by any
10  * means.
11  *
12  * In jurisdictions that recognize copyright laws, the author or authors
13  * of this software dedicate any and all copyright interest in the
14  * software to the public domain. We make this dedication for the benefit
15  * of the public at large and to the detriment of our heirs and
16  * successors. We intend this dedication to be an overt act of
17  * relinquishment in perpetuity of all present and future rights to this
18  * software under copyright law.
19  *
20  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
21  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
22  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
23  * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
24  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
25  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
26  * OTHER DEALINGS IN THE SOFTWARE.
27  */
28 #include "test_util.h"
29 
30 #include <sys/resource.h>
31 #include <sys/wait.h>
32 
33 /*
34  * JIRA ticket reference: WT-2909
35  * Test case description:
36  *
37  * This test attempts to check the integrity of checkpoints by injecting
38  * failures (by means of a custom file system) and then trying to recover. To
39  * insulate the top level program from various crashes that may occur when
40  * injecting failures, the "populate" code runs in another process, and is
41  * expected to sometimes fail. Then the top level program runs recovery (with
42  * the normal file system) and checks the results. Any failure at the top level
43  * indicates a checkpoint integrity problem.
44  *
45  * Each subtest uses the same kind of schema and data, the only variance is
46  * when the faults are injected. At the moment, this test only injects during
47  * checkpoints, and only injects write failures. It varies in the number of
48  * successful writes that occur before an injected failure (during a checkpoint
49  * operation), this can be indicated with "-o N".  When N is not specified, the
50  * test attempts to find the optimal range of N for testing. Clearly when N is
51  * large, then the checkpoint may be successfully written, and the data
52  * represented by the checkpoint will be fully present. When N is small,
53  * nothing of interest is written and no data is present. To find the sweet
54  * spot where interesting failures occur, the test does a binary search to find
55  * the approximate N that divides the "small" and "large" cases. This is not
56  * strictly deterministic, a given N may give different results on different
57  * runs. But approximate optimal N can be determined, allowing a series of
58  * additional tests clustered around this N.
59  *
60  * The data is stored in two tables, one having indices. Both tables have
61  * the same keys and are updated with the same key in a single transaction.
62  *
63  * Failure mode:
64  * If one table is out of step with the other, that is detected as a failure at
65  * the top level.  If an index is missing values (or has extra values), that is
66  * likewise a failure at the top level. If the tables or the home directory
67  * cannot be opened, that is a top level error. The tables must be present
68  * as an initial checkpoint is done without any injected fault.
69  */
70 
/*
 * This program does not run on Windows. The non-portable aspects are, at a
 * minimum, fork/exec, the use of environment variables (used by fail_fs), and
 * the file names and build locations of dynamically loaded libraries.
 */
/* Length, in bytes, of the reference string built by create_big_string(). */
#define	BIG_SIZE			(1024 * 10)
/* Pattern repeated to fill the reference string. */
#define	BIG_CONTENTS			"<Big String Contents>"
/* Capacity of the argument vector assembled in run_check_subtest(). */
#define	MAX_ARGS			20
/* Upper bound of the operation-count binary search. */
#define	MAX_OP_RANGE			1000
/* Files, under the home directory, that the subtest redirects output to. */
#define	STDERR_FILE			"stderr.txt"
#define	STDOUT_FILE			"stdout.txt"
/* Number of runs per operation count when retesting near the crossover. */
#define	TESTS_PER_OP_VALUE		3
/* In verbose mode, progress is reported every this many records. */
#define	VERBOSE_PRINT			10000
84 
85 static int check_results(TEST_OPTS *, uint64_t *);
86 static void check_values(WT_CURSOR *, int, int, int, char *);
87 static int create_big_string(char **);
88 static void cursor_count_items(WT_CURSOR *, uint64_t *);
89 static void disable_failures(void);
90 static void enable_failures(uint64_t, uint64_t);
91 static void generate_key(uint64_t, int *);
92 static void generate_value(uint32_t, uint64_t, char *, int *, int *, int *,
93     char **);
94 static void run_check_subtest(TEST_OPTS *, const char *, uint64_t, bool,
95     uint64_t *);
96 static void run_check_subtest_range(TEST_OPTS *, const char *, bool);
97 static int run_process(TEST_OPTS *, const char *, char *[], int *);
98 static void subtest_main(int, char *[], bool);
99 static void subtest_populate(TEST_OPTS *, bool);
100 
/*
 * Declared here for main(), which advances argc/argv by this amount once
 * testutil_parse_opts() has returned.
 */
extern int   __wt_optind;

/*
 * Path of the fail_fs shared library; subtest_main() splices it into the
 * "extensions" part of its wiredtiger_open configuration.
 * NOTE(review): the path is relative, so it presumably depends on the
 * directory the test is run from -- confirm against the build layout.
 */
#define	WT_FAIL_FS_LIB	"../../ext/test/fail_fs/.libs/libwiredtiger_fail_fs.so"
104 
105 /*
106  * check_results --
107  *	Check all the tables and verify the results.
108  */
109 static int
check_results(TEST_OPTS * opts,uint64_t * foundp)110 check_results(TEST_OPTS *opts, uint64_t *foundp)
111 {
112 	WT_CURSOR *maincur, *maincur2, *v0cur, *v1cur, *v2cur;
113 	WT_SESSION *session;
114 	uint64_t count, idxcount, nrecords;
115 	uint32_t rndint;
116 	int key, key_got, ret, v0, v1, v2;
117 	char *big, *bigref;
118 
119 	testutil_check(create_big_string(&bigref));
120 	nrecords = opts->nrecords;
121 	testutil_check(wiredtiger_open(opts->home, NULL,
122 	    "create,log=(enabled)", &opts->conn));
123 	testutil_check(
124 	    opts->conn->open_session(opts->conn, NULL, NULL, &session));
125 
126 	testutil_check(session->open_cursor(session, "table:subtest", NULL,
127 	    NULL, &maincur));
128 	testutil_check(session->open_cursor(session, "table:subtest2", NULL,
129 	    NULL, &maincur2));
130 	testutil_check(session->open_cursor(session, "index:subtest:v0", NULL,
131 	    NULL, &v0cur));
132 	testutil_check(session->open_cursor(session, "index:subtest:v1", NULL,
133 	    NULL, &v1cur));
134 	testutil_check(session->open_cursor(session, "index:subtest:v2", NULL,
135 	    NULL, &v2cur));
136 
137 	count = 0;
138 	while ((ret = maincur->next(maincur)) == 0) {
139 		testutil_check(maincur2->next(maincur2));
140 		testutil_check(maincur2->get_key(maincur2, &key_got));
141 		testutil_check(maincur2->get_value(maincur2, &rndint));
142 
143 		generate_key(count, &key);
144 		generate_value(rndint, count, bigref, &v0, &v1, &v2, &big);
145 		testutil_assert(key == key_got);
146 
147 		/* Check the key/values in main table. */
148 		testutil_check(maincur->get_key(maincur, &key_got));
149 		testutil_assert(key == key_got);
150 		check_values(maincur, v0, v1, v2, big);
151 
152 		/* Check the values in the indices. */
153 		v0cur->set_key(v0cur, v0);
154 		testutil_check(v0cur->search(v0cur));
155 		check_values(v0cur, v0, v1, v2, big);
156 		v1cur->set_key(v1cur, v1);
157 		testutil_check(v1cur->search(v1cur));
158 		check_values(v1cur, v0, v1, v2, big);
159 		v2cur->set_key(v2cur, v2);
160 		testutil_check(v2cur->search(v2cur));
161 		check_values(v2cur, v0, v1, v2, big);
162 
163 		count++;
164 		if (count % VERBOSE_PRINT == 0 && opts->verbose)
165 			printf("checked %" PRIu64 "/%" PRIu64 "\n", count,
166 			    nrecords);
167 	}
168 	if (count % VERBOSE_PRINT != 0 && opts->verbose)
169 		printf("checked %" PRIu64 "/%" PRIu64 "\n", count, nrecords);
170 
171 	/*
172 	 * Always expect at least one entry, as populate does a
173 	 * checkpoint after the first insert.
174 	 */
175 	testutil_assert(count > 0);
176 	testutil_assert(ret == WT_NOTFOUND);
177 	testutil_assert(maincur2->next(maincur2) == WT_NOTFOUND);
178 	cursor_count_items(v0cur, &idxcount);
179 	testutil_assert(count == idxcount);
180 	cursor_count_items(v1cur, &idxcount);
181 	testutil_assert(count == idxcount);
182 	cursor_count_items(v2cur, &idxcount);
183 	testutil_assert(count == idxcount);
184 
185 	testutil_check(opts->conn->close(opts->conn, NULL));
186 	opts->conn = NULL;
187 
188 	free(bigref);
189 	*foundp = count;
190 	return (0);
191 }
192 
193 /*
194  * check_values --
195  *	Check that the values in the cursor match the given values.
196  */
197 static void
check_values(WT_CURSOR * cursor,int v0,int v1,int v2,char * big)198 check_values(WT_CURSOR *cursor, int v0, int v1, int v2, char *big)
199 {
200 	int v0_got, v1_got, v2_got;
201 	char *big_got;
202 
203 	testutil_check(cursor->get_value(cursor, &v0_got, &v1_got, &v2_got,
204 	    &big_got));
205 	testutil_assert(v0 == v0_got);
206 	testutil_assert(v1 == v1_got);
207 	testutil_assert(v2 == v2_got);
208 	testutil_assert(strcmp(big, big_got) == 0);
209 }
210 
211 /*
212  * create_big_string --
213  *	Create and fill the "reference" big array.
214  */
215 static int
create_big_string(char ** bigp)216 create_big_string(char **bigp)
217 {
218 	size_t i, mod;
219 	char *big;
220 
221 	if ((big = malloc(BIG_SIZE + 1)) == NULL)
222 		return (ENOMEM);
223 	mod = strlen(BIG_CONTENTS);
224 	for (i = 0; i < BIG_SIZE; i++) {
225 		big[i] = BIG_CONTENTS[i % mod];
226 	}
227 	big[BIG_SIZE] = '\0';
228 	*bigp = big;
229 	return (0);
230 }
231 
232 /*
233  * cursor_count_items --
234  *	Count the number of items in the table by traversing
235  *	through the cursor.
236  */
237 static void
cursor_count_items(WT_CURSOR * cursor,uint64_t * countp)238 cursor_count_items(WT_CURSOR *cursor, uint64_t *countp)
239 {
240 	int ret;
241 
242 	*countp = 0;
243 
244 	testutil_check(cursor->reset(cursor));
245 	while ((ret = cursor->next(cursor)) == 0)
246 		(*countp)++;
247 	testutil_assert(ret == WT_NOTFOUND);
248 }
249 
/*
 * disable_failures --
 *	Switch fault injection off by setting WT_FAIL_FS_ENABLE to "0" in
 *	the environment.
 */
static void
disable_failures(void)
{
	static const char * const name = "WT_FAIL_FS_ENABLE";

	testutil_check(setenv(name, "0", 1));
}
259 
/*
 * enable_failures --
 *	Switch fault injection on through the environment: set
 *	WT_FAIL_FS_ENABLE to "1" and publish the two limits as
 *	WT_FAIL_FS_WRITE_ALLOW and WT_FAIL_FS_READ_ALLOW.
 */
static void
enable_failures(uint64_t allow_writes, uint64_t allow_reads)
{
	char reads_str[100], writes_str[100];

	/* Format both limits first, then export everything together. */
	testutil_check(__wt_snprintf(
	    writes_str, sizeof(writes_str), "%" PRIu64, allow_writes));
	testutil_check(__wt_snprintf(
	    reads_str, sizeof(reads_str), "%" PRIu64, allow_reads));

	testutil_check(setenv("WT_FAIL_FS_ENABLE", "1", 1));
	testutil_check(setenv("WT_FAIL_FS_WRITE_ALLOW", writes_str, 1));
	testutil_check(setenv("WT_FAIL_FS_READ_ALLOW", reads_str, 1));
}
277 
/*
 * generate_key --
 *	Produce the key for record i of the "subtest" and "subtest2" tables:
 *	the record number converted to int.
 */
static void
generate_key(uint64_t i, int *keyp)
{
	int key;

	key = (int)i;
	*keyp = key;
}
287 
288 /*
289  * generate_value --
290  *	Generate values for the "subtest" table.
291  */
292 static void
generate_value(uint32_t rndint,uint64_t i,char * bigref,int * v0p,int * v1p,int * v2p,char ** bigp)293 generate_value(uint32_t rndint, uint64_t i, char *bigref,
294     int *v0p, int *v1p, int *v2p, char **bigp)
295 {
296 	*v0p = (int)(i * 7);
297 	*v1p = (int)(i * 10007);
298 	*v2p = (int)(i * 100000007);
299 	*bigp = &bigref[rndint % BIG_SIZE];
300 }
301 
302 /*
303  * run_check_subtest --
304  *	Run the subtest with the given parameters and check the results.
305  */
306 static void
run_check_subtest(TEST_OPTS * opts,const char * debugger,uint64_t nops,bool close_test,uint64_t * nresultsp)307 run_check_subtest(TEST_OPTS *opts, const char *debugger, uint64_t nops,
308     bool close_test, uint64_t *nresultsp)
309 {
310 	int estatus, narg;
311 	char rarg[20], sarg[20], *subtest_args[MAX_ARGS];
312 
313 	narg = 0;
314 	if (debugger != NULL) {
315 		subtest_args[narg++] = (char *)debugger;
316 		subtest_args[narg++] = (char *)"--";
317 	}
318 
319 	subtest_args[narg++] = (char *)opts->progname;
320 	/* "subtest" must appear before arguments */
321 	if (close_test)
322 		subtest_args[narg++] = (char *)"subtest_close";
323 	else
324 		subtest_args[narg++] = (char *)"subtest";
325 	subtest_args[narg++] = (char *)"-h";
326 	subtest_args[narg++] = opts->home;
327 	subtest_args[narg++] = (char *)"-v";	/* subtest is always verbose */
328 	subtest_args[narg++] = (char *)"-p";
329 	subtest_args[narg++] = (char *)"-o";
330 	testutil_check(__wt_snprintf(sarg, sizeof(sarg), "%" PRIu64, nops));
331 	subtest_args[narg++] = sarg;		/* number of operations */
332 	subtest_args[narg++] = (char *)"-n";
333 	testutil_check(__wt_snprintf(
334 	    rarg, sizeof(rarg), "%" PRIu64, opts->nrecords));
335 	subtest_args[narg++] = rarg;		/* number of records */
336 	subtest_args[narg++] = NULL;
337 	testutil_assert(narg <= MAX_ARGS);
338 	if (opts->verbose)
339 		printf("running a separate process with %" PRIu64
340 		    " operations until fail...\n", nops);
341 	testutil_clean_work_dir(opts->home);
342 	testutil_check(run_process(
343 	    opts, debugger != NULL ? debugger : opts->progname,
344 	    subtest_args, &estatus));
345 	if (opts->verbose)
346 		printf("process exited %d\n", estatus);
347 
348 	/*
349 	 * Verify results in parent process.
350 	 */
351 	testutil_check(check_results(opts, nresultsp));
352 }
353 
354 /*
355  * run_check_subtest_range --
356  *
357  * Run successive tests via binary search that determines the approximate
358  * crossover point between when data is recoverable or not. Once that is
359  * determined, run the subtest in a range near that crossover point.
360  *
361  * The theory is that running at the crossover point will tend to trigger
362  * "interesting" failures at the borderline when the checkpoint is about to,
363  * or has, succeeded.  If any of those failures creates a WT home directory
364  * that cannot be recovered, the top level test will fail.
365  */
366 static void
run_check_subtest_range(TEST_OPTS * opts,const char * debugger,bool close_test)367 run_check_subtest_range(TEST_OPTS *opts, const char *debugger, bool close_test)
368 {
369 	uint64_t cutoff, high, low, mid, nops, nresults;
370 	int i;
371 	bool got_failure, got_success;
372 
373 	if (opts->verbose)
374 		printf("Determining best range of operations until failure, "
375 		    "with close_test %s.\n",
376 		    (close_test ? "enabled" : "disabled"));
377 
378 	run_check_subtest(opts, debugger, 1, close_test, &cutoff);
379 	low = 0;
380 	high = MAX_OP_RANGE;
381 	mid = (low + high) / 2;
382 	while (mid != low) {
383 		run_check_subtest(opts, debugger, mid, close_test,
384 		    &nresults);
385 		if (nresults > cutoff)
386 			high = mid;
387 		else
388 			low = mid;
389 		mid = (low + high) / 2;
390 	}
391 	/*
392 	 * mid is the number of ops that is the crossover point.
393 	 * Run some tests near that point to try to trigger weird
394 	 * failures.  If mid is too low or too high, it indicates
395 	 * there is a fundamental problem with the test.
396 	 */
397 	testutil_assert(mid > 1 && mid < MAX_OP_RANGE - 1);
398 	if (opts->verbose)
399 		printf("Retesting around %" PRIu64 " operations.\n",
400 		    mid);
401 
402 	got_failure = false;
403 	got_success = false;
404 	for (nops = mid - 10; nops < mid + 10; nops++) {
405 		for (i = 0; i < TESTS_PER_OP_VALUE; i++) {
406 			run_check_subtest(opts, debugger, nops,
407 			    close_test, &nresults);
408 			if (nresults > cutoff)
409 				got_failure = true;
410 			else
411 				got_success = true;
412 		}
413 	}
414 	/*
415 	 * Check that it really ran with a crossover point.
416 	 */
417 	testutil_assert(got_failure);
418 	testutil_assert(got_success);
419 }
420 
421 /*
422  * run_process --
423  *	Run a program with arguments, wait until it completes.
424  */
425 static int
run_process(TEST_OPTS * opts,const char * prog,char * argv[],int * status)426 run_process(TEST_OPTS *opts, const char *prog, char *argv[], int *status)
427 {
428 	int pid;
429 	char **arg;
430 
431 	if (opts->verbose) {
432 		printf("running: ");
433 		for (arg = argv; *arg != NULL; arg++)
434 			printf("%s ", *arg);
435 		printf("\n");
436 	}
437 	if ((pid = fork()) == 0) {
438 		(void)execv(prog, argv);
439 		testutil_die(errno, "%s", prog);
440 	} else if (pid < 0)
441 		return (errno);
442 
443 	(void)waitpid(pid, status, 0);
444 	return (0);
445 }
446 
447 /*
448  * subtest_error_handler --
449  *     Error event handler.
450  */
451 static int
subtest_error_handler(WT_EVENT_HANDLER * handler,WT_SESSION * session,int error,const char * message)452 subtest_error_handler(WT_EVENT_HANDLER *handler,
453     WT_SESSION *session, int error, const char *message)
454 {
455 	(void)(handler);
456 	(void)(session);
457 	(void)(message);
458 
459 	/* Exit on panic, there's no checking to be done. */
460 	if (error == WT_PANIC)
461 		exit (1);
462 	return (0);
463 }
464 
/*
 * Event handlers passed to wiredtiger_open() in subtest_main(): only the
 * first slot is populated, with subtest_error_handler.
 */
static WT_EVENT_HANDLER event_handler = {
	subtest_error_handler,
	NULL,   /* Message handler */
	NULL,   /* Progress handler */
	NULL    /* Close handler */
};
471 
472 /*
473  * subtest_main --
474  *	The main program for the subtest
475  */
476 static void
subtest_main(int argc,char * argv[],bool close_test)477 subtest_main(int argc, char *argv[], bool close_test)
478 {
479 	struct rlimit rlim;
480 	TEST_OPTS *opts, _opts;
481 	WT_SESSION *session;
482 	char config[1024], filename[1024];
483 
484 	opts = &_opts;
485 	memset(opts, 0, sizeof(*opts));
486 	memset(&rlim, 0, sizeof(rlim));
487 
488 	/* No core files during fault injection tests. */
489 	testutil_check(setrlimit(RLIMIT_CORE, &rlim));
490 	testutil_check(testutil_parse_opts(argc, argv, opts));
491 	testutil_make_work_dir(opts->home);
492 
493 	/* Redirect stderr, stdout. */
494 	testutil_check(__wt_snprintf(
495 	    filename, sizeof(filename), "%s/%s", opts->home, STDERR_FILE));
496 	testutil_assert(freopen(filename, "a", stderr) != NULL);
497 	testutil_check(__wt_snprintf(
498 	    filename, sizeof(filename), "%s/%s", opts->home, STDOUT_FILE));
499 	testutil_assert(freopen(filename, "a", stdout) != NULL);
500 	testutil_check(__wt_snprintf(config, sizeof(config),
501 	    "create,cache_size=250M,log=(enabled),"
502 	    "transaction_sync=(enabled,method=none),extensions=("
503 	    WT_FAIL_FS_LIB
504 	    "=(early_load,config={environment=true,verbose=true})]"));
505 
506 	testutil_check(
507 	    wiredtiger_open(opts->home, &event_handler, config, &opts->conn));
508 	testutil_check(
509 	    opts->conn->open_session(opts->conn, NULL, NULL, &session));
510 
511 	testutil_check(session->create(session, "table:subtest",
512 	    "key_format=i,value_format=iiiS,"
513 	    "columns=(id,v0,v1,v2,big)"));
514 
515 	testutil_check(session->create(session, "table:subtest2",
516 	    "key_format=i,value_format=i"));
517 
518 	testutil_check(session->create(session, "index:subtest:v0",
519 	    "columns=(v0)"));
520 	testutil_check(session->create(session, "index:subtest:v1",
521 	    "columns=(v1)"));
522 	testutil_check(session->create(session, "index:subtest:v2",
523 	    "columns=(v2)"));
524 
525 	testutil_check(session->close(session, NULL));
526 
527 	subtest_populate(opts, close_test);
528 
529 	testutil_cleanup(opts);
530 }
531 
/*
 * This macro is used as a substitute for testutil_check, except that it is
 * aware of when a failure may be expected due to the effects of the fail_fs.
 * This macro is used only in subtest_populate(), it uses local variables.
 *
 * expr is evaluated once. A zero result does nothing. A non-zero result sets
 * the caller's local "failed" flag when failmode is true and the result is
 * WT_RUN_RECOVERY or EIO; any other non-zero result is reported on stderr and
 * handed to testutil_check.
 *
 * The body is wrapped in do/while (0) so that "CHECK(...);" is a single
 * statement (safe in an unbraced if/else), and both arguments are
 * parenthesized so that compound expressions are evaluated as a unit.
 */
#define	CHECK(expr, failmode) do {					\
	int _ret;							\
	_ret = (expr);							\
	if (_ret != 0) {						\
		if (!(failmode) ||					\
		    (_ret != WT_RUN_RECOVERY && _ret != EIO)) {		\
			fprintf(stderr, "  BAD RETURN %d for \"%s\"\n", \
			    _ret, #expr);				\
			testutil_check(_ret);				\
		} else							\
			failed = true;					\
	}								\
} while (0)
550 
551 /*
552  * subtest_populate --
553  *	Populate the tables.
554  */
555 static void
subtest_populate(TEST_OPTS * opts,bool close_test)556 subtest_populate(TEST_OPTS *opts, bool close_test)
557 {
558 	WT_CURSOR *maincur, *maincur2;
559 	WT_RAND_STATE rnd;
560 	WT_SESSION *session;
561 	uint64_t i, nrecords;
562 	uint32_t rndint;
563 	int key, v0, v1, v2;
564 	char *big, *bigref;
565 	bool failed;
566 
567 	failed = false;
568 	__wt_random_init_seed(NULL, &rnd);
569 	CHECK(create_big_string(&bigref), false);
570 	nrecords = opts->nrecords;
571 
572 	CHECK(opts->conn->open_session(
573 	    opts->conn, NULL, NULL, &session), false);
574 
575 	CHECK(session->open_cursor(session, "table:subtest", NULL,
576 	    NULL, &maincur), false);
577 
578 	CHECK(session->open_cursor(session, "table:subtest2", NULL,
579 	    NULL, &maincur2), false);
580 
581 	for (i = 0; i < nrecords && !failed; i++) {
582 		rndint = __wt_random(&rnd);
583 		generate_key(i, &key);
584 		generate_value(rndint, i, bigref, &v0, &v1, &v2, &big);
585 		CHECK(session->begin_transaction(session, NULL), false);
586 		maincur->set_key(maincur, key);
587 		maincur->set_value(maincur, v0, v1, v2, big);
588 		CHECK(maincur->insert(maincur), false);
589 
590 		maincur2->set_key(maincur2, key);
591 		maincur2->set_value(maincur2, rndint);
592 		CHECK(maincur2->insert(maincur2), false);
593 		CHECK(session->commit_transaction(session, NULL), false);
594 
595 		if (i == 0)
596 			/*
597 			 * Force an initial checkpoint, that helps to
598 			 * distinguish a clear failure from just not running
599 			 * long enough.
600 			 */
601 			CHECK(session->checkpoint(session, NULL), false);
602 
603 		if ((i + 1) % VERBOSE_PRINT == 0 && opts->verbose)
604 			printf("  %" PRIu64 "/%" PRIu64 "\n",
605 			    (i + 1), nrecords);
606 		/* Attempt to isolate the failures to checkpointing. */
607 		if (i == (nrecords/100)) {
608 			enable_failures(opts->nops, 1000000);
609 			/* CHECK should expect failures. */
610 			CHECK(session->checkpoint(session, NULL), true);
611 			disable_failures();
612 			if (failed && opts->verbose)
613 				printf("checkpoint failed (expected).\n");
614 		}
615 	}
616 
617 	/*
618 	 * Closing handles after an extreme fail is likely to cause
619 	 * cascading failures (or crashes), so recommended practice is
620 	 * to immediately exit. We're interested in testing both with
621 	 * and without the recommended practice.
622 	 */
623 	if (failed) {
624 		if (!close_test) {
625 			fprintf(stderr, "exit early.\n");
626 			exit(0);
627 		} else
628 			fprintf(stderr, "closing after failure.\n");
629 	}
630 
631 	free(bigref);
632 	CHECK(maincur->close(maincur), false);
633 	CHECK(maincur2->close(maincur2), false);
634 	CHECK(session->close(session, NULL), false);
635 }
636 
637 /*
638  * main --
639  *	The main program for the test. When invoked with "subtest"
640  *	argument, run the subtest. Otherwise, run a separate process
641  *	for each needed subtest, and check the results.
642  */
643 int
main(int argc,char * argv[])644 main(int argc, char *argv[])
645 {
646 	TEST_OPTS *opts, _opts;
647 	uint64_t nresults;
648 	const char *debugger;
649 
650 	/* Ignore unless requested */
651 	if (!testutil_is_flag_set("TESTUTIL_ENABLE_LONG_TESTS"))
652 		return (EXIT_SUCCESS);
653 
654 	opts = &_opts;
655 	memset(opts, 0, sizeof(*opts));
656 	debugger = NULL;
657 
658 	testutil_check(testutil_parse_opts(argc, argv, opts));
659 	argc -= __wt_optind;
660 	argv += __wt_optind;
661 	if (opts->nrecords == 0)
662 		opts->nrecords = 50000;
663 
664 	while (argc > 0) {
665 		if (strcmp(argv[0], "subtest") == 0) {
666 			subtest_main(argc, argv, false);
667 			return (0);
668 		} else if (strcmp(argv[0], "subtest_close") == 0) {
669 			subtest_main(argc, argv, true);
670 			return (0);
671 		} else if (strcmp(argv[0], "gdb") == 0)
672 			debugger = "/usr/bin/gdb";
673 		else
674 			testutil_assert(false);
675 		argc--;
676 		argv++;
677 	}
678 	if (opts->verbose) {
679 		printf("Number of operations until failure: %" PRIu64
680 		    "  (change with -o N)\n", opts->nops);
681 		printf("Number of records: %" PRIu64
682 		    "  (change with -n N)\n", opts->nrecords);
683 	}
684 	if (opts->nops == 0) {
685 		run_check_subtest_range(opts, debugger, false);
686 		run_check_subtest_range(opts, debugger, true);
687 	} else
688 		run_check_subtest(opts, debugger, opts->nops,
689 		    opts->nrecords, &nresults);
690 
691 	testutil_clean_work_dir(opts->home);
692 	testutil_cleanup(opts);
693 
694 	return (0);
695 }
696