1 /*
2  *	pg_test_fsync.c
3  *		tests all supported fsync() methods
4  */
5 
#include "postgres_fe.h"

#include <sys/stat.h>
#include <sys/time.h>
#include <fcntl.h>
#include <limits.h>
#include <time.h>
#include <unistd.h>
#include <signal.h>

#include "getopt_long.h"
#include "access/xlogdefs.h"
17 
18 
/*
 * put the temp files in the local directory
 * unless the user specifies otherwise
 */
#define FSYNC_FILENAME	"./pg_test_fsync.out"

/* XLOG_BLCKSZ expressed in kB, for the "%dkB" progress messages */
#define XLOG_BLCKSZ_K	(XLOG_BLCKSZ / 1024)

/*
 * Output layout: every result line starts with a label printed with
 * LABEL_FORMAT, followed either by a "not available" marker (NA_FORMAT)
 * or by the measured rate (OPS_FORMAT).
 */
#define LABEL_FORMAT		"        %-30s"
#define NA_FORMAT			"%21s\n"
/* translator: maintain alignment with NA_FORMAT */
#define OPS_FORMAT			gettext_noop("%13.3f ops/sec  %6.0f usecs/op\n")
/* microseconds per second; converts per-operation time to usecs */
#define USECS_SEC			1000000
32 
/*
 * Timing macros.
 *
 * START_TIMER clears alarm_triggered, arranges for it to be set again after
 * secs_per_test seconds, and records the start time in start_t.  STOP_TIMER
 * records the stop time in stop_t and prints the result; it expects a local
 * variable named "ops" holding the number of completed operations.
 *
 * These are macros to avoid timing the function call overhead.
 */
#ifndef WIN32
#define START_TIMER \
do { \
	alarm_triggered = false; \
	alarm(secs_per_test); \
	gettimeofday(&start_t, NULL); \
} while (0)
#else
/*
 * WIN32 doesn't support alarm, so we create a thread and sleep there.
 *
 * CreateThread() reports failure by returning NULL, not
 * INVALID_HANDLE_VALUE.  We never wait on the thread, so its handle is
 * closed immediately; that releases the handle without stopping the thread.
 */
#define START_TIMER \
do { \
	HANDLE		alarm_thread; \
	alarm_triggered = false; \
	alarm_thread = CreateThread(NULL, 0, process_alarm, NULL, 0, NULL); \
	if (alarm_thread == NULL) \
	{ \
		fprintf(stderr, _("Could not create thread for alarm\n")); \
		exit(1); \
	} \
	CloseHandle(alarm_thread); \
	gettimeofday(&start_t, NULL); \
} while (0)
#endif

#define STOP_TIMER	\
do { \
	gettimeofday(&stop_t, NULL); \
	print_elapse(start_t, stop_t, ops); \
} while (0)
61 
62 
63 static const char *progname;
64 
65 static int	secs_per_test = 5;
66 static int	needs_unlink = 0;
67 static char full_buf[DEFAULT_XLOG_SEG_SIZE],
68 		   *buf,
69 		   *filename = FSYNC_FILENAME;
70 static struct timeval start_t,
71 			stop_t;
72 static bool alarm_triggered = false;
73 
74 
75 static void handle_args(int argc, char *argv[]);
76 static void prepare_buf(void);
77 static void test_open(void);
78 static void test_non_sync(void);
79 static void test_sync(int writes_per_op);
80 static void test_open_syncs(void);
81 static void test_open_sync(const char *msg, int writes_size);
82 static void test_file_descriptor_sync(void);
83 
84 #ifndef WIN32
85 static void process_alarm(int sig);
86 #else
87 static DWORD WINAPI process_alarm(LPVOID param);
88 #endif
89 static void signal_cleanup(int sig);
90 
91 #ifdef HAVE_FSYNC_WRITETHROUGH
92 static int	pg_fsync_writethrough(int fd);
93 #endif
94 static void print_elapse(struct timeval start_t, struct timeval stop_t, int ops);
95 static void die(const char *str);
96 
97 
98 int
main(int argc,char * argv[])99 main(int argc, char *argv[])
100 {
101 	set_pglocale_pgservice(argv[0], PG_TEXTDOMAIN("pg_test_fsync"));
102 	progname = get_progname(argv[0]);
103 
104 	handle_args(argc, argv);
105 
106 	/* Prevent leaving behind the test file */
107 	pqsignal(SIGINT, signal_cleanup);
108 	pqsignal(SIGTERM, signal_cleanup);
109 #ifndef WIN32
110 	pqsignal(SIGALRM, process_alarm);
111 #endif
112 #ifdef SIGHUP
113 	/* Not defined on win32 */
114 	pqsignal(SIGHUP, signal_cleanup);
115 #endif
116 
117 	prepare_buf();
118 
119 	test_open();
120 
121 	/* Test using 1 XLOG_BLCKSZ write */
122 	test_sync(1);
123 
124 	/* Test using 2 XLOG_BLCKSZ writes */
125 	test_sync(2);
126 
127 	test_open_syncs();
128 
129 	test_file_descriptor_sync();
130 
131 	test_non_sync();
132 
133 	unlink(filename);
134 
135 	return 0;
136 }
137 
138 static void
handle_args(int argc,char * argv[])139 handle_args(int argc, char *argv[])
140 {
141 	static struct option long_options[] = {
142 		{"filename", required_argument, NULL, 'f'},
143 		{"secs-per-test", required_argument, NULL, 's'},
144 		{NULL, 0, NULL, 0}
145 	};
146 
147 	int			option;			/* Command line option */
148 	int			optindex = 0;	/* used by getopt_long */
149 
150 	if (argc > 1)
151 	{
152 		if (strcmp(argv[1], "--help") == 0 || strcmp(argv[1], "-?") == 0)
153 		{
154 			printf(_("Usage: %s [-f FILENAME] [-s SECS-PER-TEST]\n"), progname);
155 			exit(0);
156 		}
157 		if (strcmp(argv[1], "--version") == 0 || strcmp(argv[1], "-V") == 0)
158 		{
159 			puts("pg_test_fsync (PostgreSQL) " PG_VERSION);
160 			exit(0);
161 		}
162 	}
163 
164 	while ((option = getopt_long(argc, argv, "f:s:",
165 								 long_options, &optindex)) != -1)
166 	{
167 		switch (option)
168 		{
169 			case 'f':
170 				filename = strdup(optarg);
171 				break;
172 
173 			case 's':
174 				secs_per_test = atoi(optarg);
175 				break;
176 
177 			default:
178 				fprintf(stderr, _("Try \"%s --help\" for more information.\n"),
179 						progname);
180 				exit(1);
181 				break;
182 		}
183 	}
184 
185 	if (argc > optind)
186 	{
187 		fprintf(stderr,
188 				_("%s: too many command-line arguments (first is \"%s\")\n"),
189 				progname, argv[optind]);
190 		fprintf(stderr, _("Try \"%s --help\" for more information.\n"),
191 				progname);
192 		exit(1);
193 	}
194 
195 	printf(ngettext("%d second per test\n",
196 					"%d seconds per test\n",
197 					secs_per_test),
198 		   secs_per_test);
199 #if PG_O_DIRECT != 0
200 	printf(_("O_DIRECT supported on this platform for open_datasync and open_sync.\n"));
201 #else
202 	printf(_("Direct I/O is not supported on this platform.\n"));
203 #endif
204 }
205 
206 static void
prepare_buf(void)207 prepare_buf(void)
208 {
209 	int			ops;
210 
211 	/* write random data into buffer */
212 	for (ops = 0; ops < DEFAULT_XLOG_SEG_SIZE; ops++)
213 		full_buf[ops] = random();
214 
215 	buf = (char *) TYPEALIGN(XLOG_BLCKSZ, full_buf);
216 }
217 
218 static void
test_open(void)219 test_open(void)
220 {
221 	int			tmpfile;
222 
223 	/*
224 	 * test if we can open the target file
225 	 */
226 	if ((tmpfile = open(filename, O_RDWR | O_CREAT | PG_BINARY, S_IRUSR | S_IWUSR)) == -1)
227 		die("could not open output file");
228 	needs_unlink = 1;
229 	if (write(tmpfile, full_buf, DEFAULT_XLOG_SEG_SIZE) !=
230 		DEFAULT_XLOG_SEG_SIZE)
231 		die("write failed");
232 
233 	/* fsync now so that dirty buffers don't skew later tests */
234 	if (fsync(tmpfile) != 0)
235 		die("fsync failed");
236 
237 	close(tmpfile);
238 }
239 
240 static void
test_sync(int writes_per_op)241 test_sync(int writes_per_op)
242 {
243 	int			tmpfile,
244 				ops,
245 				writes;
246 	bool		fs_warning = false;
247 
248 	if (writes_per_op == 1)
249 		printf(_("\nCompare file sync methods using one %dkB write:\n"), XLOG_BLCKSZ_K);
250 	else
251 		printf(_("\nCompare file sync methods using two %dkB writes:\n"), XLOG_BLCKSZ_K);
252 	printf(_("(in wal_sync_method preference order, except fdatasync is Linux's default)\n"));
253 
254 	/*
255 	 * Test open_datasync if available
256 	 */
257 	printf(LABEL_FORMAT, "open_datasync");
258 	fflush(stdout);
259 
260 #ifdef OPEN_DATASYNC_FLAG
261 	if ((tmpfile = open(filename, O_RDWR | O_DSYNC | PG_O_DIRECT | PG_BINARY, 0)) == -1)
262 	{
263 		printf(NA_FORMAT, _("n/a*"));
264 		fs_warning = true;
265 	}
266 	else
267 	{
268 		START_TIMER;
269 		for (ops = 0; alarm_triggered == false; ops++)
270 		{
271 			for (writes = 0; writes < writes_per_op; writes++)
272 				if (write(tmpfile, buf, XLOG_BLCKSZ) != XLOG_BLCKSZ)
273 					die("write failed");
274 			if (lseek(tmpfile, 0, SEEK_SET) == -1)
275 				die("seek failed");
276 		}
277 		STOP_TIMER;
278 		close(tmpfile);
279 	}
280 #else
281 	printf(NA_FORMAT, _("n/a"));
282 #endif
283 
284 /*
285  * Test fdatasync if available
286  */
287 	printf(LABEL_FORMAT, "fdatasync");
288 	fflush(stdout);
289 
290 #ifdef HAVE_FDATASYNC
291 	if ((tmpfile = open(filename, O_RDWR | PG_BINARY, 0)) == -1)
292 		die("could not open output file");
293 	START_TIMER;
294 	for (ops = 0; alarm_triggered == false; ops++)
295 	{
296 		for (writes = 0; writes < writes_per_op; writes++)
297 			if (write(tmpfile, buf, XLOG_BLCKSZ) != XLOG_BLCKSZ)
298 				die("write failed");
299 		fdatasync(tmpfile);
300 		if (lseek(tmpfile, 0, SEEK_SET) == -1)
301 			die("seek failed");
302 	}
303 	STOP_TIMER;
304 	close(tmpfile);
305 #else
306 	printf(NA_FORMAT, _("n/a"));
307 #endif
308 
309 /*
310  * Test fsync
311  */
312 	printf(LABEL_FORMAT, "fsync");
313 	fflush(stdout);
314 
315 	if ((tmpfile = open(filename, O_RDWR | PG_BINARY, 0)) == -1)
316 		die("could not open output file");
317 	START_TIMER;
318 	for (ops = 0; alarm_triggered == false; ops++)
319 	{
320 		for (writes = 0; writes < writes_per_op; writes++)
321 			if (write(tmpfile, buf, XLOG_BLCKSZ) != XLOG_BLCKSZ)
322 				die("write failed");
323 		if (fsync(tmpfile) != 0)
324 			die("fsync failed");
325 		if (lseek(tmpfile, 0, SEEK_SET) == -1)
326 			die("seek failed");
327 	}
328 	STOP_TIMER;
329 	close(tmpfile);
330 
331 /*
332  * If fsync_writethrough is available, test as well
333  */
334 	printf(LABEL_FORMAT, "fsync_writethrough");
335 	fflush(stdout);
336 
337 #ifdef HAVE_FSYNC_WRITETHROUGH
338 	if ((tmpfile = open(filename, O_RDWR | PG_BINARY, 0)) == -1)
339 		die("could not open output file");
340 	START_TIMER;
341 	for (ops = 0; alarm_triggered == false; ops++)
342 	{
343 		for (writes = 0; writes < writes_per_op; writes++)
344 			if (write(tmpfile, buf, XLOG_BLCKSZ) != XLOG_BLCKSZ)
345 				die("write failed");
346 		if (pg_fsync_writethrough(tmpfile) != 0)
347 			die("fsync failed");
348 		if (lseek(tmpfile, 0, SEEK_SET) == -1)
349 			die("seek failed");
350 	}
351 	STOP_TIMER;
352 	close(tmpfile);
353 #else
354 	printf(NA_FORMAT, _("n/a"));
355 #endif
356 
357 /*
358  * Test open_sync if available
359  */
360 	printf(LABEL_FORMAT, "open_sync");
361 	fflush(stdout);
362 
363 #ifdef OPEN_SYNC_FLAG
364 	if ((tmpfile = open(filename, O_RDWR | OPEN_SYNC_FLAG | PG_O_DIRECT | PG_BINARY, 0)) == -1)
365 	{
366 		printf(NA_FORMAT, _("n/a*"));
367 		fs_warning = true;
368 	}
369 	else
370 	{
371 		START_TIMER;
372 		for (ops = 0; alarm_triggered == false; ops++)
373 		{
374 			for (writes = 0; writes < writes_per_op; writes++)
375 				if (write(tmpfile, buf, XLOG_BLCKSZ) != XLOG_BLCKSZ)
376 
377 					/*
378 					 * This can generate write failures if the filesystem has
379 					 * a large block size, e.g. 4k, and there is no support
380 					 * for O_DIRECT writes smaller than the file system block
381 					 * size, e.g. XFS.
382 					 */
383 					die("write failed");
384 			if (lseek(tmpfile, 0, SEEK_SET) == -1)
385 				die("seek failed");
386 		}
387 		STOP_TIMER;
388 		close(tmpfile);
389 	}
390 #else
391 	printf(NA_FORMAT, _("n/a"));
392 #endif
393 
394 	if (fs_warning)
395 	{
396 		printf(_("* This file system and its mount options do not support direct\n"
397 				 "  I/O, e.g. ext4 in journaled mode.\n"));
398 	}
399 }
400 
401 static void
test_open_syncs(void)402 test_open_syncs(void)
403 {
404 	printf(_("\nCompare open_sync with different write sizes:\n"));
405 	printf(_("(This is designed to compare the cost of writing 16kB in different write\n"
406 			 "open_sync sizes.)\n"));
407 
408 	test_open_sync(_(" 1 * 16kB open_sync write"), 16);
409 	test_open_sync(_(" 2 *  8kB open_sync writes"), 8);
410 	test_open_sync(_(" 4 *  4kB open_sync writes"), 4);
411 	test_open_sync(_(" 8 *  2kB open_sync writes"), 2);
412 	test_open_sync(_("16 *  1kB open_sync writes"), 1);
413 }
414 
415 /*
416  * Test open_sync with different size files
417  */
418 static void
test_open_sync(const char * msg,int writes_size)419 test_open_sync(const char *msg, int writes_size)
420 {
421 #ifdef OPEN_SYNC_FLAG
422 	int			tmpfile,
423 				ops,
424 				writes;
425 #endif
426 
427 	printf(LABEL_FORMAT, msg);
428 	fflush(stdout);
429 
430 #ifdef OPEN_SYNC_FLAG
431 	if ((tmpfile = open(filename, O_RDWR | OPEN_SYNC_FLAG | PG_O_DIRECT | PG_BINARY, 0)) == -1)
432 		printf(NA_FORMAT, _("n/a*"));
433 	else
434 	{
435 		START_TIMER;
436 		for (ops = 0; alarm_triggered == false; ops++)
437 		{
438 			for (writes = 0; writes < 16 / writes_size; writes++)
439 				if (write(tmpfile, buf, writes_size * 1024) !=
440 					writes_size * 1024)
441 					die("write failed");
442 			if (lseek(tmpfile, 0, SEEK_SET) == -1)
443 				die("seek failed");
444 		}
445 		STOP_TIMER;
446 		close(tmpfile);
447 	}
448 #else
449 	printf(NA_FORMAT, _("n/a"));
450 #endif
451 }
452 
453 static void
test_file_descriptor_sync(void)454 test_file_descriptor_sync(void)
455 {
456 	int			tmpfile,
457 				ops;
458 
459 	/*
460 	 * Test whether fsync can sync data written on a different descriptor for
461 	 * the same file.  This checks the efficiency of multi-process fsyncs
462 	 * against the same file. Possibly this should be done with writethrough
463 	 * on platforms which support it.
464 	 */
465 	printf(_("\nTest if fsync on non-write file descriptor is honored:\n"));
466 	printf(_("(If the times are similar, fsync() can sync data written on a different\n"
467 			 "descriptor.)\n"));
468 
469 	/*
470 	 * first write, fsync and close, which is the normal behavior without
471 	 * multiple descriptors
472 	 */
473 	printf(LABEL_FORMAT, "write, fsync, close");
474 	fflush(stdout);
475 
476 	START_TIMER;
477 	for (ops = 0; alarm_triggered == false; ops++)
478 	{
479 		if ((tmpfile = open(filename, O_RDWR | PG_BINARY, 0)) == -1)
480 			die("could not open output file");
481 		if (write(tmpfile, buf, XLOG_BLCKSZ) != XLOG_BLCKSZ)
482 			die("write failed");
483 		if (fsync(tmpfile) != 0)
484 			die("fsync failed");
485 		close(tmpfile);
486 
487 		/*
488 		 * open and close the file again to be consistent with the following
489 		 * test
490 		 */
491 		if ((tmpfile = open(filename, O_RDWR | PG_BINARY, 0)) == -1)
492 			die("could not open output file");
493 		close(tmpfile);
494 	}
495 	STOP_TIMER;
496 
497 	/*
498 	 * Now open, write, close, open again and fsync This simulates processes
499 	 * fsyncing each other's writes.
500 	 */
501 	printf(LABEL_FORMAT, "write, close, fsync");
502 	fflush(stdout);
503 
504 	START_TIMER;
505 	for (ops = 0; alarm_triggered == false; ops++)
506 	{
507 		if ((tmpfile = open(filename, O_RDWR | PG_BINARY, 0)) == -1)
508 			die("could not open output file");
509 		if (write(tmpfile, buf, XLOG_BLCKSZ) != XLOG_BLCKSZ)
510 			die("write failed");
511 		close(tmpfile);
512 		/* reopen file */
513 		if ((tmpfile = open(filename, O_RDWR | PG_BINARY, 0)) == -1)
514 			die("could not open output file");
515 		if (fsync(tmpfile) != 0)
516 			die("fsync failed");
517 		close(tmpfile);
518 	}
519 	STOP_TIMER;
520 }
521 
522 static void
test_non_sync(void)523 test_non_sync(void)
524 {
525 	int			tmpfile,
526 				ops;
527 
528 	/*
529 	 * Test a simple write without fsync
530 	 */
531 	printf(_("\nNon-sync'ed %dkB writes:\n"), XLOG_BLCKSZ_K);
532 	printf(LABEL_FORMAT, "write");
533 	fflush(stdout);
534 
535 	START_TIMER;
536 	for (ops = 0; alarm_triggered == false; ops++)
537 	{
538 		if ((tmpfile = open(filename, O_RDWR | PG_BINARY, 0)) == -1)
539 			die("could not open output file");
540 		if (write(tmpfile, buf, XLOG_BLCKSZ) != XLOG_BLCKSZ)
541 			die("write failed");
542 		close(tmpfile);
543 	}
544 	STOP_TIMER;
545 }
546 
547 static void
signal_cleanup(int signum)548 signal_cleanup(int signum)
549 {
550 	/* Delete the file if it exists. Ignore errors */
551 	if (needs_unlink)
552 		unlink(filename);
553 	/* Finish incomplete line on stdout */
554 	puts("");
555 	exit(signum);
556 }
557 
558 #ifdef HAVE_FSYNC_WRITETHROUGH
559 
/*
 * Sync fd using the platform's write-through mechanism: _commit() on
 * WIN32, fcntl(F_FULLFSYNC) where that is defined.  On any other platform
 * this fails with errno set to ENOSYS.
 *
 * Returns _commit()'s result on WIN32; elsewhere 0 on success and -1 on
 * failure.
 */
static int
pg_fsync_writethrough(int fd)
{
	int			rc;

#if defined(WIN32)
	rc = _commit(fd);
#elif defined(F_FULLFSYNC)
	if (fcntl(fd, F_FULLFSYNC, 0) == -1)
		rc = -1;
	else
		rc = 0;
#else
	errno = ENOSYS;
	rc = -1;
#endif

	return rc;
}
572 #endif
573 
574 /*
575  * print out the writes per second for tests
576  */
577 static void
print_elapse(struct timeval start_t,struct timeval stop_t,int ops)578 print_elapse(struct timeval start_t, struct timeval stop_t, int ops)
579 {
580 	double		total_time = (stop_t.tv_sec - start_t.tv_sec) +
581 	(stop_t.tv_usec - start_t.tv_usec) * 0.000001;
582 	double		per_second = ops / total_time;
583 	double		avg_op_time_us = (total_time / ops) * USECS_SEC;
584 
585 	printf(_(OPS_FORMAT), per_second, avg_op_time_us);
586 }
587 
588 #ifndef WIN32
589 static void
process_alarm(int sig)590 process_alarm(int sig)
591 {
592 	alarm_triggered = true;
593 }
594 #else
595 static DWORD WINAPI
process_alarm(LPVOID param)596 process_alarm(LPVOID param)
597 {
598 	/* WIN32 doesn't support alarm, so we create a thread and sleep here */
599 	Sleep(secs_per_test * 1000);
600 	alarm_triggered = true;
601 	ExitThread(0);
602 }
603 #endif
604 
/*
 * Report a fatal error on stderr as "<message>: <system error text>", using
 * the current errno, and exit with status 1.
 */
static void
die(const char *str)
{
	int			saved_errno = errno;

	fprintf(stderr, _("%s: %s\n"), _(str), strerror(saved_errno));
	exit(1);
}
611