1 /*
2  *	pg_test_fsync.c
3  *		tests all supported fsync() methods
4  */
5 
#include "postgres_fe.h"

#include <sys/stat.h>
#include <sys/time.h>
#include <fcntl.h>
#include <limits.h>
#include <time.h>
#include <unistd.h>
#include <signal.h>

#include "getopt_long.h"
#include "access/xlogdefs.h"
#include "common/logging.h"
18 
19 
20 /*
21  * put the temp files in the local directory
22  * unless the user specifies otherwise
23  */
#define FSYNC_FILENAME	"./pg_test_fsync.out"

/* WAL block size expressed in kilobytes, for use in messages */
#define XLOG_BLCKSZ_K	(XLOG_BLCKSZ / 1024)

/* Left-hand label column of every result line */
#define LABEL_FORMAT		"        %-30s"
/* Right-hand column used when a method is not available */
#define NA_FORMAT			"%21s\n"
/* translator: maintain alignment with NA_FORMAT */
#define OPS_FORMAT			gettext_noop("%13.3f ops/sec  %6.0f usecs/op\n")
/* Microseconds per second */
#define USECS_SEC			1000000
33 
/*
 * These are macros to avoid timing the function call overhead.
 *
 * START_TIMER clears alarm_triggered, arranges for it to be set again after
 * secs_per_test seconds, and records the start time in start_t.
 * STOP_TIMER records the stop time in stop_t and prints the result; it
 * expects a local variable named "ops" holding the operation count.
 */
#ifndef WIN32
#define START_TIMER \
do { \
	alarm_triggered = false; \
	alarm(secs_per_test); \
	gettimeofday(&start_t, NULL); \
} while (0)
#else
/*
 * WIN32 doesn't support alarm, so we create a thread and sleep there.
 *
 * CreateThread() reports failure by returning NULL (not
 * INVALID_HANDLE_VALUE).  The handle is closed right away since nothing
 * waits on the thread; closing it does not stop the thread, it only
 * releases our reference so handles do not pile up across tests.
 */
#define START_TIMER \
do { \
	HANDLE		alarm_thread; \
	alarm_triggered = false; \
	alarm_thread = CreateThread(NULL, 0, process_alarm, NULL, 0, NULL); \
	if (alarm_thread == NULL) \
	{ \
		pg_log_error("could not create thread for alarm"); \
		exit(1); \
	} \
	CloseHandle(alarm_thread); \
	gettimeofday(&start_t, NULL); \
} while (0)
#endif

#define STOP_TIMER	\
do { \
	gettimeofday(&stop_t, NULL); \
	print_elapse(start_t, stop_t, ops); \
} while (0)
62 
63 
64 static const char *progname;
65 
66 static int	secs_per_test = 5;
67 static int	needs_unlink = 0;
68 static char full_buf[DEFAULT_XLOG_SEG_SIZE],
69 		   *buf,
70 		   *filename = FSYNC_FILENAME;
71 static struct timeval start_t,
72 			stop_t;
73 static bool alarm_triggered = false;
74 
75 
/* Option parsing and buffer setup */
static void handle_args(int argc, char *argv[]);
static void prepare_buf(void);

/* Individual benchmarks, in the order main() runs them */
static void test_open(void);
static void test_non_sync(void);
static void test_sync(int writes_per_op);
static void test_open_syncs(void);
static void test_open_sync(const char *msg, int writes_size);
static void test_file_descriptor_sync(void);

/* Timer expiry: a signal handler on Unix, a thread entry point on WIN32 */
#ifndef WIN32
static void process_alarm(int sig);
#else
static DWORD WINAPI process_alarm(LPVOID param);
#endif
/* Parameter name matches the definition */
static void signal_cleanup(int signum);

#ifdef HAVE_FSYNC_WRITETHROUGH
static int	pg_fsync_writethrough(int fd);
#endif
static void print_elapse(struct timeval start_t, struct timeval stop_t, int ops);

/* Log the translated message followed by the errno text (%m), then exit(1) */
#define die(msg) do { pg_log_error("%s: %m", _(msg)); exit(1); } while(0)
98 
99 
100 int
101 main(int argc, char *argv[])
102 {
103 	pg_logging_init(argv[0]);
104 	set_pglocale_pgservice(argv[0], PG_TEXTDOMAIN("pg_test_fsync"));
105 	progname = get_progname(argv[0]);
106 
107 	handle_args(argc, argv);
108 
109 	/* Prevent leaving behind the test file */
110 	pqsignal(SIGINT, signal_cleanup);
111 	pqsignal(SIGTERM, signal_cleanup);
112 #ifndef WIN32
113 	pqsignal(SIGALRM, process_alarm);
114 #endif
115 #ifdef SIGHUP
116 	/* Not defined on win32 */
117 	pqsignal(SIGHUP, signal_cleanup);
118 #endif
119 
120 	prepare_buf();
121 
122 	test_open();
123 
124 	/* Test using 1 XLOG_BLCKSZ write */
125 	test_sync(1);
126 
127 	/* Test using 2 XLOG_BLCKSZ writes */
128 	test_sync(2);
129 
130 	test_open_syncs();
131 
132 	test_file_descriptor_sync();
133 
134 	test_non_sync();
135 
136 	unlink(filename);
137 
138 	return 0;
139 }
140 
141 static void
142 handle_args(int argc, char *argv[])
143 {
144 	static struct option long_options[] = {
145 		{"filename", required_argument, NULL, 'f'},
146 		{"secs-per-test", required_argument, NULL, 's'},
147 		{NULL, 0, NULL, 0}
148 	};
149 
150 	int			option;			/* Command line option */
151 	int			optindex = 0;	/* used by getopt_long */
152 
153 	if (argc > 1)
154 	{
155 		if (strcmp(argv[1], "--help") == 0 || strcmp(argv[1], "-?") == 0)
156 		{
157 			printf(_("Usage: %s [-f FILENAME] [-s SECS-PER-TEST]\n"), progname);
158 			exit(0);
159 		}
160 		if (strcmp(argv[1], "--version") == 0 || strcmp(argv[1], "-V") == 0)
161 		{
162 			puts("pg_test_fsync (PostgreSQL) " PG_VERSION);
163 			exit(0);
164 		}
165 	}
166 
167 	while ((option = getopt_long(argc, argv, "f:s:",
168 								 long_options, &optindex)) != -1)
169 	{
170 		switch (option)
171 		{
172 			case 'f':
173 				filename = pg_strdup(optarg);
174 				break;
175 
176 			case 's':
177 				secs_per_test = atoi(optarg);
178 				break;
179 
180 			default:
181 				fprintf(stderr, _("Try \"%s --help\" for more information.\n"),
182 						progname);
183 				exit(1);
184 				break;
185 		}
186 	}
187 
188 	if (argc > optind)
189 	{
190 		pg_log_error("too many command-line arguments (first is \"%s\")",
191 					 argv[optind]);
192 		fprintf(stderr, _("Try \"%s --help\" for more information.\n"),
193 				progname);
194 		exit(1);
195 	}
196 
197 	printf(ngettext("%d second per test\n",
198 					"%d seconds per test\n",
199 					secs_per_test),
200 		   secs_per_test);
201 #if PG_O_DIRECT != 0
202 	printf(_("O_DIRECT supported on this platform for open_datasync and open_sync.\n"));
203 #else
204 	printf(_("Direct I/O is not supported on this platform.\n"));
205 #endif
206 }
207 
208 static void
209 prepare_buf(void)
210 {
211 	int			ops;
212 
213 	/* write random data into buffer */
214 	for (ops = 0; ops < DEFAULT_XLOG_SEG_SIZE; ops++)
215 		full_buf[ops] = random();
216 
217 	buf = (char *) TYPEALIGN(XLOG_BLCKSZ, full_buf);
218 }
219 
220 static void
221 test_open(void)
222 {
223 	int			tmpfile;
224 
225 	/*
226 	 * test if we can open the target file
227 	 */
228 	if ((tmpfile = open(filename, O_RDWR | O_CREAT | PG_BINARY, S_IRUSR | S_IWUSR)) == -1)
229 		die("could not open output file");
230 	needs_unlink = 1;
231 	if (write(tmpfile, full_buf, DEFAULT_XLOG_SEG_SIZE) !=
232 		DEFAULT_XLOG_SEG_SIZE)
233 		die("write failed");
234 
235 	/* fsync now so that dirty buffers don't skew later tests */
236 	if (fsync(tmpfile) != 0)
237 		die("fsync failed");
238 
239 	close(tmpfile);
240 }
241 
242 static void
243 test_sync(int writes_per_op)
244 {
245 	int			tmpfile,
246 				ops,
247 				writes;
248 	bool		fs_warning = false;
249 
250 	if (writes_per_op == 1)
251 		printf(_("\nCompare file sync methods using one %dkB write:\n"), XLOG_BLCKSZ_K);
252 	else
253 		printf(_("\nCompare file sync methods using two %dkB writes:\n"), XLOG_BLCKSZ_K);
254 	printf(_("(in wal_sync_method preference order, except fdatasync is Linux's default)\n"));
255 
256 	/*
257 	 * Test open_datasync if available
258 	 */
259 	printf(LABEL_FORMAT, "open_datasync");
260 	fflush(stdout);
261 
262 #ifdef OPEN_DATASYNC_FLAG
263 	if ((tmpfile = open(filename, O_RDWR | O_DSYNC | PG_O_DIRECT | PG_BINARY, 0)) == -1)
264 	{
265 		printf(NA_FORMAT, _("n/a*"));
266 		fs_warning = true;
267 	}
268 	else
269 	{
270 		START_TIMER;
271 		for (ops = 0; alarm_triggered == false; ops++)
272 		{
273 			for (writes = 0; writes < writes_per_op; writes++)
274 				if (write(tmpfile, buf, XLOG_BLCKSZ) != XLOG_BLCKSZ)
275 					die("write failed");
276 			if (lseek(tmpfile, 0, SEEK_SET) == -1)
277 				die("seek failed");
278 		}
279 		STOP_TIMER;
280 		close(tmpfile);
281 	}
282 #else
283 	printf(NA_FORMAT, _("n/a"));
284 #endif
285 
286 /*
287  * Test fdatasync if available
288  */
289 	printf(LABEL_FORMAT, "fdatasync");
290 	fflush(stdout);
291 
292 #ifdef HAVE_FDATASYNC
293 	if ((tmpfile = open(filename, O_RDWR | PG_BINARY, 0)) == -1)
294 		die("could not open output file");
295 	START_TIMER;
296 	for (ops = 0; alarm_triggered == false; ops++)
297 	{
298 		for (writes = 0; writes < writes_per_op; writes++)
299 			if (write(tmpfile, buf, XLOG_BLCKSZ) != XLOG_BLCKSZ)
300 				die("write failed");
301 		fdatasync(tmpfile);
302 		if (lseek(tmpfile, 0, SEEK_SET) == -1)
303 			die("seek failed");
304 	}
305 	STOP_TIMER;
306 	close(tmpfile);
307 #else
308 	printf(NA_FORMAT, _("n/a"));
309 #endif
310 
311 /*
312  * Test fsync
313  */
314 	printf(LABEL_FORMAT, "fsync");
315 	fflush(stdout);
316 
317 	if ((tmpfile = open(filename, O_RDWR | PG_BINARY, 0)) == -1)
318 		die("could not open output file");
319 	START_TIMER;
320 	for (ops = 0; alarm_triggered == false; ops++)
321 	{
322 		for (writes = 0; writes < writes_per_op; writes++)
323 			if (write(tmpfile, buf, XLOG_BLCKSZ) != XLOG_BLCKSZ)
324 				die("write failed");
325 		if (fsync(tmpfile) != 0)
326 			die("fsync failed");
327 		if (lseek(tmpfile, 0, SEEK_SET) == -1)
328 			die("seek failed");
329 	}
330 	STOP_TIMER;
331 	close(tmpfile);
332 
333 /*
334  * If fsync_writethrough is available, test as well
335  */
336 	printf(LABEL_FORMAT, "fsync_writethrough");
337 	fflush(stdout);
338 
339 #ifdef HAVE_FSYNC_WRITETHROUGH
340 	if ((tmpfile = open(filename, O_RDWR | PG_BINARY, 0)) == -1)
341 		die("could not open output file");
342 	START_TIMER;
343 	for (ops = 0; alarm_triggered == false; ops++)
344 	{
345 		for (writes = 0; writes < writes_per_op; writes++)
346 			if (write(tmpfile, buf, XLOG_BLCKSZ) != XLOG_BLCKSZ)
347 				die("write failed");
348 		if (pg_fsync_writethrough(tmpfile) != 0)
349 			die("fsync failed");
350 		if (lseek(tmpfile, 0, SEEK_SET) == -1)
351 			die("seek failed");
352 	}
353 	STOP_TIMER;
354 	close(tmpfile);
355 #else
356 	printf(NA_FORMAT, _("n/a"));
357 #endif
358 
359 /*
360  * Test open_sync if available
361  */
362 	printf(LABEL_FORMAT, "open_sync");
363 	fflush(stdout);
364 
365 #ifdef OPEN_SYNC_FLAG
366 	if ((tmpfile = open(filename, O_RDWR | OPEN_SYNC_FLAG | PG_O_DIRECT | PG_BINARY, 0)) == -1)
367 	{
368 		printf(NA_FORMAT, _("n/a*"));
369 		fs_warning = true;
370 	}
371 	else
372 	{
373 		START_TIMER;
374 		for (ops = 0; alarm_triggered == false; ops++)
375 		{
376 			for (writes = 0; writes < writes_per_op; writes++)
377 				if (write(tmpfile, buf, XLOG_BLCKSZ) != XLOG_BLCKSZ)
378 
379 					/*
380 					 * This can generate write failures if the filesystem has
381 					 * a large block size, e.g. 4k, and there is no support
382 					 * for O_DIRECT writes smaller than the file system block
383 					 * size, e.g. XFS.
384 					 */
385 					die("write failed");
386 			if (lseek(tmpfile, 0, SEEK_SET) == -1)
387 				die("seek failed");
388 		}
389 		STOP_TIMER;
390 		close(tmpfile);
391 	}
392 #else
393 	printf(NA_FORMAT, _("n/a"));
394 #endif
395 
396 	if (fs_warning)
397 	{
398 		printf(_("* This file system and its mount options do not support direct\n"
399 				 "  I/O, e.g. ext4 in journaled mode.\n"));
400 	}
401 }
402 
403 static void
404 test_open_syncs(void)
405 {
406 	printf(_("\nCompare open_sync with different write sizes:\n"));
407 	printf(_("(This is designed to compare the cost of writing 16kB in different write\n"
408 			 "open_sync sizes.)\n"));
409 
410 	test_open_sync(_(" 1 * 16kB open_sync write"), 16);
411 	test_open_sync(_(" 2 *  8kB open_sync writes"), 8);
412 	test_open_sync(_(" 4 *  4kB open_sync writes"), 4);
413 	test_open_sync(_(" 8 *  2kB open_sync writes"), 2);
414 	test_open_sync(_("16 *  1kB open_sync writes"), 1);
415 }
416 
417 /*
418  * Test open_sync with different size files
419  */
420 static void
421 test_open_sync(const char *msg, int writes_size)
422 {
423 #ifdef OPEN_SYNC_FLAG
424 	int			tmpfile,
425 				ops,
426 				writes;
427 #endif
428 
429 	printf(LABEL_FORMAT, msg);
430 	fflush(stdout);
431 
432 #ifdef OPEN_SYNC_FLAG
433 	if ((tmpfile = open(filename, O_RDWR | OPEN_SYNC_FLAG | PG_O_DIRECT | PG_BINARY, 0)) == -1)
434 		printf(NA_FORMAT, _("n/a*"));
435 	else
436 	{
437 		START_TIMER;
438 		for (ops = 0; alarm_triggered == false; ops++)
439 		{
440 			for (writes = 0; writes < 16 / writes_size; writes++)
441 				if (write(tmpfile, buf, writes_size * 1024) !=
442 					writes_size * 1024)
443 					die("write failed");
444 			if (lseek(tmpfile, 0, SEEK_SET) == -1)
445 				die("seek failed");
446 		}
447 		STOP_TIMER;
448 		close(tmpfile);
449 	}
450 #else
451 	printf(NA_FORMAT, _("n/a"));
452 #endif
453 }
454 
455 static void
456 test_file_descriptor_sync(void)
457 {
458 	int			tmpfile,
459 				ops;
460 
461 	/*
462 	 * Test whether fsync can sync data written on a different descriptor for
463 	 * the same file.  This checks the efficiency of multi-process fsyncs
464 	 * against the same file. Possibly this should be done with writethrough
465 	 * on platforms which support it.
466 	 */
467 	printf(_("\nTest if fsync on non-write file descriptor is honored:\n"));
468 	printf(_("(If the times are similar, fsync() can sync data written on a different\n"
469 			 "descriptor.)\n"));
470 
471 	/*
472 	 * first write, fsync and close, which is the normal behavior without
473 	 * multiple descriptors
474 	 */
475 	printf(LABEL_FORMAT, "write, fsync, close");
476 	fflush(stdout);
477 
478 	START_TIMER;
479 	for (ops = 0; alarm_triggered == false; ops++)
480 	{
481 		if ((tmpfile = open(filename, O_RDWR | PG_BINARY, 0)) == -1)
482 			die("could not open output file");
483 		if (write(tmpfile, buf, XLOG_BLCKSZ) != XLOG_BLCKSZ)
484 			die("write failed");
485 		if (fsync(tmpfile) != 0)
486 			die("fsync failed");
487 		close(tmpfile);
488 
489 		/*
490 		 * open and close the file again to be consistent with the following
491 		 * test
492 		 */
493 		if ((tmpfile = open(filename, O_RDWR | PG_BINARY, 0)) == -1)
494 			die("could not open output file");
495 		close(tmpfile);
496 	}
497 	STOP_TIMER;
498 
499 	/*
500 	 * Now open, write, close, open again and fsync This simulates processes
501 	 * fsyncing each other's writes.
502 	 */
503 	printf(LABEL_FORMAT, "write, close, fsync");
504 	fflush(stdout);
505 
506 	START_TIMER;
507 	for (ops = 0; alarm_triggered == false; ops++)
508 	{
509 		if ((tmpfile = open(filename, O_RDWR | PG_BINARY, 0)) == -1)
510 			die("could not open output file");
511 		if (write(tmpfile, buf, XLOG_BLCKSZ) != XLOG_BLCKSZ)
512 			die("write failed");
513 		close(tmpfile);
514 		/* reopen file */
515 		if ((tmpfile = open(filename, O_RDWR | PG_BINARY, 0)) == -1)
516 			die("could not open output file");
517 		if (fsync(tmpfile) != 0)
518 			die("fsync failed");
519 		close(tmpfile);
520 	}
521 	STOP_TIMER;
522 }
523 
524 static void
525 test_non_sync(void)
526 {
527 	int			tmpfile,
528 				ops;
529 
530 	/*
531 	 * Test a simple write without fsync
532 	 */
533 	printf(_("\nNon-sync'ed %dkB writes:\n"), XLOG_BLCKSZ_K);
534 	printf(LABEL_FORMAT, "write");
535 	fflush(stdout);
536 
537 	START_TIMER;
538 	for (ops = 0; alarm_triggered == false; ops++)
539 	{
540 		if ((tmpfile = open(filename, O_RDWR | PG_BINARY, 0)) == -1)
541 			die("could not open output file");
542 		if (write(tmpfile, buf, XLOG_BLCKSZ) != XLOG_BLCKSZ)
543 			die("write failed");
544 		close(tmpfile);
545 	}
546 	STOP_TIMER;
547 }
548 
549 static void
550 signal_cleanup(int signum)
551 {
552 	/* Delete the file if it exists. Ignore errors */
553 	if (needs_unlink)
554 		unlink(filename);
555 	/* Finish incomplete line on stdout */
556 	puts("");
557 	exit(signum);
558 }
559 
#ifdef HAVE_FSYNC_WRITETHROUGH

/*
 * Flush fd using the platform's write-through primitive: _commit() on WIN32,
 * fcntl(F_FULLFSYNC) where that is defined.  Returns 0 on success and -1 on
 * failure; when neither primitive exists it fails with errno set to ENOSYS.
 */
static int
pg_fsync_writethrough(int fd)
{
#ifdef WIN32
	return _commit(fd);
#elif defined(F_FULLFSYNC)
	if (fcntl(fd, F_FULLFSYNC, 0) == -1)
		return -1;
	return 0;
#else
	errno = ENOSYS;
	return -1;
#endif
}
#endif
575 
576 /*
577  * print out the writes per second for tests
578  */
579 static void
580 print_elapse(struct timeval start_t, struct timeval stop_t, int ops)
581 {
582 	double		total_time = (stop_t.tv_sec - start_t.tv_sec) +
583 	(stop_t.tv_usec - start_t.tv_usec) * 0.000001;
584 	double		per_second = ops / total_time;
585 	double		avg_op_time_us = (total_time / ops) * USECS_SEC;
586 
587 	printf(_(OPS_FORMAT), per_second, avg_op_time_us);
588 }
589 
590 #ifndef WIN32
591 static void
592 process_alarm(int sig)
593 {
594 	alarm_triggered = true;
595 }
596 #else
597 static DWORD WINAPI
598 process_alarm(LPVOID param)
599 {
600 	/* WIN32 doesn't support alarm, so we create a thread and sleep here */
601 	Sleep(secs_per_test * 1000);
602 	alarm_triggered = true;
603 	ExitThread(0);
604 }
605 #endif
606