1 /*
2 * pg_test_fsync.c
3 * tests all supported fsync() methods
4 */
5
6 #include "postgres_fe.h"
7
8 #include <sys/stat.h>
9 #include <sys/time.h>
10 #include <fcntl.h>
11 #include <time.h>
12 #include <unistd.h>
13 #include <signal.h>
14
15 #include "getopt_long.h"
16 #include "access/xlogdefs.h"
17
18
/*
 * Default path of the scratch file the tests write to: the current working
 * directory, unless the user specifies another file with -f/--filename.
 */
#define FSYNC_FILENAME	"./pg_test_fsync.out"

/* XLOG_BLCKSZ expressed in kB, for use in user-facing messages */
#define XLOG_BLCKSZ_K	(XLOG_BLCKSZ / 1024)

/*
 * printf formats for the result table: LABEL_FORMAT prints the name of a
 * test, NA_FORMAT prints a "not available" cell, and OPS_FORMAT prints the
 * measured rate and the average time per operation.
 */
#define LABEL_FORMAT		" %-30s"
#define NA_FORMAT			"%21s\n"
/* translator: maintain alignment with NA_FORMAT */
#define OPS_FORMAT			gettext_noop("%13.3f ops/sec %6.0f usecs/op\n")
/* Number of microseconds in one second */
#define USECS_SEC			1000000
32
/*
 * Timing helpers.
 *
 * These are macros to avoid timing the function call overhead.  They operate
 * on the file-level variables alarm_triggered, secs_per_test, start_t and
 * stop_t; STOP_TIMER additionally expects a local variable named "ops"
 * holding the number of operations completed, which it passes to
 * print_elapse().
 *
 * START_TIMER clears alarm_triggered, arranges for process_alarm() to run
 * after secs_per_test seconds, and records the start time.  STOP_TIMER
 * records the stop time and prints the result line.
 */
#ifndef WIN32
#define START_TIMER \
do { \
	alarm_triggered = false; \
	alarm(secs_per_test); \
	gettimeofday(&start_t, NULL); \
} while (0)
#else
/*
 * WIN32 doesn't support alarm, so we create a thread and sleep there.
 * CreateThread() reports failure by returning NULL.  The handle is closed
 * right away because nothing waits on it; closing the handle does not stop
 * the thread.
 */
#define START_TIMER \
do { \
	HANDLE		alarm_thread; \
	alarm_triggered = false; \
	alarm_thread = CreateThread(NULL, 0, process_alarm, NULL, 0, NULL); \
	if (alarm_thread == NULL) \
	{ \
		fprintf(stderr, _("Could not create thread for alarm\n")); \
		exit(1); \
	} \
	CloseHandle(alarm_thread); \
	gettimeofday(&start_t, NULL); \
} while (0)
#endif

#define STOP_TIMER	\
do { \
	gettimeofday(&stop_t, NULL); \
	print_elapse(start_t, stop_t, ops); \
} while (0)
61
62
63 static const char *progname;
64
65 static int secs_per_test = 5;
66 static int needs_unlink = 0;
67 static char full_buf[DEFAULT_XLOG_SEG_SIZE],
68 *buf,
69 *filename = FSYNC_FILENAME;
70 static struct timeval start_t,
71 stop_t;
72 static bool alarm_triggered = false;
73
74
/* Command-line handling and buffer setup */
static void handle_args(int argc, char *argv[]);
static void prepare_buf(void);

/* Test stages */
static void test_open(void);
static void test_sync(int writes_per_op);
static void test_open_syncs(void);
static void test_open_sync(const char *msg, int writes_size);
static void test_file_descriptor_sync(void);
static void test_non_sync(void);

/* Timer expiry and cleanup-on-signal handlers */
#ifdef WIN32
static DWORD WINAPI process_alarm(LPVOID param);
#else
static void process_alarm(int sig);
#endif
static void signal_cleanup(int signum);

#ifdef HAVE_FSYNC_WRITETHROUGH
static int	pg_fsync_writethrough(int fd);
#endif

/* Reporting helpers */
static void print_elapse(struct timeval start_t, struct timeval stop_t, int ops);
static void die(const char *str);
96
97
98 int
main(int argc,char * argv[])99 main(int argc, char *argv[])
100 {
101 set_pglocale_pgservice(argv[0], PG_TEXTDOMAIN("pg_test_fsync"));
102 progname = get_progname(argv[0]);
103
104 handle_args(argc, argv);
105
106 /* Prevent leaving behind the test file */
107 pqsignal(SIGINT, signal_cleanup);
108 pqsignal(SIGTERM, signal_cleanup);
109 #ifndef WIN32
110 pqsignal(SIGALRM, process_alarm);
111 #endif
112 #ifdef SIGHUP
113 /* Not defined on win32 */
114 pqsignal(SIGHUP, signal_cleanup);
115 #endif
116
117 prepare_buf();
118
119 test_open();
120
121 /* Test using 1 XLOG_BLCKSZ write */
122 test_sync(1);
123
124 /* Test using 2 XLOG_BLCKSZ writes */
125 test_sync(2);
126
127 test_open_syncs();
128
129 test_file_descriptor_sync();
130
131 test_non_sync();
132
133 unlink(filename);
134
135 return 0;
136 }
137
138 static void
handle_args(int argc,char * argv[])139 handle_args(int argc, char *argv[])
140 {
141 static struct option long_options[] = {
142 {"filename", required_argument, NULL, 'f'},
143 {"secs-per-test", required_argument, NULL, 's'},
144 {NULL, 0, NULL, 0}
145 };
146
147 int option; /* Command line option */
148 int optindex = 0; /* used by getopt_long */
149
150 if (argc > 1)
151 {
152 if (strcmp(argv[1], "--help") == 0 || strcmp(argv[1], "-?") == 0)
153 {
154 printf(_("Usage: %s [-f FILENAME] [-s SECS-PER-TEST]\n"), progname);
155 exit(0);
156 }
157 if (strcmp(argv[1], "--version") == 0 || strcmp(argv[1], "-V") == 0)
158 {
159 puts("pg_test_fsync (PostgreSQL) " PG_VERSION);
160 exit(0);
161 }
162 }
163
164 while ((option = getopt_long(argc, argv, "f:s:",
165 long_options, &optindex)) != -1)
166 {
167 switch (option)
168 {
169 case 'f':
170 filename = strdup(optarg);
171 break;
172
173 case 's':
174 secs_per_test = atoi(optarg);
175 break;
176
177 default:
178 fprintf(stderr, _("Try \"%s --help\" for more information.\n"),
179 progname);
180 exit(1);
181 break;
182 }
183 }
184
185 if (argc > optind)
186 {
187 fprintf(stderr,
188 _("%s: too many command-line arguments (first is \"%s\")\n"),
189 progname, argv[optind]);
190 fprintf(stderr, _("Try \"%s --help\" for more information.\n"),
191 progname);
192 exit(1);
193 }
194
195 printf(ngettext("%d second per test\n",
196 "%d seconds per test\n",
197 secs_per_test),
198 secs_per_test);
199 #if PG_O_DIRECT != 0
200 printf(_("O_DIRECT supported on this platform for open_datasync and open_sync.\n"));
201 #else
202 printf(_("Direct I/O is not supported on this platform.\n"));
203 #endif
204 }
205
206 static void
prepare_buf(void)207 prepare_buf(void)
208 {
209 int ops;
210
211 /* write random data into buffer */
212 for (ops = 0; ops < DEFAULT_XLOG_SEG_SIZE; ops++)
213 full_buf[ops] = random();
214
215 buf = (char *) TYPEALIGN(XLOG_BLCKSZ, full_buf);
216 }
217
218 static void
test_open(void)219 test_open(void)
220 {
221 int tmpfile;
222
223 /*
224 * test if we can open the target file
225 */
226 if ((tmpfile = open(filename, O_RDWR | O_CREAT | PG_BINARY, S_IRUSR | S_IWUSR)) == -1)
227 die("could not open output file");
228 needs_unlink = 1;
229 if (write(tmpfile, full_buf, DEFAULT_XLOG_SEG_SIZE) !=
230 DEFAULT_XLOG_SEG_SIZE)
231 die("write failed");
232
233 /* fsync now so that dirty buffers don't skew later tests */
234 if (fsync(tmpfile) != 0)
235 die("fsync failed");
236
237 close(tmpfile);
238 }
239
240 static void
test_sync(int writes_per_op)241 test_sync(int writes_per_op)
242 {
243 int tmpfile,
244 ops,
245 writes;
246 bool fs_warning = false;
247
248 if (writes_per_op == 1)
249 printf(_("\nCompare file sync methods using one %dkB write:\n"), XLOG_BLCKSZ_K);
250 else
251 printf(_("\nCompare file sync methods using two %dkB writes:\n"), XLOG_BLCKSZ_K);
252 printf(_("(in wal_sync_method preference order, except fdatasync is Linux's default)\n"));
253
254 /*
255 * Test open_datasync if available
256 */
257 printf(LABEL_FORMAT, "open_datasync");
258 fflush(stdout);
259
260 #ifdef OPEN_DATASYNC_FLAG
261 if ((tmpfile = open(filename, O_RDWR | O_DSYNC | PG_O_DIRECT | PG_BINARY, 0)) == -1)
262 {
263 printf(NA_FORMAT, _("n/a*"));
264 fs_warning = true;
265 }
266 else
267 {
268 START_TIMER;
269 for (ops = 0; alarm_triggered == false; ops++)
270 {
271 for (writes = 0; writes < writes_per_op; writes++)
272 if (write(tmpfile, buf, XLOG_BLCKSZ) != XLOG_BLCKSZ)
273 die("write failed");
274 if (lseek(tmpfile, 0, SEEK_SET) == -1)
275 die("seek failed");
276 }
277 STOP_TIMER;
278 close(tmpfile);
279 }
280 #else
281 printf(NA_FORMAT, _("n/a"));
282 #endif
283
284 /*
285 * Test fdatasync if available
286 */
287 printf(LABEL_FORMAT, "fdatasync");
288 fflush(stdout);
289
290 #ifdef HAVE_FDATASYNC
291 if ((tmpfile = open(filename, O_RDWR | PG_BINARY, 0)) == -1)
292 die("could not open output file");
293 START_TIMER;
294 for (ops = 0; alarm_triggered == false; ops++)
295 {
296 for (writes = 0; writes < writes_per_op; writes++)
297 if (write(tmpfile, buf, XLOG_BLCKSZ) != XLOG_BLCKSZ)
298 die("write failed");
299 fdatasync(tmpfile);
300 if (lseek(tmpfile, 0, SEEK_SET) == -1)
301 die("seek failed");
302 }
303 STOP_TIMER;
304 close(tmpfile);
305 #else
306 printf(NA_FORMAT, _("n/a"));
307 #endif
308
309 /*
310 * Test fsync
311 */
312 printf(LABEL_FORMAT, "fsync");
313 fflush(stdout);
314
315 if ((tmpfile = open(filename, O_RDWR | PG_BINARY, 0)) == -1)
316 die("could not open output file");
317 START_TIMER;
318 for (ops = 0; alarm_triggered == false; ops++)
319 {
320 for (writes = 0; writes < writes_per_op; writes++)
321 if (write(tmpfile, buf, XLOG_BLCKSZ) != XLOG_BLCKSZ)
322 die("write failed");
323 if (fsync(tmpfile) != 0)
324 die("fsync failed");
325 if (lseek(tmpfile, 0, SEEK_SET) == -1)
326 die("seek failed");
327 }
328 STOP_TIMER;
329 close(tmpfile);
330
331 /*
332 * If fsync_writethrough is available, test as well
333 */
334 printf(LABEL_FORMAT, "fsync_writethrough");
335 fflush(stdout);
336
337 #ifdef HAVE_FSYNC_WRITETHROUGH
338 if ((tmpfile = open(filename, O_RDWR | PG_BINARY, 0)) == -1)
339 die("could not open output file");
340 START_TIMER;
341 for (ops = 0; alarm_triggered == false; ops++)
342 {
343 for (writes = 0; writes < writes_per_op; writes++)
344 if (write(tmpfile, buf, XLOG_BLCKSZ) != XLOG_BLCKSZ)
345 die("write failed");
346 if (pg_fsync_writethrough(tmpfile) != 0)
347 die("fsync failed");
348 if (lseek(tmpfile, 0, SEEK_SET) == -1)
349 die("seek failed");
350 }
351 STOP_TIMER;
352 close(tmpfile);
353 #else
354 printf(NA_FORMAT, _("n/a"));
355 #endif
356
357 /*
358 * Test open_sync if available
359 */
360 printf(LABEL_FORMAT, "open_sync");
361 fflush(stdout);
362
363 #ifdef OPEN_SYNC_FLAG
364 if ((tmpfile = open(filename, O_RDWR | OPEN_SYNC_FLAG | PG_O_DIRECT | PG_BINARY, 0)) == -1)
365 {
366 printf(NA_FORMAT, _("n/a*"));
367 fs_warning = true;
368 }
369 else
370 {
371 START_TIMER;
372 for (ops = 0; alarm_triggered == false; ops++)
373 {
374 for (writes = 0; writes < writes_per_op; writes++)
375 if (write(tmpfile, buf, XLOG_BLCKSZ) != XLOG_BLCKSZ)
376
377 /*
378 * This can generate write failures if the filesystem has
379 * a large block size, e.g. 4k, and there is no support
380 * for O_DIRECT writes smaller than the file system block
381 * size, e.g. XFS.
382 */
383 die("write failed");
384 if (lseek(tmpfile, 0, SEEK_SET) == -1)
385 die("seek failed");
386 }
387 STOP_TIMER;
388 close(tmpfile);
389 }
390 #else
391 printf(NA_FORMAT, _("n/a"));
392 #endif
393
394 if (fs_warning)
395 {
396 printf(_("* This file system and its mount options do not support direct\n"
397 " I/O, e.g. ext4 in journaled mode.\n"));
398 }
399 }
400
401 static void
test_open_syncs(void)402 test_open_syncs(void)
403 {
404 printf(_("\nCompare open_sync with different write sizes:\n"));
405 printf(_("(This is designed to compare the cost of writing 16kB in different write\n"
406 "open_sync sizes.)\n"));
407
408 test_open_sync(_(" 1 * 16kB open_sync write"), 16);
409 test_open_sync(_(" 2 * 8kB open_sync writes"), 8);
410 test_open_sync(_(" 4 * 4kB open_sync writes"), 4);
411 test_open_sync(_(" 8 * 2kB open_sync writes"), 2);
412 test_open_sync(_("16 * 1kB open_sync writes"), 1);
413 }
414
415 /*
416 * Test open_sync with different size files
417 */
418 static void
test_open_sync(const char * msg,int writes_size)419 test_open_sync(const char *msg, int writes_size)
420 {
421 #ifdef OPEN_SYNC_FLAG
422 int tmpfile,
423 ops,
424 writes;
425 #endif
426
427 printf(LABEL_FORMAT, msg);
428 fflush(stdout);
429
430 #ifdef OPEN_SYNC_FLAG
431 if ((tmpfile = open(filename, O_RDWR | OPEN_SYNC_FLAG | PG_O_DIRECT | PG_BINARY, 0)) == -1)
432 printf(NA_FORMAT, _("n/a*"));
433 else
434 {
435 START_TIMER;
436 for (ops = 0; alarm_triggered == false; ops++)
437 {
438 for (writes = 0; writes < 16 / writes_size; writes++)
439 if (write(tmpfile, buf, writes_size * 1024) !=
440 writes_size * 1024)
441 die("write failed");
442 if (lseek(tmpfile, 0, SEEK_SET) == -1)
443 die("seek failed");
444 }
445 STOP_TIMER;
446 close(tmpfile);
447 }
448 #else
449 printf(NA_FORMAT, _("n/a"));
450 #endif
451 }
452
453 static void
test_file_descriptor_sync(void)454 test_file_descriptor_sync(void)
455 {
456 int tmpfile,
457 ops;
458
459 /*
460 * Test whether fsync can sync data written on a different descriptor for
461 * the same file. This checks the efficiency of multi-process fsyncs
462 * against the same file. Possibly this should be done with writethrough
463 * on platforms which support it.
464 */
465 printf(_("\nTest if fsync on non-write file descriptor is honored:\n"));
466 printf(_("(If the times are similar, fsync() can sync data written on a different\n"
467 "descriptor.)\n"));
468
469 /*
470 * first write, fsync and close, which is the normal behavior without
471 * multiple descriptors
472 */
473 printf(LABEL_FORMAT, "write, fsync, close");
474 fflush(stdout);
475
476 START_TIMER;
477 for (ops = 0; alarm_triggered == false; ops++)
478 {
479 if ((tmpfile = open(filename, O_RDWR | PG_BINARY, 0)) == -1)
480 die("could not open output file");
481 if (write(tmpfile, buf, XLOG_BLCKSZ) != XLOG_BLCKSZ)
482 die("write failed");
483 if (fsync(tmpfile) != 0)
484 die("fsync failed");
485 close(tmpfile);
486
487 /*
488 * open and close the file again to be consistent with the following
489 * test
490 */
491 if ((tmpfile = open(filename, O_RDWR | PG_BINARY, 0)) == -1)
492 die("could not open output file");
493 close(tmpfile);
494 }
495 STOP_TIMER;
496
497 /*
498 * Now open, write, close, open again and fsync This simulates processes
499 * fsyncing each other's writes.
500 */
501 printf(LABEL_FORMAT, "write, close, fsync");
502 fflush(stdout);
503
504 START_TIMER;
505 for (ops = 0; alarm_triggered == false; ops++)
506 {
507 if ((tmpfile = open(filename, O_RDWR | PG_BINARY, 0)) == -1)
508 die("could not open output file");
509 if (write(tmpfile, buf, XLOG_BLCKSZ) != XLOG_BLCKSZ)
510 die("write failed");
511 close(tmpfile);
512 /* reopen file */
513 if ((tmpfile = open(filename, O_RDWR | PG_BINARY, 0)) == -1)
514 die("could not open output file");
515 if (fsync(tmpfile) != 0)
516 die("fsync failed");
517 close(tmpfile);
518 }
519 STOP_TIMER;
520 }
521
522 static void
test_non_sync(void)523 test_non_sync(void)
524 {
525 int tmpfile,
526 ops;
527
528 /*
529 * Test a simple write without fsync
530 */
531 printf(_("\nNon-sync'ed %dkB writes:\n"), XLOG_BLCKSZ_K);
532 printf(LABEL_FORMAT, "write");
533 fflush(stdout);
534
535 START_TIMER;
536 for (ops = 0; alarm_triggered == false; ops++)
537 {
538 if ((tmpfile = open(filename, O_RDWR | PG_BINARY, 0)) == -1)
539 die("could not open output file");
540 if (write(tmpfile, buf, XLOG_BLCKSZ) != XLOG_BLCKSZ)
541 die("write failed");
542 close(tmpfile);
543 }
544 STOP_TIMER;
545 }
546
547 static void
signal_cleanup(int signum)548 signal_cleanup(int signum)
549 {
550 /* Delete the file if it exists. Ignore errors */
551 if (needs_unlink)
552 unlink(filename);
553 /* Finish incomplete line on stdout */
554 puts("");
555 exit(signum);
556 }
557
#ifdef HAVE_FSYNC_WRITETHROUGH

/*
 * pg_fsync_writethrough
 *
 * Sync fd using the platform's write-through primitive: _commit() on WIN32,
 * or fcntl(F_FULLFSYNC) where that command is defined.  Returns 0 on
 * success and -1 on failure.  If neither primitive is available, fails with
 * errno set to ENOSYS.
 */
static int
pg_fsync_writethrough(int fd)
{
#if defined(WIN32)
	return _commit(fd);
#elif defined(F_FULLFSYNC)
	if (fcntl(fd, F_FULLFSYNC, 0) == -1)
		return -1;
	return 0;
#else
	errno = ENOSYS;
	return -1;
#endif
}
#endif
573
574 /*
575 * print out the writes per second for tests
576 */
577 static void
print_elapse(struct timeval start_t,struct timeval stop_t,int ops)578 print_elapse(struct timeval start_t, struct timeval stop_t, int ops)
579 {
580 double total_time = (stop_t.tv_sec - start_t.tv_sec) +
581 (stop_t.tv_usec - start_t.tv_usec) * 0.000001;
582 double per_second = ops / total_time;
583 double avg_op_time_us = (total_time / ops) * USECS_SEC;
584
585 printf(_(OPS_FORMAT), per_second, avg_op_time_us);
586 }
587
588 #ifndef WIN32
589 static void
process_alarm(int sig)590 process_alarm(int sig)
591 {
592 alarm_triggered = true;
593 }
594 #else
595 static DWORD WINAPI
process_alarm(LPVOID param)596 process_alarm(LPVOID param)
597 {
598 /* WIN32 doesn't support alarm, so we create a thread and sleep here */
599 Sleep(secs_per_test * 1000);
600 alarm_triggered = true;
601 ExitThread(0);
602 }
603 #endif
604
/*
 * die
 *
 * Print the translated message str, followed by the description of the
 * current errno value, on stderr, and exit with status 1.
 */
static void
die(const char *str)
{
	const char *reason = strerror(errno);

	fprintf(stderr, _("%s: %s\n"), _(str), reason);
	exit(1);
}
611