1 /*
2 * pg_test_fsync.c
3 * tests all supported fsync() methods
4 */
5
#include "postgres_fe.h"

#include <errno.h>
#include <limits.h>
#include <sys/stat.h>
#include <sys/time.h>
#include <fcntl.h>
#include <time.h>
#include <unistd.h>
#include <signal.h>

#include "getopt_long.h"
#include "access/xlogdefs.h"
#include "common/logging.h"
18
19
20 /*
21 * put the temp files in the local directory
22 * unless the user specifies otherwise
23 */
24 #define FSYNC_FILENAME "./pg_test_fsync.out"
25
26 #define XLOG_BLCKSZ_K (XLOG_BLCKSZ / 1024)
27
28 #define LABEL_FORMAT " %-30s"
29 #define NA_FORMAT "%21s\n"
30 /* translator: maintain alignment with NA_FORMAT */
31 #define OPS_FORMAT gettext_noop("%13.3f ops/sec %6.0f usecs/op\n")
32 #define USECS_SEC 1000000
33
/*
 * Timing helpers.  These are macros to avoid timing the function call
 * overhead.
 *
 * START_TIMER clears alarm_triggered, arranges for it to be set again after
 * secs_per_test seconds, and records the start time in start_t.
 * STOP_TIMER records the stop time in stop_t and prints the result for the
 * "ops" variable of the enclosing function.
 */
#ifndef WIN32
#define START_TIMER \
do { \
	alarm_triggered = false; \
	alarm(secs_per_test); \
	gettimeofday(&start_t, NULL); \
} while (0)
#else
/*
 * WIN32 doesn't support alarm, so we create a thread and sleep there.
 *
 * CreateThread() reports failure by returning NULL, not
 * INVALID_HANDLE_VALUE, so that is what must be tested.  The handle is
 * closed right away because nothing ever waits on the thread; closing the
 * handle does not stop the thread, it only releases our reference to it.
 */
#define START_TIMER \
do { \
	HANDLE		alarm_thread_handle; \
	alarm_triggered = false; \
	alarm_thread_handle = CreateThread(NULL, 0, process_alarm, NULL, 0, NULL); \
	if (alarm_thread_handle == NULL) \
	{ \
		pg_log_error("could not create thread for alarm"); \
		exit(1); \
	} \
	CloseHandle(alarm_thread_handle); \
	gettimeofday(&start_t, NULL); \
} while (0)
#endif

#define STOP_TIMER \
do { \
	gettimeofday(&stop_t, NULL); \
	print_elapse(start_t, stop_t, ops); \
} while (0)
62
63
64 static const char *progname;
65
66 static int secs_per_test = 5;
67 static int needs_unlink = 0;
68 static char full_buf[DEFAULT_XLOG_SEG_SIZE],
69 *buf,
70 *filename = FSYNC_FILENAME;
71 static struct timeval start_t,
72 stop_t;
73 static bool alarm_triggered = false;
74
75
76 static void handle_args(int argc, char *argv[]);
77 static void prepare_buf(void);
78 static void test_open(void);
79 static void test_non_sync(void);
80 static void test_sync(int writes_per_op);
81 static void test_open_syncs(void);
82 static void test_open_sync(const char *msg, int writes_size);
83 static void test_file_descriptor_sync(void);
84
85 #ifndef WIN32
86 static void process_alarm(int sig);
87 #else
88 static DWORD WINAPI process_alarm(LPVOID param);
89 #endif
90 static void signal_cleanup(int sig);
91
92 #ifdef HAVE_FSYNC_WRITETHROUGH
93 static int pg_fsync_writethrough(int fd);
94 #endif
95 static void print_elapse(struct timeval start_t, struct timeval stop_t, int ops);
96
97 #define die(msg) do { pg_log_error("%s: %m", _(msg)); exit(1); } while(0)
98
99
100 int
main(int argc,char * argv[])101 main(int argc, char *argv[])
102 {
103 pg_logging_init(argv[0]);
104 set_pglocale_pgservice(argv[0], PG_TEXTDOMAIN("pg_test_fsync"));
105 progname = get_progname(argv[0]);
106
107 handle_args(argc, argv);
108
109 /* Prevent leaving behind the test file */
110 pqsignal(SIGINT, signal_cleanup);
111 pqsignal(SIGTERM, signal_cleanup);
112 #ifndef WIN32
113 pqsignal(SIGALRM, process_alarm);
114 #endif
115 #ifdef SIGHUP
116 /* Not defined on win32 */
117 pqsignal(SIGHUP, signal_cleanup);
118 #endif
119
120 prepare_buf();
121
122 test_open();
123
124 /* Test using 1 XLOG_BLCKSZ write */
125 test_sync(1);
126
127 /* Test using 2 XLOG_BLCKSZ writes */
128 test_sync(2);
129
130 test_open_syncs();
131
132 test_file_descriptor_sync();
133
134 test_non_sync();
135
136 unlink(filename);
137
138 return 0;
139 }
140
141 static void
handle_args(int argc,char * argv[])142 handle_args(int argc, char *argv[])
143 {
144 static struct option long_options[] = {
145 {"filename", required_argument, NULL, 'f'},
146 {"secs-per-test", required_argument, NULL, 's'},
147 {NULL, 0, NULL, 0}
148 };
149
150 int option; /* Command line option */
151 int optindex = 0; /* used by getopt_long */
152
153 if (argc > 1)
154 {
155 if (strcmp(argv[1], "--help") == 0 || strcmp(argv[1], "-?") == 0)
156 {
157 printf(_("Usage: %s [-f FILENAME] [-s SECS-PER-TEST]\n"), progname);
158 exit(0);
159 }
160 if (strcmp(argv[1], "--version") == 0 || strcmp(argv[1], "-V") == 0)
161 {
162 puts("pg_test_fsync (PostgreSQL) " PG_VERSION);
163 exit(0);
164 }
165 }
166
167 while ((option = getopt_long(argc, argv, "f:s:",
168 long_options, &optindex)) != -1)
169 {
170 switch (option)
171 {
172 case 'f':
173 filename = pg_strdup(optarg);
174 break;
175
176 case 's':
177 secs_per_test = atoi(optarg);
178 break;
179
180 default:
181 fprintf(stderr, _("Try \"%s --help\" for more information.\n"),
182 progname);
183 exit(1);
184 break;
185 }
186 }
187
188 if (argc > optind)
189 {
190 pg_log_error("too many command-line arguments (first is \"%s\")",
191 argv[optind]);
192 fprintf(stderr, _("Try \"%s --help\" for more information.\n"),
193 progname);
194 exit(1);
195 }
196
197 printf(ngettext("%d second per test\n",
198 "%d seconds per test\n",
199 secs_per_test),
200 secs_per_test);
201 #if PG_O_DIRECT != 0
202 printf(_("O_DIRECT supported on this platform for open_datasync and open_sync.\n"));
203 #else
204 printf(_("Direct I/O is not supported on this platform.\n"));
205 #endif
206 }
207
208 static void
prepare_buf(void)209 prepare_buf(void)
210 {
211 int ops;
212
213 /* write random data into buffer */
214 for (ops = 0; ops < DEFAULT_XLOG_SEG_SIZE; ops++)
215 full_buf[ops] = random();
216
217 buf = (char *) TYPEALIGN(XLOG_BLCKSZ, full_buf);
218 }
219
220 static void
test_open(void)221 test_open(void)
222 {
223 int tmpfile;
224
225 /*
226 * test if we can open the target file
227 */
228 if ((tmpfile = open(filename, O_RDWR | O_CREAT | PG_BINARY, S_IRUSR | S_IWUSR)) == -1)
229 die("could not open output file");
230 needs_unlink = 1;
231 if (write(tmpfile, full_buf, DEFAULT_XLOG_SEG_SIZE) !=
232 DEFAULT_XLOG_SEG_SIZE)
233 die("write failed");
234
235 /* fsync now so that dirty buffers don't skew later tests */
236 if (fsync(tmpfile) != 0)
237 die("fsync failed");
238
239 close(tmpfile);
240 }
241
242 static void
test_sync(int writes_per_op)243 test_sync(int writes_per_op)
244 {
245 int tmpfile,
246 ops,
247 writes;
248 bool fs_warning = false;
249
250 if (writes_per_op == 1)
251 printf(_("\nCompare file sync methods using one %dkB write:\n"), XLOG_BLCKSZ_K);
252 else
253 printf(_("\nCompare file sync methods using two %dkB writes:\n"), XLOG_BLCKSZ_K);
254 printf(_("(in wal_sync_method preference order, except fdatasync is Linux's default)\n"));
255
256 /*
257 * Test open_datasync if available
258 */
259 printf(LABEL_FORMAT, "open_datasync");
260 fflush(stdout);
261
262 #ifdef OPEN_DATASYNC_FLAG
263 if ((tmpfile = open(filename, O_RDWR | O_DSYNC | PG_O_DIRECT | PG_BINARY, 0)) == -1)
264 {
265 printf(NA_FORMAT, _("n/a*"));
266 fs_warning = true;
267 }
268 else
269 {
270 START_TIMER;
271 for (ops = 0; alarm_triggered == false; ops++)
272 {
273 for (writes = 0; writes < writes_per_op; writes++)
274 if (write(tmpfile, buf, XLOG_BLCKSZ) != XLOG_BLCKSZ)
275 die("write failed");
276 if (lseek(tmpfile, 0, SEEK_SET) == -1)
277 die("seek failed");
278 }
279 STOP_TIMER;
280 close(tmpfile);
281 }
282 #else
283 printf(NA_FORMAT, _("n/a"));
284 #endif
285
286 /*
287 * Test fdatasync if available
288 */
289 printf(LABEL_FORMAT, "fdatasync");
290 fflush(stdout);
291
292 #ifdef HAVE_FDATASYNC
293 if ((tmpfile = open(filename, O_RDWR | PG_BINARY, 0)) == -1)
294 die("could not open output file");
295 START_TIMER;
296 for (ops = 0; alarm_triggered == false; ops++)
297 {
298 for (writes = 0; writes < writes_per_op; writes++)
299 if (write(tmpfile, buf, XLOG_BLCKSZ) != XLOG_BLCKSZ)
300 die("write failed");
301 fdatasync(tmpfile);
302 if (lseek(tmpfile, 0, SEEK_SET) == -1)
303 die("seek failed");
304 }
305 STOP_TIMER;
306 close(tmpfile);
307 #else
308 printf(NA_FORMAT, _("n/a"));
309 #endif
310
311 /*
312 * Test fsync
313 */
314 printf(LABEL_FORMAT, "fsync");
315 fflush(stdout);
316
317 if ((tmpfile = open(filename, O_RDWR | PG_BINARY, 0)) == -1)
318 die("could not open output file");
319 START_TIMER;
320 for (ops = 0; alarm_triggered == false; ops++)
321 {
322 for (writes = 0; writes < writes_per_op; writes++)
323 if (write(tmpfile, buf, XLOG_BLCKSZ) != XLOG_BLCKSZ)
324 die("write failed");
325 if (fsync(tmpfile) != 0)
326 die("fsync failed");
327 if (lseek(tmpfile, 0, SEEK_SET) == -1)
328 die("seek failed");
329 }
330 STOP_TIMER;
331 close(tmpfile);
332
333 /*
334 * If fsync_writethrough is available, test as well
335 */
336 printf(LABEL_FORMAT, "fsync_writethrough");
337 fflush(stdout);
338
339 #ifdef HAVE_FSYNC_WRITETHROUGH
340 if ((tmpfile = open(filename, O_RDWR | PG_BINARY, 0)) == -1)
341 die("could not open output file");
342 START_TIMER;
343 for (ops = 0; alarm_triggered == false; ops++)
344 {
345 for (writes = 0; writes < writes_per_op; writes++)
346 if (write(tmpfile, buf, XLOG_BLCKSZ) != XLOG_BLCKSZ)
347 die("write failed");
348 if (pg_fsync_writethrough(tmpfile) != 0)
349 die("fsync failed");
350 if (lseek(tmpfile, 0, SEEK_SET) == -1)
351 die("seek failed");
352 }
353 STOP_TIMER;
354 close(tmpfile);
355 #else
356 printf(NA_FORMAT, _("n/a"));
357 #endif
358
359 /*
360 * Test open_sync if available
361 */
362 printf(LABEL_FORMAT, "open_sync");
363 fflush(stdout);
364
365 #ifdef OPEN_SYNC_FLAG
366 if ((tmpfile = open(filename, O_RDWR | OPEN_SYNC_FLAG | PG_O_DIRECT | PG_BINARY, 0)) == -1)
367 {
368 printf(NA_FORMAT, _("n/a*"));
369 fs_warning = true;
370 }
371 else
372 {
373 START_TIMER;
374 for (ops = 0; alarm_triggered == false; ops++)
375 {
376 for (writes = 0; writes < writes_per_op; writes++)
377 if (write(tmpfile, buf, XLOG_BLCKSZ) != XLOG_BLCKSZ)
378
379 /*
380 * This can generate write failures if the filesystem has
381 * a large block size, e.g. 4k, and there is no support
382 * for O_DIRECT writes smaller than the file system block
383 * size, e.g. XFS.
384 */
385 die("write failed");
386 if (lseek(tmpfile, 0, SEEK_SET) == -1)
387 die("seek failed");
388 }
389 STOP_TIMER;
390 close(tmpfile);
391 }
392 #else
393 printf(NA_FORMAT, _("n/a"));
394 #endif
395
396 if (fs_warning)
397 {
398 printf(_("* This file system and its mount options do not support direct\n"
399 " I/O, e.g. ext4 in journaled mode.\n"));
400 }
401 }
402
/*
 * Run the open_sync write-size comparison: every case writes 16kB per
 * operation, split into progressively smaller write() calls.
 */
static void
test_open_syncs(void)
{
	printf(_("\nCompare open_sync with different write sizes:\n"));
	printf(_("(This is designed to compare the cost of writing 16kB in different write\n"
			 "open_sync sizes.)\n"));

	/* second argument is the size of each individual write, in kB */
	test_open_sync(_(" 1 * 16kB open_sync write"), 16);
	test_open_sync(_(" 2 *  8kB open_sync writes"), 8);
	test_open_sync(_(" 4 *  4kB open_sync writes"), 4);
	test_open_sync(_(" 8 *  2kB open_sync writes"), 2);
	test_open_sync(_("16 *  1kB open_sync writes"), 1);
}
416
417 /*
418 * Test open_sync with different size files
419 */
420 static void
test_open_sync(const char * msg,int writes_size)421 test_open_sync(const char *msg, int writes_size)
422 {
423 #ifdef OPEN_SYNC_FLAG
424 int tmpfile,
425 ops,
426 writes;
427 #endif
428
429 printf(LABEL_FORMAT, msg);
430 fflush(stdout);
431
432 #ifdef OPEN_SYNC_FLAG
433 if ((tmpfile = open(filename, O_RDWR | OPEN_SYNC_FLAG | PG_O_DIRECT | PG_BINARY, 0)) == -1)
434 printf(NA_FORMAT, _("n/a*"));
435 else
436 {
437 START_TIMER;
438 for (ops = 0; alarm_triggered == false; ops++)
439 {
440 for (writes = 0; writes < 16 / writes_size; writes++)
441 if (write(tmpfile, buf, writes_size * 1024) !=
442 writes_size * 1024)
443 die("write failed");
444 if (lseek(tmpfile, 0, SEEK_SET) == -1)
445 die("seek failed");
446 }
447 STOP_TIMER;
448 close(tmpfile);
449 }
450 #else
451 printf(NA_FORMAT, _("n/a"));
452 #endif
453 }
454
455 static void
test_file_descriptor_sync(void)456 test_file_descriptor_sync(void)
457 {
458 int tmpfile,
459 ops;
460
461 /*
462 * Test whether fsync can sync data written on a different descriptor for
463 * the same file. This checks the efficiency of multi-process fsyncs
464 * against the same file. Possibly this should be done with writethrough
465 * on platforms which support it.
466 */
467 printf(_("\nTest if fsync on non-write file descriptor is honored:\n"));
468 printf(_("(If the times are similar, fsync() can sync data written on a different\n"
469 "descriptor.)\n"));
470
471 /*
472 * first write, fsync and close, which is the normal behavior without
473 * multiple descriptors
474 */
475 printf(LABEL_FORMAT, "write, fsync, close");
476 fflush(stdout);
477
478 START_TIMER;
479 for (ops = 0; alarm_triggered == false; ops++)
480 {
481 if ((tmpfile = open(filename, O_RDWR | PG_BINARY, 0)) == -1)
482 die("could not open output file");
483 if (write(tmpfile, buf, XLOG_BLCKSZ) != XLOG_BLCKSZ)
484 die("write failed");
485 if (fsync(tmpfile) != 0)
486 die("fsync failed");
487 close(tmpfile);
488
489 /*
490 * open and close the file again to be consistent with the following
491 * test
492 */
493 if ((tmpfile = open(filename, O_RDWR | PG_BINARY, 0)) == -1)
494 die("could not open output file");
495 close(tmpfile);
496 }
497 STOP_TIMER;
498
499 /*
500 * Now open, write, close, open again and fsync This simulates processes
501 * fsyncing each other's writes.
502 */
503 printf(LABEL_FORMAT, "write, close, fsync");
504 fflush(stdout);
505
506 START_TIMER;
507 for (ops = 0; alarm_triggered == false; ops++)
508 {
509 if ((tmpfile = open(filename, O_RDWR | PG_BINARY, 0)) == -1)
510 die("could not open output file");
511 if (write(tmpfile, buf, XLOG_BLCKSZ) != XLOG_BLCKSZ)
512 die("write failed");
513 close(tmpfile);
514 /* reopen file */
515 if ((tmpfile = open(filename, O_RDWR | PG_BINARY, 0)) == -1)
516 die("could not open output file");
517 if (fsync(tmpfile) != 0)
518 die("fsync failed");
519 close(tmpfile);
520 }
521 STOP_TIMER;
522 }
523
524 static void
test_non_sync(void)525 test_non_sync(void)
526 {
527 int tmpfile,
528 ops;
529
530 /*
531 * Test a simple write without fsync
532 */
533 printf(_("\nNon-sync'ed %dkB writes:\n"), XLOG_BLCKSZ_K);
534 printf(LABEL_FORMAT, "write");
535 fflush(stdout);
536
537 START_TIMER;
538 for (ops = 0; alarm_triggered == false; ops++)
539 {
540 if ((tmpfile = open(filename, O_RDWR | PG_BINARY, 0)) == -1)
541 die("could not open output file");
542 if (write(tmpfile, buf, XLOG_BLCKSZ) != XLOG_BLCKSZ)
543 die("write failed");
544 close(tmpfile);
545 }
546 STOP_TIMER;
547 }
548
549 static void
signal_cleanup(int signum)550 signal_cleanup(int signum)
551 {
552 /* Delete the file if it exists. Ignore errors */
553 if (needs_unlink)
554 unlink(filename);
555 /* Finish incomplete line on stdout */
556 puts("");
557 exit(signum);
558 }
559
560 #ifdef HAVE_FSYNC_WRITETHROUGH
561
/*
 * Sync fd using the platform's write-through primitive: _commit() on WIN32,
 * fcntl(F_FULLFSYNC) where that is defined.
 *
 * Returns 0 on success and -1 on failure.  On platforms offering neither
 * primitive it always fails with errno set to ENOSYS.
 */
static int
pg_fsync_writethrough(int fd)
{
#ifdef WIN32
	return _commit(fd);
#elif defined(F_FULLFSYNC)
	if (fcntl(fd, F_FULLFSYNC, 0) == -1)
		return -1;
	return 0;
#else
	errno = ENOSYS;
	return -1;
#endif
}
574 #endif
575
576 /*
577 * print out the writes per second for tests
578 */
579 static void
print_elapse(struct timeval start_t,struct timeval stop_t,int ops)580 print_elapse(struct timeval start_t, struct timeval stop_t, int ops)
581 {
582 double total_time = (stop_t.tv_sec - start_t.tv_sec) +
583 (stop_t.tv_usec - start_t.tv_usec) * 0.000001;
584 double per_second = ops / total_time;
585 double avg_op_time_us = (total_time / ops) * USECS_SEC;
586
587 printf(_(OPS_FORMAT), per_second, avg_op_time_us);
588 }
589
590 #ifndef WIN32
591 static void
process_alarm(int sig)592 process_alarm(int sig)
593 {
594 alarm_triggered = true;
595 }
596 #else
597 static DWORD WINAPI
process_alarm(LPVOID param)598 process_alarm(LPVOID param)
599 {
600 /* WIN32 doesn't support alarm, so we create a thread and sleep here */
601 Sleep(secs_per_test * 1000);
602 alarm_triggered = true;
603 ExitThread(0);
604 }
605 #endif
606