1 /* 2 * pg_test_fsync.c 3 * tests all supported fsync() methods 4 */ 5 6 #include "postgres_fe.h" 7 8 #include <sys/stat.h> 9 #include <sys/time.h> 10 #include <fcntl.h> 11 #include <time.h> 12 #include <unistd.h> 13 #include <signal.h> 14 15 #include "getopt_long.h" 16 #include "access/xlogdefs.h" 17 #include "common/logging.h" 18 19 20 /* 21 * put the temp files in the local directory 22 * unless the user specifies otherwise 23 */ 24 #define FSYNC_FILENAME "./pg_test_fsync.out" 25 26 #define XLOG_BLCKSZ_K (XLOG_BLCKSZ / 1024) MipsTestBase()27 28 #define LABEL_FORMAT " %-30s" 29 #define NA_FORMAT "%21s\n" 30 /* translator: maintain alignment with NA_FORMAT */ 31 #define OPS_FORMAT gettext_noop("%13.3f ops/sec %6.0f usecs/op\n") 32 #define USECS_SEC 1000000 33 34 /* These are macros to avoid timing the function call overhead. */ 35 #ifndef WIN32 36 #define START_TIMER \ 37 do { \ 38 alarm_triggered = false; \ 39 alarm(secs_per_test); \ 40 gettimeofday(&start_t, NULL); \ 41 } while (0) 42 #else 43 /* WIN32 doesn't support alarm, so we create a thread and sleep there */ 44 #define START_TIMER \ 45 do { \ 46 alarm_triggered = false; \ 47 if (CreateThread(NULL, 0, process_alarm, NULL, 0, NULL) == \ 48 INVALID_HANDLE_VALUE) \ 49 { \ 50 pg_log_error("could not create thread for alarm"); \ 51 exit(1); \ 52 } \ 53 gettimeofday(&start_t, NULL); \ 54 } while (0) 55 #endif 56 57 #define STOP_TIMER \ 58 do { \ 59 gettimeofday(&stop_t, NULL); \ 60 print_elapse(start_t, stop_t, ops); \ 61 } while (0) 62 63 64 static const char *progname; 65 66 static int secs_per_test = 5; 67 static int needs_unlink = 0; 68 static char full_buf[DEFAULT_XLOG_SEG_SIZE], 69 *buf, 70 *filename = FSYNC_FILENAME; 71 static struct timeval start_t, 72 stop_t; 73 static bool alarm_triggered = false; 74 75 76 static void handle_args(int argc, char *argv[]); 77 static void prepare_buf(void); 78 static void test_open(void); 79 static void test_non_sync(void); 80 static void test_sync(int writes_per_op); 81 static void test_open_syncs(void); 82 static void test_open_sync(const char *msg, int writes_size); 83 static void test_file_descriptor_sync(void); 84 85 #ifndef WIN32 86 static void process_alarm(int sig); 87 #else 88 static DWORD WINAPI process_alarm(LPVOID param); 89 #endif 90 static void signal_cleanup(int sig); 91 92 #ifdef HAVE_FSYNC_WRITETHROUGH 93 static int pg_fsync_writethrough(int fd); 94 #endif 95 static void print_elapse(struct timeval start_t, struct timeval stop_t, int ops); 96 97 #define die(msg) do { pg_log_error("%s: %m", _(msg)); exit(1); } while(0) 98 99 100 int 101 main(int argc, char *argv[]) 102 { 103 pg_logging_init(argv[0]); 104 set_pglocale_pgservice(argv[0], PG_TEXTDOMAIN("pg_test_fsync")); 105 progname = get_progname(argv[0]); 106 107 handle_args(argc, argv); 108 109 /* Prevent leaving behind the test file */ 110 pqsignal(SIGINT, signal_cleanup); 111 pqsignal(SIGTERM, signal_cleanup); 112 #ifndef WIN32 113 pqsignal(SIGALRM, process_alarm); 114 #endif 115 #ifdef SIGHUP 116 /* Not defined on win32 */ 117 pqsignal(SIGHUP, signal_cleanup); 118 #endif 119 120 prepare_buf(); 121 122 test_open(); 123 124 /* Test using 1 XLOG_BLCKSZ write */ 125 test_sync(1); 126 127 /* Test using 2 XLOG_BLCKSZ writes */ 128 test_sync(2); 129 130 test_open_syncs(); 131 132 test_file_descriptor_sync(); 133 134 test_non_sync(); 135 136 unlink(filename); 137 138 return 0; 139 } 140 141 static void 142 handle_args(int argc, char *argv[]) 143 { 144 static struct option long_options[] = { 145 {"filename", required_argument, NULL, 'f'}, 146 {"secs-per-test", required_argument, NULL, 's'}, 147 {NULL, 0, NULL, 0} 148 }; 149 150 int option; /* Command line option */ 151 int optindex = 0; /* used by getopt_long */ 152 153 if (argc > 1) 154 { 155 if (strcmp(argv[1], "--help") == 0 || strcmp(argv[1], "-?") == 0) 156 { 157 printf(_("Usage: %s [-f FILENAME] [-s SECS-PER-TEST]\n"), progname); 158 exit(0); 159 } 160 if (strcmp(argv[1], "--version") == 0 || strcmp(argv[1], "-V") == 0) 161 { 162 puts("pg_test_fsync (PostgreSQL) " PG_VERSION); 163 exit(0); 164 } 165 } 166 167 while ((option = getopt_long(argc, argv, "f:s:", 168 long_options, &optindex)) != -1) 169 { 170 switch (option) 171 { 172 case 'f': 173 filename = pg_strdup(optarg); 174 break; 175 176 case 's': 177 secs_per_test = atoi(optarg); 178 break; 179 180 default: 181 fprintf(stderr, _("Try \"%s --help\" for more information.\n"), 182 progname); 183 exit(1); 184 break; 185 } 186 } 187 188 if (argc > optind) 189 { 190 pg_log_error("too many command-line arguments (first is \"%s\")", 191 argv[optind]); 192 fprintf(stderr, _("Try \"%s --help\" for more information.\n"), 193 progname); 194 exit(1); 195 } 196 197 printf(ngettext("%d second per test\n", 198 "%d seconds per test\n", 199 secs_per_test), 200 secs_per_test); 201 #if PG_O_DIRECT != 0 202 printf(_("O_DIRECT supported on this platform for open_datasync and open_sync.\n")); 203 #else 204 printf(_("Direct I/O is not supported on this platform.\n")); 205 #endif 206 } 207 208 static void 209 prepare_buf(void) 210 { 211 int ops; 212 213 /* write random data into buffer */ 214 for (ops = 0; ops < DEFAULT_XLOG_SEG_SIZE; ops++) 215 full_buf[ops] = random(); 216 217 buf = (char *) TYPEALIGN(XLOG_BLCKSZ, full_buf); 218 } 219 220 static void 221 test_open(void) 222 { 223 int tmpfile; 224 225 /* 226 * test if we can open the target file 227 */ 228 if ((tmpfile = open(filename, O_RDWR | O_CREAT | PG_BINARY, S_IRUSR | S_IWUSR)) == -1) 229 die("could not open output file"); 230 needs_unlink = 1; 231 if (write(tmpfile, full_buf, DEFAULT_XLOG_SEG_SIZE) != 232 DEFAULT_XLOG_SEG_SIZE) 233 die("write failed"); 234 235 /* fsync now so that dirty buffers don't skew later tests */ 236 if (fsync(tmpfile) != 0) 237 die("fsync failed"); 238 239 close(tmpfile); 240 } 241 242 static void 243 test_sync(int writes_per_op) 244 { 245 int tmpfile, 246 ops, 247 writes; 248 bool fs_warning = false; 249 250 if (writes_per_op == 1) 251 printf(_("\nCompare file sync methods using one %dkB write:\n"), XLOG_BLCKSZ_K); 252 else 253 printf(_("\nCompare file sync methods using two %dkB writes:\n"), XLOG_BLCKSZ_K); 254 printf(_("(in wal_sync_method preference order, except fdatasync is Linux's default)\n")); 255 256 /* 257 * Test open_datasync if available 258 */ 259 printf(LABEL_FORMAT, "open_datasync"); 260 fflush(stdout); 261 262 #ifdef OPEN_DATASYNC_FLAG 263 if ((tmpfile = open(filename, O_RDWR | O_DSYNC | PG_O_DIRECT | PG_BINARY, 0)) == -1) 264 { 265 printf(NA_FORMAT, _("n/a*")); 266 fs_warning = true; 267 } 268 else 269 { 270 START_TIMER; 271 for (ops = 0; alarm_triggered == false; ops++) 272 { 273 for (writes = 0; writes < writes_per_op; writes++) 274 if (write(tmpfile, buf, XLOG_BLCKSZ) != XLOG_BLCKSZ) 275 die("write failed"); 276 if (lseek(tmpfile, 0, SEEK_SET) == -1) 277 die("seek failed"); 278 } 279 STOP_TIMER; 280 close(tmpfile); 281 } 282 #else 283 printf(NA_FORMAT, _("n/a")); 284 #endif 285 286 /* 287 * Test fdatasync if available 288 */ 289 printf(LABEL_FORMAT, "fdatasync"); 290 fflush(stdout); 291 292 #ifdef HAVE_FDATASYNC 293 if ((tmpfile = open(filename, O_RDWR | PG_BINARY, 0)) == -1) 294 die("could not open output file"); 295 START_TIMER; 296 for (ops = 0; alarm_triggered == false; ops++) 297 { 298 for (writes = 0; writes < writes_per_op; writes++) 299 if (write(tmpfile, buf, XLOG_BLCKSZ) != XLOG_BLCKSZ) 300 die("write failed"); 301 fdatasync(tmpfile); 302 if (lseek(tmpfile, 0, SEEK_SET) == -1) 303 die("seek failed"); 304 } 305 STOP_TIMER; 306 close(tmpfile); 307 #else 308 printf(NA_FORMAT, _("n/a")); 309 #endif 310 311 /* 312 * Test fsync 313 */ 314 printf(LABEL_FORMAT, "fsync"); 315 fflush(stdout); 316 317 if ((tmpfile = open(filename, O_RDWR | PG_BINARY, 0)) == -1) 318 die("could not open output file"); 319 START_TIMER; 320 for (ops = 0; alarm_triggered == false; ops++) 321 { 322 for (writes = 0; writes < writes_per_op; writes++) 323 if (write(tmpfile, buf, XLOG_BLCKSZ) != XLOG_BLCKSZ) 324 die("write failed"); 325 if (fsync(tmpfile) != 0) 326 die("fsync failed"); 327 if (lseek(tmpfile, 0, SEEK_SET) == -1) 328 die("seek failed"); 329 } 330 STOP_TIMER; 331 close(tmpfile); 332 333 /* 334 * If fsync_writethrough is available, test as well 335 */ 336 printf(LABEL_FORMAT, "fsync_writethrough"); 337 fflush(stdout); 338 339 #ifdef HAVE_FSYNC_WRITETHROUGH 340 if ((tmpfile = open(filename, O_RDWR | PG_BINARY, 0)) == -1) 341 die("could not open output file"); 342 START_TIMER; 343 for (ops = 0; alarm_triggered == false; ops++) 344 { 345 for (writes = 0; writes < writes_per_op; writes++) 346 if (write(tmpfile, buf, XLOG_BLCKSZ) != XLOG_BLCKSZ) 347 die("write failed"); 348 if (pg_fsync_writethrough(tmpfile) != 0) 349 die("fsync failed"); 350 if (lseek(tmpfile, 0, SEEK_SET) == -1) 351 die("seek failed"); 352 } 353 STOP_TIMER; 354 close(tmpfile); 355 #else 356 printf(NA_FORMAT, _("n/a")); 357 #endif 358 359 /* 360 * Test open_sync if available 361 */ 362 printf(LABEL_FORMAT, "open_sync"); 363 fflush(stdout); 364 365 #ifdef OPEN_SYNC_FLAG 366 if ((tmpfile = open(filename, O_RDWR | OPEN_SYNC_FLAG | PG_O_DIRECT | PG_BINARY, 0)) == -1) 367 { 368 printf(NA_FORMAT, _("n/a*")); 369 fs_warning = true; 370 } 371 else 372 { 373 START_TIMER; 374 for (ops = 0; alarm_triggered == false; ops++) 375 { 376 for (writes = 0; writes < writes_per_op; writes++) 377 if (write(tmpfile, buf, XLOG_BLCKSZ) != XLOG_BLCKSZ) 378 379 /* 380 * This can generate write failures if the filesystem has 381 * a large block size, e.g. 4k, and there is no support 382 * for O_DIRECT writes smaller than the file system block 383 * size, e.g. XFS. 384 */ 385 die("write failed"); 386 if (lseek(tmpfile, 0, SEEK_SET) == -1) 387 die("seek failed"); 388 } 389 STOP_TIMER; 390 close(tmpfile); 391 } 392 #else 393 printf(NA_FORMAT, _("n/a")); 394 #endif 395 396 if (fs_warning) 397 { 398 printf(_("* This file system and its mount options do not support direct\n" 399 " I/O, e.g. ext4 in journaled mode.\n")); 400 } 401 } 402 403 static void 404 test_open_syncs(void) 405 { 406 printf(_("\nCompare open_sync with different write sizes:\n")); 407 printf(_("(This is designed to compare the cost of writing 16kB in different write\n" 408 "open_sync sizes.)\n")); 409 410 test_open_sync(_(" 1 * 16kB open_sync write"), 16); 411 test_open_sync(_(" 2 * 8kB open_sync writes"), 8); 412 test_open_sync(_(" 4 * 4kB open_sync writes"), 4); 413 test_open_sync(_(" 8 * 2kB open_sync writes"), 2); 414 test_open_sync(_("16 * 1kB open_sync writes"), 1); 415 } 416 417 /* 418 * Test open_sync with different size files 419 */ 420 static void 421 test_open_sync(const char *msg, int writes_size) 422 { 423 #ifdef OPEN_SYNC_FLAG 424 int tmpfile, 425 ops, 426 writes; 427 #endif 428 429 printf(LABEL_FORMAT, msg); 430 fflush(stdout); 431 432 #ifdef OPEN_SYNC_FLAG 433 if ((tmpfile = open(filename, O_RDWR | OPEN_SYNC_FLAG | PG_O_DIRECT | PG_BINARY, 0)) == -1) 434 printf(NA_FORMAT, _("n/a*")); 435 else 436 { 437 START_TIMER; 438 for (ops = 0; alarm_triggered == false; ops++) 439 { 440 for (writes = 0; writes < 16 / writes_size; writes++) 441 if (write(tmpfile, buf, writes_size * 1024) != 442 writes_size * 1024) 443 die("write failed"); 444 if (lseek(tmpfile, 0, SEEK_SET) == -1) 445 die("seek failed"); 446 } 447 STOP_TIMER; 448 close(tmpfile); 449 } 450 #else 451 printf(NA_FORMAT, _("n/a")); 452 #endif 453 } 454 455 static void 456 test_file_descriptor_sync(void) 457 { 458 int tmpfile, 459 ops; 460 461 /* 462 * Test whether fsync can sync data written on a different descriptor for 463 * the same file. This checks the efficiency of multi-process fsyncs 464 * against the same file. Possibly this should be done with writethrough 465 * on platforms which support it. 466 */ 467 printf(_("\nTest if fsync on non-write file descriptor is honored:\n")); 468 printf(_("(If the times are similar, fsync() can sync data written on a different\n" 469 "descriptor.)\n")); 470 471 /* 472 * first write, fsync and close, which is the normal behavior without 473 * multiple descriptors 474 */ 475 printf(LABEL_FORMAT, "write, fsync, close"); 476 fflush(stdout); 477 478 START_TIMER; 479 for (ops = 0; alarm_triggered == false; ops++) 480 { 481 if ((tmpfile = open(filename, O_RDWR | PG_BINARY, 0)) == -1) 482 die("could not open output file"); 483 if (write(tmpfile, buf, XLOG_BLCKSZ) != XLOG_BLCKSZ) 484 die("write failed"); 485 if (fsync(tmpfile) != 0) 486 die("fsync failed"); 487 close(tmpfile); 488 489 /* 490 * open and close the file again to be consistent with the following 491 * test 492 */ 493 if ((tmpfile = open(filename, O_RDWR | PG_BINARY, 0)) == -1) 494 die("could not open output file"); 495 close(tmpfile); 496 } 497 STOP_TIMER; 498 499 /* 500 * Now open, write, close, open again and fsync This simulates processes 501 * fsyncing each other's writes. 502 */ 503 printf(LABEL_FORMAT, "write, close, fsync"); 504 fflush(stdout); 505 506 START_TIMER; 507 for (ops = 0; alarm_triggered == false; ops++) 508 { 509 if ((tmpfile = open(filename, O_RDWR | PG_BINARY, 0)) == -1) 510 die("could not open output file"); 511 if (write(tmpfile, buf, XLOG_BLCKSZ) != XLOG_BLCKSZ) 512 die("write failed"); 513 close(tmpfile); 514 /* reopen file */ 515 if ((tmpfile = open(filename, O_RDWR | PG_BINARY, 0)) == -1) 516 die("could not open output file"); 517 if (fsync(tmpfile) != 0) 518 die("fsync failed"); 519 close(tmpfile); 520 } 521 STOP_TIMER; 522 } 523 524 static void 525 test_non_sync(void) 526 { 527 int tmpfile, 528 ops; 529 530 /* 531 * Test a simple write without fsync 532 */ 533 printf(_("\nNon-sync'ed %dkB writes:\n"), XLOG_BLCKSZ_K); 534 printf(LABEL_FORMAT, "write"); 535 fflush(stdout); 536 537 START_TIMER; 538 for (ops = 0; alarm_triggered == false; ops++) 539 { 540 if ((tmpfile = open(filename, O_RDWR | PG_BINARY, 0)) == -1) 541 die("could not open output file"); 542 if (write(tmpfile, buf, XLOG_BLCKSZ) != XLOG_BLCKSZ) 543 die("write failed"); 544 close(tmpfile); 545 } 546 STOP_TIMER; 547 } 548 549 static void 550 signal_cleanup(int signum) 551 { 552 /* Delete the file if it exists. Ignore errors */ 553 if (needs_unlink) 554 unlink(filename); 555 /* Finish incomplete line on stdout */ 556 puts(""); 557 exit(signum); 558 } 559 560 #ifdef HAVE_FSYNC_WRITETHROUGH 561 562 static int 563 pg_fsync_writethrough(int fd) 564 { 565 #ifdef WIN32 566 return _commit(fd); 567 #elif defined(F_FULLFSYNC) 568 return (fcntl(fd, F_FULLFSYNC, 0) == -1) ? -1 : 0; 569 #else 570 errno = ENOSYS; 571 return -1; 572 #endif 573 } 574 #endif 575 576 /* 577 * print out the writes per second for tests 578 */ 579 static void 580 print_elapse(struct timeval start_t, struct timeval stop_t, int ops) 581 { 582 double total_time = (stop_t.tv_sec - start_t.tv_sec) + 583 (stop_t.tv_usec - start_t.tv_usec) * 0.000001; 584 double per_second = ops / total_time; 585 double avg_op_time_us = (total_time / ops) * USECS_SEC; 586 587 printf(_(OPS_FORMAT), per_second, avg_op_time_us); 588 } 589 590 #ifndef WIN32 591 static void 592 process_alarm(int sig) 593 { 594 alarm_triggered = true; 595 } 596 #else 597 static DWORD WINAPI 598 process_alarm(LPVOID param) 599 { 600 /* WIN32 doesn't support alarm, so we create a thread and sleep here */ 601 Sleep(secs_per_test * 1000); 602 alarm_triggered = true; 603 ExitThread(0); 604 } 605 #endif 606