/*
 *	pg_test_fsync.c
 *		tests all supported fsync() methods
 *
 * Each test repeatedly writes to a scratch file for secs_per_test seconds
 * using one sync method and reports how many write operations per second
 * were completed.
 */

#include "postgres_fe.h"

#include <sys/stat.h>
#include <sys/time.h>
#include <time.h>
#include <unistd.h>
#include <signal.h>

#include "getopt_long.h"
#include "access/xlogdefs.h"


/*
 * put the temp files in the local directory
 * unless the user specifies otherwise
 */
#define FSYNC_FILENAME	"./pg_test_fsync.out"

#define XLOG_BLCKSZ_K	(XLOG_BLCKSZ / 1024)

#define LABEL_FORMAT	" %-32s"
#define NA_FORMAT		"%18s"
#define OPS_FORMAT		"%9.3f ops/sec"

/* These are macros to avoid timing the function call overhead. */
#ifndef WIN32
#define START_TIMER \
do { \
	alarm_triggered = false; \
	alarm(secs_per_test); \
	gettimeofday(&start_t, NULL); \
} while (0)
#else
/*
 * WIN32 doesn't support alarm, so we create a thread and sleep there.
 * CreateThread() reports failure by returning NULL.  The handle is closed
 * right away because nothing waits on it; closing a thread handle does not
 * stop the thread.
 */
#define START_TIMER \
do { \
	HANDLE		alarm_thread; \
	alarm_triggered = false; \
	alarm_thread = CreateThread(NULL, 0, process_alarm, NULL, 0, NULL); \
	if (alarm_thread == NULL) \
	{ \
		fprintf(stderr, "Cannot create thread for alarm\n"); \
		exit(1); \
	} \
	CloseHandle(alarm_thread); \
	gettimeofday(&start_t, NULL); \
} while (0)
#endif

#define STOP_TIMER \
do { \
	gettimeofday(&stop_t, NULL); \
	print_elapse(start_t, stop_t, ops); \
} while (0)


static const char *progname;

static int	secs_per_test = 2;
static int	needs_unlink = 0;
static char full_buf[XLOG_SEG_SIZE],
		   *buf,
		   *filename = FSYNC_FILENAME;
static struct timeval start_t,
			stop_t;

/*
 * Set asynchronously (signal handler, or helper thread on WIN32) and polled
 * by the test loops, so it must be volatile to keep the compiler from
 * caching the value across loop iterations.
 */
static volatile sig_atomic_t alarm_triggered = false;


static void handle_args(int argc, char *argv[]);
static void prepare_buf(void);
static void test_open(void);
static void test_non_sync(void);
static void test_sync(int writes_per_op);
static void test_open_syncs(void);
static void test_open_sync(const char *msg, int writes_size);
static void test_file_descriptor_sync(void);

#ifndef WIN32
static void process_alarm(int sig);
#else
static DWORD WINAPI process_alarm(LPVOID param);
#endif
static void signal_cleanup(int sig);

#ifdef HAVE_FSYNC_WRITETHROUGH
static int	pg_fsync_writethrough(int fd);
#endif
static void print_elapse(struct timeval start_t, struct timeval stop_t, int ops);
static void die(const char *str);


/*
 * Program entry point: parse options, install signal handlers, create the
 * scratch file, run every test group, then remove the scratch file.
 */
int
main(int argc, char *argv[])
{
	progname = get_progname(argv[0]);

	handle_args(argc, argv);

	/* Prevent leaving behind the test file */
	signal(SIGINT, signal_cleanup);
	signal(SIGTERM, signal_cleanup);
#ifndef WIN32
	signal(SIGALRM, process_alarm);
#endif
#ifdef SIGHUP
	/* Not defined on win32 */
	signal(SIGHUP, signal_cleanup);
#endif

	prepare_buf();

	test_open();

	/* Test using 1 XLOG_BLCKSZ write */
	test_sync(1);

	/* Test using 2 XLOG_BLCKSZ writes */
	test_sync(2);

	test_open_syncs();

	test_file_descriptor_sync();

	test_non_sync();

	unlink(filename);

	return 0;
}

/*
 * Parse the command line.
 *
 * Recognizes --help/-h/-?, --version/-V, -f/--filename and
 * -s/--secs-per-test.  On any usage error a message is written to stderr
 * and the program exits with status 1.
 */
static void
handle_args(int argc, char *argv[])
{
	static struct option long_options[] = {
		{"filename", required_argument, NULL, 'f'},
		{"secs-per-test", required_argument, NULL, 's'},
		{NULL, 0, NULL, 0}
	};

	int			option;			/* Command line option */
	int			optindex = 0;	/* used by getopt_long */

	if (argc > 1)
	{
		if (strcmp(argv[1], "--help") == 0 || strcmp(argv[1], "-h") == 0 ||
			strcmp(argv[1], "-?") == 0)
		{
			printf("Usage: %s [-f FILENAME] [-s SECS-PER-TEST]\n", progname);
			exit(0);
		}
		if (strcmp(argv[1], "--version") == 0 || strcmp(argv[1], "-V") == 0)
		{
			puts("pg_test_fsync (PostgreSQL) " PG_VERSION);
			exit(0);
		}
	}

	while ((option = getopt_long(argc, argv, "f:s:",
								 long_options, &optindex)) != -1)
	{
		switch (option)
		{
			case 'f':
				filename = strdup(optarg);
				if (filename == NULL)
				{
					fprintf(stderr, "%s: out of memory\n", progname);
					exit(1);
				}
				break;

			case 's':
				{
					char	   *endptr;
					long		val;

					/*
					 * The value must be a positive integer that fits in an
					 * int.  Zero is rejected because alarm(0) cancels the
					 * alarm, so the timed loops would never terminate.
					 */
					errno = 0;
					val = strtol(optarg, &endptr, 10);
					if (endptr == optarg || *endptr != '\0' ||
						errno == ERANGE ||
						val <= 0 || (long) (int) val != val)
					{
						fprintf(stderr,
								"%s: invalid value \"%s\" for option -s/--secs-per-test (must be a positive integer)\n",
								progname, optarg);
						fprintf(stderr,
								"Try \"%s --help\" for more information.\n",
								progname);
						exit(1);
					}
					secs_per_test = (int) val;
				}
				break;

			default:
				fprintf(stderr,
						"Try \"%s --help\" for more information.\n",
						progname);
				exit(1);
				break;
		}
	}

	if (argc > optind)
	{
		fprintf(stderr,
				"%s: too many command-line arguments (first is \"%s\")\n",
				progname, argv[optind]);
		fprintf(stderr, "Try \"%s --help\" for more information.\n",
				progname);
		exit(1);
	}

	printf("%d seconds per test\n", secs_per_test);
#if PG_O_DIRECT != 0
	printf("O_DIRECT supported on this platform for open_datasync and open_sync.\n");
#else
	printf("Direct I/O is not supported on this platform.\n");
#endif
}

/*
 * Fill full_buf with random bytes and point buf at the first address inside
 * it that satisfies ALIGNOF_XLOG_BUFFER alignment.
 */
static void
prepare_buf(void)
{
	int			ops;

	/* write random data into buffer */
	for (ops = 0; ops < XLOG_SEG_SIZE; ops++)
		full_buf[ops] = random();

	buf = (char *) TYPEALIGN(ALIGNOF_XLOG_BUFFER, full_buf);
}

/*
 * Create the scratch file, fill it with XLOG_SEG_SIZE bytes and fsync it.
 * Exits via die() if any step fails.
 */
static void
test_open(void)
{
	int			tmpfile;

	/*
	 * test if we can open the target file
	 */
	if ((tmpfile = open(filename, O_RDWR | O_CREAT, S_IRUSR | S_IWUSR)) == -1)
		die("could not open output file");
	needs_unlink = 1;
	if (write(tmpfile, full_buf, XLOG_SEG_SIZE) != XLOG_SEG_SIZE)
		die("write failed");

	/* fsync now so that dirty buffers don't skew later tests */
	if (fsync(tmpfile) != 0)
		die("fsync failed");

	close(tmpfile);
}

/*
 * Time each available sync method, issuing writes_per_op writes of
 * XLOG_BLCKSZ bytes per operation and rewinding to the start of the file
 * after every operation.
 */
static void
test_sync(int writes_per_op)
{
	int			tmpfile,
				ops,
				writes;
	bool		fs_warning = false;

	if (writes_per_op == 1)
		printf("\nCompare file sync methods using one %dkB write:\n", XLOG_BLCKSZ_K);
	else
		printf("\nCompare file sync methods using two %dkB writes:\n", XLOG_BLCKSZ_K);
	printf("(in wal_sync_method preference order, except fdatasync\n");
	printf("is Linux's default)\n");

	/*
	 * Test open_datasync if available
	 */
	printf(LABEL_FORMAT, "open_datasync");
	fflush(stdout);

#ifdef OPEN_DATASYNC_FLAG
	if ((tmpfile = open(filename, O_RDWR | O_DSYNC | PG_O_DIRECT, 0)) == -1)
	{
		printf(NA_FORMAT, "n/a*\n");
		fs_warning = true;
	}
	else
	{
		/* The descriptor opened above is used directly for the test. */
		START_TIMER;
		for (ops = 0; alarm_triggered == false; ops++)
		{
			for (writes = 0; writes < writes_per_op; writes++)
				if (write(tmpfile, buf, XLOG_BLCKSZ) != XLOG_BLCKSZ)
					die("write failed");
			if (lseek(tmpfile, 0, SEEK_SET) == -1)
				die("seek failed");
		}
		STOP_TIMER;
		close(tmpfile);
	}
#else
	printf(NA_FORMAT, "n/a\n");
#endif

	/*
	 * Test fdatasync if available
	 */
	printf(LABEL_FORMAT, "fdatasync");
	fflush(stdout);

#ifdef HAVE_FDATASYNC
	if ((tmpfile = open(filename, O_RDWR, 0)) == -1)
		die("could not open output file");
	START_TIMER;
	for (ops = 0; alarm_triggered == false; ops++)
	{
		for (writes = 0; writes < writes_per_op; writes++)
			if (write(tmpfile, buf, XLOG_BLCKSZ) != XLOG_BLCKSZ)
				die("write failed");
		/* A failed sync would make the reported rate meaningless. */
		if (fdatasync(tmpfile) != 0)
			die("fdatasync failed");
		if (lseek(tmpfile, 0, SEEK_SET) == -1)
			die("seek failed");
	}
	STOP_TIMER;
	close(tmpfile);
#else
	printf(NA_FORMAT, "n/a\n");
#endif

	/*
	 * Test fsync
	 */
	printf(LABEL_FORMAT, "fsync");
	fflush(stdout);

	if ((tmpfile = open(filename, O_RDWR, 0)) == -1)
		die("could not open output file");
	START_TIMER;
	for (ops = 0; alarm_triggered == false; ops++)
	{
		for (writes = 0; writes < writes_per_op; writes++)
			if (write(tmpfile, buf, XLOG_BLCKSZ) != XLOG_BLCKSZ)
				die("write failed");
		if (fsync(tmpfile) != 0)
			die("fsync failed");
		if (lseek(tmpfile, 0, SEEK_SET) == -1)
			die("seek failed");
	}
	STOP_TIMER;
	close(tmpfile);

	/*
	 * If fsync_writethrough is available, test as well
	 */
	printf(LABEL_FORMAT, "fsync_writethrough");
	fflush(stdout);

#ifdef HAVE_FSYNC_WRITETHROUGH
	if ((tmpfile = open(filename, O_RDWR, 0)) == -1)
		die("could not open output file");
	START_TIMER;
	for (ops = 0; alarm_triggered == false; ops++)
	{
		for (writes = 0; writes < writes_per_op; writes++)
			if (write(tmpfile, buf, XLOG_BLCKSZ) != XLOG_BLCKSZ)
				die("write failed");
		if (pg_fsync_writethrough(tmpfile) != 0)
			die("fsync failed");
		if (lseek(tmpfile, 0, SEEK_SET) == -1)
			die("seek failed");
	}
	STOP_TIMER;
	close(tmpfile);
#else
	printf(NA_FORMAT, "n/a\n");
#endif

	/*
	 * Test open_sync if available
	 */
	printf(LABEL_FORMAT, "open_sync");
	fflush(stdout);

#ifdef OPEN_SYNC_FLAG
	if ((tmpfile = open(filename, O_RDWR | OPEN_SYNC_FLAG | PG_O_DIRECT, 0)) == -1)
	{
		printf(NA_FORMAT, "n/a*\n");
		fs_warning = true;
	}
	else
	{
		START_TIMER;
		for (ops = 0; alarm_triggered == false; ops++)
		{
			for (writes = 0; writes < writes_per_op; writes++)
				if (write(tmpfile, buf, XLOG_BLCKSZ) != XLOG_BLCKSZ)
					die("write failed");
			if (lseek(tmpfile, 0, SEEK_SET) == -1)
				die("seek failed");
		}
		STOP_TIMER;
		close(tmpfile);
	}
#else
	printf(NA_FORMAT, "n/a\n");
#endif

	if (fs_warning)
	{
		printf("* This file system and its mount options do not support direct\n");
		printf("I/O, e.g. ext4 in journaled mode.\n");
	}
}

/*
 * Run the open_sync test with several write sizes that each add up to 16kB
 * per operation.
 */
static void
test_open_syncs(void)
{
	printf("\nCompare open_sync with different write sizes:\n");
	printf("(This is designed to compare the cost of writing 16kB\n");
	printf("in different write open_sync sizes.)\n");

	test_open_sync(" 1 * 16kB open_sync write", 16);
	test_open_sync(" 2 * 8kB open_sync writes", 8);
	test_open_sync(" 4 * 4kB open_sync writes", 4);
	test_open_sync(" 8 * 2kB open_sync writes", 2);
	test_open_sync("16 * 1kB open_sync writes", 1);
}

/*
 * Test open_sync with different size files
 *
 * msg is the label printed for this run; writes_size is the size of each
 * individual write in kB.  Each operation performs 16 / writes_size writes.
 */
static void
test_open_sync(const char *msg, int writes_size)
{
#ifdef OPEN_SYNC_FLAG
	int			tmpfile,
				ops,
				writes;
#endif

	printf(LABEL_FORMAT, msg);
	fflush(stdout);

#ifdef OPEN_SYNC_FLAG
	if ((tmpfile = open(filename, O_RDWR | OPEN_SYNC_FLAG | PG_O_DIRECT, 0)) == -1)
		printf(NA_FORMAT, "n/a*\n");
	else
	{
		START_TIMER;
		for (ops = 0; alarm_triggered == false; ops++)
		{
			for (writes = 0; writes < 16 / writes_size; writes++)
				if (write(tmpfile, buf, writes_size * 1024) !=
					writes_size * 1024)
					die("write failed");
			if (lseek(tmpfile, 0, SEEK_SET) == -1)
				die("seek failed");
		}
		STOP_TIMER;
		close(tmpfile);
	}
#else
	printf(NA_FORMAT, "n/a\n");
#endif
}

/*
 * Compare fsync on the writing descriptor against fsync on a freshly opened
 * descriptor for the same file.
 */
static void
test_file_descriptor_sync(void)
{
	int			tmpfile,
				ops;

	/*
	 * Test whether fsync can sync data written on a different descriptor for
	 * the same file.  This checks the efficiency of multi-process fsyncs
	 * against the same file. Possibly this should be done with writethrough
	 * on platforms which support it.
	 */
	printf("\nTest if fsync on non-write file descriptor is honored:\n");
	printf("(If the times are similar, fsync() can sync data written\n");
	printf("on a different descriptor.)\n");

	/*
	 * first write, fsync and close, which is the normal behavior without
	 * multiple descriptors
	 */
	printf(LABEL_FORMAT, "write, fsync, close");
	fflush(stdout);

	START_TIMER;
	for (ops = 0; alarm_triggered == false; ops++)
	{
		if ((tmpfile = open(filename, O_RDWR, 0)) == -1)
			die("could not open output file");
		if (write(tmpfile, buf, XLOG_BLCKSZ) != XLOG_BLCKSZ)
			die("write failed");
		if (fsync(tmpfile) != 0)
			die("fsync failed");
		close(tmpfile);

		/*
		 * open and close the file again to be consistent with the following
		 * test
		 */
		if ((tmpfile = open(filename, O_RDWR, 0)) == -1)
			die("could not open output file");
		close(tmpfile);
	}
	STOP_TIMER;

	/*
	 * Now open, write, close, open again and fsync This simulates processes
	 * fsyncing each other's writes.
	 */
	printf(LABEL_FORMAT, "write, close, fsync");
	fflush(stdout);

	START_TIMER;
	for (ops = 0; alarm_triggered == false; ops++)
	{
		if ((tmpfile = open(filename, O_RDWR, 0)) == -1)
			die("could not open output file");
		if (write(tmpfile, buf, XLOG_BLCKSZ) != XLOG_BLCKSZ)
			die("write failed");
		close(tmpfile);
		/* reopen file */
		if ((tmpfile = open(filename, O_RDWR, 0)) == -1)
			die("could not open output file");
		if (fsync(tmpfile) != 0)
			die("fsync failed");
		close(tmpfile);
	}
	STOP_TIMER;
}

/*
 * Time open/write/close cycles of XLOG_BLCKSZ bytes with no sync call.
 */
static void
test_non_sync(void)
{
	int			tmpfile,
				ops;

	/*
	 * Test a simple write without fsync
	 */
	printf("\nNon-Sync'ed %dkB writes:\n", XLOG_BLCKSZ_K);
	printf(LABEL_FORMAT, "write");
	fflush(stdout);

	START_TIMER;
	for (ops = 0; alarm_triggered == false; ops++)
	{
		if ((tmpfile = open(filename, O_RDWR, 0)) == -1)
			die("could not open output file");
		if (write(tmpfile, buf, XLOG_BLCKSZ) != XLOG_BLCKSZ)
			die("write failed");
		close(tmpfile);
	}
	STOP_TIMER;
}

/*
 * Handler for SIGINT/SIGTERM/SIGHUP: remove the scratch file if it was
 * created, terminate the current output line and exit with the signal
 * number as the status.
 *
 * NOTE(review): puts() and exit() are not async-signal-safe; behavior is
 * kept as-is here, but write()/_exit() would be the safer choice.
 */
static void
signal_cleanup(int signum)
{
	/* Delete the file if it exists. Ignore errors */
	if (needs_unlink)
		unlink(filename);
	/* Finish incomplete line on stdout */
	puts("");
	exit(signum);
}

#ifdef HAVE_FSYNC_WRITETHROUGH

/*
 * Flush fd using the platform's write-through primitive: _commit() on
 * WIN32, fcntl(F_FULLFSYNC) where defined.  Returns 0 on success; otherwise
 * -1 (with errno set to ENOSYS when no primitive is available).
 */
static int
pg_fsync_writethrough(int fd)
{
#ifdef WIN32
	return _commit(fd);
#elif defined(F_FULLFSYNC)
	return (fcntl(fd, F_FULLFSYNC, 0) == -1) ? -1 : 0;
#else
	errno = ENOSYS;
	return -1;
#endif
}
#endif

/*
 * print out the writes per second for tests
 */
static void
print_elapse(struct timeval start_t, struct timeval stop_t, int ops)
{
	double		total_time = (stop_t.tv_sec - start_t.tv_sec) +
	(stop_t.tv_usec - start_t.tv_usec) * 0.000001;
	double		per_second = ops / total_time;

	printf(OPS_FORMAT "\n", per_second);
}

#ifndef WIN32
/*
 * SIGALRM handler: tell the running test loop to stop.
 */
static void
process_alarm(int sig)
{
	alarm_triggered = true;
}
#else
/*
 * Thread entry point standing in for alarm() on WIN32.
 */
static DWORD WINAPI
process_alarm(LPVOID param)
{
	/* WIN32 doesn't support alarm, so we create a thread and sleep here */
	/* multiply as DWORD so a large secs_per_test cannot overflow a signed int */
	Sleep((DWORD) secs_per_test * 1000);
	alarm_triggered = true;
	ExitThread(0);
}
#endif

/*
 * Print str followed by the message for the current errno to stderr, then
 * exit with status 1.
 */
static void
die(const char *str)
{
	fprintf(stderr, "%s: %s\n", str, strerror(errno));
	exit(1);
}