From 677b06ca462ec6fd98da9369a2eae6085c9d7fed Mon Sep 17 00:00:00 2001 From: Bruce Momjian Date: Sat, 15 Jan 2011 11:54:43 -0500 Subject: [PATCH] Apply patch for test_fsync to add tests for O_DIRECT. Adjusted patch by Josh Berkus --- src/tools/fsync/Makefile | 4 +- src/tools/fsync/README | 21 ++- src/tools/fsync/test_fsync.c | 256 +++++++++++++++++++++++++++++++---- 3 files changed, 249 insertions(+), 32 deletions(-) diff --git a/src/tools/fsync/Makefile b/src/tools/fsync/Makefile index fe3e626223..44419eef49 100644 --- a/src/tools/fsync/Makefile +++ b/src/tools/fsync/Makefile @@ -16,9 +16,9 @@ override CPPFLAGS := -I$(libpq_srcdir) $(CPPFLAGS) OBJS= test_fsync.o -all: test_fsync +all: submake-libpq submake-libpgport test_fsync -test_fsync: test_fsync.o | submake-libpq submake-libpgport +test_fsync: test_fsync.o $(libpq_builddir)/libpq.a $(CC) $(CFLAGS) test_fsync.o $(libpq_pgport) $(LDFLAGS) $(LDFLAGS_EX) $(LIBS) -o $@$(X) clean distclean maintainer-clean: diff --git a/src/tools/fsync/README b/src/tools/fsync/README index 6d9acd333f..a1c2ae4594 100644 --- a/src/tools/fsync/README +++ b/src/tools/fsync/README @@ -1,11 +1,20 @@ -src/tools/fsync/README - -fsync -===== +test_fsync +========== This program tests fsync. The tests are described as part of the program output. Usage: test_fsync [-f filename] [loops] + +test_fsync is intended to give you a reasonable idea of what the fastest +fsync_method is on your specific system, as well as supplying diagnostic +information in the event of an identified I/O problem. However, +differences shown by test_fsync might not make any difference in real +database throughput, especially since many database servers are not +speed-limited by their transaction logs. + +The output filename defaults to test_fsync.out in the current directory. +test_fsync should be run in the same filesystem as your transaction log +directory (pg_xlog). + +Loops default to 2000. Increase this to get more accurate measurements. -Loops defaults to 5000. The default output file is /var/tmp/test_fsync.out. -Consider that /tmp or /var/tmp might be memory-based file systems. diff --git a/src/tools/fsync/test_fsync.c b/src/tools/fsync/test_fsync.c index 28c211929e..831e2a008f 100644 --- a/src/tools/fsync/test_fsync.c +++ b/src/tools/fsync/test_fsync.c @@ -3,7 +3,7 @@ * * * test_fsync.c - * test various fsync() methods + * tests all supported fsync() methods */ #include "postgres.h" @@ -22,19 +22,18 @@ #include #include - -#ifdef WIN32 +/* + * put the temp files in the local directory + * unless the user specifies otherwise + */ #define FSYNC_FILENAME "./test_fsync.out" -#else -/* /tmp might be a memory file system */ -#define FSYNC_FILENAME "/var/tmp/test_fsync.out" -#endif #define WRITE_SIZE (8 * 1024) /* 8k */ #define LABEL_FORMAT "\t%-30s" -int loops = 10000; + +int loops = 2000; void die(char *str); void print_elapse(struct timeval start_t, struct timeval stop_t); @@ -42,14 +41,14 @@ void print_elapse(struct timeval start_t, struct timeval stop_t); int main(int argc, char *argv[]) { - struct timeval start_t; - struct timeval stop_t; - int tmpfile, - i; + struct timeval start_t, stop_t; + int tmpfile, i; char *full_buf = (char *) malloc(XLOG_SEG_SIZE), - *buf; - char *filename = FSYNC_FILENAME; + *buf, *filename = FSYNC_FILENAME; + /* + * arguments: loops and filename (optional) + */ if (argc > 2 && strcmp(argv[1], "-f") == 0) { filename = argv[2]; @@ -57,17 +56,22 @@ main(int argc, char *argv[]) argc -= 2; } - if (argc > 1) + if (argc > 1) loops = atoi(argv[1]); for (i = 0; i < XLOG_SEG_SIZE; i++) full_buf[i] = random(); + /* + * test if we can open the target file + */ if ((tmpfile = open(filename, O_RDWR | O_CREAT, S_IRUSR | S_IWUSR)) == -1) die("Cannot open output file."); if (write(tmpfile, full_buf, XLOG_SEG_SIZE) != XLOG_SEG_SIZE) die("write failed"); - /* fsync now so later fsync's don't have to do it */ + /* + * fsync now so that dirty buffers don't skew later tests + */ if (fsync(tmpfile) != 0) die("fsync failed"); close(tmpfile); @@ -77,7 +81,7 @@ main(int argc, char *argv[]) printf("Loops = %d\n\n", loops); /* - * Simple write + * Test a simple write without fsync */ printf("Simple write:\n"); printf(LABEL_FORMAT, "8k write"); @@ -95,10 +99,13 @@ main(int argc, char *argv[]) print_elapse(start_t, stop_t); /* - * Compare file sync methods with one 8k write + * Test all fsync methods using single 8k writes */ printf("\nCompare file sync methods using one write:\n"); + /* + * Test open_datasync if available + */ #ifdef OPEN_DATASYNC_FLAG printf(LABEL_FORMAT, "open_datasync 8k write"); fflush(stdout); @@ -115,10 +122,40 @@ main(int argc, char *argv[]) gettimeofday(&stop_t, NULL); close(tmpfile); print_elapse(start_t, stop_t); + + /* + * If O_DIRECT is enabled, test that with open_datasync + */ +#if PG_O_DIRECT != 0 + fflush(stdout); + if ((tmpfile = open(filename, O_RDWR | O_DSYNC | PG_O_DIRECT, 0)) == -1) + printf("\t(unavailable: o_direct on this filesystem)\n"); + else + { + printf(LABEL_FORMAT, "open_datasync 8k direct I/O write"); + gettimeofday(&start_t, NULL); + for (i = 0; i < loops; i++) + { + if (write(tmpfile, buf, WRITE_SIZE) != WRITE_SIZE) + die("write failed"); + if (lseek(tmpfile, 0, SEEK_SET) == -1) + die("seek failed"); + } + gettimeofday(&stop_t, NULL); + close(tmpfile); + print_elapse(start_t, stop_t); + } +#else + printf("\t(unavailable: o_direct)\n"); +#endif + #else printf("\t(unavailable: open_datasync)\n"); #endif +/* + * Test open_sync if available + */ #ifdef OPEN_SYNC_FLAG printf(LABEL_FORMAT, "open_sync 8k write"); fflush(stdout); @@ -135,10 +172,40 @@ main(int argc, char *argv[]) gettimeofday(&stop_t, NULL); close(tmpfile); print_elapse(start_t, stop_t); + + /* + * If O_DIRECT is enabled, test that with open_sync + */ +#if PG_O_DIRECT != 0 + printf(LABEL_FORMAT, "open_sync 8k direct I/O write"); + fflush(stdout); + if ((tmpfile = open(filename, O_RDWR | OPEN_SYNC_FLAG | PG_O_DIRECT, 0)) == -1) + printf("\t(unavailable: o_direct on this filesystem)\n"); + else + { + gettimeofday(&start_t, NULL); + for (i = 0; i < loops; i++) + { + if (write(tmpfile, buf, WRITE_SIZE) != WRITE_SIZE) + die("write failed"); + if (lseek(tmpfile, 0, SEEK_SET) == -1) + die("seek failed"); + } + gettimeofday(&stop_t, NULL); + close(tmpfile); + print_elapse(start_t, stop_t); + } +#else + printf("\t(unavailable: o_direct)\n"); +#endif + #else printf("\t(unavailable: open_sync)\n"); #endif +/* + * Test fdatasync if available + */ #ifdef HAVE_FDATASYNC printf(LABEL_FORMAT, "8k write, fdatasync"); fflush(stdout); @@ -160,6 +227,9 @@ main(int argc, char *argv[]) printf("\t(unavailable: fdatasync)\n"); #endif +/* + * Test fsync + */ printf(LABEL_FORMAT, "8k write, fsync"); fflush(stdout); if ((tmpfile = open(filename, O_RDWR, 0)) == -1) @@ -177,14 +247,43 @@ main(int argc, char *argv[]) gettimeofday(&stop_t, NULL); close(tmpfile); print_elapse(start_t, stop_t); + +/* + * If fsync_writethrough is available, test as well + */ +#ifdef HAVE_FSYNC_WRITETHROUGH + printf(LABEL_FORMAT, "8k write, fsync_writethrough"); + fflush(stdout); + if ((tmpfile = open(filename, O_RDWR, 0)) == -1) + die("Cannot open output file."); + gettimeofday(&start_t, NULL); + for (i = 0; i < loops; i++) + { + if (write(tmpfile, buf, WRITE_SIZE) != WRITE_SIZE) + die("write failed"); + if (fcntl(tmpfile, F_FULLFSYNC ) != 0) + die("fsync failed"); + if (lseek(tmpfile, 0, SEEK_SET) == -1) + die("seek failed"); + } + gettimeofday(&stop_t, NULL); + close(tmpfile); + print_elapse(start_t, stop_t); +#else + printf("\t(unavailable: fsync_writethrough)\n"); +#endif /* - * Compare file sync methods with two 8k write + * Compare some of the file sync methods with + * two 8k writes to see if timing is different */ printf("\nCompare file sync methods using two writes:\n"); +/* + * Test open_datasync with and without o_direct + */ #ifdef OPEN_DATASYNC_FLAG - printf(LABEL_FORMAT, "2 open_datasync 8k writes"); + printf(LABEL_FORMAT, "2 open_datasync 8k writes"); fflush(stdout); if ((tmpfile = open(filename, O_RDWR | O_DSYNC, 0)) == -1) die("Cannot open output file."); @@ -201,10 +300,36 @@ main(int argc, char *argv[]) gettimeofday(&stop_t, NULL); close(tmpfile); print_elapse(start_t, stop_t); + +#if PG_O_DIRECT != 0 + printf(LABEL_FORMAT, "2 open_datasync direct I/O 8k writes"); + fflush(stdout); + if ((tmpfile = open(filename, O_RDWR | O_DSYNC | PG_O_DIRECT, 0)) == -1) + die("Cannot open output file."); + gettimeofday(&start_t, NULL); + for (i = 0; i < loops; i++) + { + if (write(tmpfile, buf, WRITE_SIZE) != WRITE_SIZE) + die("write failed"); + if (write(tmpfile, buf, WRITE_SIZE) != WRITE_SIZE) + die("write failed"); + if (lseek(tmpfile, 0, SEEK_SET) == -1) + die("seek failed"); + } + gettimeofday(&stop_t, NULL); + close(tmpfile); + print_elapse(start_t, stop_t); +#else + printf("\t(unavailable: o_direct)\n"); +#endif + #else printf("\t(unavailable: open_datasync)\n"); #endif +/* + * Test open_sync with and without o_direct + */ #ifdef OPEN_SYNC_FLAG printf(LABEL_FORMAT, "2 open_sync 8k writes"); fflush(stdout); @@ -223,8 +348,36 @@ main(int argc, char *argv[]) gettimeofday(&stop_t, NULL); close(tmpfile); print_elapse(start_t, stop_t); + +#if PG_O_DIRECT != 0 + printf(LABEL_FORMAT, "2 open_sync direct I/O 8k writes"); + fflush(stdout); + if ((tmpfile = open(filename, O_RDWR | OPEN_SYNC_FLAG | PG_O_DIRECT, 0)) == -1) + die("Cannot open output file."); + gettimeofday(&start_t, NULL); + for (i = 0; i < loops; i++) + { + if (write(tmpfile, buf, WRITE_SIZE) != WRITE_SIZE) + die("write failed"); + if (write(tmpfile, buf, WRITE_SIZE) != WRITE_SIZE) + die("write failed"); + if (lseek(tmpfile, 0, SEEK_SET) == -1) + die("seek failed"); + } + gettimeofday(&stop_t, NULL); + close(tmpfile); + print_elapse(start_t, stop_t); +#else + printf("\t(unavailable: o_direct)\n"); #endif +#else + printf("\t(unavailable: open_sync)\n"); +#endif + +/* + * Test fdatasync + */ #ifdef HAVE_FDATASYNC printf(LABEL_FORMAT, "8k write, 8k write, fdatasync"); fflush(stdout); @@ -248,6 +401,9 @@ main(int argc, char *argv[]) printf("\t(unavailable: fdatasync)\n"); #endif +/* + * Test basic fsync + */ printf(LABEL_FORMAT, "8k write, 8k write, fsync"); fflush(stdout); if ((tmpfile = open(filename, O_RDWR, 0)) == -1) @@ -267,12 +423,44 @@ main(int argc, char *argv[]) gettimeofday(&stop_t, NULL); close(tmpfile); print_elapse(start_t, stop_t); + +/* + * Test fsync_writethrough if available + */ +#ifdef HAVE_FSYNC_WRITETHROUGH + printf(LABEL_FORMAT, "8k write, 8k write, fsync_writethrough"); + fflush(stdout); + if ((tmpfile = open(filename, O_RDWR, 0)) == -1) + die("Cannot open output file."); + gettimeofday(&start_t, NULL); + for (i = 0; i < loops; i++) + { + if (write(tmpfile, buf, WRITE_SIZE) != WRITE_SIZE) + die("write failed"); + if (write(tmpfile, buf, WRITE_SIZE) != WRITE_SIZE) + die("write failed"); + if (fcntl(tmpfile, F_FULLFSYNC) != 0) + die("fsync failed"); + if (lseek(tmpfile, 0, SEEK_SET) == -1) + die("seek failed"); + } + gettimeofday(&stop_t, NULL); + close(tmpfile); + print_elapse(start_t, stop_t); +#else + printf("\t(unavailable: fsync_writethrough)\n"); +#endif /* * Compare 1 to 2 writes */ printf("\nCompare open_sync with different sizes:\n"); + printf("(This is designed to compare the cost of one large\n"); + printf("sync'ed write and two smaller sync'ed writes.)\n"); +/* + * Test open_sync with different size files + */ #ifdef OPEN_SYNC_FLAG printf(LABEL_FORMAT, "open_sync 16k write"); fflush(stdout); @@ -312,12 +500,20 @@ main(int argc, char *argv[]) #endif /* - * Fsync another file descriptor? + * Test whether fsync can sync data written on a different + * descriptor for the same file. This checks the efficiency + * of multi-process fsyncs against the same file. + * Possibly this should be done with writethrough on platforms + * which support it. */ printf("\nTest if fsync on non-write file descriptor is honored:\n"); printf("(If the times are similar, fsync() can sync data written\n"); printf("on a different descriptor.)\n"); + /* + * first write, fsync and close, which is the + * normal behavior without multiple descriptors + */ printf(LABEL_FORMAT, "8k write, fsync, close"); fflush(stdout); gettimeofday(&start_t, NULL); @@ -330,14 +526,22 @@ main(int argc, char *argv[]) if (fsync(tmpfile) != 0) die("fsync failed"); close(tmpfile); + /* + * open and close the file again to be consistent + * with the following test + */ if ((tmpfile = open(filename, O_RDWR, 0)) == -1) die("Cannot open output file."); - /* do nothing but the open/close the tests are consistent. */ close(tmpfile); } gettimeofday(&stop_t, NULL); print_elapse(start_t, stop_t); + /* + * Now open, write, close, open again and fsync + * This simulates processes fsyncing each other's + * writes. + */ printf(LABEL_FORMAT, "8k write, close, fsync"); fflush(stdout); gettimeofday(&start_t, NULL); @@ -358,18 +562,22 @@ main(int argc, char *argv[]) gettimeofday(&stop_t, NULL); print_elapse(start_t, stop_t); - /* cleanup */ + /* + * cleanup + */ free(full_buf); unlink(filename); return 0; } +/* + * print out the writes per second for tests + */ void print_elapse(struct timeval start_t, struct timeval stop_t) { double total_time = (stop_t.tv_sec - start_t.tv_sec) + - /* usec subtraction might be negative, e.g. 5.4 - 4.8 */ (stop_t.tv_usec - start_t.tv_usec) * 0.000001; double per_second = loops / total_time;