From b1892aaeaaf34d8d1637221fc1cbda82ac3fcd71 Mon Sep 17 00:00:00 2001 From: Jeff Davis Date: Wed, 4 Sep 2013 23:43:41 -0700 Subject: [PATCH] Revert WAL posix_fallocate() patches. This reverts commit 269e780822abb2e44189afaccd6b0ee7aefa7ddd and commit 5b571bb8c8d2bea610e01ae1ee7bc05adcfff528. Unfortunately, the initial patch had insufficient performance testing, and resulted in a regression. Per report by Thom Brown. --- configure | 3 +- configure.in | 2 +- src/backend/access/transam/xlog.c | 85 +++++++++++++------------------ src/include/pg_config.h.in | 3 -- src/include/pg_config.h.win32 | 3 -- 5 files changed, 36 insertions(+), 60 deletions(-) diff --git a/configure b/configure index d4a544d5c8..c685ca3f91 100755 --- a/configure +++ b/configure @@ -19763,8 +19763,7 @@ LIBS=`echo "$LIBS" | sed -e 's/-ledit//g' -e 's/-lreadline//g'` - -for ac_func in cbrt dlopen fdatasync getifaddrs getpeerucred getrlimit mbstowcs_l memmove poll posix_fallocate pstat readlink setproctitle setsid sigprocmask symlink sync_file_range towlower utime utimes wcstombs wcstombs_l +for ac_func in cbrt dlopen fdatasync getifaddrs getpeerucred getrlimit mbstowcs_l memmove poll pstat readlink setproctitle setsid sigprocmask symlink sync_file_range towlower utime utimes wcstombs wcstombs_l do as_ac_var=`$as_echo "ac_cv_func_$ac_func" | $as_tr_sh` { $as_echo "$as_me:$LINENO: checking for $ac_func" >&5 diff --git a/configure.in b/configure.in index fe2541952f..82771bddb1 100644 --- a/configure.in +++ b/configure.in @@ -1230,7 +1230,7 @@ PGAC_FUNC_GETTIMEOFDAY_1ARG LIBS_including_readline="$LIBS" LIBS=`echo "$LIBS" | sed -e 's/-ledit//g' -e 's/-lreadline//g'` -AC_CHECK_FUNCS([cbrt dlopen fdatasync getifaddrs getpeerucred getrlimit mbstowcs_l memmove poll posix_fallocate pstat readlink setproctitle setsid sigprocmask symlink sync_file_range towlower utime utimes wcstombs wcstombs_l]) +AC_CHECK_FUNCS([cbrt dlopen fdatasync getifaddrs getpeerucred getrlimit mbstowcs_l memmove poll pstat readlink setproctitle setsid sigprocmask symlink sync_file_range towlower utime utimes wcstombs wcstombs_l]) AC_REPLACE_FUNCS(fseeko) case $host_os in diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c index 386811389d..dc47c4760b 100644 --- a/src/backend/access/transam/xlog.c +++ b/src/backend/access/transam/xlog.c @@ -3353,10 +3353,11 @@ XLogFileInit(XLogSegNo logsegno, bool *use_existent, bool use_lock) { char path[MAXPGPATH]; char tmppath[MAXPGPATH]; + char *zbuffer; XLogSegNo installed_segno; int max_advance; int fd; - bool zero_fill = true; + int nbytes; XLogFilePath(path, ThisTimeLineID, logsegno); @@ -3390,6 +3391,16 @@ XLogFileInit(XLogSegNo logsegno, bool *use_existent, bool use_lock) unlink(tmppath); + /* + * Allocate a buffer full of zeros. This is done before opening the file + * so that we don't leak the file descriptor if palloc fails. + * + * Note: palloc zbuffer, instead of just using a local char array, to + * ensure it is reasonably well-aligned; this may save a few cycles + * transferring data to the kernel. + */ + zbuffer = (char *) palloc0(XLOG_BLCKSZ); + /* do not use get_sync_bit() here --- want to fsync only at end of fill */ fd = BasicOpenFile(tmppath, O_RDWR | O_CREAT | O_EXCL | PG_BINARY, S_IRUSR | S_IWUSR); @@ -3398,66 +3409,38 @@ XLogFileInit(XLogSegNo logsegno, bool *use_existent, bool use_lock) (errcode_for_file_access(), errmsg("could not create file \"%s\": %m", tmppath))); -#ifdef HAVE_POSIX_FALLOCATE /* - * If posix_fallocate() is available and succeeds, then the file is - * properly allocated and we don't need to zero-fill it (which is less - * efficient). In case of an error, fall back to writing zeros, because on - * some platforms posix_fallocate() is available but will not always - * succeed in cases where zero-filling will. + * Zero-fill the file. We have to do this the hard way to ensure that all + * the file space has really been allocated --- on platforms that allow + * "holes" in files, just seeking to the end doesn't allocate intermediate + * space. This way, we know that we have all the space and (after the + * fsync below) that all the indirect blocks are down on disk. Therefore, + * fdatasync(2) or O_DSYNC will be sufficient to sync future writes to the + * log file. */ - if (posix_fallocate(fd, 0, XLogSegSize) == 0) - zero_fill = false; -#endif /* HAVE_POSIX_FALLOCATE */ - - if (zero_fill) + for (nbytes = 0; nbytes < XLogSegSize; nbytes += XLOG_BLCKSZ) { - /* - * Allocate a buffer full of zeros. This is done before opening the - * file so that we don't leak the file descriptor if palloc fails. - * - * Note: palloc zbuffer, instead of just using a local char array, to - * ensure it is reasonably well-aligned; this may save a few cycles - * transferring data to the kernel. - */ - - char *zbuffer = (char *) palloc0(XLOG_BLCKSZ); - int nbytes; - - /* - * Zero-fill the file. We have to do this the hard way to ensure that - * all the file space has really been allocated --- on platforms that - * allow "holes" in files, just seeking to the end doesn't allocate - * intermediate space. This way, we know that we have all the space - * and (after the fsync below) that all the indirect blocks are down on - * disk. Therefore, fdatasync(2) or O_DSYNC will be sufficient to sync - * future writes to the log file. - */ - for (nbytes = 0; nbytes < XLogSegSize; nbytes += XLOG_BLCKSZ) + errno = 0; + if ((int) write(fd, zbuffer, XLOG_BLCKSZ) != (int) XLOG_BLCKSZ) { - errno = 0; - if ((int) write(fd, zbuffer, XLOG_BLCKSZ) != (int) XLOG_BLCKSZ) - { - int save_errno = errno; + int save_errno = errno; - /* - * If we fail to make the file, delete it to release disk space - */ - unlink(tmppath); + /* + * If we fail to make the file, delete it to release disk space + */ + unlink(tmppath); - close(fd); + close(fd); - /* if write didn't set errno, assume no disk space */ - errno = save_errno ? save_errno : ENOSPC; + /* if write didn't set errno, assume problem is no disk space */ + errno = save_errno ? save_errno : ENOSPC; - ereport(ERROR, - (errcode_for_file_access(), - errmsg("could not write to file \"%s\": %m", - tmppath))); - } + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not write to file \"%s\": %m", tmppath))); } - pfree(zbuffer); } + pfree(zbuffer); if (pg_fsync(fd) != 0) { diff --git a/src/include/pg_config.h.in b/src/include/pg_config.h.in index 033127beff..8aabf3c87a 100644 --- a/src/include/pg_config.h.in +++ b/src/include/pg_config.h.in @@ -369,9 +369,6 @@ /* Define to 1 if you have the `posix_fadvise' function. */ #undef HAVE_POSIX_FADVISE -/* Define to 1 if you have the `posix_fallocate' function. */ -#undef HAVE_POSIX_FALLOCATE - /* Define to 1 if you have the POSIX signal interface. */ #undef HAVE_POSIX_SIGNALS diff --git a/src/include/pg_config.h.win32 b/src/include/pg_config.h.win32 index 931551086b..54db287aff 100644 --- a/src/include/pg_config.h.win32 +++ b/src/include/pg_config.h.win32 @@ -276,9 +276,6 @@ /* Define to 1 if you have the header file. */ /* #undef HAVE_POLL_H */ -/* Define to 1 if you have the `posix_fallocate' function. */ -/* #undef HAVE_POSIX_FALLOCATE */ - /* Define to 1 if you have the POSIX signal interface. */ /* #undef HAVE_POSIX_SIGNALS */