Move our p{read,write}v replacements into their own files.

macOS's ranlib issued a warning about an empty pread.o file with the
previous arrangement, on systems new enough to require no replacement
functions.  Let's go back to using configure's AC_REPLACE_FUNCS system
to build and include each .o in the library only if it's needed, which
requires moving the *v() functions to their own files.

Also move the _with_retry() wrapper to a more permanent home.

Reported-by: Tom Lane <tgl@sss.pgh.pa.us>
Discussion: https://postgr.es/m/1283127.1610554395%40sss.pgh.pa.us
This commit is contained in:
Thomas Munro 2021-01-14 11:10:24 +13:00
parent 5a6f9bce8d
commit 0d56acfbaa
10 changed files with 248 additions and 154 deletions

54
configure vendored
View File

@ -15155,7 +15155,7 @@ fi
LIBS_including_readline="$LIBS"
LIBS=`echo "$LIBS" | sed -e 's/-ledit//g' -e 's/-lreadline//g'`
for ac_func in backtrace_symbols clock_gettime copyfile fdatasync getifaddrs getpeerucred getrlimit kqueue mbstowcs_l memset_s poll posix_fallocate ppoll pread preadv pstat pthread_is_threaded_np pwrite pwritev readlink readv setproctitle setproctitle_fast setsid shm_open strchrnul strsignal symlink sync_file_range uselocale wcstombs_l writev
for ac_func in backtrace_symbols clock_gettime copyfile fdatasync getifaddrs getpeerucred getrlimit kqueue mbstowcs_l memset_s poll posix_fallocate ppoll pstat pthread_is_threaded_np readlink readv setproctitle setproctitle_fast setsid shm_open strchrnul strsignal symlink sync_file_range uselocale wcstombs_l writev
do :
as_ac_var=`$as_echo "ac_cv_func_$ac_func" | $as_tr_sh`
ac_fn_c_check_func "$LINENO" "$ac_func" "$as_ac_var"
@ -15832,6 +15832,58 @@ esac
fi
ac_fn_c_check_func "$LINENO" "pread" "ac_cv_func_pread"
if test "x$ac_cv_func_pread" = xyes; then :
$as_echo "#define HAVE_PREAD 1" >>confdefs.h
else
case " $LIBOBJS " in
*" pread.$ac_objext "* ) ;;
*) LIBOBJS="$LIBOBJS pread.$ac_objext"
;;
esac
fi
ac_fn_c_check_func "$LINENO" "preadv" "ac_cv_func_preadv"
if test "x$ac_cv_func_preadv" = xyes; then :
$as_echo "#define HAVE_PREADV 1" >>confdefs.h
else
case " $LIBOBJS " in
*" preadv.$ac_objext "* ) ;;
*) LIBOBJS="$LIBOBJS preadv.$ac_objext"
;;
esac
fi
ac_fn_c_check_func "$LINENO" "pwrite" "ac_cv_func_pwrite"
if test "x$ac_cv_func_pwrite" = xyes; then :
$as_echo "#define HAVE_PWRITE 1" >>confdefs.h
else
case " $LIBOBJS " in
*" pwrite.$ac_objext "* ) ;;
*) LIBOBJS="$LIBOBJS pwrite.$ac_objext"
;;
esac
fi
ac_fn_c_check_func "$LINENO" "pwritev" "ac_cv_func_pwritev"
if test "x$ac_cv_func_pwritev" = xyes; then :
$as_echo "#define HAVE_PWRITEV 1" >>confdefs.h
else
case " $LIBOBJS " in
*" pwritev.$ac_objext "* ) ;;
*) LIBOBJS="$LIBOBJS pwritev.$ac_objext"
;;
esac
fi
ac_fn_c_check_func "$LINENO" "random" "ac_cv_func_random"
if test "x$ac_cv_func_random" = xyes; then :
$as_echo "#define HAVE_RANDOM 1" >>confdefs.h

View File

@ -1661,12 +1661,8 @@ AC_CHECK_FUNCS(m4_normalize([
poll
posix_fallocate
ppoll
pread
preadv
pstat
pthread_is_threaded_np
pwrite
pwritev
readlink
readv
setproctitle
@ -1740,6 +1736,10 @@ AC_REPLACE_FUNCS(m4_normalize([
inet_aton
link
mkdtemp
pread
preadv
pwrite
pwritev
random
srandom
strlcat

View File

@ -92,6 +92,7 @@
#include "common/file_utils.h"
#include "miscadmin.h"
#include "pgstat.h"
#include "port/pg_iovec.h"
#include "portability/mem.h"
#include "storage/fd.h"
#include "storage/ipc.h"
@ -3635,3 +3636,67 @@ data_sync_elevel(int elevel)
{
return data_sync_retry ? elevel : PANIC;
}
/*
* A convenience wrapper for pg_pwritev() that retries on partial write. If an
* error is returned, it is unspecified how much has been written.
*/
ssize_t
pg_pwritev_with_retry(int fd, const struct iovec *iov, int iovcnt, off_t offset)
{
struct iovec iov_copy[PG_IOV_MAX];
ssize_t sum = 0;
ssize_t part;
/* We'd better have space to make a copy, in case we need to retry. */
if (iovcnt > PG_IOV_MAX)
{
errno = EINVAL;
return -1;
}
for (;;)
{
/* Write as much as we can. */
part = pg_pwritev(fd, iov, iovcnt, offset);
if (part < 0)
return -1;
#ifdef SIMULATE_SHORT_WRITE
part = Min(part, 4096);
#endif
/* Count our progress. */
sum += part;
offset += part;
/* Step over iovecs that are done. */
while (iovcnt > 0 && iov->iov_len <= part)
{
part -= iov->iov_len;
++iov;
--iovcnt;
}
/* Are they all done? */
if (iovcnt == 0)
{
/* We don't expect the kernel to write more than requested. */
Assert(part == 0);
break;
}
/*
* Move whatever's left to the front of our mutable copy and adjust
* the leading iovec.
*/
Assert(iovcnt > 0);
memmove(iov_copy, iov, sizeof(*iov) * iovcnt);
Assert(iov->iov_len > part);
iov_copy[0].iov_base = (char *) iov_copy[0].iov_base + part;
iov_copy[0].iov_len -= part;
iov = iov_copy;
}
return sum;
}

View File

@ -45,6 +45,7 @@
#include <dirent.h>
struct iovec; /* avoid including port/pg_iovec.h here */
typedef int File;
@ -161,6 +162,10 @@ extern int durable_unlink(const char *fname, int loglevel);
extern int durable_rename_excl(const char *oldfile, const char *newfile, int loglevel);
extern void SyncDataDirectory(void);
extern int data_sync_elevel(int elevel);
extern ssize_t pg_pwritev_with_retry(int fd,
const struct iovec *iov,
int iovcnt,
off_t offset);
/* Filename components */
#define PG_TEMP_FILES_DIR "pgsql_tmp"

View File

@ -53,8 +53,6 @@ OBJS = \
pgstrcasecmp.o \
pgstrsignal.o \
pqsignal.o \
pread.o \
pwrite.o \
qsort.o \
qsort_arg.o \
quotes.o \

View File

@ -1,7 +1,7 @@
/*-------------------------------------------------------------------------
*
* pread.c
* Implementation of pread[v](2) for platforms that lack one.
* Implementation of pread(2) for platforms that lack one.
*
* Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group
*
@ -9,8 +9,7 @@
* src/port/pread.c
*
* Note that this implementation changes the current file position, unlike
* the POSIX function, so we use the name pg_pread(). Likewise for the
* iovec version.
* the POSIX function, so we use the name pg_pread().
*
*-------------------------------------------------------------------------
*/
@ -24,9 +23,6 @@
#include <unistd.h>
#endif
#include "port/pg_iovec.h"
#ifndef HAVE_PREAD
ssize_t
pg_pread(int fd, void *buf, size_t size, off_t offset)
{
@ -60,38 +56,3 @@ pg_pread(int fd, void *buf, size_t size, off_t offset)
return read(fd, buf, size);
#endif
}
#endif
#ifndef HAVE_PREADV
ssize_t
pg_preadv(int fd, const struct iovec *iov, int iovcnt, off_t offset)
{
#ifdef HAVE_READV
if (iovcnt == 1)
return pg_pread(fd, iov[0].iov_base, iov[0].iov_len, offset);
if (lseek(fd, offset, SEEK_SET) < 0)
return -1;
return readv(fd, iov, iovcnt);
#else
ssize_t sum = 0;
ssize_t part;
for (int i = 0; i < iovcnt; ++i)
{
part = pg_pread(fd, iov[i].iov_base, iov[i].iov_len, offset);
if (part < 0)
{
if (i == 0)
return -1;
else
return sum;
}
sum += part;
offset += part;
if (part < iov[i].iov_len)
return sum;
}
return sum;
#endif
}
#endif

58
src/port/preadv.c Normal file
View File

@ -0,0 +1,58 @@
/*-------------------------------------------------------------------------
*
* preadv.c
* Implementation of preadv(2) for platforms that lack one.
*
* Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group
*
* IDENTIFICATION
* src/port/preadv.c
*
* Note that this implementation changes the current file position, unlike
* the POSIX-like function, so we use the name pg_preadv().
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#ifdef WIN32
#include <windows.h>
#else
#include <unistd.h>
#endif
#include "port/pg_iovec.h"
ssize_t
pg_preadv(int fd, const struct iovec *iov, int iovcnt, off_t offset)
{
#ifdef HAVE_READV
if (iovcnt == 1)
return pg_pread(fd, iov[0].iov_base, iov[0].iov_len, offset);
if (lseek(fd, offset, SEEK_SET) < 0)
return -1;
return readv(fd, iov, iovcnt);
#else
ssize_t sum = 0;
ssize_t part;
for (int i = 0; i < iovcnt; ++i)
{
part = pg_pread(fd, iov[i].iov_base, iov[i].iov_len, offset);
if (part < 0)
{
if (i == 0)
return -1;
else
return sum;
}
sum += part;
offset += part;
if (part < iov[i].iov_len)
return sum;
}
return sum;
#endif
}

View File

@ -1,7 +1,7 @@
/*-------------------------------------------------------------------------
*
* pwrite.c
* Implementation of pwrite[v](2) for platforms that lack one.
* Implementation of pwrite(2) for platforms that lack one.
*
* Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group
*
@ -9,8 +9,7 @@
* src/port/pwrite.c
*
* Note that this implementation changes the current file position, unlike
* the POSIX function, so we use the name pg_pwrite(). Likewise for the
* iovec version.
* the POSIX function, so we use the name pg_pwrite().
*
*-------------------------------------------------------------------------
*/
@ -24,9 +23,6 @@
#include <unistd.h>
#endif
#include "port/pg_iovec.h"
#ifndef HAVE_PWRITE
ssize_t
pg_pwrite(int fd, const void *buf, size_t size, off_t offset)
{
@ -57,102 +53,3 @@ pg_pwrite(int fd, const void *buf, size_t size, off_t offset)
return write(fd, buf, size);
#endif
}
#endif
#ifndef HAVE_PWRITEV
ssize_t
pg_pwritev(int fd, const struct iovec *iov, int iovcnt, off_t offset)
{
#ifdef HAVE_WRITEV
if (iovcnt == 1)
return pg_pwrite(fd, iov[0].iov_base, iov[0].iov_len, offset);
if (lseek(fd, offset, SEEK_SET) < 0)
return -1;
return writev(fd, iov, iovcnt);
#else
ssize_t sum = 0;
ssize_t part;
for (int i = 0; i < iovcnt; ++i)
{
part = pg_pwrite(fd, iov[i].iov_base, iov[i].iov_len, offset);
if (part < 0)
{
if (i == 0)
return -1;
else
return sum;
}
sum += part;
offset += part;
if (part < iov[i].iov_len)
return sum;
}
return sum;
#endif
}
#endif
/*
* A convenience wrapper for pg_pwritev() that retries on partial write. If an
* error is returned, it is unspecified how much has been written.
*/
ssize_t
pg_pwritev_with_retry(int fd, const struct iovec *iov, int iovcnt, off_t offset)
{
struct iovec iov_copy[PG_IOV_MAX];
ssize_t sum = 0;
ssize_t part;
/* We'd better have space to make a copy, in case we need to retry. */
if (iovcnt > PG_IOV_MAX)
{
errno = EINVAL;
return -1;
}
for (;;)
{
/* Write as much as we can. */
part = pg_pwritev(fd, iov, iovcnt, offset);
if (part < 0)
return -1;
#ifdef SIMULATE_SHORT_WRITE
part = Min(part, 4096);
#endif
/* Count our progress. */
sum += part;
offset += part;
/* Step over iovecs that are done. */
while (iovcnt > 0 && iov->iov_len <= part)
{
part -= iov->iov_len;
++iov;
--iovcnt;
}
/* Are they all done? */
if (iovcnt == 0)
{
/* We don't expect the kernel to write more than requested. */
Assert(part == 0);
break;
}
/*
* Move whatever's left to the front of our mutable copy and adjust
* the leading iovec.
*/
Assert(iovcnt > 0);
memmove(iov_copy, iov, sizeof(*iov) * iovcnt);
Assert(iov->iov_len > part);
iov_copy[0].iov_base = (char *) iov_copy[0].iov_base + part;
iov_copy[0].iov_len -= part;
iov = iov_copy;
}
return sum;
}

58
src/port/pwritev.c Normal file
View File

@ -0,0 +1,58 @@
/*-------------------------------------------------------------------------
*
* pwritev.c
* Implementation of pwritev(2) for platforms that lack one.
*
* Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group
*
* IDENTIFICATION
* src/port/pwritev.c
*
* Note that this implementation changes the current file position, unlike
* the POSIX-like function, so we use the name pg_pwritev().
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#ifdef WIN32
#include <windows.h>
#else
#include <unistd.h>
#endif
#include "port/pg_iovec.h"
ssize_t
pg_pwritev(int fd, const struct iovec *iov, int iovcnt, off_t offset)
{
#ifdef HAVE_WRITEV
if (iovcnt == 1)
return pg_pwrite(fd, iov[0].iov_base, iov[0].iov_len, offset);
if (lseek(fd, offset, SEEK_SET) < 0)
return -1;
return writev(fd, iov, iovcnt);
#else
ssize_t sum = 0;
ssize_t part;
for (int i = 0; i < iovcnt; ++i)
{
part = pg_pwrite(fd, iov[i].iov_base, iov[i].iov_len, offset);
if (part < 0)
{
if (i == 0)
return -1;
else
return sum;
}
sum += part;
offset += part;
if (part < iov[i].iov_len)
return sum;
}
return sum;
#endif
}

View File

@ -99,7 +99,7 @@ sub mkvcbuild
srandom.c getaddrinfo.c gettimeofday.c inet_net_ntop.c kill.c open.c
erand48.c snprintf.c strlcat.c strlcpy.c dirmod.c noblock.c path.c
dirent.c dlopen.c getopt.c getopt_long.c link.c
pread.c pwrite.c pg_bitutils.c
pread.c preadv.c pwrite.c pwritev.c pg_bitutils.c
pg_strong_random.c pgcheckdir.c pgmkdirp.c pgsleep.c pgstrcasecmp.c
pqsignal.c mkdtemp.c qsort.c qsort_arg.c quotes.c system.c
strerror.c tar.c thread.c