Provide pg_preadv() and pg_pwritev().

Provide synchronous vectored file I/O routines.  These map to preadv()
and pwritev(), with fallback implementations for systems that don't have
them.  Also provide a wrapper pg_pwritev_with_retry() that automatically
retries on short writes.

Reviewed-by: Tom Lane <tgl@sss.pgh.pa.us>
Reviewed-by: Andres Freund <andres@anarazel.de>
Discussion: https://postgr.es/m/CA%2BhUKGJA%2Bu-220VONeoREBXJ9P3S94Y7J%2BkqCnTYmahvZJwM%3Dg%40mail.gmail.com
This commit is contained in:
Thomas Munro 2021-01-11 14:37:13 +13:00
parent 01334c92fa
commit 13a021f3e8
9 changed files with 238 additions and 34 deletions

30
configure vendored
View File

@ -13061,7 +13061,7 @@ $as_echo "#define HAVE_STDBOOL_H 1" >>confdefs.h
fi
for ac_header in atomic.h copyfile.h execinfo.h getopt.h ifaddrs.h langinfo.h mbarrier.h poll.h sys/epoll.h sys/event.h sys/ipc.h sys/prctl.h sys/procctl.h sys/pstat.h sys/resource.h sys/select.h sys/sem.h sys/shm.h sys/sockio.h sys/tas.h sys/un.h termios.h ucred.h wctype.h
for ac_header in atomic.h copyfile.h execinfo.h getopt.h ifaddrs.h langinfo.h mbarrier.h poll.h sys/epoll.h sys/event.h sys/ipc.h sys/prctl.h sys/procctl.h sys/pstat.h sys/resource.h sys/select.h sys/sem.h sys/shm.h sys/sockio.h sys/tas.h sys/uio.h sys/un.h termios.h ucred.h wctype.h
do :
as_ac_Header=`$as_echo "ac_cv_header_$ac_header" | $as_tr_sh`
ac_fn_c_check_header_mongrel "$LINENO" "$ac_header" "$as_ac_Header" "$ac_includes_default"
@ -15155,7 +15155,7 @@ fi
LIBS_including_readline="$LIBS"
LIBS=`echo "$LIBS" | sed -e 's/-ledit//g' -e 's/-lreadline//g'`
for ac_func in backtrace_symbols clock_gettime copyfile fdatasync getifaddrs getpeerucred getrlimit kqueue mbstowcs_l memset_s poll posix_fallocate ppoll pstat pthread_is_threaded_np readlink setproctitle setproctitle_fast setsid shm_open strchrnul strsignal symlink sync_file_range uselocale wcstombs_l
for ac_func in backtrace_symbols clock_gettime copyfile fdatasync getifaddrs getpeerucred getrlimit kqueue mbstowcs_l memset_s poll posix_fallocate ppoll pread preadv pstat pthread_is_threaded_np pwrite pwritev readlink readv setproctitle setproctitle_fast setsid shm_open strchrnul strsignal symlink sync_file_range uselocale wcstombs_l writev
do :
as_ac_var=`$as_echo "ac_cv_func_$ac_func" | $as_tr_sh`
ac_fn_c_check_func "$LINENO" "$ac_func" "$as_ac_var"
@ -15832,32 +15832,6 @@ esac
fi
ac_fn_c_check_func "$LINENO" "pread" "ac_cv_func_pread"
if test "x$ac_cv_func_pread" = xyes; then :
$as_echo "#define HAVE_PREAD 1" >>confdefs.h
else
case " $LIBOBJS " in
*" pread.$ac_objext "* ) ;;
*) LIBOBJS="$LIBOBJS pread.$ac_objext"
;;
esac
fi
ac_fn_c_check_func "$LINENO" "pwrite" "ac_cv_func_pwrite"
if test "x$ac_cv_func_pwrite" = xyes; then :
$as_echo "#define HAVE_PWRITE 1" >>confdefs.h
else
case " $LIBOBJS " in
*" pwrite.$ac_objext "* ) ;;
*) LIBOBJS="$LIBOBJS pwrite.$ac_objext"
;;
esac
fi
ac_fn_c_check_func "$LINENO" "random" "ac_cv_func_random"
if test "x$ac_cv_func_random" = xyes; then :
$as_echo "#define HAVE_RANDOM 1" >>confdefs.h

View File

@ -1331,6 +1331,7 @@ AC_CHECK_HEADERS(m4_normalize([
sys/shm.h
sys/sockio.h
sys/tas.h
sys/uio.h
sys/un.h
termios.h
ucred.h
@ -1660,9 +1661,14 @@ AC_CHECK_FUNCS(m4_normalize([
poll
posix_fallocate
ppoll
pread
preadv
pstat
pthread_is_threaded_np
pwrite
pwritev
readlink
readv
setproctitle
setproctitle_fast
setsid
@ -1673,6 +1679,7 @@ AC_CHECK_FUNCS(m4_normalize([
sync_file_range
uselocale
wcstombs_l
writev
]))
# These typically are compiler builtins, for which AC_CHECK_FUNCS fails.
@ -1733,8 +1740,6 @@ AC_REPLACE_FUNCS(m4_normalize([
inet_aton
link
mkdtemp
pread
pwrite
random
srandom
strlcat

View File

@ -412,6 +412,9 @@
/* Define to 1 if you have the `pread' function. */
#undef HAVE_PREAD
/* Define to 1 if you have the `preadv' function. */
#undef HAVE_PREADV
/* Define to 1 if you have the `pstat' function. */
#undef HAVE_PSTAT
@ -430,6 +433,9 @@
/* Define to 1 if you have the `pwrite' function. */
#undef HAVE_PWRITE
/* Define to 1 if you have the `pwritev' function. */
#undef HAVE_PWRITEV
/* Define to 1 if you have the `random' function. */
#undef HAVE_RANDOM
@ -445,6 +451,9 @@
/* Define to 1 if you have the `readlink' function. */
#undef HAVE_READLINK
/* Define to 1 if you have the `readv' function. */
#undef HAVE_READV
/* Define to 1 if you have the global variable
'rl_completion_append_character'. */
#undef HAVE_RL_COMPLETION_APPEND_CHARACTER
@ -629,6 +638,9 @@
/* Define to 1 if you have the <sys/ucred.h> header file. */
#undef HAVE_SYS_UCRED_H
/* Define to 1 if you have the <sys/uio.h> header file. */
#undef HAVE_SYS_UIO_H
/* Define to 1 if you have the <sys/un.h> header file. */
#undef HAVE_SYS_UN_H
@ -683,6 +695,9 @@
/* Define to 1 if you have the <winldap.h> header file. */
#undef HAVE_WINLDAP_H
/* Define to 1 if you have the `writev' function. */
#undef HAVE_WRITEV
/* Define to 1 if you have the `X509_get_signature_nid' function. */
#undef HAVE_X509_GET_SIGNATURE_NID

View File

@ -431,6 +431,8 @@ extern ssize_t pg_pread(int fd, void *buf, size_t nbyte, off_t offset);
extern ssize_t pg_pwrite(int fd, const void *buf, size_t nbyte, off_t offset);
#endif
/* For pg_pwritev() and pg_preadv(), see port/pg_iovec.h. */
#if !HAVE_DECL_STRLCAT
extern size_t strlcat(char *dst, const char *src, size_t siz);
#endif

View File

@ -0,0 +1,59 @@
/*-------------------------------------------------------------------------
*
* pg_iovec.h
* Header for the vectored I/O functions in src/port/p{read,write}.c.
*
* Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* src/include/port/pg_iovec.h
*
*-------------------------------------------------------------------------
*/
#ifndef PG_IOVEC_H
#define PG_IOVEC_H

#include <limits.h>

/*
 * Pick up the platform's struct iovec from <sys/uio.h> when that header
 * exists; otherwise declare a POSIX-compatible struct ourselves.
 */
#ifdef HAVE_SYS_UIO_H
#include <sys/uio.h>
#else
struct iovec
{
	void	   *iov_base;
	size_t		iov_len;
};
#endif

/*
 * Fall back to our own IOV_MAX when <limits.h> did not supply one.  POSIX
 * requires at least 16.
 */
#if !defined(IOV_MAX)
#define IOV_MAX 16
#endif

/*
 * Upper bound on iovec array length that is safe to place on the stack:
 * never more than IOV_MAX, and never more than 32.
 */
#define PG_IOV_MAX Min(IOV_MAX, 32)

/*
 * pg_preadv() is the system preadv() when available, otherwise the
 * replacement function is declared here.
 */
#if defined(HAVE_PREADV)
#define pg_preadv preadv
#else
extern ssize_t pg_preadv(int fd, const struct iovec *iov, int iovcnt, off_t offset);
#endif

/*
 * pg_pwritev() is the system pwritev() when available, otherwise the
 * replacement function is declared here.
 */
#if defined(HAVE_PWRITEV)
#define pg_pwritev pwritev
#else
extern ssize_t pg_pwritev(int fd, const struct iovec *iov, int iovcnt, off_t offset);
#endif

/* Wrapper around pg_pwritev() that retries on short writes. */
extern ssize_t pg_pwritev_with_retry(int fd,
									 const struct iovec *iov,
									 int iovcnt,
									 off_t offset);

#endif							/* PG_IOVEC_H */

View File

@ -53,6 +53,8 @@ OBJS = \
pgstrcasecmp.o \
pgstrsignal.o \
pqsignal.o \
pread.o \
pwrite.o \
qsort.o \
qsort_arg.o \
quotes.o \

View File

@ -1,7 +1,7 @@
/*-------------------------------------------------------------------------
*
* pread.c
* Implementation of pread(2) for platforms that lack one.
* Implementation of pread[v](2) for platforms that lack one.
*
* Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group
*
@ -9,7 +9,8 @@
* src/port/pread.c
*
* Note that this implementation changes the current file position, unlike
* the POSIX function, so we use the name pg_pread().
* the POSIX function, so we use the name pg_pread(). Likewise for the
* iovec version.
*
*-------------------------------------------------------------------------
*/
@ -23,6 +24,9 @@
#include <unistd.h>
#endif
#include "port/pg_iovec.h"
#ifndef HAVE_PREAD
ssize_t
pg_pread(int fd, void *buf, size_t size, off_t offset)
{
@ -56,3 +60,38 @@ pg_pread(int fd, void *buf, size_t size, off_t offset)
return read(fd, buf, size);
#endif
}
#endif
#ifndef HAVE_PREADV
/*
 * pg_preadv
 *		Replacement for preadv(2): read into the buffers described by
 *		iov[0..iovcnt-1], starting at the given file offset.
 *
 * Returns the number of bytes read, or -1 on error.  When readv() is
 * available the file is positioned with lseek() and a single readv() call is
 * made (a one-element vector is handed straight to pg_pread()).  Without
 * readv(), each buffer is filled in turn with pg_pread(); the loop stops at
 * the first short read, and a failure after the first buffer is reported as
 * the byte count accumulated so far rather than as an error.
 */
ssize_t
pg_preadv(int fd, const struct iovec *iov, int iovcnt, off_t offset)
{
#ifndef HAVE_READV
	ssize_t		total = 0;
	int			idx;

	for (idx = 0; idx < iovcnt; idx++)
	{
		ssize_t		nread = pg_pread(fd, iov[idx].iov_base, iov[idx].iov_len, offset);

		/* Only a failure on the very first buffer is reported as an error. */
		if (nread < 0)
			return (idx == 0) ? -1 : total;

		total += nread;
		offset += nread;

		/* A short read ends the transfer. */
		if (nread < iov[idx].iov_len)
			break;
	}

	return total;
#else
	if (iovcnt != 1)
	{
		/* Position the descriptor, then let readv() scatter the data. */
		if (lseek(fd, offset, SEEK_SET) < 0)
			return -1;
		return readv(fd, iov, iovcnt);
	}

	/* A single buffer needs no vectored call. */
	return pg_pread(fd, iov[0].iov_base, iov[0].iov_len, offset);
#endif
}
#endif

View File

@ -1,7 +1,7 @@
/*-------------------------------------------------------------------------
*
* pwrite.c
* Implementation of pwrite(2) for platforms that lack one.
* Implementation of pwrite[v](2) for platforms that lack one.
*
* Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group
*
@ -9,7 +9,8 @@
* src/port/pwrite.c
*
* Note that this implementation changes the current file position, unlike
* the POSIX function, so we use the name pg_pwrite().
* the POSIX function, so we use the name pg_pwrite(). Likewise for the
* iovec version.
*
*-------------------------------------------------------------------------
*/
@ -23,6 +24,9 @@
#include <unistd.h>
#endif
#include "port/pg_iovec.h"
#ifndef HAVE_PWRITE
ssize_t
pg_pwrite(int fd, const void *buf, size_t size, off_t offset)
{
@ -53,3 +57,102 @@ pg_pwrite(int fd, const void *buf, size_t size, off_t offset)
return write(fd, buf, size);
#endif
}
#endif
#ifndef HAVE_PWRITEV
/*
 * pg_pwritev
 *		Replacement for pwritev(2): write the buffers described by
 *		iov[0..iovcnt-1], starting at the given file offset.
 *
 * Returns the number of bytes written, or -1 on error.  When writev() is
 * available the file is positioned with lseek() and a single writev() call
 * is made (a one-element vector is handed straight to pg_pwrite()).  Without
 * writev(), each buffer is written in turn with pg_pwrite(); the loop stops
 * at the first short write, and a failure after the first buffer is reported
 * as the byte count accumulated so far rather than as an error.
 */
ssize_t
pg_pwritev(int fd, const struct iovec *iov, int iovcnt, off_t offset)
{
#ifndef HAVE_WRITEV
	ssize_t		total = 0;
	int			idx;

	for (idx = 0; idx < iovcnt; idx++)
	{
		ssize_t		nwritten = pg_pwrite(fd, iov[idx].iov_base, iov[idx].iov_len, offset);

		/* Only a failure on the very first buffer is reported as an error. */
		if (nwritten < 0)
			return (idx == 0) ? -1 : total;

		total += nwritten;
		offset += nwritten;

		/* A short write ends the transfer. */
		if (nwritten < iov[idx].iov_len)
			break;
	}

	return total;
#else
	if (iovcnt != 1)
	{
		/* Position the descriptor, then let writev() gather the data. */
		if (lseek(fd, offset, SEEK_SET) < 0)
			return -1;
		return writev(fd, iov, iovcnt);
	}

	/* A single buffer needs no vectored call. */
	return pg_pwrite(fd, iov[0].iov_base, iov[0].iov_len, offset);
#endif
}
#endif
/*
 * A convenience wrapper for pg_pwritev() that retries on partial write.
 *
 * fd, iov, iovcnt and offset have the same meaning as for pg_pwritev().
 * iovcnt must not exceed PG_IOV_MAX, because a local copy of the vector may
 * be needed; a larger count fails with errno set to EINVAL.
 *
 * Returns the total number of bytes written once every iovec has been
 * consumed, or -1 as soon as pg_pwritev() reports a failure.  If an error is
 * returned, it is unspecified how much has been written.
 *
 * The caller's iovec array is never modified: the first attempt uses it
 * directly, and any retry works on iov_copy.
 */
ssize_t
pg_pwritev_with_retry(int fd, const struct iovec *iov, int iovcnt, off_t offset)
{
	struct iovec iov_copy[PG_IOV_MAX];
	ssize_t		sum = 0;
	ssize_t		part;

	/* We'd better have space to make a copy, in case we need to retry. */
	if (iovcnt > PG_IOV_MAX)
	{
		errno = EINVAL;
		return -1;
	}

	for (;;)
	{
		/* Write as much as we can. */
		part = pg_pwritev(fd, iov, iovcnt, offset);
		if (part < 0)
			return -1;

#ifdef SIMULATE_SHORT_WRITE
		/* Testing aid: pretend that at most 4096 bytes were written. */
		part = Min(part, 4096);
#endif

		/* Count our progress. */
		sum += part;
		offset += part;

		/* Step over iovecs that are done. */
		while (iovcnt > 0 && iov->iov_len <= part)
		{
			part -= iov->iov_len;
			++iov;
			--iovcnt;
		}

		/* Are they all done? */
		if (iovcnt == 0)
		{
			/*
			 * We don't expect to be told that more was written than was
			 * requested.  This file is built unconditionally as part of
			 * src/port, so it must not depend on elog(); use an assertion
			 * like the other sanity checks in this function.
			 */
			Assert(part == 0);
			break;
		}

		/*
		 * Move whatever's left to the front of our mutable copy and adjust the
		 * leading iovec.  memmove() is required because on second and later
		 * retries iov already points into iov_copy.
		 *
		 * NOTE(review): if pg_pwritev() ever returned 0 while data was still
		 * pending, this loop would retry with identical arguments; confirm
		 * that cannot happen for the file types callers use.
		 */
		Assert(iovcnt > 0);
		memmove(iov_copy, iov, sizeof(*iov) * iovcnt);
		Assert(iov->iov_len > part);
		iov_copy[0].iov_base = (char *) iov_copy[0].iov_base + part;
		iov_copy[0].iov_len -= part;
		iov = iov_copy;
	}

	return sum;
}

View File

@ -329,17 +329,20 @@ sub GenerateFiles
HAVE_PPC_LWARX_MUTEX_HINT => undef,
HAVE_PPOLL => undef,
HAVE_PREAD => undef,
HAVE_PREADV => undef,
HAVE_PSTAT => undef,
HAVE_PS_STRINGS => undef,
HAVE_PTHREAD => undef,
HAVE_PTHREAD_IS_THREADED_NP => undef,
HAVE_PTHREAD_PRIO_INHERIT => undef,
HAVE_PWRITE => undef,
HAVE_PWRITEV => undef,
HAVE_RANDOM => undef,
HAVE_READLINE_H => undef,
HAVE_READLINE_HISTORY_H => undef,
HAVE_READLINE_READLINE_H => undef,
HAVE_READLINK => undef,
HAVE_READV => undef,
HAVE_RL_COMPLETION_APPEND_CHARACTER => undef,
HAVE_RL_COMPLETION_MATCHES => undef,
HAVE_RL_COMPLETION_SUPPRESS_QUOTE => undef,
@ -400,6 +403,7 @@ sub GenerateFiles
HAVE_SYS_TAS_H => undef,
HAVE_SYS_TYPES_H => 1,
HAVE_SYS_UCRED_H => undef,
HAVE_SYS_UIO_H => undef,
HAVE_SYS_UN_H => undef,
HAVE_TERMIOS_H => undef,
HAVE_TYPEOF => undef,
@ -418,6 +422,7 @@ sub GenerateFiles
HAVE_WINLDAP_H => undef,
HAVE_WCSTOMBS_L => 1,
HAVE_WCTYPE_H => 1,
HAVE_WRITEV => undef,
HAVE_X509_GET_SIGNATURE_NID => 1,
HAVE_X86_64_POPCNTQ => undef,
HAVE__BOOL => undef,