Provide a build-time option to store large relations as single files, rather

than dividing them into 1GB segments as has been our longtime practice.  This
requires working support for large files in the operating system; at least for
the time being, it won't be the default.

Zdenek Kotala
This commit is contained in:
Tom Lane 2008-03-10 20:06:27 +00:00
parent b6912af22b
commit f0828b2fc3
12 changed files with 595 additions and 100 deletions

446
configure vendored
View File

@ -1357,6 +1357,7 @@ Optional Features:
--enable-debug build with debugging symbols (-g)
--enable-profiling build with profiling enabled
--enable-dtrace build with DTrace support
--disable-segmented-files disable data file segmentation (requires largefile support)
--enable-depend turn on automatic dependency tracking
--enable-cassert enable assertion checks (for debugging)
--enable-thread-safety make client libraries thread-safe
@ -2541,6 +2542,36 @@ fi
#
# Data file segmentation
#
pgac_args="$pgac_args enable_segmented_files"
# Check whether --enable-segmented-files was given.
if test "${enable_segmented_files+set}" = set; then
enableval=$enable_segmented_files;
case $enableval in
yes)
:
;;
no)
:
;;
*)
{ { echo "$as_me:$LINENO: error: no argument expected for --enable-segmented-files option" >&5
echo "$as_me: error: no argument expected for --enable-segmented-files option" >&2;}
{ (exit 1); exit 1; }; }
;;
esac
else
enable_segmented_files=yes
fi
#
# C compiler
#
@ -23642,6 +23673,421 @@ fi
fi
# Check for largefile support (must be after AC_SYS_LARGEFILE)
{ echo "$as_me:$LINENO: checking for off_t" >&5
echo $ECHO_N "checking for off_t... $ECHO_C" >&6; }
if test "${ac_cv_type_off_t+set}" = set; then
echo $ECHO_N "(cached) $ECHO_C" >&6
else
cat >conftest.$ac_ext <<_ACEOF
/* confdefs.h. */
_ACEOF
cat confdefs.h >>conftest.$ac_ext
cat >>conftest.$ac_ext <<_ACEOF
/* end confdefs.h. */
$ac_includes_default
typedef off_t ac__type_new_;
int
main ()
{
if ((ac__type_new_ *) 0)
return 0;
if (sizeof (ac__type_new_))
return 0;
;
return 0;
}
_ACEOF
rm -f conftest.$ac_objext
if { (ac_try="$ac_compile"
case "(($ac_try" in
*\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
*) ac_try_echo=$ac_try;;
esac
eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
(eval "$ac_compile") 2>conftest.er1
ac_status=$?
grep -v '^ *+' conftest.er1 >conftest.err
rm -f conftest.er1
cat conftest.err >&5
echo "$as_me:$LINENO: \$? = $ac_status" >&5
(exit $ac_status); } && {
test -z "$ac_c_werror_flag" ||
test ! -s conftest.err
} && test -s conftest.$ac_objext; then
ac_cv_type_off_t=yes
else
echo "$as_me: failed program was:" >&5
sed 's/^/| /' conftest.$ac_ext >&5
ac_cv_type_off_t=no
fi
rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
fi
{ echo "$as_me:$LINENO: result: $ac_cv_type_off_t" >&5
echo "${ECHO_T}$ac_cv_type_off_t" >&6; }
# The cast to long int works around a bug in the HP C Compiler
# version HP92453-01 B.11.11.23709.GP, which incorrectly rejects
# declarations like `int a3[[(sizeof (unsigned char)) >= 0]];'.
# This bug is HP SR number 8606223364.
{ echo "$as_me:$LINENO: checking size of off_t" >&5
echo $ECHO_N "checking size of off_t... $ECHO_C" >&6; }
if test "${ac_cv_sizeof_off_t+set}" = set; then
echo $ECHO_N "(cached) $ECHO_C" >&6
else
if test "$cross_compiling" = yes; then
# Depending upon the size, compute the lo and hi bounds.
cat >conftest.$ac_ext <<_ACEOF
/* confdefs.h. */
_ACEOF
cat confdefs.h >>conftest.$ac_ext
cat >>conftest.$ac_ext <<_ACEOF
/* end confdefs.h. */
$ac_includes_default
typedef off_t ac__type_sizeof_;
int
main ()
{
static int test_array [1 - 2 * !(((long int) (sizeof (ac__type_sizeof_))) >= 0)];
test_array [0] = 0
;
return 0;
}
_ACEOF
rm -f conftest.$ac_objext
if { (ac_try="$ac_compile"
case "(($ac_try" in
*\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
*) ac_try_echo=$ac_try;;
esac
eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
(eval "$ac_compile") 2>conftest.er1
ac_status=$?
grep -v '^ *+' conftest.er1 >conftest.err
rm -f conftest.er1
cat conftest.err >&5
echo "$as_me:$LINENO: \$? = $ac_status" >&5
(exit $ac_status); } && {
test -z "$ac_c_werror_flag" ||
test ! -s conftest.err
} && test -s conftest.$ac_objext; then
ac_lo=0 ac_mid=0
while :; do
cat >conftest.$ac_ext <<_ACEOF
/* confdefs.h. */
_ACEOF
cat confdefs.h >>conftest.$ac_ext
cat >>conftest.$ac_ext <<_ACEOF
/* end confdefs.h. */
$ac_includes_default
typedef off_t ac__type_sizeof_;
int
main ()
{
static int test_array [1 - 2 * !(((long int) (sizeof (ac__type_sizeof_))) <= $ac_mid)];
test_array [0] = 0
;
return 0;
}
_ACEOF
rm -f conftest.$ac_objext
if { (ac_try="$ac_compile"
case "(($ac_try" in
*\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
*) ac_try_echo=$ac_try;;
esac
eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
(eval "$ac_compile") 2>conftest.er1
ac_status=$?
grep -v '^ *+' conftest.er1 >conftest.err
rm -f conftest.er1
cat conftest.err >&5
echo "$as_me:$LINENO: \$? = $ac_status" >&5
(exit $ac_status); } && {
test -z "$ac_c_werror_flag" ||
test ! -s conftest.err
} && test -s conftest.$ac_objext; then
ac_hi=$ac_mid; break
else
echo "$as_me: failed program was:" >&5
sed 's/^/| /' conftest.$ac_ext >&5
ac_lo=`expr $ac_mid + 1`
if test $ac_lo -le $ac_mid; then
ac_lo= ac_hi=
break
fi
ac_mid=`expr 2 '*' $ac_mid + 1`
fi
rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
done
else
echo "$as_me: failed program was:" >&5
sed 's/^/| /' conftest.$ac_ext >&5
cat >conftest.$ac_ext <<_ACEOF
/* confdefs.h. */
_ACEOF
cat confdefs.h >>conftest.$ac_ext
cat >>conftest.$ac_ext <<_ACEOF
/* end confdefs.h. */
$ac_includes_default
typedef off_t ac__type_sizeof_;
int
main ()
{
static int test_array [1 - 2 * !(((long int) (sizeof (ac__type_sizeof_))) < 0)];
test_array [0] = 0
;
return 0;
}
_ACEOF
rm -f conftest.$ac_objext
if { (ac_try="$ac_compile"
case "(($ac_try" in
*\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
*) ac_try_echo=$ac_try;;
esac
eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
(eval "$ac_compile") 2>conftest.er1
ac_status=$?
grep -v '^ *+' conftest.er1 >conftest.err
rm -f conftest.er1
cat conftest.err >&5
echo "$as_me:$LINENO: \$? = $ac_status" >&5
(exit $ac_status); } && {
test -z "$ac_c_werror_flag" ||
test ! -s conftest.err
} && test -s conftest.$ac_objext; then
ac_hi=-1 ac_mid=-1
while :; do
cat >conftest.$ac_ext <<_ACEOF
/* confdefs.h. */
_ACEOF
cat confdefs.h >>conftest.$ac_ext
cat >>conftest.$ac_ext <<_ACEOF
/* end confdefs.h. */
$ac_includes_default
typedef off_t ac__type_sizeof_;
int
main ()
{
static int test_array [1 - 2 * !(((long int) (sizeof (ac__type_sizeof_))) >= $ac_mid)];
test_array [0] = 0
;
return 0;
}
_ACEOF
rm -f conftest.$ac_objext
if { (ac_try="$ac_compile"
case "(($ac_try" in
*\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
*) ac_try_echo=$ac_try;;
esac
eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
(eval "$ac_compile") 2>conftest.er1
ac_status=$?
grep -v '^ *+' conftest.er1 >conftest.err
rm -f conftest.er1
cat conftest.err >&5
echo "$as_me:$LINENO: \$? = $ac_status" >&5
(exit $ac_status); } && {
test -z "$ac_c_werror_flag" ||
test ! -s conftest.err
} && test -s conftest.$ac_objext; then
ac_lo=$ac_mid; break
else
echo "$as_me: failed program was:" >&5
sed 's/^/| /' conftest.$ac_ext >&5
ac_hi=`expr '(' $ac_mid ')' - 1`
if test $ac_mid -le $ac_hi; then
ac_lo= ac_hi=
break
fi
ac_mid=`expr 2 '*' $ac_mid`
fi
rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
done
else
echo "$as_me: failed program was:" >&5
sed 's/^/| /' conftest.$ac_ext >&5
ac_lo= ac_hi=
fi
rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
fi
rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
# Binary search between lo and hi bounds.
while test "x$ac_lo" != "x$ac_hi"; do
ac_mid=`expr '(' $ac_hi - $ac_lo ')' / 2 + $ac_lo`
cat >conftest.$ac_ext <<_ACEOF
/* confdefs.h. */
_ACEOF
cat confdefs.h >>conftest.$ac_ext
cat >>conftest.$ac_ext <<_ACEOF
/* end confdefs.h. */
$ac_includes_default
typedef off_t ac__type_sizeof_;
int
main ()
{
static int test_array [1 - 2 * !(((long int) (sizeof (ac__type_sizeof_))) <= $ac_mid)];
test_array [0] = 0
;
return 0;
}
_ACEOF
rm -f conftest.$ac_objext
if { (ac_try="$ac_compile"
case "(($ac_try" in
*\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
*) ac_try_echo=$ac_try;;
esac
eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
(eval "$ac_compile") 2>conftest.er1
ac_status=$?
grep -v '^ *+' conftest.er1 >conftest.err
rm -f conftest.er1
cat conftest.err >&5
echo "$as_me:$LINENO: \$? = $ac_status" >&5
(exit $ac_status); } && {
test -z "$ac_c_werror_flag" ||
test ! -s conftest.err
} && test -s conftest.$ac_objext; then
ac_hi=$ac_mid
else
echo "$as_me: failed program was:" >&5
sed 's/^/| /' conftest.$ac_ext >&5
ac_lo=`expr '(' $ac_mid ')' + 1`
fi
rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
done
case $ac_lo in
?*) ac_cv_sizeof_off_t=$ac_lo;;
'') if test "$ac_cv_type_off_t" = yes; then
{ { echo "$as_me:$LINENO: error: cannot compute sizeof (off_t)
See \`config.log' for more details." >&5
echo "$as_me: error: cannot compute sizeof (off_t)
See \`config.log' for more details." >&2;}
{ (exit 77); exit 77; }; }
else
ac_cv_sizeof_off_t=0
fi ;;
esac
else
cat >conftest.$ac_ext <<_ACEOF
/* confdefs.h. */
_ACEOF
cat confdefs.h >>conftest.$ac_ext
cat >>conftest.$ac_ext <<_ACEOF
/* end confdefs.h. */
$ac_includes_default
typedef off_t ac__type_sizeof_;
static long int longval () { return (long int) (sizeof (ac__type_sizeof_)); }
static unsigned long int ulongval () { return (long int) (sizeof (ac__type_sizeof_)); }
#include <stdio.h>
#include <stdlib.h>
int
main ()
{
FILE *f = fopen ("conftest.val", "w");
if (! f)
return 1;
if (((long int) (sizeof (ac__type_sizeof_))) < 0)
{
long int i = longval ();
if (i != ((long int) (sizeof (ac__type_sizeof_))))
return 1;
fprintf (f, "%ld\n", i);
}
else
{
unsigned long int i = ulongval ();
if (i != ((long int) (sizeof (ac__type_sizeof_))))
return 1;
fprintf (f, "%lu\n", i);
}
return ferror (f) || fclose (f) != 0;
;
return 0;
}
_ACEOF
rm -f conftest$ac_exeext
if { (ac_try="$ac_link"
case "(($ac_try" in
*\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
*) ac_try_echo=$ac_try;;
esac
eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
(eval "$ac_link") 2>&5
ac_status=$?
echo "$as_me:$LINENO: \$? = $ac_status" >&5
(exit $ac_status); } && { ac_try='./conftest$ac_exeext'
{ (case "(($ac_try" in
*\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
*) ac_try_echo=$ac_try;;
esac
eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
(eval "$ac_try") 2>&5
ac_status=$?
echo "$as_me:$LINENO: \$? = $ac_status" >&5
(exit $ac_status); }; }; then
ac_cv_sizeof_off_t=`cat conftest.val`
else
echo "$as_me: program exited with status $ac_status" >&5
echo "$as_me: failed program was:" >&5
sed 's/^/| /' conftest.$ac_ext >&5
( exit $ac_status )
if test "$ac_cv_type_off_t" = yes; then
{ { echo "$as_me:$LINENO: error: cannot compute sizeof (off_t)
See \`config.log' for more details." >&5
echo "$as_me: error: cannot compute sizeof (off_t)
See \`config.log' for more details." >&2;}
{ (exit 77); exit 77; }; }
else
ac_cv_sizeof_off_t=0
fi
fi
rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext conftest.$ac_objext conftest.$ac_ext
fi
rm -f conftest.val
fi
{ echo "$as_me:$LINENO: result: $ac_cv_sizeof_off_t" >&5
echo "${ECHO_T}$ac_cv_sizeof_off_t" >&6; }
cat >>confdefs.h <<_ACEOF
#define SIZEOF_OFF_T $ac_cv_sizeof_off_t
_ACEOF
if test "$ac_cv_sizeof_off_t" -lt 8 -o "$enable_segmented_files" = "yes"; then
cat >>confdefs.h <<\_ACEOF
#define USE_SEGMENTED_FILES 1
_ACEOF
fi
# SunOS doesn't handle negative byte comparisons properly with +/- return
{ echo "$as_me:$LINENO: checking for working memcmp" >&5
echo $ECHO_N "checking for working memcmp... $ECHO_C" >&6; }

View File

@ -1,5 +1,5 @@
dnl Process this file with autoconf to produce a configure script.
dnl $PostgreSQL: pgsql/configure.in,v 1.552 2008/02/24 05:21:54 tgl Exp $
dnl $PostgreSQL: pgsql/configure.in,v 1.553 2008/03/10 20:06:27 tgl Exp $
dnl
dnl Developers, please strive to achieve this order:
dnl
@ -217,6 +217,12 @@ fi
AC_SUBST(DTRACEFLAGS)])
AC_SUBST(enable_dtrace)
#
# Data file segmentation
#
PGAC_ARG_BOOL(enable, segmented-files, yes,
[ --disable-segmented-files disable data file segmentation (requires largefile support)])
#
# C compiler
#
@ -1411,6 +1417,13 @@ if test $ac_cv_func_fseeko = yes; then
AC_SYS_LARGEFILE
fi
# Check for largefile support (must be after AC_SYS_LARGEFILE)
AC_CHECK_SIZEOF([off_t])
if test "$ac_cv_sizeof_off_t" -lt 8 -o "$enable_segmented_files" = "yes"; then
AC_DEFINE([USE_SEGMENTED_FILES], 1, [Define to split data files into 1GB segments.])
fi
# SunOS doesn't handle negative byte comparisons properly with +/- return
AC_FUNC_MEMCMP

View File

@ -1,4 +1,4 @@
<!-- $PostgreSQL: pgsql/doc/src/sgml/installation.sgml,v 1.303 2008/03/06 21:37:33 momjian Exp $ -->
<!-- $PostgreSQL: pgsql/doc/src/sgml/installation.sgml,v 1.304 2008/03/10 20:06:27 tgl Exp $ -->
<chapter id="installation">
<title><![%standalone-include[<productname>PostgreSQL</>]]>
@ -1025,6 +1025,20 @@ su - postgres
</listitem>
</varlistentry>
<varlistentry>
<term><option>--disable-segmented-files</option></term>
<listitem>
<para>
Store large tables as single operating-system files, rather than
dividing them into 1GB segments as is the default. This option
is ignored unless the operating system has <quote>largefile</>
support (which most do, nowadays). It can be helpful to reduce
the number of file descriptors consumed when working with very
large tables.
</para>
</listitem>
</varlistentry>
<varlistentry>
<term><option>--disable-spinlocks</option></term>
<listitem>

View File

@ -1,4 +1,4 @@
<!-- $PostgreSQL: pgsql/doc/src/sgml/storage.sgml,v 1.21 2007/11/23 00:24:12 ishii Exp $ -->
<!-- $PostgreSQL: pgsql/doc/src/sgml/storage.sgml,v 1.22 2008/03/10 20:06:27 tgl Exp $ -->
<chapter id="storage">
@ -138,10 +138,14 @@ Avoid assuming that filenode and table OID are the same.
</caution>
<para>
When a table or index exceeds 1 GB, it is divided into gigabyte-sized
When a table or index exceeds 1 GB, it is normally divided into gigabyte-sized
<firstterm>segments</>. The first segment's file name is the same as the
filenode; subsequent segments are named filenode.1, filenode.2, etc.
This arrangement avoids problems on platforms that have file size limitations.
(But if the platform does not have such a limitation, and
<option>--disable-segmented-files</option> was specified when
<productname>PostgreSQL</> was built, then each table or index is stored
as a single file, without segmentation.)
The contents of tables and indexes are discussed further in
<xref linkend="storage-page-layout">.
</para>

View File

@ -7,7 +7,7 @@
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/storage/file/buffile.c,v 1.29 2008/01/01 19:45:51 momjian Exp $
* $PostgreSQL: pgsql/src/backend/storage/file/buffile.c,v 1.30 2008/03/10 20:06:27 tgl Exp $
*
* NOTES:
*
@ -38,13 +38,12 @@
#include "storage/buffile.h"
/*
* The maximum safe file size is presumed to be RELSEG_SIZE * BLCKSZ.
* Note we adhere to this limit whether or not LET_OS_MANAGE_FILESIZE
* is defined, although md.c ignores it when that symbol is defined.
* The reason for doing this is that we'd like large temporary BufFiles
* to be spread across multiple tablespaces when available.
* We break BufFiles into gigabyte-sized segments, whether or not
* USE_SEGMENTED_FILES is defined. The reason is that we'd like large
* temporary BufFiles to be spread across multiple tablespaces when available.
*/
#define MAX_PHYSICAL_FILESIZE (RELSEG_SIZE * BLCKSZ)
#define MAX_PHYSICAL_FILESIZE 0x40000000
#define BUFFILE_SEG_SIZE (MAX_PHYSICAL_FILESIZE / BLCKSZ)
/*
* This data structure represents a buffered file that consists of one or
@ -56,7 +55,7 @@ struct BufFile
int numFiles; /* number of physical files in set */
/* all files except the last have length exactly MAX_PHYSICAL_FILESIZE */
File *files; /* palloc'd array with numFiles entries */
long *offsets; /* palloc'd array with numFiles entries */
off_t *offsets; /* palloc'd array with numFiles entries */
/*
* offsets[i] is the current seek position of files[i]. We use this to
@ -72,7 +71,7 @@ struct BufFile
* Position as seen by user of BufFile is (curFile, curOffset + pos).
*/
int curFile; /* file index (0..n) part of current pos */
int curOffset; /* offset part of current pos */
off_t curOffset; /* offset part of current pos */
int pos; /* next read/write position in buffer */
int nbytes; /* total # of valid bytes in buffer */
char buffer[BLCKSZ];
@ -97,7 +96,7 @@ makeBufFile(File firstfile)
file->numFiles = 1;
file->files = (File *) palloc(sizeof(File));
file->files[0] = firstfile;
file->offsets = (long *) palloc(sizeof(long));
file->offsets = (off_t *) palloc(sizeof(off_t));
file->offsets[0] = 0L;
file->isTemp = false;
file->isInterXact = false;
@ -124,8 +123,8 @@ extendBufFile(BufFile *file)
file->files = (File *) repalloc(file->files,
(file->numFiles + 1) * sizeof(File));
file->offsets = (long *) repalloc(file->offsets,
(file->numFiles + 1) * sizeof(long));
file->offsets = (off_t *) repalloc(file->offsets,
(file->numFiles + 1) * sizeof(off_t));
file->files[file->numFiles] = pfile;
file->offsets[file->numFiles] = 0L;
file->numFiles++;
@ -279,9 +278,9 @@ BufFileDumpBuffer(BufFile *file)
bytestowrite = file->nbytes - wpos;
if (file->isTemp)
{
long availbytes = MAX_PHYSICAL_FILESIZE - file->curOffset;
off_t availbytes = MAX_PHYSICAL_FILESIZE - file->curOffset;
if ((long) bytestowrite > availbytes)
if ((off_t) bytestowrite > availbytes)
bytestowrite = (int) availbytes;
}
@ -451,10 +450,10 @@ BufFileFlush(BufFile *file)
* impossible seek is attempted.
*/
int
BufFileSeek(BufFile *file, int fileno, long offset, int whence)
BufFileSeek(BufFile *file, int fileno, off_t offset, int whence)
{
int newFile;
long newOffset;
off_t newOffset;
switch (whence)
{
@ -469,7 +468,7 @@ BufFileSeek(BufFile *file, int fileno, long offset, int whence)
/*
* Relative seek considers only the signed offset, ignoring
* fileno. Note that large offsets (> 1 gig) risk overflow in this
* add...
* add, unless we have 64-bit off_t.
*/
newFile = file->curFile;
newOffset = (file->curOffset + file->pos) + offset;
@ -537,7 +536,7 @@ BufFileSeek(BufFile *file, int fileno, long offset, int whence)
}
void
BufFileTell(BufFile *file, int *fileno, long *offset)
BufFileTell(BufFile *file, int *fileno, off_t *offset)
{
*fileno = file->curFile;
*offset = file->curOffset + file->pos;
@ -558,8 +557,8 @@ int
BufFileSeekBlock(BufFile *file, long blknum)
{
return BufFileSeek(file,
(int) (blknum / RELSEG_SIZE),
(blknum % RELSEG_SIZE) * BLCKSZ,
(int) (blknum / BUFFILE_SEG_SIZE),
(off_t) (blknum % BUFFILE_SEG_SIZE) * BLCKSZ,
SEEK_SET);
}
@ -575,7 +574,7 @@ BufFileTellBlock(BufFile *file)
long blknum;
blknum = (file->curOffset + file->pos) / BLCKSZ;
blknum += file->curFile * RELSEG_SIZE;
blknum += file->curFile * BUFFILE_SEG_SIZE;
return blknum;
}

View File

@ -7,7 +7,7 @@
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/storage/file/fd.c,v 1.143 2008/01/01 19:45:51 momjian Exp $
* $PostgreSQL: pgsql/src/backend/storage/file/fd.c,v 1.144 2008/03/10 20:06:27 tgl Exp $
*
* NOTES:
*
@ -115,7 +115,7 @@ static int max_safe_fds = 32; /* default if not changed */
#define FileIsNotOpen(file) (VfdCache[file].fd == VFD_CLOSED)
#define FileUnknownPos (-1L)
#define FileUnknownPos ((off_t) -1)
/* these are the assigned bits in fdstate below: */
#define FD_TEMPORARY (1 << 0) /* T = delete when closed */
@ -123,13 +123,13 @@ static int max_safe_fds = 32; /* default if not changed */
typedef struct vfd
{
signed short fd; /* current FD, or VFD_CLOSED if none */
int fd; /* current FD, or VFD_CLOSED if none */
unsigned short fdstate; /* bitflags for VFD's state */
SubTransactionId create_subid; /* for TEMPORARY fds, creating subxact */
SubTransactionId create_subid; /* for TEMPORARY fds, creating subxact */
File nextFree; /* link to next free VFD, if in freelist */
File lruMoreRecently; /* doubly linked recency-of-use list */
File lruLessRecently;
long seekPos; /* current logical file position */
off_t seekPos; /* current logical file position */
char *fileName; /* name of file, or NULL for unused VFD */
/* NB: fileName is malloc'd, and must be free'd when closing the VFD */
int fileFlags; /* open(2) flags for (re)opening the file */
@ -544,8 +544,8 @@ LruDelete(File file)
Delete(file);
/* save the seek position */
vfdP->seekPos = (long) lseek(vfdP->fd, 0L, SEEK_CUR);
Assert(vfdP->seekPos != -1L);
vfdP->seekPos = lseek(vfdP->fd, (off_t) 0, SEEK_CUR);
Assert(vfdP->seekPos != (off_t) -1);
/* close the file */
if (close(vfdP->fd))
@ -616,12 +616,12 @@ LruInsert(File file)
}
/* seek to the right position */
if (vfdP->seekPos != 0L)
if (vfdP->seekPos != (off_t) 0)
{
long returnValue;
off_t returnValue;
returnValue = (long) lseek(vfdP->fd, vfdP->seekPos, SEEK_SET);
Assert(returnValue != -1L);
returnValue = lseek(vfdP->fd, vfdP->seekPos, SEEK_SET);
Assert(returnValue != (off_t) -1);
}
}
@ -1027,9 +1027,10 @@ FileRead(File file, char *buffer, int amount)
Assert(FileIsValid(file));
DO_DB(elog(LOG, "FileRead: %d (%s) %ld %d %p",
DO_DB(elog(LOG, "FileRead: %d (%s) " INT64_FORMAT " %d %p",
file, VfdCache[file].fileName,
VfdCache[file].seekPos, amount, buffer));
(int64) VfdCache[file].seekPos,
amount, buffer));
returnCode = FileAccess(file);
if (returnCode < 0)
@ -1081,9 +1082,10 @@ FileWrite(File file, char *buffer, int amount)
Assert(FileIsValid(file));
DO_DB(elog(LOG, "FileWrite: %d (%s) %ld %d %p",
DO_DB(elog(LOG, "FileWrite: %d (%s) " INT64_FORMAT " %d %p",
file, VfdCache[file].fileName,
VfdCache[file].seekPos, amount, buffer));
(int64) VfdCache[file].seekPos,
amount, buffer));
returnCode = FileAccess(file);
if (returnCode < 0)
@ -1146,16 +1148,17 @@ FileSync(File file)
return pg_fsync(VfdCache[file].fd);
}
long
FileSeek(File file, long offset, int whence)
off_t
FileSeek(File file, off_t offset, int whence)
{
int returnCode;
Assert(FileIsValid(file));
DO_DB(elog(LOG, "FileSeek: %d (%s) %ld %ld %d",
DO_DB(elog(LOG, "FileSeek: %d (%s) " INT64_FORMAT " " INT64_FORMAT " %d",
file, VfdCache[file].fileName,
VfdCache[file].seekPos, offset, whence));
(int64) VfdCache[file].seekPos,
(int64) offset, whence));
if (FileIsNotOpen(file))
{
@ -1163,7 +1166,8 @@ FileSeek(File file, long offset, int whence)
{
case SEEK_SET:
if (offset < 0)
elog(ERROR, "invalid seek offset: %ld", offset);
elog(ERROR, "invalid seek offset: " INT64_FORMAT,
(int64) offset);
VfdCache[file].seekPos = offset;
break;
case SEEK_CUR:
@ -1187,7 +1191,8 @@ FileSeek(File file, long offset, int whence)
{
case SEEK_SET:
if (offset < 0)
elog(ERROR, "invalid seek offset: %ld", offset);
elog(ERROR, "invalid seek offset: " INT64_FORMAT,
(int64) offset);
if (VfdCache[file].seekPos != offset)
VfdCache[file].seekPos = lseek(VfdCache[file].fd,
offset, whence);
@ -1213,7 +1218,7 @@ FileSeek(File file, long offset, int whence)
* XXX not actually used but here for completeness
*/
#ifdef NOT_USED
long
off_t
FileTell(File file)
{
Assert(FileIsValid(file));
@ -1224,7 +1229,7 @@ FileTell(File file)
#endif
int
FileTruncate(File file, long offset)
FileTruncate(File file, off_t offset)
{
int returnCode;
@ -1237,7 +1242,7 @@ FileTruncate(File file, long offset)
if (returnCode < 0)
return returnCode;
returnCode = ftruncate(VfdCache[file].fd, (size_t) offset);
returnCode = ftruncate(VfdCache[file].fd, offset);
return returnCode;
}

View File

@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/storage/smgr/md.c,v 1.135 2008/01/01 19:45:52 momjian Exp $
* $PostgreSQL: pgsql/src/backend/storage/smgr/md.c,v 1.136 2008/03/10 20:06:27 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@ -89,16 +89,16 @@
*
* All MdfdVec objects are palloc'd in the MdCxt memory context.
*
* Defining LET_OS_MANAGE_FILESIZE disables the segmentation logic,
* for use on machines that support large files. Beware that that
* code has not been tested in a long time and is probably bit-rotted.
* On platforms that support large files, USE_SEGMENTED_FILES can be
* #undef'd to disable the segmentation logic. In that case each
* relation is a single operating-system file.
*/
typedef struct _MdfdVec
{
File mdfd_vfd; /* fd number in fd.c's pool */
BlockNumber mdfd_segno; /* segment number, from 0 */
#ifndef LET_OS_MANAGE_FILESIZE /* for large relations */
#ifdef USE_SEGMENTED_FILES
struct _MdfdVec *mdfd_chain; /* next segment, or NULL */
#endif
} MdfdVec;
@ -162,7 +162,7 @@ static void register_dirty_segment(SMgrRelation reln, MdfdVec *seg);
static void register_unlink(RelFileNode rnode);
static MdfdVec *_fdvec_alloc(void);
#ifndef LET_OS_MANAGE_FILESIZE
#ifdef USE_SEGMENTED_FILES
static MdfdVec *_mdfd_openseg(SMgrRelation reln, BlockNumber segno,
int oflags);
#endif
@ -258,7 +258,7 @@ mdcreate(SMgrRelation reln, bool isRedo)
reln->md_fd->mdfd_vfd = fd;
reln->md_fd->mdfd_segno = 0;
#ifndef LET_OS_MANAGE_FILESIZE
#ifdef USE_SEGMENTED_FILES
reln->md_fd->mdfd_chain = NULL;
#endif
}
@ -344,7 +344,7 @@ mdunlink(RelFileNode rnode, bool isRedo)
rnode.relNode)));
}
#ifndef LET_OS_MANAGE_FILESIZE
#ifdef USE_SEGMENTED_FILES
/* Delete the additional segments, if any */
else
{
@ -395,7 +395,7 @@ mdunlink(RelFileNode rnode, bool isRedo)
void
mdextend(SMgrRelation reln, BlockNumber blocknum, char *buffer, bool isTemp)
{
long seekpos;
off_t seekpos;
int nbytes;
MdfdVec *v;
@ -420,11 +420,11 @@ mdextend(SMgrRelation reln, BlockNumber blocknum, char *buffer, bool isTemp)
v = _mdfd_getseg(reln, blocknum, isTemp, EXTENSION_CREATE);
#ifndef LET_OS_MANAGE_FILESIZE
seekpos = (long) (BLCKSZ * (blocknum % ((BlockNumber) RELSEG_SIZE)));
Assert(seekpos < BLCKSZ * RELSEG_SIZE);
#ifdef USE_SEGMENTED_FILES
seekpos = (off_t) BLCKSZ * (blocknum % ((BlockNumber) RELSEG_SIZE));
Assert(seekpos < (off_t) BLCKSZ * RELSEG_SIZE);
#else
seekpos = (long) (BLCKSZ * (blocknum));
seekpos = (off_t) BLCKSZ * blocknum;
#endif
/*
@ -469,7 +469,7 @@ mdextend(SMgrRelation reln, BlockNumber blocknum, char *buffer, bool isTemp)
if (!isTemp)
register_dirty_segment(reln, v);
#ifndef LET_OS_MANAGE_FILESIZE
#ifdef USE_SEGMENTED_FILES
Assert(_mdnblocks(reln, v) <= ((BlockNumber) RELSEG_SIZE));
#endif
}
@ -530,7 +530,7 @@ mdopen(SMgrRelation reln, ExtensionBehavior behavior)
mdfd->mdfd_vfd = fd;
mdfd->mdfd_segno = 0;
#ifndef LET_OS_MANAGE_FILESIZE
#ifdef USE_SEGMENTED_FILES
mdfd->mdfd_chain = NULL;
Assert(_mdnblocks(reln, mdfd) <= ((BlockNumber) RELSEG_SIZE));
#endif
@ -552,7 +552,7 @@ mdclose(SMgrRelation reln)
reln->md_fd = NULL; /* prevent dangling pointer after error */
#ifndef LET_OS_MANAGE_FILESIZE
#ifdef USE_SEGMENTED_FILES
while (v != NULL)
{
MdfdVec *ov = v;
@ -577,17 +577,17 @@ mdclose(SMgrRelation reln)
void
mdread(SMgrRelation reln, BlockNumber blocknum, char *buffer)
{
long seekpos;
off_t seekpos;
int nbytes;
MdfdVec *v;
v = _mdfd_getseg(reln, blocknum, false, EXTENSION_FAIL);
#ifndef LET_OS_MANAGE_FILESIZE
seekpos = (long) (BLCKSZ * (blocknum % ((BlockNumber) RELSEG_SIZE)));
Assert(seekpos < BLCKSZ * RELSEG_SIZE);
#ifdef USE_SEGMENTED_FILES
seekpos = (off_t) BLCKSZ * (blocknum % ((BlockNumber) RELSEG_SIZE));
Assert(seekpos < (off_t) BLCKSZ * RELSEG_SIZE);
#else
seekpos = (long) (BLCKSZ * (blocknum));
seekpos = (off_t) BLCKSZ * blocknum;
#endif
if (FileSeek(v->mdfd_vfd, seekpos, SEEK_SET) != seekpos)
@ -642,7 +642,7 @@ mdread(SMgrRelation reln, BlockNumber blocknum, char *buffer)
void
mdwrite(SMgrRelation reln, BlockNumber blocknum, char *buffer, bool isTemp)
{
long seekpos;
off_t seekpos;
int nbytes;
MdfdVec *v;
@ -653,11 +653,11 @@ mdwrite(SMgrRelation reln, BlockNumber blocknum, char *buffer, bool isTemp)
v = _mdfd_getseg(reln, blocknum, isTemp, EXTENSION_FAIL);
#ifndef LET_OS_MANAGE_FILESIZE
seekpos = (long) (BLCKSZ * (blocknum % ((BlockNumber) RELSEG_SIZE)));
Assert(seekpos < BLCKSZ * RELSEG_SIZE);
#ifdef USE_SEGMENTED_FILES
seekpos = (off_t) BLCKSZ * (blocknum % ((BlockNumber) RELSEG_SIZE));
Assert(seekpos < (off_t) BLCKSZ * RELSEG_SIZE);
#else
seekpos = (long) (BLCKSZ * (blocknum));
seekpos = (off_t) BLCKSZ * blocknum;
#endif
if (FileSeek(v->mdfd_vfd, seekpos, SEEK_SET) != seekpos)
@ -708,7 +708,7 @@ mdnblocks(SMgrRelation reln)
{
MdfdVec *v = mdopen(reln, EXTENSION_FAIL);
#ifndef LET_OS_MANAGE_FILESIZE
#ifdef USE_SEGMENTED_FILES
BlockNumber nblocks;
BlockNumber segno = 0;
@ -778,7 +778,7 @@ mdtruncate(SMgrRelation reln, BlockNumber nblocks, bool isTemp)
MdfdVec *v;
BlockNumber curnblk;
#ifndef LET_OS_MANAGE_FILESIZE
#ifdef USE_SEGMENTED_FILES
BlockNumber priorblocks;
#endif
@ -804,7 +804,7 @@ mdtruncate(SMgrRelation reln, BlockNumber nblocks, bool isTemp)
v = mdopen(reln, EXTENSION_FAIL);
#ifndef LET_OS_MANAGE_FILESIZE
#ifdef USE_SEGMENTED_FILES
priorblocks = 0;
while (v != NULL)
{
@ -843,7 +843,7 @@ mdtruncate(SMgrRelation reln, BlockNumber nblocks, bool isTemp)
*/
BlockNumber lastsegblocks = nblocks - priorblocks;
if (FileTruncate(v->mdfd_vfd, lastsegblocks * BLCKSZ) < 0)
if (FileTruncate(v->mdfd_vfd, (off_t) lastsegblocks * BLCKSZ) < 0)
ereport(ERROR,
(errcode_for_file_access(),
errmsg("could not truncate relation %u/%u/%u to %u blocks: %m",
@ -867,7 +867,8 @@ mdtruncate(SMgrRelation reln, BlockNumber nblocks, bool isTemp)
priorblocks += RELSEG_SIZE;
}
#else
if (FileTruncate(v->mdfd_vfd, nblocks * BLCKSZ) < 0)
/* For unsegmented files, it's a lot easier */
if (FileTruncate(v->mdfd_vfd, (off_t) nblocks * BLCKSZ) < 0)
ereport(ERROR,
(errcode_for_file_access(),
errmsg("could not truncate relation %u/%u/%u to %u blocks: %m",
@ -900,7 +901,7 @@ mdimmedsync(SMgrRelation reln)
v = mdopen(reln, EXTENSION_FAIL);
#ifndef LET_OS_MANAGE_FILESIZE
#ifdef USE_SEGMENTED_FILES
while (v != NULL)
{
if (FileSync(v->mdfd_vfd) < 0)
@ -917,8 +918,7 @@ mdimmedsync(SMgrRelation reln)
if (FileSync(v->mdfd_vfd) < 0)
ereport(ERROR,
(errcode_for_file_access(),
errmsg("could not fsync segment %u of relation %u/%u/%u: %m",
v->mdfd_segno,
errmsg("could not fsync relation %u/%u/%u: %m",
reln->smgr_rnode.spcNode,
reln->smgr_rnode.dbNode,
reln->smgr_rnode.relNode)));
@ -1453,7 +1453,7 @@ _fdvec_alloc(void)
return (MdfdVec *) MemoryContextAlloc(MdCxt, sizeof(MdfdVec));
}
#ifndef LET_OS_MANAGE_FILESIZE
#ifdef USE_SEGMENTED_FILES
/*
* Open the specified segment of the relation,
@ -1499,7 +1499,7 @@ _mdfd_openseg(SMgrRelation reln, BlockNumber segno, int oflags)
/* all done */
return v;
}
#endif /* LET_OS_MANAGE_FILESIZE */
#endif /* USE_SEGMENTED_FILES */
/*
* _mdfd_getseg() -- Find the segment of the relation holding the
@ -1515,7 +1515,7 @@ _mdfd_getseg(SMgrRelation reln, BlockNumber blkno, bool isTemp,
{
MdfdVec *v = mdopen(reln, behavior);
#ifndef LET_OS_MANAGE_FILESIZE
#ifdef USE_SEGMENTED_FILES
BlockNumber targetseg;
BlockNumber nextsegno;
@ -1588,7 +1588,7 @@ _mdfd_getseg(SMgrRelation reln, BlockNumber blkno, bool isTemp,
static BlockNumber
_mdnblocks(SMgrRelation reln, MdfdVec *seg)
{
long len;
off_t len;
len = FileSeek(seg->mdfd_vfd, 0L, SEEK_END);
if (len < 0)

View File

@ -38,7 +38,7 @@
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/utils/sort/tuplestore.c,v 1.36 2008/01/01 19:45:55 momjian Exp $
* $PostgreSQL: pgsql/src/backend/utils/sort/tuplestore.c,v 1.37 2008/03/10 20:06:27 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@ -134,14 +134,14 @@ struct Tuplestorestate
bool eof_reached; /* read reached EOF (always valid) */
int current; /* next array index (valid if INMEM) */
int readpos_file; /* file# (valid if WRITEFILE and not eof) */
long readpos_offset; /* offset (valid if WRITEFILE and not eof) */
off_t readpos_offset; /* offset (valid if WRITEFILE and not eof) */
int writepos_file; /* file# (valid if READFILE) */
long writepos_offset; /* offset (valid if READFILE) */
off_t writepos_offset; /* offset (valid if READFILE) */
/* markpos_xxx holds marked position for mark and restore */
int markpos_current; /* saved "current" */
int markpos_file; /* saved "readpos_file" */
long markpos_offset; /* saved "readpos_offset" */
off_t markpos_offset; /* saved "readpos_offset" */
};
#define COPYTUP(state,tup) ((*(state)->copytup) (state, tup))

View File

@ -637,6 +637,9 @@
your system. */
#undef PTHREAD_CREATE_JOINABLE
/* The size of `off_t', as computed by sizeof. */
#undef SIZEOF_OFF_T
/* The size of `size_t', as computed by sizeof. */
#undef SIZEOF_SIZE_T
@ -685,6 +688,9 @@
/* Use replacement snprintf() functions. */
#undef USE_REPL_SNPRINTF
/* Define to split data files into 1GB segments. */
#undef USE_SEGMENTED_FILES
/* Define to build with (Open)SSL support. (--with-openssl) */
#undef USE_SSL

View File

@ -6,7 +6,7 @@
* for developers. If you edit any of these, be sure to do a *full*
* rebuild (and an initdb if noted).
*
* $PostgreSQL: pgsql/src/include/pg_config_manual.h,v 1.28 2008/02/29 20:58:33 alvherre Exp $
* $PostgreSQL: pgsql/src/include/pg_config_manual.h,v 1.29 2008/03/10 20:06:27 tgl Exp $
*------------------------------------------------------------------------
*/
@ -27,8 +27,9 @@
/*
* RELSEG_SIZE is the maximum number of blocks allowed in one disk
* file. Thus, the maximum size of a single file is RELSEG_SIZE *
* BLCKSZ; relations bigger than that are divided into multiple files.
* file when USE_SEGMENTED_FILES is defined. Thus, the maximum size
* of a single file is RELSEG_SIZE * BLCKSZ; relations bigger than that
* are divided into multiple files.
*
* RELSEG_SIZE * BLCKSZ must be less than your OS' limit on file size.
* This is often 2 GB or 4GB in a 32-bit operating system, unless you
@ -39,9 +40,16 @@
* in the direction of a small limit. (Besides, a power-of-2 value
* saves a few cycles in md.c.)
*
* When not using segmented files, RELSEG_SIZE is set to zero so that
* this behavior can be distinguished in pg_control.
*
* Changing RELSEG_SIZE requires an initdb.
*/
#ifdef USE_SEGMENTED_FILES
#define RELSEG_SIZE (0x40000000 / BLCKSZ)
#else
#define RELSEG_SIZE 0
#endif
/*
* Size of a WAL file block. This need have no particular relation to BLCKSZ.

View File

@ -18,7 +18,7 @@
* Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* $PostgreSQL: pgsql/src/include/storage/buffile.h,v 1.23 2008/01/01 19:45:58 momjian Exp $
* $PostgreSQL: pgsql/src/include/storage/buffile.h,v 1.24 2008/03/10 20:06:27 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@ -38,8 +38,8 @@ extern BufFile *BufFileCreateTemp(bool interXact);
extern void BufFileClose(BufFile *file);
extern size_t BufFileRead(BufFile *file, void *ptr, size_t size);
extern size_t BufFileWrite(BufFile *file, void *ptr, size_t size);
extern int BufFileSeek(BufFile *file, int fileno, long offset, int whence);
extern void BufFileTell(BufFile *file, int *fileno, long *offset);
extern int BufFileSeek(BufFile *file, int fileno, off_t offset, int whence);
extern void BufFileTell(BufFile *file, int *fileno, off_t *offset);
extern int BufFileSeekBlock(BufFile *file, long blknum);
#endif /* BUFFILE_H */

View File

@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* $PostgreSQL: pgsql/src/include/storage/fd.h,v 1.61 2008/01/01 19:45:58 momjian Exp $
* $PostgreSQL: pgsql/src/include/storage/fd.h,v 1.62 2008/03/10 20:06:27 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@ -65,8 +65,8 @@ extern void FileClose(File file);
extern int FileRead(File file, char *buffer, int amount);
extern int FileWrite(File file, char *buffer, int amount);
extern int FileSync(File file);
extern long FileSeek(File file, long offset, int whence);
extern int FileTruncate(File file, long offset);
extern off_t FileSeek(File file, off_t offset, int whence);
extern int FileTruncate(File file, off_t offset);
/* Operations that allow use of regular stdio --- USE WITH CAUTION */
extern FILE *AllocateFile(const char *name, const char *mode);