diff --git a/configure b/configure index 99b0722ca1..476ce76c8a 100755 --- a/configure +++ b/configure @@ -1357,6 +1357,7 @@ Optional Features: --enable-debug build with debugging symbols (-g) --enable-profiling build with profiling enabled --enable-dtrace build with DTrace support + --disable-segmented-files disable data file segmentation (requires largefile support) --enable-depend turn on automatic dependency tracking --enable-cassert enable assertion checks (for debugging) --enable-thread-safety make client libraries thread-safe @@ -2541,6 +2542,36 @@ fi +# +# Data file segmentation +# + +pgac_args="$pgac_args enable_segmented_files" + +# Check whether --enable-segmented-files was given. +if test "${enable_segmented_files+set}" = set; then + enableval=$enable_segmented_files; + case $enableval in + yes) + : + ;; + no) + : + ;; + *) + { { echo "$as_me:$LINENO: error: no argument expected for --enable-segmented-files option" >&5 +echo "$as_me: error: no argument expected for --enable-segmented-files option" >&2;} + { (exit 1); exit 1; }; } + ;; + esac + +else + enable_segmented_files=yes + +fi + + + # # C compiler # @@ -23642,6 +23673,421 @@ fi fi +# Check for largefile support (must be after AC_SYS_LARGEFILE) +{ echo "$as_me:$LINENO: checking for off_t" >&5 +echo $ECHO_N "checking for off_t... $ECHO_C" >&6; } +if test "${ac_cv_type_off_t+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. 
*/ +$ac_includes_default +typedef off_t ac__type_new_; +int +main () +{ +if ((ac__type_new_ *) 0) + return 0; +if (sizeof (ac__type_new_)) + return 0; + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext +if { (ac_try="$ac_compile" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_compile") 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && { + test -z "$ac_c_werror_flag" || + test ! -s conftest.err + } && test -s conftest.$ac_objext; then + ac_cv_type_off_t=yes +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + ac_cv_type_off_t=no +fi + +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +fi +{ echo "$as_me:$LINENO: result: $ac_cv_type_off_t" >&5 +echo "${ECHO_T}$ac_cv_type_off_t" >&6; } + +# The cast to long int works around a bug in the HP C Compiler +# version HP92453-01 B.11.11.23709.GP, which incorrectly rejects +# declarations like `int a3[[(sizeof (unsigned char)) >= 0]];'. +# This bug is HP SR number 8606223364. +{ echo "$as_me:$LINENO: checking size of off_t" >&5 +echo $ECHO_N "checking size of off_t... $ECHO_C" >&6; } +if test "${ac_cv_sizeof_off_t+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + if test "$cross_compiling" = yes; then + # Depending upon the size, compute the lo and hi bounds. +cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. 
*/ +$ac_includes_default + typedef off_t ac__type_sizeof_; +int +main () +{ +static int test_array [1 - 2 * !(((long int) (sizeof (ac__type_sizeof_))) >= 0)]; +test_array [0] = 0 + + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext +if { (ac_try="$ac_compile" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_compile") 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && { + test -z "$ac_c_werror_flag" || + test ! -s conftest.err + } && test -s conftest.$ac_objext; then + ac_lo=0 ac_mid=0 + while :; do + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +$ac_includes_default + typedef off_t ac__type_sizeof_; +int +main () +{ +static int test_array [1 - 2 * !(((long int) (sizeof (ac__type_sizeof_))) <= $ac_mid)]; +test_array [0] = 0 + + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext +if { (ac_try="$ac_compile" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_compile") 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && { + test -z "$ac_c_werror_flag" || + test ! 
-s conftest.err + } && test -s conftest.$ac_objext; then + ac_hi=$ac_mid; break +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + ac_lo=`expr $ac_mid + 1` + if test $ac_lo -le $ac_mid; then + ac_lo= ac_hi= + break + fi + ac_mid=`expr 2 '*' $ac_mid + 1` +fi + +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext + done +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +$ac_includes_default + typedef off_t ac__type_sizeof_; +int +main () +{ +static int test_array [1 - 2 * !(((long int) (sizeof (ac__type_sizeof_))) < 0)]; +test_array [0] = 0 + + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext +if { (ac_try="$ac_compile" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_compile") 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && { + test -z "$ac_c_werror_flag" || + test ! -s conftest.err + } && test -s conftest.$ac_objext; then + ac_hi=-1 ac_mid=-1 + while :; do + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +$ac_includes_default + typedef off_t ac__type_sizeof_; +int +main () +{ +static int test_array [1 - 2 * !(((long int) (sizeof (ac__type_sizeof_))) >= $ac_mid)]; +test_array [0] = 0 + + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext +if { (ac_try="$ac_compile" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_compile") 2>conftest.er1 + ac_status=$? 
+ grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && { + test -z "$ac_c_werror_flag" || + test ! -s conftest.err + } && test -s conftest.$ac_objext; then + ac_lo=$ac_mid; break +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + ac_hi=`expr '(' $ac_mid ')' - 1` + if test $ac_mid -le $ac_hi; then + ac_lo= ac_hi= + break + fi + ac_mid=`expr 2 '*' $ac_mid` +fi + +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext + done +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + ac_lo= ac_hi= +fi + +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +fi + +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +# Binary search between lo and hi bounds. +while test "x$ac_lo" != "x$ac_hi"; do + ac_mid=`expr '(' $ac_hi - $ac_lo ')' / 2 + $ac_lo` + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +$ac_includes_default + typedef off_t ac__type_sizeof_; +int +main () +{ +static int test_array [1 - 2 * !(((long int) (sizeof (ac__type_sizeof_))) <= $ac_mid)]; +test_array [0] = 0 + + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext +if { (ac_try="$ac_compile" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_compile") 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && { + test -z "$ac_c_werror_flag" || + test ! 
-s conftest.err + } && test -s conftest.$ac_objext; then + ac_hi=$ac_mid +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + ac_lo=`expr '(' $ac_mid ')' + 1` +fi + +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +done +case $ac_lo in +?*) ac_cv_sizeof_off_t=$ac_lo;; +'') if test "$ac_cv_type_off_t" = yes; then + { { echo "$as_me:$LINENO: error: cannot compute sizeof (off_t) +See \`config.log' for more details." >&5 +echo "$as_me: error: cannot compute sizeof (off_t) +See \`config.log' for more details." >&2;} + { (exit 77); exit 77; }; } + else + ac_cv_sizeof_off_t=0 + fi ;; +esac +else + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +$ac_includes_default + typedef off_t ac__type_sizeof_; +static long int longval () { return (long int) (sizeof (ac__type_sizeof_)); } +static unsigned long int ulongval () { return (long int) (sizeof (ac__type_sizeof_)); } +#include <stdio.h> +#include <stdlib.h> +int +main () +{ + + FILE *f = fopen ("conftest.val", "w"); + if (! f) + return 1; + if (((long int) (sizeof (ac__type_sizeof_))) < 0) + { + long int i = longval (); + if (i != ((long int) (sizeof (ac__type_sizeof_)))) + return 1; + fprintf (f, "%ld\n", i); + } + else + { + unsigned long int i = ulongval (); + if (i != ((long int) (sizeof (ac__type_sizeof_)))) + return 1; + fprintf (f, "%lu\n", i); + } + return ferror (f) || fclose (f) != 0; + + ; + return 0; +} +_ACEOF +rm -f conftest$ac_exeext +if { (ac_try="$ac_link" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_link") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$?
= $ac_status" >&5 + (exit $ac_status); } && { ac_try='./conftest$ac_exeext' + { (case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + ac_cv_sizeof_off_t=`cat conftest.val` +else + echo "$as_me: program exited with status $ac_status" >&5 +echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + +( exit $ac_status ) +if test "$ac_cv_type_off_t" = yes; then + { { echo "$as_me:$LINENO: error: cannot compute sizeof (off_t) +See \`config.log' for more details." >&5 +echo "$as_me: error: cannot compute sizeof (off_t) +See \`config.log' for more details." >&2;} + { (exit 77); exit 77; }; } + else + ac_cv_sizeof_off_t=0 + fi +fi +rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext conftest.$ac_objext conftest.$ac_ext +fi +rm -f conftest.val +fi +{ echo "$as_me:$LINENO: result: $ac_cv_sizeof_off_t" >&5 +echo "${ECHO_T}$ac_cv_sizeof_off_t" >&6; } + + + +cat >>confdefs.h <<_ACEOF +#define SIZEOF_OFF_T $ac_cv_sizeof_off_t +_ACEOF + + + +if test "$ac_cv_sizeof_off_t" -lt 8 -o "$enable_segmented_files" = "yes"; then + +cat >>confdefs.h <<\_ACEOF +#define USE_SEGMENTED_FILES 1 +_ACEOF + +fi + # SunOS doesn't handle negative byte comparisons properly with +/- return { echo "$as_me:$LINENO: checking for working memcmp" >&5 echo $ECHO_N "checking for working memcmp... $ECHO_C" >&6; } diff --git a/configure.in b/configure.in index 2bdc371984..020009785c 100644 --- a/configure.in +++ b/configure.in @@ -1,5 +1,5 @@ dnl Process this file with autoconf to produce a configure script. 
-dnl $PostgreSQL: pgsql/configure.in,v 1.552 2008/02/24 05:21:54 tgl Exp $ +dnl $PostgreSQL: pgsql/configure.in,v 1.553 2008/03/10 20:06:27 tgl Exp $ dnl dnl Developers, please strive to achieve this order: dnl @@ -217,6 +217,12 @@ fi AC_SUBST(DTRACEFLAGS)]) AC_SUBST(enable_dtrace) +# +# Data file segmentation +# +PGAC_ARG_BOOL(enable, segmented-files, yes, + [ --disable-segmented-files disable data file segmentation (requires largefile support)]) + # # C compiler # @@ -1411,6 +1417,13 @@ if test $ac_cv_func_fseeko = yes; then AC_SYS_LARGEFILE fi +# Check for largefile support (must be after AC_SYS_LARGEFILE) +AC_CHECK_SIZEOF([off_t]) + +if test "$ac_cv_sizeof_off_t" -lt 8 -o "$enable_segmented_files" = "yes"; then + AC_DEFINE([USE_SEGMENTED_FILES], 1, [Define to split data files into 1GB segments.]) +fi + # SunOS doesn't handle negative byte comparisons properly with +/- return AC_FUNC_MEMCMP diff --git a/doc/src/sgml/installation.sgml b/doc/src/sgml/installation.sgml index a999002346..95a3f10be6 100644 --- a/doc/src/sgml/installation.sgml +++ b/doc/src/sgml/installation.sgml @@ -1,4 +1,4 @@ - + <![%standalone-include[<productname>PostgreSQL</>]]> @@ -1025,6 +1025,20 @@ su - postgres </listitem> </varlistentry> + <varlistentry> + <term><option>--disable-segmented-files</option></term> + <listitem> + <para> + Store large tables as single operating-system files, rather than + dividing them into 1GB segments as is the default. This option + is ignored unless the operating system has <quote>largefile</> + support (which most do, nowadays). It can be helpful to reduce + the number of file descriptors consumed when working with very + large tables. 
+ </para> + </listitem> + </varlistentry> + <varlistentry> <term><option>--disable-spinlocks</option></term> <listitem> diff --git a/doc/src/sgml/storage.sgml b/doc/src/sgml/storage.sgml index fe9ae611bf..7ba0c1e343 100644 --- a/doc/src/sgml/storage.sgml +++ b/doc/src/sgml/storage.sgml @@ -1,4 +1,4 @@ -<!-- $PostgreSQL: pgsql/doc/src/sgml/storage.sgml,v 1.21 2007/11/23 00:24:12 ishii Exp $ --> +<!-- $PostgreSQL: pgsql/doc/src/sgml/storage.sgml,v 1.22 2008/03/10 20:06:27 tgl Exp $ --> <chapter id="storage"> @@ -138,10 +138,14 @@ Avoid assuming that filenode and table OID are the same. </caution> <para> -When a table or index exceeds 1 GB, it is divided into gigabyte-sized +When a table or index exceeds 1 GB, it is normally divided into gigabyte-sized <firstterm>segments</>. The first segment's file name is the same as the filenode; subsequent segments are named filenode.1, filenode.2, etc. This arrangement avoids problems on platforms that have file size limitations. +(But if the platform does not have such a limitation, and +<option>--disable-segmented-files</option> was specified when +<productname>PostgreSQL</> was built, then each table or index is stored +as a single file, without segmentation.) The contents of tables and indexes are discussed further in <xref linkend="storage-page-layout">. </para> diff --git a/src/backend/storage/file/buffile.c b/src/backend/storage/file/buffile.c index 8d79e9574b..94e5c67911 100644 --- a/src/backend/storage/file/buffile.c +++ b/src/backend/storage/file/buffile.c @@ -7,7 +7,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/storage/file/buffile.c,v 1.29 2008/01/01 19:45:51 momjian Exp $ + * $PostgreSQL: pgsql/src/backend/storage/file/buffile.c,v 1.30 2008/03/10 20:06:27 tgl Exp $ * * NOTES: * @@ -38,13 +38,12 @@ #include "storage/buffile.h" /* - * The maximum safe file size is presumed to be RELSEG_SIZE * BLCKSZ. 
- * Note we adhere to this limit whether or not LET_OS_MANAGE_FILESIZE - * is defined, although md.c ignores it when that symbol is defined. - * The reason for doing this is that we'd like large temporary BufFiles - * to be spread across multiple tablespaces when available. + * We break BufFiles into gigabyte-sized segments, whether or not + * USE_SEGMENTED_FILES is defined. The reason is that we'd like large + * temporary BufFiles to be spread across multiple tablespaces when available. */ -#define MAX_PHYSICAL_FILESIZE (RELSEG_SIZE * BLCKSZ) +#define MAX_PHYSICAL_FILESIZE 0x40000000 +#define BUFFILE_SEG_SIZE (MAX_PHYSICAL_FILESIZE / BLCKSZ) /* * This data structure represents a buffered file that consists of one or @@ -56,7 +55,7 @@ struct BufFile int numFiles; /* number of physical files in set */ /* all files except the last have length exactly MAX_PHYSICAL_FILESIZE */ File *files; /* palloc'd array with numFiles entries */ - long *offsets; /* palloc'd array with numFiles entries */ + off_t *offsets; /* palloc'd array with numFiles entries */ /* * offsets[i] is the current seek position of files[i]. We use this to @@ -72,7 +71,7 @@ struct BufFile * Position as seen by user of BufFile is (curFile, curOffset + pos). 
*/ int curFile; /* file index (0..n) part of current pos */ - int curOffset; /* offset part of current pos */ + off_t curOffset; /* offset part of current pos */ int pos; /* next read/write position in buffer */ int nbytes; /* total # of valid bytes in buffer */ char buffer[BLCKSZ]; @@ -97,7 +96,7 @@ makeBufFile(File firstfile) file->numFiles = 1; file->files = (File *) palloc(sizeof(File)); file->files[0] = firstfile; - file->offsets = (long *) palloc(sizeof(long)); + file->offsets = (off_t *) palloc(sizeof(off_t)); file->offsets[0] = 0L; file->isTemp = false; file->isInterXact = false; @@ -124,8 +123,8 @@ extendBufFile(BufFile *file) file->files = (File *) repalloc(file->files, (file->numFiles + 1) * sizeof(File)); - file->offsets = (long *) repalloc(file->offsets, - (file->numFiles + 1) * sizeof(long)); + file->offsets = (off_t *) repalloc(file->offsets, + (file->numFiles + 1) * sizeof(off_t)); file->files[file->numFiles] = pfile; file->offsets[file->numFiles] = 0L; file->numFiles++; @@ -279,9 +278,9 @@ BufFileDumpBuffer(BufFile *file) bytestowrite = file->nbytes - wpos; if (file->isTemp) { - long availbytes = MAX_PHYSICAL_FILESIZE - file->curOffset; + off_t availbytes = MAX_PHYSICAL_FILESIZE - file->curOffset; - if ((long) bytestowrite > availbytes) + if ((off_t) bytestowrite > availbytes) bytestowrite = (int) availbytes; } @@ -451,10 +450,10 @@ BufFileFlush(BufFile *file) * impossible seek is attempted. */ int -BufFileSeek(BufFile *file, int fileno, long offset, int whence) +BufFileSeek(BufFile *file, int fileno, off_t offset, int whence) { int newFile; - long newOffset; + off_t newOffset; switch (whence) { @@ -469,7 +468,7 @@ BufFileSeek(BufFile *file, int fileno, long offset, int whence) /* * Relative seek considers only the signed offset, ignoring * fileno. Note that large offsets (> 1 gig) risk overflow in this - * add... + * add, unless we have 64-bit off_t. 
*/ newFile = file->curFile; newOffset = (file->curOffset + file->pos) + offset; @@ -537,7 +536,7 @@ BufFileSeek(BufFile *file, int fileno, long offset, int whence) } void -BufFileTell(BufFile *file, int *fileno, long *offset) +BufFileTell(BufFile *file, int *fileno, off_t *offset) { *fileno = file->curFile; *offset = file->curOffset + file->pos; @@ -558,8 +557,8 @@ int BufFileSeekBlock(BufFile *file, long blknum) { return BufFileSeek(file, - (int) (blknum / RELSEG_SIZE), - (blknum % RELSEG_SIZE) * BLCKSZ, + (int) (blknum / BUFFILE_SEG_SIZE), + (off_t) (blknum % BUFFILE_SEG_SIZE) * BLCKSZ, SEEK_SET); } @@ -575,7 +574,7 @@ BufFileTellBlock(BufFile *file) long blknum; blknum = (file->curOffset + file->pos) / BLCKSZ; - blknum += file->curFile * RELSEG_SIZE; + blknum += file->curFile * BUFFILE_SEG_SIZE; return blknum; } diff --git a/src/backend/storage/file/fd.c b/src/backend/storage/file/fd.c index 2a0108fcee..edce52155f 100644 --- a/src/backend/storage/file/fd.c +++ b/src/backend/storage/file/fd.c @@ -7,7 +7,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/storage/file/fd.c,v 1.143 2008/01/01 19:45:51 momjian Exp $ + * $PostgreSQL: pgsql/src/backend/storage/file/fd.c,v 1.144 2008/03/10 20:06:27 tgl Exp $ * * NOTES: * @@ -115,7 +115,7 @@ static int max_safe_fds = 32; /* default if not changed */ #define FileIsNotOpen(file) (VfdCache[file].fd == VFD_CLOSED) -#define FileUnknownPos (-1L) +#define FileUnknownPos ((off_t) -1) /* these are the assigned bits in fdstate below: */ #define FD_TEMPORARY (1 << 0) /* T = delete when closed */ @@ -123,13 +123,13 @@ static int max_safe_fds = 32; /* default if not changed */ typedef struct vfd { - signed short fd; /* current FD, or VFD_CLOSED if none */ + int fd; /* current FD, or VFD_CLOSED if none */ unsigned short fdstate; /* bitflags for VFD's state */ - SubTransactionId create_subid; /* for TEMPORARY fds, creating subxact */ + SubTransactionId 
create_subid; /* for TEMPORARY fds, creating subxact */ File nextFree; /* link to next free VFD, if in freelist */ File lruMoreRecently; /* doubly linked recency-of-use list */ File lruLessRecently; - long seekPos; /* current logical file position */ + off_t seekPos; /* current logical file position */ char *fileName; /* name of file, or NULL for unused VFD */ /* NB: fileName is malloc'd, and must be free'd when closing the VFD */ int fileFlags; /* open(2) flags for (re)opening the file */ @@ -544,8 +544,8 @@ LruDelete(File file) Delete(file); /* save the seek position */ - vfdP->seekPos = (long) lseek(vfdP->fd, 0L, SEEK_CUR); - Assert(vfdP->seekPos != -1L); + vfdP->seekPos = lseek(vfdP->fd, (off_t) 0, SEEK_CUR); + Assert(vfdP->seekPos != (off_t) -1); /* close the file */ if (close(vfdP->fd)) @@ -616,12 +616,12 @@ LruInsert(File file) } /* seek to the right position */ - if (vfdP->seekPos != 0L) + if (vfdP->seekPos != (off_t) 0) { - long returnValue; + off_t returnValue; - returnValue = (long) lseek(vfdP->fd, vfdP->seekPos, SEEK_SET); - Assert(returnValue != -1L); + returnValue = lseek(vfdP->fd, vfdP->seekPos, SEEK_SET); + Assert(returnValue != (off_t) -1); } } @@ -1027,9 +1027,10 @@ FileRead(File file, char *buffer, int amount) Assert(FileIsValid(file)); - DO_DB(elog(LOG, "FileRead: %d (%s) %ld %d %p", + DO_DB(elog(LOG, "FileRead: %d (%s) " INT64_FORMAT " %d %p", file, VfdCache[file].fileName, - VfdCache[file].seekPos, amount, buffer)); + (int64) VfdCache[file].seekPos, + amount, buffer)); returnCode = FileAccess(file); if (returnCode < 0) @@ -1081,9 +1082,10 @@ FileWrite(File file, char *buffer, int amount) Assert(FileIsValid(file)); - DO_DB(elog(LOG, "FileWrite: %d (%s) %ld %d %p", + DO_DB(elog(LOG, "FileWrite: %d (%s) " INT64_FORMAT " %d %p", file, VfdCache[file].fileName, - VfdCache[file].seekPos, amount, buffer)); + (int64) VfdCache[file].seekPos, + amount, buffer)); returnCode = FileAccess(file); if (returnCode < 0) @@ -1146,16 +1148,17 @@ FileSync(File 
file) return pg_fsync(VfdCache[file].fd); } -long -FileSeek(File file, long offset, int whence) +off_t +FileSeek(File file, off_t offset, int whence) { int returnCode; Assert(FileIsValid(file)); - DO_DB(elog(LOG, "FileSeek: %d (%s) %ld %ld %d", + DO_DB(elog(LOG, "FileSeek: %d (%s) " INT64_FORMAT " " INT64_FORMAT " %d", file, VfdCache[file].fileName, - VfdCache[file].seekPos, offset, whence)); + (int64) VfdCache[file].seekPos, + (int64) offset, whence)); if (FileIsNotOpen(file)) { @@ -1163,7 +1166,8 @@ FileSeek(File file, long offset, int whence) { case SEEK_SET: if (offset < 0) - elog(ERROR, "invalid seek offset: %ld", offset); + elog(ERROR, "invalid seek offset: " INT64_FORMAT, + (int64) offset); VfdCache[file].seekPos = offset; break; case SEEK_CUR: @@ -1187,7 +1191,8 @@ FileSeek(File file, long offset, int whence) { case SEEK_SET: if (offset < 0) - elog(ERROR, "invalid seek offset: %ld", offset); + elog(ERROR, "invalid seek offset: " INT64_FORMAT, + (int64) offset); if (VfdCache[file].seekPos != offset) VfdCache[file].seekPos = lseek(VfdCache[file].fd, offset, whence); @@ -1213,7 +1218,7 @@ FileSeek(File file, long offset, int whence) * XXX not actually used but here for completeness */ #ifdef NOT_USED -long +off_t FileTell(File file) { Assert(FileIsValid(file)); @@ -1224,7 +1229,7 @@ FileTell(File file) #endif int -FileTruncate(File file, long offset) +FileTruncate(File file, off_t offset) { int returnCode; @@ -1237,7 +1242,7 @@ FileTruncate(File file, long offset) if (returnCode < 0) return returnCode; - returnCode = ftruncate(VfdCache[file].fd, (size_t) offset); + returnCode = ftruncate(VfdCache[file].fd, offset); return returnCode; } diff --git a/src/backend/storage/smgr/md.c b/src/backend/storage/smgr/md.c index 543574be40..6ea4a00b01 100644 --- a/src/backend/storage/smgr/md.c +++ b/src/backend/storage/smgr/md.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/storage/smgr/md.c,v 1.135 2008/01/01 19:45:52 momjian Exp $ + * 
$PostgreSQL: pgsql/src/backend/storage/smgr/md.c,v 1.136 2008/03/10 20:06:27 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -89,16 +89,16 @@ * * All MdfdVec objects are palloc'd in the MdCxt memory context. * - * Defining LET_OS_MANAGE_FILESIZE disables the segmentation logic, - * for use on machines that support large files. Beware that that - * code has not been tested in a long time and is probably bit-rotted. + * On platforms that support large files, USE_SEGMENTED_FILES can be + * #undef'd to disable the segmentation logic. In that case each + * relation is a single operating-system file. */ typedef struct _MdfdVec { File mdfd_vfd; /* fd number in fd.c's pool */ BlockNumber mdfd_segno; /* segment number, from 0 */ -#ifndef LET_OS_MANAGE_FILESIZE /* for large relations */ +#ifdef USE_SEGMENTED_FILES struct _MdfdVec *mdfd_chain; /* next segment, or NULL */ #endif } MdfdVec; @@ -162,7 +162,7 @@ static void register_dirty_segment(SMgrRelation reln, MdfdVec *seg); static void register_unlink(RelFileNode rnode); static MdfdVec *_fdvec_alloc(void); -#ifndef LET_OS_MANAGE_FILESIZE +#ifdef USE_SEGMENTED_FILES static MdfdVec *_mdfd_openseg(SMgrRelation reln, BlockNumber segno, int oflags); #endif @@ -258,7 +258,7 @@ mdcreate(SMgrRelation reln, bool isRedo) reln->md_fd->mdfd_vfd = fd; reln->md_fd->mdfd_segno = 0; -#ifndef LET_OS_MANAGE_FILESIZE +#ifdef USE_SEGMENTED_FILES reln->md_fd->mdfd_chain = NULL; #endif } @@ -344,7 +344,7 @@ mdunlink(RelFileNode rnode, bool isRedo) rnode.relNode))); } -#ifndef LET_OS_MANAGE_FILESIZE +#ifdef USE_SEGMENTED_FILES /* Delete the additional segments, if any */ else { @@ -395,7 +395,7 @@ mdunlink(RelFileNode rnode, bool isRedo) void mdextend(SMgrRelation reln, BlockNumber blocknum, char *buffer, bool isTemp) { - long seekpos; + off_t seekpos; int nbytes; MdfdVec *v; @@ -420,11 +420,11 @@ mdextend(SMgrRelation reln, BlockNumber blocknum, char *buffer, bool isTemp) v = _mdfd_getseg(reln, 
blocknum, isTemp, EXTENSION_CREATE); -#ifndef LET_OS_MANAGE_FILESIZE - seekpos = (long) (BLCKSZ * (blocknum % ((BlockNumber) RELSEG_SIZE))); - Assert(seekpos < BLCKSZ * RELSEG_SIZE); +#ifdef USE_SEGMENTED_FILES + seekpos = (off_t) BLCKSZ * (blocknum % ((BlockNumber) RELSEG_SIZE)); + Assert(seekpos < (off_t) BLCKSZ * RELSEG_SIZE); #else - seekpos = (long) (BLCKSZ * (blocknum)); + seekpos = (off_t) BLCKSZ * blocknum; #endif /* @@ -469,7 +469,7 @@ mdextend(SMgrRelation reln, BlockNumber blocknum, char *buffer, bool isTemp) if (!isTemp) register_dirty_segment(reln, v); -#ifndef LET_OS_MANAGE_FILESIZE +#ifdef USE_SEGMENTED_FILES Assert(_mdnblocks(reln, v) <= ((BlockNumber) RELSEG_SIZE)); #endif } @@ -530,7 +530,7 @@ mdopen(SMgrRelation reln, ExtensionBehavior behavior) mdfd->mdfd_vfd = fd; mdfd->mdfd_segno = 0; -#ifndef LET_OS_MANAGE_FILESIZE +#ifdef USE_SEGMENTED_FILES mdfd->mdfd_chain = NULL; Assert(_mdnblocks(reln, mdfd) <= ((BlockNumber) RELSEG_SIZE)); #endif @@ -552,7 +552,7 @@ mdclose(SMgrRelation reln) reln->md_fd = NULL; /* prevent dangling pointer after error */ -#ifndef LET_OS_MANAGE_FILESIZE +#ifdef USE_SEGMENTED_FILES while (v != NULL) { MdfdVec *ov = v; @@ -577,17 +577,17 @@ mdclose(SMgrRelation reln) void mdread(SMgrRelation reln, BlockNumber blocknum, char *buffer) { - long seekpos; + off_t seekpos; int nbytes; MdfdVec *v; v = _mdfd_getseg(reln, blocknum, false, EXTENSION_FAIL); -#ifndef LET_OS_MANAGE_FILESIZE - seekpos = (long) (BLCKSZ * (blocknum % ((BlockNumber) RELSEG_SIZE))); - Assert(seekpos < BLCKSZ * RELSEG_SIZE); +#ifdef USE_SEGMENTED_FILES + seekpos = (off_t) BLCKSZ * (blocknum % ((BlockNumber) RELSEG_SIZE)); + Assert(seekpos < (off_t) BLCKSZ * RELSEG_SIZE); #else - seekpos = (long) (BLCKSZ * (blocknum)); + seekpos = (off_t) BLCKSZ * blocknum; #endif if (FileSeek(v->mdfd_vfd, seekpos, SEEK_SET) != seekpos) @@ -642,7 +642,7 @@ mdread(SMgrRelation reln, BlockNumber blocknum, char *buffer) void mdwrite(SMgrRelation reln, BlockNumber blocknum, char 
*buffer, bool isTemp) { - long seekpos; + off_t seekpos; int nbytes; MdfdVec *v; @@ -653,11 +653,11 @@ mdwrite(SMgrRelation reln, BlockNumber blocknum, char *buffer, bool isTemp) v = _mdfd_getseg(reln, blocknum, isTemp, EXTENSION_FAIL); -#ifndef LET_OS_MANAGE_FILESIZE - seekpos = (long) (BLCKSZ * (blocknum % ((BlockNumber) RELSEG_SIZE))); - Assert(seekpos < BLCKSZ * RELSEG_SIZE); +#ifdef USE_SEGMENTED_FILES + seekpos = (off_t) BLCKSZ * (blocknum % ((BlockNumber) RELSEG_SIZE)); + Assert(seekpos < (off_t) BLCKSZ * RELSEG_SIZE); #else - seekpos = (long) (BLCKSZ * (blocknum)); + seekpos = (off_t) BLCKSZ * blocknum; #endif if (FileSeek(v->mdfd_vfd, seekpos, SEEK_SET) != seekpos) @@ -708,7 +708,7 @@ mdnblocks(SMgrRelation reln) { MdfdVec *v = mdopen(reln, EXTENSION_FAIL); -#ifndef LET_OS_MANAGE_FILESIZE +#ifdef USE_SEGMENTED_FILES BlockNumber nblocks; BlockNumber segno = 0; @@ -778,7 +778,7 @@ mdtruncate(SMgrRelation reln, BlockNumber nblocks, bool isTemp) MdfdVec *v; BlockNumber curnblk; -#ifndef LET_OS_MANAGE_FILESIZE +#ifdef USE_SEGMENTED_FILES BlockNumber priorblocks; #endif @@ -804,7 +804,7 @@ mdtruncate(SMgrRelation reln, BlockNumber nblocks, bool isTemp) v = mdopen(reln, EXTENSION_FAIL); -#ifndef LET_OS_MANAGE_FILESIZE +#ifdef USE_SEGMENTED_FILES priorblocks = 0; while (v != NULL) { @@ -843,7 +843,7 @@ mdtruncate(SMgrRelation reln, BlockNumber nblocks, bool isTemp) */ BlockNumber lastsegblocks = nblocks - priorblocks; - if (FileTruncate(v->mdfd_vfd, lastsegblocks * BLCKSZ) < 0) + if (FileTruncate(v->mdfd_vfd, (off_t) lastsegblocks * BLCKSZ) < 0) ereport(ERROR, (errcode_for_file_access(), errmsg("could not truncate relation %u/%u/%u to %u blocks: %m", @@ -867,7 +867,8 @@ mdtruncate(SMgrRelation reln, BlockNumber nblocks, bool isTemp) priorblocks += RELSEG_SIZE; } #else - if (FileTruncate(v->mdfd_vfd, nblocks * BLCKSZ) < 0) + /* For unsegmented files, it's a lot easier */ + if (FileTruncate(v->mdfd_vfd, (off_t) nblocks * BLCKSZ) < 0) ereport(ERROR, 
(errcode_for_file_access(), errmsg("could not truncate relation %u/%u/%u to %u blocks: %m", @@ -900,7 +901,7 @@ mdimmedsync(SMgrRelation reln) v = mdopen(reln, EXTENSION_FAIL); -#ifndef LET_OS_MANAGE_FILESIZE +#ifdef USE_SEGMENTED_FILES while (v != NULL) { if (FileSync(v->mdfd_vfd) < 0) @@ -917,8 +918,7 @@ mdimmedsync(SMgrRelation reln) if (FileSync(v->mdfd_vfd) < 0) ereport(ERROR, (errcode_for_file_access(), - errmsg("could not fsync segment %u of relation %u/%u/%u: %m", - v->mdfd_segno, + errmsg("could not fsync relation %u/%u/%u: %m", reln->smgr_rnode.spcNode, reln->smgr_rnode.dbNode, reln->smgr_rnode.relNode))); @@ -1453,7 +1453,7 @@ _fdvec_alloc(void) return (MdfdVec *) MemoryContextAlloc(MdCxt, sizeof(MdfdVec)); } -#ifndef LET_OS_MANAGE_FILESIZE +#ifdef USE_SEGMENTED_FILES /* * Open the specified segment of the relation, @@ -1499,7 +1499,7 @@ _mdfd_openseg(SMgrRelation reln, BlockNumber segno, int oflags) /* all done */ return v; } -#endif /* LET_OS_MANAGE_FILESIZE */ +#endif /* USE_SEGMENTED_FILES */ /* * _mdfd_getseg() -- Find the segment of the relation holding the @@ -1515,7 +1515,7 @@ _mdfd_getseg(SMgrRelation reln, BlockNumber blkno, bool isTemp, { MdfdVec *v = mdopen(reln, behavior); -#ifndef LET_OS_MANAGE_FILESIZE +#ifdef USE_SEGMENTED_FILES BlockNumber targetseg; BlockNumber nextsegno; @@ -1588,7 +1588,7 @@ _mdfd_getseg(SMgrRelation reln, BlockNumber blkno, bool isTemp, static BlockNumber _mdnblocks(SMgrRelation reln, MdfdVec *seg) { - long len; + off_t len; len = FileSeek(seg->mdfd_vfd, 0L, SEEK_END); if (len < 0) diff --git a/src/backend/utils/sort/tuplestore.c b/src/backend/utils/sort/tuplestore.c index e297579674..d6c192993e 100644 --- a/src/backend/utils/sort/tuplestore.c +++ b/src/backend/utils/sort/tuplestore.c @@ -38,7 +38,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/utils/sort/tuplestore.c,v 1.36 2008/01/01 19:45:55 momjian Exp $ + * $PostgreSQL: 
pgsql/src/backend/utils/sort/tuplestore.c,v 1.37 2008/03/10 20:06:27 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -134,14 +134,14 @@ struct Tuplestorestate bool eof_reached; /* read reached EOF (always valid) */ int current; /* next array index (valid if INMEM) */ int readpos_file; /* file# (valid if WRITEFILE and not eof) */ - long readpos_offset; /* offset (valid if WRITEFILE and not eof) */ + off_t readpos_offset; /* offset (valid if WRITEFILE and not eof) */ int writepos_file; /* file# (valid if READFILE) */ - long writepos_offset; /* offset (valid if READFILE) */ + off_t writepos_offset; /* offset (valid if READFILE) */ /* markpos_xxx holds marked position for mark and restore */ int markpos_current; /* saved "current" */ int markpos_file; /* saved "readpos_file" */ - long markpos_offset; /* saved "readpos_offset" */ + off_t markpos_offset; /* saved "readpos_offset" */ }; #define COPYTUP(state,tup) ((*(state)->copytup) (state, tup)) diff --git a/src/include/pg_config.h.in b/src/include/pg_config.h.in index b48e261cbf..24b7c0dc86 100644 --- a/src/include/pg_config.h.in +++ b/src/include/pg_config.h.in @@ -637,6 +637,9 @@ your system. */ #undef PTHREAD_CREATE_JOINABLE +/* The size of `off_t', as computed by sizeof. */ +#undef SIZEOF_OFF_T + /* The size of `size_t', as computed by sizeof. */ #undef SIZEOF_SIZE_T @@ -685,6 +688,9 @@ /* Use replacement snprintf() functions. */ #undef USE_REPL_SNPRINTF +/* Define to split data files into 1GB segments. */ +#undef USE_SEGMENTED_FILES + /* Define to build with (Open)SSL support. (--with-openssl) */ #undef USE_SSL diff --git a/src/include/pg_config_manual.h b/src/include/pg_config_manual.h index 0a1e8233d3..c0d546761a 100644 --- a/src/include/pg_config_manual.h +++ b/src/include/pg_config_manual.h @@ -6,7 +6,7 @@ * for developers. If you edit any of these, be sure to do a *full* * rebuild (and an initdb if noted). 
* - * $PostgreSQL: pgsql/src/include/pg_config_manual.h,v 1.28 2008/02/29 20:58:33 alvherre Exp $ + * $PostgreSQL: pgsql/src/include/pg_config_manual.h,v 1.29 2008/03/10 20:06:27 tgl Exp $ *------------------------------------------------------------------------ */ @@ -27,8 +27,9 @@ /* * RELSEG_SIZE is the maximum number of blocks allowed in one disk - * file. Thus, the maximum size of a single file is RELSEG_SIZE * - * BLCKSZ; relations bigger than that are divided into multiple files. + * file when USE_SEGMENTED_FILES is defined. Thus, the maximum size + * of a single file is RELSEG_SIZE * BLCKSZ; relations bigger than that + * are divided into multiple files. * * RELSEG_SIZE * BLCKSZ must be less than your OS' limit on file size. * This is often 2 GB or 4GB in a 32-bit operating system, unless you @@ -39,9 +40,16 @@ * in the direction of a small limit. (Besides, a power-of-2 value * saves a few cycles in md.c.) * + * When not using segmented files, RELSEG_SIZE is set to zero so that + * this behavior can be distinguished in pg_control. + * * Changing RELSEG_SIZE requires an initdb. */ +#ifdef USE_SEGMENTED_FILES #define RELSEG_SIZE (0x40000000 / BLCKSZ) +#else +#define RELSEG_SIZE 0 +#endif /* * Size of a WAL file block. This need have no particular relation to BLCKSZ. 
diff --git a/src/include/storage/buffile.h b/src/include/storage/buffile.h index 3313e43ea0..e50ec2f834 100644 --- a/src/include/storage/buffile.h +++ b/src/include/storage/buffile.h @@ -18,7 +18,7 @@ * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/storage/buffile.h,v 1.23 2008/01/01 19:45:58 momjian Exp $ + * $PostgreSQL: pgsql/src/include/storage/buffile.h,v 1.24 2008/03/10 20:06:27 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -38,8 +38,8 @@ extern BufFile *BufFileCreateTemp(bool interXact); extern void BufFileClose(BufFile *file); extern size_t BufFileRead(BufFile *file, void *ptr, size_t size); extern size_t BufFileWrite(BufFile *file, void *ptr, size_t size); -extern int BufFileSeek(BufFile *file, int fileno, long offset, int whence); -extern void BufFileTell(BufFile *file, int *fileno, long *offset); +extern int BufFileSeek(BufFile *file, int fileno, off_t offset, int whence); +extern void BufFileTell(BufFile *file, int *fileno, off_t *offset); extern int BufFileSeekBlock(BufFile *file, long blknum); #endif /* BUFFILE_H */ diff --git a/src/include/storage/fd.h b/src/include/storage/fd.h index f5862bf82b..05c2a62525 100644 --- a/src/include/storage/fd.h +++ b/src/include/storage/fd.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/storage/fd.h,v 1.61 2008/01/01 19:45:58 momjian Exp $ + * $PostgreSQL: pgsql/src/include/storage/fd.h,v 1.62 2008/03/10 20:06:27 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -65,8 +65,8 @@ extern void FileClose(File file); extern int FileRead(File file, char *buffer, int amount); extern int FileWrite(File file, char *buffer, int amount); extern int FileSync(File 
file); -extern long FileSeek(File file, long offset, int whence); -extern int FileTruncate(File file, long offset); +extern off_t FileSeek(File file, off_t offset, int whence); +extern int FileTruncate(File file, off_t offset); /* Operations that allow use of regular stdio --- USE WITH CAUTION */ extern FILE *AllocateFile(const char *name, const char *mode);