diff --git a/doc/src/sgml/ref/pg_dump.sgml b/doc/src/sgml/ref/pg_dump.sgml index 9d84f8b4cc..25b3d9632d 100644 --- a/doc/src/sgml/ref/pg_dump.sgml +++ b/doc/src/sgml/ref/pg_dump.sgml @@ -272,12 +272,12 @@ PostgreSQL documentation Output a tar-format archive suitable for input - into pg_restore. The tar-format is - compatible with the directory-format; extracting a tar-format + into pg_restore. The tar format is + compatible with the directory format: extracting a tar-format archive produces a valid directory-format archive. - However, the tar-format does not support compression and has a - limit of 8 GB on the size of individual tables. Also, the relative - order of table data items cannot be changed during restore. + However, the tar format does not support compression. Also, when + using tar format the relative order of table data items cannot be + changed during restore. @@ -1140,15 +1140,6 @@ CREATE DATABASE foo WITH TEMPLATE template0; catalogs might be left in the wrong state. - - Members of tar archives are limited to a size less than 8 GB. - (This is an inherent limitation of the tar file format.) Therefore - this format cannot be used if the textual representation of any one table - exceeds that size. The total size of a tar archive and any of the - other output formats is not limited, except possibly by the - operating system. 
- - The dump file produced by pg_dump does not contain the statistics used by the optimizer to make diff --git a/src/backend/replication/basebackup.c b/src/backend/replication/basebackup.c index 1af011ee6e..6120c8f6db 100644 --- a/src/backend/replication/basebackup.c +++ b/src/backend/replication/basebackup.c @@ -698,7 +698,7 @@ SendBackupHeader(List *tablespaces) } else { - Size len; + Size len; len = strlen(ti->oid); pq_sendint(&buf, len, 4); @@ -1131,13 +1131,6 @@ sendDir(char *path, int basepathlen, bool sizeonly, List *tablespaces, */ -/* - * Maximum file size for a tar member: The limit inherent in the - * format is 2^33-1 bytes (nearly 8 GB). But we don't want to exceed - * what we can represent in pgoff_t. - */ -#define MAX_TAR_MEMBER_FILELEN (((int64) 1 << Min(33, sizeof(pgoff_t)*8 - 1)) - 1) - /* * Given the member, write the TAR header & send the file. * @@ -1166,15 +1159,6 @@ sendFile(char *readfilename, char *tarfilename, struct stat * statbuf, errmsg("could not open file \"%s\": %m", readfilename))); } - /* - * Some compilers will throw a warning knowing this test can never be true - * because pgoff_t can't exceed the compared maximum on their platform. 
- */ - if (statbuf->st_size > MAX_TAR_MEMBER_FILELEN) - ereport(ERROR, - (errmsg("archive member \"%s\" too large for tar format", - tarfilename))); - _tarWriteHeader(tarfilename, NULL, statbuf); while ((cnt = fread(buf, 1, Min(sizeof(buf), statbuf->st_size - len), fp)) > 0) diff --git a/src/bin/pg_basebackup/pg_basebackup.c b/src/bin/pg_basebackup/pg_basebackup.c index 80de8820ff..8c4dffea93 100644 --- a/src/bin/pg_basebackup/pg_basebackup.c +++ b/src/bin/pg_basebackup/pg_basebackup.c @@ -781,7 +781,7 @@ ReceiveTarFile(PGconn *conn, PGresult *res, int rownum) bool in_tarhdr = true; bool skip_file = false; size_t tarhdrsz = 0; - size_t filesz = 0; + pgoff_t filesz = 0; #ifdef HAVE_LIBZ gzFile ztarfile = NULL; @@ -1046,7 +1046,7 @@ ReceiveTarFile(PGconn *conn, PGresult *res, int rownum) skip_file = (strcmp(&tarhdr[0], "recovery.conf") == 0); - sscanf(&tarhdr[124], "%11o", (unsigned int *) &filesz); + filesz = read_tar_number(&tarhdr[124], 12); padding = ((filesz + 511) & ~511) - filesz; filesz += padding; @@ -1139,7 +1139,7 @@ ReceiveAndUnpackTarFile(PGconn *conn, PGresult *res, int rownum) char current_path[MAXPGPATH]; char filename[MAXPGPATH]; const char *mapped_tblspc_path; - int current_len_left; + pgoff_t current_len_left = 0; int current_padding = 0; bool basetablespace; char *copybuf = NULL; @@ -1208,20 +1208,10 @@ ReceiveAndUnpackTarFile(PGconn *conn, PGresult *res, int rownum) } totaldone += 512; - if (sscanf(copybuf + 124, "%11o", &current_len_left) != 1) - { - fprintf(stderr, _("%s: could not parse file size\n"), - progname); - disconnect_and_exit(1); - } + current_len_left = read_tar_number(&copybuf[124], 12); /* Set permissions on the file */ - if (sscanf(&copybuf[100], "%07o ", &filemode) != 1) - { - fprintf(stderr, _("%s: could not parse file mode\n"), - progname); - disconnect_and_exit(1); - } + filemode = read_tar_number(&copybuf[100], 8); /* * All files are padded up to 512 bytes @@ -2180,7 +2170,7 @@ main(int argc, char **argv) if (replication_slot && !streamwal) { 
fprintf(stderr, - _("%s: replication slots can only be used with WAL streaming\n"), + _("%s: replication slots can only be used with WAL streaming\n"), progname); fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname); diff --git a/src/bin/pg_dump/pg_backup_tar.c b/src/bin/pg_dump/pg_backup_tar.c index 532eacc066..c40dfe5726 100644 --- a/src/bin/pg_dump/pg_backup_tar.c +++ b/src/bin/pg_dump/pg_backup_tar.c @@ -78,13 +78,6 @@ typedef struct ArchiveHandle *AH; } TAR_MEMBER; -/* - * Maximum file size for a tar member: The limit inherent in the - * format is 2^33-1 bytes (nearly 8 GB). But we don't want to exceed - * what we can represent in pgoff_t. - */ -#define MAX_TAR_MEMBER_FILELEN (((int64) 1 << Min(33, sizeof(pgoff_t)*8 - 1)) - 1) - typedef struct { int hasSeek; @@ -1049,7 +1042,7 @@ isValidTarHeader(char *header) int sum; int chk = tarChecksum(header); - sscanf(&header[148], "%8o", &sum); + sum = read_tar_number(&header[148], 8); if (sum != chk) return false; @@ -1091,13 +1084,6 @@ _tarAddFile(ArchiveHandle *AH, TAR_MEMBER *th) strerror(errno)); fseeko(tmp, 0, SEEK_SET); - /* - * Some compilers will throw a warning knowing this test can never be true - * because pgoff_t can't exceed the compared maximum on their platform. - */ - if (th->fileLen > MAX_TAR_MEMBER_FILELEN) - exit_horribly(modulename, "archive member too large for tar format\n"); - _tarWriteHeader(th); while ((cnt = fread(buf, 1, sizeof(buf), tmp)) > 0) @@ -1222,11 +1208,10 @@ _tarGetHeader(ArchiveHandle *AH, TAR_MEMBER *th) { lclContext *ctx = (lclContext *) AH->formatData; char h[512]; - char tag[100]; + char tag[100 + 1]; int sum, chk; - size_t len; - unsigned long ullen; + pgoff_t len; pgoff_t hPos; bool gotBlock = false; @@ -1249,7 +1234,7 @@ _tarGetHeader(ArchiveHandle *AH, TAR_MEMBER *th) /* Calc checksum */ chk = tarChecksum(h); - sscanf(&h[148], "%8o", &sum); + sum = read_tar_number(&h[148], 8); /* * If the checksum failed, see if it is a null block. 
If so, silently @@ -1272,27 +1257,31 @@ _tarGetHeader(ArchiveHandle *AH, TAR_MEMBER *th) } } - sscanf(&h[0], "%99s", tag); - sscanf(&h[124], "%12lo", &ullen); - len = (size_t) ullen; + /* Name field is 100 bytes, might not be null-terminated */ + strlcpy(tag, &h[0], 100 + 1); + + len = read_tar_number(&h[124], 12); { - char buf[100]; + char posbuf[32]; + char lenbuf[32]; - snprintf(buf, sizeof(buf), INT64_FORMAT, (int64) hPos); - ahlog(AH, 3, "TOC Entry %s at %s (length %lu, checksum %d)\n", - tag, buf, (unsigned long) len, sum); + snprintf(posbuf, sizeof(posbuf), UINT64_FORMAT, (uint64) hPos); + snprintf(lenbuf, sizeof(lenbuf), UINT64_FORMAT, (uint64) len); + ahlog(AH, 3, "TOC Entry %s at %s (length %s, checksum %d)\n", + tag, posbuf, lenbuf, sum); } if (chk != sum) { - char buf[100]; + char posbuf[32]; - snprintf(buf, sizeof(buf), INT64_FORMAT, (int64) ftello(ctx->tarFH)); + snprintf(posbuf, sizeof(posbuf), UINT64_FORMAT, + (uint64) ftello(ctx->tarFH)); exit_horribly(modulename, "corrupt tar header found in %s " "(expected %d, computed %d) file position %s\n", - tag, sum, chk, buf); + tag, sum, chk, posbuf); } th->targetFile = pg_strdup(tag); @@ -1307,7 +1296,8 @@ _tarWriteHeader(TAR_MEMBER *th) { char h[512]; - tarCreateHeader(h, th->targetFile, NULL, th->fileLen, 0600, 04000, 02000, time(NULL)); + tarCreateHeader(h, th->targetFile, NULL, th->fileLen, + 0600, 04000, 02000, time(NULL)); /* Now write the completed header. 
*/ if (fwrite(h, 1, 512, th->tarFH) != 512) diff --git a/src/include/pgtar.h b/src/include/pgtar.h index 906db7cebc..9c94a58b52 100644 --- a/src/include/pgtar.h +++ b/src/include/pgtar.h @@ -19,5 +19,7 @@ enum tarError TAR_SYMLINK_TOO_LONG }; -extern enum tarError tarCreateHeader(char *h, const char *filename, const char *linktarget, size_t size, mode_t mode, uid_t uid, gid_t gid, time_t mtime); +extern enum tarError tarCreateHeader(char *h, const char *filename, const char *linktarget, + pgoff_t size, mode_t mode, uid_t uid, gid_t gid, time_t mtime); +extern uint64 read_tar_number(const char *s, int len); extern int tarChecksum(char *header); diff --git a/src/port/tar.c b/src/port/tar.c index 72fd4e13ac..52a2113a47 100644 --- a/src/port/tar.c +++ b/src/port/tar.c @@ -3,21 +3,80 @@ #include <sys/stat.h> /* - * Utility routine to print possibly larger than 32 bit integers in a - * portable fashion. Filled with zeros. + * Print a numeric field in a tar header. The field starts at *s and is of + * length len; val is the value to be written. + * + * Per POSIX, the way to write a number is in octal with leading zeroes and + * one trailing space (or NUL, but we use space) at the end of the specified + * field width. + * + * However, the given value may not fit in the available space in octal form. + * If that's true, we use the GNU extension of writing \200 followed by the + * number in base-256 form (ie, stored in binary MSB-first). (Note: here we + * support only non-negative numbers, so we don't worry about the GNU rules + * for handling negative numbers.) 
*/ static void -print_val(char *s, uint64 val, unsigned int base, size_t len) +print_tar_number(char *s, int len, uint64 val) { - int i; - - for (i = len; i > 0; i--) + if (val < (((uint64) 1) << ((len - 1) * 3))) { - int digit = val % base; - - s[i - 1] = '0' + digit; - val = val / base; + /* Use octal with trailing space */ + s[--len] = ' '; + while (len) + { + s[--len] = (val & 7) + '0'; + val >>= 3; + } } + else + { + /* Use base-256 with leading \200 */ + s[0] = '\200'; + while (len > 1) + { + s[--len] = (val & 255); + val >>= 8; + } + } +} + + +/* + * Read a numeric field in a tar header. The field starts at *s and is of + * length len. + * + * The POSIX-approved format for a number is octal, ending with a space or + * NUL. However, for values that don't fit, we recognize the GNU extension + * of \200 followed by the number in base-256 form (ie, stored in binary + * MSB-first). (Note: here we support only non-negative numbers, so we don't + * worry about the GNU rules for handling negative numbers.) + */ +uint64 +read_tar_number(const char *s, int len) +{ + uint64 result = 0; + + if (*s == '\200') + { + /* base-256 */ + while (--len) + { + result <<= 8; + result |= (unsigned char) (*++s); + } + } + else + { + /* octal */ + while (len-- && *s >= '0' && *s <= '7') + { + result <<= 3; + result |= (*s - '0'); + s++; + } + } + return result; } @@ -46,12 +105,12 @@ tarChecksum(char *header) /* * Fill in the buffer pointed to by h with a tar format header. This buffer - * must always have space for 512 characters, which is a requirement by + * must always have space for 512 characters, which is a requirement of * the tar format. 
*/ enum tarError tarCreateHeader(char *h, const char *filename, const char *linktarget, - size_t size, mode_t mode, uid_t uid, gid_t gid, time_t mtime) + pgoff_t size, mode_t mode, uid_t uid, gid_t gid, time_t mtime) { if (strlen(filename) > 99) return TAR_NAME_TOO_LONG; @@ -59,12 +118,6 @@ tarCreateHeader(char *h, const char *filename, const char *linktarget, if (linktarget && strlen(linktarget) > 99) return TAR_SYMLINK_TOO_LONG; - /* - * Note: most of the fields in a tar header are not supposed to be - * null-terminated. We use sprintf, which will write a null after the - * required bytes; that null goes into the first byte of the next field. - * This is okay as long as we fill the fields in order. - */ memset(h, 0, 512); /* assume tar header size */ /* Name 100 */ @@ -84,46 +137,49 @@ tarCreateHeader(char *h, const char *filename, const char *linktarget, } /* Mode 8 - this doesn't include the file type bits (S_IFMT) */ - sprintf(&h[100], "%07o ", (int) (mode & 07777)); + print_tar_number(&h[100], 8, (mode & 07777)); /* User ID 8 */ - sprintf(&h[108], "%07o ", (int) uid); + print_tar_number(&h[108], 8, uid); /* Group 8 */ - sprintf(&h[116], "%07o ", (int) gid); + print_tar_number(&h[116], 8, gid); - /* File size 12 - 11 digits, 1 space; use print_val for 64 bit support */ + /* File size 12 */ if (linktarget != NULL || S_ISDIR(mode)) /* Symbolic link or directory has size zero */ - print_val(&h[124], 0, 8, 11); + print_tar_number(&h[124], 12, 0); else - print_val(&h[124], size, 8, 11); - sprintf(&h[135], " "); + print_tar_number(&h[124], 12, size); /* Mod Time 12 */ - sprintf(&h[136], "%011o ", (int) mtime); + print_tar_number(&h[136], 12, mtime); /* Checksum 8 cannot be calculated until we've filled all other fields */ if (linktarget != NULL) { /* Type - Symbolic link */ - sprintf(&h[156], "2"); + h[156] = '2'; /* Link Name 100 */ strlcpy(&h[157], linktarget, 100); } else if (S_ISDIR(mode)) + { /* Type - directory */ - sprintf(&h[156], "5"); + h[156] = '5'; + } 
else + { /* Type - regular file */ - sprintf(&h[156], "0"); + h[156] = '0'; + } /* Magic 6 */ - sprintf(&h[257], "ustar"); + strcpy(&h[257], "ustar"); /* Version 2 */ - sprintf(&h[263], "00"); + memcpy(&h[263], "00", 2); /* User 32 */ /* XXX: Do we need to care about setting correct username? */ @@ -134,19 +190,15 @@ tarCreateHeader(char *h, const char *filename, const char *linktarget, strlcpy(&h[297], "postgres", 32); /* Major Dev 8 */ - sprintf(&h[329], "%07o ", 0); + print_tar_number(&h[329], 8, 0); /* Minor Dev 8 */ - sprintf(&h[337], "%07o ", 0); + print_tar_number(&h[337], 8, 0); /* Prefix 155 - not used, leave as nulls */ - /* - * We mustn't overwrite the next field while inserting the checksum. - * Fortunately, the checksum can't exceed 6 octal digits, so we just write - * 6 digits, a space, and a null, which is legal per POSIX. - */ - sprintf(&h[148], "%06o ", tarChecksum(h)); + /* Finally, compute and insert the checksum */ + print_tar_number(&h[148], 8, tarChecksum(h)); return TAR_OK; }