Fix tar files emitted by pg_dump and pg_basebackup to be POSIX conformant.

Both programs got the "magic" string wrong, causing standard-conforming tar
implementations to believe the output was just legacy tar format without
any POSIX extensions.  This doesn't actually matter that much, especially
since pg_dump failed to fill the POSIX fields anyway, but still there is
little point in emitting tar format if we can't be compliant with the
standard.  In addition, pg_dump failed to write the EOF marker correctly
(there should be 2 blocks of zeroes, not just one), pg_basebackup put the
numeric group ID in the wrong place, and both programs had a pretty
brain-dead idea of how to compute the checksum.  Fix all that and improve
the comments a bit.

pg_restore is modified to accept either the correct POSIX-compliant "magic"
string or the previous value.  This part of the change will need to be
back-patched to avoid an unnecessary compatibility break when a previous
version tries to read tar-format output from 9.3 pg_dump.

Brian Weaver and Tom Lane
This commit is contained in:
Tom Lane 2012-09-28 15:19:15 -04:00
parent edc9109c42
commit 05b555d12b
4 changed files with 110 additions and 69 deletions

View File

@ -1759,9 +1759,11 @@ The commands accepted in walsender mode are:
After the second regular result set, one or more CopyResponse results After the second regular result set, one or more CopyResponse results
will be sent, one for PGDATA and one for each additional tablespace other will be sent, one for PGDATA and one for each additional tablespace other
than <literal>pg_default</> and <literal>pg_global</>. The data in than <literal>pg_default</> and <literal>pg_global</>. The data in
the CopyResponse results will be a tar format (using ustar00 the CopyResponse results will be a tar format (following the
extensions) dump of the tablespace contents. After the tar data is <quote>ustar interchange format</> specified in the POSIX 1003.1-2008
complete, a final ordinary result set will be sent. standard) dump of the tablespace contents, except that the two trailing
blocks of zeroes specified in the standard are omitted.
After the tar data is complete, a final ordinary result set will be sent.
</para> </para>
<para> <para>

View File

@ -568,7 +568,7 @@ sendFileWithContent(const char *filename, const char *content)
/* /*
* Include all files from the given directory in the output tar stream. If * Include all files from the given directory in the output tar stream. If
* 'sizeonly' is true, we just calculate a total length and return ig, without * 'sizeonly' is true, we just calculate a total length and return it, without
* actually sending anything. * actually sending anything.
*/ */
static int64 static int64
@ -763,11 +763,16 @@ _tarChecksum(char *header)
int i, int i,
sum; sum;
sum = 0; /*
* Per POSIX, the checksum is the simple sum of all bytes in the header,
* treating the bytes as unsigned, and treating the checksum field (at
* offset 148) as though it contained 8 spaces.
*/
sum = 8 * ' '; /* presumed value for checksum field */
for (i = 0; i < 512; i++) for (i = 0; i < 512; i++)
if (i < 148 || i >= 156) if (i < 148 || i >= 156)
sum += 0xFF & header[i]; sum += 0xFF & header[i];
return sum + 256; /* Assume 8 blanks in checksum field */ return sum;
} }
/* Given the member, write the TAR header & send the file */ /* Given the member, write the TAR header & send the file */
@ -846,9 +851,13 @@ _tarWriteHeader(const char *filename, const char *linktarget,
struct stat * statbuf) struct stat * statbuf)
{ {
char h[512]; char h[512];
int lastSum = 0;
int sum;
/*
* Note: most of the fields in a tar header are not supposed to be
* null-terminated. We use sprintf, which will write a null after the
* required bytes; that null goes into the first byte of the next field.
* This is okay as long as we fill the fields in order.
*/
memset(h, 0, sizeof(h)); memset(h, 0, sizeof(h));
/* Name 100 */ /* Name 100 */
@ -860,8 +869,11 @@ _tarWriteHeader(const char *filename, const char *linktarget,
* indicated in the tar format by adding a slash at the end of the * indicated in the tar format by adding a slash at the end of the
* name, the same as for regular directories. * name, the same as for regular directories.
*/ */
h[strlen(filename)] = '/'; int flen = strlen(filename);
h[strlen(filename) + 1] = '\0';
flen = Min(flen, 99);
h[flen] = '/';
h[flen + 1] = '\0';
} }
/* Mode 8 */ /* Mode 8 */
@ -871,9 +883,9 @@ _tarWriteHeader(const char *filename, const char *linktarget,
sprintf(&h[108], "%07o ", statbuf->st_uid); sprintf(&h[108], "%07o ", statbuf->st_uid);
/* Group 8 */ /* Group 8 */
sprintf(&h[117], "%07o ", statbuf->st_gid); sprintf(&h[116], "%07o ", statbuf->st_gid);
/* File size 12 - 11 digits, 1 space, no NUL */ /* File size 12 - 11 digits, 1 space; use print_val for 64 bit support */
if (linktarget != NULL || S_ISDIR(statbuf->st_mode)) if (linktarget != NULL || S_ISDIR(statbuf->st_mode))
/* Symbolic link or directory has size zero */ /* Symbolic link or directory has size zero */
print_val(&h[124], 0, 8, 11); print_val(&h[124], 0, 8, 11);
@ -884,13 +896,13 @@ _tarWriteHeader(const char *filename, const char *linktarget,
/* Mod Time 12 */ /* Mod Time 12 */
sprintf(&h[136], "%011o ", (int) statbuf->st_mtime); sprintf(&h[136], "%011o ", (int) statbuf->st_mtime);
/* Checksum 8 */ /* Checksum 8 cannot be calculated until we've filled all other fields */
sprintf(&h[148], "%06o ", lastSum);
if (linktarget != NULL) if (linktarget != NULL)
{ {
/* Type - Symbolic link */ /* Type - Symbolic link */
sprintf(&h[156], "2"); sprintf(&h[156], "2");
/* Link Name 100 */
sprintf(&h[157], "%.99s", linktarget); sprintf(&h[157], "%.99s", linktarget);
} }
else if (S_ISDIR(statbuf->st_mode)) else if (S_ISDIR(statbuf->st_mode))
@ -900,10 +912,11 @@ _tarWriteHeader(const char *filename, const char *linktarget,
/* Type - regular file */ /* Type - regular file */
sprintf(&h[156], "0"); sprintf(&h[156], "0");
/* Link tag 100 (NULL) */ /* Magic 6 */
sprintf(&h[257], "ustar");
/* Magic 6 + Version 2 */ /* Version 2 */
sprintf(&h[257], "ustar00"); sprintf(&h[263], "00");
/* User 32 */ /* User 32 */
/* XXX: Do we need to care about setting correct username? */ /* XXX: Do we need to care about setting correct username? */
@ -913,17 +926,21 @@ _tarWriteHeader(const char *filename, const char *linktarget,
/* XXX: Do we need to care about setting correct group name? */ /* XXX: Do we need to care about setting correct group name? */
sprintf(&h[297], "%.31s", "postgres"); sprintf(&h[297], "%.31s", "postgres");
/* Maj Dev 8 */ /* Major Dev 8 */
sprintf(&h[329], "%6o ", 0); sprintf(&h[329], "%07o ", 0);
/* Min Dev 8 */ /* Minor Dev 8 */
sprintf(&h[337], "%6o ", 0); sprintf(&h[337], "%07o ", 0);
while ((sum = _tarChecksum(h)) != lastSum) /* Prefix 155 - not used, leave as nulls */
{
sprintf(&h[148], "%06o ", sum);
lastSum = sum;
}
/*
* We mustn't overwrite the next field while inserting the checksum.
* Fortunately, the checksum can't exceed 6 octal digits, so we just write
* 6 digits, a space, and a null, which is legal per POSIX.
*/
sprintf(&h[148], "%06o ", _tarChecksum(h));
/* Now send the completed header. */
pq_putmessage('d', h, 512); pq_putmessage('d', h, 512);
} }

View File

@ -882,8 +882,10 @@ _CloseArchive(ArchiveHandle *AH)
tarClose(AH, th); tarClose(AH, th);
/* Add a block of NULLs since it's de-rigeur. */ /*
for (i = 0; i < 512; i++) * EOF marker for tar files is two blocks of NULLs.
*/
for (i = 0; i < 512 * 2; i++)
{ {
if (fputc(0, ctx->tarFH) == EOF) if (fputc(0, ctx->tarFH) == EOF)
exit_horribly(modulename, exit_horribly(modulename,
@ -1032,11 +1034,16 @@ _tarChecksum(char *header)
int i, int i,
sum; sum;
sum = 0; /*
* Per POSIX, the checksum is the simple sum of all bytes in the header,
* treating the bytes as unsigned, and treating the checksum field (at
* offset 148) as though it contained 8 spaces.
*/
sum = 8 * ' '; /* presumed value for checksum field */
for (i = 0; i < 512; i++) for (i = 0; i < 512; i++)
if (i < 148 || i >= 156) if (i < 148 || i >= 156)
sum += 0xFF & header[i]; sum += 0xFF & header[i];
return sum + 256; /* Assume 8 blanks in checksum field */ return sum;
} }
bool bool
@ -1050,11 +1057,15 @@ isValidTarHeader(char *header)
if (sum != chk) if (sum != chk)
return false; return false;
/* POSIX format */ /* POSIX tar format */
if (strncmp(&header[257], "ustar00", 7) == 0) if (memcmp(&header[257], "ustar\0", 6) == 0 &&
memcmp(&header[263], "00", 2) == 0)
return true; return true;
/* older format */ /* GNU tar format */
if (strncmp(&header[257], "ustar ", 7) == 0) if (memcmp(&header[257], "ustar \0", 8) == 0)
return true;
/* not-quite-POSIX format written by pre-9.3 pg_dump */
if (memcmp(&header[257], "ustar00\0", 8) == 0)
return true; return true;
return false; return false;
@ -1329,63 +1340,71 @@ static void
_tarWriteHeader(TAR_MEMBER *th) _tarWriteHeader(TAR_MEMBER *th)
{ {
char h[512]; char h[512];
int lastSum = 0;
int sum;
/*
* Note: most of the fields in a tar header are not supposed to be
* null-terminated. We use sprintf, which will write a null after the
* required bytes; that null goes into the first byte of the next field.
* This is okay as long as we fill the fields in order.
*/
memset(h, 0, sizeof(h)); memset(h, 0, sizeof(h));
/* Name 100 */ /* Name 100 */
sprintf(&h[0], "%.99s", th->targetFile); sprintf(&h[0], "%.99s", th->targetFile);
/* Mode 8 */ /* Mode 8 */
sprintf(&h[100], "100600 "); sprintf(&h[100], "0000600 ");
/* User ID 8 */ /* User ID 8 */
sprintf(&h[108], "004000 "); sprintf(&h[108], "0004000 ");
/* Group 8 */ /* Group 8 */
sprintf(&h[116], "002000 "); sprintf(&h[116], "0002000 ");
/* File size 12 - 11 digits, 1 space, no NUL */ /* File size 12 - 11 digits, 1 space; use print_val for 64 bit support */
print_val(&h[124], th->fileLen, 8, 11); print_val(&h[124], th->fileLen, 8, 11);
sprintf(&h[135], " "); sprintf(&h[135], " ");
/* Mod Time 12 */ /* Mod Time 12 */
sprintf(&h[136], "%011o ", (int) time(NULL)); sprintf(&h[136], "%011o ", (int) time(NULL));
/* Checksum 8 */ /* Checksum 8 cannot be calculated until we've filled all other fields */
sprintf(&h[148], "%06o ", lastSum);
/* Type - regular file */ /* Type - regular file */
sprintf(&h[156], "0"); sprintf(&h[156], "0");
/* Link tag 100 (NULL) */ /* Link Name 100 (leave as nulls) */
/* Magic 6 + Version 2 */ /* Magic 6 */
sprintf(&h[257], "ustar00"); sprintf(&h[257], "ustar");
/* Version 2 */
sprintf(&h[263], "00");
#if 0
/* User 32 */ /* User 32 */
sprintf(&h[265], "%.31s", ""); /* How do I get username reliably? Do /* XXX: Do we need to care about setting correct username? */
* I need to? */ sprintf(&h[265], "%.31s", "postgres");
/* Group 32 */ /* Group 32 */
sprintf(&h[297], "%.31s", ""); /* How do I get group reliably? Do I /* XXX: Do we need to care about setting correct group name? */
* need to? */ sprintf(&h[297], "%.31s", "postgres");
/* Maj Dev 8 */ /* Major Dev 8 */
sprintf(&h[329], "%6o ", 0); sprintf(&h[329], "%07o ", 0);
/* Min Dev 8 */ /* Minor Dev 8 */
sprintf(&h[337], "%6o ", 0); sprintf(&h[337], "%07o ", 0);
#endif
while ((sum = _tarChecksum(h)) != lastSum) /* Prefix 155 - not used, leave as nulls */
{
sprintf(&h[148], "%06o ", sum);
lastSum = sum;
}
/*
* We mustn't overwrite the next field while inserting the checksum.
* Fortunately, the checksum can't exceed 6 octal digits, so we just write
* 6 digits, a space, and a null, which is legal per POSIX.
*/
sprintf(&h[148], "%06o ", _tarChecksum(h));
/* Now write the completed header. */
if (fwrite(h, 1, 512, th->tarFH) != 512) if (fwrite(h, 1, 512, th->tarFH) != 512)
exit_horribly(modulename, "could not write to output file: %s\n", strerror(errno)); exit_horribly(modulename, "could not write to output file: %s\n", strerror(errno));
} }

View File

@ -1,28 +1,31 @@
/* /*
* src/bin/pg_dump/pg_backup_tar.h * src/bin/pg_dump/pg_backup_tar.h
* *
* TAR Header * TAR Header (see "ustar interchange format" in POSIX 1003.1)
* *
* Offset Length Contents * Offset Length Contents
* 0 100 bytes File name ('\0' terminated, 99 maximum length) * 0 100 bytes File name ('\0' terminated, 99 maximum length)
* 100 8 bytes File mode (in octal ascii) * 100 8 bytes File mode (in octal ascii)
* 108 8 bytes User ID (in octal ascii) * 108 8 bytes User ID (in octal ascii)
* 116 8 bytes Group ID (in octal ascii) * 116 8 bytes Group ID (in octal ascii)
* 124 12 bytes File size (s) (in octal ascii) * 124 12 bytes File size (in octal ascii)
* 136 12 bytes Modify time (in octal ascii) * 136 12 bytes Modify time (Unix timestamp in octal ascii)
* 148 8 bytes Header checksum (in octal ascii) * 148 8 bytes Header checksum (in octal ascii)
* 156 1 bytes Link flag * 156 1 bytes Type flag (see below)
* 157 100 bytes Linkname ('\0' terminated, 99 maximum length) * 157 100 bytes Linkname, if symlink ('\0' terminated, 99 maximum length)
* 257 8 bytes Magic ("ustar \0") * 257 6 bytes Magic ("ustar\0")
* 263 2 bytes Version ("00")
* 265 32 bytes User name ('\0' terminated, 31 maximum length) * 265 32 bytes User name ('\0' terminated, 31 maximum length)
* 297 32 bytes Group name ('\0' terminated, 31 maximum length) * 297 32 bytes Group name ('\0' terminated, 31 maximum length)
* 329 8 bytes Major device ID (in octal ascii) * 329 8 bytes Major device ID (in octal ascii)
* 337 8 bytes Minor device ID (in octal ascii) * 337 8 bytes Minor device ID (in octal ascii)
* 345 167 bytes Padding * 345 155 bytes File name prefix (not used in our implementation)
* 512 (s+p)bytes File contents (s+p) := (((s) + 511) & ~511), round up to 512 bytes * 500 12 bytes Padding
*
* 512 (s+p)bytes File contents, padded out to 512-byte boundary
*/ */
/* The linkflag defines the type of file */ /* The type flag defines the type of file */
#define LF_OLDNORMAL '\0' /* Normal disk file, Unix compatible */ #define LF_OLDNORMAL '\0' /* Normal disk file, Unix compatible */
#define LF_NORMAL '0' /* Normal disk file */ #define LF_NORMAL '0' /* Normal disk file */
#define LF_LINK '1' /* Link to previously dumped file */ #define LF_LINK '1' /* Link to previously dumped file */