Add 'directory' format to pg_dump. The new directory format is compatible

with the 'tar' format, in that untarring a tar format archive produces a valid directory format archive. Joachim Wieland and Heikki Linnakangas
2011-01-23 23:10:15 +02:00 · 2011-01-23 23:10:15 +02:00 · 7f508f1c6b
parent f36920796e
commit 7f508f1c6b
12 changed files with 1134 additions and 57 deletions
--- a/doc/src/sgml/ref/pg_dump.sgml
+++ b/doc/src/sgml/ref/pg_dump.sgml
@ -76,11 +76,7 @@ PostgreSQL documentation
   database are to be restored. The most flexible output file format is
   the <quote>custom</quote> format (<option>-Fc</option>). It allows
   for selection and reordering of all archived items, and is compressed
-   by default. The <application>tar</application> format
-   (<option>-Ft</option>) is not compressed and has restrictions on
-   reordering data when loading, but it is otherwise quite flexible;
-   moreover, it can be manipulated with standard Unix tools such as
-   <command>tar</command>.
+   by default.
  </para>

  <para>
@ -194,8 +190,12 @@ PostgreSQL documentation
      <term><option>--file=<replaceable class="parameter">file</replaceable></option></term>
      <listitem>
       <para>
-        Send output to the specified file.  If this is omitted, the
-        standard output is used.
+        Send output to the specified file. This parameter can be omitted for
+        file-based output formats, in which case the standard output is used.
+        It must be given for the directory output format, however, where it
+        specifies the target directory instead of a file. In this case the
+        directory is created by <command>pg_dump</command> and must not
+        already exist.
       </para>
      </listitem>
     </varlistentry>
@ -226,9 +226,28 @@ PostgreSQL documentation
          <para>
           Output a custom-format archive suitable for input into
           <application>pg_restore</application>.
-           This is the most flexible output format in that it allows manual
-           selection and reordering of archived items during restore.
-           This format is also compressed by default.
+           Together with the directory output format, this is the most flexible
+           output format in that it allows manual selection and reordering of
+           archived items during restore. This format is also compressed by
+           default.
+          </para>
+         </listitem>
+        </varlistentry>
+
+        <varlistentry>
+         <term><literal>d</></term>
+         <term><literal>directory</></term>
+         <listitem>
+          <para>
+           Output a directory-format archive suitable for input into
+           <application>pg_restore</application>. This will create a directory
+           with one file for each table and blob being dumped, plus a
+           so-called Table of Contents file describing the dumped objects in a
+           machine-readable format that <application>pg_restore</application>
+           can read. A directory format archive can be manipulated with
+           standard Unix tools; for example, files in an uncompressed archive
+           can be compressed with the <application>gzip</application> tool.
+           This format is compressed by default.
          </para>
         </listitem>
        </varlistentry>
@ -239,13 +258,12 @@ PostgreSQL documentation
         <listitem>
          <para>
           Output a <command>tar</command>-format archive suitable for input
-           into <application>pg_restore</application>.
-           This output format allows manual selection and reordering of
-           archived items during restore, but there is a restriction: the
-           relative order of table data items cannot be changed during
-           restore.  Also, <command>tar</command> format does not support
-           compression and has a limit of 8 GB on the size of individual
-           tables.
+           into <application>pg_restore</application>. The tar-format is
+           compatible with the directory-format; extracting a tar-format
+           archive produces a valid directory-format archive.
+           However, the tar-format does not support compression and has a
+           limit of 8 GB on the size of individual tables. Also, the relative
+           order of table data items cannot be changed during restore.
          </para>
         </listitem>
        </varlistentry>
@ -946,6 +964,14 @@ CREATE DATABASE foo WITH TEMPLATE template0;
 </screen>
  </para>

+  <para>
+   To dump a database into a directory-format archive:
+
+<screen>
+<prompt>$</prompt> <userinput>pg_dump -Fd mydb -f dumpdir</userinput>
+</screen>
+  </para>
+
  <para>
   To reload an archive file into a (freshly created) database named
   <literal>newdb</>:
--- a/doc/src/sgml/ref/pg_restore.sgml
+++ b/doc/src/sgml/ref/pg_restore.sgml
@ -79,7 +79,8 @@
      <term><replaceable class="parameter">filename</replaceable></term>
      <listitem>
       <para>
-       Specifies the location of the archive file to be restored.
+       Specifies the location of the archive file (or directory, for a
+       directory-format archive) to be restored.
       If not specified, the standard input is used.
       </para>
      </listitem>
@ -166,6 +167,16 @@
        one of the following:

       <variablelist>
+        <varlistentry>
+         <term><literal>d</></term>
+         <term><literal>directory</></term>
+         <listitem>
+          <para>
+           The archive is a <command>directory</command> archive.
+          </para>
+         </listitem>
+        </varlistentry>
+
        <varlistentry>
         <term><literal>t</></term>
         <term><literal>tar</></term>
--- a/src/bin/pg_dump/Makefile
+++ b/src/bin/pg_dump/Makefile
@ -20,7 +20,7 @@ override CPPFLAGS := -I$(libpq_srcdir) $(CPPFLAGS)

 OBJS=	pg_backup_archiver.o pg_backup_db.o pg_backup_custom.o \
 	pg_backup_files.o pg_backup_null.o pg_backup_tar.o \
-	dumputils.o compress_io.o $(WIN32RES)
+	pg_backup_directory.o dumputils.o compress_io.o $(WIN32RES)

 KEYWRDOBJS = keywords.o kwlookup.o

--- a/src/bin/pg_dump/compress_io.c
+++ b/src/bin/pg_dump/compress_io.c
@ -7,6 +7,17 @@
 * Portions Copyright (c) 1996-2011, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
+ * This file includes two APIs for dealing with compressed data. The first
+ * provides more flexibility, using callbacks to read/write data from the
+ * underlying stream. The second API is a wrapper around fopen/gzopen and
+ * friends, providing an interface similar to those, but abstracts away
+ * the possible compression. Both APIs use libz for the compression, but
+ * the second API uses gzip headers, so the resulting files can be easily
+ * manipulated with the gzip utility.
+ *
+ * Compressor API
+ * --------------
+ *
 *  The interface for writing to an archive consists of three functions:
 *  AllocateCompressor, WriteDataToArchive and EndCompressor. First you call
 *  AllocateCompressor, then write all the data by calling WriteDataToArchive
@ -23,6 +34,17 @@
 *
 *  The interface is the same for compressed and uncompressed streams.
 *
+ * Compressed stream API
+ * ----------------------
+ *
+ *  The compressed stream API is a wrapper around the C standard fopen() and
+ *  libz's gzopen() APIs. It allows you to use the same functions for
+ *  compressed and uncompressed streams. cfopen_read() first tries to open
+ *  the file with given name, and if it fails, it tries to open the same
+ *  file with the .gz suffix. cfopen_write() opens a file for writing; an
+ *  extra argument specifies whether the file should be compressed, and if
+ *  so the .gz suffix is added to the filename. This allows you to easily
+ *  handle both compressed and uncompressed files.
 *
 * IDENTIFICATION
 *     src/bin/pg_dump/compress_io.c
@ -32,6 +54,10 @@

 #include "compress_io.h"

+/*----------------------
+ * Compressor API
+ *----------------------
+ */

 /* typedef appears in compress_io.h */
 struct CompressorState
@ -418,3 +444,234 @@ WriteDataToArchiveNone(ArchiveHandle *AH, CompressorState *cs,
 }


+/*----------------------
+ * Compressed stream API
+ *----------------------
+ */
+
+/*
+ * cfp represents an open stream, wrapping the underlying FILE or gzFile
+ * pointer. This is opaque to the callers.
+ *
+ * cfopen() fills in exactly one of the two members and sets the other to
+ * NULL; the cf* functions test compressedfp to decide whether to call zlib
+ * or stdio.
+ */
+struct cfp
+{
+	FILE *uncompressedfp;	/* stream from fopen(), or NULL if compressed */
+#ifdef HAVE_LIBZ
+	gzFile compressedfp;	/* stream from gzopen(), or NULL if uncompressed */
+#endif
+};
+
+#ifdef HAVE_LIBZ
+static int	hasSuffix(const char *filename, const char *suffix);
+#endif
+
+/*
+ * Open a file for reading. 'path' is the file to open, and 'mode' should
+ * be either "r" or "rb".
+ *
+ * If 'path' already ends in ".gz" it is opened as a gzip stream. Otherwise
+ * 'path' is first tried as a plain file; if that open fails for any reason
+ * and zlib support is compiled in, "<path>.gz" is tried as a gzip stream.
+ * So if you pass "foo" as 'path', this will open either "foo" or "foo.gz".
+ *
+ * Returns NULL if nothing could be opened; errno is then the one left by
+ * the last open attempt.
+ */
+cfp *
+cfopen_read(const char *path, const char *mode)
+{
+	cfp *fp;
+
+#ifdef HAVE_LIBZ
+	if (hasSuffix(path, ".gz"))
+		fp = cfopen(path, mode, 1);
+	else
+#endif
+	{
+		fp = cfopen(path, mode, 0);
+#ifdef HAVE_LIBZ
+		if (fp == NULL)
+		{
+			int fnamelen = strlen(path) + 4;	/* ".gz" plus terminator */
+			char *fname = malloc(fnamelen);
+			int save_errno;
+
+			if (fname == NULL)
+				die_horribly(NULL, modulename, "Out of memory\n");
+
+			snprintf(fname, fnamelen, "%s%s", path, ".gz");
+			fp = cfopen(fname, mode, 1);
+
+			/*
+			 * Callers report strerror(errno) when we return NULL, and
+			 * free() is not guaranteed to leave errno alone everywhere.
+			 */
+			save_errno = errno;
+			free(fname);
+			errno = save_errno;
+		}
+#endif
+	}
+	return fp;
+}
+
+/*
+ * Open a file for writing. 'path' indicates the path name, and 'mode' must
+ * be a filemode as accepted by fopen() and gzopen() that indicates writing
+ * ("w", "wb", "a", or "ab").
+ *
+ * If 'compression' is non-zero, a gzip compressed stream is opened and the
+ * ".gz" suffix is automatically added to 'path'. Only the zero/non-zero
+ * distinction is passed on to cfopen(); this function does not itself
+ * apply 'compression' as a level.
+ *
+ * Returns NULL if the file cannot be opened, with errno as left by the
+ * failed open. Dies if compression is requested in a build without zlib.
+ */
+cfp *
+cfopen_write(const char *path, const char *mode, int compression)
+{
+	cfp *fp = NULL;
+
+	if (compression == 0)
+		fp = cfopen(path, mode, 0);
+	else
+	{
+#ifdef HAVE_LIBZ
+		int fnamelen = strlen(path) + 4;	/* ".gz" plus terminator */
+		char *fname = malloc(fnamelen);
+		int save_errno;
+
+		if (fname == NULL)
+			die_horribly(NULL, modulename, "Out of memory\n");
+
+		snprintf(fname, fnamelen, "%s%s", path, ".gz");
+		fp = cfopen(fname, mode, 1);
+
+		/* keep the open's errno intact for the caller's error message */
+		save_errno = errno;
+		free(fname);
+		errno = save_errno;
+#else
+		die_horribly(NULL, modulename, "not built with zlib support\n");
+#endif
+	}
+	return fp;
+}
+
+/*
+ * Opens file 'path' in 'mode'. If 'compression' is non-zero, the file
+ * is opened with libz gzopen(), otherwise with plain fopen().
+ *
+ * Returns a newly allocated cfp, to be released with cfclose(), or NULL if
+ * the underlying open failed. On failure errno is left as the open call
+ * set it, so callers can report it. Dies on out of memory, or if
+ * compression is requested in a build without zlib.
+ */
+cfp *
+cfopen(const char *path, const char *mode, int compression)
+{
+	cfp *fp = malloc(sizeof(cfp));
+	int save_errno;
+
+	if (fp == NULL)
+		die_horribly(NULL, modulename, "Out of memory\n");
+
+	if (compression != 0)
+	{
+#ifdef HAVE_LIBZ
+		fp->compressedfp = gzopen(path, mode);
+		fp->uncompressedfp = NULL;
+		if (fp->compressedfp == NULL)
+		{
+			/* free() may modify errno; protect the open's error code */
+			save_errno = errno;
+			free(fp);
+			errno = save_errno;
+			fp = NULL;
+		}
+#else
+		die_horribly(NULL, modulename, "not built with zlib support\n");
+#endif
+	}
+	else
+	{
+#ifdef HAVE_LIBZ
+		fp->compressedfp = NULL;
+#endif
+		fp->uncompressedfp = fopen(path, mode);
+		if (fp->uncompressedfp == NULL)
+		{
+			/* free() may modify errno; protect the open's error code */
+			save_errno = errno;
+			free(fp);
+			errno = save_errno;
+			fp = NULL;
+		}
+	}
+
+	return fp;
+}
+
+
+/*
+ * Read up to 'size' bytes from 'fp' into 'ptr', using gzread() for a
+ * compressed stream and fread() otherwise. The underlying call's result is
+ * returned as an int.
+ */
+int
+cfread(void *ptr, int size, cfp *fp)
+{
+	int nread;
+
+#ifdef HAVE_LIBZ
+	if (fp->compressedfp != NULL)
+		nread = gzread(fp->compressedfp, ptr, size);
+	else
+#endif
+		nread = fread(ptr, 1, size, fp->uncompressedfp);
+
+	return nread;
+}
+
+/*
+ * Write 'size' bytes from 'ptr' to 'fp', using gzwrite() for a compressed
+ * stream and fwrite() otherwise. The underlying call's result is returned
+ * as an int.
+ */
+int
+cfwrite(const void *ptr, int size, cfp *fp)
+{
+	int nwritten;
+
+#ifdef HAVE_LIBZ
+	if (fp->compressedfp != NULL)
+		nwritten = gzwrite(fp->compressedfp, ptr, size);
+	else
+#endif
+		nwritten = fwrite(ptr, 1, size, fp->uncompressedfp);
+
+	return nwritten;
+}
+
+/*
+ * Read one character from 'fp', via gzgetc() or fgetc() depending on
+ * whether the stream is compressed.
+ */
+int
+cfgetc(cfp *fp)
+{
+#ifdef HAVE_LIBZ
+	if (fp->compressedfp != NULL)
+		return gzgetc(fp->compressedfp);
+#endif
+
+	return fgetc(fp->uncompressedfp);
+}
+
+/*
+ * Read a line of at most 'len' - 1 characters from 'fp' into 'buf', via
+ * gzgets() or fgets() depending on whether the stream is compressed.
+ * Returns what the underlying call returns.
+ */
+char *
+cfgets(cfp *fp, char *buf, int len)
+{
+#ifdef HAVE_LIBZ
+	if (fp->compressedfp != NULL)
+		return gzgets(fp->compressedfp, buf, len);
+#endif
+
+	return fgets(buf, len, fp->uncompressedfp);
+}
+
+/*
+ * Close 'fp' and release the cfp itself.
+ *
+ * Returns the result of gzclose() or fclose(). A NULL 'fp' yields EOF with
+ * errno set to EBADF. When the underlying close fails, errno is preserved
+ * across the release of the cfp so that callers can report it.
+ */
+int
+cfclose(cfp *fp)
+{
+	int result;
+	int save_errno;
+
+	if (fp == NULL)
+	{
+		errno = EBADF;
+		return EOF;
+	}
+#ifdef HAVE_LIBZ
+	if (fp->compressedfp)
+	{
+		result = gzclose(fp->compressedfp);
+		fp->compressedfp = NULL;
+	}
+	else
+#endif
+	{
+		result = fclose(fp->uncompressedfp);
+		fp->uncompressedfp = NULL;
+	}
+
+	/* free() may modify errno; callers print it when result is non-zero */
+	save_errno = errno;
+	free(fp);
+	errno = save_errno;
+
+	return result;
+}
+
+/*
+ * Report the end-of-file state of 'fp', via gzeof() or feof() depending on
+ * whether the stream is compressed.
+ */
+int
+cfeof(cfp *fp)
+{
+#ifdef HAVE_LIBZ
+	if (fp->compressedfp != NULL)
+		return gzeof(fp->compressedfp);
+#endif
+
+	return feof(fp->uncompressedfp);
+}
+
+#ifdef HAVE_LIBZ
+/*
+ * Returns 1 if 'filename' ends with 'suffix', and 0 otherwise (including
+ * when 'filename' is shorter than 'suffix').
+ */
+static int
+hasSuffix(const char *filename, const char *suffix)
+{
+	int namelen = strlen(filename);
+	int suflen = strlen(suffix);
+	const char *tail;
+
+	if (suflen > namelen)
+		return 0;
+
+	/* the tail is exactly suflen characters long, so strcmp suffices */
+	tail = filename + (namelen - suflen);
+	return strcmp(tail, suffix) == 0;
+}
+#endif
--- a/src/bin/pg_dump/compress_io.h
+++ b/src/bin/pg_dump/compress_io.h
@ -54,4 +54,17 @@ extern size_t WriteDataToArchive(ArchiveHandle *AH, CompressorState *cs,
 								 const void *data, size_t dLen);
 extern void EndCompressor(ArchiveHandle *AH, CompressorState *cs);

+
+typedef struct cfp cfp;
+
+extern cfp *cfopen(const char *path, const char *mode, int compression);
+extern cfp *cfopen_read(const char *path, const char *mode);
+extern cfp *cfopen_write(const char *path, const char *mode, int compression);
+extern int cfread(void *ptr, int size, cfp *fp);
+extern int cfwrite(const void *ptr, int size, cfp *fp);
+extern int cfgetc(cfp *fp);
+extern char *cfgets(cfp *fp, char *buf, int len);
+extern int cfclose(cfp *fp);
+extern int cfeof(cfp *fp);
+
 #endif
--- a/src/bin/pg_dump/pg_backup.h
+++ b/src/bin/pg_dump/pg_backup.h
@ -50,7 +50,8 @@ typedef enum _archiveFormat
 	archCustom = 1,
 	archFiles = 2,
 	archTar = 3,
-	archNull = 4
+	archNull = 4,
+	archDirectory = 5
 } ArchiveFormat;

 typedef enum _archiveMode
--- a/src/bin/pg_dump/pg_backup_archiver.c
+++ b/src/bin/pg_dump/pg_backup_archiver.c
@ -25,6 +25,7 @@

 #include <ctype.h>
 #include <unistd.h>
+#include <sys/stat.h>
 #include <sys/types.h>
 #include <sys/wait.h>

@ -1751,11 +1752,46 @@ _discoverArchiveFormat(ArchiveHandle *AH)

 	if (AH->fSpec)
 	{
+		struct stat	st;
+
 		wantClose = 1;
-		fh = fopen(AH->fSpec, PG_BINARY_R);
-		if (!fh)
-			die_horribly(AH, modulename, "could not open input file \"%s\": %s\n",
-						 AH->fSpec, strerror(errno));
+
+		/*
+		 * Check if the specified archive is a directory. If so, check if
+		 * there's a "toc.dat" (or "toc.dat.gz") file in it.
+		 */
+		if (stat(AH->fSpec, &st) == 0 && S_ISDIR(st.st_mode))
+		{
+			char		buf[MAXPGPATH];
+			if (snprintf(buf, MAXPGPATH, "%s/toc.dat", AH->fSpec) >= MAXPGPATH)
+				die_horribly(AH, modulename, "directory name too long: \"%s\"\n",
+							 AH->fSpec);
+			if (stat(buf, &st) == 0 && S_ISREG(st.st_mode))
+			{
+				AH->format = archDirectory;
+				return AH->format;
+			}
+
+#ifdef HAVE_LIBZ
+			if (snprintf(buf, MAXPGPATH, "%s/toc.dat.gz", AH->fSpec) >= MAXPGPATH)
+				die_horribly(AH, modulename, "directory name too long: \"%s\"\n",
+							 AH->fSpec);
+			if (stat(buf, &st) == 0 && S_ISREG(st.st_mode))
+			{
+				AH->format = archDirectory;
+				return AH->format;
+			}
+#endif
+			die_horribly(AH, modulename, "directory \"%s\" does not appear to be a valid archive (\"toc.dat\" does not exist)\n",
+						 AH->fSpec);
+		}
+		else
+		{
+			fh = fopen(AH->fSpec, PG_BINARY_R);
+			if (!fh)
+				die_horribly(AH, modulename, "could not open input file \"%s\": %s\n",
+							 AH->fSpec, strerror(errno));
+		}
 	}
 	else
 	{
@ -1973,6 +2009,10 @@ _allocAH(const char *FileSpec, const ArchiveFormat fmt,
 			InitArchiveFmt_Null(AH);
 			break;

+		case archDirectory:
+			InitArchiveFmt_Directory(AH);
+			break;
+
 		case archTar:
 			InitArchiveFmt_Tar(AH);
 			break;
--- a/src/bin/pg_dump/pg_backup_archiver.h
+++ b/src/bin/pg_dump/pg_backup_archiver.h
@ -370,6 +370,7 @@ extern void EndRestoreBlobs(ArchiveHandle *AH);
 extern void InitArchiveFmt_Custom(ArchiveHandle *AH);
 extern void InitArchiveFmt_Files(ArchiveHandle *AH);
 extern void InitArchiveFmt_Null(ArchiveHandle *AH);
+extern void InitArchiveFmt_Directory(ArchiveHandle *AH);
 extern void InitArchiveFmt_Tar(ArchiveHandle *AH);

 extern bool isValidTarHeader(char *header);
--- a/src/bin/pg_dump/pg_backup_directory.c
+++ b/src/bin/pg_dump/pg_backup_directory.c
@ -0,0 +1,678 @@
+/*-------------------------------------------------------------------------
+ *
+ * pg_backup_directory.c
+ *
+ *	A directory format dump is a directory, which contains a "toc.dat" file
+ *	for the TOC, and a separate file for each data entry, named "<id>.dat"
+ *	after the entry's dump ID. Large objects (BLOBs) are stored in separate
+ *	files named "blob_<oid>.dat", with a plain-text TOC file for them called
+ *	"blobs.toc". If
+ *	compression is used, each data file is individually compressed and the
+ *	".gz" suffix is added to the filenames. The TOC files are never
+ *	compressed by pg_dump, however they are accepted with the .gz suffix too,
+ *	in case the user has manually compressed them with 'gzip'.
+ *
+ *	NOTE: This format is identical to the files written in the tar file in
+ *	the 'tar' format, except that we don't write the restore.sql file (TODO),
+ *	and the tar format doesn't support compression. Please keep the formats in
+ *	sync.
+ *
+ *
+ *	Portions Copyright (c) 1996-2010, PostgreSQL Global Development Group
+ *	Portions Copyright (c) 1994, Regents of the University of California
+ *	Portions Copyright (c) 2000, Philip Warner
+ *
+ *	Rights are granted to use this software in any way so long
+ *	as this notice is not removed.
+ *
+ *	The author is not responsible for loss or damages that may
+ *	result from it's use.
+ *
+ * IDENTIFICATION
+ *		src/bin/pg_dump/pg_backup_directory.c
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#include <dirent.h>
+#include <sys/stat.h>
+
+#include "pg_backup_archiver.h"
+#include "compress_io.h"
+
+typedef struct
+{
+	/*
+	 * Our archive location: the value the user gave as the archive name,
+	 * which for this format names a directory rather than a file.
+	 */
+	char			   *directory;
+
+	cfp				   *dataFH;				/* currently open data file; also
+											 * points at toc.dat while the
+											 * header and TOC are read or
+											 * written */
+
+	cfp				   *blobsTocFH;			/* file handle for blobs.toc */
+} lclContext;
+
+typedef struct
+{
+	char	   *filename;		/* filename excluding the directory (basename);
+								 * NULL for entries without a data file */
+} lclTocEntry;
+
+static const char *modulename = gettext_noop("directory archiver");
+
+/* prototypes for private functions */
+static void _ArchiveEntry(ArchiveHandle *AH, TocEntry *te);
+static void _StartData(ArchiveHandle *AH, TocEntry *te);
+static void _EndData(ArchiveHandle *AH, TocEntry *te);
+static size_t _WriteData(ArchiveHandle *AH, const void *data, size_t dLen);
+static int	_WriteByte(ArchiveHandle *AH, const int i);
+static int	_ReadByte(ArchiveHandle *);
+static size_t _WriteBuf(ArchiveHandle *AH, const void *buf, size_t len);
+static size_t _ReadBuf(ArchiveHandle *AH, void *buf, size_t len);
+static void _CloseArchive(ArchiveHandle *AH);
+static void _PrintTocData(ArchiveHandle *AH, TocEntry *te, RestoreOptions *ropt);
+
+static void _WriteExtraToc(ArchiveHandle *AH, TocEntry *te);
+static void _ReadExtraToc(ArchiveHandle *AH, TocEntry *te);
+static void _PrintExtraToc(ArchiveHandle *AH, TocEntry *te);
+
+static void _StartBlobs(ArchiveHandle *AH, TocEntry *te);
+static void _StartBlob(ArchiveHandle *AH, TocEntry *te, Oid oid);
+static void _EndBlob(ArchiveHandle *AH, TocEntry *te, Oid oid);
+static void _EndBlobs(ArchiveHandle *AH, TocEntry *te);
+static void _LoadBlobs(ArchiveHandle *AH, RestoreOptions *ropt);
+
+static char *prependDirectory(ArchiveHandle *AH, const char *relativeFilename);
+
+static void createDirectory(const char *dir);
+
+
+/*
+ *	Format initialization routine. Every archive format provides one, and
+ *	it is declared in pg_backup_archiver.h.
+ *
+ *	Installs the directory-format callbacks in AH, allocates the private
+ *	context (AH->formatData) and the large-object buffer, and then either
+ *	creates the target directory (write mode) or loads the header and TOC
+ *	from "toc.dat" in the existing directory (read mode).
+ */
+void
+InitArchiveFmt_Directory(ArchiveHandle *AH)
+{
+	lclContext *ctx;
+	char	   *tocPath;
+	cfp		   *toc;
+
+	/* Assuming static functions, this can be copied for each format. */
+	AH->ArchiveEntryPtr = _ArchiveEntry;
+	AH->StartDataPtr = _StartData;
+	AH->WriteDataPtr = _WriteData;
+	AH->EndDataPtr = _EndData;
+	AH->WriteBytePtr = _WriteByte;
+	AH->ReadBytePtr = _ReadByte;
+	AH->WriteBufPtr = _WriteBuf;
+	AH->ReadBufPtr = _ReadBuf;
+	AH->ClosePtr = _CloseArchive;
+	AH->ReopenPtr = NULL;
+	AH->PrintTocDataPtr = _PrintTocData;
+	AH->ReadExtraTocPtr = _ReadExtraToc;
+	AH->WriteExtraTocPtr = _WriteExtraToc;
+	AH->PrintExtraTocPtr = _PrintExtraToc;
+
+	AH->StartBlobsPtr = _StartBlobs;
+	AH->StartBlobPtr = _StartBlob;
+	AH->EndBlobPtr = _EndBlob;
+	AH->EndBlobsPtr = _EndBlobs;
+
+	AH->ClonePtr = NULL;
+	AH->DeClonePtr = NULL;
+
+	/* Private per-archive state */
+	ctx = (lclContext *) calloc(1, sizeof(lclContext));
+	if (!ctx)
+		die_horribly(AH, modulename, "out of memory\n");
+	AH->formatData = (void *) ctx;
+
+	ctx->dataFH = NULL;
+	ctx->blobsTocFH = NULL;
+
+	/* Buffer used for large objects */
+	AH->lo_buf_size = LOBBUFSIZE;
+	AH->lo_buf = (void *) malloc(LOBBUFSIZE);
+	if (!AH->lo_buf)
+		die_horribly(AH, modulename, "out of memory\n");
+
+	/* This format cannot work without a directory name */
+	if (AH->fSpec == NULL || AH->fSpec[0] == '\0')
+		die_horribly(AH, modulename, "no output directory specified\n");
+
+	ctx->directory = AH->fSpec;
+
+	if (AH->mode == archModeWrite)
+	{
+		/* Write mode only needs the directory; errors are caught there */
+		createDirectory(ctx->directory);
+		return;
+	}
+
+	/* Read mode: load the header and TOC from toc.dat */
+	tocPath = prependDirectory(AH, "toc.dat");
+
+	toc = cfopen_read(tocPath, PG_BINARY_R);
+	if (toc == NULL)
+		die_horribly(AH, modulename,
+					 "could not open input file \"%s\": %s\n",
+					 tocPath, strerror(errno));
+
+	/* ReadHead/ReadToc pull their bytes through ctx->dataFH */
+	ctx->dataFH = toc;
+
+	/*
+	 * toc.dat carries the tar format code (see _CloseArchive), so read the
+	 * header while posing as tar and switch back before reading the TOC.
+	 */
+	AH->format = archTar;
+	ReadHead(AH);
+	AH->format = archDirectory;
+	ReadToc(AH);
+
+	/* Nothing else in the file, so close it again... */
+	if (cfclose(toc) != 0)
+		die_horribly(AH, modulename, "could not close TOC file: %s\n",
+					 strerror(errno));
+	ctx->dataFH = NULL;
+}
+
+/*
+ * Called by the Archiver when the dumper creates a new TOC entry.
+ *
+ * We determine the filename for this entry: "<dumpId>.dat" for entries
+ * that have a data dumper, "blobs.toc" for the BLOBS entry, and no file
+ * (NULL) for everything else. The result is stored in te->formatData.
+ */
+static void
+_ArchiveEntry(ArchiveHandle *AH, TocEntry *te)
+{
+	lclTocEntry	   *tctx;
+	char			fn[MAXPGPATH];
+	const char	   *name = NULL;
+
+	tctx = (lclTocEntry *) calloc(1, sizeof(lclTocEntry));
+	if (!tctx)
+		die_horribly(AH, modulename, "out of memory\n");
+
+	if (te->dataDumper)
+	{
+		snprintf(fn, MAXPGPATH, "%d.dat", te->dumpId);
+		name = fn;
+	}
+	else if (strcmp(te->desc, "BLOBS") == 0)
+		name = "blobs.toc";
+
+	if (name != NULL)
+	{
+		/*
+		 * A failed strdup() must not be mistaken for "entry has no data
+		 * file", which is what a NULL filename means elsewhere.
+		 */
+		tctx->filename = strdup(name);
+		if (tctx->filename == NULL)
+			die_horribly(AH, modulename, "out of memory\n");
+	}
+	else
+		tctx->filename = NULL;
+
+	te->formatData = (void *) tctx;
+}
+
+/*
+ * Called by the Archiver to save any extra format-related TOC entry
+ * data.
+ *
+ * The only extra datum for this format is the entry's file name, written
+ * with the Archiver's WriteStr routine. Entries without a file (see
+ * _ArchiveEntry) are written as an empty string.
+ */
+static void
+_WriteExtraToc(ArchiveHandle *AH, TocEntry *te)
+{
+	lclTocEntry *tctx = (lclTocEntry *) te->formatData;
+	const char *name = tctx->filename;
+
+	WriteStr(AH, name ? name : "");
+}
+
+/*
+ * Called by the Archiver to read any extra format-related TOC data.
+ *
+ * Mirrors _WriteExtraToc: one string is read, and an empty string is
+ * turned back into a NULL filename. The per-entry context is allocated
+ * here if the entry does not have one yet.
+ */
+static void
+_ReadExtraToc(ArchiveHandle *AH, TocEntry *te)
+{
+	lclTocEntry *tctx = (lclTocEntry *) te->formatData;
+
+	if (!tctx)
+	{
+		tctx = (lclTocEntry *) calloc(1, sizeof(lclTocEntry));
+		if (tctx == NULL)
+			die_horribly(AH, modulename, "out of memory\n");
+		te->formatData = (void *) tctx;
+	}
+
+	tctx->filename = ReadStr(AH);
+	if (tctx->filename[0] == '\0')
+	{
+		/* empty string stands for "no file" */
+		free(tctx->filename);
+		tctx->filename = NULL;
+	}
+}
+
+/*
+ * Called by the Archiver when restoring an archive to output a comment
+ * that includes useful information about the TOC entry.
+ *
+ * In verbose mode we print the entry's file name, if it has one.
+ */
+static void
+_PrintExtraToc(ArchiveHandle *AH, TocEntry *te)
+{
+	lclTocEntry *tctx = (lclTocEntry *) te->formatData;
+
+	if (!AH->public.verbose)
+		return;
+	if (tctx->filename == NULL)
+		return;
+
+	ahprintf(AH, "-- File: %s\n", tctx->filename);
+}
+
+/*
+ * Called by the archiver when saving TABLE DATA (not schema), just prior
+ * to the dumper's 'DataDumper' routine being called.
+ *
+ * We open the entry's data file inside the archive directory for writing,
+ * passing AH->compression on to cfopen_write, and remember the handle in
+ * ctx->dataFH.
+ */
+static void
+_StartData(ArchiveHandle *AH, TocEntry *te)
+{
+	lclContext	   *ctx = (lclContext *) AH->formatData;
+	lclTocEntry	   *tctx = (lclTocEntry *) te->formatData;
+	char		   *path = prependDirectory(AH, tctx->filename);
+
+	ctx->dataFH = cfopen_write(path, PG_BINARY_W, AH->compression);
+	if (!ctx->dataFH)
+		die_horribly(AH, modulename, "could not open output file \"%s\": %s\n",
+					 path, strerror(errno));
+}
+
+/*
+ * Called by archiver when dumper calls WriteData. This routine is
+ * called for both BLOB and TABLE data; it is the responsibility of
+ * the format to manage each kind of data using StartBlob/StartData.
+ *
+ * It should only be called from within a DataDumper routine.
+ *
+ * The bytes go to whichever data file is currently open (ctx->dataFH).
+ * Returns 0 for an empty request, otherwise the result of cfwrite.
+ */
+static size_t
+_WriteData(ArchiveHandle *AH, const void *data, size_t dLen)
+{
+	lclContext		   *ctx = (lclContext *) AH->formatData;
+
+	if (dLen != 0)
+		return cfwrite(data, dLen, ctx->dataFH);
+
+	return 0;
+}
+
+/*
+ * Called by the archiver when a dumper's 'DataDumper' routine has
+ * finished.
+ *
+ * We close the data file. A failing close is fatal, as it is for the TOC
+ * file: the file was being written, so an error here can mean that the
+ * data did not all reach the file.
+ */
+static void
+_EndData(ArchiveHandle *AH, TocEntry *te)
+{
+	lclContext	   *ctx = (lclContext *) AH->formatData;
+
+	/* Close the file */
+	if (cfclose(ctx->dataFH) != 0)
+		die_horribly(AH, modulename, "could not close data file: %s\n",
+					 strerror(errno));
+
+	ctx->dataFH = NULL;
+}
+
+/*
+ * Print data for a given file (can be a BLOB as well)
+ *
+ * 'filename' is the path handed to cfopen_read, which also accepts the
+ * ".gz" variant of the file. The contents are copied to the archive output
+ * with ahwrite in ZLIB_OUT_SIZE chunks. A NULL 'filename' is a no-op.
+ * 'ropt' is not used here.
+ *
+ * The stream is closed before returning, so a restore does not accumulate
+ * one open file per table or blob.
+ */
+static void
+_PrintFileData(ArchiveHandle *AH, char *filename, RestoreOptions *ropt)
+{
+	int			cnt;
+	char	   *buf;
+	size_t		buflen;
+	cfp		   *fp;
+
+	if (!filename)
+		return;
+
+	fp = cfopen_read(filename, PG_BINARY_R);
+	if (!fp)
+		die_horribly(AH, modulename, "could not open input file \"%s\": %s\n",
+					 filename, strerror(errno));
+
+	buf = malloc(ZLIB_OUT_SIZE);
+	if (buf == NULL)
+		die_horribly(NULL, modulename, "out of memory\n");
+	buflen = ZLIB_OUT_SIZE;
+
+	/*
+	 * cfread returns an int and can be negative for a compressed stream
+	 * (gzread reports errors as -1), so keep the count signed and never
+	 * pass a negative value on as a length.
+	 */
+	while ((cnt = cfread(buf, buflen, fp)) > 0)
+		ahwrite(buf, 1, cnt, AH);
+
+	free(buf);
+
+	if (cnt < 0)
+		die_horribly(AH, modulename, "could not read from input file \"%s\"\n",
+					 filename);
+
+	if (cfclose(fp) != 0)
+		die_horribly(AH, modulename, "could not close data file: %s\n",
+					 strerror(errno));
+}
+
+/*
+ * Print data for a given TOC entry
+ *
+ * Entries without a file name are skipped. The BLOBS entry is handled by
+ * _LoadBlobs; any other entry's file is printed by _PrintFileData.
+ */
+static void
+_PrintTocData(ArchiveHandle *AH, TocEntry *te, RestoreOptions *ropt)
+{
+	lclTocEntry *tctx = (lclTocEntry *) te->formatData;
+
+	if (tctx->filename == NULL)
+		return;
+
+	if (strcmp(te->desc, "BLOBS") != 0)
+	{
+		char   *path = prependDirectory(AH, tctx->filename);
+
+		_PrintFileData(AH, path, ropt);
+		return;
+	}
+
+	_LoadBlobs(AH, ropt);
+}
+
+/*
+ * Restore all large objects listed in the archive's "blobs.toc".
+ *
+ * Each line of that file holds a blob OID and the name of the file with
+ * its contents; every listed file is printed between StartRestoreBlob and
+ * EndRestoreBlob. Malformed lines, read errors and a failing close are
+ * fatal.
+ */
+static void
+_LoadBlobs(ArchiveHandle *AH, RestoreOptions *ropt)
+{
+	lclContext	   *ctx = (lclContext *) AH->formatData;
+	char		   *tocPath;
+	char			line[MAXPGPATH];
+	Oid				oid;
+
+	StartRestoreBlobs(AH);
+
+	tocPath = prependDirectory(AH, "blobs.toc");
+
+	ctx->blobsTocFH = cfopen_read(tocPath, PG_BINARY_R);
+	if (!ctx->blobsTocFH)
+		die_horribly(AH, modulename, "could not open large object TOC file \"%s\" for input: %s\n",
+					 tocPath, strerror(errno));
+
+	/* One blob per line: "<oid> <file name>" */
+	while (cfgets(ctx->blobsTocFH, line, MAXPGPATH) != NULL)
+	{
+		char		blobFile[MAXPGPATH];
+		char		blobPath[MAXPGPATH];
+		int			nfields;
+
+		nfields = sscanf(line, "%u %s\n", &oid, blobFile);
+		if (nfields != 2)
+			die_horribly(AH, modulename, "invalid line in large object TOC file: %s\n",
+						 line);
+
+		StartRestoreBlob(AH, oid, ropt->dropSchema);
+		snprintf(blobPath, MAXPGPATH, "%s/%s", ctx->directory, blobFile);
+		_PrintFileData(AH, blobPath, ropt);
+		EndRestoreBlob(AH, oid);
+	}
+
+	/* cfgets returned NULL: that must have been end of file */
+	if (!cfeof(ctx->blobsTocFH))
+		die_horribly(AH, modulename, "error reading large object TOC file \"%s\"\n",
+					 tocPath);
+
+	if (cfclose(ctx->blobsTocFH) != 0)
+		die_horribly(AH, modulename, "could not close large object TOC file \"%s\": %s\n",
+					 tocPath, strerror(errno));
+
+	ctx->blobsTocFH = NULL;
+
+	EndRestoreBlobs(AH);
+}
+
+
+/*
+ * Write a byte of data to the archive.
+ * Called by the archiver to do integer & byte output to the archive.
+ * These routines are only used to read & write the headers & TOC.
+ *
+ * Dies if the byte cannot be written; otherwise returns 1.
+ */
+static int
+_WriteByte(ArchiveHandle *AH, const int i)
+{
+	lclContext *ctx = (lclContext *) AH->formatData;
+	unsigned char byte = (unsigned char) i;
+	int			nwritten;
+
+	nwritten = cfwrite(&byte, 1, ctx->dataFH);
+	if (nwritten != 1)
+		die_horribly(AH, modulename, "could not write byte\n");
+
+	return 1;
+}
+
+/*
+ * Read a byte of data from the archive.
+ * Called by the archiver to read bytes & integers from the archive.
+ * These routines are only used to read & write headers & TOC.
+ * Hitting EOF here is treated as a fatal error.
+ */
+static int
+_ReadByte(ArchiveHandle *AH)
+{
+	lclContext *ctx = (lclContext *) AH->formatData;
+	int			ch = cfgetc(ctx->dataFH);
+
+	if (ch == EOF)
+		die_horribly(AH, modulename, "unexpected end of file\n");
+
+	return ch;
+}
+
+/*
+ * Write a buffer of data to the archive.
+ * Called by the archiver to write a block of bytes to the TOC or a data file.
+ *
+ * Dies unless all 'len' bytes were written; returns the number written.
+ */
+static size_t
+_WriteBuf(ArchiveHandle *AH, const void *buf, size_t len)
+{
+	lclContext *ctx = (lclContext *) AH->formatData;
+	size_t		written = cfwrite(buf, len, ctx->dataFH);
+
+	if (written != len)
+		die_horribly(AH, modulename, "could not write to output file: %s\n",
+					 strerror(errno));
+
+	return written;
+}
+
+/*
+ * Read a block of bytes from the archive.
+ *
+ * Called by the archiver to read a block of bytes from the archive.
+ * Returns the result of cfread on the currently open file; no error
+ * checking is done here.
+ */
+static size_t
+_ReadBuf(ArchiveHandle *AH, void *buf, size_t len)
+{
+	lclContext *ctx = (lclContext *) AH->formatData;
+
+	return cfread(buf, len, ctx->dataFH);
+}
+
+/*
+ * Close the archive.
+ *
+ * When writing the archive, this is the routine that actually starts
+ * the process of saving it to files. No data should be written prior
+ * to this point, since the user could sort the TOC after creating it.
+ *
+ * If an archive is to be written, this routine must call:
+ *		WriteHead			to save the archive header
+ *		WriteToc			to save the TOC entries
+ *		WriteDataChunks		to save all DATA & BLOBs.
+ *
+ * Here the header and TOC go to an uncompressed "toc.dat", which is
+ * closed again before the data files are written. In read mode there is
+ * nothing to do beyond clearing AH->FH.
+ */
+static void
+_CloseArchive(ArchiveHandle *AH)
+{
+	lclContext *ctx = (lclContext *) AH->formatData;
+	if (AH->mode == archModeWrite)
+	{
+		cfp	   *tocFH;
+		char   *fname = prependDirectory(AH, "toc.dat");
+
+		/* The TOC is always created uncompressed */
+		tocFH = cfopen_write(fname, PG_BINARY_W, 0);
+		if (tocFH == NULL)
+			die_horribly(AH, modulename, "could not open output file \"%s\": %s\n",
+						 fname, strerror(errno));
+
+		/* WriteHead/WriteToc send their bytes through ctx->dataFH */
+		ctx->dataFH = tocFH;
+		/*
+		 * Write 'tar' in the format field of the toc.dat file. The directory
+		 * is compatible with 'tar', so there's no point having a different
+		 * format code for it.
+		 */
+		AH->format = archTar;
+		WriteHead(AH);
+		AH->format = archDirectory;
+		WriteToc(AH);
+		if (cfclose(tocFH) != 0)
+			die_horribly(AH, modulename, "could not close TOC file: %s\n",
+						 strerror(errno));
+
+		/* don't leave a pointer to the closed stream behind */
+		ctx->dataFH = NULL;
+
+		WriteDataChunks(AH);
+	}
+	AH->FH = NULL;
+}
+
+
+/*
+ * BLOB support
+ */
+
+/*
+ * Called by the archiver when starting to save all BLOB DATA (not schema).
+ * It is called just prior to the dumper's DataDumper routine.
+ *
+ * We open "blobs.toc" in append mode and keep it open in ctx->blobsTocFH,
+ * so that _EndBlob can add one line per blob.
+ */
+static void
+_StartBlobs(ArchiveHandle *AH, TocEntry *te)
+{
+	lclContext	   *ctx = (lclContext *) AH->formatData;
+	char		   *tocPath = prependDirectory(AH, "blobs.toc");
+
+	/* The blob TOC file is never compressed */
+	ctx->blobsTocFH = cfopen_write(tocPath, "ab", 0);
+	if (!ctx->blobsTocFH)
+		die_horribly(AH, modulename, "could not open output file \"%s\": %s\n",
+					 tocPath, strerror(errno));
+}
+
+/*
+ * Called by the archiver when we're about to start dumping a blob.
+ *
+ * We open "blob_<oid>.dat" in the archive directory for writing, passing
+ * AH->compression on to cfopen_write, and make it the current data file.
+ */
+static void
+_StartBlob(ArchiveHandle *AH, TocEntry *te, Oid oid)
+{
+	lclContext	   *ctx = (lclContext *) AH->formatData;
+	char			blobPath[MAXPGPATH];
+
+	snprintf(blobPath, MAXPGPATH, "%s/blob_%u.dat", ctx->directory, oid);
+
+	ctx->dataFH = cfopen_write(blobPath, PG_BINARY_W, AH->compression);
+	if (!ctx->dataFH)
+		die_horribly(AH, modulename, "could not open output file \"%s\": %s\n",
+					 blobPath, strerror(errno));
+}
+
+/*
+ * Called by the archiver when the dumper is finished writing a blob.
+ *
+ * We close the blob file and write an entry to the blob TOC file for it.
+ * A failing close is fatal, so a blob is only listed in blobs.toc once its
+ * data file has been closed successfully.
+ */
+static void
+_EndBlob(ArchiveHandle *AH, TocEntry *te, Oid oid)
+{
+	lclContext	   *ctx = (lclContext *) AH->formatData;
+	char			buf[50];
+	int				len;
+
+	/* Close the BLOB data file itself */
+	if (cfclose(ctx->dataFH) != 0)
+		die_horribly(AH, modulename, "could not close blob data file: %s\n",
+					 strerror(errno));
+	ctx->dataFH = NULL;
+
+	/* register the blob in blobs.toc */
+	len = snprintf(buf, sizeof(buf), "%u blob_%u.dat\n", oid, oid);
+	if (cfwrite(buf, len, ctx->blobsTocFH) != len)
+		die_horribly(AH, modulename, "could not write to blobs TOC file\n");
+}
+
+/*
+ * Called by the archiver when finishing saving all BLOB DATA.
+ *
+ * We close the blobs TOC file. A failing close is fatal, since the file
+ * was being written and an error can mean entries did not reach it.
+ */
+static void
+_EndBlobs(ArchiveHandle *AH, TocEntry *te)
+{
+	lclContext *ctx = (lclContext *) AH->formatData;
+
+	if (cfclose(ctx->blobsTocFH) != 0)
+		die_horribly(AH, modulename, "could not close blobs TOC file: %s\n",
+					 strerror(errno));
+	ctx->blobsTocFH = NULL;
+}
+
+/*
+ * Create the archive directory 'dir' with mode 0700.
+ *
+ * The directory must not exist yet; an existing directory or file of that
+ * name is a fatal error, as is any failure of mkdir().
+ */
+static void
+createDirectory(const char *dir)
+{
+	struct stat		st;
+
+	/* the directory must not exist yet. */
+	if (stat(dir, &st) == 0)
+	{
+		if (S_ISDIR(st.st_mode))
+			die_horribly(NULL, modulename,
+						 "cannot create directory %s, it exists already\n",
+						 dir);
+		else
+			die_horribly(NULL, modulename,
+						 "cannot create directory %s, a file with this name "
+						 "exists already\n", dir);
+	}
+
+	/*
+	 * Now we create the directory. Something else may have created it
+	 * between the stat() above and this call; in that case mkdir() fails
+	 * and we report its error.
+	 */
+	if (mkdir(dir, 0700) < 0)
+		die_horribly(NULL, modulename, "could not create directory %s: %s\n",
+					 dir, strerror(errno));
+}
+
+
+/*
+ * Build "<archive directory>/<relativeFilename>".
+ *
+ * The result lives in a static buffer that is overwritten by the next
+ * call, so callers must finish using (or copy) it before calling again.
+ * Dies if the combined path would not fit in MAXPGPATH bytes.
+ */
+static char *
+prependDirectory(ArchiveHandle *AH, const char *relativeFilename)
+{
+	lclContext	   *ctx = (lclContext *) AH->formatData;
+	static char		buf[MAXPGPATH];
+	char		   *dname;
+
+	dname = ctx->directory;
+
+	/* directory + '/' + file name + terminating NUL must fit */
+	if (strlen(dname) + 1 + strlen(relativeFilename) + 1 > MAXPGPATH)
+			die_horribly(AH, modulename, "path name too long: %s\n", dname);
+
+	strcpy(buf, dname);
+	strcat(buf, "/");
+	strcat(buf, relativeFilename);
+
+	return buf;
+}
--- a/src/bin/pg_dump/pg_backup_tar.c
+++ b/src/bin/pg_dump/pg_backup_tar.c
@ -4,6 +4,10 @@
 *
 *	This file is copied from the 'files' format file, but dumps data into
 *	one temp file then sends it to the output TAR archive.
+ * 
+ *	NOTE: If you untar the created 'tar' file, the resulting files are
+ *	compatible with the 'directory' format. Please keep the two formats in
+ *	sync.
 *
 *	See the headers to pg_backup_files & pg_restore for more details.
 *
@ -167,7 +171,7 @@ InitArchiveFmt_Tar(ArchiveHandle *AH)
 		die_horribly(AH, modulename, "out of memory\n");

 	/*
-	 * Now open the TOC file
+	 * Now open the tar file, and load the TOC if we're in read mode.
 	 */
 	if (AH->mode == archModeWrite)
 	{
--- a/src/bin/pg_dump/pg_dump.c
+++ b/src/bin/pg_dump/pg_dump.c
@ -138,6 +138,7 @@ static int	no_unlogged_table_data = 0;


 static void help(const char *progname);
+static ArchiveFormat parseArchiveFormat(const char *format, ArchiveMode *mode);
 static void expand_schema_name_patterns(SimpleStringList *patterns,
 							SimpleOidList *oids);
 static void expand_table_name_patterns(SimpleStringList *patterns,
@ -267,6 +268,8 @@ main(int argc, char **argv)
 	int			my_version;
 	int			optindex;
 	RestoreOptions *ropt;
+	ArchiveFormat archiveFormat = archUnknown;
+	ArchiveMode	archiveMode;

 	static int	disable_triggers = 0;
 	static int	outputNoTablespaces = 0;
@ -539,35 +542,30 @@ main(int argc, char **argv)
 		exit(1);
 	}

+	archiveFormat = parseArchiveFormat(format, &archiveMode);
+
+	/* archiveFormat specific setup */
+	if (archiveFormat == archNull)
+		plainText = 1;
+
+	/*
+	 * Ignore compression level for plain format. XXX: This is a bit
+	 * inconsistent, tar-format throws an error instead.
+	 */
+	if (archiveFormat == archNull)
+		compressLevel = 0;
+
+	/* Custom and directory formats are compressed by default */
+	if (compressLevel == -1)
+	{
+		if (archiveFormat == archCustom || archiveFormat == archDirectory)
+			compressLevel = Z_DEFAULT_COMPRESSION;
+		else
+			compressLevel = 0;
+	}
+
 	/* open the output file */
-	if (pg_strcasecmp(format, "a") == 0 || pg_strcasecmp(format, "append") == 0)
-	{
-		/* This is used by pg_dumpall, and is not documented */
-		plainText = 1;
-		g_fout = CreateArchive(filename, archNull, 0, archModeAppend);
-	}
-	else if (pg_strcasecmp(format, "c") == 0 || pg_strcasecmp(format, "custom") == 0)
-		g_fout = CreateArchive(filename, archCustom, compressLevel, archModeWrite);
-	else if (pg_strcasecmp(format, "f") == 0 || pg_strcasecmp(format, "file") == 0)
-	{
-		/*
-		 * Dump files into the current directory; for demonstration only, not
-		 * documented.
-		 */
-		g_fout = CreateArchive(filename, archFiles, compressLevel, archModeWrite);
-	}
-	else if (pg_strcasecmp(format, "p") == 0 || pg_strcasecmp(format, "plain") == 0)
-	{
-		plainText = 1;
-		g_fout = CreateArchive(filename, archNull, 0, archModeWrite);
-	}
-	else if (pg_strcasecmp(format, "t") == 0 || pg_strcasecmp(format, "tar") == 0)
-		g_fout = CreateArchive(filename, archTar, compressLevel, archModeWrite);
-	else
-	{
-		write_msg(NULL, "invalid output format \"%s\" specified\n", format);
-		exit(1);
-	}
+	g_fout = CreateArchive(filename, archiveFormat, compressLevel, archiveMode);

 	if (g_fout == NULL)
 	{
@ -835,8 +833,8 @@ help(const char *progname)
 	printf(_("  %s [OPTION]... [DBNAME]\n"), progname);

 	printf(_("\nGeneral options:\n"));
-	printf(_("  -f, --file=FILENAME         output file name\n"));
-	printf(_("  -F, --format=c|t|p          output file format (custom, tar, plain text)\n"));
+	printf(_("  -f, --file=OUTPUT           output file or directory name\n"));
+	printf(_("  -F, --format=c|d|t|p        output file format (custom, directory, tar, plain text)\n"));
 	printf(_("  -v, --verbose               verbose mode\n"));
 	printf(_("  -Z, --compress=0-9          compression level for compressed formats\n"));
 	printf(_("  --lock-wait-timeout=TIMEOUT fail after waiting TIMEOUT for a table lock\n"));
@ -894,6 +892,49 @@ exit_nicely(void)
 	exit(1);
 }

+/*
+ * Map an output format name (compared case-insensitively) to its
+ * ArchiveFormat and store the matching ArchiveMode in *mode.  An unknown
+ * name is reported and the program exits with status 1.
+ */
+static ArchiveFormat
+parseArchiveFormat(const char *format, ArchiveMode *mode)
+{
+	static const struct
+	{
+		const char *name;
+		ArchiveFormat fmt;
+		ArchiveMode amode;
+	}			known[] = {
+		/* "append" is used by pg_dumpall, and is not documented */
+		{"a", archNull, archModeAppend},
+		{"append", archNull, archModeAppend},
+		{"c", archCustom, archModeWrite},
+		{"custom", archCustom, archModeWrite},
+		{"d", archDirectory, archModeWrite},
+		{"directory", archDirectory, archModeWrite},
+		/* "file" dumps into the current directory; demonstration only */
+		{"f", archFiles, archModeWrite},
+		{"file", archFiles, archModeWrite},
+		{"p", archNull, archModeWrite},
+		{"plain", archNull, archModeWrite},
+		{"t", archTar, archModeWrite},
+		{"tar", archTar, archModeWrite}
+	};
+	size_t		i;
+
+	for (i = 0; i < sizeof(known) / sizeof(known[0]); i++)
+	{
+		if (pg_strcasecmp(format, known[i].name) != 0)
+			continue;
+		*mode = known[i].amode;
+		return known[i].fmt;
+	}
+
+	write_msg(NULL, "invalid output format \"%s\" specified\n", format);
+	exit(1);
+}
+
 /*
 * Find the OIDs of all schemas matching the given list of patterns,
 * and append them to the given OID list.
--- a/src/bin/pg_dump/pg_restore.c
+++ b/src/bin/pg_dump/pg_restore.c
@ -352,6 +352,11 @@ main(int argc, char **argv)
 				opts->format = archCustom;
 				break;

+			case 'd':
+			case 'D':
+				opts->format = archDirectory;
+				break;
+
 			case 'f':
 			case 'F':
 				opts->format = archFiles;
@ -363,7 +368,7 @@ main(int argc, char **argv)
 				break;

 			default:
-				write_msg(NULL, "unrecognized archive format \"%s\"; please specify \"c\" or \"t\"\n",
+				write_msg(NULL, "unrecognized archive format \"%s\"; please specify \"c\", \"d\" or \"t\"\n",
 						  opts->formatName);
 				exit(1);
 		}
@ -418,7 +423,7 @@ usage(const char *progname)
 	printf(_("\nGeneral options:\n"));
 	printf(_("  -d, --dbname=NAME        connect to database name\n"));
 	printf(_("  -f, --file=FILENAME      output file name\n"));
-	printf(_("  -F, --format=c|t         backup file format (should be automatic)\n"));
+	printf(_("  -F, --format=c|d|t       backup file format (should be automatic)\n"));
 	printf(_("  -l, --list               print summarized TOC of the archive\n"));
 	printf(_("  -v, --verbose            verbose mode\n"));
 	printf(_("  --help                   show this help, then exit\n"));