/*-------------------------------------------------------------------------
 *
 * dbcommands.c
 *		Database management commands (create/drop database).
 *
 * Note: database creation/destruction commands use exclusive locks on
 * the database objects (as expressed by LockSharedObject()) to avoid
 * stepping on each others' toes.  Formerly we used table-level locks
 * on pg_database, but that's too coarse-grained.
 *
 * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 *
 * IDENTIFICATION
 *	  src/backend/commands/dbcommands.c
 *
 *-------------------------------------------------------------------------
 */
#include "postgres.h"

#include <fcntl.h>
#include <unistd.h>
#include <sys/stat.h>

#include "access/genam.h"
#include "access/heapam.h"
#include "access/htup_details.h"
#include "access/multixact.h"
#include "access/tableam.h"
#include "access/xact.h"
#include "access/xloginsert.h"
#include "access/xlogrecovery.h"
#include "access/xlogutils.h"
#include "catalog/catalog.h"
#include "catalog/dependency.h"
#include "catalog/indexing.h"
#include "catalog/objectaccess.h"
#include "catalog/pg_authid.h"
#include "catalog/pg_collation.h"
#include "catalog/pg_database.h"
#include "catalog/pg_db_role_setting.h"
#include "catalog/pg_subscription.h"
#include "catalog/pg_tablespace.h"
#include "commands/comment.h"
#include "commands/dbcommands.h"
#include "commands/dbcommands_xlog.h"
#include "commands/defrem.h"
#include "commands/seclabel.h"
#include "commands/tablespace.h"
#include "common/file_perm.h"
#include "mb/pg_wchar.h"
#include "miscadmin.h"
#include "pgstat.h"
#include "postmaster/bgwriter.h"
#include "replication/slot.h"
#include "storage/copydir.h"
#include "storage/fd.h"
#include "storage/ipc.h"
#include "storage/lmgr.h"
#include "storage/md.h"
#include "storage/procarray.h"
#include "storage/smgr.h"
#include "utils/acl.h"
#include "utils/builtins.h"
#include "utils/fmgroids.h"
#include "utils/guc.h"
#include "utils/pg_locale.h"
#include "utils/relmapper.h"
#include "utils/snapmgr.h"
#include "utils/syscache.h"

/*
 * Create database strategy.
 *
 * CREATEDB_WAL_LOG will copy the database at the block level and WAL log each
 * copied block.
 *
 * CREATEDB_FILE_COPY will simply perform a file system level copy of the
 * database and log a single record for each tablespace copied. To make this
 * safe, it also triggers checkpoints before and after the operation.
 */
typedef enum CreateDBStrategy
{
	CREATEDB_WAL_LOG,
	CREATEDB_FILE_COPY
} CreateDBStrategy;

typedef struct
{
	Oid			src_dboid;		/* source (template) DB */
	Oid			dest_dboid;		/* DB we are trying to create */
	CreateDBStrategy strategy;	/* create db strategy */
} createdb_failure_params;

typedef struct
{
	Oid			dest_dboid;		/* DB we are trying to move */
	Oid			dest_tsoid;		/* tablespace we are trying to move to */
} movedb_failure_params;

/*
 * Information about a relation to be copied when creating a database.
 */
typedef struct CreateDBRelInfo
{
	RelFileLocator rlocator;	/* physical relation identifier */
	Oid			reloid;			/* relation oid */
	bool		permanent;		/* relation is permanent or unlogged */
} CreateDBRelInfo;


/* non-export function prototypes */
static void createdb_failure_callback(int code, Datum arg);
static void movedb(const char *dbname, const char *tblspcname);
static void movedb_failure_callback(int code, Datum arg);
static bool get_db_info(const char *name, LOCKMODE lockmode,
						Oid *dbIdP, Oid *ownerIdP,
						int *encodingP, bool *dbIsTemplateP, bool *dbAllowConnP,
						TransactionId *dbFrozenXidP, MultiXactId *dbMinMultiP,
						Oid *dbTablespace, char **dbCollate, char **dbCtype, char **dbIculocale,
						char **dbIcurules,
						char *dbLocProvider,
						char **dbCollversion);
static void remove_dbtablespaces(Oid db_id);
static bool check_db_file_conflict(Oid db_id);
static int	errdetail_busy_db(int notherbackends, int npreparedxacts);
static void CreateDatabaseUsingWalLog(Oid src_dboid, Oid dst_dboid, Oid src_tsid,
									  Oid dst_tsid);
static List *ScanSourceDatabasePgClass(Oid tbid, Oid dbid, char *srcpath);
static List *ScanSourceDatabasePgClassPage(Page page, Buffer buf, Oid tbid,
										   Oid dbid, char *srcpath,
|
Change internal RelFileNode references to RelFileNumber or RelFileLocator.
We have been using the term RelFileNode to refer to either (1) the
integer that is used to name the sequence of files for a certain relation
within the directory set aside for that tablespace/database combination;
or (2) that value plus the OIDs of the tablespace and database; or
occasionally (3) the whole series of files created for a relation
based on those values. Using the same name for more than one thing is
confusing.
Replace RelFileNode with RelFileNumber when we're talking about just the
single number, i.e. (1) from above, and with RelFileLocator when we're
talking about all the things that are needed to locate a relation's files
on disk, i.e. (2) from above. In the places where we refer to (3) as
a relfilenode, instead refer to "relation storage".
Since there is a ton of SQL code in the world that knows about
pg_class.relfilenode, don't change the name of that column, or of other
SQL-facing things that derive their name from it.
On the other hand, do adjust closely-related internal terminology. For
example, the structure member names dbNode and spcNode appear to be
derived from the fact that the structure itself was called RelFileNode,
so change those to dbOid and spcOid. Likewise, various variables with
names like rnode and relnode get renamed appropriately, according to
how they're being used in context.
Hopefully, this is clearer than before. It is also preparation for
future patches that intend to widen the relfilenumber fields from its
current width of 32 bits. Variables that store a relfilenumber are now
declared as type RelFileNumber rather than type Oid; right now, these
are the same, but that can now more easily be changed.
Dilip Kumar, per an idea from me. Reviewed also by Andres Freund.
I fixed some whitespace issues, changed a couple of words in a
comment, and made one other minor correction.
Discussion: http://postgr.es/m/CA+TgmoamOtXbVAQf9hWFzonUo6bhhjS6toZQd7HZ-pmojtAmag@mail.gmail.com
Discussion: http://postgr.es/m/CA+Tgmobp7+7kmi4gkq7Y+4AM9fTvL+O1oQ4-5gFTT+6Ng-dQ=g@mail.gmail.com
Discussion: http://postgr.es/m/CAFiTN-vTe79M8uDH1yprOU64MNFE+R3ODRuA+JWf27JbhY4hJw@mail.gmail.com
2022-07-06 17:39:09 +02:00
|
|
|
List *rlocatorlist, Snapshot snapshot);
|
Add new block-by-block strategy for CREATE DATABASE.
Because this strategy logs changes on a block-by-block basis, it
avoids the need to checkpoint before and after the operation.
However, because it logs each changed block individually, it might
generate a lot of extra write-ahead logging if the template database
is large. Therefore, the older strategy remains available via a new
STRATEGY parameter to CREATE DATABASE, and a corresponding --strategy
option to createdb.
Somewhat controversially, this patch assembles the list of relations
to be copied to the new database by reading the pg_class relation of
the template database. Cross-database access like this isn't normally
possible, but it can be made to work here because there can't be any
connections to the database being copied, nor can it contain any
in-doubt transactions. Even so, we have to use lower-level interfaces
than normal, since the table scan and relcache interfaces will not
work for a database to which we're not connected. The advantage of
this approach is that we do not need to rely on the filesystem to
determine what ought to be copied, but instead on PostgreSQL's own
knowledge of the database structure. This avoids, for example,
copying stray files that happen to be located in the source database
directory.
Dilip Kumar, with a fairly large number of cosmetic changes by me.
Reviewed and tested by Ashutosh Sharma, Andres Freund, John Naylor,
Greg Nancarrow, Neha Sharma. Additional feedback from Bruce Momjian,
Heikki Linnakangas, Julien Rouhaud, Adam Brusselback, Kyotaro
Horiguchi, Tomas Vondra, Andrew Dunstan, Álvaro Herrera, and others.
Discussion: http://postgr.es/m/CA+TgmoYtcdxBjLh31DLxUXHxFVMPGzrU5_T=CYCvRyFHywSBUQ@mail.gmail.com
2022-03-29 17:31:43 +02:00
|
|
|
static CreateDBRelInfo *ScanSourceDatabasePgClassTuple(HeapTupleData *tuple,
|
|
|
|
Oid tbid, Oid dbid,
|
|
|
|
char *srcpath);
|
|
|
|
static void CreateDirAndVersionFile(char *dbpath, Oid dbid, Oid tsid,
|
|
|
|
bool isRedo);
|
2022-09-20 04:18:36 +02:00
|
|
|
static void CreateDatabaseUsingFileCopy(Oid src_dboid, Oid dst_dboid,
|
|
|
|
Oid src_tsid, Oid dst_tsid);
|
Fix replay of create database records on standby
Crash recovery on standby may encounter missing directories
when replaying database-creation WAL records. Prior to this
patch, the standby would fail to recover in such a case;
however, the directories could be legitimately missing.
Consider the following sequence of commands:
CREATE DATABASE
DROP DATABASE
DROP TABLESPACE
If, after replaying the last WAL record and removing the
tablespace directory, the standby crashes and has to replay the
create database record again, crash recovery must be able to continue.
A fix for this problem was already attempted in 49d9cfc68bf4, but it
was reverted because of design issues. This new version is based
on Robert Haas' proposal: any missing tablespaces are created
during recovery before reaching consistency. Tablespaces
are created as real directories, and should be deleted
by later replay. CheckRecoveryConsistency ensures
they have disappeared.
The problems detected by this new code are reported as PANIC,
except when allow_in_place_tablespaces is set to ON, in which
case they are WARNING. Apart from making tests possible, this
gives users an escape hatch in case things don't go as planned.
Author: Kyotaro Horiguchi <horikyota.ntt@gmail.com>
Author: Asim R Praveen <apraveen@pivotal.io>
Author: Paul Guo <paulguo@gmail.com>
Reviewed-by: Anastasia Lubennikova <lubennikovaav@gmail.com> (older versions)
Reviewed-by: Fujii Masao <masao.fujii@oss.nttdata.com> (older versions)
Reviewed-by: Michaël Paquier <michael@paquier.xyz>
Diagnosed-by: Paul Guo <paulguo@gmail.com>
Discussion: https://postgr.es/m/CAEET0ZGx9AvioViLf7nbR_8tH9-=27DN5xWJ2P9-ROH16e4JUA@mail.gmail.com
2022-07-28 08:40:06 +02:00
|
|
|
static void recovery_create_dbdir(char *path, bool only_tblspc);
|
Add new block-by-block strategy for CREATE DATABASE.
Because this strategy logs changes on a block-by-block basis, it
avoids the need to checkpoint before and after the operation.
However, because it logs each changed block individually, it might
generate a lot of extra write-ahead logging if the template database
is large. Therefore, the older strategy remains available via a new
STRATEGY parameter to CREATE DATABASE, and a corresponding --strategy
option to createdb.
Somewhat controversially, this patch assembles the list of relations
to be copied to the new database by reading the pg_class relation of
the template database. Cross-database access like this isn't normally
possible, but it can be made to work here because there can't be any
connections to the database being copied, nor can it contain any
in-doubt transactions. Even so, we have to use lower-level interfaces
than normal, since the table scan and relcache interfaces will not
work for a database to which we're not connected. The advantage of
this approach is that we do not need to rely on the filesystem to
determine what ought to be copied, but instead on PostgreSQL's own
knowledge of the database structure. This avoids, for example,
copying stray files that happen to be located in the source database
directory.
Dilip Kumar, with a fairly large number of cosmetic changes by me.
Reviewed and tested by Ashutosh Sharma, Andres Freund, John Naylor,
Greg Nancarrow, Neha Sharma. Additional feedback from Bruce Momjian,
Heikki Linnakangas, Julien Rouhaud, Adam Brusselback, Kyotaro
Horiguchi, Tomas Vondra, Andrew Dunstan, Álvaro Herrera, and others.
Discussion: http://postgr.es/m/CA+TgmoYtcdxBjLh31DLxUXHxFVMPGzrU5_T=CYCvRyFHywSBUQ@mail.gmail.com
2022-03-29 17:31:43 +02:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Create a new database using the WAL_LOG strategy.
|
|
|
|
*
|
|
|
|
* Each copied block is separately written to the write-ahead log.
|
|
|
|
*/
|
|
|
|
static void
|
|
|
|
CreateDatabaseUsingWalLog(Oid src_dboid, Oid dst_dboid,
|
|
|
|
Oid src_tsid, Oid dst_tsid)
|
|
|
|
{
|
|
|
|
char *srcpath;
|
|
|
|
char *dstpath;
|
Change internal RelFileNode references to RelFileNumber or RelFileLocator.
We have been using the term RelFileNode to refer to either (1) the
integer that is used to name the sequence of files for a certain relation
within the directory set aside for that tablespace/database combination;
or (2) that value plus the OIDs of the tablespace and database; or
occasionally (3) the whole series of files created for a relation
based on those values. Using the same name for more than one thing is
confusing.
Replace RelFileNode with RelFileNumber when we're talking about just the
single number, i.e. (1) from above, and with RelFileLocator when we're
talking about all the things that are needed to locate a relation's files
on disk, i.e. (2) from above. In the places where we refer to (3) as
a relfilenode, instead refer to "relation storage".
Since there is a ton of SQL code in the world that knows about
pg_class.relfilenode, don't change the name of that column, or of other
SQL-facing things that derive their name from it.
On the other hand, do adjust closely-related internal terminology. For
example, the structure member names dbNode and spcNode appear to be
derived from the fact that the structure itself was called RelFileNode,
so change those to dbOid and spcOid. Likewise, various variables with
names like rnode and relnode get renamed appropriately, according to
how they're being used in context.
Hopefully, this is clearer than before. It is also preparation for
future patches that intend to widen the relfilenumber fields from its
current width of 32 bits. Variables that store a relfilenumber are now
declared as type RelFileNumber rather than type Oid; right now, these
are the same, but that can now more easily be changed.
Dilip Kumar, per an idea from me. Reviewed also by Andres Freund.
I fixed some whitespace issues, changed a couple of words in a
comment, and made one other minor correction.
Discussion: http://postgr.es/m/CA+TgmoamOtXbVAQf9hWFzonUo6bhhjS6toZQd7HZ-pmojtAmag@mail.gmail.com
Discussion: http://postgr.es/m/CA+Tgmobp7+7kmi4gkq7Y+4AM9fTvL+O1oQ4-5gFTT+6Ng-dQ=g@mail.gmail.com
Discussion: http://postgr.es/m/CAFiTN-vTe79M8uDH1yprOU64MNFE+R3ODRuA+JWf27JbhY4hJw@mail.gmail.com
2022-07-06 17:39:09 +02:00
|
|
|
List *rlocatorlist = NULL;
|
Add new block-by-block strategy for CREATE DATABASE.
Because this strategy logs changes on a block-by-block basis, it
avoids the need to checkpoint before and after the operation.
However, because it logs each changed block individually, it might
generate a lot of extra write-ahead logging if the template database
is large. Therefore, the older strategy remains available via a new
STRATEGY parameter to CREATE DATABASE, and a corresponding --strategy
option to createdb.
Somewhat controversially, this patch assembles the list of relations
to be copied to the new database by reading the pg_class relation of
the template database. Cross-database access like this isn't normally
possible, but it can be made to work here because there can't be any
connections to the database being copied, nor can it contain any
in-doubt transactions. Even so, we have to use lower-level interfaces
than normal, since the table scan and relcache interfaces will not
work for a database to which we're not connected. The advantage of
this approach is that we do not need to rely on the filesystem to
determine what ought to be copied, but instead on PostgreSQL's own
knowledge of the database structure. This avoids, for example,
copying stray files that happen to be located in the source database
directory.
Dilip Kumar, with a fairly large number of cosmetic changes by me.
Reviewed and tested by Ashutosh Sharma, Andres Freund, John Naylor,
Greg Nancarrow, Neha Sharma. Additional feedback from Bruce Momjian,
Heikki Linnakangas, Julien Rouhaud, Adam Brusselback, Kyotaro
Horiguchi, Tomas Vondra, Andrew Dunstan, Álvaro Herrera, and others.
Discussion: http://postgr.es/m/CA+TgmoYtcdxBjLh31DLxUXHxFVMPGzrU5_T=CYCvRyFHywSBUQ@mail.gmail.com
2022-03-29 17:31:43 +02:00
|
|
|
ListCell *cell;
|
|
|
|
LockRelId srcrelid;
|
|
|
|
LockRelId dstrelid;
|
Change internal RelFileNode references to RelFileNumber or RelFileLocator.
We have been using the term RelFileNode to refer to either (1) the
integer that is used to name the sequence of files for a certain relation
within the directory set aside for that tablespace/database combination;
or (2) that value plus the OIDs of the tablespace and database; or
occasionally (3) the whole series of files created for a relation
based on those values. Using the same name for more than one thing is
confusing.
Replace RelFileNode with RelFileNumber when we're talking about just the
single number, i.e. (1) from above, and with RelFileLocator when we're
talking about all the things that are needed to locate a relation's files
on disk, i.e. (2) from above. In the places where we refer to (3) as
a relfilenode, instead refer to "relation storage".
Since there is a ton of SQL code in the world that knows about
pg_class.relfilenode, don't change the name of that column, or of other
SQL-facing things that derive their name from it.
On the other hand, do adjust closely-related internal terminology. For
example, the structure member names dbNode and spcNode appear to be
derived from the fact that the structure itself was called RelFileNode,
so change those to dbOid and spcOid. Likewise, various variables with
names like rnode and relnode get renamed appropriately, according to
how they're being used in context.
Hopefully, this is clearer than before. It is also preparation for
future patches that intend to widen the relfilenumber fields from its
current width of 32 bits. Variables that store a relfilenumber are now
declared as type RelFileNumber rather than type Oid; right now, these
are the same, but that can now more easily be changed.
Dilip Kumar, per an idea from me. Reviewed also by Andres Freund.
I fixed some whitespace issues, changed a couple of words in a
comment, and made one other minor correction.
Discussion: http://postgr.es/m/CA+TgmoamOtXbVAQf9hWFzonUo6bhhjS6toZQd7HZ-pmojtAmag@mail.gmail.com
Discussion: http://postgr.es/m/CA+Tgmobp7+7kmi4gkq7Y+4AM9fTvL+O1oQ4-5gFTT+6Ng-dQ=g@mail.gmail.com
Discussion: http://postgr.es/m/CAFiTN-vTe79M8uDH1yprOU64MNFE+R3ODRuA+JWf27JbhY4hJw@mail.gmail.com
2022-07-06 17:39:09 +02:00
|
|
|
RelFileLocator srcrlocator;
|
|
|
|
RelFileLocator dstrlocator;
|
Add new block-by-block strategy for CREATE DATABASE.
Because this strategy logs changes on a block-by-block basis, it
avoids the need to checkpoint before and after the operation.
However, because it logs each changed block individually, it might
generate a lot of extra write-ahead logging if the template database
is large. Therefore, the older strategy remains available via a new
STRATEGY parameter to CREATE DATABASE, and a corresponding --strategy
option to createdb.
Somewhat controversially, this patch assembles the list of relations
to be copied to the new database by reading the pg_class relation of
the template database. Cross-database access like this isn't normally
possible, but it can be made to work here because there can't be any
connections to the database being copied, nor can it contain any
in-doubt transactions. Even so, we have to use lower-level interfaces
than normal, since the table scan and relcache interfaces will not
work for a database to which we're not connected. The advantage of
this approach is that we do not need to rely on the filesystem to
determine what ought to be copied, but instead on PostgreSQL's own
knowledge of the database structure. This avoids, for example,
copying stray files that happen to be located in the source database
directory.
Dilip Kumar, with a fairly large number of cosmetic changes by me.
Reviewed and tested by Ashutosh Sharma, Andres Freund, John Naylor,
Greg Nancarrow, Neha Sharma. Additional feedback from Bruce Momjian,
Heikki Linnakangas, Julien Rouhaud, Adam Brusselback, Kyotaro
Horiguchi, Tomas Vondra, Andrew Dunstan, Álvaro Herrera, and others.
Discussion: http://postgr.es/m/CA+TgmoYtcdxBjLh31DLxUXHxFVMPGzrU5_T=CYCvRyFHywSBUQ@mail.gmail.com
2022-03-29 17:31:43 +02:00
|
|
|
CreateDBRelInfo *relinfo;
|
|
|
|
|
|
|
|
/* Get source and destination database paths. */
|
|
|
|
srcpath = GetDatabasePath(src_dboid, src_tsid);
|
|
|
|
dstpath = GetDatabasePath(dst_dboid, dst_tsid);
|
|
|
|
|
|
|
|
/* Create database directory and write PG_VERSION file. */
|
|
|
|
CreateDirAndVersionFile(dstpath, dst_dboid, dst_tsid, false);
|
|
|
|
|
|
|
|
/* Copy relmap file from source database to the destination database. */
|
|
|
|
RelationMapCopy(dst_dboid, dst_tsid, srcpath, dstpath);
|
|
|
|
|
Change internal RelFileNode references to RelFileNumber or RelFileLocator.
We have been using the term RelFileNode to refer to either (1) the
integer that is used to name the sequence of files for a certain relation
within the directory set aside for that tablespace/database combination;
or (2) that value plus the OIDs of the tablespace and database; or
occasionally (3) the whole series of files created for a relation
based on those values. Using the same name for more than one thing is
confusing.
Replace RelFileNode with RelFileNumber when we're talking about just the
single number, i.e. (1) from above, and with RelFileLocator when we're
talking about all the things that are needed to locate a relation's files
on disk, i.e. (2) from above. In the places where we refer to (3) as
a relfilenode, instead refer to "relation storage".
Since there is a ton of SQL code in the world that knows about
pg_class.relfilenode, don't change the name of that column, or of other
SQL-facing things that derive their name from it.
On the other hand, do adjust closely-related internal terminology. For
example, the structure member names dbNode and spcNode appear to be
derived from the fact that the structure itself was called RelFileNode,
so change those to dbOid and spcOid. Likewise, various variables with
names like rnode and relnode get renamed appropriately, according to
how they're being used in context.
Hopefully, this is clearer than before. It is also preparation for
future patches that intend to widen the relfilenumber fields from its
current width of 32 bits. Variables that store a relfilenumber are now
declared as type RelFileNumber rather than type Oid; right now, these
are the same, but that can now more easily be changed.
Dilip Kumar, per an idea from me. Reviewed also by Andres Freund.
I fixed some whitespace issues, changed a couple of words in a
comment, and made one other minor correction.
Discussion: http://postgr.es/m/CA+TgmoamOtXbVAQf9hWFzonUo6bhhjS6toZQd7HZ-pmojtAmag@mail.gmail.com
Discussion: http://postgr.es/m/CA+Tgmobp7+7kmi4gkq7Y+4AM9fTvL+O1oQ4-5gFTT+6Ng-dQ=g@mail.gmail.com
Discussion: http://postgr.es/m/CAFiTN-vTe79M8uDH1yprOU64MNFE+R3ODRuA+JWf27JbhY4hJw@mail.gmail.com
2022-07-06 17:39:09 +02:00
|
|
|
/* Get list of relfilelocators to copy from the source database. */
|
|
|
|
rlocatorlist = ScanSourceDatabasePgClass(src_tsid, src_dboid, srcpath);
|
|
|
|
Assert(rlocatorlist != NIL);
|
Add new block-by-block strategy for CREATE DATABASE.
Because this strategy logs changes on a block-by-block basis, it
avoids the need to checkpoint before and after the operation.
However, because it logs each changed block individually, it might
generate a lot of extra write-ahead logging if the template database
is large. Therefore, the older strategy remains available via a new
STRATEGY parameter to CREATE DATABASE, and a corresponding --strategy
option to createdb.
Somewhat controversially, this patch assembles the list of relations
to be copied to the new database by reading the pg_class relation of
the template database. Cross-database access like this isn't normally
possible, but it can be made to work here because there can't be any
connections to the database being copied, nor can it contain any
in-doubt transactions. Even so, we have to use lower-level interfaces
than normal, since the table scan and relcache interfaces will not
work for a database to which we're not connected. The advantage of
this approach is that we do not need to rely on the filesystem to
determine what ought to be copied, but instead on PostgreSQL's own
knowledge of the database structure. This avoids, for example,
copying stray files that happen to be located in the source database
directory.
Dilip Kumar, with a fairly large number of cosmetic changes by me.
Reviewed and tested by Ashutosh Sharma, Andres Freund, John Naylor,
Greg Nancarrow, Neha Sharma. Additional feedback from Bruce Momjian,
Heikki Linnakangas, Julien Rouhaud, Adam Brusselback, Kyotaro
Horiguchi, Tomas Vondra, Andrew Dunstan, Álvaro Herrera, and others.
Discussion: http://postgr.es/m/CA+TgmoYtcdxBjLh31DLxUXHxFVMPGzrU5_T=CYCvRyFHywSBUQ@mail.gmail.com
2022-03-29 17:31:43 +02:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Database IDs will be the same for all relations so set them before
|
|
|
|
* entering the loop.
|
|
|
|
*/
|
|
|
|
srcrelid.dbId = src_dboid;
|
|
|
|
dstrelid.dbId = dst_dboid;
|
|
|
|
|
Change internal RelFileNode references to RelFileNumber or RelFileLocator.
We have been using the term RelFileNode to refer to either (1) the
integer that is used to name the sequence of files for a certain relation
within the directory set aside for that tablespace/database combination;
or (2) that value plus the OIDs of the tablespace and database; or
occasionally (3) the whole series of files created for a relation
based on those values. Using the same name for more than one thing is
confusing.
Replace RelFileNode with RelFileNumber when we're talking about just the
single number, i.e. (1) from above, and with RelFileLocator when we're
talking about all the things that are needed to locate a relation's files
on disk, i.e. (2) from above. In the places where we refer to (3) as
a relfilenode, instead refer to "relation storage".
Since there is a ton of SQL code in the world that knows about
pg_class.relfilenode, don't change the name of that column, or of other
SQL-facing things that derive their name from it.
On the other hand, do adjust closely-related internal terminology. For
example, the structure member names dbNode and spcNode appear to be
derived from the fact that the structure itself was called RelFileNode,
so change those to dbOid and spcOid. Likewise, various variables with
names like rnode and relnode get renamed appropriately, according to
how they're being used in context.
Hopefully, this is clearer than before. It is also preparation for
future patches that intend to widen the relfilenumber fields from its
current width of 32 bits. Variables that store a relfilenumber are now
declared as type RelFileNumber rather than type Oid; right now, these
are the same, but that can now more easily be changed.
Dilip Kumar, per an idea from me. Reviewed also by Andres Freund.
I fixed some whitespace issues, changed a couple of words in a
comment, and made one other minor correction.
Discussion: http://postgr.es/m/CA+TgmoamOtXbVAQf9hWFzonUo6bhhjS6toZQd7HZ-pmojtAmag@mail.gmail.com
Discussion: http://postgr.es/m/CA+Tgmobp7+7kmi4gkq7Y+4AM9fTvL+O1oQ4-5gFTT+6Ng-dQ=g@mail.gmail.com
Discussion: http://postgr.es/m/CAFiTN-vTe79M8uDH1yprOU64MNFE+R3ODRuA+JWf27JbhY4hJw@mail.gmail.com
2022-07-06 17:39:09 +02:00
|
|
|
/* Loop over our list of relfilelocators and copy each one. */
|
|
|
|
foreach(cell, rlocatorlist)
|
Add new block-by-block strategy for CREATE DATABASE.
Because this strategy logs changes on a block-by-block basis, it
avoids the need to checkpoint before and after the operation.
However, because it logs each changed block individually, it might
generate a lot of extra write-ahead logging if the template database
is large. Therefore, the older strategy remains available via a new
STRATEGY parameter to CREATE DATABASE, and a corresponding --strategy
option to createdb.
Somewhat controversially, this patch assembles the list of relations
to be copied to the new database by reading the pg_class relation of
the template database. Cross-database access like this isn't normally
possible, but it can be made to work here because there can't be any
connections to the database being copied, nor can it contain any
in-doubt transactions. Even so, we have to use lower-level interfaces
than normal, since the table scan and relcache interfaces will not
work for a database to which we're not connected. The advantage of
this approach is that we do not need to rely on the filesystem to
determine what ought to be copied, but instead on PostgreSQL's own
knowledge of the database structure. This avoids, for example,
copying stray files that happen to be located in the source database
directory.
Dilip Kumar, with a fairly large number of cosmetic changes by me.
Reviewed and tested by Ashutosh Sharma, Andres Freund, John Naylor,
Greg Nancarrow, Neha Sharma. Additional feedback from Bruce Momjian,
Heikki Linnakangas, Julien Rouhaud, Adam Brusselback, Kyotaro
Horiguchi, Tomas Vondra, Andrew Dunstan, Álvaro Herrera, and others.
Discussion: http://postgr.es/m/CA+TgmoYtcdxBjLh31DLxUXHxFVMPGzrU5_T=CYCvRyFHywSBUQ@mail.gmail.com
2022-03-29 17:31:43 +02:00
|
|
|
{
|
|
|
|
relinfo = lfirst(cell);
|
Change internal RelFileNode references to RelFileNumber or RelFileLocator.
We have been using the term RelFileNode to refer to either (1) the
integer that is used to name the sequence of files for a certain relation
within the directory set aside for that tablespace/database combination;
or (2) that value plus the OIDs of the tablespace and database; or
occasionally (3) the whole series of files created for a relation
based on those values. Using the same name for more than one thing is
confusing.
Replace RelFileNode with RelFileNumber when we're talking about just the
single number, i.e. (1) from above, and with RelFileLocator when we're
talking about all the things that are needed to locate a relation's files
on disk, i.e. (2) from above. In the places where we refer to (3) as
a relfilenode, instead refer to "relation storage".
Since there is a ton of SQL code in the world that knows about
pg_class.relfilenode, don't change the name of that column, or of other
SQL-facing things that derive their name from it.
On the other hand, do adjust closely-related internal terminology. For
example, the structure member names dbNode and spcNode appear to be
derived from the fact that the structure itself was called RelFileNode,
so change those to dbOid and spcOid. Likewise, various variables with
names like rnode and relnode get renamed appropriately, according to
how they're being used in context.
Hopefully, this is clearer than before. It is also preparation for
future patches that intend to widen the relfilenumber fields from its
current width of 32 bits. Variables that store a relfilenumber are now
declared as type RelFileNumber rather than type Oid; right now, these
are the same, but that can now more easily be changed.
Dilip Kumar, per an idea from me. Reviewed also by Andres Freund.
I fixed some whitespace issues, changed a couple of words in a
comment, and made one other minor correction.
Discussion: http://postgr.es/m/CA+TgmoamOtXbVAQf9hWFzonUo6bhhjS6toZQd7HZ-pmojtAmag@mail.gmail.com
Discussion: http://postgr.es/m/CA+Tgmobp7+7kmi4gkq7Y+4AM9fTvL+O1oQ4-5gFTT+6Ng-dQ=g@mail.gmail.com
Discussion: http://postgr.es/m/CAFiTN-vTe79M8uDH1yprOU64MNFE+R3ODRuA+JWf27JbhY4hJw@mail.gmail.com
2022-07-06 17:39:09 +02:00
|
|
|
srcrlocator = relinfo->rlocator;
|
Add new block-by-block strategy for CREATE DATABASE.
Because this strategy logs changes on a block-by-block basis, it
avoids the need to checkpoint before and after the operation.
However, because it logs each changed block individually, it might
generate a lot of extra write-ahead logging if the template database
is large. Therefore, the older strategy remains available via a new
STRATEGY parameter to CREATE DATABASE, and a corresponding --strategy
option to createdb.
Somewhat controversially, this patch assembles the list of relations
to be copied to the new database by reading the pg_class relation of
the template database. Cross-database access like this isn't normally
possible, but it can be made to work here because there can't be any
connections to the database being copied, nor can it contain any
in-doubt transactions. Even so, we have to use lower-level interfaces
than normal, since the table scan and relcache interfaces will not
work for a database to which we're not connected. The advantage of
this approach is that we do not need to rely on the filesystem to
determine what ought to be copied, but instead on PostgreSQL's own
knowledge of the database structure. This avoids, for example,
copying stray files that happen to be located in the source database
directory.
Dilip Kumar, with a fairly large number of cosmetic changes by me.
Reviewed and tested by Ashutosh Sharma, Andres Freund, John Naylor,
Greg Nancarrow, Neha Sharma. Additional feedback from Bruce Momjian,
Heikki Linnakangas, Julien Rouhaud, Adam Brusselback, Kyotaro
Horiguchi, Tomas Vondra, Andrew Dunstan, Álvaro Herrera, and others.
Discussion: http://postgr.es/m/CA+TgmoYtcdxBjLh31DLxUXHxFVMPGzrU5_T=CYCvRyFHywSBUQ@mail.gmail.com
2022-03-29 17:31:43 +02:00
|
|
|
|
|
|
|
/*
|
|
|
|
* If the relation is from the source db's default tablespace then we
|
2022-08-04 11:41:29 +02:00
|
|
|
* need to create it in the destination db's default tablespace.
|
Add new block-by-block strategy for CREATE DATABASE.
Because this strategy logs changes on a block-by-block basis, it
avoids the need to checkpoint before and after the operation.
		 * Otherwise, we need to create in the same tablespace as it is in the
		 * source database.
		 */
		if (srcrlocator.spcOid == src_tsid)
			dstrlocator.spcOid = dst_tsid;
		else
			dstrlocator.spcOid = srcrlocator.spcOid;

		dstrlocator.dbOid = dst_dboid;
		dstrlocator.relNumber = srcrlocator.relNumber;

		/*
		 * Acquire locks on source and target relations before copying.
		 *
		 * We typically do not read relation data into shared_buffers without
		 * holding a relation lock. It's unclear what could go wrong if we
		 * skipped it in this case, because nobody can be modifying either the
		 * source or destination database at this point, and we have locks on
		 * both databases, too, but let's take the conservative route.
		 */
		dstrelid.relId = srcrelid.relId = relinfo->reloid;
		LockRelationId(&srcrelid, AccessShareLock);
		LockRelationId(&dstrelid, AccessShareLock);

		/* Copy relation storage from source to the destination. */
		CreateAndCopyRelationData(srcrlocator, dstrlocator, relinfo->permanent);

		/* Release the relation locks. */
		UnlockRelationId(&srcrelid, AccessShareLock);
		UnlockRelationId(&dstrelid, AccessShareLock);
	}

	pfree(srcpath);
	pfree(dstpath);
	list_free_deep(rlocatorlist);
}

/*
 * Scan the pg_class table in the source database to identify the relations
 * that need to be copied to the destination database.
 *
 * This is an exception to the usual rule that cross-database access is
 * not possible. We can make it work here because we know that there are no
 * connections to the source database and (since there can't be prepared
 * transactions touching that database) no in-doubt tuples either. This
 * means that we don't need to worry about pruning removing anything from
 * under us, and we don't need to be too picky about our snapshot either.
 * As long as it sees all previously-committed XIDs as committed and all
 * aborted XIDs as aborted, we should be fine: nothing else is possible
 * here.
 *
 * We can't rely on the relcache for anything here, because that only knows
 * about the database to which we are connected, and can't handle access to
 * other databases. That also means we can't rely on the heap scan
 * infrastructure, which would be a bad idea anyway since it might try
 * to do things like HOT pruning which we definitely can't do safely in
 * a database to which we're not even connected.
 */
static List *
ScanSourceDatabasePgClass(Oid tbid, Oid dbid, char *srcpath)
{
	RelFileLocator rlocator;
	BlockNumber nblocks;
	BlockNumber blkno;
	Buffer		buf;
	RelFileNumber relfilenumber;
	Page		page;
	List	   *rlocatorlist = NIL;
	LockRelId	relid;
	Snapshot	snapshot;
	SMgrRelation smgr;
Discussion: http://postgr.es/m/CA+TgmoYtcdxBjLh31DLxUXHxFVMPGzrU5_T=CYCvRyFHywSBUQ@mail.gmail.com
2022-03-29 17:31:43 +02:00
|
|
|
BufferAccessStrategy bstrategy;
|
|
|
|
|
Change internal RelFileNode references to RelFileNumber or RelFileLocator.
We have been using the term RelFileNode to refer to either (1) the
integer that is used to name the sequence of files for a certain relation
within the directory set aside for that tablespace/database combination;
or (2) that value plus the OIDs of the tablespace and database; or
occasionally (3) the whole series of files created for a relation
based on those values. Using the same name for more than one thing is
confusing.
Replace RelFileNode with RelFileNumber when we're talking about just the
single number, i.e. (1) from above, and with RelFileLocator when we're
talking about all the things that are needed to locate a relation's files
on disk, i.e. (2) from above. In the places where we refer to (3) as
a relfilenode, instead refer to "relation storage".
Since there is a ton of SQL code in the world that knows about
pg_class.relfilenode, don't change the name of that column, or of other
SQL-facing things that derive their name from it.
On the other hand, do adjust closely-related internal terminology. For
example, the structure member names dbNode and spcNode appear to be
derived from the fact that the structure itself was called RelFileNode,
so change those to dbOid and spcOid. Likewise, various variables with
names like rnode and relnode get renamed appropriately, according to
how they're being used in context.
Hopefully, this is clearer than before. It is also preparation for
future patches that intend to widen the relfilenumber fields from its
current width of 32 bits. Variables that store a relfilenumber are now
declared as type RelFileNumber rather than type Oid; right now, these
are the same, but that can now more easily be changed.
Dilip Kumar, per an idea from me. Reviewed also by Andres Freund.
I fixed some whitespace issues, changed a couple of words in a
comment, and made one other minor correction.
Discussion: http://postgr.es/m/CA+TgmoamOtXbVAQf9hWFzonUo6bhhjS6toZQd7HZ-pmojtAmag@mail.gmail.com
Discussion: http://postgr.es/m/CA+Tgmobp7+7kmi4gkq7Y+4AM9fTvL+O1oQ4-5gFTT+6Ng-dQ=g@mail.gmail.com
Discussion: http://postgr.es/m/CAFiTN-vTe79M8uDH1yprOU64MNFE+R3ODRuA+JWf27JbhY4hJw@mail.gmail.com
2022-07-06 17:39:09 +02:00
|
|
|
/* Get pg_class relfilenumber. */
|
|
|
|
relfilenumber = RelationMapOidToFilenumberForDatabase(srcpath,
|
|
|
|
RelationRelationId);
|
Add new block-by-block strategy for CREATE DATABASE.
Because this strategy logs changes on a block-by-block basis, it
avoids the need to checkpoint before and after the operation.
However, because it logs each changed block individually, it might
generate a lot of extra write-ahead logging if the template database
is large. Therefore, the older strategy remains available via a new
STRATEGY parameter to CREATE DATABASE, and a corresponding --strategy
option to createdb.
Somewhat controversially, this patch assembles the list of relations
to be copied to the new database by reading the pg_class relation of
the template database. Cross-database access like this isn't normally
possible, but it can be made to work here because there can't be any
connections to the database being copied, nor can it contain any
in-doubt transactions. Even so, we have to use lower-level interfaces
than normal, since the table scan and relcache interfaces will not
work for a database to which we're not connected. The advantage of
this approach is that we do not need to rely on the filesystem to
determine what ought to be copied, but instead on PostgreSQL's own
knowledge of the database structure. This avoids, for example,
copying stray files that happen to be located in the source database
directory.
Dilip Kumar, with a fairly large number of cosmetic changes by me.
Reviewed and tested by Ashutosh Sharma, Andres Freund, John Naylor,
Greg Nancarrow, Neha Sharma. Additional feedback from Bruce Momjian,
Heikki Linnakangas, Julien Rouhaud, Adam Brusselback, Kyotaro
Horiguchi, Tomas Vondra, Andrew Dunstan, Álvaro Herrera, and others.
Discussion: http://postgr.es/m/CA+TgmoYtcdxBjLh31DLxUXHxFVMPGzrU5_T=CYCvRyFHywSBUQ@mail.gmail.com
2022-03-29 17:31:43 +02:00
|
|
|
|
|
|
|
/* Don't read data into shared_buffers without holding a relation lock. */
|
|
|
|
relid.dbId = dbid;
|
|
|
|
relid.relId = RelationRelationId;
|
|
|
|
LockRelationId(&relid, AccessShareLock);
|
|
|
|
|
Change internal RelFileNode references to RelFileNumber or RelFileLocator.
We have been using the term RelFileNode to refer to either (1) the
integer that is used to name the sequence of files for a certain relation
within the directory set aside for that tablespace/database combination;
or (2) that value plus the OIDs of the tablespace and database; or
occasionally (3) the whole series of files created for a relation
based on those values. Using the same name for more than one thing is
confusing.
Replace RelFileNode with RelFileNumber when we're talking about just the
single number, i.e. (1) from above, and with RelFileLocator when we're
talking about all the things that are needed to locate a relation's files
on disk, i.e. (2) from above. In the places where we refer to (3) as
a relfilenode, instead refer to "relation storage".
Since there is a ton of SQL code in the world that knows about
pg_class.relfilenode, don't change the name of that column, or of other
SQL-facing things that derive their name from it.
On the other hand, do adjust closely-related internal terminology. For
example, the structure member names dbNode and spcNode appear to be
derived from the fact that the structure itself was called RelFileNode,
so change those to dbOid and spcOid. Likewise, various variables with
names like rnode and relnode get renamed appropriately, according to
how they're being used in context.
Hopefully, this is clearer than before. It is also preparation for
future patches that intend to widen the relfilenumber fields from its
current width of 32 bits. Variables that store a relfilenumber are now
declared as type RelFileNumber rather than type Oid; right now, these
are the same, but that can now more easily be changed.
Dilip Kumar, per an idea from me. Reviewed also by Andres Freund.
I fixed some whitespace issues, changed a couple of words in a
comment, and made one other minor correction.
Discussion: http://postgr.es/m/CA+TgmoamOtXbVAQf9hWFzonUo6bhhjS6toZQd7HZ-pmojtAmag@mail.gmail.com
Discussion: http://postgr.es/m/CA+Tgmobp7+7kmi4gkq7Y+4AM9fTvL+O1oQ4-5gFTT+6Ng-dQ=g@mail.gmail.com
Discussion: http://postgr.es/m/CAFiTN-vTe79M8uDH1yprOU64MNFE+R3ODRuA+JWf27JbhY4hJw@mail.gmail.com
2022-07-06 17:39:09 +02:00
|
|
|
/* Prepare a RelFileLocator for the pg_class relation. */
|
|
|
|
rlocator.spcOid = tbid;
|
|
|
|
rlocator.dbOid = dbid;
|
|
|
|
rlocator.relNumber = relfilenumber;
|
Add new block-by-block strategy for CREATE DATABASE.
Because this strategy logs changes on a block-by-block basis, it
avoids the need to checkpoint before and after the operation.
However, because it logs each changed block individually, it might
generate a lot of extra write-ahead logging if the template database
is large. Therefore, the older strategy remains available via a new
STRATEGY parameter to CREATE DATABASE, and a corresponding --strategy
option to createdb.
Somewhat controversially, this patch assembles the list of relations
to be copied to the new database by reading the pg_class relation of
the template database. Cross-database access like this isn't normally
possible, but it can be made to work here because there can't be any
connections to the database being copied, nor can it contain any
in-doubt transactions. Even so, we have to use lower-level interfaces
than normal, since the table scan and relcache interfaces will not
work for a database to which we're not connected. The advantage of
this approach is that we do not need to rely on the filesystem to
determine what ought to be copied, but instead on PostgreSQL's own
knowledge of the database structure. This avoids, for example,
copying stray files that happen to be located in the source database
directory.
Dilip Kumar, with a fairly large number of cosmetic changes by me.
Reviewed and tested by Ashutosh Sharma, Andres Freund, John Naylor,
Greg Nancarrow, Neha Sharma. Additional feedback from Bruce Momjian,
Heikki Linnakangas, Julien Rouhaud, Adam Brusselback, Kyotaro
Horiguchi, Tomas Vondra, Andrew Dunstan, Álvaro Herrera, and others.
Discussion: http://postgr.es/m/CA+TgmoYtcdxBjLh31DLxUXHxFVMPGzrU5_T=CYCvRyFHywSBUQ@mail.gmail.com
2022-03-29 17:31:43 +02:00
|
|
|
|
2022-08-12 14:25:41 +02:00
|
|
|
smgr = smgropen(rlocator, InvalidBackendId);
|
|
|
|
nblocks = smgrnblocks(smgr, MAIN_FORKNUM);
|
|
|
|
smgrclose(smgr);
|
Add new block-by-block strategy for CREATE DATABASE.
Because this strategy logs changes on a block-by-block basis, it
avoids the need to checkpoint before and after the operation.
However, because it logs each changed block individually, it might
generate a lot of extra write-ahead logging if the template database
is large. Therefore, the older strategy remains available via a new
STRATEGY parameter to CREATE DATABASE, and a corresponding --strategy
option to createdb.
Somewhat controversially, this patch assembles the list of relations
to be copied to the new database by reading the pg_class relation of
the template database. Cross-database access like this isn't normally
possible, but it can be made to work here because there can't be any
connections to the database being copied, nor can it contain any
in-doubt transactions. Even so, we have to use lower-level interfaces
than normal, since the table scan and relcache interfaces will not
work for a database to which we're not connected. The advantage of
this approach is that we do not need to rely on the filesystem to
determine what ought to be copied, but instead on PostgreSQL's own
knowledge of the database structure. This avoids, for example,
copying stray files that happen to be located in the source database
directory.
Dilip Kumar, with a fairly large number of cosmetic changes by me.
Reviewed and tested by Ashutosh Sharma, Andres Freund, John Naylor,
Greg Nancarrow, Neha Sharma. Additional feedback from Bruce Momjian,
Heikki Linnakangas, Julien Rouhaud, Adam Brusselback, Kyotaro
Horiguchi, Tomas Vondra, Andrew Dunstan, Álvaro Herrera, and others.
Discussion: http://postgr.es/m/CA+TgmoYtcdxBjLh31DLxUXHxFVMPGzrU5_T=CYCvRyFHywSBUQ@mail.gmail.com
2022-03-29 17:31:43 +02:00
|
|
|
|
|
|
|
/* Use a buffer access strategy since this is a bulk read operation. */
|
|
|
|
bstrategy = GetAccessStrategy(BAS_BULKREAD);
|
|
|
|
|
|
|
|
	/*
	 * As explained in the function header comments, we need a snapshot that
	 * will see all committed transactions as committed, and our transaction
	 * snapshot - or the active snapshot - might not be new enough for that,
	 * but the return value of GetLatestSnapshot() should work fine.
	 */
	snapshot = GetLatestSnapshot();

	/* Process the relation block by block. */
	for (blkno = 0; blkno < nblocks; blkno++)
	{
		CHECK_FOR_INTERRUPTS();

		buf = ReadBufferWithoutRelcache(rlocator, MAIN_FORKNUM, blkno,
										RBM_NORMAL, bstrategy, true);

		LockBuffer(buf, BUFFER_LOCK_SHARE);
		page = BufferGetPage(buf);
		if (PageIsNew(page) || PageIsEmpty(page))
		{
			UnlockReleaseBuffer(buf);
			continue;
		}

		/* Append relevant pg_class tuples for current page to rlocatorlist. */
		rlocatorlist = ScanSourceDatabasePgClassPage(page, buf, tbid, dbid,
													 srcpath, rlocatorlist,
													 snapshot);

		UnlockReleaseBuffer(buf);
	}

	/* Release relation lock. */
	UnlockRelationId(&relid, AccessShareLock);

	return rlocatorlist;
}

/*
|
|
|
|
* Scan one page of the source database's pg_class relation and add relevant
|
Change internal RelFileNode references to RelFileNumber or RelFileLocator.
We have been using the term RelFileNode to refer to either (1) the
integer that is used to name the sequence of files for a certain relation
within the directory set aside for that tablespace/database combination;
or (2) that value plus the OIDs of the tablespace and database; or
occasionally (3) the whole series of files created for a relation
based on those values. Using the same name for more than one thing is
confusing.
Replace RelFileNode with RelFileNumber when we're talking about just the
single number, i.e. (1) from above, and with RelFileLocator when we're
talking about all the things that are needed to locate a relation's files
on disk, i.e. (2) from above. In the places where we refer to (3) as
a relfilenode, instead refer to "relation storage".
Since there is a ton of SQL code in the world that knows about
pg_class.relfilenode, don't change the name of that column, or of other
SQL-facing things that derive their name from it.
On the other hand, do adjust closely-related internal terminology. For
example, the structure member names dbNode and spcNode appear to be
derived from the fact that the structure itself was called RelFileNode,
so change those to dbOid and spcOid. Likewise, various variables with
names like rnode and relnode get renamed appropriately, according to
how they're being used in context.
Hopefully, this is clearer than before. It is also preparation for
future patches that intend to widen the relfilenumber fields from its
current width of 32 bits. Variables that store a relfilenumber are now
declared as type RelFileNumber rather than type Oid; right now, these
are the same, but that can now more easily be changed.
Dilip Kumar, per an idea from me. Reviewed also by Andres Freund.
I fixed some whitespace issues, changed a couple of words in a
comment, and made one other minor correction.
Discussion: http://postgr.es/m/CA+TgmoamOtXbVAQf9hWFzonUo6bhhjS6toZQd7HZ-pmojtAmag@mail.gmail.com
Discussion: http://postgr.es/m/CA+Tgmobp7+7kmi4gkq7Y+4AM9fTvL+O1oQ4-5gFTT+6Ng-dQ=g@mail.gmail.com
Discussion: http://postgr.es/m/CAFiTN-vTe79M8uDH1yprOU64MNFE+R3ODRuA+JWf27JbhY4hJw@mail.gmail.com
2022-07-06 17:39:09 +02:00
|
|
|
* entries to rlocatorlist. The return value is the updated list.
|
Add new block-by-block strategy for CREATE DATABASE.
Because this strategy logs changes on a block-by-block basis, it
avoids the need to checkpoint before and after the operation.
However, because it logs each changed block individually, it might
generate a lot of extra write-ahead logging if the template database
is large. Therefore, the older strategy remains available via a new
STRATEGY parameter to CREATE DATABASE, and a corresponding --strategy
option to createdb.
Somewhat controversially, this patch assembles the list of relations
to be copied to the new database by reading the pg_class relation of
the template database. Cross-database access like this isn't normally
possible, but it can be made to work here because there can't be any
connections to the database being copied, nor can it contain any
in-doubt transactions. Even so, we have to use lower-level interfaces
than normal, since the table scan and relcache interfaces will not
work for a database to which we're not connected. The advantage of
this approach is that we do not need to rely on the filesystem to
determine what ought to be copied, but instead on PostgreSQL's own
knowledge of the database structure. This avoids, for example,
copying stray files that happen to be located in the source database
directory.
Dilip Kumar, with a fairly large number of cosmetic changes by me.
Reviewed and tested by Ashutosh Sharma, Andres Freund, John Naylor,
Greg Nancarrow, Neha Sharma. Additional feedback from Bruce Momjian,
Heikki Linnakangas, Julien Rouhaud, Adam Brusselback, Kyotaro
Horiguchi, Tomas Vondra, Andrew Dunstan, Álvaro Herrera, and others.
Discussion: http://postgr.es/m/CA+TgmoYtcdxBjLh31DLxUXHxFVMPGzrU5_T=CYCvRyFHywSBUQ@mail.gmail.com
2022-03-29 17:31:43 +02:00
|
|
|
*/
|
|
|
|
static List *
|
|
|
|
ScanSourceDatabasePgClassPage(Page page, Buffer buf, Oid tbid, Oid dbid,
|
Change internal RelFileNode references to RelFileNumber or RelFileLocator.
We have been using the term RelFileNode to refer to either (1) the
integer that is used to name the sequence of files for a certain relation
within the directory set aside for that tablespace/database combination;
or (2) that value plus the OIDs of the tablespace and database; or
occasionally (3) the whole series of files created for a relation
based on those values. Using the same name for more than one thing is
confusing.
Replace RelFileNode with RelFileNumber when we're talking about just the
single number, i.e. (1) from above, and with RelFileLocator when we're
talking about all the things that are needed to locate a relation's files
on disk, i.e. (2) from above. In the places where we refer to (3) as
a relfilenode, instead refer to "relation storage".
Since there is a ton of SQL code in the world that knows about
pg_class.relfilenode, don't change the name of that column, or of other
SQL-facing things that derive their name from it.
On the other hand, do adjust closely-related internal terminology. For
example, the structure member names dbNode and spcNode appear to be
derived from the fact that the structure itself was called RelFileNode,
so change those to dbOid and spcOid. Likewise, various variables with
names like rnode and relnode get renamed appropriately, according to
how they're being used in context.
Hopefully, this is clearer than before. It is also preparation for
future patches that intend to widen the relfilenumber fields from its
current width of 32 bits. Variables that store a relfilenumber are now
declared as type RelFileNumber rather than type Oid; right now, these
are the same, but that can now more easily be changed.
Dilip Kumar, per an idea from me. Reviewed also by Andres Freund.
I fixed some whitespace issues, changed a couple of words in a
comment, and made one other minor correction.
Discussion: http://postgr.es/m/CA+TgmoamOtXbVAQf9hWFzonUo6bhhjS6toZQd7HZ-pmojtAmag@mail.gmail.com
Discussion: http://postgr.es/m/CA+Tgmobp7+7kmi4gkq7Y+4AM9fTvL+O1oQ4-5gFTT+6Ng-dQ=g@mail.gmail.com
Discussion: http://postgr.es/m/CAFiTN-vTe79M8uDH1yprOU64MNFE+R3ODRuA+JWf27JbhY4hJw@mail.gmail.com
2022-07-06 17:39:09 +02:00
							  char *srcpath, List *rlocatorlist,
							  Snapshot snapshot)
{
	BlockNumber blkno = BufferGetBlockNumber(buf);
	OffsetNumber offnum;
	OffsetNumber maxoff;
	HeapTupleData tuple;

	maxoff = PageGetMaxOffsetNumber(page);

	/* Loop over offsets. */
	for (offnum = FirstOffsetNumber;
		 offnum <= maxoff;
		 offnum = OffsetNumberNext(offnum))
	{
		ItemId		itemid;

		itemid = PageGetItemId(page, offnum);

		/* Nothing to do if slot is empty or already dead. */
		if (!ItemIdIsUsed(itemid) || ItemIdIsDead(itemid) ||
			ItemIdIsRedirected(itemid))
			continue;

		Assert(ItemIdIsNormal(itemid));
		ItemPointerSet(&(tuple.t_self), blkno, offnum);

		/* Initialize a HeapTupleData structure. */
		tuple.t_data = (HeapTupleHeader) PageGetItem(page, itemid);
		tuple.t_len = ItemIdGetLength(itemid);
		tuple.t_tableOid = RelationRelationId;

		/* Skip tuples that are not visible to this snapshot. */
		if (HeapTupleSatisfiesVisibility(&tuple, snapshot, buf))
		{
			CreateDBRelInfo *relinfo;

			/*
			 * ScanSourceDatabasePgClassTuple is in charge of constructing a
			 * CreateDBRelInfo object for this tuple, but can also decide that
			 * this tuple isn't something we need to copy. If we do need to
			 * copy the relation, add it to the list.
			 */
			relinfo = ScanSourceDatabasePgClassTuple(&tuple, tbid, dbid,
													 srcpath);
			if (relinfo != NULL)
				rlocatorlist = lappend(rlocatorlist, relinfo);
		}
	}

	return rlocatorlist;
}

/*
 * Decide whether a certain pg_class tuple represents something that
 * needs to be copied from the source database to the destination database,
 * and if so, construct a CreateDBRelInfo for it.
 *
 * Visibility checks are handled by the caller, so our job here is just
 * to assess the data stored in the tuple.
 */
CreateDBRelInfo *
ScanSourceDatabasePgClassTuple(HeapTupleData *tuple, Oid tbid, Oid dbid,
							   char *srcpath)
{
	CreateDBRelInfo *relinfo;
	Form_pg_class classForm;
	RelFileNumber relfilenumber = InvalidRelFileNumber;

	classForm = (Form_pg_class) GETSTRUCT(tuple);

	/*
	 * Return NULL if this object does not need to be copied.
	 *
	 * Shared objects don't need to be copied, because they are shared.
	 * Objects without storage can't be copied, because there's nothing to
	 * copy. Temporary relations don't need to be copied either, because they
	 * are inaccessible outside of the session that created them, which must
	 * be gone already, and couldn't connect to a different database if it
	 * still existed. autovacuum will eventually remove the pg_class entries
	 * as well.
	 */
	if (classForm->reltablespace == GLOBALTABLESPACE_OID ||
		!RELKIND_HAS_STORAGE(classForm->relkind) ||
		classForm->relpersistence == RELPERSISTENCE_TEMP)
		return NULL;
|
|
|
|
|
|
|
|
/*
|
Change internal RelFileNode references to RelFileNumber or RelFileLocator.
We have been using the term RelFileNode to refer to either (1) the
integer that is used to name the sequence of files for a certain relation
within the directory set aside for that tablespace/database combination;
or (2) that value plus the OIDs of the tablespace and database; or
occasionally (3) the whole series of files created for a relation
based on those values. Using the same name for more than one thing is
confusing.
Replace RelFileNode with RelFileNumber when we're talking about just the
single number, i.e. (1) from above, and with RelFileLocator when we're
talking about all the things that are needed to locate a relation's files
on disk, i.e. (2) from above. In the places where we refer to (3) as
a relfilenode, instead refer to "relation storage".
Since there is a ton of SQL code in the world that knows about
pg_class.relfilenode, don't change the name of that column, or of other
SQL-facing things that derive their name from it.
On the other hand, do adjust closely-related internal terminology. For
example, the structure member names dbNode and spcNode appear to be
derived from the fact that the structure itself was called RelFileNode,
so change those to dbOid and spcOid. Likewise, various variables with
names like rnode and relnode get renamed appropriately, according to
how they're being used in context.
Hopefully, this is clearer than before. It is also preparation for
future patches that intend to widen the relfilenumber fields from its
current width of 32 bits. Variables that store a relfilenumber are now
declared as type RelFileNumber rather than type Oid; right now, these
are the same, but that can now more easily be changed.
Dilip Kumar, per an idea from me. Reviewed also by Andres Freund.
I fixed some whitespace issues, changed a couple of words in a
comment, and made one other minor correction.
Discussion: http://postgr.es/m/CA+TgmoamOtXbVAQf9hWFzonUo6bhhjS6toZQd7HZ-pmojtAmag@mail.gmail.com
Discussion: http://postgr.es/m/CA+Tgmobp7+7kmi4gkq7Y+4AM9fTvL+O1oQ4-5gFTT+6Ng-dQ=g@mail.gmail.com
Discussion: http://postgr.es/m/CAFiTN-vTe79M8uDH1yprOU64MNFE+R3ODRuA+JWf27JbhY4hJw@mail.gmail.com
2022-07-06 17:39:09 +02:00
|
|
|
* If relfilenumber is valid then directly use it. Otherwise, consult the
|
Add new block-by-block strategy for CREATE DATABASE.
Because this strategy logs changes on a block-by-block basis, it
avoids the need to checkpoint before and after the operation.
However, because it logs each changed block individually, it might
generate a lot of extra write-ahead logging if the template database
is large. Therefore, the older strategy remains available via a new
STRATEGY parameter to CREATE DATABASE, and a corresponding --strategy
option to createdb.
Somewhat controversially, this patch assembles the list of relations
to be copied to the new database by reading the pg_class relation of
the template database. Cross-database access like this isn't normally
possible, but it can be made to work here because there can't be any
connections to the database being copied, nor can it contain any
in-doubt transactions. Even so, we have to use lower-level interfaces
than normal, since the table scan and relcache interfaces will not
work for a database to which we're not connected. The advantage of
this approach is that we do not need to rely on the filesystem to
determine what ought to be copied, but instead on PostgreSQL's own
knowledge of the database structure. This avoids, for example,
copying stray files that happen to be located in the source database
directory.
Dilip Kumar, with a fairly large number of cosmetic changes by me.
Reviewed and tested by Ashutosh Sharma, Andres Freund, John Naylor,
Greg Nancarrow, Neha Sharma. Additional feedback from Bruce Momjian,
Heikki Linnakangas, Julien Rouhaud, Adam Brusselback, Kyotaro
Horiguchi, Tomas Vondra, Andrew Dunstan, Álvaro Herrera, and others.
Discussion: http://postgr.es/m/CA+TgmoYtcdxBjLh31DLxUXHxFVMPGzrU5_T=CYCvRyFHywSBUQ@mail.gmail.com
2022-03-29 17:31:43 +02:00
|
|
|
* relmap.
|
|
|
|
*/
|
Change internal RelFileNode references to RelFileNumber or RelFileLocator.
We have been using the term RelFileNode to refer to either (1) the
integer that is used to name the sequence of files for a certain relation
within the directory set aside for that tablespace/database combination;
or (2) that value plus the OIDs of the tablespace and database; or
occasionally (3) the whole series of files created for a relation
based on those values. Using the same name for more than one thing is
confusing.
Replace RelFileNode with RelFileNumber when we're talking about just the
single number, i.e. (1) from above, and with RelFileLocator when we're
talking about all the things that are needed to locate a relation's files
on disk, i.e. (2) from above. In the places where we refer to (3) as
a relfilenode, instead refer to "relation storage".
Since there is a ton of SQL code in the world that knows about
pg_class.relfilenode, don't change the name of that column, or of other
SQL-facing things that derive their name from it.
On the other hand, do adjust closely-related internal terminology. For
example, the structure member names dbNode and spcNode appear to be
derived from the fact that the structure itself was called RelFileNode,
so change those to dbOid and spcOid. Likewise, various variables with
names like rnode and relnode get renamed appropriately, according to
how they're being used in context.
Hopefully, this is clearer than before. It is also preparation for
future patches that intend to widen the relfilenumber fields from its
current width of 32 bits. Variables that store a relfilenumber are now
declared as type RelFileNumber rather than type Oid; right now, these
are the same, but that can now more easily be changed.
Dilip Kumar, per an idea from me. Reviewed also by Andres Freund.
I fixed some whitespace issues, changed a couple of words in a
comment, and made one other minor correction.
Discussion: http://postgr.es/m/CA+TgmoamOtXbVAQf9hWFzonUo6bhhjS6toZQd7HZ-pmojtAmag@mail.gmail.com
Discussion: http://postgr.es/m/CA+Tgmobp7+7kmi4gkq7Y+4AM9fTvL+O1oQ4-5gFTT+6Ng-dQ=g@mail.gmail.com
Discussion: http://postgr.es/m/CAFiTN-vTe79M8uDH1yprOU64MNFE+R3ODRuA+JWf27JbhY4hJw@mail.gmail.com
2022-07-06 17:39:09 +02:00
|
|
|
if (RelFileNumberIsValid(classForm->relfilenode))
|
|
|
|
relfilenumber = classForm->relfilenode;
|
Add new block-by-block strategy for CREATE DATABASE.
Because this strategy logs changes on a block-by-block basis, it
avoids the need to checkpoint before and after the operation.
However, because it logs each changed block individually, it might
generate a lot of extra write-ahead logging if the template database
is large. Therefore, the older strategy remains available via a new
STRATEGY parameter to CREATE DATABASE, and a corresponding --strategy
option to createdb.
Somewhat controversially, this patch assembles the list of relations
to be copied to the new database by reading the pg_class relation of
the template database. Cross-database access like this isn't normally
possible, but it can be made to work here because there can't be any
connections to the database being copied, nor can it contain any
in-doubt transactions. Even so, we have to use lower-level interfaces
than normal, since the table scan and relcache interfaces will not
work for a database to which we're not connected. The advantage of
this approach is that we do not need to rely on the filesystem to
determine what ought to be copied, but instead on PostgreSQL's own
knowledge of the database structure. This avoids, for example,
copying stray files that happen to be located in the source database
directory.
Dilip Kumar, with a fairly large number of cosmetic changes by me.
Reviewed and tested by Ashutosh Sharma, Andres Freund, John Naylor,
Greg Nancarrow, Neha Sharma. Additional feedback from Bruce Momjian,
Heikki Linnakangas, Julien Rouhaud, Adam Brusselback, Kyotaro
Horiguchi, Tomas Vondra, Andrew Dunstan, Álvaro Herrera, and others.
Discussion: http://postgr.es/m/CA+TgmoYtcdxBjLh31DLxUXHxFVMPGzrU5_T=CYCvRyFHywSBUQ@mail.gmail.com
2022-03-29 17:31:43 +02:00
|
|
|
else
|
Change internal RelFileNode references to RelFileNumber or RelFileLocator.
We have been using the term RelFileNode to refer to either (1) the
integer that is used to name the sequence of files for a certain relation
within the directory set aside for that tablespace/database combination;
or (2) that value plus the OIDs of the tablespace and database; or
occasionally (3) the whole series of files created for a relation
based on those values. Using the same name for more than one thing is
confusing.
Replace RelFileNode with RelFileNumber when we're talking about just the
single number, i.e. (1) from above, and with RelFileLocator when we're
talking about all the things that are needed to locate a relation's files
on disk, i.e. (2) from above. In the places where we refer to (3) as
a relfilenode, instead refer to "relation storage".
Since there is a ton of SQL code in the world that knows about
pg_class.relfilenode, don't change the name of that column, or of other
SQL-facing things that derive their name from it.
On the other hand, do adjust closely-related internal terminology. For
example, the structure member names dbNode and spcNode appear to be
derived from the fact that the structure itself was called RelFileNode,
so change those to dbOid and spcOid. Likewise, various variables with
names like rnode and relnode get renamed appropriately, according to
how they're being used in context.
Hopefully, this is clearer than before. It is also preparation for
future patches that intend to widen the relfilenumber fields from its
current width of 32 bits. Variables that store a relfilenumber are now
declared as type RelFileNumber rather than type Oid; right now, these
are the same, but that can now more easily be changed.
Dilip Kumar, per an idea from me. Reviewed also by Andres Freund.
I fixed some whitespace issues, changed a couple of words in a
comment, and made one other minor correction.
Discussion: http://postgr.es/m/CA+TgmoamOtXbVAQf9hWFzonUo6bhhjS6toZQd7HZ-pmojtAmag@mail.gmail.com
Discussion: http://postgr.es/m/CA+Tgmobp7+7kmi4gkq7Y+4AM9fTvL+O1oQ4-5gFTT+6Ng-dQ=g@mail.gmail.com
Discussion: http://postgr.es/m/CAFiTN-vTe79M8uDH1yprOU64MNFE+R3ODRuA+JWf27JbhY4hJw@mail.gmail.com
2022-07-06 17:39:09 +02:00
|
|
|
relfilenumber = RelationMapOidToFilenumberForDatabase(srcpath,
|
|
|
|
classForm->oid);
|
Add new block-by-block strategy for CREATE DATABASE.
Because this strategy logs changes on a block-by-block basis, it
avoids the need to checkpoint before and after the operation.
However, because it logs each changed block individually, it might
generate a lot of extra write-ahead logging if the template database
is large. Therefore, the older strategy remains available via a new
STRATEGY parameter to CREATE DATABASE, and a corresponding --strategy
option to createdb.
Somewhat controversially, this patch assembles the list of relations
to be copied to the new database by reading the pg_class relation of
the template database. Cross-database access like this isn't normally
possible, but it can be made to work here because there can't be any
connections to the database being copied, nor can it contain any
in-doubt transactions. Even so, we have to use lower-level interfaces
than normal, since the table scan and relcache interfaces will not
Add new block-by-block strategy for CREATE DATABASE.
Because this strategy logs changes on a block-by-block basis, it
avoids the need to checkpoint before and after the operation.
However, because it logs each changed block individually, it might
generate a lot of extra write-ahead logging if the template database
is large. Therefore, the older strategy remains available via a new
STRATEGY parameter to CREATE DATABASE, and a corresponding --strategy
option to createdb.
Somewhat controversially, this patch assembles the list of relations
to be copied to the new database by reading the pg_class relation of
the template database. Cross-database access like this isn't normally
possible, but it can be made to work here because there can't be any
connections to the database being copied, nor can it contain any
in-doubt transactions. Even so, we have to use lower-level interfaces
than normal, since the table scan and relcache interfaces will not
work for a database to which we're not connected. The advantage of
this approach is that we do not need to rely on the filesystem to
determine what ought to be copied, but instead on PostgreSQL's own
knowledge of the database structure. This avoids, for example,
copying stray files that happen to be located in the source database
directory.
Dilip Kumar, with a fairly large number of cosmetic changes by me.
Reviewed and tested by Ashutosh Sharma, Andres Freund, John Naylor,
Greg Nancarrow, Neha Sharma. Additional feedback from Bruce Momjian,
Heikki Linnakangas, Julien Rouhaud, Adam Brusselback, Kyotaro
Horiguchi, Tomas Vondra, Andrew Dunstan, Álvaro Herrera, and others.
Discussion: http://postgr.es/m/CA+TgmoYtcdxBjLh31DLxUXHxFVMPGzrU5_T=CYCvRyFHywSBUQ@mail.gmail.com
2022-03-29 17:31:43 +02:00

Change internal RelFileNode references to RelFileNumber or RelFileLocator.
We have been using the term RelFileNode to refer to either (1) the
integer that is used to name the sequence of files for a certain relation
within the directory set aside for that tablespace/database combination;
or (2) that value plus the OIDs of the tablespace and database; or
occasionally (3) the whole series of files created for a relation
based on those values. Using the same name for more than one thing is
confusing.
Replace RelFileNode with RelFileNumber when we're talking about just the
single number, i.e. (1) from above, and with RelFileLocator when we're
talking about all the things that are needed to locate a relation's files
on disk, i.e. (2) from above. In the places where we refer to (3) as
a relfilenode, instead refer to "relation storage".
Since there is a ton of SQL code in the world that knows about
pg_class.relfilenode, don't change the name of that column, or of other
SQL-facing things that derive their name from it.
On the other hand, do adjust closely-related internal terminology. For
example, the structure member names dbNode and spcNode appear to be
derived from the fact that the structure itself was called RelFileNode,
so change those to dbOid and spcOid. Likewise, various variables with
names like rnode and relnode get renamed appropriately, according to
how they're being used in context.
Hopefully, this is clearer than before. It is also preparation for
future patches that intend to widen the relfilenumber fields from its
current width of 32 bits. Variables that store a relfilenumber are now
declared as type RelFileNumber rather than type Oid; right now, these
are the same, but that can now more easily be changed.
Dilip Kumar, per an idea from me. Reviewed also by Andres Freund.
I fixed some whitespace issues, changed a couple of words in a
comment, and made one other minor correction.
Discussion: http://postgr.es/m/CA+TgmoamOtXbVAQf9hWFzonUo6bhhjS6toZQd7HZ-pmojtAmag@mail.gmail.com
Discussion: http://postgr.es/m/CA+Tgmobp7+7kmi4gkq7Y+4AM9fTvL+O1oQ4-5gFTT+6Ng-dQ=g@mail.gmail.com
Discussion: http://postgr.es/m/CAFiTN-vTe79M8uDH1yprOU64MNFE+R3ODRuA+JWf27JbhY4hJw@mail.gmail.com
2022-07-06 17:39:09 +02:00
	/* We must have a valid relfilenumber. */
	if (!RelFileNumberIsValid(relfilenumber))
		elog(ERROR, "relation with OID %u does not have a valid relfilenumber",
			 classForm->oid);

	/* Prepare a rel info element and add it to the list. */
	relinfo = (CreateDBRelInfo *) palloc(sizeof(CreateDBRelInfo));
	if (OidIsValid(classForm->reltablespace))
		relinfo->rlocator.spcOid = classForm->reltablespace;
	else
		relinfo->rlocator.spcOid = tbid;
	relinfo->rlocator.dbOid = dbid;
	relinfo->rlocator.relNumber = relfilenumber;
	relinfo->reloid = classForm->oid;

	/* Temporary relations were rejected above. */
	Assert(classForm->relpersistence != RELPERSISTENCE_TEMP);
	relinfo->permanent =
		(classForm->relpersistence == RELPERSISTENCE_PERMANENT) ? true : false;

	return relinfo;
}

/*
 * Create database directory and write out the PG_VERSION file in the database
 * path.  If isRedo is true, it's okay for the database directory to exist
 * already.
 */
static void
CreateDirAndVersionFile(char *dbpath, Oid dbid, Oid tsid, bool isRedo)
{
	int			fd;
	int			nbytes;
	char		versionfile[MAXPGPATH];
	char		buf[16];

	/*
	 * Prepare version data before starting a critical section.
	 *
	 * Note that we don't have to copy this from the source database; there's
	 * only one legal value.
	 */
	sprintf(buf, "%s\n", PG_MAJORVERSION);
	nbytes = strlen(PG_MAJORVERSION) + 1;

	/* If we are not in WAL replay then write the WAL. */
	if (!isRedo)
	{
		xl_dbase_create_wal_log_rec xlrec;
		XLogRecPtr	lsn;

		START_CRIT_SECTION();

		xlrec.db_id = dbid;
		xlrec.tablespace_id = tsid;

		XLogBeginInsert();
		XLogRegisterData((char *) (&xlrec),
						 sizeof(xl_dbase_create_wal_log_rec));

		lsn = XLogInsert(RM_DBASE_ID, XLOG_DBASE_CREATE_WAL_LOG);

		/* As always, WAL must hit the disk before the data update does. */
		XLogFlush(lsn);
	}

	/* Create database directory. */
	if (MakePGDirectory(dbpath) < 0)
	{
		/* Failure other than already exists or not in WAL replay? */
		if (errno != EEXIST || !isRedo)
			ereport(ERROR,
					(errcode_for_file_access(),
					 errmsg("could not create directory \"%s\": %m", dbpath)));
	}

	/*
	 * Create PG_VERSION file in the database path.  If the file already
	 * exists and we are in WAL replay then try again to open it in write
	 * mode.
	 */
	snprintf(versionfile, sizeof(versionfile), "%s/%s", dbpath, "PG_VERSION");

	fd = OpenTransientFile(versionfile, O_WRONLY | O_CREAT | O_EXCL | PG_BINARY);
	if (fd < 0 && errno == EEXIST && isRedo)
		fd = OpenTransientFile(versionfile, O_WRONLY | O_TRUNC | PG_BINARY);

	if (fd < 0)
		ereport(ERROR,
				(errcode_for_file_access(),
				 errmsg("could not create file \"%s\": %m", versionfile)));

	/* Write PG_MAJORVERSION in the PG_VERSION file. */
	pgstat_report_wait_start(WAIT_EVENT_VERSION_FILE_WRITE);
	errno = 0;
	if ((int) write(fd, buf, nbytes) != nbytes)
	{
		/* If write didn't set errno, assume problem is no disk space. */
		if (errno == 0)
			errno = ENOSPC;
		ereport(ERROR,
				(errcode_for_file_access(),
				 errmsg("could not write to file \"%s\": %m", versionfile)));
	}
	pgstat_report_wait_end();

	/* Close the version file. */
	CloseTransientFile(fd);

	/* Critical section done. */
	if (!isRedo)
		END_CRIT_SECTION();
}

/*
 * Create a new database using the FILE_COPY strategy.
 *
 * Copy each tablespace at the filesystem level, and log a single WAL record
 * for each tablespace copied.  This requires a checkpoint before and after the
 * copy, which may be expensive, but it does greatly reduce WAL generation
 * if the copied database is large.
 */
static void
CreateDatabaseUsingFileCopy(Oid src_dboid, Oid dst_dboid, Oid src_tsid,
							Oid dst_tsid)
{
	TableScanDesc scan;
	Relation	rel;
	HeapTuple	tuple;

	/*
	 * Force a checkpoint before starting the copy.  This will force all dirty
	 * buffers, including those of unlogged tables, out to disk, to ensure
	 * source database is up-to-date on disk for the copy.
	 * FlushDatabaseBuffers() would suffice for that, but we also want to
	 * process any pending unlink requests.  Otherwise, if a checkpoint
	 * happened while we're copying files, a file might be deleted just when
	 * we're about to copy it, causing the lstat() call in copydir() to fail
	 * with ENOENT.
	 */
	RequestCheckpoint(CHECKPOINT_IMMEDIATE | CHECKPOINT_FORCE |
					  CHECKPOINT_WAIT | CHECKPOINT_FLUSH_ALL);

	/*
	 * Iterate through all tablespaces of the template database, and copy each
	 * one to the new database.
	 */
	rel = table_open(TableSpaceRelationId, AccessShareLock);
	scan = table_beginscan_catalog(rel, 0, NULL);
	while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
	{
		Form_pg_tablespace spaceform = (Form_pg_tablespace) GETSTRUCT(tuple);
		Oid			srctablespace = spaceform->oid;
		Oid			dsttablespace;
		char	   *srcpath;
		char	   *dstpath;
		struct stat st;

		/* No need to copy global tablespace */
		if (srctablespace == GLOBALTABLESPACE_OID)
			continue;

		srcpath = GetDatabasePath(src_dboid, srctablespace);

		if (stat(srcpath, &st) < 0 || !S_ISDIR(st.st_mode) ||
			directory_is_empty(srcpath))
		{
			/* Assume we can ignore it */
			pfree(srcpath);
			continue;
		}

		if (srctablespace == src_tsid)
			dsttablespace = dst_tsid;
		else
			dsttablespace = srctablespace;

		dstpath = GetDatabasePath(dst_dboid, dsttablespace);

		/*
		 * Copy this subdirectory to the new location
		 *
		 * We don't need to copy subdirectories
		 */
		copydir(srcpath, dstpath, false);

		/* Record the filesystem change in XLOG */
		{
			xl_dbase_create_file_copy_rec xlrec;

			xlrec.db_id = dst_dboid;
			xlrec.tablespace_id = dsttablespace;
			xlrec.src_db_id = src_dboid;
			xlrec.src_tablespace_id = srctablespace;

			XLogBeginInsert();
			XLogRegisterData((char *) &xlrec,
							 sizeof(xl_dbase_create_file_copy_rec));

			(void) XLogInsert(RM_DBASE_ID,
							  XLOG_DBASE_CREATE_FILE_COPY | XLR_SPECIAL_REL_UPDATE);
		}

		pfree(srcpath);
		pfree(dstpath);
	}
	table_endscan(scan);
	table_close(rel, AccessShareLock);

	/*
	 * We force a checkpoint before committing.  This effectively means that
	 * committed XLOG_DBASE_CREATE_FILE_COPY operations will never need to be
	 * replayed (at least not in ordinary crash recovery; we still have to
	 * make the XLOG entry for the benefit of PITR operations).  This avoids
	 * two nasty scenarios:
	 *
	 * #1: When PITR is off, we don't XLOG the contents of newly created
	 * indexes; therefore the drop-and-recreate-whole-directory behavior of
	 * DBASE_CREATE replay would lose such indexes.
	 *
	 * #2: Since we have to recopy the source database during DBASE_CREATE
	 * replay, we run the risk of copying changes in it that were committed
	 * after the original CREATE DATABASE command but before the system crash
	 * that led to the replay.  This is at least unexpected and at worst could
	 * lead to inconsistencies, eg duplicate table names.
	 *
	 * (Both of these were real bugs in releases 8.0 through 8.0.3.)
	 *
	 * In PITR replay, the first of these isn't an issue, and the second is
	 * only a risk if the CREATE DATABASE and subsequent template database
	 * change both occur while a base backup is being taken.  There doesn't
	 * seem to be much we can do about that except document it as a
	 * limitation.
	 *
	 * See CreateDatabaseUsingWalLog() for a less cheesy CREATE DATABASE
	 * strategy that avoids these problems.
	 */
	RequestCheckpoint(CHECKPOINT_IMMEDIATE | CHECKPOINT_FORCE | CHECKPOINT_WAIT);
}

/*
 * CREATE DATABASE
 */
Oid
createdb(ParseState *pstate, const CreatedbStmt *stmt)
{
	Oid			src_dboid;
	Oid			src_owner;
	int			src_encoding = -1;
	char	   *src_collate = NULL;
	char	   *src_ctype = NULL;
	char	   *src_iculocale = NULL;
	char	   *src_icurules = NULL;
	char		src_locprovider = '\0';
	char	   *src_collversion = NULL;
	bool		src_istemplate;
	bool		src_allowconn;
	TransactionId src_frozenxid = InvalidTransactionId;
	MultiXactId src_minmxid = InvalidMultiXactId;
	Oid			src_deftablespace;
	volatile Oid dst_deftablespace;
	Relation	pg_database_rel;
	HeapTuple	tuple;
	Datum		new_record[Natts_pg_database] = {0};
	bool		new_record_nulls[Natts_pg_database] = {0};
pg_upgrade: Preserve database OIDs.
Commit 9a974cbcba005256a19991203583a94b4f9a21a9 arranged to preserve
relfilenodes and tablespace OIDs. For similar reasons, also arrange
to preserve database OIDs.
One problem is that, up until now, the OIDs assigned to the template0
and postgres databases have not been fixed. This could be a problem
when upgrading, because pg_upgrade might try to migrate a database
from the old cluster to the new cluster while keeping the OID and find
a different database with that OID, resulting in a failure. If it finds
a database with the same name and the same OID that's OK: it will be
dropped and recreated. But the same OID and a different name is a
problem.
To prevent that, fix the OIDs for postgres and template0 to specific
values less than 16384. To avoid running afoul of this rule, these
values should not be changed in future releases. It's not a problem
that these OIDs aren't fixed in existing releases, because the OIDs
that we're assigning here weren't used for either of these databases
in any previous release. Thus, there's no chance that an upgrade of
a cluster from any previous release will collide with the OIDs we're
assigning here. And going forward, the OIDs will always be fixed, so
the only potential collision is with a system database having the
same name and the same OID, which is OK.
This patch lets users assign a specific OID to a database as well,
provided however that it can't be less than 16384. I (rhaas) thought
it might be better not to expose this capability to users, but the
consensus was otherwise, so the syntax is documented. Letting users
assign OIDs below 16384 would not be OK, though, because a
user-created database with a low-numbered OID might collide with a
system-created database in a future release. We therefore prohibit
that.
Shruthi KC, based on an earlier patch from Antonin Houska, reviewed
and with some adjustments by me.
Discussion: http://postgr.es/m/CA+TgmoYgTwYcUmB=e8+hRHOFA0kkS6Kde85+UNdon6q7bt1niQ@mail.gmail.com
Discussion: http://postgr.es/m/CAASxf_Mnwm1Dh2vd5FAhVX6S1nwNSZUB1z12VddYtM++H2+p7w@mail.gmail.com
2022-01-24 20:23:15 +01:00
	Oid			dboid = InvalidOid;
	Oid			datdba;
	ListCell   *option;
	DefElem    *dtablespacename = NULL;
	DefElem    *downer = NULL;
	DefElem    *dtemplate = NULL;
	DefElem    *dencoding = NULL;
	DefElem    *dlocale = NULL;
	DefElem    *dcollate = NULL;
	DefElem    *dctype = NULL;
	DefElem    *diculocale = NULL;
	DefElem    *dicurules = NULL;
	DefElem    *dlocprovider = NULL;
	DefElem    *distemplate = NULL;
	DefElem    *dallowconnections = NULL;
	DefElem    *dconnlimit = NULL;
	DefElem    *dcollversion = NULL;
	DefElem    *dstrategy = NULL;
	char	   *dbname = stmt->dbname;
	char	   *dbowner = NULL;
	const char *dbtemplate = NULL;
	char	   *dbcollate = NULL;
	char	   *dbctype = NULL;
	char	   *dbiculocale = NULL;
	char	   *dbicurules = NULL;
	char		dblocprovider = '\0';
Replace empty locale name with implied value in CREATE DATABASE and initdb.
setlocale() accepts locale name "" as meaning "the locale specified by the
process's environment variables". Historically we've accepted that for
Postgres' locale settings, too. However, it's fairly unsafe to store an
empty string in a new database's pg_database.datcollate or datctype fields,
because then the interpretation could vary across postmaster restarts,
possibly resulting in index corruption and other unpleasantness.
Instead, we should expand "" to whatever it means at the moment of calling
CREATE DATABASE, which we can do by saving the value returned by
setlocale().
For consistency, make initdb set up the initial lc_xxx parameter values the
same way. initdb was already doing the right thing for empty locale names,
but it did not replace non-empty names with setlocale results. On a
platform where setlocale chooses to canonicalize the spellings of locale
names, this would result in annoying inconsistency. (It seems that popular
implementations of setlocale don't do such canonicalization, which is a
pity, but the POSIX spec certainly allows it to be done.) The same risk
of inconsistency leads me to not venture back-patching this, although it
could certainly be seen as a longstanding bug.
Per report from Jeff Davis, though this is not his proposed patch.
2012-03-26 03:47:22 +02:00
|
|
|
char *canonname;
|
2006-05-04 18:07:29 +02:00
|
|
|
int encoding = -1;
|
2014-07-02 02:10:38 +02:00
|
|
|
bool dbistemplate = false;
|
|
|
|
bool dballowconnections = true;
|
2006-05-04 18:07:29 +02:00
|
|
|
int dbconnlimit = -1;
|
2022-02-14 08:09:04 +01:00
|
|
|
char *dbcollversion = NULL;
|
2008-08-04 20:03:46 +02:00
|
|
|
int notherbackends;
|
|
|
|
int npreparedxacts;
|
Add new block-by-block strategy for CREATE DATABASE.
Because this strategy logs changes on a block-by-block basis, it
avoids the need to checkpoint before and after the operation.
However, because it logs each changed block individually, it might
generate a lot of extra write-ahead logging if the template database
is large. Therefore, the older strategy remains available via a new
STRATEGY parameter to CREATE DATABASE, and a corresponding --strategy
option to createdb.
Somewhat controversially, this patch assembles the list of relations
to be copied to the new database by reading the pg_class relation of
the template database. Cross-database access like this isn't normally
possible, but it can be made to work here because there can't be any
connections to the database being copied, nor can it contain any
in-doubt transactions. Even so, we have to use lower-level interfaces
than normal, since the table scan and relcache interfaces will not
work for a database to which we're not connected. The advantage of
this approach is that we do not need to rely on the filesystem to
determine what ought to be copied, but instead on PostgreSQL's own
knowledge of the database structure. This avoids, for example,
copying stray files that happen to be located in the source database
directory.
Dilip Kumar, with a fairly large number of cosmetic changes by me.
Reviewed and tested by Ashutosh Sharma, Andres Freund, John Naylor,
Greg Nancarrow, Neha Sharma. Additional feedback from Bruce Momjian,
Heikki Linnakangas, Julien Rouhaud, Adam Brusselback, Kyotaro
Horiguchi, Tomas Vondra, Andrew Dunstan, Álvaro Herrera, and others.
Discussion: http://postgr.es/m/CA+TgmoYtcdxBjLh31DLxUXHxFVMPGzrU5_T=CYCvRyFHywSBUQ@mail.gmail.com
2022-03-29 17:31:43 +02:00
|
|
|
CreateDBStrategy dbstrategy = CREATEDB_WAL_LOG;
|
2008-04-17 01:59:40 +02:00
|
|
|
createdb_failure_params fparms;
|
2004-06-18 08:14:31 +02:00
|
|
|
|
2002-06-18 19:27:58 +02:00
|
|
|
	/* Extract options from the statement node tree */
	foreach(option, stmt->options)
	{
		DefElem    *defel = (DefElem *) lfirst(option);

		if (strcmp(defel->defname, "tablespace") == 0)
		{
			if (dtablespacename)
				errorConflictingDefElem(defel, pstate);
			dtablespacename = defel;
		}
		else if (strcmp(defel->defname, "owner") == 0)
		{
			if (downer)
				errorConflictingDefElem(defel, pstate);
			downer = defel;
		}
		else if (strcmp(defel->defname, "template") == 0)
		{
			if (dtemplate)
				errorConflictingDefElem(defel, pstate);
			dtemplate = defel;
		}
		else if (strcmp(defel->defname, "encoding") == 0)
		{
			if (dencoding)
				errorConflictingDefElem(defel, pstate);
			dencoding = defel;
		}
		else if (strcmp(defel->defname, "locale") == 0)
		{
			if (dlocale)
				errorConflictingDefElem(defel, pstate);
			dlocale = defel;
		}
		else if (strcmp(defel->defname, "lc_collate") == 0)
		{
			if (dcollate)
				errorConflictingDefElem(defel, pstate);
			dcollate = defel;
		}
		else if (strcmp(defel->defname, "lc_ctype") == 0)
		{
			if (dctype)
				errorConflictingDefElem(defel, pstate);
			dctype = defel;
		}
		else if (strcmp(defel->defname, "icu_locale") == 0)
		{
			if (diculocale)
				errorConflictingDefElem(defel, pstate);
			diculocale = defel;
		}
		else if (strcmp(defel->defname, "icu_rules") == 0)
		{
			if (dicurules)
				errorConflictingDefElem(defel, pstate);
			dicurules = defel;
		}
		else if (strcmp(defel->defname, "locale_provider") == 0)
		{
			if (dlocprovider)
				errorConflictingDefElem(defel, pstate);
			dlocprovider = defel;
		}
		else if (strcmp(defel->defname, "is_template") == 0)
		{
			if (distemplate)
				errorConflictingDefElem(defel, pstate);
			distemplate = defel;
		}
		else if (strcmp(defel->defname, "allow_connections") == 0)
		{
			if (dallowconnections)
				errorConflictingDefElem(defel, pstate);
			dallowconnections = defel;
		}
		else if (strcmp(defel->defname, "connection_limit") == 0)
		{
			if (dconnlimit)
				errorConflictingDefElem(defel, pstate);
			dconnlimit = defel;
		}
		else if (strcmp(defel->defname, "collation_version") == 0)
		{
			if (dcollversion)
				errorConflictingDefElem(defel, pstate);
			dcollversion = defel;
		}
		else if (strcmp(defel->defname, "location") == 0)
		{
			ereport(WARNING,
					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
					 errmsg("LOCATION is not supported anymore"),
					 errhint("Consider using tablespaces instead."),
					 parser_errposition(pstate, defel->location)));
		}
		else if (strcmp(defel->defname, "oid") == 0)
		{
			dboid = defGetObjectId(defel);

			/*
			 * We don't normally permit new databases to be created with
			 * system-assigned OIDs.  pg_upgrade tries to preserve database
			 * OIDs, so we can't allow any database to be created with an OID
			 * that might be in use in a freshly-initialized cluster created
			 * by some future version.  We assume all such OIDs will be from
			 * the system-managed OID range.
			 *
			 * As an exception, however, we permit any OID to be assigned when
			 * allow_system_table_mods=on (so that initdb can assign system
			 * OIDs to template0 and postgres) or when performing a binary
			 * upgrade (so that pg_upgrade can preserve whatever OIDs it finds
			 * in the source cluster).
			 */
			if (dboid < FirstNormalObjectId &&
				!allowSystemTableMods && !IsBinaryUpgrade)
				ereport(ERROR,
						(errcode(ERRCODE_INVALID_PARAMETER_VALUE)),
						errmsg("OIDs less than %u are reserved for system objects", FirstNormalObjectId));
		}
		else if (strcmp(defel->defname, "strategy") == 0)
		{
			if (dstrategy)
				errorConflictingDefElem(defel, pstate);
			dstrategy = defel;
		}
		else
			ereport(ERROR,
					(errcode(ERRCODE_SYNTAX_ERROR),
					 errmsg("option \"%s\" not recognized", defel->defname),
					 parser_errposition(pstate, defel->location)));
	}

	if (downer && downer->arg)
		dbowner = defGetString(downer);
	if (dtemplate && dtemplate->arg)
		dbtemplate = defGetString(dtemplate);
	if (dencoding && dencoding->arg)
	{
		const char *encoding_name;

		if (IsA(dencoding->arg, Integer))
		{
			encoding = defGetInt32(dencoding);
			encoding_name = pg_encoding_to_char(encoding);
			if (strcmp(encoding_name, "") == 0 ||
				pg_valid_server_encoding(encoding_name) < 0)
				ereport(ERROR,
						(errcode(ERRCODE_UNDEFINED_OBJECT),
						 errmsg("%d is not a valid encoding code",
								encoding),
						 parser_errposition(pstate, dencoding->location)));
		}
		else
		{
			encoding_name = defGetString(dencoding);
			encoding = pg_valid_server_encoding(encoding_name);
			if (encoding < 0)
				ereport(ERROR,
						(errcode(ERRCODE_UNDEFINED_OBJECT),
						 errmsg("%s is not a valid encoding name",
								encoding_name),
						 parser_errposition(pstate, dencoding->location)));
		}
	}
	if (dlocale && dlocale->arg)
	{
		dbcollate = defGetString(dlocale);
		dbctype = defGetString(dlocale);
	}
	if (dcollate && dcollate->arg)
		dbcollate = defGetString(dcollate);
	if (dctype && dctype->arg)
		dbctype = defGetString(dctype);
	if (diculocale && diculocale->arg)
		dbiculocale = defGetString(diculocale);
	if (dicurules && dicurules->arg)
		dbicurules = defGetString(dicurules);
	if (dlocprovider && dlocprovider->arg)
	{
		char	   *locproviderstr = defGetString(dlocprovider);

		if (pg_strcasecmp(locproviderstr, "icu") == 0)
			dblocprovider = COLLPROVIDER_ICU;
		else if (pg_strcasecmp(locproviderstr, "libc") == 0)
			dblocprovider = COLLPROVIDER_LIBC;
		else
			ereport(ERROR,
					(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
					 errmsg("unrecognized locale provider: %s",
							locproviderstr)));
	}
	if (distemplate && distemplate->arg)
		dbistemplate = defGetBoolean(distemplate);
	if (dallowconnections && dallowconnections->arg)
		dballowconnections = defGetBoolean(dallowconnections);
	if (dconnlimit && dconnlimit->arg)
	{
		dbconnlimit = defGetInt32(dconnlimit);
		if (dbconnlimit < -1)
			ereport(ERROR,
					(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
					 errmsg("invalid connection limit: %d", dbconnlimit)));
	}
	if (dcollversion)
		dbcollversion = defGetString(dcollversion);

	/* obtain OID of proposed owner */
	if (dbowner)
		datdba = get_role_oid(dbowner, false);
	else
		datdba = GetUserId();

	/*
	 * To create a database, must have createdb privilege and must be able to
	 * become the target role (this does not imply that the target role itself
	 * must have createdb privilege).  The latter provision guards against
	 * "giveaway" attacks.  Note that a superuser will always have both of
	 * these privileges a fortiori.
	 */
	if (!have_createdb_privilege())
		ereport(ERROR,
				(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
				 errmsg("permission denied to create database")));

	check_can_set_role(GetUserId(), datdba);

	/*
	 * Lookup database (template) to be cloned, and obtain share lock on it.
	 * ShareLock allows two CREATE DATABASEs to work from the same template
	 * concurrently, while ensuring no one is busy dropping it in parallel
	 * (which would be Very Bad since we'd likely get an incomplete copy
	 * without knowing it).  This also prevents any new connections from being
	 * made to the source until we finish copying it, so we can be sure it
	 * won't change underneath us.
	 */
	if (!dbtemplate)
		dbtemplate = "template1";	/* Default template database name */

	if (!get_db_info(dbtemplate, ShareLock,
					 &src_dboid, &src_owner, &src_encoding,
					 &src_istemplate, &src_allowconn,
As a bonus, the old behavior that a subtransaction grabbing a stronger
tuple lock than the parent (sub)transaction held on a given tuple and
later aborting caused the weaker lock to be lost, has been fixed.
Many new spec files were added for isolation tester framework, to ensure
overall behavior is sane. There's probably room for several more tests.
There were several reviewers of this patch; in particular, Noah Misch
and Andres Freund spent considerable time in it. Original idea for the
patch came from Simon Riggs, after a problem report by Joel Jacobson.
Most code is from me, with contributions from Marti Raudsepp, Alexander
Shulgin, Noah Misch and Andres Freund.
This patch was discussed in several pgsql-hackers threads; the most
important start at the following message-ids:
AANLkTimo9XVcEzfiBR-ut3KVNDkjm2Vxh+t8kAmWjPuv@mail.gmail.com
1290721684-sup-3951@alvh.no-ip.org
1294953201-sup-2099@alvh.no-ip.org
1320343602-sup-2290@alvh.no-ip.org
1339690386-sup-8927@alvh.no-ip.org
4FE5FF020200002500048A3D@gw.wicourts.gov
4FEAB90A0200002500048B7D@gw.wicourts.gov
2013-01-23 16:04:59 +01:00
|
|
|
&src_frozenxid, &src_minmxid, &src_deftablespace,
|
2023-03-08 16:35:42 +01:00
|
|
|
&src_collate, &src_ctype, &src_iculocale, &src_icurules, &src_locprovider,
|
2022-03-17 11:11:21 +01:00
|
|
|
&src_collversion))
|
2003-07-19 01:20:33 +02:00
|
|
|
ereport(ERROR,
|
|
|
|
(errcode(ERRCODE_UNDEFINED_DATABASE),
|
2006-05-04 18:07:29 +02:00
|
|
|
errmsg("template database \"%s\" does not exist",
|
|
|
|
dbtemplate)));

	/*
	 * Permission check: to copy a DB that's not marked datistemplate, you
	 * must be superuser or the owner thereof.
	 */
	if (!src_istemplate)
	{
		if (!object_ownercheck(DatabaseRelationId, src_dboid, GetUserId()))
			ereport(ERROR,
					(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
					 errmsg("permission denied to copy database \"%s\"",
							dbtemplate)));
	}

	/* Validate the database creation strategy. */
	if (dstrategy && dstrategy->arg)
	{
		char	   *strategy;

		strategy = defGetString(dstrategy);
		if (strcmp(strategy, "wal_log") == 0)
			dbstrategy = CREATEDB_WAL_LOG;
		else if (strcmp(strategy, "file_copy") == 0)
			dbstrategy = CREATEDB_FILE_COPY;
		else
			ereport(ERROR,
					(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
					 errmsg("invalid create database strategy \"%s\"", strategy),
					 errhint("Valid strategies are \"wal_log\" and \"file_copy\".")));
	}

	/* If encoding or locales are defaulted, use source's setting */
	if (encoding < 0)
		encoding = src_encoding;
	if (dbcollate == NULL)
		dbcollate = src_collate;
	if (dbctype == NULL)
		dbctype = src_ctype;
	if (dblocprovider == '\0')
		dblocprovider = src_locprovider;
	if (dbiculocale == NULL && dblocprovider == COLLPROVIDER_ICU)
		dbiculocale = src_iculocale;
	if (dbicurules == NULL && dblocprovider == COLLPROVIDER_ICU)
		dbicurules = src_icurules;

	/* Some encodings are client only */
	if (!PG_VALID_BE_ENCODING(encoding))
		ereport(ERROR,
				(errcode(ERRCODE_WRONG_OBJECT_TYPE),
				 errmsg("invalid server encoding %d", encoding)));

	/* Check that the chosen locales are valid, and get canonical spellings */
	if (!check_locale(LC_COLLATE, dbcollate, &canonname))
		ereport(ERROR,
				(errcode(ERRCODE_WRONG_OBJECT_TYPE),
				 errmsg("invalid locale name: \"%s\"", dbcollate)));
	dbcollate = canonname;
	if (!check_locale(LC_CTYPE, dbctype, &canonname))
		ereport(ERROR,
				(errcode(ERRCODE_WRONG_OBJECT_TYPE),
				 errmsg("invalid locale name: \"%s\"", dbctype)));
	dbctype = canonname;

	check_encoding_locale_matches(encoding, dbcollate, dbctype);

	if (dblocprovider == COLLPROVIDER_ICU)
	{
		if (!(is_encoding_supported_by_icu(encoding)))
			ereport(ERROR,
					(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
					 errmsg("encoding \"%s\" is not supported with ICU provider",
							pg_encoding_to_char(encoding))));

		/*
		 * This would happen if template0 uses the libc provider but the new
		 * database uses icu.
		 */
		if (!dbiculocale)
			ereport(ERROR,
					(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
					 errmsg("ICU locale must be specified")));

		/*
		 * During binary upgrade, or when the locale came from the template
		 * database, preserve the locale string.  Otherwise, canonicalize it
		 * to a language tag.
		 */
		if (!IsBinaryUpgrade && dbiculocale != src_iculocale)
		{
			char	   *langtag = icu_language_tag(dbiculocale,
												   icu_validation_level);

			if (langtag && strcmp(dbiculocale, langtag) != 0)
			{
				ereport(NOTICE,
						(errmsg("using standard form \"%s\" for locale \"%s\"",
								langtag, dbiculocale)));

				dbiculocale = langtag;
			}
		}

		icu_validate_locale(dbiculocale);
	}
	else
	{
		if (dbiculocale)
			ereport(ERROR,
					(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
					 errmsg("ICU locale cannot be specified unless locale provider is ICU")));

		if (dbicurules)
			ereport(ERROR,
					(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
					 errmsg("ICU rules cannot be specified unless locale provider is ICU")));
	}

	/*
	 * Check that the new encoding and locale settings match the source
	 * database.  We insist on this because we simply copy the source data ---
	 * any non-ASCII data would be wrongly encoded, and any indexes sorted
	 * according to the source locale would be wrong.
	 *
	 * However, we assume that template0 doesn't contain any non-ASCII data
	 * nor any indexes that depend on collation or ctype, so template0 can be
	 * used as template for creating a database with any encoding or locale.
	 */
	if (strcmp(dbtemplate, "template0") != 0)
	{
		if (encoding != src_encoding)
			ereport(ERROR,
					(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
					 errmsg("new encoding (%s) is incompatible with the encoding of the template database (%s)",
							pg_encoding_to_char(encoding),
							pg_encoding_to_char(src_encoding)),
					 errhint("Use the same encoding as in the template database, or use template0 as template.")));

		if (strcmp(dbcollate, src_collate) != 0)
			ereport(ERROR,
					(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
					 errmsg("new collation (%s) is incompatible with the collation of the template database (%s)",
							dbcollate, src_collate),
					 errhint("Use the same collation as in the template database, or use template0 as template.")));

		if (strcmp(dbctype, src_ctype) != 0)
			ereport(ERROR,
					(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
					 errmsg("new LC_CTYPE (%s) is incompatible with the LC_CTYPE of the template database (%s)",
							dbctype, src_ctype),
					 errhint("Use the same LC_CTYPE as in the template database, or use template0 as template.")));

		if (dblocprovider != src_locprovider)
			ereport(ERROR,
					(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
					 errmsg("new locale provider (%s) does not match locale provider of the template database (%s)",
							collprovider_name(dblocprovider), collprovider_name(src_locprovider)),
					 errhint("Use the same locale provider as in the template database, or use template0 as template.")));

		if (dblocprovider == COLLPROVIDER_ICU)
		{
			char	   *val1;
			char	   *val2;

			Assert(dbiculocale);
			Assert(src_iculocale);
			if (strcmp(dbiculocale, src_iculocale) != 0)
				ereport(ERROR,
						(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
						 errmsg("new ICU locale (%s) is incompatible with the ICU locale of the template database (%s)",
								dbiculocale, src_iculocale),
						 errhint("Use the same ICU locale as in the template database, or use template0 as template.")));

			val1 = dbicurules;
			if (!val1)
				val1 = "";
			val2 = src_icurules;
			if (!val2)
				val2 = "";
			if (strcmp(val1, val2) != 0)
				ereport(ERROR,
						(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
						 errmsg("new ICU collation rules (%s) are incompatible with the ICU collation rules of the template database (%s)",
								val1, val2),
						 errhint("Use the same ICU collation rules as in the template database, or use template0 as template.")));
		}
	}

	/*
	 * If we got a collation version for the template database, check that it
	 * matches the actual OS collation version.  Otherwise error; the user
	 * needs to fix the template database first.  Don't complain if a
	 * collation version was specified explicitly as a statement option; that
	 * is used by pg_upgrade to reproduce the old state exactly.
	 *
	 * (If the template database has no collation version, then either the
	 * platform/provider does not support collation versioning, or it's
	 * template0, for which we stipulate that it does not contain
	 * collation-using objects.)
	 */
	if (src_collversion && !dcollversion)
	{
		char	   *actual_versionstr;

		actual_versionstr = get_collation_actual_version(dblocprovider, dblocprovider == COLLPROVIDER_ICU ? dbiculocale : dbcollate);
		if (!actual_versionstr)
			ereport(ERROR,
					(errmsg("template database \"%s\" has a collation version, but no actual collation version could be determined",
							dbtemplate)));

		if (strcmp(actual_versionstr, src_collversion) != 0)
			ereport(ERROR,
					(errmsg("template database \"%s\" has a collation version mismatch",
							dbtemplate),
					 errdetail("The template database was created using collation version %s, "
							   "but the operating system provides version %s.",
							   src_collversion, actual_versionstr),
					 errhint("Rebuild all objects in the template database that use the default collation and run "
							 "ALTER DATABASE %s REFRESH COLLATION VERSION, "
							 "or build PostgreSQL with the right library version.",
							 quote_identifier(dbtemplate))));
	}

	if (dbcollversion == NULL)
		dbcollversion = src_collversion;

	/*
	 * Normally, we copy the collation version from the template database.
	 * This last resort only applies if the template database does not have a
	 * collation version, which is normally only the case for template0.
	 */
	if (dbcollversion == NULL)
		dbcollversion = get_collation_actual_version(dblocprovider, dblocprovider == COLLPROVIDER_ICU ? dbiculocale : dbcollate);

	/* Resolve default tablespace for new database */
	if (dtablespacename && dtablespacename->arg)
	{
		char	   *tablespacename;
		AclResult	aclresult;

		tablespacename = defGetString(dtablespacename);
		dst_deftablespace = get_tablespace_oid(tablespacename, false);
		/* check permissions */
		aclresult = object_aclcheck(TableSpaceRelationId, dst_deftablespace, GetUserId(),
									ACL_CREATE);
		if (aclresult != ACLCHECK_OK)
			aclcheck_error(aclresult, OBJECT_TABLESPACE,
						   tablespacename);

		/* pg_global must never be the default tablespace */
		if (dst_deftablespace == GLOBALTABLESPACE_OID)
			ereport(ERROR,
					(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
					 errmsg("pg_global cannot be used as default tablespace")));

		/*
		 * If we are trying to change the default tablespace of the template,
		 * we require that the template not have any files in the new default
		 * tablespace.  This is necessary because otherwise the copied
		 * database would contain pg_class rows that refer to its default
		 * tablespace both explicitly (by OID) and implicitly (as zero), which
		 * would cause problems.  For example another CREATE DATABASE using
		 * the copied database as template, and trying to change its default
		 * tablespace again, would yield outright incorrect results (it would
		 * improperly move tables to the new default tablespace that should
		 * stay in the same tablespace).
		 */
		if (dst_deftablespace != src_deftablespace)
		{
			char	   *srcpath;
			struct stat st;

			srcpath = GetDatabasePath(src_dboid, dst_deftablespace);

			if (stat(srcpath, &st) == 0 &&
				S_ISDIR(st.st_mode) &&
				!directory_is_empty(srcpath))
				ereport(ERROR,
						(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
						 errmsg("cannot assign new default tablespace \"%s\"",
								tablespacename),
						 errdetail("There is a conflict because database \"%s\" already has some tables in this tablespace.",
								   dbtemplate)));
			pfree(srcpath);
		}
	}
	else
	{
		/* Use template database's default tablespace */
		dst_deftablespace = src_deftablespace;
		/* Note there is no additional permission check in this path */
	}

	/*
	 * If built with appropriate switch, whine when regression-testing
	 * conventions for database names are violated.  But don't complain during
	 * initdb.
	 */
#ifdef ENFORCE_REGRESSION_TEST_NAME_RESTRICTIONS
	if (IsUnderPostmaster && strstr(dbname, "regression") == NULL)
		elog(WARNING, "databases created by regression test cases should have names including \"regression\"");
#endif
|
|
|
|
|
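
	/*
	 * The symbol above is expected to be supplied at build time, e.g. by
	 * compiling one or more buildfarm members with
	 * -DENFORCE_REGRESSION_TEST_NAME_RESTRICTIONS (an illustration of the
	 * intended use, not the only way to define it).
	 */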

	/*
	 * Check for db name conflict.  This is just to give a more friendly error
	 * message than "unique index violation".  There's a race condition but
	 * we're willing to accept the less friendly message in that case.
	 */
	if (OidIsValid(get_database_oid(dbname, true)))
		ereport(ERROR,
				(errcode(ERRCODE_DUPLICATE_DATABASE),
				 errmsg("database \"%s\" already exists", dbname)));

	/*
	 * The source DB can't have any active backends, except this one
	 * (exception is to allow CREATE DB while connected to template1).
	 * Otherwise we might copy inconsistent data.
	 *
	 * This should be last among the basic error checks, because it involves
	 * potential waiting; we may as well throw an error first if we're gonna
	 * throw one.
	 */
	if (CountOtherDBBackends(src_dboid, &notherbackends, &npreparedxacts))
		ereport(ERROR,
				(errcode(ERRCODE_OBJECT_IN_USE),
				 errmsg("source database \"%s\" is being accessed by other users",
						dbtemplate),
				 errdetail_busy_db(notherbackends, npreparedxacts)));

	/*
	 * Select an OID for the new database, checking that it doesn't have a
	 * filename conflict with anything already existing in the tablespace
	 * directories.
	 */
	pg_database_rel = table_open(DatabaseRelationId, RowExclusiveLock);

	/*
	 * If the database OID is explicitly configured, check whether that OID is
	 * already in use or whether a data directory with it already exists.
	 */
	if (OidIsValid(dboid))
	{
		char	   *existing_dbname = get_database_name(dboid);

		if (existing_dbname != NULL)
			ereport(ERROR,
					(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
					 errmsg("database OID %u is already in use by database \"%s\"",
							dboid, existing_dbname)));

		if (check_db_file_conflict(dboid))
			ereport(ERROR,
					(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
					 errmsg("data directory with the specified OID %u already exists", dboid)));
	}
	else
	{
		/* Select an OID for the new database if it is not explicitly configured. */
		do
		{
			dboid = GetNewOidWithIndex(pg_database_rel, DatabaseOidIndexId,
									   Anum_pg_database_oid);
		} while (check_db_file_conflict(dboid));
	}
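
	/*
	 * Illustrative usage (hypothetical database name): an explicitly
	 * configured OID reaches this code via CREATE DATABASE's OID option, e.g.
	 *
	 *     CREATE DATABASE mydb OID = 20000;
	 *
	 * (values below 16384 are rejected, to keep clear of system-assigned
	 * OIDs), while a plain CREATE DATABASE leaves dboid invalid and takes the
	 * GetNewOidWithIndex() retry loop above.
	 */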

	/*
	 * Insert a new tuple into pg_database.  This establishes our ownership of
	 * the new database name (anyone else trying to insert the same name will
	 * block on the unique index, and fail after we commit).
	 */

	Assert((dblocprovider == COLLPROVIDER_ICU && dbiculocale) ||
		   (dblocprovider != COLLPROVIDER_ICU && !dbiculocale));

	/* Form tuple */
	new_record[Anum_pg_database_oid - 1] = ObjectIdGetDatum(dboid);
	new_record[Anum_pg_database_datname - 1] =
		DirectFunctionCall1(namein, CStringGetDatum(dbname));
	new_record[Anum_pg_database_datdba - 1] = ObjectIdGetDatum(datdba);
	new_record[Anum_pg_database_encoding - 1] = Int32GetDatum(encoding);
	new_record[Anum_pg_database_datlocprovider - 1] = CharGetDatum(dblocprovider);
	new_record[Anum_pg_database_datistemplate - 1] = BoolGetDatum(dbistemplate);
	new_record[Anum_pg_database_datallowconn - 1] = BoolGetDatum(dballowconnections);
	new_record[Anum_pg_database_datconnlimit - 1] = Int32GetDatum(dbconnlimit);
	new_record[Anum_pg_database_datfrozenxid - 1] = TransactionIdGetDatum(src_frozenxid);
	new_record[Anum_pg_database_datminmxid - 1] = TransactionIdGetDatum(src_minmxid);
	new_record[Anum_pg_database_dattablespace - 1] = ObjectIdGetDatum(dst_deftablespace);
	new_record[Anum_pg_database_datcollate - 1] = CStringGetTextDatum(dbcollate);
	new_record[Anum_pg_database_datctype - 1] = CStringGetTextDatum(dbctype);
	if (dbiculocale)
		new_record[Anum_pg_database_daticulocale - 1] = CStringGetTextDatum(dbiculocale);
	else
		new_record_nulls[Anum_pg_database_daticulocale - 1] = true;
	if (dbicurules)
		new_record[Anum_pg_database_daticurules - 1] = CStringGetTextDatum(dbicurules);
	else
		new_record_nulls[Anum_pg_database_daticurules - 1] = true;
	if (dbcollversion)
		new_record[Anum_pg_database_datcollversion - 1] = CStringGetTextDatum(dbcollversion);
	else
		new_record_nulls[Anum_pg_database_datcollversion - 1] = true;

	/*
	 * We deliberately set datacl to default (NULL), rather than copying it
	 * from the template database.  Copying it would be a bad idea when the
	 * owner is not the same as the template's owner.
	 */
	new_record_nulls[Anum_pg_database_datacl - 1] = true;

	tuple = heap_form_tuple(RelationGetDescr(pg_database_rel),
							new_record, new_record_nulls);

	CatalogTupleInsert(pg_database_rel, tuple);

	/*
	 * Now generate additional catalog entries associated with the new DB
	 */

	/* Register owner dependency */
	recordDependencyOnOwner(DatabaseRelationId, dboid, datdba);

	/* Create pg_shdepend entries for objects within database */
	copyTemplateDependencies(src_dboid, dboid);

	/* Post creation hook for new database */
	InvokeObjectPostCreateHook(DatabaseRelationId, dboid, 0);

	/*
	 * If we're going to be reading data for the to-be-created database into
	 * shared_buffers, take a lock on it. Nobody should know that this
	 * database exists yet, but it's good to maintain the invariant that an
	 * AccessExclusiveLock on the database is sufficient to drop all of its
	 * buffers without worrying about more being read later.
	 *
	 * Note that we need to do this before entering the
	 * PG_ENSURE_ERROR_CLEANUP block below, because createdb_failure_callback
	 * expects this lock to be held already.
	 */
	if (dbstrategy == CREATEDB_WAL_LOG)
		LockSharedObject(DatabaseRelationId, dboid, 0, AccessShareLock);

	/*
	 * Once we start copying subdirectories, we need to be able to clean 'em
	 * up if we fail.  Use an ENSURE block to make sure this happens.  (This
	 * is not a 100% solution, because of the possibility of failure during
	 * transaction commit after we leave this routine, but it should handle
	 * most scenarios.)
	 */
	fparms.src_dboid = src_dboid;
	fparms.dest_dboid = dboid;
	fparms.strategy = dbstrategy;

	PG_ENSURE_ERROR_CLEANUP(createdb_failure_callback,
							PointerGetDatum(&fparms));
	{
		/*
		 * If the user has asked to create a database with the WAL_LOG
		 * strategy, call CreateDatabaseUsingWalLog, which copies the database
		 * at the block level and WAL-logs each copied block.  Otherwise, call
		 * CreateDatabaseUsingFileCopy, which copies the database file by
		 * file.
		 */
		if (dbstrategy == CREATEDB_WAL_LOG)
			CreateDatabaseUsingWalLog(src_dboid, dboid, src_deftablespace,
									  dst_deftablespace);
		else
			CreateDatabaseUsingFileCopy(src_dboid, dboid, src_deftablespace,
										dst_deftablespace);
2000-11-14 19:37:49 +01:00
|
|
|
|
2006-05-04 18:07:29 +02:00
|
|
|
/*
|
2009-09-01 04:54:52 +02:00
|
|
|
* Close pg_database, but keep lock till commit.
|
2006-05-04 18:07:29 +02:00
|
|
|
*/
|
2019-01-21 19:32:19 +01:00
|
|
|
table_close(pg_database_rel, NoLock);
|
2006-05-04 18:07:29 +02:00
|
|
|
|
2005-08-02 21:02:32 +02:00
|
|
|
/*
|
2009-09-01 04:54:52 +02:00
|
|
|
* Force synchronous commit, thus minimizing the window between
|
2017-02-06 10:33:58 +01:00
|
|
|
* creation of the database files and committal of the transaction. If
|
2007-08-02 00:45:09 +02:00
|
|
|
* we crash before committing, we'll have a DB that's taking up disk
|
|
|
|
* space but is not in pg_database, which is not good.
|
2005-08-02 21:02:32 +02:00
|
|
|
*/
|
2009-09-01 04:54:52 +02:00
|
|
|
ForceSyncCommit();
|
2005-08-02 21:02:32 +02:00
|
|
|
}
|
2008-04-17 01:59:40 +02:00
|
|
|
PG_END_ENSURE_ERROR_CLEANUP(createdb_failure_callback,
|
|
|
|
PointerGetDatum(&fparms));
|
2012-12-29 13:55:37 +01:00
|
|
|
|
|
|
|
return dboid;
|
2008-04-17 01:59:40 +02:00
|
|
|
}

/*
 * Check whether chosen encoding matches chosen locale settings.  This
 * restriction is necessary because libc's locale-specific code usually
 * fails when presented with data in an encoding it's not expecting.  We
 * allow mismatch in four cases:
 *
 * 1. locale encoding = SQL_ASCII, which means that the locale is C/POSIX,
 * which works with any encoding.
 *
 * 2. locale encoding = -1, which means that we couldn't determine the
 * locale's encoding and have to trust the user to get it right.
 *
 * 3. selected encoding is UTF8 and platform is win32.  This is because
 * UTF8 is a pseudo codepage that is supported in all locales since it's
 * converted to UTF16 before being used.
 *
 * 4. selected encoding is SQL_ASCII, but only if you're a superuser.  This
 * is risky but we have historically allowed it --- notably, the
 * regression tests require it.
 *
 * Note: if you change this policy, fix initdb to match.
 */
void
check_encoding_locale_matches(int encoding, const char *collate, const char *ctype)
{
	int			ctype_encoding = pg_get_encoding_from_locale(ctype, true);
	int			collate_encoding = pg_get_encoding_from_locale(collate, true);

	if (!(ctype_encoding == encoding ||
		  ctype_encoding == PG_SQL_ASCII ||
		  ctype_encoding == -1 ||
#ifdef WIN32
		  encoding == PG_UTF8 ||
#endif
		  (encoding == PG_SQL_ASCII && superuser())))
		ereport(ERROR,
				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
				 errmsg("encoding \"%s\" does not match locale \"%s\"",
						pg_encoding_to_char(encoding),
						ctype),
				 errdetail("The chosen LC_CTYPE setting requires encoding \"%s\".",
						   pg_encoding_to_char(ctype_encoding))));

	if (!(collate_encoding == encoding ||
		  collate_encoding == PG_SQL_ASCII ||
		  collate_encoding == -1 ||
#ifdef WIN32
		  encoding == PG_UTF8 ||
#endif
		  (encoding == PG_SQL_ASCII && superuser())))
		ereport(ERROR,
				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
				 errmsg("encoding \"%s\" does not match locale \"%s\"",
						pg_encoding_to_char(encoding),
						collate),
				 errdetail("The chosen LC_COLLATE setting requires encoding \"%s\".",
						   pg_encoding_to_char(collate_encoding))));
}

/* Error cleanup callback for createdb */
static void
createdb_failure_callback(int code, Datum arg)
{
	createdb_failure_params *fparms = (createdb_failure_params *) DatumGetPointer(arg);

	/*
	 * If we were copying the database at the block level, drop pages for the
	 * destination database that are in the shared buffer cache, and tell the
	 * checkpointer to forget any pending fsync and unlink requests for files
	 * in the database.  The reasoning behind doing this is the same as
	 * explained in the dropdb function.  But unlike dropdb we don't need to
	 * call pgstat_drop_database, because this database is not yet created,
	 * so there should not be any stats for it.
	 */
	if (fparms->strategy == CREATEDB_WAL_LOG)
	{
		DropDatabaseBuffers(fparms->dest_dboid);
		ForgetDatabaseSyncRequests(fparms->dest_dboid);

		/* Release lock on the target database. */
		UnlockSharedObject(DatabaseRelationId, fparms->dest_dboid, 0,
						   AccessShareLock);
	}

	/*
	 * Release lock on source database before doing recursive remove.  This
	 * is not essential but it seems desirable to release the lock as soon as
	 * possible.
	 */
	UnlockSharedObject(DatabaseRelationId, fparms->src_dboid, 0, ShareLock);

	/* Throw away any successfully copied subdirectories */
	remove_dbtablespaces(fparms->dest_dboid);
}

/*
 * DROP DATABASE
 */
void
dropdb(const char *dbname, bool missing_ok, bool force)
{
	Oid			db_id;
	bool		db_istemplate;
	Relation	pgdbrel;
	HeapTuple	tup;
	int			notherbackends;
	int			npreparedxacts;
	int			nslots,
				nslots_active;
	int			nsubscriptions;

	/*
	 * Look up the target database's OID, and get exclusive lock on it. We
	 * need this to ensure that no new backend starts up in the target
	 * database while we are deleting it (see postinit.c), and that no one is
	 * using it as a CREATE DATABASE template or trying to delete it for
	 * themselves.
	 */
	pgdbrel = table_open(DatabaseRelationId, RowExclusiveLock);

	if (!get_db_info(dbname, AccessExclusiveLock, &db_id, NULL, NULL,
					 &db_istemplate, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL))
	{
		if (!missing_ok)
		{
			ereport(ERROR,
					(errcode(ERRCODE_UNDEFINED_DATABASE),
					 errmsg("database \"%s\" does not exist", dbname)));
		}
		else
		{
			/* Close pg_database, release the lock, since we changed nothing */
			table_close(pgdbrel, RowExclusiveLock);
			ereport(NOTICE,
					(errmsg("database \"%s\" does not exist, skipping",
							dbname)));
			return;
		}
	}

	/*
	 * Permission checks
	 */
	if (!object_ownercheck(DatabaseRelationId, db_id, GetUserId()))
		aclcheck_error(ACLCHECK_NOT_OWNER, OBJECT_DATABASE,
					   dbname);

	/* DROP hook for the database being removed */
	InvokeObjectDropHook(DatabaseRelationId, db_id, 0);

	/*
	 * Disallow dropping a DB that is marked istemplate.  This is just to
	 * prevent people from accidentally dropping template0 or template1; they
	 * can do so if they're really determined ...
	 */
	if (db_istemplate)
		ereport(ERROR,
				(errcode(ERRCODE_WRONG_OBJECT_TYPE),
				 errmsg("cannot drop a template database")));

	/* Obviously can't drop my own database */
	if (db_id == MyDatabaseId)
		ereport(ERROR,
				(errcode(ERRCODE_OBJECT_IN_USE),
				 errmsg("cannot drop the currently open database")));

	/*
	 * Check whether there are active logical slots that refer to the
	 * to-be-dropped database.  The database lock we are holding prevents the
	 * creation of new slots using the database or existing slots becoming
	 * active.
	 */
	(void) ReplicationSlotsCountDBSlots(db_id, &nslots, &nslots_active);
	if (nslots_active)
	{
		ereport(ERROR,
				(errcode(ERRCODE_OBJECT_IN_USE),
				 errmsg("database \"%s\" is used by an active logical replication slot",
						dbname),
				 errdetail_plural("There is %d active slot.",
								  "There are %d active slots.",
								  nslots_active, nslots_active)));
	}

	/*
	 * Check if there are subscriptions defined in the target database.
	 *
	 * We can't drop them automatically because they might be holding
	 * resources in other databases/instances.
	 */
	if ((nsubscriptions = CountDBSubscriptions(db_id)) > 0)
		ereport(ERROR,
				(errcode(ERRCODE_OBJECT_IN_USE),
				 errmsg("database \"%s\" is being used by logical replication subscription",
						dbname),
				 errdetail_plural("There is %d subscription.",
								  "There are %d subscriptions.",
								  nsubscriptions, nsubscriptions)));

	/*
	 * Attempt to terminate all existing connections to the target database
	 * if the user has requested to do so.
	 */
	if (force)
		TerminateOtherDBBackends(db_id);

	/*
	 * Check for other backends in the target database.  (Because we hold the
	 * database lock, no new ones can start after this.)
	 *
	 * As in CREATE DATABASE, check this after other error conditions.
	 */
	if (CountOtherDBBackends(db_id, &notherbackends, &npreparedxacts))
		ereport(ERROR,
				(errcode(ERRCODE_OBJECT_IN_USE),
				 errmsg("database \"%s\" is being accessed by other users",
						dbname),
				 errdetail_busy_db(notherbackends, npreparedxacts)));

	/*
	 * Remove the database's tuple from pg_database.
	 */
	tup = SearchSysCache1(DATABASEOID, ObjectIdGetDatum(db_id));
	if (!HeapTupleIsValid(tup))
		elog(ERROR, "cache lookup failed for database %u", db_id);

	CatalogTupleDelete(pgdbrel, &tup->t_self);

	ReleaseSysCache(tup);

	/*
	 * Delete any comments or security labels associated with the database.
	 */
	DeleteSharedComments(db_id, DatabaseRelationId);
	DeleteSharedSecurityLabel(db_id, DatabaseRelationId);

	/*
	 * Remove settings associated with this database
	 */
	DropSetting(db_id, InvalidOid);

	/*
	 * Remove shared dependency references for the database.
	 */
	dropDatabaseDependencies(db_id);

	/*
	 * Drop db-specific replication slots.
	 */
	ReplicationSlotsDropDBSlots(db_id);

	/*
	 * Drop pages for this database that are in the shared buffer cache. This
	 * is important to ensure that no remaining backend tries to write out a
	 * dirty buffer to the dead database later...
	 */
	DropDatabaseBuffers(db_id);

	/*
	 * Tell the cumulative stats system to forget it immediately, too.
	 */
	pgstat_drop_database(db_id);

	/*
	 * Tell checkpointer to forget any pending fsync and unlink requests for
	 * files in the database; else the fsyncs will fail at next checkpoint,
	 * or worse, it will delete files that belong to a newly created database
	 * with the same OID.
	 */
	ForgetDatabaseSyncRequests(db_id);

	/*
	 * Force a checkpoint to make sure the checkpointer has received the
	 * message sent by ForgetDatabaseSyncRequests.
	 */
	RequestCheckpoint(CHECKPOINT_IMMEDIATE | CHECKPOINT_FORCE | CHECKPOINT_WAIT);

	/* Close all smgr fds in all backends. */
	WaitForProcSignalBarrier(EmitProcSignalBarrier(PROCSIGNAL_BARRIER_SMGRRELEASE));

	/*
	 * Remove all tablespace subdirs belonging to the database.
	 */
	remove_dbtablespaces(db_id);

	/*
	 * Close pg_database, but keep lock till commit.
	 */
	table_close(pgdbrel, NoLock);

	/*
	 * Force synchronous commit, thus minimizing the window between removal
	 * of the database files and committal of the transaction.  If we crash
	 * before committing, we'll have a DB that's gone on disk but still there
	 * according to pg_database, which is not good.
	 */
	ForceSyncCommit();
}

/*
 * Rename database
 */
ObjectAddress
RenameDatabase(const char *oldname, const char *newname)
{
	Oid			db_id;
	HeapTuple	newtup;
	Relation	rel;
	int			notherbackends;
	int			npreparedxacts;
	ObjectAddress address;

	/*
	 * Look up the target database's OID, and get exclusive lock on it.  We
	 * need this for the same reasons as DROP DATABASE.
	 */
	rel = table_open(DatabaseRelationId, RowExclusiveLock);

	if (!get_db_info(oldname, AccessExclusiveLock, &db_id, NULL, NULL,
					 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL))
		ereport(ERROR,
				(errcode(ERRCODE_UNDEFINED_DATABASE),
				 errmsg("database \"%s\" does not exist", oldname)));

	/* must be owner */
	if (!object_ownercheck(DatabaseRelationId, db_id, GetUserId()))
		aclcheck_error(ACLCHECK_NOT_OWNER, OBJECT_DATABASE,
					   oldname);

	/* must have createdb rights */
	if (!have_createdb_privilege())
		ereport(ERROR,
				(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
				 errmsg("permission denied to rename database")));

	/*
	 * If built with appropriate switch, whine when regression-testing
	 * conventions for database names are violated.
	 */
#ifdef ENFORCE_REGRESSION_TEST_NAME_RESTRICTIONS
	if (strstr(newname, "regression") == NULL)
		elog(WARNING, "databases created by regression test cases should have names including \"regression\"");
#endif

	/*
	 * Make sure the new name doesn't exist.  See notes for same error in
	 * CREATE DATABASE.
	 */
	if (OidIsValid(get_database_oid(newname, true)))
		ereport(ERROR,
				(errcode(ERRCODE_DUPLICATE_DATABASE),
				 errmsg("database \"%s\" already exists", newname)));

	/*
	 * XXX Client applications probably store the current database somewhere,
	 * so renaming it could cause confusion.  On the other hand, there may
	 * not be an actual problem besides a little confusion, so think about
	 * this and decide.
	 */
	if (db_id == MyDatabaseId)
		ereport(ERROR,
				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
				 errmsg("current database cannot be renamed")));

	/*
	 * Make sure the database does not have active sessions.  This is the
	 * same concern as above, but applied to other sessions.
	 *
	 * As in CREATE DATABASE, check this after other error conditions.
	 */
	if (CountOtherDBBackends(db_id, &notherbackends, &npreparedxacts))
		ereport(ERROR,
				(errcode(ERRCODE_OBJECT_IN_USE),
				 errmsg("database \"%s\" is being accessed by other users",
						oldname),
				 errdetail_busy_db(notherbackends, npreparedxacts)));

	/* rename */
	newtup = SearchSysCacheCopy1(DATABASEOID, ObjectIdGetDatum(db_id));
	if (!HeapTupleIsValid(newtup))
		elog(ERROR, "cache lookup failed for database %u", db_id);
	namestrcpy(&(((Form_pg_database) GETSTRUCT(newtup))->datname), newname);
	CatalogTupleUpdate(rel, &newtup->t_self, newtup);

	InvokeObjectPostAlterHook(DatabaseRelationId, db_id, 0);
2015-03-03 18:10:50 +01:00
|
|
|
ObjectAddressSet(address, DatabaseRelationId, db_id);
|
|
|
|
|
2006-05-04 18:07:29 +02:00
|
|
|
/*
|
2009-09-01 04:54:52 +02:00
|
|
|
* Close pg_database, but keep lock till commit.
|
2006-05-04 18:07:29 +02:00
|
|
|
*/
|
2019-01-21 19:32:19 +01:00
|
|
|
table_close(rel, NoLock);
|
2012-12-24 00:25:03 +01:00
|
|
|
|
Change many routines to return ObjectAddress rather than OID
The changed routines are mostly those that can be directly called by
ProcessUtilitySlow; the intention is to make the affected object
information more precise, in support for future event trigger changes.
Originally it was envisioned that the OID of the affected object would
be enough, and in most cases that is correct, but upon actually
implementing the event trigger changes it turned out that ObjectAddress
is more widely useful.
Additionally, some command execution routines grew an output argument
that's an object address which provides further info about the executed
command. To wit:
* for ALTER DOMAIN / ADD CONSTRAINT, it corresponds to the address of
the new constraint
* for ALTER OBJECT / SET SCHEMA, it corresponds to the address of the
schema that originally contained the object.
* for ALTER EXTENSION {ADD, DROP} OBJECT, it corresponds to the address
of the object added to or dropped from the extension.
There's no user-visible change in this commit, and no functional change
either.
Discussion: 20150218213255.GC6717@tamriel.snowman.net
Reviewed-By: Stephen Frost, Andres Freund
2015-03-03 18:10:50 +01:00
|
|
|
return address;
|
2003-06-27 16:45:32 +02:00
|
|
|
}
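
/*
 * Usage sketch (illustrative; the database and tablespace names below are
 * hypothetical, not part of this source): movedb() implements the
 * filesystem side of the SQL command
 *
 *		ALTER DATABASE sales SET TABLESPACE fastdisk;
 *
 * which succeeds only when "fastdisk" exists, the caller owns the database,
 * and no other sessions are connected to "sales".
 */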

/*
 * ALTER DATABASE SET TABLESPACE
 */
static void
movedb(const char *dbname, const char *tblspcname)
{
	Oid			db_id;
	Relation	pgdbrel;
	int			notherbackends;
	int			npreparedxacts;
	HeapTuple	oldtuple,
				newtuple;
	Oid			src_tblspcoid,
				dst_tblspcoid;
	ScanKeyData scankey;
	SysScanDesc sysscan;
	AclResult	aclresult;
	char	   *src_dbpath;
	char	   *dst_dbpath;
	DIR		   *dstdir;
	struct dirent *xlde;
	movedb_failure_params fparms;

	/*
	 * Look up the target database's OID, and get exclusive lock on it. We
	 * need this to ensure that no new backend starts up in the database while
	 * we are moving it, and that no one is using it as a CREATE DATABASE
	 * template or trying to delete it.
	 */
	pgdbrel = table_open(DatabaseRelationId, RowExclusiveLock);

	if (!get_db_info(dbname, AccessExclusiveLock, &db_id, NULL, NULL,
					 NULL, NULL, NULL, NULL, &src_tblspcoid, NULL, NULL, NULL, NULL, NULL, NULL))
		ereport(ERROR,
				(errcode(ERRCODE_UNDEFINED_DATABASE),
				 errmsg("database \"%s\" does not exist", dbname)));

	/*
	 * We actually need a session lock, so that the lock will persist across
	 * the commit/restart below. (We could almost get away with letting the
	 * lock be released at commit, except that someone could try to move
	 * relations of the DB back into the old directory while we rmtree() it.)
	 */
	LockSharedObjectForSession(DatabaseRelationId, db_id, 0,
							   AccessExclusiveLock);

	/*
	 * Permission checks
	 */
	if (!object_ownercheck(DatabaseRelationId, db_id, GetUserId()))
		aclcheck_error(ACLCHECK_NOT_OWNER, OBJECT_DATABASE,
					   dbname);

	/*
	 * Obviously can't move the tables of my own database
	 */
	if (db_id == MyDatabaseId)
		ereport(ERROR,
				(errcode(ERRCODE_OBJECT_IN_USE),
				 errmsg("cannot change the tablespace of the currently open database")));

	/*
	 * Get tablespace's oid
	 */
	dst_tblspcoid = get_tablespace_oid(tblspcname, false);

	/*
	 * Permission checks
	 */
	aclresult = object_aclcheck(TableSpaceRelationId, dst_tblspcoid, GetUserId(),
								ACL_CREATE);
	if (aclresult != ACLCHECK_OK)
		aclcheck_error(aclresult, OBJECT_TABLESPACE,
					   tblspcname);

	/*
	 * pg_global must never be the default tablespace
	 */
	if (dst_tblspcoid == GLOBALTABLESPACE_OID)
		ereport(ERROR,
				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
				 errmsg("pg_global cannot be used as default tablespace")));

	/*
	 * No-op if same tablespace
	 */
	if (src_tblspcoid == dst_tblspcoid)
	{
		table_close(pgdbrel, NoLock);
		UnlockSharedObjectForSession(DatabaseRelationId, db_id, 0,
									 AccessExclusiveLock);
		return;
	}

	/*
	 * Check for other backends in the target database. (Because we hold the
	 * database lock, no new ones can start after this.)
	 *
	 * As in CREATE DATABASE, check this after other error conditions.
	 */
	if (CountOtherDBBackends(db_id, &notherbackends, &npreparedxacts))
		ereport(ERROR,
				(errcode(ERRCODE_OBJECT_IN_USE),
				 errmsg("database \"%s\" is being accessed by other users",
						dbname),
				 errdetail_busy_db(notherbackends, npreparedxacts)));

	/*
	 * Get old and new database paths
	 */
	src_dbpath = GetDatabasePath(db_id, src_tblspcoid);
	dst_dbpath = GetDatabasePath(db_id, dst_tblspcoid);

	/*
	 * Force a checkpoint before proceeding. This will force all dirty
	 * buffers, including those of unlogged tables, out to disk, to ensure
	 * source database is up-to-date on disk for the copy.
	 * FlushDatabaseBuffers() would suffice for that, but we also want to
	 * process any pending unlink requests. Otherwise, the check for existing
	 * files in the target directory might fail unnecessarily, not to mention
	 * that the copy might fail due to source files getting deleted under it.
	 * On Windows, this also ensures that background procs don't hold any open
	 * files, which would cause rmdir() to fail.
	 */
	RequestCheckpoint(CHECKPOINT_IMMEDIATE | CHECKPOINT_FORCE | CHECKPOINT_WAIT
					  | CHECKPOINT_FLUSH_ALL);

	/* Close all smgr fds in all backends. */
	WaitForProcSignalBarrier(EmitProcSignalBarrier(PROCSIGNAL_BARRIER_SMGRRELEASE));

	/*
	 * Now drop all buffers holding data of the target database; they should
	 * no longer be dirty so DropDatabaseBuffers is safe.
	 *
	 * It might seem that we could just let these buffers age out of shared
	 * buffers naturally, since they should not get referenced anymore. The
	 * problem with that is that if the user later moves the database back to
	 * its original tablespace, any still-surviving buffers would appear to
	 * contain valid data again --- but they'd be missing any changes made in
	 * the database while it was in the new tablespace. In any case, freeing
	 * buffers that should never be used again seems worth the cycles.
	 *
	 * Note: it'd be sufficient to get rid of buffers matching db_id and
	 * src_tblspcoid, but bufmgr.c presently provides no API for that.
	 */
	DropDatabaseBuffers(db_id);

	/*
	 * Check for existence of files in the target directory, i.e., objects of
	 * this database that are already in the target tablespace. We can't
	 * allow the move in such a case, because we would need to change those
	 * relations' pg_class.reltablespace entries to zero, and we don't have
	 * access to the DB's pg_class to do so.
	 */
	dstdir = AllocateDir(dst_dbpath);
	if (dstdir != NULL)
	{
		while ((xlde = ReadDir(dstdir, dst_dbpath)) != NULL)
		{
			if (strcmp(xlde->d_name, ".") == 0 ||
				strcmp(xlde->d_name, "..") == 0)
				continue;

			ereport(ERROR,
					(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
					 errmsg("some relations of database \"%s\" are already in tablespace \"%s\"",
							dbname, tblspcname),
					 errhint("You must move them back to the database's default tablespace before using this command.")));
		}

		FreeDir(dstdir);

		/*
		 * The directory exists but is empty. We must remove it before using
		 * the copydir function.
		 */
		if (rmdir(dst_dbpath) != 0)
			elog(ERROR, "could not remove directory \"%s\": %m",
				 dst_dbpath);
	}

	/*
	 * Use an ENSURE block to make sure we remove the debris if the copy fails
	 * (eg, due to out-of-disk-space). This is not a 100% solution, because
	 * of the possibility of failure during transaction commit, but it should
	 * handle most scenarios.
	 */
	fparms.dest_dboid = db_id;
	fparms.dest_tsoid = dst_tblspcoid;
	PG_ENSURE_ERROR_CLEANUP(movedb_failure_callback,
							PointerGetDatum(&fparms));
	{
		Datum		new_record[Natts_pg_database] = {0};
		bool		new_record_nulls[Natts_pg_database] = {0};
		bool		new_record_repl[Natts_pg_database] = {0};

		/*
		 * Copy files from the old tablespace to the new one
		 */
		copydir(src_dbpath, dst_dbpath, false);

		/*
		 * Record the filesystem change in XLOG
		 */
		{
			xl_dbase_create_file_copy_rec xlrec;

			xlrec.db_id = db_id;
			xlrec.tablespace_id = dst_tblspcoid;
			xlrec.src_db_id = db_id;
			xlrec.src_tablespace_id = src_tblspcoid;

			XLogBeginInsert();
			XLogRegisterData((char *) &xlrec,
							 sizeof(xl_dbase_create_file_copy_rec));

			(void) XLogInsert(RM_DBASE_ID,
							  XLOG_DBASE_CREATE_FILE_COPY | XLR_SPECIAL_REL_UPDATE);
		}

		/*
		 * Update the database's pg_database tuple
		 */
		ScanKeyInit(&scankey,
					Anum_pg_database_datname,
					BTEqualStrategyNumber, F_NAMEEQ,
					CStringGetDatum(dbname));
		sysscan = systable_beginscan(pgdbrel, DatabaseNameIndexId, true,
									 NULL, 1, &scankey);
		oldtuple = systable_getnext(sysscan);
		if (!HeapTupleIsValid(oldtuple))	/* shouldn't happen... */
			ereport(ERROR,
					(errcode(ERRCODE_UNDEFINED_DATABASE),
					 errmsg("database \"%s\" does not exist", dbname)));

		new_record[Anum_pg_database_dattablespace - 1] = ObjectIdGetDatum(dst_tblspcoid);
		new_record_repl[Anum_pg_database_dattablespace - 1] = true;

		newtuple = heap_modify_tuple(oldtuple, RelationGetDescr(pgdbrel),
									 new_record,
									 new_record_nulls, new_record_repl);
		CatalogTupleUpdate(pgdbrel, &oldtuple->t_self, newtuple);

		InvokeObjectPostAlterHook(DatabaseRelationId, db_id, 0);

		systable_endscan(sysscan);

		/*
		 * Force another checkpoint here. As in CREATE DATABASE, this is to
		 * ensure that we don't have to replay a committed
		 * XLOG_DBASE_CREATE_FILE_COPY operation, which would cause us to lose
		 * any unlogged operations done in the new DB tablespace before the
		 * next checkpoint.
		 */
		RequestCheckpoint(CHECKPOINT_IMMEDIATE | CHECKPOINT_FORCE | CHECKPOINT_WAIT);

		/*
		 * Force synchronous commit, thus minimizing the window between
		 * copying the database files and committal of the transaction. If we
		 * crash before committing, we'll leave an orphaned set of files on
		 * disk, which is not fatal but not good either.
		 */
		ForceSyncCommit();

		/*
		 * Close pg_database, but keep lock till commit.
		 */
		table_close(pgdbrel, NoLock);
	}
	PG_END_ENSURE_ERROR_CLEANUP(movedb_failure_callback,
								PointerGetDatum(&fparms));

	/*
	 * Commit the transaction so that the pg_database update is committed. If
	 * we crash while removing files, the database won't be corrupt, we'll
	 * just leave some orphaned files in the old directory.
	 *
	 * (This is OK because we know we aren't inside a transaction block.)
	 *
	 * XXX would it be safe/better to do this inside the ensure block? Not
	 * convinced it's a good idea; consider elog just after the transaction
	 * really commits.
	 */
	PopActiveSnapshot();
	CommitTransactionCommand();

	/* Start new transaction for the remaining work; don't need a snapshot */
	StartTransactionCommand();

	/*
	 * Remove files from the old tablespace
	 */
	if (!rmtree(src_dbpath, true))
		ereport(WARNING,
				(errmsg("some useless files may be left behind in old database directory \"%s\"",
						src_dbpath)));

	/*
	 * Record the filesystem change in XLOG
	 */
	{
		xl_dbase_drop_rec xlrec;

		xlrec.db_id = db_id;
		xlrec.ntablespaces = 1;
2008-11-07 19:25:07 +01:00
|
|
|
|
Revamp the WAL record format.
Each WAL record now carries information about the modified relation and
block(s) in a standardized format. That makes it easier to write tools that
need that information, like pg_rewind, prefetching the blocks to speed up
recovery, etc.
There's a whole new API for building WAL records, replacing the XLogRecData
chains used previously. The new API consists of XLogRegister* functions,
which are called for each buffer and chunk of data that is added to the
record. The new API also gives more control over when a full-page image is
written, by passing flags to the XLogRegisterBuffer function.
This also simplifies the XLogReadBufferForRedo() calls. The function can dig
the relation and block number from the WAL record, so they no longer need to
be passed as arguments.
For the convenience of redo routines, XLogReader now disects each WAL record
after reading it, copying the main data part and the per-block data into
MAXALIGNed buffers. The data chunks are not aligned within the WAL record,
but the redo routines can assume that the pointers returned by XLogRecGet*
functions are. Redo routines are now passed the XLogReaderState, which
contains the record in the already-disected format, instead of the plain
XLogRecord.
The new record format also makes the fixed size XLogRecord header smaller,
by removing the xl_len field. The length of the "main data" portion is now
stored at the end of the WAL record, and there's a separate header after
XLogRecord for it. The alignment padding at the end of XLogRecord is also
removed. This compansates for the fact that the new format would otherwise
be more bulky than the old format.
Reviewed by Andres Freund, Amit Kapila, Michael Paquier, Alvaro Herrera,
Fujii Masao.
2014-11-20 16:56:26 +01:00
|
|
|
XLogBeginInsert();
|
|
|
|
XLogRegisterData((char *) &xlrec, sizeof(xl_dbase_drop_rec));
|
2019-11-21 13:10:37 +01:00
|
|
|
XLogRegisterData((char *) &src_tblspcoid, sizeof(Oid));
|
2008-11-07 19:25:07 +01:00
|
|
|
|
Revamp the WAL record format.
Each WAL record now carries information about the modified relation and
block(s) in a standardized format. That makes it easier to write tools that
need that information, like pg_rewind, prefetching the blocks to speed up
recovery, etc.
There's a whole new API for building WAL records, replacing the XLogRecData
chains used previously. The new API consists of XLogRegister* functions,
which are called for each buffer and chunk of data that is added to the
record. The new API also gives more control over when a full-page image is
written, by passing flags to the XLogRegisterBuffer function.
This also simplifies the XLogReadBufferForRedo() calls. The function can dig
the relation and block number from the WAL record, so they no longer need to
be passed as arguments.
For the convenience of redo routines, XLogReader now disects each WAL record
after reading it, copying the main data part and the per-block data into
MAXALIGNed buffers. The data chunks are not aligned within the WAL record,
but the redo routines can assume that the pointers returned by XLogRecGet*
functions are. Redo routines are now passed the XLogReaderState, which
contains the record in the already-disected format, instead of the plain
XLogRecord.
The new record format also makes the fixed size XLogRecord header smaller,
by removing the xl_len field. The length of the "main data" portion is now
stored at the end of the WAL record, and there's a separate header after
XLogRecord for it. The alignment padding at the end of XLogRecord is also
removed. This compansates for the fact that the new format would otherwise
be more bulky than the old format.
Reviewed by Andres Freund, Amit Kapila, Michael Paquier, Alvaro Herrera,
Fujii Masao.
2014-11-20 16:56:26 +01:00
|
|
|
(void) XLogInsert(RM_DBASE_ID,
|
|
|
|
XLOG_DBASE_DROP | XLR_SPECIAL_REL_UPDATE);
|
2008-11-07 19:25:07 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
/* Now it's safe to release the database lock */
|
|
|
|
UnlockSharedObjectForSession(DatabaseRelationId, db_id, 0,
|
|
|
|
AccessExclusiveLock);
|
2022-04-25 10:32:13 +02:00
|
|
|
|
|
|
|
pfree(src_dbpath);
|
|
|
|
pfree(dst_dbpath);
|
2008-11-07 19:25:07 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
/* Error cleanup callback for movedb */
|
|
|
|
static void
|
|
|
|
movedb_failure_callback(int code, Datum arg)
|
|
|
|
{
|
|
|
|
movedb_failure_params *fparms = (movedb_failure_params *) DatumGetPointer(arg);
|
|
|
|
char *dstpath;
|
|
|
|
|
|
|
|
/* Get rid of anything we managed to copy to the target directory */
|
|
|
|
dstpath = GetDatabasePath(fparms->dest_dboid, fparms->dest_tsoid);
|
|
|
|
|
|
|
|
(void) rmtree(dstpath, true);
|
2022-04-25 10:32:13 +02:00
|
|
|
|
|
|
|
pfree(dstpath);
|
2008-11-07 19:25:07 +01:00
|
|
|
}
|
|
|
|
|
2019-11-12 06:36:13 +01:00
|
|
|
/*
|
|
|
|
* Process options and call dropdb function.
|
|
|
|
*/
|
|
|
|
void
|
|
|
|
DropDatabase(ParseState *pstate, DropdbStmt *stmt)
|
|
|
|
{
|
|
|
|
bool force = false;
|
|
|
|
ListCell *lc;
|
|
|
|
|
|
|
|
foreach(lc, stmt->options)
|
|
|
|
{
|
|
|
|
DefElem *opt = (DefElem *) lfirst(lc);
|
|
|
|
|
|
|
|
if (strcmp(opt->defname, "force") == 0)
|
|
|
|
force = true;
|
|
|
|
else
|
|
|
|
ereport(ERROR,
|
|
|
|
(errcode(ERRCODE_SYNTAX_ERROR),
|
|
|
|
errmsg("unrecognized DROP DATABASE option \"%s\"", opt->defname),
|
|
|
|
parser_errposition(pstate, opt->location)));
|
|
|
|
}
|
|
|
|
|
|
|
|
dropdb(stmt->dbname, stmt->missing_ok, force);
|
|
|
|
}
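
/*
 * Illustrative SQL forms handled by DropDatabase() above (database name is
 * an example, not taken from this file):
 *
 *		DROP DATABASE mydb;
 *		DROP DATABASE IF EXISTS mydb WITH (FORCE);
 *
 * "IF EXISTS" maps to stmt->missing_ok, and the FORCE option to the
 * "force" DefElem parsed above.
 */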

/*
 * ALTER DATABASE name ...
 */
Oid
AlterDatabase(ParseState *pstate, AlterDatabaseStmt *stmt, bool isTopLevel)
{
	Relation	rel;
	Oid			dboid;
	HeapTuple	tuple,
				newtuple;
	Form_pg_database datform;
	ScanKeyData scankey;
	SysScanDesc scan;
	ListCell   *option;
	bool		dbistemplate = false;
	bool		dballowconnections = true;
	int			dbconnlimit = -1;
	DefElem    *distemplate = NULL;
	DefElem    *dallowconnections = NULL;
	DefElem    *dconnlimit = NULL;
	DefElem    *dtablespace = NULL;
	Datum		new_record[Natts_pg_database] = {0};
	bool		new_record_nulls[Natts_pg_database] = {0};
	bool		new_record_repl[Natts_pg_database] = {0};

	/* Extract options from the statement node tree */
	foreach(option, stmt->options)
	{
		DefElem    *defel = (DefElem *) lfirst(option);

		if (strcmp(defel->defname, "is_template") == 0)
		{
			if (distemplate)
				errorConflictingDefElem(defel, pstate);
			distemplate = defel;
		}
		else if (strcmp(defel->defname, "allow_connections") == 0)
		{
			if (dallowconnections)
				errorConflictingDefElem(defel, pstate);
			dallowconnections = defel;
		}
		else if (strcmp(defel->defname, "connection_limit") == 0)
		{
			if (dconnlimit)
				errorConflictingDefElem(defel, pstate);
			dconnlimit = defel;
		}
		else if (strcmp(defel->defname, "tablespace") == 0)
		{
			if (dtablespace)
				errorConflictingDefElem(defel, pstate);
			dtablespace = defel;
		}
		else
			ereport(ERROR,
					(errcode(ERRCODE_SYNTAX_ERROR),
					 errmsg("option \"%s\" not recognized", defel->defname),
					 parser_errposition(pstate, defel->location)));
	}

	if (dtablespace)
	{
		/*
		 * While the SET TABLESPACE syntax doesn't allow any other options,
		 * somebody could write "WITH TABLESPACE ...". Forbid any other
		 * options from being specified in that case.
		 */
		if (list_length(stmt->options) != 1)
			ereport(ERROR,
					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
					 errmsg("option \"%s\" cannot be specified with other options",
							dtablespace->defname),
					 parser_errposition(pstate, dtablespace->location)));
		/* this case isn't allowed within a transaction block */
		PreventInTransactionBlock(isTopLevel, "ALTER DATABASE SET TABLESPACE");
		movedb(stmt->dbname, defGetString(dtablespace));
		return InvalidOid;
	}

	if (distemplate && distemplate->arg)
		dbistemplate = defGetBoolean(distemplate);
	if (dallowconnections && dallowconnections->arg)
		dballowconnections = defGetBoolean(dallowconnections);
	if (dconnlimit && dconnlimit->arg)
	{
		dbconnlimit = defGetInt32(dconnlimit);
		if (dbconnlimit < -1)
			ereport(ERROR,
					(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
					 errmsg("invalid connection limit: %d", dbconnlimit)));
	}

	/*
	 * Get the old tuple.  We don't need a lock on the database per se,
	 * because we're not going to do anything that would mess up incoming
	 * connections.
	 */
	rel = table_open(DatabaseRelationId, RowExclusiveLock);
	ScanKeyInit(&scankey,
				Anum_pg_database_datname,
				BTEqualStrategyNumber, F_NAMEEQ,
				CStringGetDatum(stmt->dbname));
	scan = systable_beginscan(rel, DatabaseNameIndexId, true,
							  NULL, 1, &scankey);
	tuple = systable_getnext(scan);
	if (!HeapTupleIsValid(tuple))
		ereport(ERROR,
				(errcode(ERRCODE_UNDEFINED_DATABASE),
				 errmsg("database \"%s\" does not exist", stmt->dbname)));

	datform = (Form_pg_database) GETSTRUCT(tuple);
	dboid = datform->oid;

	if (!object_ownercheck(DatabaseRelationId, dboid, GetUserId()))
		aclcheck_error(ACLCHECK_NOT_OWNER, OBJECT_DATABASE,
					   stmt->dbname);

	/*
	 * In order to avoid getting locked out and having to go through
	 * standalone mode, we refuse to disallow connections to the database
	 * we're currently connected to.  Lockout can still happen with concurrent
	 * sessions but the likeliness of that is not high enough to worry about.
	 */
	if (!dballowconnections && dboid == MyDatabaseId)
		ereport(ERROR,
				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
				 errmsg("cannot disallow connections for current database")));

	/*
	 * Build an updated tuple, perusing the information just obtained
	 */
	if (distemplate)
	{
		new_record[Anum_pg_database_datistemplate - 1] = BoolGetDatum(dbistemplate);
		new_record_repl[Anum_pg_database_datistemplate - 1] = true;
	}
	if (dallowconnections)
	{
		new_record[Anum_pg_database_datallowconn - 1] = BoolGetDatum(dballowconnections);
		new_record_repl[Anum_pg_database_datallowconn - 1] = true;
	}
	if (dconnlimit)
	{
		new_record[Anum_pg_database_datconnlimit - 1] = Int32GetDatum(dbconnlimit);
		new_record_repl[Anum_pg_database_datconnlimit - 1] = true;
	}

	newtuple = heap_modify_tuple(tuple, RelationGetDescr(rel), new_record,
								 new_record_nulls, new_record_repl);
	CatalogTupleUpdate(rel, &tuple->t_self, newtuple);

	InvokeObjectPostAlterHook(DatabaseRelationId, dboid, 0);

	systable_endscan(scan);

	/* Close pg_database, but keep lock till commit */
	table_close(rel, NoLock);

	return dboid;
}
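
/*
 * Illustrative SQL forms handled by AlterDatabase() above (database and
 * tablespace names are examples, not taken from this file):
 *
 *		ALTER DATABASE mydb IS_TEMPLATE true;
 *		ALTER DATABASE mydb ALLOW_CONNECTIONS false;
 *		ALTER DATABASE mydb CONNECTION LIMIT 10;
 *		ALTER DATABASE mydb SET TABLESPACE my_tablespace;
 *
 * The SET TABLESPACE form is routed to movedb() and cannot appear inside a
 * transaction block or be combined with the other options.
 */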
|
|
|
|
|
|
|
|
|

/*
 * ALTER DATABASE name REFRESH COLLATION VERSION
 */
ObjectAddress
AlterDatabaseRefreshColl(AlterDatabaseRefreshCollStmt *stmt)
{
	Relation	rel;
	ScanKeyData scankey;
	SysScanDesc scan;
	Oid			db_id;
	HeapTuple	tuple;
	Form_pg_database datForm;
	ObjectAddress address;
	Datum		datum;
	bool		isnull;
	char	   *oldversion;
	char	   *newversion;

	rel = table_open(DatabaseRelationId, RowExclusiveLock);
	ScanKeyInit(&scankey,
				Anum_pg_database_datname,
				BTEqualStrategyNumber, F_NAMEEQ,
				CStringGetDatum(stmt->dbname));
	scan = systable_beginscan(rel, DatabaseNameIndexId, true,
							  NULL, 1, &scankey);
	tuple = systable_getnext(scan);
	if (!HeapTupleIsValid(tuple))
		ereport(ERROR,
				(errcode(ERRCODE_UNDEFINED_DATABASE),
				 errmsg("database \"%s\" does not exist", stmt->dbname)));

	datForm = (Form_pg_database) GETSTRUCT(tuple);
	db_id = datForm->oid;

	if (!object_ownercheck(DatabaseRelationId, db_id, GetUserId()))
		aclcheck_error(ACLCHECK_NOT_OWNER, OBJECT_DATABASE,
					   stmt->dbname);

	datum = heap_getattr(tuple, Anum_pg_database_datcollversion, RelationGetDescr(rel), &isnull);
	oldversion = isnull ? NULL : TextDatumGetCString(datum);

	datum = heap_getattr(tuple, datForm->datlocprovider == COLLPROVIDER_ICU ? Anum_pg_database_daticulocale : Anum_pg_database_datcollate, RelationGetDescr(rel), &isnull);
	if (isnull)
		elog(ERROR, "unexpected null in pg_database");
	newversion = get_collation_actual_version(datForm->datlocprovider, TextDatumGetCString(datum));

	/* cannot change from NULL to non-NULL or vice versa */
	if ((!oldversion && newversion) || (oldversion && !newversion))
		elog(ERROR, "invalid collation version change");
	else if (oldversion && newversion && strcmp(newversion, oldversion) != 0)
	{
		bool		nulls[Natts_pg_database] = {0};
		bool		replaces[Natts_pg_database] = {0};
		Datum		values[Natts_pg_database] = {0};

		ereport(NOTICE,
				(errmsg("changing version from %s to %s",
						oldversion, newversion)));

		values[Anum_pg_database_datcollversion - 1] = CStringGetTextDatum(newversion);
		replaces[Anum_pg_database_datcollversion - 1] = true;

		tuple = heap_modify_tuple(tuple, RelationGetDescr(rel),
								  values, nulls, replaces);
		CatalogTupleUpdate(rel, &tuple->t_self, tuple);
		heap_freetuple(tuple);
	}
	else
		ereport(NOTICE,
				(errmsg("version has not changed")));

	InvokeObjectPostAlterHook(DatabaseRelationId, db_id, 0);

	ObjectAddressSet(address, DatabaseRelationId, db_id);

	systable_endscan(scan);

	table_close(rel, NoLock);

	return address;
}
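
/*
 * Illustrative usage (not part of the original file; "mydb" is a
 * placeholder name).  The statement below, issued by the database's
 * owner, is what reaches AlterDatabaseRefreshColl():
 *
 *     ALTER DATABASE mydb REFRESH COLLATION VERSION;
 *
 * If the provider's reported version differs from the stored
 * datcollversion, a NOTICE describes the change; otherwise the
 * "version has not changed" NOTICE is emitted.
 */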

/*
 * ALTER DATABASE name SET ...
 */
Oid
AlterDatabaseSet(AlterDatabaseSetStmt *stmt)
{
	Oid			datid = get_database_oid(stmt->dbname, false);

	/*
	 * Obtain a lock on the database and make sure it didn't go away in the
	 * meantime.
	 */
	shdepLockAndCheckObject(DatabaseRelationId, datid);

	if (!object_ownercheck(DatabaseRelationId, datid, GetUserId()))
		aclcheck_error(ACLCHECK_NOT_OWNER, OBJECT_DATABASE,
					   stmt->dbname);

	AlterSetting(datid, InvalidOid, stmt->setstmt);

	UnlockSharedObject(DatabaseRelationId, datid, 0, AccessShareLock);

	return datid;
}
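
/*
 * Illustrative usage (not part of the original file; database and
 * parameter names are placeholders):
 *
 *     ALTER DATABASE mydb SET work_mem = '64MB';
 *     ALTER DATABASE mydb RESET work_mem;
 *
 * The per-database setting is recorded by AlterSetting() in
 * pg_db_role_setting and takes effect at the start of subsequent
 * sessions in that database.
 */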

/*
 * ALTER DATABASE name OWNER TO newowner
 */
ObjectAddress
AlterDatabaseOwner(const char *dbname, Oid newOwnerId)
{
	Oid			db_id;
	HeapTuple	tuple;
	Relation	rel;
	ScanKeyData scankey;
	SysScanDesc scan;
	Form_pg_database datForm;
	ObjectAddress address;

	/*
	 * Get the old tuple.  We don't need a lock on the database per se,
	 * because we're not going to do anything that would mess up incoming
	 * connections.
	 */
	rel = table_open(DatabaseRelationId, RowExclusiveLock);
	ScanKeyInit(&scankey,
				Anum_pg_database_datname,
				BTEqualStrategyNumber, F_NAMEEQ,
				CStringGetDatum(dbname));
	scan = systable_beginscan(rel, DatabaseNameIndexId, true,
							  NULL, 1, &scankey);
	tuple = systable_getnext(scan);
	if (!HeapTupleIsValid(tuple))
		ereport(ERROR,
				(errcode(ERRCODE_UNDEFINED_DATABASE),
				 errmsg("database \"%s\" does not exist", dbname)));

	datForm = (Form_pg_database) GETSTRUCT(tuple);
	db_id = datForm->oid;

	/*
	 * If the new owner is the same as the existing owner, consider the
	 * command to have succeeded.  This is to be consistent with other
	 * objects.
	 */
	if (datForm->datdba != newOwnerId)
	{
		Datum		repl_val[Natts_pg_database];
		bool		repl_null[Natts_pg_database] = {0};
		bool		repl_repl[Natts_pg_database] = {0};
		Acl		   *newAcl;
		Datum		aclDatum;
		bool		isNull;
		HeapTuple	newtuple;

		/* Otherwise, must be owner of the existing object */
		if (!object_ownercheck(DatabaseRelationId, db_id, GetUserId()))
			aclcheck_error(ACLCHECK_NOT_OWNER, OBJECT_DATABASE,
						   dbname);

		/* Must be able to become new owner */
		check_can_set_role(GetUserId(), newOwnerId);

		/*
		 * must have createdb rights
		 *
		 * NOTE: This is different from other alter-owner checks in that the
		 * current user is checked for createdb privileges instead of the
		 * destination owner.  This is consistent with the CREATE case for
		 * databases.  Because superusers will always have this right, we need
		 * no special case for them.
		 */
		if (!have_createdb_privilege())
			ereport(ERROR,
					(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
					 errmsg("permission denied to change owner of database")));

		repl_repl[Anum_pg_database_datdba - 1] = true;
		repl_val[Anum_pg_database_datdba - 1] = ObjectIdGetDatum(newOwnerId);

		/*
		 * Determine the modified ACL for the new owner.  This is only
		 * necessary when the ACL is non-null.
		 */
		aclDatum = heap_getattr(tuple,
								Anum_pg_database_datacl,
								RelationGetDescr(rel),
								&isNull);
		if (!isNull)
		{
			newAcl = aclnewowner(DatumGetAclP(aclDatum),
								 datForm->datdba, newOwnerId);
			repl_repl[Anum_pg_database_datacl - 1] = true;
			repl_val[Anum_pg_database_datacl - 1] = PointerGetDatum(newAcl);
		}

		newtuple = heap_modify_tuple(tuple, RelationGetDescr(rel), repl_val, repl_null, repl_repl);
		CatalogTupleUpdate(rel, &newtuple->t_self, newtuple);

		heap_freetuple(newtuple);

		/* Update owner dependency reference */
		changeDependencyOnOwner(DatabaseRelationId, db_id, newOwnerId);
	}

	InvokeObjectPostAlterHook(DatabaseRelationId, db_id, 0);

	ObjectAddressSet(address, DatabaseRelationId, db_id);

	systable_endscan(scan);

	/* Close pg_database, but keep lock till commit */
	table_close(rel, NoLock);

	return address;
}
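
/*
 * Illustrative usage (not part of the original file; names are
 * placeholders):
 *
 *     ALTER DATABASE mydb OWNER TO new_owner;
 *
 * Per the checks above, the caller must own the database, be permitted
 * to SET ROLE to new_owner, and hold CREATEDB privilege.
 */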

Datum
pg_database_collation_actual_version(PG_FUNCTION_ARGS)
{
	Oid			dbid = PG_GETARG_OID(0);
	HeapTuple	tp;
	char		datlocprovider;
	Datum		datum;
	char	   *version;

	tp = SearchSysCache1(DATABASEOID, ObjectIdGetDatum(dbid));
	if (!HeapTupleIsValid(tp))
		ereport(ERROR,
				(errcode(ERRCODE_UNDEFINED_OBJECT),
				 errmsg("database with OID %u does not exist", dbid)));

	datlocprovider = ((Form_pg_database) GETSTRUCT(tp))->datlocprovider;

	datum = SysCacheGetAttrNotNull(DATABASEOID, tp, datlocprovider == COLLPROVIDER_ICU ? Anum_pg_database_daticulocale : Anum_pg_database_datcollate);
	version = get_collation_actual_version(datlocprovider, TextDatumGetCString(datum));

	ReleaseSysCache(tp);

	if (version)
		PG_RETURN_TEXT_P(cstring_to_text(version));
	else
		PG_RETURN_NULL();
}
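
/*
 * Illustrative usage (not part of the original file):
 *
 *     SELECT pg_database_collation_actual_version(oid)
 *     FROM pg_database WHERE datname = current_database();
 *
 * Returns NULL when the locale provider reports no version for the
 * database's locale.
 */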

/*
 * Helper functions
 */

/*
 * Look up info about the database named "name".  If the database exists,
 * obtain the specified lock type on it, fill in any of the remaining
 * parameters that aren't NULL, and return true.  If no such database,
 * return false.
 */
static bool
get_db_info(const char *name, LOCKMODE lockmode,
			Oid *dbIdP, Oid *ownerIdP,
			int *encodingP, bool *dbIsTemplateP, bool *dbAllowConnP,
			TransactionId *dbFrozenXidP, MultiXactId *dbMinMultiP,
			Oid *dbTablespace, char **dbCollate, char **dbCtype, char **dbIculocale,
			char **dbIcurules,
			char *dbLocProvider,
			char **dbCollversion)
{
	bool		result = false;
	Relation	relation;

	Assert(name);

	/* Caller may wish to grab a better lock on pg_database beforehand... */
	relation = table_open(DatabaseRelationId, AccessShareLock);

	/*
	 * Loop covers the rare case where the database is renamed before we can
	 * lock it.  We try again just in case we can find a new one of the same
	 * name.
	 */
	for (;;)
	{
		ScanKeyData scanKey;
		SysScanDesc scan;
		HeapTuple	tuple;
		Oid			dbOid;

		/*
		 * there's no syscache for database-indexed-by-name, so must do it the
		 * hard way
		 */
		ScanKeyInit(&scanKey,
					Anum_pg_database_datname,
					BTEqualStrategyNumber, F_NAMEEQ,
					CStringGetDatum(name));

		scan = systable_beginscan(relation, DatabaseNameIndexId, true,
								  NULL, 1, &scanKey);

		tuple = systable_getnext(scan);

		if (!HeapTupleIsValid(tuple))
		{
			/* definitely no database of that name */
			systable_endscan(scan);
			break;
		}
|
|
|
|
|
Remove WITH OIDS support, change oid catalog column visibility.
Previously tables declared WITH OIDS, including a significant fraction
of the catalog tables, stored the oid column not as a normal column,
but as part of the tuple header.
This special column was not shown by default, which was somewhat odd,
as it's often (consider e.g. pg_class.oid) one of the more important
parts of a row. Neither pg_dump nor COPY included the contents of the
oid column by default.
The fact that the oid column was not an ordinary column necessitated a
significant amount of special case code to support oid columns. That
already was painful for the existing, but upcoming work aiming to make
table storage pluggable, would have required expanding and duplicating
that "specialness" significantly.
WITH OIDS has been deprecated since 2005 (commit ff02d0a05280e0).
Remove it.
Removing includes:
- CREATE TABLE and ALTER TABLE syntax for declaring the table to be
WITH OIDS has been removed (WITH (oids[ = true]) will error out)
- pg_dump does not support dumping tables declared WITH OIDS and will
issue a warning when dumping one (and ignore the oid column).
- restoring an pg_dump archive with pg_restore will warn when
restoring a table with oid contents (and ignore the oid column)
- COPY will refuse to load binary dump that includes oids.
- pg_upgrade will error out when encountering tables declared WITH
OIDS, they have to be altered to remove the oid column first.
- Functionality to access the oid of the last inserted row (like
plpgsql's RESULT_OID, spi's SPI_lastoid, ...) has been removed.
The syntax for declaring a table WITHOUT OIDS (or WITH (oids = false)
for CREATE TABLE) is still supported. While that requires a bit of
support code, it seems unnecessary to break applications / dumps that
do not use oids, and are explicit about not using them.
The biggest user of WITH OID columns was postgres' catalog. This
commit changes all 'magic' oid columns to be columns that are normally
declared and stored. To reduce unnecessary query breakage all the
newly added columns are still named 'oid', even if a table's column
naming scheme would indicate 'reloid' or such. This obviously
requires adapting a lot code, mostly replacing oid access via
HeapTupleGetOid() with access to the underlying Form_pg_*->oid column.
The bootstrap process now assigns oids for all oid columns in
genbki.pl that do not have an explicit value (starting at the largest
oid previously used), only oids assigned later by oids will be above
FirstBootstrapObjectId. As the oid column now is a normal column the
special bootstrap syntax for oids has been removed.
Oids are not automatically assigned during insertion anymore, all
backend code explicitly assigns oids with GetNewOidWithIndex(). For
the rare case that insertions into the catalog via SQL are called for
the new pg_nextoid() function can be used (which only works on catalog
tables).
The fact that oid columns on system tables are now normal columns
means that they will be included in the set of columns expanded
by * (i.e. SELECT * FROM pg_class will now include the table's oid,
previously it did not). It'd not technically be hard to hide oid
column by default, but that'd mean confusing behavior would either
have to be carried forward forever, or it'd cause breakage down the
line.
While it's not unlikely that further adjustments are needed, the
scope/invasiveness of the patch makes it worthwhile to get merge this
now. It's painful to maintain externally, too complicated to commit
after the code code freeze, and a dependency of a number of other
patches.
Catversion bump, for obvious reasons.
Author: Andres Freund, with contributions by John Naylor
Discussion: https://postgr.es/m/20180930034810.ywp2c7awz7opzcfr@alap3.anarazel.de
2018-11-21 00:36:57 +01:00
|
|
|
dbOid = ((Form_pg_database) GETSTRUCT(tuple))->oid;
|
2006-05-04 18:07:29 +02:00
|
|
|
|
|
|
|
systable_endscan(scan);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Now that we have a database OID, we can try to lock the DB.
|
|
|
|
*/
|
|
|
|
if (lockmode != NoLock)
|
|
|
|
LockSharedObject(DatabaseRelationId, dbOid, 0, lockmode);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* And now, re-fetch the tuple by OID. If it's still there and still
|
|
|
|
* the same name, we win; else, drop the lock and loop back to try
|
|
|
|
* again.
|
|
|
|
*/
|
2010-02-14 19:42:19 +01:00
|
|
|
tuple = SearchSysCache1(DATABASEOID, ObjectIdGetDatum(dbOid));
|
2006-05-04 18:07:29 +02:00
|
|
|
if (HeapTupleIsValid(tuple))
|
|
|
|
{
|
|
|
|
Form_pg_database dbform = (Form_pg_database) GETSTRUCT(tuple);
|
|
|
|
|
|
|
|
if (strcmp(name, NameStr(dbform->datname)) == 0)
|
|
|
|
{
|
2022-01-27 08:44:31 +01:00
|
|
|
Datum datum;
|
|
|
|
bool isnull;
|
|
|
|
|
2006-05-04 18:07:29 +02:00
|
|
|
/* oid of the database */
|
|
|
|
if (dbIdP)
|
|
|
|
*dbIdP = dbOid;
|
|
|
|
/* oid of the owner */
|
|
|
|
if (ownerIdP)
|
|
|
|
*ownerIdP = dbform->datdba;
|
|
|
|
/* character encoding */
|
|
|
|
if (encodingP)
|
|
|
|
*encodingP = dbform->encoding;
|
|
|
|
/* allowed as template? */
|
|
|
|
if (dbIsTemplateP)
|
|
|
|
*dbIsTemplateP = dbform->datistemplate;
|
|
|
|
/* allowing connections? */
|
|
|
|
if (dbAllowConnP)
|
|
|
|
*dbAllowConnP = dbform->datallowconn;
|
Fix recently-understood problems with handling of XID freezing, particularly
in PITR scenarios. We now WAL-log the replacement of old XIDs with
FrozenTransactionId, so that such replacement is guaranteed to propagate to
PITR slave databases. Also, rather than relying on hint-bit updates to be
preserved, pg_clog is not truncated until all instances of an XID are known to
have been replaced by FrozenTransactionId. Add new GUC variables and
pg_autovacuum columns to allow management of the freezing policy, so that
users can trade off the size of pg_clog against the amount of freezing work
done. Revise the already-existing code that forces autovacuum of tables
approaching the wraparound point to make it more bulletproof; also, revise the
autovacuum logic so that anti-wraparound vacuuming is done per-table rather
than per-database. initdb forced because of changes in pg_class, pg_database,
and pg_autovacuum catalogs. Heikki Linnakangas, Simon Riggs, and Tom Lane.
2006-11-05 23:42:10 +01:00
|
|
|
/* limit of frozen XIDs */
|
|
|
|
if (dbFrozenXidP)
|
|
|
|
*dbFrozenXidP = dbform->datfrozenxid;
|
2019-07-29 05:28:30 +02:00
|
|
|
/* minimum MultiXactId */
|
Improve concurrency of foreign key locking
This patch introduces two additional lock modes for tuples: "SELECT FOR
KEY SHARE" and "SELECT FOR NO KEY UPDATE". These don't block each
other, in contrast with already existing "SELECT FOR SHARE" and "SELECT
FOR UPDATE". UPDATE commands that do not modify the values stored in
the columns that are part of the key of the tuple now grab a SELECT FOR
NO KEY UPDATE lock on the tuple, allowing them to proceed concurrently
with tuple locks of the FOR KEY SHARE variety.
Foreign key triggers now use FOR KEY SHARE instead of FOR SHARE; this
means the concurrency improvement applies to them, which is the whole
point of this patch.
The added tuple lock semantics require some rejiggering of the multixact
module, so that the locking level that each transaction is holding can
be stored alongside its Xid. Also, multixacts now need to persist
across server restarts and crashes, because they can now represent not
only tuple locks, but also tuple updates. This means we need more
careful tracking of lifetime of pg_multixact SLRU files; since they now
persist longer, we require more infrastructure to figure out when they
can be removed. pg_upgrade also needs to be careful to copy
pg_multixact files over from the old server to the new, or at least part
of multixact.c state, depending on the versions of the old and new
servers.
Tuple time qualification rules (HeapTupleSatisfies routines) need to be
careful not to consider tuples with the "is multi" infomask bit set as
being only locked; they might need to look up MultiXact values (i.e.
possibly do pg_multixact I/O) to find out the Xid that updated a tuple,
whereas they previously were assured to only use information readily
available from the tuple header. This is considered acceptable, because
the extra I/O would involve cases that would previously cause some
commands to block waiting for concurrent transactions to finish.
Another important change is the fact that locking tuples that have
previously been updated causes the future versions to be marked as
locked, too; this is essential for correctness of foreign key checks.
This causes additional WAL-logging, also (there was previously a single
WAL record for a locked tuple; now there are as many as updated copies
of the tuple there exist.)
With all this in place, contention related to tuples being checked by
foreign key rules should be much reduced.
As a bonus, the old behavior that a subtransaction grabbing a stronger
tuple lock than the parent (sub)transaction held on a given tuple and
later aborting caused the weaker lock to be lost, has been fixed.
Many new spec files were added for isolation tester framework, to ensure
overall behavior is sane. There's probably room for several more tests.
There were several reviewers of this patch; in particular, Noah Misch
and Andres Freund spent considerable time in it. Original idea for the
patch came from Simon Riggs, after a problem report by Joel Jacobson.
Most code is from me, with contributions from Marti Raudsepp, Alexander
Shulgin, Noah Misch and Andres Freund.
This patch was discussed in several pgsql-hackers threads; the most
important start at the following message-ids:
AANLkTimo9XVcEzfiBR-ut3KVNDkjm2Vxh+t8kAmWjPuv@mail.gmail.com
1290721684-sup-3951@alvh.no-ip.org
1294953201-sup-2099@alvh.no-ip.org
1320343602-sup-2290@alvh.no-ip.org
1339690386-sup-8927@alvh.no-ip.org
4FE5FF020200002500048A3D@gw.wicourts.gov
4FEAB90A0200002500048B7D@gw.wicourts.gov
2013-01-23 16:04:59 +01:00
|
|
|
if (dbMinMultiP)
|
|
|
|
*dbMinMultiP = dbform->datminmxid;
|
2006-05-04 18:07:29 +02:00
|
|
|
/* default tablespace for this database */
|
|
|
|
if (dbTablespace)
|
|
|
|
*dbTablespace = dbform->dattablespace;
|
2008-09-23 11:20:39 +02:00
|
|
|
/* default locale settings for this database */
|
2022-03-17 11:11:21 +01:00
|
|
|
if (dbLocProvider)
|
|
|
|
*dbLocProvider = dbform->datlocprovider;
|
2008-09-23 11:20:39 +02:00
|
|
|
if (dbCollate)
|
2022-01-27 08:44:31 +01:00
|
|
|
{
|
2023-03-25 22:49:33 +01:00
|
|
|
datum = SysCacheGetAttrNotNull(DATABASEOID, tuple, Anum_pg_database_datcollate);
|
2022-01-27 08:44:31 +01:00
|
|
|
*dbCollate = TextDatumGetCString(datum);
|
|
|
|
}
|
2008-09-23 11:20:39 +02:00
|
|
|
if (dbCtype)
|
2022-01-27 08:44:31 +01:00
|
|
|
{
|
2023-03-25 22:49:33 +01:00
|
|
|
datum = SysCacheGetAttrNotNull(DATABASEOID, tuple, Anum_pg_database_datctype);
|
2022-01-27 08:44:31 +01:00
|
|
|
*dbCtype = TextDatumGetCString(datum);
|
|
|
|
}
|
2022-03-17 11:11:21 +01:00
|
|
|
if (dbIculocale)
|
|
|
|
{
|
|
|
|
datum = SysCacheGetAttr(DATABASEOID, tuple, Anum_pg_database_daticulocale, &isnull);
|
|
|
|
if (isnull)
|
|
|
|
*dbIculocale = NULL;
|
|
|
|
else
|
|
|
|
*dbIculocale = TextDatumGetCString(datum);
|
|
|
|
}
|
2023-03-08 16:35:42 +01:00
|
|
|
if (dbIcurules)
|
|
|
|
{
|
|
|
|
datum = SysCacheGetAttr(DATABASEOID, tuple, Anum_pg_database_daticurules, &isnull);
|
|
|
|
if (isnull)
|
|
|
|
*dbIcurules = NULL;
|
|
|
|
else
|
|
|
|
*dbIcurules = TextDatumGetCString(datum);
|
|
|
|
}
|
2022-02-14 08:09:04 +01:00
|
|
|
if (dbCollversion)
|
|
|
|
{
|
|
|
|
datum = SysCacheGetAttr(DATABASEOID, tuple, Anum_pg_database_datcollversion, &isnull);
|
|
|
|
if (isnull)
|
|
|
|
*dbCollversion = NULL;
|
|
|
|
else
|
|
|
|
*dbCollversion = TextDatumGetCString(datum);
|
|
|
|
}
|
2006-05-04 18:07:29 +02:00
|
|
|
ReleaseSysCache(tuple);
|
|
|
|
result = true;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
/* can only get here if it was just renamed */
|
|
|
|
ReleaseSysCache(tuple);
|
|
|
|
}
|
|
|
|
|
|
|
|
if (lockmode != NoLock)
|
|
|
|
UnlockSharedObject(DatabaseRelationId, dbOid, 0, lockmode);
|
1996-07-09 08:22:35 +02:00
|
|
|
}
|
1997-09-07 07:04:48 +02:00
|
|
|
|
2019-01-21 19:32:19 +01:00
|
|
|
table_close(relation, AccessShareLock);
|
1997-09-07 07:04:48 +02:00
|
|
|
|
2006-05-04 18:07:29 +02:00
|
|
|
return result;
|
2000-01-13 19:26:18 +01:00
|
|
|
}

/* Check if current user has createdb privileges */
bool
have_createdb_privilege(void)
{
	bool		result = false;
	HeapTuple	utup;

	/* Superusers can always do everything */
	if (superuser())
		return true;

	utup = SearchSysCache1(AUTHOID, ObjectIdGetDatum(GetUserId()));
	if (HeapTupleIsValid(utup))
	{
		result = ((Form_pg_authid) GETSTRUCT(utup))->rolcreatedb;
		ReleaseSysCache(utup);
	}
	return result;
}

/*
 * Remove tablespace directories
 *
 * We don't know what tablespaces db_id is using, so iterate through all
 * tablespaces removing <tablespace>/db_id
 */
static void
remove_dbtablespaces(Oid db_id)
{
	Relation	rel;
	TableScanDesc scan;
	HeapTuple	tuple;
	List	   *ltblspc = NIL;
	ListCell   *cell;
	int			ntblspc;
	int			i;
	Oid		   *tablespace_ids;

	rel = table_open(TableSpaceRelationId, AccessShareLock);
	scan = table_beginscan_catalog(rel, 0, NULL);
	while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
	{
		Form_pg_tablespace spcform = (Form_pg_tablespace) GETSTRUCT(tuple);
		Oid			dsttablespace = spcform->oid;
		char	   *dstpath;
		struct stat st;

		/* Don't mess with the global tablespace */
		if (dsttablespace == GLOBALTABLESPACE_OID)
			continue;

		dstpath = GetDatabasePath(db_id, dsttablespace);

		if (lstat(dstpath, &st) < 0 || !S_ISDIR(st.st_mode))
		{
			/* Assume we can ignore it */
			pfree(dstpath);
			continue;
		}

		if (!rmtree(dstpath, true))
			ereport(WARNING,
					(errmsg("some useless files may be left behind in old database directory \"%s\"",
							dstpath)));

		ltblspc = lappend_oid(ltblspc, dsttablespace);
		pfree(dstpath);
	}

	ntblspc = list_length(ltblspc);
	if (ntblspc == 0)
	{
		table_endscan(scan);
		table_close(rel, AccessShareLock);
		return;
	}

	tablespace_ids = (Oid *) palloc(ntblspc * sizeof(Oid));
	i = 0;
	foreach(cell, ltblspc)
		tablespace_ids[i++] = lfirst_oid(cell);

	/* Record the filesystem change in XLOG */
	{
		xl_dbase_drop_rec xlrec;

		xlrec.db_id = db_id;
		xlrec.ntablespaces = ntblspc;

		XLogBeginInsert();
		XLogRegisterData((char *) &xlrec, MinSizeOfDbaseDropRec);
		XLogRegisterData((char *) tablespace_ids, ntblspc * sizeof(Oid));

		(void) XLogInsert(RM_DBASE_ID,
						  XLOG_DBASE_DROP | XLR_SPECIAL_REL_UPDATE);
	}

	list_free(ltblspc);
	pfree(tablespace_ids);
tableam: Add and use scan APIs.
Too allow table accesses to be not directly dependent on heap, several
new abstractions are needed. Specifically:
1) Heap scans need to be generalized into table scans. Do this by
introducing TableScanDesc, which will be the "base class" for
individual AMs. This contains the AM independent fields from
HeapScanDesc.
The previous heap_{beginscan,rescan,endscan} et al. have been
replaced with a table_ version.
There's no direct replacement for heap_getnext(), as that returned
a HeapTuple, which is undesirable for a other AMs. Instead there's
table_scan_getnextslot(). But note that heap_getnext() lives on,
it's still used widely to access catalog tables.
This is achieved by new scan_begin, scan_end, scan_rescan,
scan_getnextslot callbacks.
2) The portion of parallel scans that's shared between backends need
to be able to do so without the user doing per-AM work. To achieve
that new parallelscan_{estimate, initialize, reinitialize}
callbacks are introduced, which operate on a new
ParallelTableScanDesc, which again can be subclassed by AMs.
As it is likely that several AMs are going to be block oriented,
block oriented callbacks that can be shared between such AMs are
provided and used by heap. table_block_parallelscan_{estimate,
intiialize, reinitialize} as callbacks, and
table_block_parallelscan_{nextpage, init} for use in AMs. These
operate on a ParallelBlockTableScanDesc.
3) Index scans need to be able to access tables to return a tuple, and
there needs to be state across individual accesses to the heap to
store state like buffers. That's now handled by introducing a
sort-of-scan IndexFetchTable, which again is intended to be
subclassed by individual AMs (for heap IndexFetchHeap).
The relevant callbacks for an AM are index_fetch_{end, begin,
reset} to create the necessary state, and index_fetch_tuple to
retrieve an indexed tuple. Note that index_fetch_tuple
implementations need to be smarter than just blindly fetching the
tuples for AMs that have optimizations similar to heap's HOT - the
currently alive tuple in the update chain needs to be fetched if
appropriate.
Similar to table_scan_getnextslot(), it's undesirable to continue
to return HeapTuples. Thus index_fetch_heap (might want to rename
that later) now accepts a slot as an argument. Core code doesn't
have a lot of call sites performing index scans without going
through the systable_* API (in contrast to loads of heap_getnext
calls and working directly with HeapTuples).
Index scans now store the result of a search in
IndexScanDesc->xs_heaptid, rather than xs_ctup->t_self. As the
target is not generally a HeapTuple anymore that seems cleaner.
To be able to sensibly adapt code to use the above, two further
callbacks have been introduced:
a) slot_callbacks returns a TupleTableSlotOps* suitable for creating
slots capable of holding a tuple of the AMs
type. table_slot_callbacks() and table_slot_create() are based
upon that, but have additional logic to deal with views, foreign
tables, etc.
While this change could have been done separately, nearly all the
call sites that needed to be adapted for the rest of this commit
also would have needed to be adapted for
table_slot_callbacks(), making separation not worthwhile.
b) tuple_satisfies_snapshot checks whether the tuple in a slot is
currently visible according to a snapshot. That's required as a few
places now don't have a buffer + HeapTuple around, but a
slot (which in heap's case internally has that information).
Additionally a few infrastructure changes were needed:
I) SysScanDesc, as used by systable_{beginscan, getnext} et al. now
internally uses a slot to keep track of tuples. While
systable_getnext() still returns HeapTuples, and will do so for the
foreseeable future, the index API (see 1) above) now only deals with
slots.
The remainder, and largest part, of this commit is then adjusting all
scans in postgres to use the new APIs.
Author: Andres Freund, Haribabu Kommi, Alvaro Herrera
Discussion:
https://postgr.es/m/20180703070645.wchpu5muyto5n647@alap3.anarazel.de
https://postgr.es/m/20160812231527.GA690404@alvherre.pgsql
2019-03-11 20:46:41 +01:00
|
|
|
table_endscan(scan);
|
2019-01-21 19:32:19 +01:00
|
|
|
table_close(rel, AccessShareLock);
|
2000-11-08 17:59:50 +01:00
|
|
|
}
|
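The callback-based scan API described in the tableam commit message above (scan_begin, scan_getnextslot, scan_end, with per-AM descriptors "subclassing" a shared base struct) can be sketched in miniature. Everything here (ToyAm, ToyScanDesc, ArrayScanDesc, and so on) is a hypothetical illustration of the pattern, not PostgreSQL's actual definitions:

```c
#include <stdbool.h>
#include <stdlib.h>

/* A toy "slot" holding one tuple value; stands in for TupleTableSlot. */
typedef struct ToySlot { int value; bool valid; } ToySlot;

/* Base scan descriptor; an AM embeds this as its first member. */
typedef struct ToyScanDesc { const struct ToyAm *am; } ToyScanDesc;

/* AM callbacks, loosely mirroring scan_begin/scan_getnextslot/scan_end. */
typedef struct ToyAm {
    ToyScanDesc *(*scan_begin)(const int *data, int n);
    bool (*scan_getnextslot)(ToyScanDesc *scan, ToySlot *slot);
    void (*scan_end)(ToyScanDesc *scan);
} ToyAm;

/* An array-backed AM: subclass the base descriptor with its own state. */
typedef struct ArrayScanDesc {
    ToyScanDesc base;
    const int *data;
    int n, pos;
} ArrayScanDesc;

static ToyScanDesc *array_scan_begin(const int *data, int n);
static bool array_scan_getnextslot(ToyScanDesc *scan, ToySlot *slot);
static void array_scan_end(ToyScanDesc *scan);

static const ToyAm array_am = {
    array_scan_begin, array_scan_getnextslot, array_scan_end
};

static ToyScanDesc *array_scan_begin(const int *data, int n)
{
    ArrayScanDesc *scan = malloc(sizeof(ArrayScanDesc));
    scan->base.am = &array_am;
    scan->data = data;
    scan->n = n;
    scan->pos = 0;
    return &scan->base;
}

static bool array_scan_getnextslot(ToyScanDesc *scan, ToySlot *slot)
{
    /* Downcast is safe because base is the first member. */
    ArrayScanDesc *a = (ArrayScanDesc *) scan;

    if (a->pos >= a->n)
    {
        slot->valid = false;
        return false;
    }
    slot->value = a->data[a->pos++];
    slot->valid = true;
    return true;
}

static void array_scan_end(ToyScanDesc *scan)
{
    free(scan);
}

/* Caller code is AM-independent: it only touches the callbacks. */
static int sum_all(const ToyAm *am, const int *data, int n)
{
    ToyScanDesc *scan = am->scan_begin(data, n);
    ToySlot slot;
    int sum = 0;

    while (am->scan_getnextslot(scan, &slot))
        sum += slot.value;
    am->scan_end(scan);
    return sum;
}
```

The point is the shape of the indirection: callers like the loop in check_db_file_conflict() below never know which AM they are scanning, only that the callbacks honor the begin/getnextslot/end contract.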
2002-08-09 18:45:16 +02:00
|
|
|
|
2006-10-19 00:44:12 +02:00
|
|
|
/*
|
|
|
|
* Check for existing files that conflict with a proposed new DB OID;
|
2017-08-16 06:22:32 +02:00
|
|
|
* return true if there are any
|
2006-10-19 00:44:12 +02:00
|
|
|
*
|
|
|
|
* If there were a subdirectory in any tablespace matching the proposed new
|
|
|
|
* OID, we'd get a create failure due to the duplicate name ... and then we'd
|
|
|
|
* try to remove that already-existing subdirectory during the cleanup in
|
|
|
|
* remove_dbtablespaces. Nuking existing files seems like a bad idea, so
|
|
|
|
* instead we make this extra check before settling on the OID of the new
|
Change internal RelFileNode references to RelFileNumber or RelFileLocator.
We have been using the term RelFileNode to refer to either (1) the
integer that is used to name the sequence of files for a certain relation
within the directory set aside for that tablespace/database combination;
or (2) that value plus the OIDs of the tablespace and database; or
occasionally (3) the whole series of files created for a relation
based on those values. Using the same name for more than one thing is
confusing.
Replace RelFileNode with RelFileNumber when we're talking about just the
single number, i.e. (1) from above, and with RelFileLocator when we're
talking about all the things that are needed to locate a relation's files
on disk, i.e. (2) from above. In the places where we refer to (3) as
a relfilenode, instead refer to "relation storage".
Since there is a ton of SQL code in the world that knows about
pg_class.relfilenode, don't change the name of that column, or of other
SQL-facing things that derive their name from it.
On the other hand, do adjust closely-related internal terminology. For
example, the structure member names dbNode and spcNode appear to be
derived from the fact that the structure itself was called RelFileNode,
so change those to dbOid and spcOid. Likewise, various variables with
names like rnode and relnode get renamed appropriately, according to
how they're being used in context.
Hopefully, this is clearer than before. It is also preparation for
future patches that intend to widen the relfilenumber field from its
current width of 32 bits. Variables that store a relfilenumber are now
declared as type RelFileNumber rather than type Oid; right now, these
are the same, but that can now more easily be changed.
Dilip Kumar, per an idea from me. Reviewed also by Andres Freund.
I fixed some whitespace issues, changed a couple of words in a
comment, and made one other minor correction.
Discussion: http://postgr.es/m/CA+TgmoamOtXbVAQf9hWFzonUo6bhhjS6toZQd7HZ-pmojtAmag@mail.gmail.com
Discussion: http://postgr.es/m/CA+Tgmobp7+7kmi4gkq7Y+4AM9fTvL+O1oQ4-5gFTT+6Ng-dQ=g@mail.gmail.com
Discussion: http://postgr.es/m/CAFiTN-vTe79M8uDH1yprOU64MNFE+R3ODRuA+JWf27JbhY4hJw@mail.gmail.com
2022-07-06 17:39:09 +02:00
|
|
|
* database. This exactly parallels what GetNewRelFileNumber() does for table
|
|
|
|
* relfilenumber values.
|
2006-10-19 00:44:12 +02:00
|
|
|
*/
|
|
|
|
static bool
|
|
|
|
check_db_file_conflict(Oid db_id)
|
|
|
|
{
|
|
|
|
bool result = false;
|
|
|
|
Relation rel;
|
2019-03-11 20:46:41 +01:00
|
|
|
TableScanDesc scan;
|
2006-10-19 00:44:12 +02:00
|
|
|
HeapTuple tuple;
|
|
|
|
|
2019-01-21 19:32:19 +01:00
|
|
|
rel = table_open(TableSpaceRelationId, AccessShareLock);
|
2019-03-11 20:46:41 +01:00
|
|
|
scan = table_beginscan_catalog(rel, 0, NULL);
|
2006-10-19 00:44:12 +02:00
|
|
|
while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
|
|
|
|
{
|
Remove WITH OIDS support, change oid catalog column visibility.
Previously tables declared WITH OIDS, including a significant fraction
of the catalog tables, stored the oid column not as a normal column,
but as part of the tuple header.
This special column was not shown by default, which was somewhat odd,
as it's often (consider e.g. pg_class.oid) one of the more important
parts of a row. Neither pg_dump nor COPY included the contents of the
oid column by default.
The fact that the oid column was not an ordinary column necessitated a
significant amount of special case code to support oid columns. That
was already painful for the existing code, and the upcoming work aiming
to make table storage pluggable would have required expanding and
duplicating that "specialness" significantly.
WITH OIDS has been deprecated since 2005 (commit ff02d0a05280e0).
Remove it.
Removing includes:
- CREATE TABLE and ALTER TABLE syntax for declaring the table to be
WITH OIDS has been removed (WITH (oids[ = true]) will error out)
- pg_dump does not support dumping tables declared WITH OIDS and will
issue a warning when dumping one (and ignore the oid column).
- restoring a pg_dump archive with pg_restore will warn when
restoring a table with oid contents (and ignore the oid column)
- COPY will refuse to load binary dump that includes oids.
- pg_upgrade will error out when encountering tables declared WITH
OIDS, they have to be altered to remove the oid column first.
- Functionality to access the oid of the last inserted row (like
plpgsql's RESULT_OID, spi's SPI_lastoid, ...) has been removed.
The syntax for declaring a table WITHOUT OIDS (or WITH (oids = false)
for CREATE TABLE) is still supported. While that requires a bit of
support code, it seems unnecessary to break applications / dumps that
do not use oids, and are explicit about not using them.
The biggest user of WITH OID columns was postgres' catalog. This
commit changes all 'magic' oid columns to be columns that are normally
declared and stored. To reduce unnecessary query breakage all the
newly added columns are still named 'oid', even if a table's column
naming scheme would indicate 'reloid' or such. This obviously
requires adapting a lot of code, mostly replacing oid access via
HeapTupleGetOid() with access to the underlying Form_pg_*->oid column.
The bootstrap process now assigns oids for all oid columns in
genbki.pl that do not have an explicit value (starting at the largest
oid previously used); only oids assigned later will be above
FirstBootstrapObjectId. As the oid column now is a normal column the
special bootstrap syntax for oids has been removed.
Oids are not automatically assigned during insertion anymore, all
backend code explicitly assigns oids with GetNewOidWithIndex(). For
the rare case that insertions into the catalog via SQL are called for
the new pg_nextoid() function can be used (which only works on catalog
tables).
The fact that oid columns on system tables are now normal columns
means that they will be included in the set of columns expanded
by * (i.e. SELECT * FROM pg_class will now include the table's oid,
previously it did not). It'd not technically be hard to hide the oid
column by default, but that'd mean confusing behavior would either
have to be carried forward forever, or it'd cause breakage down the
line.
While it's not unlikely that further adjustments are needed, the
scope/invasiveness of the patch makes it worthwhile to merge this
now. It's painful to maintain externally, too complicated to commit
after the code freeze, and a dependency of a number of other
patches.
Catversion bump, for obvious reasons.
Author: Andres Freund, with contributions by John Naylor
Discussion: https://postgr.es/m/20180930034810.ywp2c7awz7opzcfr@alap3.anarazel.de
2018-11-21 00:36:57 +01:00
|
|
|
Form_pg_tablespace spcform = (Form_pg_tablespace) GETSTRUCT(tuple);
|
|
|
|
Oid dsttablespace = spcform->oid;
|
2006-10-19 00:44:12 +02:00
|
|
|
char *dstpath;
|
|
|
|
struct stat st;
|
|
|
|
|
|
|
|
/* Don't mess with the global tablespace */
|
|
|
|
if (dsttablespace == GLOBALTABLESPACE_OID)
|
|
|
|
continue;
|
|
|
|
|
|
|
|
dstpath = GetDatabasePath(db_id, dsttablespace);
|
|
|
|
|
|
|
|
if (lstat(dstpath, &st) == 0)
|
|
|
|
{
|
|
|
|
/* Found a conflicting file (or directory, whatever) */
|
|
|
|
pfree(dstpath);
|
|
|
|
result = true;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
pfree(dstpath);
|
|
|
|
}
|
|
|
|
|
2019-03-11 20:46:41 +01:00
|
|
|
table_endscan(scan);
|
2019-01-21 19:32:19 +01:00
|
|
|
table_close(rel, AccessShareLock);
|
2013-01-19 00:06:20 +01:00
|
|
|
|
2006-10-19 00:44:12 +02:00
|
|
|
return result;
|
|
|
|
}
|
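The per-tablespace probe inside check_db_file_conflict() boils down to one system call: lstat() the would-be database path and treat any existing entry, whether file, directory, or symlink, as a conflict. A standalone sketch of just that probe (the helper name path_conflicts is hypothetical, not PostgreSQL code):

```c
#include <stdbool.h>
#include <sys/stat.h>

/*
 * Return true if anything already exists at the given path.
 * lstat() (rather than stat()) is used so a dangling symlink
 * still counts as a conflict, mirroring the check above.
 */
static bool
path_conflicts(const char *path)
{
    struct stat st;

    return lstat(path, &st) == 0;
}
```

In the real function this check runs once per row of pg_tablespace, skipping the global tablespace, and a single hit is enough to make the proposed OID unusable.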
2002-08-09 18:45:16 +02:00
|
|
|
|
2008-08-04 20:03:46 +02:00
|
|
|
/*
|
|
|
|
* Issue a suitable errdetail message for a busy database
|
|
|
|
*/
|
|
|
|
static int
|
|
|
|
errdetail_busy_db(int notherbackends, int npreparedxacts)
|
|
|
|
{
|
|
|
|
if (notherbackends > 0 && npreparedxacts > 0)
|
2013-05-29 22:58:43 +02:00
|
|
|
|
2012-06-15 01:01:00 +02:00
|
|
|
/*
|
|
|
|
* We don't deal with singular versus plural here, since gettext
|
|
|
|
* doesn't support multiple plurals in one string.
|
|
|
|
*/
|
2008-08-04 20:03:46 +02:00
|
|
|
errdetail("There are %d other session(s) and %d prepared transaction(s) using the database.",
|
|
|
|
notherbackends, npreparedxacts);
|
|
|
|
else if (notherbackends > 0)
|
2012-06-15 01:01:00 +02:00
|
|
|
errdetail_plural("There is %d other session using the database.",
|
|
|
|
"There are %d other sessions using the database.",
|
|
|
|
notherbackends,
|
|
|
|
notherbackends);
|
2008-08-04 20:03:46 +02:00
|
|
|
else
|
2012-06-15 01:01:00 +02:00
|
|
|
errdetail_plural("There is %d prepared transaction using the database.",
|
|
|
|
"There are %d prepared transactions using the database.",
|
|
|
|
npreparedxacts,
|
|
|
|
npreparedxacts);
|
2008-08-04 20:03:46 +02:00
|
|
|
return 0; /* just to keep ereport macro happy */
|
|
|
|
}
|
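The branching in errdetail_busy_db() exists because gettext can pluralize on only one count per message: the mixed sessions-plus-prepared-transactions case uses "%d ... (s)" wording, while the single-count cases go through errdetail_plural(), which picks the singular or plural form by count. A minimal stand-in for that selection (names like plural_fmt and busy_db_detail are hypothetical; real code routes through ngettext() for translation):

```c
#include <stdio.h>

/* Pick singular or plural format by count; translation omitted. */
static const char *
plural_fmt(const char *singular, const char *plural, int n)
{
    return (n == 1) ? singular : plural;
}

/* Format a detail line for the other-sessions-only case. */
static int
busy_db_detail(char *buf, size_t buflen, int nbackends)
{
    return snprintf(buf, buflen,
                    plural_fmt("There is %d other session using the database.",
                               "There are %d other sessions using the database.",
                               nbackends),
                    nbackends);
}
```

This is why the combined case cannot use the same mechanism: one format string cannot pluralize independently on both notherbackends and npreparedxacts.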
|
|
|
|
2002-08-09 18:45:16 +02:00
|
|
|
/*
|
|
|
|
* get_database_oid - given a database name, look up the OID
|
|
|
|
*
|
2010-08-05 16:45:09 +02:00
|
|
|
* If missing_ok is false, throw an error if database name not found. If
|
|
|
|
* true, just return InvalidOid.
|
2002-08-09 18:45:16 +02:00
|
|
|
*/
|
|
|
|
Oid
|
2010-08-05 16:45:09 +02:00
|
|
|
get_database_oid(const char *dbname, bool missing_ok)
|
2002-08-09 18:45:16 +02:00
|
|
|
{
|
|
|
|
Relation pg_database;
|
|
|
|
ScanKeyData entry[1];
|
2003-06-27 16:45:32 +02:00
|
|
|
SysScanDesc scan;
|
2002-08-09 18:45:16 +02:00
|
|
|
HeapTuple dbtuple;
|
|
|
|
Oid oid;
|
|
|
|
|
2006-05-04 00:45:26 +02:00
|
|
|
/*
|
|
|
|
* There's no syscache for pg_database indexed by name, so we must look
|
|
|
|
* the hard way.
|
|
|
|
*/
|
2019-01-21 19:32:19 +01:00
|
|
|
pg_database = table_open(DatabaseRelationId, AccessShareLock);
|
2003-11-12 22:15:59 +01:00
|
|
|
ScanKeyInit(&entry[0],
|
|
|
|
Anum_pg_database_datname,
|
|
|
|
BTEqualStrategyNumber, F_NAMEEQ,
|
|
|
|
CStringGetDatum(dbname));
|
2005-04-14 22:03:27 +02:00
|
|
|
scan = systable_beginscan(pg_database, DatabaseNameIndexId, true,
|
Use an MVCC snapshot, rather than SnapshotNow, for catalog scans.
SnapshotNow scans have the undesirable property that, in the face of
concurrent updates, the scan can fail to see either the old or the new
versions of the row. In many cases, we work around this by requiring
DDL operations to hold AccessExclusiveLock on the object being
modified; in some cases, the existing locking is inadequate and random
failures occur as a result. This commit doesn't change anything
related to locking, but will hopefully pave the way to allowing lock
strength reductions in the future.
The major issue that has held us back from making this change in the
past is that taking an MVCC snapshot is significantly more expensive than
using a static special snapshot such as SnapshotNow. However, testing
of various worst-case scenarios reveals that this problem is not
severe except under fairly extreme workloads. To mitigate those
problems, we avoid retaking the MVCC snapshot for each new scan;
instead, we take a new snapshot only when invalidation messages have
been processed. The catcache machinery already requires that
invalidation messages be sent before releasing the related heavyweight
lock; else other backends might rely on locally-cached data rather
than scanning the catalog at all. Thus, making snapshot reuse
dependent on the same guarantees shouldn't break anything that wasn't
already subtly broken.
Patch by me. Review by Michael Paquier and Andres Freund.
2013-07-02 15:47:01 +02:00
|
|
|
NULL, 1, entry);
|
2002-08-09 18:45:16 +02:00
|
|
|
|
2003-06-27 16:45:32 +02:00
|
|
|
dbtuple = systable_getnext(scan);
|
2002-08-09 18:45:16 +02:00
|
|
|
|
|
|
|
/* We assume that there can be at most one matching tuple */
|
|
|
|
if (HeapTupleIsValid(dbtuple))
|
2018-11-21 00:36:57 +01:00
|
|
|
oid = ((Form_pg_database) GETSTRUCT(dbtuple))->oid;
|
2002-08-09 18:45:16 +02:00
|
|
|
else
|
|
|
|
oid = InvalidOid;
|
|
|
|
|
2003-06-27 16:45:32 +02:00
|
|
|
systable_endscan(scan);
|
2019-01-21 19:32:19 +01:00
|
|
|
table_close(pg_database, AccessShareLock);
|
2002-08-09 18:45:16 +02:00
|
|
|
|
2010-08-05 16:45:09 +02:00
|
|
|
if (!OidIsValid(oid) && !missing_ok)
|
|
|
|
ereport(ERROR,
|
|
|
|
(errcode(ERRCODE_UNDEFINED_DATABASE),
|
|
|
|
errmsg("database \"%s\" does not exist",
|
|
|
|
dbname)));
|
|
|
|
|
2002-08-09 18:45:16 +02:00
|
|
|
return oid;
|
|
|
|
}
|
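The missing_ok contract that get_database_oid() implements, return the OID on a match, and on a miss either raise an error or quietly hand back InvalidOid, can be isolated in a small standalone sketch. The toy catalog, last_error flag, and toy_get_database_oid name are all hypothetical illustrations, not PostgreSQL's lookup path:

```c
#include <string.h>

typedef unsigned int Oid;
#define InvalidOid ((Oid) 0)

/* Toy catalog: name -> oid pairs standing in for pg_database rows. */
struct db_row { const char *datname; Oid oid; };
static const struct db_row toy_catalog[] = {
    {"postgres", 5}, {"template1", 1},
};

static int last_error;          /* stands in for ereport(ERROR, ...) */

/*
 * Mirror get_database_oid()'s contract: return the OID on a match;
 * on a miss, either flag an error (missing_ok = false) or quietly
 * return InvalidOid (missing_ok = true).
 */
static Oid
toy_get_database_oid(const char *dbname, int missing_ok)
{
    Oid oid = InvalidOid;
    size_t i;

    for (i = 0; i < sizeof(toy_catalog) / sizeof(toy_catalog[0]); i++)
        if (strcmp(toy_catalog[i].datname, dbname) == 0)
            oid = toy_catalog[i].oid;

    if (oid == InvalidOid && !missing_ok)
        last_error = 1;         /* real code: ereport(ERROR, ...) */
    return oid;
}
```

The real function must do the linear index scan shown above because, as its comment notes, there is no syscache for pg_database indexed by name; get_database_name() below goes the other direction and can use the DATABASEOID syscache.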
|
|
|
|
2003-06-27 16:45:32 +02:00
|
|
|
|
2002-08-09 18:45:16 +02:00
|
|
|
/*
 * get_database_name - given a database OID, look up the name
 *
 * Returns a palloc'd string, or NULL if no such database.
 */
char *
get_database_name(Oid dbid)
{
	HeapTuple	dbtuple;
	char	   *result;

	dbtuple = SearchSysCache1(DATABASEOID, ObjectIdGetDatum(dbid));
	if (HeapTupleIsValid(dbtuple))
	{
		result = pstrdup(NameStr(((Form_pg_database) GETSTRUCT(dbtuple))->datname));
		ReleaseSysCache(dbtuple);
	}
	else
		result = NULL;

	return result;
}

Fix replay of create database records on standby
Crash recovery on standby may encounter missing directories
when replaying database-creation WAL records. Prior to this
patch, the standby would fail to recover in such a case;
however, the directories could be legitimately missing.
Consider the following sequence of commands:
CREATE DATABASE
DROP DATABASE
DROP TABLESPACE
If, after replaying the last WAL record and removing the
tablespace directory, the standby crashes and has to replay the
create database record again, crash recovery must be able to continue.
A fix for this problem was already attempted in 49d9cfc68bf4, but it
was reverted because of design issues. This new version is based
on Robert Haas' proposal: any missing tablespaces are created
during recovery before reaching consistency. Tablespaces
are created as real directories, and should be deleted
by later replay. CheckRecoveryConsistency ensures
they have disappeared.
The problems detected by this new code are reported as PANIC,
except when allow_in_place_tablespaces is set to ON, in which
case they are WARNING. Apart from making tests possible, this
gives users an escape hatch in case things don't go as planned.
Author: Kyotaro Horiguchi <horikyota.ntt@gmail.com>
Author: Asim R Praveen <apraveen@pivotal.io>
Author: Paul Guo <paulguo@gmail.com>
Reviewed-by: Anastasia Lubennikova <lubennikovaav@gmail.com> (older versions)
Reviewed-by: Fujii Masao <masao.fujii@oss.nttdata.com> (older versions)
Reviewed-by: Michaël Paquier <michael@paquier.xyz>
Diagnosed-by: Paul Guo <paulguo@gmail.com>
Discussion: https://postgr.es/m/CAEET0ZGx9AvioViLf7nbR_8tH9-=27DN5xWJ2P9-ROH16e4JUA@mail.gmail.com
2022-07-28 08:40:06 +02:00
/*
 * recovery_create_dbdir()
 *
 * During recovery, there's a case where we validly need to recover a missing
 * tablespace directory so that recovery can continue.  This happens when
 * recovery wants to create a database but the holding tablespace has been
 * removed before the server stopped.  Since we expect that the directory will
 * be gone before reaching recovery consistency, and we have no knowledge about
 * the tablespace other than its OID here, we create a real directory under
 * pg_tblspc here instead of restoring the symlink.
 *
 * If only_tblspc is true, then the requested directory must be in pg_tblspc/
 */
static void
recovery_create_dbdir(char *path, bool only_tblspc)
{
	struct stat st;

	Assert(RecoveryInProgress());

	if (stat(path, &st) == 0)
		return;

	if (only_tblspc && strstr(path, "pg_tblspc/") == NULL)
		elog(PANIC, "requested to create invalid directory: %s", path);

	if (reachedConsistency && !allow_in_place_tablespaces)
		ereport(PANIC,
				errmsg("missing directory \"%s\"", path));

	elog(reachedConsistency ? WARNING : DEBUG1,
		 "creating missing directory: %s", path);

	if (pg_mkdir_p(path, pg_dir_create_mode) != 0)
		ereport(PANIC,
				errmsg("could not create missing directory \"%s\": %m", path));
}

/*
 * DATABASE resource manager's routines
 */
void
Revamp the WAL record format.
Each WAL record now carries information about the modified relation and
block(s) in a standardized format. That makes it easier to write tools that
need that information, like pg_rewind, prefetching the blocks to speed up
recovery, etc.
There's a whole new API for building WAL records, replacing the XLogRecData
chains used previously. The new API consists of XLogRegister* functions,
which are called for each buffer and chunk of data that is added to the
record. The new API also gives more control over when a full-page image is
written, by passing flags to the XLogRegisterBuffer function.
This also simplifies the XLogReadBufferForRedo() calls. The function can dig
the relation and block number from the WAL record, so they no longer need to
be passed as arguments.
For the convenience of redo routines, XLogReader now dissects each WAL record
after reading it, copying the main data part and the per-block data into
MAXALIGNed buffers. The data chunks are not aligned within the WAL record,
but the redo routines can assume that the pointers returned by XLogRecGet*
functions are. Redo routines are now passed the XLogReaderState, which
contains the record in the already-dissected format, instead of the plain
XLogRecord.
The new record format also makes the fixed size XLogRecord header smaller,
by removing the xl_len field. The length of the "main data" portion is now
stored at the end of the WAL record, and there's a separate header after
XLogRecord for it. The alignment padding at the end of XLogRecord is also
removed. This compensates for the fact that the new format would otherwise
be more bulky than the old format.
Reviewed by Andres Freund, Amit Kapila, Michael Paquier, Alvaro Herrera,
Fujii Masao.
2014-11-20 16:56:26 +01:00
dbase_redo(XLogReaderState *record)
{
	uint8		info = XLogRecGetInfo(record) & ~XLR_INFO_MASK;

	/* Backup blocks are not used in dbase records */
	Assert(!XLogRecHasAnyBlockRefs(record));

Add new block-by-block strategy for CREATE DATABASE.
Because this strategy logs changes on a block-by-block basis, it
avoids the need to checkpoint before and after the operation.
However, because it logs each changed block individually, it might
generate a lot of extra write-ahead logging if the template database
is large. Therefore, the older strategy remains available via a new
STRATEGY parameter to CREATE DATABASE, and a corresponding --strategy
option to createdb.
Somewhat controversially, this patch assembles the list of relations
to be copied to the new database by reading the pg_class relation of
the template database. Cross-database access like this isn't normally
possible, but it can be made to work here because there can't be any
connections to the database being copied, nor can it contain any
in-doubt transactions. Even so, we have to use lower-level interfaces
than normal, since the table scan and relcache interfaces will not
work for a database to which we're not connected. The advantage of
this approach is that we do not need to rely on the filesystem to
determine what ought to be copied, but instead on PostgreSQL's own
knowledge of the database structure. This avoids, for example,
copying stray files that happen to be located in the source database
directory.
Dilip Kumar, with a fairly large number of cosmetic changes by me.
Reviewed and tested by Ashutosh Sharma, Andres Freund, John Naylor,
Greg Nancarrow, Neha Sharma. Additional feedback from Bruce Momjian,
Heikki Linnakangas, Julien Rouhaud, Adam Brusselback, Kyotaro
Horiguchi, Tomas Vondra, Andrew Dunstan, Álvaro Herrera, and others.
Discussion: http://postgr.es/m/CA+TgmoYtcdxBjLh31DLxUXHxFVMPGzrU5_T=CYCvRyFHywSBUQ@mail.gmail.com
2022-03-29 17:31:43 +02:00
	if (info == XLOG_DBASE_CREATE_FILE_COPY)
	{
		xl_dbase_create_file_copy_rec *xlrec =
			(xl_dbase_create_file_copy_rec *) XLogRecGetData(record);
		char	   *src_path;
		char	   *dst_path;
		char	   *parent_path;
		struct stat st;

		src_path = GetDatabasePath(xlrec->src_db_id, xlrec->src_tablespace_id);
		dst_path = GetDatabasePath(xlrec->db_id, xlrec->tablespace_id);

		/*
		 * Our theory for replaying a CREATE is to forcibly drop the target
		 * subdirectory if present, then re-copy the source data.  This may be
		 * more work than needed, but it is simple to implement.
		 */
		if (stat(dst_path, &st) == 0 && S_ISDIR(st.st_mode))
		{
			if (!rmtree(dst_path, true))
				/* If this failed, copydir() below is going to error. */
				ereport(WARNING,
						(errmsg("some useless files may be left behind in old database directory \"%s\"",
								dst_path)));
		}

		/*
		 * If the parent of the target path doesn't exist, create it now. This
		 * enables us to create the target underneath later.
		 */
		parent_path = pstrdup(dst_path);
		get_parent_directory(parent_path);
		if (stat(parent_path, &st) < 0)
		{
			if (errno != ENOENT)
				ereport(FATAL,
						errmsg("could not stat directory \"%s\": %m",
							   parent_path));

			/* create the parent directory if needed and valid */
			recovery_create_dbdir(parent_path, true);
		}
		pfree(parent_path);

		/*
		 * There's a case where the copy source directory is missing for the
		 * same reason above.  Create the empty source directory so that
		 * copydir below doesn't fail.  The directory will be dropped soon by
		 * recovery.
		 */
		if (stat(src_path, &st) < 0 && errno == ENOENT)
			recovery_create_dbdir(src_path, false);

		/*
		 * Force dirty buffers out to disk, to ensure source database is
		 * up-to-date for the copy.
		 */
		FlushDatabaseBuffers(xlrec->src_db_id);

		/* Close all smgr fds in all backends. */
		WaitForProcSignalBarrier(EmitProcSignalBarrier(PROCSIGNAL_BARRIER_SMGRRELEASE));

		/*
		 * Copy this subdirectory to the new location
		 *
		 * We don't need to copy subdirectories
		 */
		copydir(src_path, dst_path, false);

		pfree(src_path);
		pfree(dst_path);
	}
	else if (info == XLOG_DBASE_CREATE_WAL_LOG)
	{
|
|
|
|
xl_dbase_create_wal_log_rec *xlrec =
|
2023-05-19 23:24:48 +02:00
|
|
|
(xl_dbase_create_wal_log_rec *) XLogRecGetData(record);
|
Add new block-by-block strategy for CREATE DATABASE.
Because this strategy logs changes on a block-by-block basis, it
avoids the need to checkpoint before and after the operation.
However, because it logs each changed block individually, it might
generate a lot of extra write-ahead logging if the template database
is large. Therefore, the older strategy remains available via a new
STRATEGY parameter to CREATE DATABASE, and a corresponding --strategy
option to createdb.
Somewhat controversially, this patch assembles the list of relations
to be copied to the new database by reading the pg_class relation of
the template database. Cross-database access like this isn't normally
possible, but it can be made to work here because there can't be any
connections to the database being copied, nor can it contain any
in-doubt transactions. Even so, we have to use lower-level interfaces
than normal, since the table scan and relcache interfaces will not
work for a database to which we're not connected. The advantage of
this approach is that we do not need to rely on the filesystem to
determine what ought to be copied, but instead on PostgreSQL's own
knowledge of the database structure. This avoids, for example,
copying stray files that happen to be located in the source database
directory.
Dilip Kumar, with a fairly large number of cosmetic changes by me.
Reviewed and tested by Ashutosh Sharma, Andres Freund, John Naylor,
Greg Nancarrow, Neha Sharma. Additional feedback from Bruce Momjian,
Heikki Linnakangas, Julien Rouhaud, Adam Brusselback, Kyotaro
Horiguchi, Tomas Vondra, Andrew Dunstan, Álvaro Herrera, and others.
Discussion: http://postgr.es/m/CA+TgmoYtcdxBjLh31DLxUXHxFVMPGzrU5_T=CYCvRyFHywSBUQ@mail.gmail.com
2022-03-29 17:31:43 +02:00
|
|
|
char *dbpath;
|
Fix replay of create database records on standby
Crash recovery on standby may encounter missing directories
when replaying database-creation WAL records. Prior to this
patch, the standby would fail to recover in such a case;
however, the directories could be legitimately missing.
Consider the following sequence of commands:
CREATE DATABASE
DROP DATABASE
DROP TABLESPACE
If, after replaying the last WAL record and removing the
tablespace directory, the standby crashes and has to replay the
create database record again, crash recovery must be able to continue.
A fix for this problem was already attempted in 49d9cfc68bf4, but it
was reverted because of design issues. This new version is based
on Robert Haas' proposal: any missing tablespaces are created
during recovery before reaching consistency. Tablespaces
are created as real directories, and should be deleted
by later replay. CheckRecoveryConsistency ensures
they have disappeared.
The problems detected by this new code are reported as PANIC,
except when allow_in_place_tablespaces is set to ON, in which
case they are WARNING. Apart from making tests possible, this
gives users an escape hatch in case things don't go as planned.
Author: Kyotaro Horiguchi <horikyota.ntt@gmail.com>
Author: Asim R Praveen <apraveen@pivotal.io>
Author: Paul Guo <paulguo@gmail.com>
Reviewed-by: Anastasia Lubennikova <lubennikovaav@gmail.com> (older versions)
Reviewed-by: Fujii Masao <masao.fujii@oss.nttdata.com> (older versions)
Reviewed-by: Michaël Paquier <michael@paquier.xyz>
Diagnosed-by: Paul Guo <paulguo@gmail.com>
Discussion: https://postgr.es/m/CAEET0ZGx9AvioViLf7nbR_8tH9-=27DN5xWJ2P9-ROH16e4JUA@mail.gmail.com
2022-07-28 08:40:06 +02:00
|
|
|
char *parent_path;
|
Add new block-by-block strategy for CREATE DATABASE.
Because this strategy logs changes on a block-by-block basis, it
avoids the need to checkpoint before and after the operation.
However, because it logs each changed block individually, it might
generate a lot of extra write-ahead logging if the template database
is large. Therefore, the older strategy remains available via a new
STRATEGY parameter to CREATE DATABASE, and a corresponding --strategy
option to createdb.
Somewhat controversially, this patch assembles the list of relations
to be copied to the new database by reading the pg_class relation of
the template database. Cross-database access like this isn't normally
possible, but it can be made to work here because there can't be any
connections to the database being copied, nor can it contain any
in-doubt transactions. Even so, we have to use lower-level interfaces
than normal, since the table scan and relcache interfaces will not
work for a database to which we're not connected. The advantage of
this approach is that we do not need to rely on the filesystem to
determine what ought to be copied, but instead on PostgreSQL's own
knowledge of the database structure. This avoids, for example,
copying stray files that happen to be located in the source database
directory.
Dilip Kumar, with a fairly large number of cosmetic changes by me.
Reviewed and tested by Ashutosh Sharma, Andres Freund, John Naylor,
Greg Nancarrow, Neha Sharma. Additional feedback from Bruce Momjian,
Heikki Linnakangas, Julien Rouhaud, Adam Brusselback, Kyotaro
Horiguchi, Tomas Vondra, Andrew Dunstan, Álvaro Herrera, and others.
Discussion: http://postgr.es/m/CA+TgmoYtcdxBjLh31DLxUXHxFVMPGzrU5_T=CYCvRyFHywSBUQ@mail.gmail.com
2022-03-29 17:31:43 +02:00
|
|
|
|
|
|
|
dbpath = GetDatabasePath(xlrec->db_id, xlrec->tablespace_id);
|
|
|
|
|
Fix replay of create database records on standby
Crash recovery on standby may encounter missing directories
when replaying database-creation WAL records. Prior to this
patch, the standby would fail to recover in such a case;
however, the directories could be legitimately missing.
Consider the following sequence of commands:
CREATE DATABASE
DROP DATABASE
DROP TABLESPACE
If, after replaying the last WAL record and removing the
tablespace directory, the standby crashes and has to replay the
create database record again, crash recovery must be able to continue.
A fix for this problem was already attempted in 49d9cfc68bf4, but it
was reverted because of design issues. This new version is based
on Robert Haas' proposal: any missing tablespaces are created
during recovery before reaching consistency. Tablespaces
are created as real directories, and should be deleted
by later replay. CheckRecoveryConsistency ensures
they have disappeared.
The problems detected by this new code are reported as PANIC,
except when allow_in_place_tablespaces is set to ON, in which
case they are WARNING. Apart from making tests possible, this
gives users an escape hatch in case things don't go as planned.
Author: Kyotaro Horiguchi <horikyota.ntt@gmail.com>
Author: Asim R Praveen <apraveen@pivotal.io>
Author: Paul Guo <paulguo@gmail.com>
Reviewed-by: Anastasia Lubennikova <lubennikovaav@gmail.com> (older versions)
Reviewed-by: Fujii Masao <masao.fujii@oss.nttdata.com> (older versions)
Reviewed-by: Michaël Paquier <michael@paquier.xyz>
Diagnosed-by: Paul Guo <paulguo@gmail.com>
Discussion: https://postgr.es/m/CAEET0ZGx9AvioViLf7nbR_8tH9-=27DN5xWJ2P9-ROH16e4JUA@mail.gmail.com
2022-07-28 08:40:06 +02:00
|
|
|
/* create the parent directory if needed and valid */
|
|
|
|
parent_path = pstrdup(dbpath);
|
|
|
|
get_parent_directory(parent_path);
|
|
|
|
recovery_create_dbdir(parent_path, true);
|
|
|
|
		/* Create the database directory with the version file. */
		CreateDirAndVersionFile(dbpath, xlrec->db_id, xlrec->tablespace_id,
								true);

		pfree(dbpath);
	}
	else if (info == XLOG_DBASE_DROP)
	{
		xl_dbase_drop_rec *xlrec = (xl_dbase_drop_rec *) XLogRecGetData(record);
		char	   *dst_path;
		int			i;

		if (InHotStandby)
		{
			/*
			 * Lock database while we resolve conflicts to ensure that
			 * InitPostgres() cannot fully re-execute concurrently. This
			 * avoids backends re-connecting automatically to same database,
			 * which can happen in some cases.
			 *
			 * This will lock out walsenders trying to connect to db-specific
			 * slots for logical decoding too, so it's safe for us to drop
			 * slots.
			 */
			LockSharedObjectForSession(DatabaseRelationId, xlrec->db_id, 0, AccessExclusiveLock);
			ResolveRecoveryConflictWithDatabase(xlrec->db_id);
		}
		/* Drop any database-specific replication slots */
		ReplicationSlotsDropDBSlots(xlrec->db_id);

		/* Drop pages for this database that are in the shared buffer cache */
		DropDatabaseBuffers(xlrec->db_id);

		/* Also, clean out any fsync requests that might be pending in md.c */
		ForgetDatabaseSyncRequests(xlrec->db_id);

		/* Clean out the xlog relcache too */
		XLogDropDatabase(xlrec->db_id);

		/* Close all smgr fds in all backends. */
		WaitForProcSignalBarrier(EmitProcSignalBarrier(PROCSIGNAL_BARRIER_SMGRRELEASE));

		for (i = 0; i < xlrec->ntablespaces; i++)
		{
			dst_path = GetDatabasePath(xlrec->db_id, xlrec->tablespace_ids[i]);

			/* And remove the physical files */
			if (!rmtree(dst_path, true))
				ereport(WARNING,
						(errmsg("some useless files may be left behind in old database directory \"%s\"",
								dst_path)));
			pfree(dst_path);
		}

		if (InHotStandby)
		{
			/*
			 * Release locks prior to commit. XXX There is a race condition
			 * here that may allow backends to reconnect, but the window for
			 * this is small because the gap between here and commit is
			 * fairly small and it is unlikely that people will be dropping
			 * databases that we are trying to connect to anyway.
			 */
			UnlockSharedObjectForSession(DatabaseRelationId, xlrec->db_id, 0, AccessExclusiveLock);
		}
	}
	else
		elog(PANIC, "dbase_redo: unknown op code %u", info);
}