/*-------------------------------------------------------------------------
 *
 * smgr.c
 *	  public interface routines to storage manager switch.
 *
 *	  All file system operations in POSTGRES dispatch through these
 *	  routines.
 *
 * Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 *
 * IDENTIFICATION
 *	  $Header: /cvsroot/pgsql/src/backend/storage/smgr/smgr.c,v 1.65 2003/09/25 06:58:02 petere Exp $
 *
 *-------------------------------------------------------------------------
 */
#include "postgres.h"

#include "storage/bufmgr.h"
#include "storage/freespace.h"
#include "storage/ipc.h"
#include "storage/smgr.h"
#include "utils/memutils.h"


/* Forward declaration: smgrinit() hands this function to on_proc_exit(). */
static void smgrshutdown(void);

/*
 * f_smgr -- the set of entry points one storage manager provides.
 *
 * smgrsw[] holds one of these per storage manager and is filled in with
 * positional initializers, so the order of the members here must not be
 * changed without updating that table as well.
 *
 * Members marked "may be NULL" are tested for NULL before being called;
 * all other members are called unconditionally.
 */
typedef struct f_smgr
{
	int			(*smgr_init) (void);	/* may be NULL */
	int			(*smgr_shutdown) (void);		/* may be NULL */
	int			(*smgr_create) (Relation reln);
	int			(*smgr_unlink) (RelFileNode rnode);
	int			(*smgr_extend) (Relation reln, BlockNumber blocknum,
											char *buffer);
	int			(*smgr_open) (Relation reln);
	int			(*smgr_close) (Relation reln);
	int			(*smgr_read) (Relation reln, BlockNumber blocknum,
										  char *buffer);
	int			(*smgr_write) (Relation reln, BlockNumber blocknum,
										   char *buffer);
	int			(*smgr_blindwrt) (RelFileNode rnode, BlockNumber blkno,
											  char *buffer);
	BlockNumber (*smgr_nblocks) (Relation reln);
	/* may be NULL; smgrtruncate() then does nothing */
	BlockNumber (*smgr_truncate) (Relation reln, BlockNumber nblocks);
	int			(*smgr_commit) (void);	/* may be NULL */
	int			(*smgr_abort) (void);	/* may be NULL */
	int			(*smgr_sync) (void);	/* may be NULL */
} f_smgr;

/*
 *	The storage manager switch table: one f_smgr entry per storage manager,
 *	indexed by the "which" argument of the smgr routines below.
 *
 *	Every entry, including the last one, is followed by a comma so that the
 *	initializer stays well-formed regardless of what storage managers we
 *	have (or don't have) compiled in.
 */

static f_smgr smgrsw[] = {

	/* magnetic disk */
	{mdinit, NULL, mdcreate, mdunlink, mdextend, mdopen, mdclose,
		mdread, mdwrite, mdblindwrt,
		mdnblocks, mdtruncate, mdcommit, mdabort, mdsync
	},

#ifdef STABLE_MEMORY_STORAGE
	/* main memory (no truncate or sync routine) */
	{mminit, mmshutdown, mmcreate, mmunlink, mmextend, mmopen, mmclose,
		mmread, mmwrite, mmblindwrt,
	mmnblocks, NULL, mmcommit, mmabort, NULL},
#endif
};

/*
 *	This array records which storage managers are write-once, and which
 *	support overwrite.  A 'true' entry means that the storage manager is
 *	write-once.  In the best of all possible worlds, there would be no
 *	write-once storage managers.
 *
 *	The entries parallel those of smgrsw[] (same order, same #ifdef).
 *	The array is only compiled when NOT_USED is defined.
 */

#ifdef NOT_USED
static bool smgrwo[] = {
	false,						/* magnetic disk */
#ifdef STABLE_MEMORY_STORAGE
	false,						/* main memory */
#endif
};
#endif

/* Number of entries in smgrsw[]; loop bound for the "all managers" routines. */
static int	NSmgr = lengthof(smgrsw);

/*
 * We keep a list of all relations (represented as RelFileNode values)
 * that have been created or deleted in the current transaction.  When
 * a relation is created, we create the physical file immediately, but
 * remember it so that we can delete the file again if the current
 * transaction is aborted.  Conversely, a deletion request is NOT
 * executed immediately, but is just entered in the list.  When and if
 * the transaction commits, we can delete the physical file.
 *
 * Entries are pushed onto the front of the list by smgrcreate() and
 * smgrunlink(), and the whole list is consumed and freed by
 * smgrDoPendingDeletes().
 *
 * NOTE: the list is kept in TopMemoryContext to be sure it won't disappear
 * prematurely.  It'd probably be OK to keep it in TopTransactionContext,
 * but I'm being paranoid.
 */

typedef struct PendingRelDelete
{
	RelFileNode relnode;		/* relation that may need to be deleted */
	int16		which;			/* which storage manager? */
	bool		isTemp;			/* is it a temporary relation? */
	bool		atCommit;		/* T=delete at commit; F=delete at abort */
	struct PendingRelDelete *next;		/* linked-list link */
} PendingRelDelete;

static PendingRelDelete *pendingDeletes = NULL; /* head of linked list */


/*
 *	smgrinit(), smgrshutdown() -- Initialize or shut down all storage
 *								  managers.
 *
 */
int
smgrinit(void)
{
	int			i;

	for (i = 0; i < NSmgr; i++)
	{
		if (smgrsw[i].smgr_init)
		{
			if ((*(smgrsw[i].smgr_init)) () == SM_FAIL)
				elog(FATAL, "smgr initialization failed on %s: %m",
					 DatumGetCString(DirectFunctionCall1(smgrout,
													 Int16GetDatum(i))));
		}
	}

	/* register the shutdown proc */
	on_proc_exit(smgrshutdown, 0);

	return SM_SUCCESS;
}

static void
smgrshutdown(void)
{
	int			i;

	for (i = 0; i < NSmgr; i++)
	{
		if (smgrsw[i].smgr_shutdown)
		{
			if ((*(smgrsw[i].smgr_shutdown)) () == SM_FAIL)
				elog(FATAL, "smgr shutdown failed on %s: %m",
					 DatumGetCString(DirectFunctionCall1(smgrout,
													 Int16GetDatum(i))));
		}
	}
}

/*
 *	smgrcreate() -- Create a new relation.
 *
 *		Calls the create routine of storage manager 'which' for the given
 *		reldesc and returns that routine's result, which is used as the
 *		file descriptor of the new relation.  A negative result is
 *		reported as an ERROR.
 *
 *		On success the relation is also entered in the pending-delete list
 *		(with atCommit = false), so that it is removed again if the current
 *		transaction aborts.
 */
int
smgrcreate(int16 which, Relation reln)
{
	PendingRelDelete *entry;
	int			fd;

	fd = (*(smgrsw[which].smgr_create)) (reln);
	if (fd < 0)
		ereport(ERROR,
				(errcode_for_file_access(),
				 errmsg("could not create relation \"%s\": %m",
						RelationGetRelationName(reln))));

	/* Build a "delete at abort" entry for the new relation */
	entry = (PendingRelDelete *)
		MemoryContextAlloc(TopMemoryContext, sizeof(PendingRelDelete));
	entry->atCommit = false;
	entry->isTemp = reln->rd_istemp;
	entry->which = which;
	entry->relnode = reln->rd_node;

	/* ... and push it onto the front of the list */
	entry->next = pendingDeletes;
	pendingDeletes = entry;

	return fd;
}

/*
 *	smgrunlink() -- Unlink a relation.
 *
 *		The relation is removed from the store.  Actually, nothing is
 *		deleted here: we close the relation if it is open (rd_fd >= 0) and
 *		enter it in the pending-delete list with atCommit = true, so that
 *		the physical deletion happens at transaction commit.
 *
 *		Returns SM_SUCCESS.
 *
 *		NOTE: if the relation was created in this transaction, it is now
 *		in the pending-delete list twice, once with atCommit true and once
 *		with atCommit false.  Hence, it will be physically deleted at end
 *		of xact in either case (and the other entry will be ignored by
 *		smgrDoPendingDeletes, so no error will occur).  We could instead
 *		remove the existing list entry and delete the physical file
 *		immediately, but for now the logic is kept simple.
 */
int
smgrunlink(int16 which, Relation reln)
{
	PendingRelDelete *entry;

	/* The file must not remain open */
	if (reln->rd_fd >= 0)
		smgrclose(which, reln);

	/* Build a "delete at commit" entry for the relation */
	entry = (PendingRelDelete *)
		MemoryContextAlloc(TopMemoryContext, sizeof(PendingRelDelete));
	entry->atCommit = true;
	entry->isTemp = reln->rd_istemp;
	entry->which = which;
	entry->relnode = reln->rd_node;

	/* ... and push it onto the front of the list */
	entry->next = pendingDeletes;
	pendingDeletes = entry;

	return SM_SUCCESS;
}

/*
 *	smgrextend() -- Add a new block to a file.
 *
 *		The semantics are basically the same as smgrwrite(): write at the
 *		specified position.  However, we are expecting to extend the
 *		relation (ie, blocknum is the current EOF).  This routine only
 *		dispatches to the storage manager's extend routine and reports a
 *		failure; it does not itself undo a partial extension.
 *
 *		Returns the storage manager's status; a status of SM_FAIL is
 *		reported as an ERROR (with a hint to check free disk space),
 *		which aborts the current transaction.
 */
int
smgrextend(int16 which, Relation reln, BlockNumber blocknum, char *buffer)
{
	int			result = (*(smgrsw[which].smgr_extend)) (reln, blocknum, buffer);

	if (result != SM_FAIL)
		return result;

	ereport(ERROR,
			(errcode_for_file_access(),
			 errmsg("could not extend relation \"%s\": %m",
					RelationGetRelationName(reln)),
			 errhint("Check free disk space.")));

	return result;
}

/*
 *	smgropen() -- Open a relation using a particular storage manager.
 *
 *		Returns the fd for the open relation on success.
 *
 *		Views and composite types are never passed to the storage manager;
 *		-1 is returned for them regardless of failOK.
 *
 *		If the storage manager's open routine fails (negative result), that
 *		result is returned when failOK is true; otherwise an ERROR is
 *		reported, which aborts the transaction.
 */
int
smgropen(int16 which, Relation reln, bool failOK)
{
	int			fd;

	switch (reln->rd_rel->relkind)
	{
		case RELKIND_VIEW:
		case RELKIND_COMPOSITE_TYPE:
			/* not opened through the storage manager */
			return -1;
		default:
			break;
	}

	fd = (*(smgrsw[which].smgr_open)) (reln);

	if (fd < 0 && !failOK)
		ereport(ERROR,
				(errcode_for_file_access(),
				 errmsg("could not open file \"%s\": %m",
						RelationGetRelationName(reln))));

	return fd;
}

/*
 *	smgrclose() -- Close a relation.
 *
 *		Dispatches to the close routine of storage manager 'which'.
 *		Returns SM_SUCCESS; a close routine result of SM_FAIL is reported
 *		as an ERROR instead.
 */
int
smgrclose(int16 which, Relation reln)
{
	int			rc = (*(smgrsw[which].smgr_close)) (reln);

	if (rc == SM_FAIL)
		ereport(ERROR,
				(errcode_for_file_access(),
				 errmsg("could not close relation \"%s\": %m",
						RelationGetRelationName(reln))));

	return SM_SUCCESS;
}

/*
 *	smgrread() -- read a particular block from a relation into the supplied
 *				  buffer.
 *
 *		This routine is called from the buffer manager in order to
 *		instantiate pages in the shared buffer cache.  All storage managers
 *		return pages in the format that POSTGRES expects.  This routine
 *		dispatches the read to storage manager 'which'.
 *
 *		Returns the status from the storage manager's read routine.  A
 *		status of SM_FAIL is reported as an ERROR, which aborts the
 *		current transaction.
 */
int
smgrread(int16 which, Relation reln, BlockNumber blocknum, char *buffer)
{
	int			status;

	status = (*(smgrsw[which].smgr_read)) (reln, blocknum, buffer);

	/*
	 * BlockNumber is unsigned, so it is printed with %u; %d would show
	 * large block numbers as negative values.
	 */
	if (status == SM_FAIL)
		ereport(ERROR,
				(errcode_for_file_access(),
				 errmsg("could not read block %u of relation \"%s\": %m",
						blocknum, RelationGetRelationName(reln))));

	return status;
}

/*
 *	smgrwrite() -- Write the supplied buffer out.
 *
 *		This is not a synchronous write -- the block is not necessarily
 *		on disk at return, only dumped out to the kernel.
 *
 *		The buffer is written out via the write routine of storage manager
 *		'which'.  Returns that routine's status.  A status of SM_FAIL is
 *		reported as an ERROR, which aborts the current transaction.
 */
int
smgrwrite(int16 which, Relation reln, BlockNumber blocknum, char *buffer)
{
	int			status;

	status = (*(smgrsw[which].smgr_write)) (reln, blocknum, buffer);

	/*
	 * BlockNumber is unsigned, so it is printed with %u; %d would show
	 * large block numbers as negative values.
	 */
	if (status == SM_FAIL)
		ereport(ERROR,
				(errcode_for_file_access(),
				 errmsg("could not write block %u of relation \"%s\": %m",
						blocknum, RelationGetRelationName(reln))));

	return status;
}

/*
 *	smgrblindwrt() -- Write a page out blind.
 *
 *		In some cases, we may find a page in the buffer cache that we
 *		can't make a reldesc for.  This happens, for example, when we
 *		want to reuse a dirty page that was written by a transaction
 *		that has not yet committed, which created a new relation.  In
 *		this case, the buffer manager will call smgrblindwrt() with
 *		the RelFileNode of the relation to which the buffer belongs,
 *		instead of a reldesc.  Every storage manager must be able to
 *		write this page out to stable storage in this circumstance.
 *
 *		Returns the status from the storage manager's blind-write routine.
 *		A status of SM_FAIL is reported as an ERROR.
 */
int
smgrblindwrt(int16 which,
			 RelFileNode rnode,
			 BlockNumber blkno,
			 char *buffer)
{
	int			status;

	status = (*(smgrsw[which].smgr_blindwrt)) (rnode, blkno, buffer);

	/*
	 * BlockNumber is unsigned, so it is printed with %u; %d would show
	 * large block numbers as negative values.
	 */
	if (status == SM_FAIL)
		ereport(ERROR,
				(errcode_for_file_access(),
				 errmsg("could not write block %u of %u/%u blind: %m",
						blkno, rnode.tblNode, rnode.relNode)));

	return status;
}

/*
 *	smgrnblocks() -- Calculate the number of POSTGRES blocks in the
 *					 supplied relation.
 *
 *		Returns the block count obtained from storage manager 'which'.
 *		A result of InvalidBlockNumber is taken as failure and reported
 *		as an ERROR, which aborts the current transaction.
 */
BlockNumber
smgrnblocks(int16 which, Relation reln)
{
	BlockNumber count = (*(smgrsw[which].smgr_nblocks)) (reln);

	/*
	 * NOTE: if a relation ever did grow to 2^32-1 blocks, this test would
	 * treat the count as a failure --- but that's a good thing, because it
	 * stops us from extending the rel another block and having a block
	 * whose number actually is InvalidBlockNumber.
	 */
	if (count != InvalidBlockNumber)
		return count;

	ereport(ERROR,
			(errcode_for_file_access(),
			 errmsg("could not count blocks of relation \"%s\": %m",
					RelationGetRelationName(reln))));

	return count;
}

/*
 *	smgrtruncate() -- Truncate supplied relation to a specified number
 *						of blocks
 *
 *		Returns the number of blocks reported by the storage manager's
 *		truncate routine; a result of InvalidBlockNumber is reported as an
 *		ERROR, which aborts the current transaction.
 *
 *		If storage manager 'which' has no truncate routine, nothing is
 *		done and nblocks is returned unchanged.
 */
BlockNumber
smgrtruncate(int16 which, Relation reln, BlockNumber nblocks)
{
	BlockNumber result;

	/* no truncate routine: nothing to do for this manager */
	if (smgrsw[which].smgr_truncate == NULL)
		return nblocks;

	/*
	 * Before touching the file, make the free space map forget whatever
	 * it has stored for the blocks that are about to go away, so that it
	 * cannot hand out bogus block numbers later on.
	 */
	FreeSpaceMapTruncateRel(&reln->rd_node, nblocks);

	result = (*(smgrsw[which].smgr_truncate)) (reln, nblocks);

	if (result == InvalidBlockNumber)
		ereport(ERROR,
				(errcode_for_file_access(),
				 errmsg("could not truncate relation \"%s\" to %u blocks: %m",
						RelationGetRelationName(reln), nblocks)));

	return result;
}

/*
 * smgrDoPendingDeletes() -- take care of relation deletes at end of xact.
 *
 * Walks the pending-delete list and empties it.  Entries whose atCommit
 * flag equals isCommit have their buffers dropped, their free space map
 * data forgotten and their physical files unlinked; all other entries are
 * simply discarded.  Every entry is freed.
 *
 * Returns SM_SUCCESS.
 */
int
smgrDoPendingDeletes(bool isCommit)
{
	PendingRelDelete *entry;

	while ((entry = pendingDeletes) != NULL)
	{
		/*
		 * Advance the list head before doing any work, so this entry is
		 * already off the list while it is being processed.
		 */
		pendingDeletes = entry->next;

		/* Entries meant for the other outcome are just thrown away */
		if (entry->atCommit != isCommit)
		{
			pfree(entry);
			continue;
		}

		/*
		 * Get rid of any leftover buffers for the rel (shouldn't be any
		 * in the commit case, but there can be in the abort case).
		 */
		DropRelFileNodeBuffers(entry->relnode, entry->isTemp);

		/*
		 * Tell the free space map to forget this relation.  It won't be
		 * accessed any more anyway, but we may as well recycle the map
		 * space quickly.
		 */
		FreeSpaceMapForgetRel(&entry->relnode);

		/*
		 * And delete the physical files.
		 *
		 * Note: we treat deletion failure as a WARNING, not an error,
		 * because we've already decided to commit or abort the current
		 * xact.
		 */
		if ((*(smgrsw[entry->which].smgr_unlink)) (entry->relnode) == SM_FAIL)
			ereport(WARNING,
					(errcode_for_file_access(),
					 errmsg("could not unlink %u/%u: %m",
							entry->relnode.tblNode,
							entry->relnode.relNode)));

		pfree(entry);
	}

	return SM_SUCCESS;
}

/*
 *	smgrcommit() -- Prepare to commit changes made during the current
 *					transaction.
 *
 * This is called before we actually commit.
 */
int
smgrcommit(void)
{
	int			i;

	for (i = 0; i < NSmgr; i++)
	{
		if (smgrsw[i].smgr_commit)
		{
			if ((*(smgrsw[i].smgr_commit)) () == SM_FAIL)
				elog(FATAL, "transaction commit failed on %s: %m",
					 DatumGetCString(DirectFunctionCall1(smgrout,
													 Int16GetDatum(i))));
		}
	}

	return SM_SUCCESS;
}

/*
 *	smgrabort() -- Abort changes made during the current transaction.
 */
int
smgrabort(void)
{
	int			i;

	for (i = 0; i < NSmgr; i++)
	{
		if (smgrsw[i].smgr_abort)
		{
			if ((*(smgrsw[i].smgr_abort)) () == SM_FAIL)
				elog(FATAL, "transaction abort failed on %s: %m",
					 DatumGetCString(DirectFunctionCall1(smgrout,
													 Int16GetDatum(i))));
		}
	}

	return SM_SUCCESS;
}

/*
 * Sync files to disk at checkpoint time.
 */
int
smgrsync(void)
{
	int			i;

	for (i = 0; i < NSmgr; i++)
	{
		if (smgrsw[i].smgr_sync)
		{
			if ((*(smgrsw[i].smgr_sync)) () == SM_FAIL)
				elog(PANIC, "storage sync failed on %s: %m",
					 DatumGetCString(DirectFunctionCall1(smgrout,
													 Int16GetDatum(i))));
		}
	}

	return SM_SUCCESS;
}

#ifdef NOT_USED
/*
 *	smgriswo() -- Is the given storage manager write-once?
 *
 *		Returns the smgrwo[] entry for smgrno.  An id outside the range
 *		0 .. NSmgr-1 is reported as an ERROR.
 */
bool
smgriswo(int16 smgrno)
{
	bool		inRange = (smgrno >= 0 && smgrno < NSmgr);

	if (!inRange)
		elog(ERROR, "invalid storage manager id: %d", smgrno);

	return smgrwo[smgrno];
}
#endif

/*
 * smgr_redo() -- currently a stub: both arguments are ignored and nothing
 * is done.
 *
 * NOTE(review): presumably the WAL redo hook for storage manager records;
 * confirm against the resource manager table that references it.
 */
void
smgr_redo(XLogRecPtr lsn, XLogRecord *record)
{
}

/*
 * smgr_undo() -- currently a stub: both arguments are ignored and nothing
 * is done.
 *
 * NOTE(review): presumably the WAL undo hook for storage manager records;
 * confirm against the resource manager table that references it.
 */
void
smgr_undo(XLogRecPtr lsn, XLogRecord *record)
{
}

/*
 * smgr_desc() -- currently a stub: nothing is written to buf and the other
 * arguments are ignored.
 *
 * NOTE(review): presumably meant to append a textual description of a
 * storage manager WAL record to buf; confirm against its callers.
 */
void
smgr_desc(char *buf, uint8 xl_info, char *rec)
{
}