postgresql/src/backend/storage/smgr/smgr.c

609 lines
16 KiB
C
Raw Normal View History

/*-------------------------------------------------------------------------
*
* smgr.c
* public interface routines to storage manager switch.
*
* All file system operations in POSTGRES dispatch through these
* routines.
*
* Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/storage/smgr/smgr.c,v 1.69 2004/02/10 01:55:26 tgl Exp $
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#include "storage/bufmgr.h"
#include "storage/freespace.h"
#include "storage/ipc.h"
#include "storage/smgr.h"
#include "utils/hsearch.h"
#include "utils/memutils.h"
/*
* This struct of function pointers defines the API between smgr.c and
* any individual storage manager module. Note that smgr subfunctions are
* generally expected to return TRUE on success, FALSE on error. (For
* nblocks and truncate we instead say that returning InvalidBlockNumber
* indicates an error.)
*/
typedef struct f_smgr
{
/* per-manager startup hook; smgrinit() calls it when non-NULL */
bool (*smgr_init) (void); /* may be NULL */
/* per-manager shutdown hook; smgrshutdown() calls it when non-NULL */
bool (*smgr_shutdown) (void); /* may be NULL */
/* release the manager's state for one relation; used by smgrclose() */
bool (*smgr_close) (SMgrRelation reln);
/* create the underlying storage; used by smgrcreate() */
bool (*smgr_create) (SMgrRelation reln, bool isRedo);
/* remove the underlying storage; used by smgr_internal_unlink() */
bool (*smgr_unlink) (RelFileNode rnode, bool isRedo);
/* block-level I/O entry points; used by smgrextend/smgrread/smgrwrite */
bool (*smgr_extend) (SMgrRelation reln, BlockNumber blocknum,
char *buffer);
bool (*smgr_read) (SMgrRelation reln, BlockNumber blocknum,
char *buffer);
bool (*smgr_write) (SMgrRelation reln, BlockNumber blocknum,
char *buffer);
/* these two return InvalidBlockNumber (not FALSE) to signal failure */
BlockNumber (*smgr_nblocks) (SMgrRelation reln);
BlockNumber (*smgr_truncate) (SMgrRelation reln, BlockNumber nblocks);
/* optional hooks run by smgrcommit(), smgrabort() and smgrsync() */
bool (*smgr_commit) (void); /* may be NULL */
bool (*smgr_abort) (void); /* may be NULL */
bool (*smgr_sync) (void); /* may be NULL */
} f_smgr;
/*
* Dispatch table of storage managers, indexed by the smgr_which field of an
* SMgrRelation. The initializer is positional, so each entry must list its
* routines in exactly the order the members are declared in f_smgr. The
* second slot (smgr_shutdown) is NULL for the magnetic disk manager.
*/
static const f_smgr smgrsw[] = {
/* magnetic disk */
{mdinit, NULL, mdclose, mdcreate, mdunlink, mdextend,
mdread, mdwrite, mdnblocks, mdtruncate, mdcommit, mdabort, mdsync
}
};
/* number of entries in smgrsw[]; bounds the loops over all managers */
static const int NSmgr = lengthof(smgrsw);
/*
* Each backend has a hashtable that stores all extant SMgrRelation objects.
*
* The table is keyed by RelFileNode and is built by smgropen() the first
* time it is called; until then the pointer stays NULL, which
* smgrcloseall() and smgrclosenode() treat as "nothing to do".
*/
static HTAB *SMgrRelationHash = NULL;
/*
 * We keep a list of all relations (represented as RelFileNode values)
 * that have been created or deleted in the current transaction.  When
 * a relation is created, we create the physical file immediately, but
 * remember it so that we can delete the file again if the current
 * transaction is aborted.  Conversely, a deletion request is NOT
 * executed immediately, but is just entered in the list.  When and if
 * the transaction commits, we can delete the physical file.
 *
 * NOTE: the list is kept in TopMemoryContext to be sure it won't disappear
 * unbetimes.  It'd probably be OK to keep it in TopTransactionContext,
 * but I'm being paranoid.
 */
typedef struct PendingRelDelete
{
	RelFileNode relnode;		/* relation that may need to be deleted */
	int			which;			/* which storage manager? (index into
								 * smgrsw[]) */
	bool		isTemp;			/* is it a temporary relation? */
	bool		atCommit;		/* T=delete at commit; F=delete at abort */
	struct PendingRelDelete *next;		/* linked-list link */
} PendingRelDelete;
2001-03-22 05:01:46 +01:00
static PendingRelDelete *pendingDeletes = NULL; /* head of linked list */
/* local function prototypes */
/* process-exit callback that smgrinit() registers via on_proc_exit() */
static void smgrshutdown(int code, Datum arg);
/* shared unlink path used by smgrdounlink() and smgrDoPendingDeletes() */
static void smgr_internal_unlink(RelFileNode rnode, int which,
bool isTemp, bool isRedo);
/*
 *	smgrinit(), smgrshutdown() -- Initialize or shut down all storage
 *								  managers.
 *
 * Note: in the normal multiprocess scenario with a postmaster, these are
 * called at postmaster start and stop, not per-backend.
 *
 * smgrinit() runs the optional smgr_init hook of every entry in smgrsw[].
 * A hook that returns false is reported at FATAL level, naming the failing
 * manager.  Afterwards smgrshutdown() is registered to run at process exit.
 */
void
smgrinit(void)
{
	int			i;

	for (i = 0; i < NSmgr; i++)
	{
		/* the init hook is optional; skip managers that have none */
		if (smgrsw[i].smgr_init)
		{
			if (!(*(smgrsw[i].smgr_init)) ())
				elog(FATAL, "smgr initialization failed on %s: %m",
					 DatumGetCString(DirectFunctionCall1(smgrout,
													 Int16GetDatum(i))));
		}
	}

	/* register the shutdown proc */
	on_proc_exit(smgrshutdown, 0);
}
/*
 *	smgrshutdown() -- on_proc_exit callback that shuts down all storage
 *					  managers.
 *
 * Runs the optional smgr_shutdown hook of every entry in smgrsw[]; a hook
 * that returns false is reported at FATAL level.  Neither argument is
 * used; they are present to match the callback signature expected by
 * on_proc_exit().
 */
static void
smgrshutdown(int code, Datum arg)
{
	int			i;

	for (i = 0; i < NSmgr; i++)
	{
		/* the shutdown hook is optional; skip managers that have none */
		if (smgrsw[i].smgr_shutdown)
		{
			if (!(*(smgrsw[i].smgr_shutdown)) ())
				elog(FATAL, "smgr shutdown failed on %s: %m",
					 DatumGetCString(DirectFunctionCall1(smgrout,
													 Int16GetDatum(i))));
		}
	}
}
/*
 *	smgropen() -- Look up the SMgrRelation for a RelFileNode, making a new
 *				  one if none exists yet.
 *
 * No attempt is made to open the underlying storage here; a freshly made
 * entry is merely marked as not open.  The hash table itself is built on
 * the first call.  Reports ERROR (out of memory) if no entry can be
 * obtained.
 */
SMgrRelation
smgropen(RelFileNode rnode)
{
	SMgrRelation entry;
	bool		alreadyPresent;

	/* Build the lookup table lazily, on first use */
	if (SMgrRelationHash == NULL)
	{
		HASHCTL		info;

		MemSet(&info, 0, sizeof(info));
		info.keysize = sizeof(RelFileNode);
		info.entrysize = sizeof(SMgrRelationData);
		info.hash = tag_hash;
		SMgrRelationHash = hash_create("smgr relation table", 400,
									   &info, HASH_ELEM | HASH_FUNCTION);
	}

	/* Find the entry for this node, or have one created */
	entry = (SMgrRelation) hash_search(SMgrRelationHash,
									   (void *) &rnode,
									   HASH_ENTER, &alreadyPresent);
	if (entry == NULL)
		ereport(ERROR,
				(errcode(ERRCODE_OUT_OF_MEMORY),
				 errmsg("out of memory")));

	/* A pre-existing entry is handed back untouched */
	if (alreadyPresent)
		return entry;

	/*
	 * Brand-new entry: hash_search has already stored the lookup key, so
	 * only the remaining fields need setting up.
	 */
	entry->smgr_which = 0;		/* we only have md.c at present */
	entry->md_fd = NULL;		/* mark it not open */

	return entry;
}
/*
 *	smgrclose() -- Close an SMgrRelation and drop it from the hash table.
 *
 * The caller must make sure no dangling references to the object remain.
 * (Pointers should be cleared after successful return; should the close
 * fail, an ERROR is raised and the SMgrRelation object still exists.)
 */
void
smgrclose(SMgrRelation reln)
{
	const f_smgr *mgr = &smgrsw[reln->smgr_which];
	bool		closedOk;

	/* First let the owning storage manager release its own state */
	closedOk = (*(mgr->smgr_close)) (reln);
	if (!closedOk)
		ereport(ERROR,
				(errcode_for_file_access(),
				 errmsg("could not close relation %u/%u: %m",
						reln->smgr_rnode.tblNode,
						reln->smgr_rnode.relNode)));

	/* Then remove the entry itself; it must be there if reln is valid */
	if (hash_search(SMgrRelationHash,
					(void *) &(reln->smgr_rnode),
					HASH_REMOVE, NULL) == NULL)
		elog(ERROR, "SMgrRelation hashtable corrupted");
}
/*
* smgrcloseall() -- Close all existing SMgrRelation objects.
*
* It is the caller's responsibility not to leave any dangling references.
*/
void
smgrcloseall(void)
{
HASH_SEQ_STATUS status;
SMgrRelation reln;
/* Nothing to do if hashtable not set up */
if (SMgrRelationHash == NULL)
return;
hash_seq_init(&status, SMgrRelationHash);
while ((reln = (SMgrRelation) hash_seq_search(&status)) != NULL)
{
smgrclose(reln);
}
}
/*
 *	smgrclosenode() -- Close the SMgrRelation belonging to a RelFileNode,
 *					   provided one exists.
 *
 * The effect matches smgrclose(smgropen(rnode)), minus the pointless
 * creation of a hashtable entry that would be dropped again at once when
 * the node has no entry to begin with.
 *
 * The caller must make sure no dangling references remain.
 */
void
smgrclosenode(RelFileNode rnode)
{
	/* Without a hash table there is no entry that could need closing */
	if (SMgrRelationHash != NULL)
	{
		SMgrRelation match;

		/* Pure lookup: HASH_FIND never creates an entry */
		match = (SMgrRelation) hash_search(SMgrRelationHash,
										   (void *) &rnode,
										   HASH_FIND, NULL);
		if (match != NULL)
			smgrclose(match);
	}
}
/*
 *	smgrcreate() -- Create the storage behind a relation.
 *
 * Takes an SMgrRelation that already exists (but is presumably unused)
 * and has the underlying disk file or other storage created for it.
 * Failure is reported as an ERROR.
 *
 * With isRedo set we are replaying WAL, so the underlying file may
 * legitimately exist already.  No PendingRelDelete entry is made in that
 * case; the WAL sequence itself will say whether the file gets dropped.
 */
void
smgrcreate(SMgrRelation reln, bool isTemp, bool isRedo)
{
	if (!(*(smgrsw[reln->smgr_which].smgr_create)) (reln, isRedo))
		ereport(ERROR,
				(errcode_for_file_access(),
				 errmsg("could not create relation %u/%u: %m",
						reln->smgr_rnode.tblNode,
						reln->smgr_rnode.relNode)));

	if (!isRedo)
	{
		PendingRelDelete *entry;

		/*
		 * Remember the new relation so that its file is removed again
		 * should the transaction abort.
		 */
		entry = (PendingRelDelete *)
			MemoryContextAlloc(TopMemoryContext, sizeof(PendingRelDelete));
		entry->atCommit = false;	/* delete if abort */
		entry->isTemp = isTemp;
		entry->which = reln->smgr_which;
		entry->relnode = reln->smgr_rnode;

		/* push onto the front of the pending list */
		entry->next = pendingDeletes;
		pendingDeletes = entry;
	}
}
/*
 *	smgrscheduleunlink() -- Arrange for a relation to be unlinked when the
 *							transaction commits.
 *
 * Nothing is removed from the store now; the relation is only recorded
 * for removal should the current transaction commit successfully.
 *
 * The SMgrRelation object is closed as part of this call (as if by
 * smgrclose()).
 */
void
smgrscheduleunlink(SMgrRelation reln, bool isTemp)
{
	PendingRelDelete *entry;

	/* Build the delete-at-commit record for this relation */
	entry = (PendingRelDelete *)
		MemoryContextAlloc(TopMemoryContext, sizeof(PendingRelDelete));
	entry->atCommit = true;		/* delete if commit */
	entry->isTemp = isTemp;
	entry->which = reln->smgr_which;
	entry->relnode = reln->smgr_rnode;

	/* push onto the front of the pending list */
	entry->next = pendingDeletes;
	pendingDeletes = entry;

	/*
	 * NOTE: a relation created earlier in this same transaction now has
	 * two entries in the pending-delete list: one with atCommit true and
	 * one with atCommit false.  Whichever way the transaction ends, the
	 * file is physically deleted, and smgrDoPendingDeletes skips the
	 * entry that does not apply, so no error results.  Removing the older
	 * entry and deleting the file right away would also work, but the
	 * logic is kept simple for now.
	 */

	/* Finally close the file and discard the hashtable entry */
	smgrclose(reln);
}
/*
 *	smgrdounlink() -- Unlink a relation right now.
 *
 * The relation is removed from the store at once.  Because that cannot be
 * undone, this must not be used during transactional operations.
 *
 * With isRedo set, it is acceptable for the underlying file to have
 * vanished already.  (In practice isRedo will always be true.)
 *
 * The SMgrRelation object is closed as part of this call (as if by
 * smgrclose()).
 */
void
smgrdounlink(SMgrRelation reln, bool isTemp, bool isRedo)
{
	RelFileNode node;
	int			mgrIndex;

	/*
	 * Copy what the unlink step needs out of the object, since smgrclose()
	 * removes the hashtable entry that reln points at.
	 */
	node = reln->smgr_rnode;
	mgrIndex = reln->smgr_which;

	/* Close the file and discard the hashtable entry */
	smgrclose(reln);

	smgr_internal_unlink(node, mgrIndex, isTemp, isRedo);
}
/*
 * Common unlink worker behind smgrdounlink() and smgrDoPendingDeletes().
 *
 * rnode identifies the relation, which selects the storage manager in
 * smgrsw[], and isTemp/isRedo are handed on to the buffer manager and the
 * storage manager respectively.  A failed unlink only produces a WARNING.
 */
static void
smgr_internal_unlink(RelFileNode rnode, int which, bool isTemp, bool isRedo)
{
	bool		removed;

	/*
	 * Discard whatever buffers are still around for the relation.  None
	 * are expected when committing, but an abort can leave some behind.
	 */
	DropRelFileNodeBuffers(rnode, isTemp);

	/*
	 * Have the free space map drop the relation too.  Nobody will look at
	 * it again, but this way its map space is recycled promptly.
	 */
	FreeSpaceMapForgetRel(&rnode);

	/*
	 * Last, remove the physical files.
	 *
	 * Note: a failed deletion is only a WARNING rather than an error,
	 * since by now the decision to commit or abort the current xact has
	 * already been taken.
	 */
	removed = (*(smgrsw[which].smgr_unlink)) (rnode, isRedo);
	if (removed)
		return;

	ereport(WARNING,
			(errcode_for_file_access(),
			 errmsg("could not unlink relation %u/%u: %m",
					rnode.tblNode,
					rnode.relNode)));
}
/*
 *	smgrextend() -- Append a new block to a file.
 *
 * Semantically this is much like smgrwrite(): the buffer is written at
 * the given position.  The difference is that the relation is expected to
 * grow (ie, blocknum is the current EOF).  This wrapper does no cleanup
 * of its own after a failed extension; it reports an ERROR carrying a
 * hint to check free disk space.
 */
void
smgrextend(SMgrRelation reln, BlockNumber blocknum, char *buffer)
{
	/* Success is the common case: nothing more to do */
	if ((*(smgrsw[reln->smgr_which].smgr_extend)) (reln, blocknum, buffer))
		return;

	ereport(ERROR,
			(errcode_for_file_access(),
			 errmsg("could not extend relation %u/%u: %m",
					reln->smgr_rnode.tblNode,
					reln->smgr_rnode.relNode),
			 errhint("Check free disk space.")));
}
/*
 *	smgrread() -- Fetch one block of a relation into the caller's buffer.
 *
 * The buffer manager calls this to instantiate pages in the shared
 * buffer cache.  All storage managers return pages in the format that
 * POSTGRES expects.  A failed read is reported as an ERROR naming the
 * block and relation.
 */
void
smgrread(SMgrRelation reln, BlockNumber blocknum, char *buffer)
{
	/* Success is the common case: nothing more to do */
	if ((*(smgrsw[reln->smgr_which].smgr_read)) (reln, blocknum, buffer))
		return;

	ereport(ERROR,
			(errcode_for_file_access(),
			 errmsg("could not read block %u of relation %u/%u: %m",
					blocknum,
					reln->smgr_rnode.tblNode,
					reln->smgr_rnode.relNode)));
}
/*
 *	smgrwrite() -- Write out the caller's buffer as the given block.
 *
 * The write is not synchronous: on return the block has only been handed
 * to the kernel and is not necessarily on disk.  A failed write is
 * reported as an ERROR naming the block and relation.
 */
void
smgrwrite(SMgrRelation reln, BlockNumber blocknum, char *buffer)
{
	/* Success is the common case: nothing more to do */
	if ((*(smgrsw[reln->smgr_which].smgr_write)) (reln, blocknum, buffer))
		return;

	ereport(ERROR,
			(errcode_for_file_access(),
			 errmsg("could not write block %u of relation %u/%u: %m",
					blocknum,
					reln->smgr_rnode.tblNode,
					reln->smgr_rnode.relNode)));
}
/*
 *	smgrnblocks() -- Report how many blocks the supplied relation has.
 *
 * Returns the block count on success; on failure an ERROR is raised,
 * which aborts the current transaction.
 */
BlockNumber
smgrnblocks(SMgrRelation reln)
{
	BlockNumber count = (*(smgrsw[reln->smgr_which].smgr_nblocks)) (reln);

	/*
	 * NOTE: a relation that really grew to 2^32-1 blocks would trip this
	 * check.  That is desirable: it keeps us from extending the rel by
	 * one more block and ending up with a block whose number actually is
	 * InvalidBlockNumber.
	 */
	if (count != InvalidBlockNumber)
		return count;

	ereport(ERROR,
			(errcode_for_file_access(),
			 errmsg("could not count blocks of relation %u/%u: %m",
					reln->smgr_rnode.tblNode,
					reln->smgr_rnode.relNode)));

	return count;
}
1996-11-27 08:25:52 +01:00
/*
* smgrtruncate() -- Truncate supplied relation to the specified number
* of blocks
1996-11-27 08:25:52 +01:00
*
* Returns the number of blocks on success, aborts the current
* transaction on failure.
1996-11-27 08:25:52 +01:00
*/
BlockNumber
smgrtruncate(SMgrRelation reln, BlockNumber nblocks)
1996-11-27 08:25:52 +01:00
{
BlockNumber newblks;
/*
* Tell the free space map to forget anything it may have stored
* for the about-to-be-deleted blocks. We want to be sure it
* won't return bogus block numbers later on.
*/
FreeSpaceMapTruncateRel(&reln->smgr_rnode, nblocks);
newblks = (*(smgrsw[reln->smgr_which].smgr_truncate)) (reln, nblocks);
if (newblks == InvalidBlockNumber)
ereport(ERROR,
(errcode_for_file_access(),
errmsg("could not truncate relation %u/%u to %u blocks: %m",
reln->smgr_rnode.tblNode,
reln->smgr_rnode.relNode,
nblocks)));
1998-09-01 05:29:17 +02:00
return newblks;
1996-11-27 08:25:52 +01:00
}
/*
 *	smgrDoPendingDeletes() -- Process the pending relation deletes when a
 *							  transaction ends.
 *
 * isCommit says whether the transaction committed.  Entries whose
 * atCommit flag equals isCommit are unlinked; every entry, matching or
 * not, is freed, so the list is empty on return.
 */
void
smgrDoPendingDeletes(bool isCommit)
{
	PendingRelDelete *entry;

	while ((entry = pendingDeletes) != NULL)
	{
		/* detach the entry from the list before acting on it */
		pendingDeletes = entry->next;

		/* only entries meant for this outcome cause a physical unlink */
		if (entry->atCommit == isCommit)
			smgr_internal_unlink(entry->relnode,
								 entry->which,
								 entry->isTemp,
								 false);

		pfree(entry);
	}
}
/*
 *	smgrcommit() -- Prepare to commit changes made during the current
 *					transaction.
 *
 * This is called before we actually commit.  It runs the optional
 * smgr_commit hook of every entry in smgrsw[]; a hook that returns false
 * is reported at FATAL level.
 */
void
smgrcommit(void)
{
	int			i;

	for (i = 0; i < NSmgr; i++)
	{
		/* the commit hook is optional; skip managers that have none */
		if (smgrsw[i].smgr_commit)
		{
			if (!(*(smgrsw[i].smgr_commit)) ())
				elog(FATAL, "transaction commit failed on %s: %m",
					 DatumGetCString(DirectFunctionCall1(smgrout,
													 Int16GetDatum(i))));
		}
	}
}
/*
 *	smgrabort() -- Abort changes made during the current transaction.
 *
 * Runs the optional smgr_abort hook of every entry in smgrsw[]; a hook
 * that returns false is reported at FATAL level.
 */
void
smgrabort(void)
{
	int			i;

	for (i = 0; i < NSmgr; i++)
	{
		/* the abort hook is optional; skip managers that have none */
		if (smgrsw[i].smgr_abort)
		{
			if (!(*(smgrsw[i].smgr_abort)) ())
				elog(FATAL, "transaction abort failed on %s: %m",
					 DatumGetCString(DirectFunctionCall1(smgrout,
													 Int16GetDatum(i))));
		}
	}
}
/*
* smgrsync() -- Sync files to disk at checkpoint time.
*/
void
smgrsync(void)
2000-10-28 18:21:00 +02:00
{
int i;
for (i = 0; i < NSmgr; i++)
{
if (smgrsw[i].smgr_sync)
{
if (! (*(smgrsw[i].smgr_sync)) ())
elog(PANIC, "storage sync failed on %s: %m",
2000-10-28 18:21:00 +02:00
DatumGetCString(DirectFunctionCall1(smgrout,
2001-03-22 05:01:46 +01:00
Int16GetDatum(i))));
2000-10-28 18:21:00 +02:00
}
}
}
2000-10-21 17:43:36 +02:00
void
smgr_redo(XLogRecPtr lsn, XLogRecord *record)
{
}
/*
* smgr_undo() -- placeholder; the body is empty, so nothing is undone.
*
* NOTE(review): name and argument types suggest this is the storage
* manager's WAL undo callback -- confirm against the caller.
*/
void
smgr_undo(XLogRecPtr lsn, XLogRecord *record)
{
/* intentionally empty */
}
2001-03-22 05:01:46 +01:00
2000-10-21 17:43:36 +02:00
void
2001-03-22 05:01:46 +01:00
smgr_desc(char *buf, uint8 xl_info, char *rec)
2000-10-21 17:43:36 +02:00
{
}