/*-------------------------------------------------------------------------
 *
 * smgr.c
 *    public interface routines to storage manager switch.
 *
 *    All file system operations in POSTGRES dispatch through these
 *    routines.
 *
 * Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 *
 * IDENTIFICATION
 *    $PostgreSQL: pgsql/src/backend/storage/smgr/smgr.c,v 1.66 2003/11/29 19:51:57 pgsql Exp $
 *
 *-------------------------------------------------------------------------
 */
#include "postgres.h"

#include <errno.h>

#include "storage/bufmgr.h"
#include "storage/freespace.h"
#include "storage/ipc.h"
#include "storage/smgr.h"
#include "utils/memutils.h"
/* Forward declaration: smgrinit() registers this routine via on_proc_exit(). */
static void smgrshutdown(void);
typedef struct f_smgr
{
int (*smgr_init) (void); /* may be NULL */
int (*smgr_shutdown) (void); /* may be NULL */
int (*smgr_create) (Relation reln);
int (*smgr_unlink) (RelFileNode rnode);
int (*smgr_extend) (Relation reln, BlockNumber blocknum,
char *buffer);
int (*smgr_open) (Relation reln);
int (*smgr_close) (Relation reln);
int (*smgr_read) (Relation reln, BlockNumber blocknum,
char *buffer);
int (*smgr_write) (Relation reln, BlockNumber blocknum,
char *buffer);
2001-03-22 05:01:46 +01:00
int (*smgr_blindwrt) (RelFileNode rnode, BlockNumber blkno,
char *buffer);
BlockNumber (*smgr_nblocks) (Relation reln);
BlockNumber (*smgr_truncate) (Relation reln, BlockNumber nblocks);
int (*smgr_commit) (void); /* may be NULL */
int (*smgr_abort) (void); /* may be NULL */
2000-10-28 18:21:00 +02:00
int (*smgr_sync) (void);
} f_smgr;
/*
* The weird placement of commas in this init block is to keep the compiler
* happy, regardless of what storage managers we have (or don't have).
*/
static f_smgr smgrsw[] = {
/* magnetic disk */
{mdinit, NULL, mdcreate, mdunlink, mdextend, mdopen, mdclose,
mdread, mdwrite, mdblindwrt,
2001-03-22 05:01:46 +01:00
mdnblocks, mdtruncate, mdcommit, mdabort, mdsync
},
#ifdef STABLE_MEMORY_STORAGE
/* main memory */
{mminit, mmshutdown, mmcreate, mmunlink, mmextend, mmopen, mmclose,
mmread, mmwrite, mmblindwrt,
2002-09-04 22:31:48 +02:00
mmnblocks, NULL, mmcommit, mmabort, NULL},
#endif
};
/*
 * This array records which storage managers are write-once, and which
 * support overwrite.  A 'true' entry means that the storage manager is
 * write-once.  In the best of all possible worlds, there would be no
 * write-once storage managers.
 *
 * The array is indexed by storage manager id, in the same order as
 * smgrsw[].  It is compiled only when NOT_USED is defined, and its only
 * reader in this file is smgriswo(), which is under the same guard.
 */
#ifdef NOT_USED
static bool smgrwo[] = {
false, /* magnetic disk */
#ifdef STABLE_MEMORY_STORAGE
false, /* main memory */
#endif
};
#endif
/* Number of entries in smgrsw[]; the per-manager loops below run 0 .. NSmgr-1. */
static int NSmgr = lengthof(smgrsw);
/*
* We keep a list of all relations (represented as RelFileNode values)
* that have been created or deleted in the current transaction. When
* a relation is created, we create the physical file immediately, but
* remember it so that we can delete the file again if the current
2001-03-22 05:01:46 +01:00
* transaction is aborted. Conversely, a deletion request is NOT
* executed immediately, but is just entered in the list. When and if
* the transaction commits, we can delete the physical file.
*
* NOTE: the list is kept in TopMemoryContext to be sure it won't disappear
* unbetimes. It'd probably be OK to keep it in TopTransactionContext,
* but I'm being paranoid.
*/
typedef struct PendingRelDelete
{
RelFileNode relnode; /* relation that may need to be deleted */
2001-03-22 05:01:46 +01:00
int16 which; /* which storage manager? */
bool isTemp; /* is it a temporary relation? */
2001-03-22 05:01:46 +01:00
bool atCommit; /* T=delete at commit; F=delete at abort */
struct PendingRelDelete *next; /* linked-list link */
} PendingRelDelete;
2001-03-22 05:01:46 +01:00
static PendingRelDelete *pendingDeletes = NULL; /* head of linked list */
/*
* smgrinit(), smgrshutdown() -- Initialize or shut down all storage
* managers.
*
*/
int
smgrinit(void)
{
int i;
for (i = 0; i < NSmgr; i++)
{
if (smgrsw[i].smgr_init)
{
if ((*(smgrsw[i].smgr_init)) () == SM_FAIL)
elog(FATAL, "smgr initialization failed on %s: %m",
DatumGetCString(DirectFunctionCall1(smgrout,
2001-03-22 05:01:46 +01:00
Int16GetDatum(i))));
}
}
/* register the shutdown proc */
on_proc_exit(smgrshutdown, 0);
1998-09-01 05:29:17 +02:00
return SM_SUCCESS;
}
static void
smgrshutdown(void)
{
int i;
for (i = 0; i < NSmgr; i++)
{
if (smgrsw[i].smgr_shutdown)
{
if ((*(smgrsw[i].smgr_shutdown)) () == SM_FAIL)
elog(FATAL, "smgr shutdown failed on %s: %m",
DatumGetCString(DirectFunctionCall1(smgrout,
2001-03-22 05:01:46 +01:00
Int16GetDatum(i))));
}
}
}
/*
 *  smgrcreate() -- Create a new relation.
 *
 *      This routine takes a reldesc, creates the relation on the
 *      storage manager selected by 'which', and returns the file
 *      descriptor handed back by that manager.  The relation is also
 *      entered in the pending-delete list with atCommit = false, so that
 *      the physical file is removed again if the transaction aborts.
 *
 *      If the manager reports failure (negative result) an ERROR is
 *      raised and nothing is added to the list.
 */
int
smgrcreate(int16 which, Relation reln)
{
    int         fd;
    PendingRelDelete *pending;

    /*
     * Get the list entry before touching the file system.  If the
     * allocation were done after a successful create and then failed, the
     * new file would exist without a delete-at-abort entry and would never
     * be cleaned up.  (NOTE(review): this relies on MemoryContextAlloc
     * raising an error rather than returning NULL, as the rest of this
     * file already assumes.)
     */
    pending = (PendingRelDelete *)
        MemoryContextAlloc(TopMemoryContext, sizeof(PendingRelDelete));

    fd = (*(smgrsw[which].smgr_create)) (reln);
    if (fd < 0)
    {
        /* pfree must not disturb the errno that %m is about to report */
        int         save_errno = errno;

        pfree(pending);
        errno = save_errno;

        ereport(ERROR,
                (errcode_for_file_access(),
                 errmsg("could not create relation \"%s\": %m",
                        RelationGetRelationName(reln))));
    }

    /* Add the relation to the list of stuff to delete at abort */
    pending->relnode = reln->rd_node;
    pending->which = which;
    pending->isTemp = reln->rd_istemp;
    pending->atCommit = false;  /* delete if abort */
    pending->next = pendingDeletes;
    pendingDeletes = pending;

    return fd;
}
/*
 *  smgrunlink() -- Unlink a relation.
 *
 *      Nothing is removed from the store here.  The relation's file is
 *      closed if it is open, and the relation is entered in the
 *      pending-delete list with atCommit = true so that the physical
 *      file goes away at transaction commit.
 *
 *      Returns SM_SUCCESS.
 */
int
smgrunlink(int16 which, Relation reln)
{
    PendingRelDelete *entry;

    /* Make sure the file is closed */
    if (reln->rd_fd >= 0)
        smgrclose(which, reln);

    /*
     * Add the relation to the list of stuff to delete at commit.
     *
     * NOTE: if the relation was created in this transaction, it will now
     * be present in the pending-delete list twice, once with atCommit
     * true and once with atCommit false.  Hence, it will be physically
     * deleted at end of xact in either case (and the other entry will be
     * ignored by smgrDoPendingDeletes, so no error will occur).  We could
     * instead remove the existing list entry and delete the physical file
     * immediately, but for now I'll keep the logic simple.
     */
    entry = (PendingRelDelete *)
        MemoryContextAlloc(TopMemoryContext, sizeof(PendingRelDelete));

    entry->relnode = reln->rd_node;
    entry->which = which;
    entry->isTemp = reln->rd_istemp;
    entry->atCommit = true;     /* delete if commit */

    /* push onto the front of the list */
    entry->next = pendingDeletes;
    pendingDeletes = entry;

    return SM_SUCCESS;
}
/*
 *  smgrextend() -- Add a new block to a file.
 *
 *      The semantics are basically the same as smgrwrite(): write at
 *      the specified position.  The difference is that the caller
 *      expects to extend the relation, ie, blocknum is the current EOF.
 *      This routine does no cleanup of its own on failure; any
 *      truncation of a partly written block is presumably left to the
 *      underlying manager -- NOTE(review): confirm.
 *
 *      Returns SM_SUCCESS on success; aborts the current transaction on
 *      failure.
 */
int
smgrextend(int16 which, Relation reln, BlockNumber blocknum, char *buffer)
{
    int         rc = (*(smgrsw[which].smgr_extend)) (reln, blocknum, buffer);

    if (rc != SM_FAIL)
        return rc;

    ereport(ERROR,
            (errcode_for_file_access(),
             errmsg("could not extend relation \"%s\": %m",
                    RelationGetRelationName(reln)),
             errhint("Check free disk space.")));

    return rc;
}
/*
 *  smgropen() -- Open a relation using a particular storage manager.
 *
 *      Views and composite types are never handed to the storage
 *      manager; -1 is returned for them immediately.
 *
 *      Otherwise returns the fd for the open relation on success.  On
 *      failure, returns the manager's negative result if failOK, else
 *      aborts the transaction.
 */
int
smgropen(int16 which, Relation reln, bool failOK)
{
    int         fd;

    if (reln->rd_rel->relkind == RELKIND_VIEW ||
        reln->rd_rel->relkind == RELKIND_COMPOSITE_TYPE)
        return -1;

    fd = (*(smgrsw[which].smgr_open)) (reln);
    if (fd < 0 && !failOK)
    {
        ereport(ERROR,
                (errcode_for_file_access(),
                 errmsg("could not open file \"%s\": %m",
                        RelationGetRelationName(reln))));
    }

    return fd;
}
/*
 *  smgrclose() -- Close a relation.
 *
 *      Hands the close request to the storage manager selected by
 *      'which'.  Returns SM_SUCCESS on success, aborts on failure.
 */
int
smgrclose(int16 which, Relation reln)
{
    int         status = (*(smgrsw[which].smgr_close)) (reln);

    if (status == SM_FAIL)
    {
        ereport(ERROR,
                (errcode_for_file_access(),
                 errmsg("could not close relation \"%s\": %m",
                        RelationGetRelationName(reln))));
    }

    return SM_SUCCESS;
}
/*
 *  smgrread() -- read a particular block from a relation into the supplied
 *                buffer.
 *
 *      This routine is called from the buffer manager in order to
 *      instantiate pages in the shared buffer cache.  All storage managers
 *      return pages in the format that POSTGRES expects.  This routine
 *      dispatches the read to the manager selected by 'which'.  On
 *      success, it returns SM_SUCCESS.  On failure, the current
 *      transaction is aborted.
 */
int
smgrread(int16 which, Relation reln, BlockNumber blocknum, char *buffer)
{
    int         status;

    status = (*(smgrsw[which].smgr_read)) (reln, blocknum, buffer);

    /*
     * BlockNumber is unsigned, so it is printed with %u (as smgrtruncate
     * does); %d would show high block numbers as negative.
     */
    if (status == SM_FAIL)
        ereport(ERROR,
                (errcode_for_file_access(),
                 errmsg("could not read block %u of relation \"%s\": %m",
                        blocknum, RelationGetRelationName(reln))));

    return status;
}
/*
 *  smgrwrite() -- Write the supplied buffer out.
 *
 *      This is not a synchronous write -- the block is not necessarily
 *      on disk at return, only dumped out to the kernel.
 *
 *      The buffer is written out via the storage manager selected by
 *      'which'.  This routine returns SM_SUCCESS or aborts the current
 *      transaction.
 */
int
smgrwrite(int16 which, Relation reln, BlockNumber blocknum, char *buffer)
{
    int         status;

    status = (*(smgrsw[which].smgr_write)) (reln, blocknum, buffer);

    /*
     * BlockNumber is unsigned, so it is printed with %u (as smgrtruncate
     * does); %d would show high block numbers as negative.
     */
    if (status == SM_FAIL)
        ereport(ERROR,
                (errcode_for_file_access(),
                 errmsg("could not write block %u of relation \"%s\": %m",
                        blocknum, RelationGetRelationName(reln))));

    return status;
}
/*
 *  smgrblindwrt() -- Write a page out blind.
 *
 *      In some cases, we may find a page in the buffer cache that we
 *      can't make a reldesc for.  This happens, for example, when we
 *      want to reuse a dirty page that was written by a transaction
 *      that has not yet committed, which created a new relation.  In
 *      this case, the buffer manager will call smgrblindwrt() with
 *      the RelFileNode of the relation to which the buffer belongs.
 *      Every storage manager must be able to write this page out to
 *      stable storage in this circumstance.
 *
 *      Returns the manager's status on success; raises an ERROR when the
 *      manager reports SM_FAIL.
 */
int
smgrblindwrt(int16 which,
             RelFileNode rnode,
             BlockNumber blkno,
             char *buffer)
{
    int         status;

    status = (*(smgrsw[which].smgr_blindwrt)) (rnode, blkno, buffer);

    /*
     * BlockNumber is unsigned, so it is printed with %u (as smgrtruncate
     * does); %d would show high block numbers as negative.
     */
    if (status == SM_FAIL)
        ereport(ERROR,
                (errcode_for_file_access(),
                 errmsg("could not write block %u of %u/%u blind: %m",
                        blkno, rnode.tblNode, rnode.relNode)));

    return status;
}
/*
 *  smgrnblocks() -- Calculate the number of POSTGRES blocks in the
 *                   supplied relation.
 *
 *      Returns the number of blocks on success, aborts the current
 *      transaction on failure.  The storage manager signals failure by
 *      returning InvalidBlockNumber.
 */
BlockNumber
smgrnblocks(int16 which, Relation reln)
{
    BlockNumber count = (*(smgrsw[which].smgr_nblocks)) (reln);

    /*
     * NOTE: if a relation ever did grow to 2^32-1 blocks, this code would
     * fail --- but that's a good thing, because it would stop us from
     * extending the rel another block and having a block whose number
     * actually is InvalidBlockNumber.
     */
    if (count != InvalidBlockNumber)
        return count;

    ereport(ERROR,
            (errcode_for_file_access(),
             errmsg("could not count blocks of relation \"%s\": %m",
                    RelationGetRelationName(reln))));

    return count;
}
1996-11-27 08:25:52 +01:00
/*
* smgrtruncate() -- Truncate supplied relation to a specified number
* of blocks
1996-11-27 08:25:52 +01:00
*
* Returns the number of blocks on success, aborts the current
* transaction on failure.
1996-11-27 08:25:52 +01:00
*/
BlockNumber
smgrtruncate(int16 which, Relation reln, BlockNumber nblocks)
1996-11-27 08:25:52 +01:00
{
BlockNumber newblks;
newblks = nblocks;
if (smgrsw[which].smgr_truncate)
{
/*
* Tell the free space map to forget anything it may have stored
* for the about-to-be-deleted blocks. We want to be sure it
* won't return bogus block numbers later on.
*/
FreeSpaceMapTruncateRel(&reln->rd_node, nblocks);
newblks = (*(smgrsw[which].smgr_truncate)) (reln, nblocks);
if (newblks == InvalidBlockNumber)
ereport(ERROR,
(errcode_for_file_access(),
errmsg("could not truncate relation \"%s\" to %u blocks: %m",
RelationGetRelationName(reln), nblocks)));
}
1998-09-01 05:29:17 +02:00
return newblks;
1996-11-27 08:25:52 +01:00
}
/*
 *  smgrDoPendingDeletes() -- take care of relation deletes at end of xact.
 *
 *      Empties the pending-delete list.  Entries whose atCommit flag
 *      matches isCommit have their buffers dropped, their free space
 *      map data forgotten and their physical files unlinked; all other
 *      entries are simply discarded.
 *
 *      Returns SM_SUCCESS.
 */
int
smgrDoPendingDeletes(bool isCommit)
{
    PendingRelDelete *entry;

    while ((entry = pendingDeletes) != NULL)
    {
        /*
         * Take the entry off the list before acting on it (presumably so
         * the list stays consistent if something below raises an error).
         */
        pendingDeletes = entry->next;

        /* an entry meant for the other outcome is just thrown away */
        if (entry->atCommit != isCommit)
        {
            pfree(entry);
            continue;
        }

        /*
         * Get rid of any leftover buffers for the rel (shouldn't be
         * any in the commit case, but there can be in the abort
         * case).
         */
        DropRelFileNodeBuffers(entry->relnode, entry->isTemp);

        /*
         * Tell the free space map to forget this relation.  It won't
         * be accessed any more anyway, but we may as well recycle the
         * map space quickly.
         */
        FreeSpaceMapForgetRel(&entry->relnode);

        /*
         * And delete the physical files.
         *
         * Note: we treat deletion failure as a WARNING, not an error,
         * because we've already decided to commit or abort the
         * current xact.
         */
        if ((*(smgrsw[entry->which].smgr_unlink)) (entry->relnode) == SM_FAIL)
        {
            ereport(WARNING,
                    (errcode_for_file_access(),
                     errmsg("could not unlink %u/%u: %m",
                            entry->relnode.tblNode,
                            entry->relnode.relNode)));
        }

        pfree(entry);
    }

    return SM_SUCCESS;
}
/*
* smgrcommit() -- Prepare to commit changes made during the current
* transaction.
*
* This is called before we actually commit.
*/
int
smgrcommit(void)
{
int i;
for (i = 0; i < NSmgr; i++)
{
if (smgrsw[i].smgr_commit)
{
if ((*(smgrsw[i].smgr_commit)) () == SM_FAIL)
elog(FATAL, "transaction commit failed on %s: %m",
DatumGetCString(DirectFunctionCall1(smgrout,
2001-03-22 05:01:46 +01:00
Int16GetDatum(i))));
}
}
1998-09-01 05:29:17 +02:00
return SM_SUCCESS;
}
/*
* smgrabort() -- Abort changes made during the current transaction.
*/
int
smgrabort(void)
{
int i;
for (i = 0; i < NSmgr; i++)
{
if (smgrsw[i].smgr_abort)
{
if ((*(smgrsw[i].smgr_abort)) () == SM_FAIL)
elog(FATAL, "transaction abort failed on %s: %m",
DatumGetCString(DirectFunctionCall1(smgrout,
2001-03-22 05:01:46 +01:00
Int16GetDatum(i))));
}
}
1998-09-01 05:29:17 +02:00
return SM_SUCCESS;
}
/*
* Sync files to disk at checkpoint time.
*/
2000-10-28 18:21:00 +02:00
int
smgrsync(void)
2000-10-28 18:21:00 +02:00
{
int i;
for (i = 0; i < NSmgr; i++)
{
if (smgrsw[i].smgr_sync)
{
if ((*(smgrsw[i].smgr_sync)) () == SM_FAIL)
elog(PANIC, "storage sync failed on %s: %m",
2000-10-28 18:21:00 +02:00
DatumGetCString(DirectFunctionCall1(smgrout,
2001-03-22 05:01:46 +01:00
Int16GetDatum(i))));
2000-10-28 18:21:00 +02:00
}
}
return SM_SUCCESS;
}
#ifdef NOT_USED
/*
 *  smgriswo() -- Report whether a storage manager is write-once.
 *
 *      Returns the smgrwo[] entry for smgrno.  An id outside
 *      0 .. NSmgr-1 raises an ERROR.
 */
bool
smgriswo(int16 smgrno)
{
    bool        in_range = (smgrno >= 0 && smgrno < NSmgr);

    if (!in_range)
        elog(ERROR, "invalid storage manager id: %d", smgrno);

    return smgrwo[smgrno];
}
#endif
/*
 * smgr_redo() -- redo hook taking an XLOG record.
 *
 * Currently an empty stub: neither lsn nor record is examined and nothing
 * is done.
 */
void
smgr_redo(XLogRecPtr lsn, XLogRecord *record)
{
}
/*
 * smgr_undo() -- undo hook taking an XLOG record.
 *
 * Currently an empty stub: neither lsn nor record is examined and nothing
 * is done.
 */
void
smgr_undo(XLogRecPtr lsn, XLogRecord *record)
{
}
2001-03-22 05:01:46 +01:00
2000-10-21 17:43:36 +02:00
void
2001-03-22 05:01:46 +01:00
smgr_desc(char *buf, uint8 xl_info, char *rec)
2000-10-21 17:43:36 +02:00
{
}