postgresql/src/backend/access/transam/xlogutils.c

401 lines
9.1 KiB
C
Raw Normal View History

2000-10-13 14:06:40 +02:00
/*-------------------------------------------------------------------------
*
* xlogutils.c
2000-10-13 14:06:40 +02:00
*
*
* Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
2000-10-13 14:06:40 +02:00
* Portions Copyright (c) 1994, Regents of the University of California
*
XLOG (and related) changes: * Store two past checkpoint locations, not just one, in pg_control. On startup, we fall back to the older checkpoint if the newer one is unreadable. Also, a physical copy of the newest checkpoint record is kept in pg_control for possible use in disaster recovery (ie, complete loss of pg_xlog). Also add a version number for pg_control itself. Remove archdir from pg_control; it ought to be a GUC parameter, not a special case (not that it's implemented yet anyway). * Suppress successive checkpoint records when nothing has been entered in the WAL log since the last one. This is not so much to avoid I/O as to make it actually useful to keep track of the last two checkpoints. If the things are right next to each other then there's not a lot of redundancy gained... * Change CRC scheme to a true 64-bit CRC, not a pair of 32-bit CRCs on alternate bytes. Polynomial borrowed from ECMA DLT1 standard. * Fix XLOG record length handling so that it will work at BLCKSZ = 32k. * Change XID allocation to work more like OID allocation. (This is of dubious necessity, but I think it's a good idea anyway.) * Fix a number of minor bugs, such as off-by-one logic for XLOG file wraparound at the 4 gig mark. * Add documentation and clean up some coding infelicities; move file format declarations out to include files where planned contrib utilities can get at them. * Checkpoint will now occur every CHECKPOINT_SEGMENTS log segments or every CHECKPOINT_TIMEOUT seconds, whichever comes first. It is also possible to force a checkpoint by sending SIGUSR1 to the postmaster (undocumented feature...) * Defend against kill -9 postmaster by storing shmem block's key and ID in postmaster.pid lockfile, and checking at startup to ensure that no processes are still connected to old shmem block (if it still exists). * Switch backends to accept SIGQUIT rather than SIGUSR1 for emergency stop, for symmetry with postmaster and xlog utilities. 
Clean up signal handling in bootstrap.c so that xlog utilities launched by postmaster will react to signals better. * Standalone bootstrap now grabs lockfile in target directory, as added insurance against running it in parallel with live postmaster.
2001-03-13 02:17:06 +01:00
* $Header: /cvsroot/pgsql/src/backend/access/transam/xlogutils.c,v 1.14 2001/03/13 01:17:05 tgl Exp $
*
2000-10-13 14:06:40 +02:00
*-------------------------------------------------------------------------
*/
#include "postgres.h"
XLOG (and related) changes: * Store two past checkpoint locations, not just one, in pg_control. On startup, we fall back to the older checkpoint if the newer one is unreadable. Also, a physical copy of the newest checkpoint record is kept in pg_control for possible use in disaster recovery (ie, complete loss of pg_xlog). Also add a version number for pg_control itself. Remove archdir from pg_control; it ought to be a GUC parameter, not a special case (not that it's implemented yet anyway). * Suppress successive checkpoint records when nothing has been entered in the WAL log since the last one. This is not so much to avoid I/O as to make it actually useful to keep track of the last two checkpoints. If the things are right next to each other then there's not a lot of redundancy gained... * Change CRC scheme to a true 64-bit CRC, not a pair of 32-bit CRCs on alternate bytes. Polynomial borrowed from ECMA DLT1 standard. * Fix XLOG record length handling so that it will work at BLCKSZ = 32k. * Change XID allocation to work more like OID allocation. (This is of dubious necessity, but I think it's a good idea anyway.) * Fix a number of minor bugs, such as off-by-one logic for XLOG file wraparound at the 4 gig mark. * Add documentation and clean up some coding infelicities; move file format declarations out to include files where planned contrib utilities can get at them. * Checkpoint will now occur every CHECKPOINT_SEGMENTS log segments or every CHECKPOINT_TIMEOUT seconds, whichever comes first. It is also possible to force a checkpoint by sending SIGUSR1 to the postmaster (undocumented feature...) * Defend against kill -9 postmaster by storing shmem block's key and ID in postmaster.pid lockfile, and checking at startup to ensure that no processes are still connected to old shmem block (if it still exists). * Switch backends to accept SIGQUIT rather than SIGUSR1 for emergency stop, for symmetry with postmaster and xlog utilities. 
Clean up signal handling in bootstrap.c so that xlog utilities launched by postmaster will react to signals better. * Standalone bootstrap now grabs lockfile in target directory, as added insurance against running it in parallel with live postmaster.
2001-03-13 02:17:06 +01:00
2000-10-13 14:06:40 +02:00
#include "access/xlog.h"
2000-10-24 11:56:23 +02:00
#include "access/transam.h"
2000-10-13 14:06:40 +02:00
#include "access/xact.h"
#include "storage/bufpage.h"
#include "storage/bufmgr.h"
#include "storage/smgr.h"
#include "access/htup.h"
#include "access/xlogutils.h"
#include "catalog/pg_database.h"
2000-10-28 18:21:00 +02:00
#include "lib/hasht.h"
#include "utils/relcache.h"
2000-10-13 14:06:40 +02:00
/*
* ---------------------------------------------------------------
*
* Index support functions
*
*----------------------------------------------------------------
*/
2000-10-13 14:06:40 +02:00
/*
 * XLogIsOwnerOfTuple
 *		Report whether the heap tuple addressed by iptr in relation hnode
 *		was inserted by the given transaction/command pair.
 *
 * Returns
 *		 1	the tuple's t_xmin and t_cmin both match xid and cid
 *		-1	a tuple is present but was inserted by someone else
 *		 0	there is no tuple to look at: the relation could not be
 *			opened, the buffer could not be read, the page is new, the
 *			offset lies beyond the page's last item, or the line pointer
 *			is unused or deleted
 */
int
XLogIsOwnerOfTuple(RelFileNode hnode, ItemPointer iptr,
				   TransactionId xid, CommandId cid)
{
	Relation	rel;
	Buffer		buf;
	Page		pg;
	ItemId		itemid;
	HeapTupleHeader tuphdr;
	int			result = 0;		/* "no tuple" until proven otherwise */

	rel = XLogOpenRelation(false, RM_HEAP_ID, hnode);
	if (!RelationIsValid(rel))
		return (0);

	buf = ReadBuffer(rel, ItemPointerGetBlockNumber(iptr));
	if (!BufferIsValid(buf))
		return (0);

	LockBuffer(buf, BUFFER_LOCK_SHARE);
	pg = (Page) BufferGetPage(buf);

	if (!PageIsNew((PageHeader) pg) &&
		ItemPointerGetOffsetNumber(iptr) <= PageGetMaxOffsetNumber(pg))
	{
		itemid = PageGetItemId(pg, ItemPointerGetOffsetNumber(iptr));

		if (ItemIdIsUsed(itemid) && !ItemIdDeleted(itemid))
		{
			tuphdr = (HeapTupleHeader) PageGetItem(pg, itemid);

			Assert(PageGetSUI(pg) == ThisStartUpID);

			if (tuphdr->t_xmin == xid && tuphdr->t_cmin == cid)
				result = 1;
			else
				result = -1;
		}
	}

	/* single release point for every path that acquired the buffer */
	UnlockAndReleaseBuffer(buf);
	return (result);
}
/*
 * XLogIsValidTuple
 *		MUST BE CALLED ONLY ON RECOVERY.
 *
 * Check whether a valid heap tuple (i.e. one inserted by a transaction
 * that did not abort) exists at the given item pointer of relation hnode.
 *
 * Returns false when the relation cannot be opened, the buffer cannot be
 * read, the page is new, the offset is past the last item on the page,
 * the line pointer is unused or deleted, or the inserting transaction is
 * known to have aborted.  A page whose startup id differs from
 * ThisStartUpID is accepted as valid without inspecting the tuple.
 */
bool
XLogIsValidTuple(RelFileNode hnode, ItemPointer iptr)
{
	Relation	rel;
	Buffer		buf;
	Page		pg;
	ItemId		itemid;
	HeapTupleHeader tuphdr;
	bool		valid;

	rel = XLogOpenRelation(false, RM_HEAP_ID, hnode);
	if (!RelationIsValid(rel))
		return (false);

	buf = ReadBuffer(rel, ItemPointerGetBlockNumber(iptr));
	if (!BufferIsValid(buf))
		return (false);

	LockBuffer(buf, BUFFER_LOCK_SHARE);
	pg = (Page) BufferGetPage(buf);

	if (PageIsNew((PageHeader) pg) ||
		ItemPointerGetOffsetNumber(iptr) > PageGetMaxOffsetNumber(pg))
	{
		valid = false;
	}
	else if (PageGetSUI(pg) != ThisStartUpID)
	{
		/* page was last stamped by an earlier startup: take it as valid */
		Assert(PageGetSUI(pg) < ThisStartUpID);
		valid = true;
	}
	else
	{
		itemid = PageGetItemId(pg, ItemPointerGetOffsetNumber(iptr));

		if (!ItemIdIsUsed(itemid) || ItemIdDeleted(itemid))
		{
			valid = false;
		}
		else
		{
			tuphdr = (HeapTupleHeader) PageGetItem(pg, itemid);
			valid = true;

			/* MUST CHECK WASN'T TUPLE INSERTED IN PREV STARTUP */

			/*
			 * Without the "xmin committed" hint we have to decide here:
			 * reject the tuple if xmin is flagged invalid, if it was
			 * moved in by a transaction (stored in t_cmin) that aborted,
			 * or if the inserting transaction aborted.
			 */
			if (!(tuphdr->t_infomask & HEAP_XMIN_COMMITTED))
			{
				if ((tuphdr->t_infomask & HEAP_XMIN_INVALID) ||
					((tuphdr->t_infomask & HEAP_MOVED_IN) &&
					 TransactionIdDidAbort((TransactionId) tuphdr->t_cmin)) ||
					TransactionIdDidAbort(tuphdr->t_xmin))
					valid = false;
			}
		}
	}

	UnlockAndReleaseBuffer(buf);
	return (valid);
}
/*
2000-10-24 11:56:23 +02:00
* Open pg_log in recovery
*/
2000-10-24 11:56:23 +02:00
extern Relation LogRelation; /* pg_log relation */
void
2000-10-24 11:56:23 +02:00
XLogOpenLogRelation(void)
{
2000-10-24 11:56:23 +02:00
Relation logRelation;
Assert(!LogRelation);
logRelation = (Relation) malloc(sizeof(RelationData));
memset(logRelation, 0, sizeof(RelationData));
logRelation->rd_rel = (Form_pg_class) malloc(sizeof(FormData_pg_class));
memset(logRelation->rd_rel, 0, sizeof(FormData_pg_class));
sprintf(RelationGetPhysicalRelationName(logRelation), "pg_log");
logRelation->rd_node.tblNode = InvalidOid;
logRelation->rd_node.relNode = RelOid_pg_log;
logRelation->rd_fd = -1;
logRelation->rd_fd = smgropen(DEFAULT_SMGR, logRelation, false);
2000-10-24 11:56:23 +02:00
if (logRelation->rd_fd < 0)
elog(STOP, "XLogOpenLogRelation: failed to open pg_log");
LogRelation = logRelation;
}
/*
* ---------------------------------------------------------------
*
* Storage related support functions
*
*----------------------------------------------------------------
*/
/*
 * XLogReadBuffer
 *		Read block blkno of reln and return it locked exclusively.
 *
 * If blkno lies at or beyond the current end of the relation, the result
 * is InvalidBuffer unless 'extend' is set; in that case new blocks are
 * appended one at a time until blkno exists, and the buffer obtained by
 * the last append is returned.  Extension is only expected during
 * recovery (asserted), which is why no locks are taken while extending.
 *
 * InvalidBuffer is returned unlocked.
 */
Buffer
XLogReadBuffer(bool extend, Relation reln, BlockNumber blkno)
{
	BlockNumber nblocks = RelationGetNumberOfBlocks(reln);
	Buffer		buf = InvalidBuffer;

	if (blkno < nblocks)
	{
		/* block already exists: plain read */
		buf = ReadBuffer(reln, blkno);
	}
	else if (extend)			/* we do this in recovery only - no locks */
	{
		Assert(InRecovery);

		for (; nblocks <= blkno; nblocks++)
		{
			/* drop the intermediate block before appending the next one */
			if (buf != InvalidBuffer)
				ReleaseBuffer(buf);		/* must be WriteBuffer()? */
			buf = ReadBuffer(reln, P_NEW);
		}
	}

	if (buf != InvalidBuffer)
		LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);

	return (buf);
}
/*
 * "Relation" cache
 *
 * A fixed-size pool of hand-built relation descriptors, found through a
 * hash table keyed by RelFileNode and kept on a circular doubly linked
 * list anchored at _xlrelarr[0].  XLogOpenRelation() re-links a
 * descriptor next to the anchor (as the anchor's lessRecently neighbour)
 * each time it is opened; when the pool is exhausted _xl_new_reldesc()
 * recycles the anchor's moreRecently neighbour.
 */

/* One pool slot: the descriptor handed to callers plus its list links. */
typedef struct XLogRelDesc
{
/* descriptor whose address XLogOpenRelation() returns */
RelationData reldata;
/* neighbour links in the circular list anchored at _xlrelarr[0] */
struct XLogRelDesc *lessRecently;
struct XLogRelDesc *moreRecently;
} XLogRelDesc;
/* Hash table entry: the RelFileNode key followed by the slot it maps to. */
typedef struct XLogRelCacheEntry
{
RelFileNode rnode;
XLogRelDesc *rdesc;
} XLogRelCacheEntry;
/* hash table mapping RelFileNode -> pool slot */
static HTAB *_xlrelcache;
/* pool of descriptors; slot 0 serves as the list anchor */
static XLogRelDesc *_xlrelarr = NULL;
/* parallel pool of pg_class forms; slot i backs _xlrelarr[i].reldata.rd_rel */
static Form_pg_class _xlpgcarr = NULL;
/* index of the most recently handed-out slot (0 = none handed out yet) */
static int _xlast = 0;
/* number of slots allocated in both pools */
static int _xlcnt = 0;
/* pool size, also passed to hash_create() as the table size */
#define _XLOG_RELCACHESIZE 512
static void
_xl_init_rel_cache(void)
{
2000-10-28 18:21:00 +02:00
HASHCTL ctl;
_xlcnt = _XLOG_RELCACHESIZE;
_xlast = 0;
_xlrelarr = (XLogRelDesc*) malloc(sizeof(XLogRelDesc) * _xlcnt);
memset(_xlrelarr, 0, sizeof(XLogRelDesc) * _xlcnt);
_xlpgcarr = (Form_pg_class) malloc(sizeof(FormData_pg_class) * _xlcnt);
2000-10-24 11:56:23 +02:00
memset(_xlpgcarr, 0, sizeof(FormData_pg_class) * _xlcnt);
2000-10-21 17:43:36 +02:00
_xlrelarr[0].moreRecently = &(_xlrelarr[0]);
_xlrelarr[0].lessRecently = &(_xlrelarr[0]);
memset(&ctl, 0, (int) sizeof(ctl));
ctl.keysize = sizeof(RelFileNode);
ctl.datasize = sizeof(XLogRelDesc*);
ctl.hash = tag_hash;
_xlrelcache = hash_create(_XLOG_RELCACHESIZE, &ctl,
HASH_ELEM | HASH_FUNCTION);
}
2000-10-28 18:21:00 +02:00
/*
 * _xl_remove_hash_entry
 *		Evict one descriptor from the relation cache.
 *
 * edata points at the descriptor pointer of the entry to evict; the
 * second argument is unused (it exists so the function can also serve as
 * a HashTableWalk callback).  The descriptor is unlinked from the list,
 * its hash entry is deleted, its file is closed if open, and both the
 * descriptor and its pg_class form are zeroed so the slot can be reused
 * (the rd_rel link between them is restored afterwards).
 */
static void
_xl_remove_hash_entry(XLogRelDesc **edata, Datum dummy)
{
	XLogRelDesc *victim = *edata;
	Form_pg_class classform = victim->reldata.rd_rel;
	XLogRelCacheEntry *removed;
	bool		was_present;

	/* take the descriptor out of the circular list */
	victim->lessRecently->moreRecently = victim->moreRecently;
	victim->moreRecently->lessRecently = victim->lessRecently;

	/* drop the hash entry, looked up by the descriptor's own file node */
	removed = (XLogRelCacheEntry *)
		hash_search(_xlrelcache, (char *) &(victim->reldata.rd_node),
					HASH_REMOVE, &was_present);

	if (removed == NULL)
		elog(STOP, "_xl_remove_hash_entry: can't delete from cache");

	if (!was_present)
		elog(STOP, "_xl_remove_hash_entry: file was not found in cache");

	/* a negative fd means the file was never successfully opened */
	if (victim->reldata.rd_fd >= 0)
		smgrclose(DEFAULT_SMGR, &(victim->reldata));

	/* wipe both halves of the slot, then re-attach the pg_class form */
	memset(victim, 0, sizeof(XLogRelDesc));
	memset(classform, 0, sizeof(FormData_pg_class));
	victim->reldata.rd_rel = classform;
}
static XLogRelDesc*
_xl_new_reldesc(void)
{
XLogRelDesc *res;
_xlast++;
if (_xlast < _xlcnt)
{
_xlrelarr[_xlast].reldata.rd_rel = &(_xlpgcarr[_xlast]);
return(&(_xlrelarr[_xlast]));
}
/* reuse */
res = _xlrelarr[0].moreRecently;
_xl_remove_hash_entry(&res, 0);
_xlast--;
return(res);
2000-10-28 18:21:00 +02:00
}
2000-10-28 18:21:00 +02:00
/*
 * XLogInitRelationCache
 *		Set up the caches used while replaying the log: first the dummy
 *		caches (CreateDummyCaches), then this file's relation cache.
 */
void
XLogInitRelationCache(void)
{
	CreateDummyCaches();
	_xl_init_rel_cache();
}
2000-10-28 18:21:00 +02:00
/*
 * XLogCloseRelationCache
 *		Tear down what XLogInitRelationCache() set up.
 *
 * The dummy caches are always destroyed.  If the relation cache was
 * allocated, every cached descriptor is evicted (which also closes its
 * file when open), the hash table is destroyed and both pools are freed.
 *
 * All three file-level pointers are cleared afterwards so that no
 * dangling pointer to freed memory is left behind.
 */
void
XLogCloseRelationCache(void)
{
	DestroyDummyCaches();

	/* nothing to do if the relation cache was never set up */
	if (!_xlrelarr)
		return;

	HashTableWalk(_xlrelcache, (HashtFunc) _xl_remove_hash_entry, 0);
	hash_destroy(_xlrelcache);
	_xlrelcache = NULL;

	free(_xlrelarr);
	_xlrelarr = NULL;

	free(_xlpgcarr);
	_xlpgcarr = NULL;
}
/*
 * XLogOpenRelation
 *		Return a relation descriptor for the file identified by rnode,
 *		creating and caching one if necessary.
 *
 * The 'redo' and 'rmid' arguments are not used by this function.
 *
 * On a cache miss a slot is obtained from _xl_new_reldesc(), filled in
 * (the physical name is the decimal relNode), entered into the hash
 * table and opened through the default storage manager with failure
 * allowed.  In either case the descriptor ends up linked next to the
 * list anchor as the most recently used one.
 *
 * Returns NULL if the descriptor has no open file (rd_fd < 0), i.e. the
 * file doesn't exist; the descriptor stays cached in that state.
 */
Relation
XLogOpenRelation(bool redo, RmgrId rmid, RelFileNode rnode)
{
	XLogRelDesc *desc;
	XLogRelCacheEntry *entry;
	bool		hit;

	entry = (XLogRelCacheEntry *)
		hash_search(_xlrelcache, (char *) &rnode, HASH_FIND, &hit);
	if (entry == NULL)
		elog(STOP, "XLogOpenRelation: error in cache");

	if (!hit)
	{
		desc = _xl_new_reldesc();

		sprintf(RelationGetPhysicalRelationName(&(desc->reldata)),
				"%u", rnode.relNode);

		/* unexisting DB id */
		desc->reldata.rd_lockInfo.lockRelId.dbId = RecoveryDb;
		desc->reldata.rd_lockInfo.lockRelId.relId = rnode.relNode;
		desc->reldata.rd_node = rnode;

		entry = (XLogRelCacheEntry *)
			hash_search(_xlrelcache, (char *) &rnode, HASH_ENTER, &hit);
		if (entry == NULL)
			elog(STOP, "XLogOpenRelation: can't insert into cache");
		if (hit)
			elog(STOP, "XLogOpenRelation: file found on insert into cache");

		entry->rdesc = desc;

		desc->reldata.rd_fd = -1;
		desc->reldata.rd_fd = smgropen(DEFAULT_SMGR, &(desc->reldata),
									   true /* allow failure */ );
	}
	else
	{
		/* already cached: detach it from its current list position */
		desc = entry->rdesc;
		desc->lessRecently->moreRecently = desc->moreRecently;
		desc->moreRecently->lessRecently = desc->lessRecently;
	}

	/* (re)insert right beside the anchor */
	desc->moreRecently = &(_xlrelarr[0]);
	desc->lessRecently = _xlrelarr[0].lessRecently;
	_xlrelarr[0].lessRecently = desc;
	desc->lessRecently->moreRecently = desc;

	/* file doesn't exist */
	return (desc->reldata.rd_fd < 0) ? NULL : &(desc->reldata);
}