1996-07-09 08:22:35 +02:00
|
|
|
/*-------------------------------------------------------------------------
|
|
|
|
*
|
|
|
|
* vacuum.c--
|
|
|
|
* the postgres vacuum cleaner
|
|
|
|
*
|
|
|
|
* Copyright (c) 1994, Regents of the University of California
|
|
|
|
*
|
|
|
|
*
|
|
|
|
* IDENTIFICATION
|
1996-10-03 22:11:41 +02:00
|
|
|
* $Header: /cvsroot/pgsql/src/backend/commands/vacuum.c,v 1.3 1996/10/03 20:11:41 momjian Exp $
|
1996-07-09 08:22:35 +02:00
|
|
|
*
|
|
|
|
*-------------------------------------------------------------------------
|
|
|
|
*/
|
|
|
|
#include <sys/file.h>
|
|
|
|
|
|
|
|
#include "postgres.h"
|
|
|
|
#include "utils/portal.h"
|
|
|
|
|
|
|
|
#include "access/genam.h"
|
|
|
|
#include "access/heapam.h"
|
|
|
|
#include "access/xact.h"
|
|
|
|
#include "storage/bufmgr.h"
|
|
|
|
#include "access/transam.h"
|
|
|
|
#include "utils/tqual.h"
|
|
|
|
#include "access/htup.h"
|
|
|
|
|
|
|
|
#include "catalog/pg_index.h"
|
|
|
|
#include "catalog/catname.h"
|
|
|
|
#include "catalog/pg_class.h"
|
|
|
|
#include "catalog/pg_proc.h"
|
|
|
|
|
|
|
|
#include "storage/fd.h" /* for O_ */
|
|
|
|
#include "storage/itemid.h"
|
|
|
|
#include "storage/bufmgr.h"
|
|
|
|
#include "storage/bufpage.h"
|
|
|
|
#include "storage/smgr.h"
|
|
|
|
|
|
|
|
#include "utils/elog.h"
|
|
|
|
#include "utils/mcxt.h"
|
|
|
|
#include "utils/palloc.h"
|
|
|
|
|
|
|
|
#include "commands/vacuum.h"
|
|
|
|
|
|
|
|
bool VacuumRunning = false;
|
|
|
|
|
|
|
|
/* non-export function prototypes */
|
1996-10-03 06:19:29 +02:00
|
|
|
static void _vc_init(void);
|
|
|
|
static void _vc_shutdown(void);
|
|
|
|
static void _vc_vacuum(NameData *VacRelP);
|
|
|
|
static VRelList _vc_getrels(Portal p, NameData *VacRelP);
|
1996-07-09 08:22:35 +02:00
|
|
|
static void _vc_vacone(Portal p, VRelList curvrl);
|
|
|
|
static void _vc_vacheap(Portal p, VRelList curvrl, Relation onerel);
|
|
|
|
static void _vc_vacindices(VRelList curvrl, Relation onerel);
|
|
|
|
static void _vc_vaconeind(VRelList curvrl, Relation indrel);
|
|
|
|
static void _vc_updstats(Oid relid, int npages, int ntuples, bool hasindex);
|
|
|
|
static void _vc_setpagelock(Relation rel, BlockNumber blkno);
|
|
|
|
static bool _vc_ontidlist(ItemPointer itemptr, VTidList tidlist);
|
|
|
|
static void _vc_reaptid(Portal p, VRelList curvrl, BlockNumber blkno,
|
|
|
|
OffsetNumber offnum);
|
|
|
|
static void _vc_free(Portal p, VRelList vrl);
|
|
|
|
static Relation _vc_getarchrel(Relation heaprel);
|
|
|
|
static void _vc_archive(Relation archrel, HeapTuple htup);
|
|
|
|
static bool _vc_isarchrel(char *rname);
|
|
|
|
|
|
|
|
void
|
|
|
|
vacuum(char *vacrel)
|
|
|
|
{
|
1996-10-03 06:19:29 +02:00
|
|
|
NameData VacRel;
|
|
|
|
|
|
|
|
/* vacrel gets de-allocated on transaction commit */
|
|
|
|
|
1996-07-09 08:22:35 +02:00
|
|
|
/* initialize vacuum cleaner */
|
1996-10-03 06:19:29 +02:00
|
|
|
_vc_init();
|
1996-07-09 08:22:35 +02:00
|
|
|
|
|
|
|
/* vacuum the database */
|
1996-10-03 06:19:29 +02:00
|
|
|
if (vacrel)
|
|
|
|
{
|
|
|
|
strcpy(VacRel.data,vacrel);
|
|
|
|
_vc_vacuum(&VacRel);
|
|
|
|
}
|
|
|
|
else
|
|
|
|
_vc_vacuum(NULL);
|
1996-07-09 08:22:35 +02:00
|
|
|
|
|
|
|
/* clean up */
|
1996-10-03 06:19:29 +02:00
|
|
|
_vc_shutdown();
|
1996-07-09 08:22:35 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* _vc_init(), _vc_shutdown() -- start up and shut down the vacuum cleaner.
|
|
|
|
*
|
|
|
|
* We run exactly one vacuum cleaner at a time. We use the file system
|
|
|
|
* to guarantee an exclusive lock on vacuuming, since a single vacuum
|
|
|
|
* cleaner instantiation crosses transaction boundaries, and we'd lose
|
|
|
|
* postgres-style locks at the end of every transaction.
|
|
|
|
*
|
|
|
|
* The strangeness with committing and starting transactions in the
|
|
|
|
* init and shutdown routines is due to the fact that the vacuum cleaner
|
|
|
|
* is invoked via a sql command, and so is already executing inside
|
|
|
|
* a transaction. We need to leave ourselves in a predictable state
|
|
|
|
* on entry and exit to the vacuum cleaner. We commit the transaction
|
|
|
|
* started in PostgresMain() inside _vc_init(), and start one in
|
|
|
|
* _vc_shutdown() to match the commit waiting for us back in
|
|
|
|
* PostgresMain().
|
|
|
|
*/
|
|
|
|
static void
|
1996-10-03 06:19:29 +02:00
|
|
|
_vc_init()
|
1996-07-09 08:22:35 +02:00
|
|
|
{
|
|
|
|
int fd;
|
|
|
|
|
|
|
|
if ((fd = open("pg_vlock", O_CREAT|O_EXCL, 0600)) < 0)
|
|
|
|
elog(WARN, "can't create lock file -- another vacuum cleaner running?");
|
|
|
|
|
|
|
|
close(fd);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* By here, exclusive open on the lock file succeeded. If we abort
|
|
|
|
* for any reason during vacuuming, we need to remove the lock file.
|
|
|
|
* This global variable is checked in the transaction manager on xact
|
|
|
|
* abort, and the routine vc_abort() is called if necessary.
|
|
|
|
*/
|
|
|
|
|
|
|
|
VacuumRunning = true;
|
|
|
|
|
|
|
|
/* matches the StartTransaction in PostgresMain() */
|
|
|
|
CommitTransactionCommand();
|
|
|
|
}
|
|
|
|
|
|
|
|
static void
|
1996-10-03 06:19:29 +02:00
|
|
|
_vc_shutdown()
|
1996-07-09 08:22:35 +02:00
|
|
|
{
|
|
|
|
/* on entry, not in a transaction */
|
|
|
|
if (unlink("pg_vlock") < 0)
|
|
|
|
elog(WARN, "vacuum: can't destroy lock file!");
|
|
|
|
|
|
|
|
/* okay, we're done */
|
|
|
|
VacuumRunning = false;
|
|
|
|
|
|
|
|
/* matches the CommitTransaction in PostgresMain() */
|
|
|
|
StartTransactionCommand();
|
|
|
|
}
|
|
|
|
|
|
|
|
void
|
|
|
|
vc_abort()
|
|
|
|
{
|
|
|
|
/* on abort, remove the vacuum cleaner lock file */
|
|
|
|
(void) unlink("pg_vlock");
|
|
|
|
|
|
|
|
VacuumRunning = false;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* _vc_vacuum() -- vacuum the database.
|
|
|
|
*
|
|
|
|
* This routine builds a list of relations to vacuum, and then calls
|
|
|
|
* code that vacuums them one at a time. We are careful to vacuum each
|
|
|
|
* relation in a separate transaction in order to avoid holding too many
|
|
|
|
* locks at one time.
|
|
|
|
*/
|
|
|
|
static void
|
1996-10-03 06:19:29 +02:00
|
|
|
_vc_vacuum(NameData *VacRelP)
|
1996-07-09 08:22:35 +02:00
|
|
|
{
|
|
|
|
VRelList vrl, cur;
|
|
|
|
char *pname;
|
|
|
|
Portal p;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Create a portal for safe memory across transctions. We need to
|
|
|
|
* palloc the name space for it because our hash function expects
|
|
|
|
* the name to be on a longword boundary. CreatePortal copies the
|
|
|
|
* name to safe storage for us.
|
|
|
|
*/
|
|
|
|
|
|
|
|
pname = (char *) palloc(strlen(VACPNAME) + 1);
|
|
|
|
strcpy(pname, VACPNAME);
|
|
|
|
p = CreatePortal(pname);
|
|
|
|
pfree(pname);
|
|
|
|
|
|
|
|
/* get list of relations */
|
1996-10-03 06:19:29 +02:00
|
|
|
vrl = _vc_getrels(p, VacRelP);
|
1996-07-09 08:22:35 +02:00
|
|
|
|
|
|
|
/* vacuum each heap relation */
|
|
|
|
for (cur = vrl; cur != (VRelList) NULL; cur = cur->vrl_next)
|
|
|
|
_vc_vacone(p, cur);
|
|
|
|
|
|
|
|
_vc_free(p, vrl);
|
|
|
|
|
|
|
|
PortalDestroy(&p);
|
|
|
|
}
|
|
|
|
|
|
|
|
static VRelList
|
1996-10-03 06:19:29 +02:00
|
|
|
_vc_getrels(Portal p, NameData *VacRelP)
|
1996-07-09 08:22:35 +02:00
|
|
|
{
|
|
|
|
Relation pgclass;
|
|
|
|
TupleDesc pgcdesc;
|
|
|
|
HeapScanDesc pgcscan;
|
|
|
|
HeapTuple pgctup;
|
|
|
|
Buffer buf;
|
|
|
|
PortalVariableMemory portalmem;
|
|
|
|
MemoryContext old;
|
|
|
|
VRelList vrl, cur;
|
|
|
|
Datum d;
|
|
|
|
char *rname;
|
1996-10-03 22:11:41 +02:00
|
|
|
char rkind;
|
1996-07-09 08:22:35 +02:00
|
|
|
int16 smgrno;
|
|
|
|
bool n;
|
|
|
|
ScanKeyData pgckey;
|
1996-10-03 22:11:41 +02:00
|
|
|
bool found = false;
|
1996-07-09 08:22:35 +02:00
|
|
|
|
|
|
|
StartTransactionCommand();
|
|
|
|
|
1996-10-03 06:19:29 +02:00
|
|
|
if (VacRelP->data) {
|
1996-07-09 08:22:35 +02:00
|
|
|
ScanKeyEntryInitialize(&pgckey, 0x0, Anum_pg_class_relname,
|
|
|
|
NameEqualRegProcedure,
|
1996-10-03 06:19:29 +02:00
|
|
|
PointerGetDatum(VacRelP->data));
|
1996-07-09 08:22:35 +02:00
|
|
|
} else {
|
|
|
|
ScanKeyEntryInitialize(&pgckey, 0x0, Anum_pg_class_relkind,
|
|
|
|
CharacterEqualRegProcedure, CharGetDatum('r'));
|
|
|
|
}
|
|
|
|
|
|
|
|
portalmem = PortalGetVariableMemory(p);
|
|
|
|
vrl = (VRelList) NULL;
|
|
|
|
|
|
|
|
pgclass = heap_openr(RelationRelationName);
|
|
|
|
pgcdesc = RelationGetTupleDescriptor(pgclass);
|
|
|
|
|
|
|
|
pgcscan = heap_beginscan(pgclass, false, NowTimeQual, 1, &pgckey);
|
|
|
|
|
|
|
|
while (HeapTupleIsValid(pgctup = heap_getnext(pgcscan, 0, &buf))) {
|
|
|
|
|
1996-10-03 22:11:41 +02:00
|
|
|
found = true;
|
|
|
|
|
1996-07-09 08:22:35 +02:00
|
|
|
/*
|
|
|
|
* We have to be careful not to vacuum the archive (since it
|
|
|
|
* already contains vacuumed tuples), and not to vacuum
|
|
|
|
* relations on write-once storage managers like the Sony
|
|
|
|
* jukebox at Berkeley.
|
|
|
|
*/
|
|
|
|
|
|
|
|
d = (Datum) heap_getattr(pgctup, buf, Anum_pg_class_relname,
|
|
|
|
pgcdesc, &n);
|
|
|
|
rname = (char*)d;
|
|
|
|
|
|
|
|
/* skip archive relations */
|
|
|
|
if (_vc_isarchrel(rname)) {
|
|
|
|
ReleaseBuffer(buf);
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
|
|
|
d = (Datum) heap_getattr(pgctup, buf, Anum_pg_class_relsmgr,
|
|
|
|
pgcdesc, &n);
|
|
|
|
smgrno = DatumGetInt16(d);
|
|
|
|
|
|
|
|
/* skip write-once storage managers */
|
|
|
|
if (smgriswo(smgrno)) {
|
|
|
|
ReleaseBuffer(buf);
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
1996-10-03 22:11:41 +02:00
|
|
|
d = (Datum) heap_getattr(pgctup, buf, Anum_pg_class_relkind,
|
|
|
|
pgcdesc, &n);
|
|
|
|
|
|
|
|
rkind = DatumGetChar(d);
|
|
|
|
|
|
|
|
/* skip system relations */
|
|
|
|
if (rkind != 'r') {
|
|
|
|
ReleaseBuffer(buf);
|
|
|
|
elog(NOTICE, "Vacuum: can not process index and certain system tables" );
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
1996-07-09 08:22:35 +02:00
|
|
|
/* get a relation list entry for this guy */
|
|
|
|
old = MemoryContextSwitchTo((MemoryContext)portalmem);
|
|
|
|
if (vrl == (VRelList) NULL) {
|
|
|
|
vrl = cur = (VRelList) palloc(sizeof(VRelListData));
|
|
|
|
} else {
|
|
|
|
cur->vrl_next = (VRelList) palloc(sizeof(VRelListData));
|
|
|
|
cur = cur->vrl_next;
|
|
|
|
}
|
|
|
|
(void) MemoryContextSwitchTo(old);
|
|
|
|
|
|
|
|
cur->vrl_relid = pgctup->t_oid;
|
|
|
|
cur->vrl_attlist = (VAttList) NULL;
|
|
|
|
cur->vrl_tidlist = (VTidList) NULL;
|
|
|
|
cur->vrl_npages = cur->vrl_ntups = 0;
|
|
|
|
cur->vrl_hasindex = false;
|
|
|
|
cur->vrl_next = (VRelList) NULL;
|
|
|
|
|
|
|
|
/* wei hates it if you forget to do this */
|
|
|
|
ReleaseBuffer(buf);
|
|
|
|
}
|
1996-10-03 22:11:41 +02:00
|
|
|
if (found == false)
|
|
|
|
elog(NOTICE, "Vacuum: table not found" );
|
1996-07-09 08:22:35 +02:00
|
|
|
|
1996-10-03 22:11:41 +02:00
|
|
|
|
1996-07-09 08:22:35 +02:00
|
|
|
heap_close(pgclass);
|
|
|
|
heap_endscan(pgcscan);
|
|
|
|
|
|
|
|
CommitTransactionCommand();
|
|
|
|
|
|
|
|
return (vrl);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* _vc_vacone() -- vacuum one heap relation
|
|
|
|
*
|
|
|
|
* This routine vacuums a single heap, cleans out its indices, and
|
|
|
|
* updates its statistics npages and ntuples statistics.
|
|
|
|
*
|
|
|
|
* Doing one heap at a time incurs extra overhead, since we need to
|
|
|
|
* check that the heap exists again just before we vacuum it. The
|
|
|
|
* reason that we do this is so that vacuuming can be spread across
|
|
|
|
* many small transactions. Otherwise, two-phase locking would require
|
|
|
|
* us to lock the entire database during one pass of the vacuum cleaner.
|
|
|
|
*/
|
|
|
|
static void
|
|
|
|
_vc_vacone(Portal p, VRelList curvrl)
|
|
|
|
{
|
|
|
|
Relation pgclass;
|
|
|
|
TupleDesc pgcdesc;
|
|
|
|
HeapTuple pgctup;
|
|
|
|
Buffer pgcbuf;
|
|
|
|
HeapScanDesc pgcscan;
|
|
|
|
Relation onerel;
|
|
|
|
ScanKeyData pgckey;
|
|
|
|
|
|
|
|
StartTransactionCommand();
|
|
|
|
|
|
|
|
ScanKeyEntryInitialize(&pgckey, 0x0, ObjectIdAttributeNumber,
|
|
|
|
ObjectIdEqualRegProcedure,
|
|
|
|
ObjectIdGetDatum(curvrl->vrl_relid));
|
|
|
|
|
|
|
|
pgclass = heap_openr(RelationRelationName);
|
|
|
|
pgcdesc = RelationGetTupleDescriptor(pgclass);
|
|
|
|
pgcscan = heap_beginscan(pgclass, false, NowTimeQual, 1, &pgckey);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Race condition -- if the pg_class tuple has gone away since the
|
|
|
|
* last time we saw it, we don't need to vacuum it.
|
|
|
|
*/
|
|
|
|
|
|
|
|
if (!HeapTupleIsValid(pgctup = heap_getnext(pgcscan, 0, &pgcbuf))) {
|
|
|
|
heap_endscan(pgcscan);
|
|
|
|
heap_close(pgclass);
|
|
|
|
CommitTransactionCommand();
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* now open the class and vacuum it */
|
|
|
|
onerel = heap_open(curvrl->vrl_relid);
|
|
|
|
|
|
|
|
/* we require the relation to be locked until the indices are cleaned */
|
|
|
|
RelationSetLockForWrite(onerel);
|
|
|
|
|
|
|
|
/* vacuum it */
|
|
|
|
_vc_vacheap(p, curvrl, onerel);
|
|
|
|
|
|
|
|
/* if we vacuumed any heap tuples, vacuum the indices too */
|
|
|
|
if (curvrl->vrl_tidlist != (VTidList) NULL)
|
|
|
|
_vc_vacindices(curvrl, onerel);
|
|
|
|
else
|
|
|
|
curvrl->vrl_hasindex = onerel->rd_rel->relhasindex;
|
|
|
|
|
|
|
|
/* all done with this class */
|
|
|
|
heap_close(onerel);
|
|
|
|
heap_endscan(pgcscan);
|
|
|
|
heap_close(pgclass);
|
|
|
|
|
|
|
|
/* update statistics in pg_class */
|
|
|
|
_vc_updstats(curvrl->vrl_relid, curvrl->vrl_npages, curvrl->vrl_ntups,
|
|
|
|
curvrl->vrl_hasindex);
|
|
|
|
|
|
|
|
CommitTransactionCommand();
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* _vc_vacheap() -- vacuum an open heap relation
|
|
|
|
*
|
|
|
|
* This routine sets commit times, vacuums dead tuples, cleans up
|
|
|
|
* wasted space on the page, and maintains statistics on the number
|
|
|
|
* of live tuples in a heap. In addition, it records the tids of
|
|
|
|
* all tuples removed from the heap for any reason. These tids are
|
|
|
|
* used in a scan of indices on the relation to get rid of dead
|
|
|
|
* index tuples.
|
|
|
|
*/
|
|
|
|
static void
|
|
|
|
_vc_vacheap(Portal p, VRelList curvrl, Relation onerel)
|
|
|
|
{
|
|
|
|
int nblocks, blkno;
|
|
|
|
ItemId itemid;
|
|
|
|
HeapTuple htup;
|
|
|
|
Buffer buf;
|
|
|
|
Page page;
|
|
|
|
OffsetNumber offnum, maxoff;
|
|
|
|
Relation archrel;
|
|
|
|
bool isarchived;
|
|
|
|
int nvac;
|
|
|
|
int ntups;
|
|
|
|
bool pgchanged, tupgone;
|
|
|
|
AbsoluteTime purgetime, expiretime;
|
|
|
|
RelativeTime preservetime;
|
|
|
|
|
|
|
|
nvac = 0;
|
|
|
|
ntups = 0;
|
|
|
|
nblocks = RelationGetNumberOfBlocks(onerel);
|
|
|
|
|
|
|
|
{
|
|
|
|
char *relname;
|
|
|
|
relname = (RelationGetRelationName(onerel))->data;
|
|
|
|
|
|
|
|
if ( (strlen(relname) > 4) &&
|
|
|
|
relname[0] == 'X' &&
|
|
|
|
relname[1] == 'i' &&
|
|
|
|
relname[2] == 'n' &&
|
|
|
|
(relname[3] == 'v' || relname[3] == 'x'))
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/* if the relation has an archive, open it */
|
|
|
|
if (onerel->rd_rel->relarch != 'n') {
|
|
|
|
isarchived = true;
|
|
|
|
archrel = _vc_getarchrel(onerel);
|
|
|
|
} else
|
|
|
|
isarchived = false;
|
|
|
|
|
|
|
|
/* don't vacuum large objects for now.
|
|
|
|
something breaks when we do*/
|
|
|
|
{
|
|
|
|
char *relname;
|
|
|
|
relname = (RelationGetRelationName(onerel))->data;
|
|
|
|
|
|
|
|
if ( (strlen(relname) > 4) &&
|
|
|
|
relname[0] == 'X' &&
|
|
|
|
relname[1] == 'i' &&
|
|
|
|
relname[2] == 'n' &&
|
|
|
|
(relname[3] == 'v' || relname[3] == 'x'))
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* calculate the purge time: tuples that expired before this time
|
|
|
|
will be archived or deleted */
|
|
|
|
purgetime = GetCurrentTransactionStartTime();
|
|
|
|
expiretime = (AbsoluteTime)onerel->rd_rel->relexpires;
|
|
|
|
preservetime = (RelativeTime)onerel->rd_rel->relpreserved;
|
|
|
|
|
|
|
|
if (RelativeTimeIsValid(preservetime) && (preservetime)) {
|
|
|
|
purgetime -= preservetime;
|
|
|
|
if (AbsoluteTimeIsBackwardCompatiblyValid(expiretime) &&
|
|
|
|
expiretime > purgetime)
|
|
|
|
purgetime = expiretime;
|
|
|
|
}
|
|
|
|
|
|
|
|
else if (AbsoluteTimeIsBackwardCompatiblyValid(expiretime))
|
|
|
|
purgetime = expiretime;
|
|
|
|
|
|
|
|
for (blkno = 0; blkno < nblocks; blkno++) {
|
|
|
|
buf = ReadBuffer(onerel, blkno);
|
|
|
|
page = BufferGetPage(buf);
|
|
|
|
|
|
|
|
if (PageIsEmpty(page)) {
|
|
|
|
ReleaseBuffer(buf);
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
|
|
|
pgchanged = false;
|
|
|
|
maxoff = PageGetMaxOffsetNumber(page);
|
|
|
|
for (offnum = FirstOffsetNumber;
|
|
|
|
offnum <= maxoff;
|
|
|
|
offnum = OffsetNumberNext(offnum)) {
|
|
|
|
itemid = PageGetItemId(page, offnum);
|
|
|
|
|
|
|
|
if (!ItemIdIsUsed(itemid))
|
|
|
|
continue;
|
|
|
|
|
|
|
|
htup = (HeapTuple) PageGetItem(page, itemid);
|
|
|
|
tupgone = false;
|
|
|
|
|
|
|
|
if (!AbsoluteTimeIsBackwardCompatiblyValid(htup->t_tmin) &&
|
|
|
|
TransactionIdIsValid((TransactionId)htup->t_xmin)) {
|
|
|
|
|
|
|
|
if (TransactionIdDidAbort(htup->t_xmin)) {
|
|
|
|
_vc_reaptid(p, curvrl, blkno, offnum);
|
|
|
|
pgchanged = true;
|
|
|
|
tupgone = true;
|
|
|
|
} else if (TransactionIdDidCommit(htup->t_xmin)) {
|
|
|
|
htup->t_tmin = TransactionIdGetCommitTime(htup->t_xmin);
|
|
|
|
pgchanged = true;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if (TransactionIdIsValid((TransactionId)htup->t_xmax)) {
|
|
|
|
if (TransactionIdDidAbort(htup->t_xmax)) {
|
|
|
|
StoreInvalidTransactionId(&(htup->t_xmax));
|
|
|
|
pgchanged = true;
|
|
|
|
} else if (TransactionIdDidCommit(htup->t_xmax)) {
|
|
|
|
if (!AbsoluteTimeIsBackwardCompatiblyReal(htup->t_tmax)) {
|
|
|
|
|
|
|
|
htup->t_tmax = TransactionIdGetCommitTime(htup->t_xmax);
|
|
|
|
pgchanged = true;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Reap the dead tuple if its expiration time is
|
|
|
|
* before purgetime.
|
|
|
|
*/
|
|
|
|
|
|
|
|
if (!tupgone && htup->t_tmax < purgetime) {
|
|
|
|
_vc_reaptid(p, curvrl, blkno, offnum);
|
|
|
|
tupgone = true;
|
|
|
|
pgchanged = true;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if (tupgone) {
|
|
|
|
ItemId lpp = &(((PageHeader) page)->pd_linp[offnum - 1]);
|
|
|
|
|
|
|
|
/* write the tuple to the archive, if necessary */
|
|
|
|
if (isarchived)
|
|
|
|
_vc_archive(archrel, htup);
|
|
|
|
|
|
|
|
/* mark it unused */
|
|
|
|
lpp->lp_flags &= ~LP_USED;
|
|
|
|
|
|
|
|
++nvac;
|
|
|
|
} else {
|
|
|
|
ntups++;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if (pgchanged) {
|
|
|
|
PageRepairFragmentation(page);
|
|
|
|
WriteBuffer(buf);
|
|
|
|
} else {
|
|
|
|
ReleaseBuffer(buf);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if (isarchived)
|
|
|
|
heap_close(archrel);
|
|
|
|
|
|
|
|
/* save stats in the rel list for use later */
|
|
|
|
curvrl->vrl_ntups = ntups;
|
|
|
|
curvrl->vrl_npages = nblocks;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* _vc_vacindices() -- vacuum all the indices for a particular heap relation.
|
|
|
|
*
|
|
|
|
* On entry, curvrl points at the relation currently being vacuumed.
|
|
|
|
* We already have a write lock on the relation, so we don't need to
|
|
|
|
* worry about anyone building an index on it while we're doing the
|
|
|
|
* vacuuming. The tid list for curvrl is sorted in reverse tid order:
|
|
|
|
* that is, tids on higher page numbers are before those on lower page
|
|
|
|
* numbers, and tids high on the page are before those low on the page.
|
|
|
|
* We use this ordering to cut down the search cost when we look at an
|
|
|
|
* index entry.
|
|
|
|
*
|
|
|
|
* We're executing inside the transaction that vacuumed the heap.
|
|
|
|
*/
|
|
|
|
static void
|
|
|
|
_vc_vacindices(VRelList curvrl, Relation onerel)
|
|
|
|
{
|
|
|
|
Relation pgindex;
|
|
|
|
TupleDesc pgidesc;
|
|
|
|
HeapTuple pgitup;
|
|
|
|
HeapScanDesc pgiscan;
|
|
|
|
Buffer buf;
|
|
|
|
Relation indrel;
|
|
|
|
Oid indoid;
|
|
|
|
Datum d;
|
|
|
|
bool n;
|
|
|
|
int nindices;
|
|
|
|
ScanKeyData pgikey;
|
|
|
|
|
|
|
|
/* see if we can dodge doing any work at all */
|
|
|
|
if (!(onerel->rd_rel->relhasindex))
|
|
|
|
return;
|
|
|
|
|
|
|
|
nindices = 0;
|
|
|
|
|
|
|
|
/* prepare a heap scan on the pg_index relation */
|
|
|
|
pgindex = heap_openr(IndexRelationName);
|
|
|
|
pgidesc = RelationGetTupleDescriptor(pgindex);
|
|
|
|
|
|
|
|
ScanKeyEntryInitialize(&pgikey, 0x0, Anum_pg_index_indrelid,
|
|
|
|
ObjectIdEqualRegProcedure,
|
|
|
|
ObjectIdGetDatum(curvrl->vrl_relid));
|
|
|
|
|
|
|
|
pgiscan = heap_beginscan(pgindex, false, NowTimeQual, 1, &pgikey);
|
|
|
|
|
|
|
|
/* vacuum all the indices */
|
|
|
|
while (HeapTupleIsValid(pgitup = heap_getnext(pgiscan, 0, &buf))) {
|
|
|
|
d = (Datum) heap_getattr(pgitup, buf, Anum_pg_index_indexrelid,
|
|
|
|
pgidesc, &n);
|
|
|
|
indoid = DatumGetObjectId(d);
|
|
|
|
indrel = index_open(indoid);
|
|
|
|
_vc_vaconeind(curvrl, indrel);
|
|
|
|
heap_close(indrel);
|
|
|
|
nindices++;
|
|
|
|
}
|
|
|
|
|
|
|
|
heap_endscan(pgiscan);
|
|
|
|
heap_close(pgindex);
|
|
|
|
|
|
|
|
if (nindices > 0)
|
|
|
|
curvrl->vrl_hasindex = true;
|
|
|
|
else
|
|
|
|
curvrl->vrl_hasindex = false;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* _vc_vaconeind() -- vacuum one index relation.
|
|
|
|
*
|
|
|
|
* Curvrl is the VRelList entry for the heap we're currently vacuuming.
|
|
|
|
* It's locked. The vrl_tidlist entry in curvrl is the list of deleted
|
|
|
|
* heap tids, sorted in reverse (page, offset) order. Onerel is an
|
|
|
|
* index relation on the vacuumed heap. We don't set locks on the index
|
|
|
|
* relation here, since the indexed access methods support locking at
|
|
|
|
* different granularities. We let them handle it.
|
|
|
|
*
|
|
|
|
* Finally, we arrange to update the index relation's statistics in
|
|
|
|
* pg_class.
|
|
|
|
*/
|
|
|
|
static void
|
|
|
|
_vc_vaconeind(VRelList curvrl, Relation indrel)
|
|
|
|
{
|
|
|
|
RetrieveIndexResult res;
|
|
|
|
IndexScanDesc iscan;
|
|
|
|
ItemPointer heapptr;
|
|
|
|
int nvac;
|
|
|
|
int nitups;
|
|
|
|
int nipages;
|
|
|
|
|
|
|
|
/* walk through the entire index */
|
|
|
|
iscan = index_beginscan(indrel, false, 0, (ScanKey) NULL);
|
|
|
|
nvac = 0;
|
|
|
|
nitups = 0;
|
|
|
|
|
|
|
|
while ((res = index_getnext(iscan, ForwardScanDirection))
|
|
|
|
!= (RetrieveIndexResult) NULL) {
|
|
|
|
heapptr = &res->heap_iptr;
|
|
|
|
|
|
|
|
if (_vc_ontidlist(heapptr, curvrl->vrl_tidlist)) {
|
|
|
|
#if 0
|
|
|
|
elog(DEBUG, "<%x,%x> -> <%x,%x>",
|
|
|
|
ItemPointerGetBlockNumber(&(res->index_iptr)),
|
|
|
|
ItemPointerGetOffsetNumber(&(res->index_iptr)),
|
|
|
|
ItemPointerGetBlockNumber(&(res->heap_iptr)),
|
|
|
|
ItemPointerGetOffsetNumber(&(res->heap_iptr)));
|
|
|
|
#endif
|
|
|
|
++nvac;
|
|
|
|
index_delete(indrel, &res->index_iptr);
|
|
|
|
} else {
|
|
|
|
nitups++;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* be tidy */
|
|
|
|
pfree(res);
|
|
|
|
}
|
|
|
|
|
|
|
|
index_endscan(iscan);
|
|
|
|
|
|
|
|
/* now update statistics in pg_class */
|
|
|
|
nipages = RelationGetNumberOfBlocks(indrel);
|
|
|
|
_vc_updstats(indrel->rd_id, nipages, nitups, false);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* _vc_updstats() -- update pg_class statistics for one relation
|
|
|
|
*
|
|
|
|
* This routine works for both index and heap relation entries in
|
|
|
|
* pg_class. We violate no-overwrite semantics here by storing new
|
|
|
|
* values for ntuples, npages, and hasindex directly in the pg_class
|
|
|
|
* tuple that's already on the page. The reason for this is that if
|
|
|
|
* we updated these tuples in the usual way, then every tuple in pg_class
|
|
|
|
* would be replaced every day. This would make planning and executing
|
|
|
|
* historical queries very expensive.
|
|
|
|
*/
|
|
|
|
static void
|
|
|
|
_vc_updstats(Oid relid, int npages, int ntuples, bool hasindex)
|
|
|
|
{
|
|
|
|
Relation rd;
|
|
|
|
HeapScanDesc sdesc;
|
|
|
|
HeapTuple tup;
|
|
|
|
Buffer buf;
|
|
|
|
Form_pg_class pgcform;
|
|
|
|
ScanKeyData skey;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* update number of tuples and number of pages in pg_class
|
|
|
|
*/
|
|
|
|
ScanKeyEntryInitialize(&skey, 0x0, ObjectIdAttributeNumber,
|
|
|
|
ObjectIdEqualRegProcedure,
|
|
|
|
ObjectIdGetDatum(relid));
|
|
|
|
|
|
|
|
rd = heap_openr(RelationRelationName);
|
|
|
|
sdesc = heap_beginscan(rd, false, NowTimeQual, 1, &skey);
|
|
|
|
|
|
|
|
if (!HeapTupleIsValid(tup = heap_getnext(sdesc, 0, &buf)))
|
|
|
|
elog(WARN, "pg_class entry for relid %d vanished during vacuuming",
|
|
|
|
relid);
|
|
|
|
|
|
|
|
/* overwrite the existing statistics in the tuple */
|
|
|
|
_vc_setpagelock(rd, BufferGetBlockNumber(buf));
|
|
|
|
pgcform = (Form_pg_class) GETSTRUCT(tup);
|
|
|
|
pgcform->reltuples = ntuples;
|
|
|
|
pgcform->relpages = npages;
|
|
|
|
pgcform->relhasindex = hasindex;
|
|
|
|
|
|
|
|
/* XXX -- after write, should invalidate relcache in other backends */
|
|
|
|
WriteNoReleaseBuffer(buf);
|
|
|
|
|
|
|
|
/* that's all, folks */
|
|
|
|
heap_endscan(sdesc);
|
|
|
|
heap_close(rd);
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 *  _vc_setpagelock() -- request a page-level write lock.
 *
 *	Builds a tid for offset 1 on block blkno and hands it to
 *	RelationSetLockForWritePage() for relation rel.
 */
static void
_vc_setpagelock(Relation rel, BlockNumber blkno)
{
    ItemPointerData pagetid;

    ItemPointerSet(&pagetid, blkno, 1);

    RelationSetLockForWritePage(rel, &pagetid);
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* _vc_ontidlist() -- is a particular tid on the supplied tid list?
|
|
|
|
*
|
|
|
|
* Tidlist is sorted in reverse (page, offset) order.
|
|
|
|
*/
|
|
|
|
static bool
|
|
|
|
_vc_ontidlist(ItemPointer itemptr, VTidList tidlist)
|
|
|
|
{
|
|
|
|
BlockNumber ibkno;
|
|
|
|
OffsetNumber ioffno;
|
|
|
|
ItemPointer check;
|
|
|
|
BlockNumber ckbkno;
|
|
|
|
OffsetNumber ckoffno;
|
|
|
|
|
|
|
|
ibkno = ItemPointerGetBlockNumber(itemptr);
|
|
|
|
ioffno = ItemPointerGetOffsetNumber(itemptr);
|
|
|
|
|
|
|
|
while (tidlist != (VTidList) NULL) {
|
|
|
|
check = &(tidlist->vtl_tid);
|
|
|
|
ckbkno = ItemPointerGetBlockNumber(check);
|
|
|
|
ckoffno = ItemPointerGetOffsetNumber(check);
|
|
|
|
|
|
|
|
/* see if we've looked far enough down the list */
|
|
|
|
if ((ckbkno < ibkno) || (ckbkno == ibkno && ckoffno < ioffno))
|
|
|
|
return (false);
|
|
|
|
|
|
|
|
/* see if we have a match */
|
|
|
|
if (ckbkno == ibkno && ckoffno == ioffno)
|
|
|
|
return (true);
|
|
|
|
|
|
|
|
/* check next */
|
|
|
|
tidlist = tidlist->vtl_next;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* ran off the end of the list without finding a match */
|
|
|
|
return (false);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* _vc_reaptid() -- save a tid on the list of reaped tids for the current
|
|
|
|
* entry on the vacuum relation list.
|
|
|
|
*
|
|
|
|
* As a side effect of the way that the vacuuming loop for a given
|
|
|
|
* relation works, the tids of vacuumed tuples wind up in reverse
|
|
|
|
* order in the list -- highest tid on a page is first, and higher
|
|
|
|
* pages come before lower pages. This is important later when we
|
|
|
|
* vacuum the indices, as it gives us a way of stopping the search
|
|
|
|
* for a tid if we notice we've passed the page it would be on.
|
|
|
|
*/
|
|
|
|
static void
|
|
|
|
_vc_reaptid(Portal p,
|
|
|
|
VRelList curvrl,
|
|
|
|
BlockNumber blkno,
|
|
|
|
OffsetNumber offnum)
|
|
|
|
{
|
|
|
|
PortalVariableMemory pmem;
|
|
|
|
MemoryContext old;
|
|
|
|
VTidList newvtl;
|
|
|
|
|
|
|
|
/* allocate a VTidListData entry in the portal memory context */
|
|
|
|
pmem = PortalGetVariableMemory(p);
|
|
|
|
old = MemoryContextSwitchTo((MemoryContext) pmem);
|
|
|
|
newvtl = (VTidList) palloc(sizeof(VTidListData));
|
|
|
|
MemoryContextSwitchTo(old);
|
|
|
|
|
|
|
|
/* fill it in */
|
|
|
|
ItemPointerSet(&(newvtl->vtl_tid), blkno, offnum);
|
|
|
|
newvtl->vtl_next = curvrl->vrl_tidlist;
|
|
|
|
curvrl->vrl_tidlist = newvtl;
|
|
|
|
}
|
|
|
|
|
|
|
|
static void
|
|
|
|
_vc_free(Portal p, VRelList vrl)
|
|
|
|
{
|
|
|
|
VRelList p_vrl;
|
|
|
|
VAttList p_val, val;
|
|
|
|
VTidList p_vtl, vtl;
|
|
|
|
MemoryContext old;
|
|
|
|
PortalVariableMemory pmem;
|
|
|
|
|
|
|
|
pmem = PortalGetVariableMemory(p);
|
|
|
|
old = MemoryContextSwitchTo((MemoryContext)pmem);
|
|
|
|
|
|
|
|
while (vrl != (VRelList) NULL) {
|
|
|
|
|
|
|
|
/* free attribute list */
|
|
|
|
val = vrl->vrl_attlist;
|
|
|
|
while (val != (VAttList) NULL) {
|
|
|
|
p_val = val;
|
|
|
|
val = val->val_next;
|
|
|
|
pfree(p_val);
|
|
|
|
}
|
|
|
|
|
|
|
|
/* free tid list */
|
|
|
|
vtl = vrl->vrl_tidlist;
|
|
|
|
while (vtl != (VTidList) NULL) {
|
|
|
|
p_vtl = vtl;
|
|
|
|
vtl = vtl->vtl_next;
|
|
|
|
pfree(p_vtl);
|
|
|
|
}
|
|
|
|
|
|
|
|
/* free rel list entry */
|
|
|
|
p_vrl = vrl;
|
|
|
|
vrl = vrl->vrl_next;
|
|
|
|
pfree(p_vrl);
|
|
|
|
}
|
|
|
|
|
|
|
|
(void) MemoryContextSwitchTo(old);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* _vc_getarchrel() -- open the archive relation for a heap relation
|
|
|
|
*
|
|
|
|
* The archive relation is named 'a,XXXXX' for the heap relation
|
|
|
|
* whose relid is XXXXX.
|
|
|
|
*/
|
|
|
|
|
|
|
|
#define ARCHIVE_PREFIX "a,"
|
|
|
|
|
|
|
|
static Relation
|
|
|
|
_vc_getarchrel(Relation heaprel)
|
|
|
|
{
|
|
|
|
Relation archrel;
|
|
|
|
char *archrelname;
|
|
|
|
|
|
|
|
archrelname = palloc(sizeof(ARCHIVE_PREFIX) + NAMEDATALEN); /* bogus */
|
|
|
|
sprintf(archrelname, "%s%d", ARCHIVE_PREFIX, heaprel->rd_id);
|
|
|
|
|
|
|
|
archrel = heap_openr(archrelname);
|
|
|
|
|
|
|
|
pfree(archrelname);
|
|
|
|
return (archrel);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* _vc_archive() -- write a tuple to an archive relation
|
|
|
|
*
|
|
|
|
* In the future, this will invoke the archived accessd method. For
|
|
|
|
* now, archive relations are on mag disk.
|
|
|
|
*/
|
|
|
|
static void
|
|
|
|
_vc_archive(Relation archrel, HeapTuple htup)
|
|
|
|
{
|
|
|
|
doinsert(archrel, htup);
|
|
|
|
}
|
|
|
|
|
|
|
|
static bool
|
|
|
|
_vc_isarchrel(char *rname)
|
|
|
|
{
|
|
|
|
if (strncmp(ARCHIVE_PREFIX, rname,strlen(ARCHIVE_PREFIX)) == 0)
|
|
|
|
return (true);
|
|
|
|
|
|
|
|
return (false);
|
|
|
|
}
|