From 1b67fe17b89380d2ffb74e850e7b38737bde6c76 Mon Sep 17 00:00:00 2001 From: "Vadim B. Mikheev" Date: Mon, 3 Jul 2000 02:54:21 +0000 Subject: [PATCH] heap' logging --- src/backend/access/heap/heapam.c | 110 ++++++++++++++++++++--------- src/backend/access/heap/hio.c | 103 +++++++++------------------ src/backend/storage/page/bufpage.c | 21 ++---- src/include/access/hio.h | 4 +- src/include/access/htup.h | 13 ++-- src/include/access/xlog.h | 7 ++ src/include/storage/bufpage.h | 6 +- 7 files changed, 132 insertions(+), 132 deletions(-) diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c index d671036f04..9f3a7ac714 100644 --- a/src/backend/access/heap/heapam.c +++ b/src/backend/access/heap/heapam.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/access/heap/heapam.c,v 1.74 2000/07/02 22:00:27 momjian Exp $ + * $Header: /cvsroot/pgsql/src/backend/access/heap/heapam.c,v 1.75 2000/07/03 02:54:15 vadim Exp $ * * * INTERFACE ROUTINES @@ -1271,10 +1271,9 @@ heap_get_latest_tid(Relation relation, Oid heap_insert(Relation relation, HeapTuple tup) { - /* ---------------- - * increment access statistics - * ---------------- - */ + Buffer buffer; + + /* increment access statistics */ tup->tableOid = relation->rd_id; IncrHeapAccessStat(local_insert); IncrHeapAccessStat(global_insert); @@ -1300,7 +1299,11 @@ heap_insert(Relation relation, HeapTuple tup) tup->t_data->t_infomask &= ~(HEAP_XACT_MASK); tup->t_data->t_infomask |= HEAP_XMAX_INVALID; - RelationPutHeapTupleAtEnd(relation, tup); + /* Find buffer for this tuple */ + buffer = RelationGetBufferForTuple(relation, tup->t_len, InvalidBuffer); + + /* NO ELOG(ERROR) from here till changes are logged */ + RelationPutHeapTuple(relation, buffer, tup); #ifdef XLOG /* XLOG stuff */ @@ -1308,7 +1311,8 @@ heap_insert(Relation relation, HeapTuple tup) xl_heap_insert xlrec; xlrec.itid.dbId = relation->rd_lockInfo.lockRelId.dbId; xlrec.itid.relId = relation->rd_lockInfo.lockRelId.relId; -XXX xlrec.itid.tid = tp.t_self; + xlrec.itid.cid = GetCurrentCommandId(); + xlrec.itid.tid = tup->t_self; xlrec.t_natts = tup->t_data->t_natts; xlrec.t_oid = tup->t_data->t_oid; xlrec.t_hoff = tup->t_data->t_hoff; @@ -1319,10 +1323,14 @@ XXX xlrec.itid.tid = tp.t_self; (char*) tup->t_data + offsetof(HeapTupleHeaderData, tbits), tup->t_len - offsetof(HeapTupleHeaderData, tbits)); - dp->pd_lsn = recptr; + ((PageHeader) BufferGetPage(buffer))->pd_lsn = recptr; + ((PageHeader) BufferGetPage(buffer))->pd_sui = ThisStartUpID; } #endif + LockBuffer(buffer, BUFFER_LOCK_UNLOCK); + WriteBuffer(buffer); + if (IsSystemRelationName(RelationGetRelationName(relation))) RelationMark4RollbackHeapTuple(relation, tup); @@ -1417,11 +1425,13 @@ l1: xl_heap_delete xlrec; xlrec.dtid.dbId = relation->rd_lockInfo.lockRelId.dbId; xlrec.dtid.relId = relation->rd_lockInfo.lockRelId.relId; + xlrec.dtid.cid = GetCurrentCommandId(); xlrec.dtid.tid = tp.t_self; XLogRecPtr recptr = XLogInsert(RM_HEAP_ID, XLOG_HEAP_DELETE, (char*) xlrec, sizeof(xlrec), NULL, 0); dp->pd_lsn = recptr; + dp->pd_sui = ThisStartUpID; } #endif @@ -1451,7 +1461,7 @@ heap_update(Relation relation, ItemPointer otid, HeapTuple newtup, ItemId lp; HeapTupleData oldtup; PageHeader dp; - Buffer buffer; + Buffer buffer, newbuf; int result; newtup->tableOid = relation->rd_id; @@ -1531,43 +1541,65 @@ l2: newtup->t_data->t_infomask &= ~(HEAP_XACT_MASK); newtup->t_data->t_infomask |= (HEAP_XMAX_INVALID | HEAP_UPDATED); - /* logically delete old item */ + /* Find buffer for new tuple */ + + if ((unsigned) MAXALIGN(newtup->t_len) <= PageGetFreeSpace((Page) dp)) + newbuf = buffer; + else + newbuf = RelationGetBufferForTuple(relation, newtup->t_len, buffer); + + /* NO ELOG(ERROR) from here till changes are logged */ + + /* insert new tuple */ + RelationPutHeapTuple(relation, newbuf, newtup); + + /* logically delete old tuple */ TransactionIdStore(GetCurrentTransactionId(), &(oldtup.t_data->t_xmax)); oldtup.t_data->t_cmax = GetCurrentCommandId(); oldtup.t_data->t_infomask &= ~(HEAP_XMAX_COMMITTED | HEAP_XMAX_INVALID | HEAP_MARKED_FOR_UPDATE); - /* insert new item */ - if ((unsigned) MAXALIGN(newtup->t_len) <= PageGetFreeSpace((Page) dp)) - RelationPutHeapTuple(relation, buffer, newtup); - else - { - - /* - * New item won't fit on same page as old item, have to look for a - * new place to put it. Note that we have to unlock current buffer - * context - not good but RelationPutHeapTupleAtEnd uses extend - * lock. - */ - LockBuffer(buffer, BUFFER_LOCK_UNLOCK); - RelationPutHeapTupleAtEnd(relation, newtup); - LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE); - } - /* mark for rollback caches */ - RelationMark4RollbackHeapTuple(relation, newtup); - - /* - * New item in place, now record address of new tuple in t_ctid of old - * one. - */ + /* record address of new tuple in t_ctid of old one */ oldtup.t_data->t_ctid = newtup->t_self; +#ifdef XLOG + /* XLOG stuff */ + { + xl_heap_update xlrec; + xlrec.dtid.dbId = relation->rd_lockInfo.lockRelId.dbId; + xlrec.dtid.relId = relation->rd_lockInfo.lockRelId.relId; + xlrec.dtid.cid = GetCurrentCommandId(); + xlrec.itid.tid = newtup->t_self; + xlrec.t_natts = newtup->t_data->t_natts; + xlrec.t_hoff = newtup->t_data->t_hoff; + xlrec.mask = newtup->t_data->t_infomask; + + XLogRecPtr recptr = XLogInsert(RM_HEAP_ID, XLOG_HEAP_UPDATE, + (char*) xlrec, sizeof(xlrec), + (char*) newtup->t_data + offsetof(HeapTupleHeaderData, tbits), + newtup->t_len - offsetof(HeapTupleHeaderData, tbits)); + + if (newbuf != buffer) + { + ((PageHeader) BufferGetPage(newbuf))->pd_lsn = recptr; + ((PageHeader) BufferGetPage(newbuf))->pd_sui = ThisStartUpID; + } + ((PageHeader) BufferGetPage(buffer))->pd_lsn = recptr; + ((PageHeader) BufferGetPage(buffer))->pd_sui = ThisStartUpID; + } +#endif + + if (newbuf != buffer) + { + LockBuffer(newbuf, BUFFER_LOCK_UNLOCK); + WriteBuffer(newbuf); + } LockBuffer(buffer, BUFFER_LOCK_UNLOCK); + WriteBuffer(buffer); /* invalidate caches */ RelationInvalidateHeapTuple(relation, &oldtup); - - WriteBuffer(buffer); + RelationMark4RollbackHeapTuple(relation, newtup); return HeapTupleMayBeUpdated; } @@ -1648,6 +1680,14 @@ l3: return result; } +#ifdef XLOG + /* + * XLOG stuff: no logging is required as long as we have no + * savepoints. For savepoints private log could be used... + */ + ((PageHeader) BufferGetPage(*buffer))->pd_sui = ThisStartUpID; +#endif + /* store transaction information of xact marking the tuple */ TransactionIdStore(GetCurrentTransactionId(), &(tuple->t_data->t_xmax)); tuple->t_data->t_cmax = GetCurrentCommandId(); diff --git a/src/backend/access/heap/hio.c b/src/backend/access/heap/hio.c index 3fc2a69df1..9181a7984d 100644 --- a/src/backend/access/heap/hio.c +++ b/src/backend/access/heap/hio.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $Id: hio.c,v 1.31 2000/04/12 17:14:45 momjian Exp $ + * $Id: hio.c,v 1.32 2000/07/03 02:54:15 vadim Exp $ * *------------------------------------------------------------------------- */ @@ -19,17 +19,11 @@ #include "access/hio.h" /* - * amputunique - place tuple at tid - * Currently on errors, calls elog. Perhaps should return -1? - * Possible errors include the addition of a tuple to the page - * between the time the linep is chosen and the page is L_UP'd. + * RelationPutHeapTuple - place tuple at specified page * - * This should be coordinated with the B-tree code. - * Probably needs to have an amdelunique to allow for - * internal index records to be deleted and reordered as needed. - * For the heap AM, this should never be needed. + * !!! ELOG(ERROR) IS DISALLOWED HERE !!! * - * Note - we assume that caller hold BUFFER_LOCK_EXCLUSIVE on the buffer. + * Note - we assume that caller hold BUFFER_LOCK_EXCLUSIVE on the buffer. * */ void @@ -57,62 +51,41 @@ RelationPutHeapTuple(Relation relation, offnum = PageAddItem((Page) pageHeader, (Item) tuple->t_data, tuple->t_len, InvalidOffsetNumber, LP_USED); + if (offnum == InvalidOffsetNumber) + elog(STOP, "RelationPutHeapTuple: failed to add tuple"); + itemId = PageGetItemId((Page) pageHeader, offnum); item = PageGetItem((Page) pageHeader, itemId); ItemPointerSet(&((HeapTupleHeader) item)->t_ctid, BufferGetBlockNumber(buffer), offnum); - /* - * Let the caller do this! - * - * WriteBuffer(buffer); - */ - /* return an accurate tuple */ ItemPointerSet(&tuple->t_self, BufferGetBlockNumber(buffer), offnum); } /* - * This routine is another in the series of attempts to reduce the number - * of I/O's and system calls executed in the various benchmarks. In - * particular, this routine is used to append data to the end of a relation - * file without excessive lseeks. This code should do no more than 2 semops - * in the ideal case. + * RelationGetBufferForTuple * - * Eventually, we should cache the number of blocks in a relation somewhere. - * Until that time, this code will have to do an lseek to determine the number - * of blocks in a relation. + * Returns (locked) buffer to add tuple with given len. + * If Ubuf is valid then no attempt to lock it should be made - + * this is for heap_update... * - * This code should ideally do at most 4 semops, 1 lseek, and possibly 1 write - * to do an append; it's possible to eliminate 2 of the semops if we do direct - * buffer stuff (!); the lseek and the write can go if we get - * RelationGetNumberOfBlocks to be useful. + * ELOG(ERROR) is allowed here, so this routine *must* be called + * before any (unlogged) changes are made in buffer pool. * - * NOTE: This code presumes that we have a write lock on the relation. - * Not now - we use extend locking... - * - * Also note that this routine probably shouldn't have to exist, and does - * screw up the call graph rather badly, but we are wasting so much time and - * system resources being massively general that we are losing badly in our - * performance benchmarks. */ -void -RelationPutHeapTupleAtEnd(Relation relation, HeapTuple tuple) +Buffer +RelationGetBufferForTuple(Relation relation, Size len, Buffer Ubuf) { Buffer buffer; Page pageHeader; BlockNumber lastblock; - OffsetNumber offnum; - Size len; - ItemId itemId; - Item item; - len = MAXALIGN(tuple->t_len); /* be conservative */ + len = MAXALIGN(len); /* be conservative */ /* - * If we're gonna fail for oversize tuple, do it right away... this - * code should go away eventually. + * If we're gonna fail for oversize tuple, do it right away */ if (len > MaxTupleSize) elog(ERROR, "Tuple is too big: size %u, max size %ld", @@ -152,7 +125,8 @@ RelationPutHeapTupleAtEnd(Relation relation, HeapTuple tuple) else buffer = ReadBuffer(relation, lastblock - 1); - LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE); + if (buffer != Ubuf) + LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE); pageHeader = (Page) BufferGetPage(buffer); /* @@ -160,7 +134,8 @@ RelationPutHeapTupleAtEnd(Relation relation, HeapTuple tuple) */ if (len > PageGetFreeSpace(pageHeader)) { - LockBuffer(buffer, BUFFER_LOCK_UNLOCK); + if (buffer != Ubuf) + LockBuffer(buffer, BUFFER_LOCK_UNLOCK); buffer = ReleaseAndReadBuffer(buffer, relation, P_NEW); LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE); pageHeader = (Page) BufferGetPage(buffer); @@ -168,36 +143,22 @@ RelationPutHeapTupleAtEnd(Relation relation, HeapTuple tuple) if (len > PageGetFreeSpace(pageHeader)) { - - /* - * BUG: by elog'ing here, we leave the new buffer locked and - * not marked dirty, which may result in an invalid page - * header being left on disk. But we should not get here - * given the test at the top of the routine, and the whole - * deal should go away when we implement tuple splitting - * anyway... - */ - elog(ERROR, "Tuple is too big: size %u", len); + /* We should not get here given the test at the top */ + elog(STOP, "Tuple is too big: size %u", len); } } + /* + * Caller should check space in Ubuf but... + */ + else if (buffer == Ubuf) + { + ReleaseBuffer(buffer); + buffer = Ubuf; + } if (!relation->rd_myxactonly) UnlockPage(relation, 0, ExclusiveLock); - offnum = PageAddItem((Page) pageHeader, (Item) tuple->t_data, - tuple->t_len, InvalidOffsetNumber, LP_USED); - - itemId = PageGetItemId((Page) pageHeader, offnum); - item = PageGetItem((Page) pageHeader, itemId); - - lastblock = BufferGetBlockNumber(buffer); - - ItemPointerSet(&((HeapTupleHeader) item)->t_ctid, lastblock, offnum); - - /* return an accurate tuple self-pointer */ - ItemPointerSet(&tuple->t_self, lastblock, offnum); - - LockBuffer(buffer, BUFFER_LOCK_UNLOCK); - WriteBuffer(buffer); + return(buffer); } diff --git a/src/backend/storage/page/bufpage.c b/src/backend/storage/page/bufpage.c index f25faf180c..43cabceba1 100644 --- a/src/backend/storage/page/bufpage.c +++ b/src/backend/storage/page/bufpage.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/storage/page/bufpage.c,v 1.29 2000/04/12 17:15:40 momjian Exp $ + * $Header: /cvsroot/pgsql/src/backend/storage/page/bufpage.c,v 1.30 2000/07/03 02:54:16 vadim Exp $ * *------------------------------------------------------------------------- */ @@ -50,26 +50,13 @@ PageInit(Page page, Size pageSize, Size specialSize) PageSetPageSize(page, pageSize); } -/* - * PageAddItem - * Adds item to the given page. - * - * Note: - * This does not assume that the item resides on a single page. - * It is the responsiblity of the caller to act appropriately - * depending on this fact. The "pskip" routines provide a - * friendlier interface, in this case. - * - * This does change the status of any of the resources passed. - * The semantics may change in the future. - * - * This routine should probably be combined with others? - */ /* ---------------- * PageAddItem * * add an item to a page. * + * !!! ELOG(ERROR) IS DISALLOWED HERE !!! + * * Notes on interface: * If offsetNumber is valid, shuffle ItemId's down to make room * to use it, if PageManagerShuffle is true. If PageManagerShuffle is @@ -126,7 +113,7 @@ PageAddItem(Page page, if (((*itemId).lp_flags & LP_USED) || ((*itemId).lp_len != 0)) { - elog(ERROR, "PageAddItem: tried overwrite of used ItemId"); + elog(NOTICE, "PageAddItem: tried overwrite of used ItemId"); return InvalidOffsetNumber; } } diff --git a/src/include/access/hio.h b/src/include/access/hio.h index 999d2412e7..c0636a4ff3 100644 --- a/src/include/access/hio.h +++ b/src/include/access/hio.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2000, PostgreSQL, Inc * Portions Copyright (c) 1994, Regents of the University of California * - * $Id: hio.h,v 1.14 2000/01/26 05:57:50 momjian Exp $ + * $Id: hio.h,v 1.15 2000/07/03 02:54:17 vadim Exp $ * *------------------------------------------------------------------------- */ @@ -18,6 +18,6 @@ extern void RelationPutHeapTuple(Relation relation, Buffer buffer, HeapTuple tuple); -extern void RelationPutHeapTupleAtEnd(Relation relation, HeapTuple tuple); +extern Buffer RelationGetBufferForTuple(Relation relation, Size len, Buffer Ubuf); #endif /* HIO_H */ diff --git a/src/include/access/htup.h b/src/include/access/htup.h index 3370960e2f..4665fe60bb 100644 --- a/src/include/access/htup.h +++ b/src/include/access/htup.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2000, PostgreSQL, Inc * Portions Copyright (c) 1994, Regents of the University of California * - * $Id: htup.h,v 1.31 2000/07/02 22:01:00 momjian Exp $ + * $Id: htup.h,v 1.32 2000/07/03 02:54:17 vadim Exp $ * *------------------------------------------------------------------------- */ @@ -69,22 +69,25 @@ typedef HeapTupleHeaderData *HeapTupleHeader; #define XLOG_HEAP_MOVE 0x30 /* - * All what we need to find changed tuple (14 bytes) + * All what we need to find changed tuple (18 bytes) */ typedef struct xl_heaptid { Oid dbId; /* database */ Oid relId; /* relation */ + CommandId cid; /* this is for "better" tuple' */ + /* identification - it allows to avoid */ + /* "compensation" records for undo */ ItemPointerData tid; /* changed tuple id */ } xl_heaptid; -/* This is what we need to know about delete - ALIGN(14) = 16 bytes */ +/* This is what we need to know about delete - ALIGN(18) = 24 bytes */ typedef struct xl_heap_delete { xl_heaptid dtid; /* deleted tuple id */ } xl_heap_delete; -/* This is what we need to know about insert - 22 + data */ +/* This is what we need to know about insert - 26 + data */ typedef struct xl_heap_insert { xl_heaptid itid; /* inserted tuple id */ @@ -108,7 +111,7 @@ typedef struct xl_heap_update /* NEW TUPLE DATA FOLLOWS AT END OF STRUCT */ } xl_heap_update; -/* This is what we need to know about tuple move - ALIGN(20) = 24 bytes */ +/* This is what we need to know about tuple move - 24 bytes */ typedef struct xl_heap_move { xl_heaptid ftid; /* moved from */ diff --git a/src/include/access/xlog.h b/src/include/access/xlog.h index b5fda0b58a..b86339f072 100644 --- a/src/include/access/xlog.h +++ b/src/include/access/xlog.h @@ -68,6 +68,13 @@ typedef XLogPageHeaderData *XLogPageHeader; #define XLP_FIRST_IS_SUBRECORD 0x0001 +/* + * StartUpID (SUI) - system startups counter. + * It's to allow removing pg_log after shutdown. + */ +typedef uint32 StartUpID; +extern StartUpID ThisStartUpID; + extern XLogRecPtr XLogInsert(RmgrId rmid, uint8 info, char *hdr, uint32 hdrlen, char *buf, uint32 buflen); diff --git a/src/include/storage/bufpage.h b/src/include/storage/bufpage.h index 15d1106f26..30b5a93ad6 100644 --- a/src/include/storage/bufpage.h +++ b/src/include/storage/bufpage.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2000, PostgreSQL, Inc * Portions Copyright (c) 1994, Regents of the University of California * - * $Id: bufpage.h,v 1.29 2000/06/02 10:20:27 vadim Exp $ + * $Id: bufpage.h,v 1.30 2000/07/03 02:54:21 vadim Exp $ * *------------------------------------------------------------------------- */ @@ -118,8 +118,10 @@ typedef OpaqueData *Opaque; typedef struct PageHeaderData { #ifdef XLOG - XLogRecPtr pd_lsn; /* XLOG: next byte after last byte of xlog */ + XLogRecPtr pd_lsn; /* LSN: next byte after last byte of xlog */ /* record for last change of this page */ + StartUpID pd_sui; /* SUI of last changes (currently it's */ + /* used by heap AM only) */ #endif LocationIndex pd_lower; /* offset to start of free space */ LocationIndex pd_upper; /* offset to end of free space */