2000-04-12 19:17:23 +02:00
/*-------------------------------------------------------------------------
1999-10-24 22:42:27 +02:00
*
* xlog . c
*
*
2000-01-26 06:58:53 +01:00
* Portions Copyright ( c ) 1996 - 2000 , PostgreSQL , Inc
* Portions Copyright ( c ) 1994 , Regents of the University of California
1999-10-24 22:42:27 +02:00
*
2000-12-30 07:52:34 +01:00
* $ Header : / cvsroot / pgsql / src / backend / access / transam / xlog . c , v 1.47 2000 / 12 / 30 06 : 52 : 34 vadim Exp $
1999-10-24 22:42:27 +02:00
*
* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
*/
2000-03-20 08:25:39 +01:00
2000-11-21 22:16:06 +01:00
# include "postgres.h"
1999-09-27 17:48:12 +02:00
# include <fcntl.h>
# include <unistd.h>
# include <errno.h>
# include <sys/stat.h>
1999-09-28 13:41:09 +02:00
# include <sys/time.h>
2000-11-05 23:50:21 +01:00
# include <sys/types.h>
# include <dirent.h>
2000-11-25 21:33:54 +01:00
# ifdef USE_LOCALE
# include <locale.h>
# endif
1999-09-27 17:48:12 +02:00
2000-11-21 22:16:06 +01:00
# include "access/transam.h"
1999-09-27 17:48:12 +02:00
# include "access/xact.h"
1999-10-24 22:42:27 +02:00
# include "catalog/catversion.h"
1999-09-27 17:48:12 +02:00
# include "storage/sinval.h"
# include "storage/proc.h"
# include "storage/spin.h"
# include "storage/s_lock.h"
2000-12-28 14:00:29 +01:00
# include "storage/bufpage.h"
2000-10-24 11:56:23 +02:00
# include "access/xlog.h"
# include "access/xlogutils.h"
2000-11-25 21:33:54 +01:00
# include "utils/builtins.h"
2000-11-21 22:16:06 +01:00
# include "utils/relcache.h"
1999-09-27 17:48:12 +02:00
2000-10-21 17:43:36 +02:00
# include "miscadmin.h"
2000-11-20 06:18:40 +01:00
/* Size of the XLOG buffer cache -- presumably a count of BLCKSZ pages; confirm at the allocation site */
int XLOGbuffers = 8 ;
1999-09-27 17:48:12 +02:00
/* Start of the last record this backend inserted (set by XLogInsert) */
XLogRecPtr MyLastRecPtr = { 0 , 0 } ;
2000-12-18 01:44:50 +01:00
/* NOTE(review): presumably the nesting depth of START_CRIT_CODE/END_CRIT_CODE -- confirm */
uint32 CritSectionCount = 0 ;
2000-10-20 13:01:21 +02:00
bool InRecovery = false ;
2000-10-21 17:43:36 +02:00
StartUpID ThisStartUpID = 0 ;
2000-12-28 14:00:29 +01:00
/*
 * Backend-local copy of the redo pointer; XLogInsert refreshes it from
 * XLogCtl->Insert.RedoRecPtr and backs up pages whose LSN is <= this value.
 */
XLogRecPtr RedoRecPtr ;
2000-10-21 17:43:36 +02:00
2000-11-09 12:26:00 +01:00
/* Non-zero enables the debug output in XLogInsert and XLogFlush */
int XLOG_DEBUG = 0 ;
1999-09-27 17:48:12 +02:00
2000-11-05 23:50:21 +01:00
/* Lock for reading/updating the control file and for creating a new log file */
1999-09-27 17:48:12 +02:00
SPINLOCK ControlFileLockId ;
2000-11-05 23:50:21 +01:00
/* Lock for generating a new xid */
1999-09-27 17:48:12 +02:00
SPINLOCK XidGenLockId ;
2000-11-25 21:33:54 +01:00
/* Directory holding the log segment files (used by XLogFileName/XLogTempFileName) */
static char XLogDir [ MAXPGPATH ] ;
static char ControlFilePath [ MAXPGPATH ] ;
2000-04-12 19:17:23 +02:00
/* NOTE(review): presumably the lower bound enforced on XLOGbuffers -- confirm */
# define MinXLOGbuffers 4
1999-09-27 17:48:12 +02:00
/*
 * Log-writer request: the log positions up to which data is requested
 * to be written out and to be flushed.
 */
typedef struct XLgwrRqst
{
2000-04-12 19:17:23 +02:00
XLogRecPtr Write ; /* byte (1-based) to write out */
XLogRecPtr Flush ; /* byte (1-based) to flush */
1999-09-27 17:48:12 +02:00
} XLgwrRqst ;
/*
 * Log-writer result: the log positions up to which data has actually
 * been written out and flushed.
 */
typedef struct XLgwrResult
{
2000-04-12 19:17:23 +02:00
XLogRecPtr Write ; /* bytes written out */
XLogRecPtr Flush ; /* bytes flushed */
1999-09-27 17:48:12 +02:00
} XLgwrResult ;
/*
 * Insertion state of the shared log buffer cache.  In the code visible here
 * it is only touched while XLogCtl->insert_lck is held.
 */
typedef struct XLogCtlInsert
{
2000-12-28 14:00:29 +01:00
XLgwrResult LgwrResult ; /* last LgwrResult seen by GetFreeXLBuffer */
XLogRecPtr PrevRecord ; /* start of the previously inserted record */
uint16 curridx ; /* current block index in cache */
XLogPageHeader currpage ; /* header of the page at curridx */
char * currpos ; /* next insert position within currpage */
XLogRecPtr RedoRecPtr ; /* copied into the backend-local RedoRecPtr by XLogInsert */
1999-09-27 17:48:12 +02:00
} XLogCtlInsert ;
/*
 * Write-out state of the shared log buffer cache.  In the code visible here
 * it is only touched while XLogCtl->lgwr_lck is held.
 */
typedef struct XLogCtlWrite
{
2000-04-12 19:17:23 +02:00
XLgwrResult LgwrResult ; /* result of the most recent write/flush */
uint16 curridx ; /* index of next block to write */
1999-09-27 17:48:12 +02:00
} XLogCtlWrite ;
2000-03-20 08:25:39 +01:00
1999-09-27 17:48:12 +02:00
/*
 * Shared XLOG control structure (reached through the XLogCtl pointer).
 */
typedef struct XLogCtlData
{
2000-11-05 23:50:21 +01:00
XLogCtlInsert Insert ; /* insertion state, used under insert_lck */
XLgwrRqst LgwrRqst ; /* shared write/flush request, used under info_lck */
XLgwrResult LgwrResult ; /* shared write/flush result, used under info_lck */
XLogCtlWrite Write ; /* write-out state, used under lgwr_lck */
char * pages ; /* buffer cache; page i starts at pages + i * BLCKSZ */
XLogRecPtr * xlblocks ; /* per page: log position of its 1st byte + BLCKSZ */
uint32 XLogCacheByte ; /* presumably cache size in bytes -- confirm at init */
uint32 XLogCacheBlck ; /* highest buffer index (NextBufIdx wraps after it) */
StartUpID ThisStartUpID ;
2000-12-28 14:00:29 +01:00
XLogRecPtr RedoRecPtr ; /* for postmaster */
2000-11-05 23:50:21 +01:00
slock_t insert_lck ; /* serializes record insertion */
slock_t info_lck ; /* protects LgwrRqst and LgwrResult above */
slock_t lgwr_lck ; /* serializes writing/flushing (XLogWrite callers) */
slock_t chkp_lck ; /* checkpoint lock */
1999-09-27 17:48:12 +02:00
} XLogCtlData ;
2000-04-12 19:17:23 +02:00
static XLogCtlData * XLogCtl = NULL ;
1999-09-27 17:48:12 +02:00
2000-11-25 21:33:54 +01:00
/*
* Contents of pg_control
*/
1999-09-27 17:48:12 +02:00
/* Database state as recorded in ControlFileData.state */
typedef enum DBState
{
1999-10-06 23:58:18 +02:00
DB_STARTUP = 0 ,
DB_SHUTDOWNED ,
1999-09-27 17:48:12 +02:00
DB_SHUTDOWNING ,
DB_IN_RECOVERY ,
DB_IN_PRODUCTION
} DBState ;
2000-11-25 21:33:54 +01:00
/* Size of the locale name buffers stored in ControlFileData */
# define LOCALE_NAME_BUFLEN 128
1999-09-27 17:48:12 +02:00
/*
 * In-memory image of the control file (see "Contents of pg_control").
 */
typedef struct ControlFileData
{
2000-12-28 14:00:29 +01:00
crc64 crc ; /* NOTE(review): presumably a CRC over the fields below -- confirm in WriteControlFile */
2000-04-12 19:17:23 +02:00
uint32 logId ; /* current log file id */
uint32 logSeg ; /* current log file segment (1-based) */
XLogRecPtr checkPoint ; /* last check point record ptr */
time_t time ; /* time stamp of last modification */
2000-11-25 21:33:54 +01:00
DBState state ; /* see enum above */
1999-10-16 11:32:23 +02:00
/*
* This data is used to make sure that the configuration of this DB is
* compatible with the backend executable.
*/
2000-04-12 19:17:23 +02:00
uint32 blcksz ; /* block size for this DB */
uint32 relseg_size ; /* blocks per segment of large relation */
uint32 catalog_version_no ; /* internal version number */
2000-11-25 21:33:54 +01:00
/* active locales --- "C" if compiled without USE_LOCALE: */
char lc_collate [ LOCALE_NAME_BUFLEN ] ;
char lc_ctype [ LOCALE_NAME_BUFLEN ] ;
1999-10-24 22:42:27 +02:00
/*
* Important directory locations
*/
2000-11-25 21:33:54 +01:00
char archdir [ MAXPGPATH ] ; /* where to move offline log files */
1999-09-27 17:48:12 +02:00
} ControlFileData ;
2000-04-12 19:17:23 +02:00
static ControlFileData * ControlFile = NULL ;
1999-09-27 17:48:12 +02:00
/*
 * Body of a checkpoint record.
 */
typedef struct CheckPoint
{
2000-10-21 17:43:36 +02:00
XLogRecPtr redo ; /* next RecPtr available when we */
/* began to create CheckPoint */
/* (i.e. REDO start point) */
XLogRecPtr undo ; /* first record of oldest in-progress */
/* transaction when we started */
/* (i.e. UNDO end point) */
StartUpID ThisStartUpID ;
TransactionId nextXid ;
Oid nextOid ;
bool Shutdown ; /* NOTE(review): presumably true for a shutdown checkpoint -- confirm */
1999-09-27 17:48:12 +02:00
} CheckPoint ;
2000-10-21 17:43:36 +02:00
/* NOTE(review): presumably xl_info codes for RM_XLOG_ID records -- confirm against the redo routine */
# define XLOG_CHECKPOINT 0x00
2000-11-03 12:39:36 +01:00
# define XLOG_NEXTOID 0x10
2000-10-21 17:43:36 +02:00
2000-12-28 14:00:29 +01:00
/*
 * Header that XLogInsert places in front of each backed-up page image.
 * XLogInsert computes crc over the page contents followed by the fields
 * of this header starting at 'node'.
 */
typedef struct BkpBlock
{
crc64 crc ;
RelFileNode node ; /* from BufferGetFileNode() of the backed-up buffer */
BlockNumber block ; /* from BufferGetBlockNumber() of the backed-up buffer */
} BkpBlock ;
2000-04-12 19:17:23 +02:00
/*
* We break each log file into 16 MB segments
1999-09-27 17:48:12 +02:00
*/
1999-10-06 23:58:18 +02:00
/* Size of one log segment file, in bytes */
# define XLogSegSize (16*1024*1024)
2000-04-12 19:17:23 +02:00
/* Number of whole segments that fit in the 32-bit offset range */
# define XLogLastSeg (0xffffffff / XLogSegSize)
/* Usable size of one logical log file: a whole number of segments */
# define XLogFileSize (XLogLastSeg * XLogSegSize)
1999-09-27 17:48:12 +02:00
2000-04-12 19:17:23 +02:00
/*
 * XLogFileName / XLogTempFileName
 *
 * Format into 'path' (a buffer of at least MAXPGPATH bytes) the name of
 * log segment (log, seg) inside XLogDir: the directory, SEP_CHAR, then
 * 'log' and 'seg' as two 8-digit upper-case hex numbers.  The temp
 * variant puts a 'T' in front of the hex digits.
 *
 * The format string must not contain blanks: anything else in it becomes
 * part of the file name.
 */
#define XLogFileName(path, log, seg) \
	snprintf((path), MAXPGPATH, "%s%c%08X%08X", \
			 XLogDir, SEP_CHAR, (log), (seg))

#define XLogTempFileName(path, log, seg) \
	snprintf((path), MAXPGPATH, "%s%cT%08X%08X", \
			 XLogDir, SEP_CHAR, (log), (seg))
2000-04-12 19:17:23 +02:00
/*
 * PrevBufIdx / NextBufIdx
 *
 * Step a buffer index backwards / forwards around the circular buffer
 * cache, whose valid indexes are 0 .. XLogCtl->XLogCacheBlck.
 *
 * The argument is parenthesized so that expression arguments work, but it
 * is still evaluated twice: do not pass expressions with side effects.
 */
#define PrevBufIdx(curridx) \
	(((curridx) == 0) ? XLogCtl->XLogCacheBlck : ((curridx) - 1))

#define NextBufIdx(curridx) \
	(((curridx) == XLogCtl->XLogCacheBlck) ? 0 : ((curridx) + 1))
2000-04-12 19:17:23 +02:00
/*
 * InitXLBuffer
 *
 * Make buffer page 'newidx' the current insert page.  Requires a local
 * variable 'Insert' (XLogCtlInsert *) in scope at the call site.
 *
 * The new page's end position (xlblocks[newidx]) is derived from the old
 * current page: normally old end + BLCKSZ; if the old page ended exactly
 * at XLogFileSize, the new page is the first page of the next log file
 * (xrecoff = BLCKSZ, xlogid + 1).  Then Insert->curridx, currpage and
 * currpos are pointed at the new page, just past its header, and the page
 * header is initialized.
 *
 * The macro parameter is deliberately NOT named 'curridx': a parameter of
 * that name would also be substituted into 'Insert->curridx' below, making
 * the macro work only when the caller's variable happens to be spelled
 * 'curridx'.  The argument is evaluated several times, so it must not have
 * side effects.
 */
#define InitXLBuffer(newidx) (\
	XLogCtl->xlblocks[(newidx)].xrecoff = \
		(XLogCtl->xlblocks[Insert->curridx].xrecoff == XLogFileSize) ? \
		BLCKSZ : (XLogCtl->xlblocks[Insert->curridx].xrecoff + BLCKSZ), \
	XLogCtl->xlblocks[(newidx)].xlogid = \
		(XLogCtl->xlblocks[Insert->curridx].xrecoff == XLogFileSize) ? \
		(XLogCtl->xlblocks[Insert->curridx].xlogid + 1) : \
		XLogCtl->xlblocks[Insert->curridx].xlogid, \
	Insert->curridx = (newidx), \
	Insert->currpage = (XLogPageHeader) (XLogCtl->pages + (newidx) * BLCKSZ), \
	Insert->currpos = \
		((char *) Insert->currpage) + SizeOfXLogPHD, \
	Insert->currpage->xlp_magic = XLOG_PAGE_MAGIC, \
	Insert->currpage->xlp_info = 0 \
)
2000-04-12 19:17:23 +02:00
/*
 * XRecOffIsValid
 *
 * True if 'xrecoff' can be the start of a record: its offset within the
 * page is past the page header, and at least SizeOfXLogRecord bytes remain
 * before the end of the page.
 *
 * The argument is parenthesized so that expression arguments such as
 * 'base + off' are reduced modulo BLCKSZ as a whole; it is evaluated twice.
 */
#define XRecOffIsValid(xrecoff) \
	((xrecoff) % BLCKSZ >= SizeOfXLogPHD && \
	 (BLCKSZ - (xrecoff) % BLCKSZ) >= SizeOfXLogRecord)
2000-12-28 14:00:29 +01:00
/* NOTE(review): presumably the internal upper bound on a record including backup blocks -- confirm at its use sites */
# define _INTL_MAXLOGRECSZ (3 * MAXLOGRECSZ)
/* Lookup table indexed by one byte, used by COMP_CRC64; defined elsewhere */
extern uint32 crc_table [ ] ;
/*
 * 64-bit CRC kept as two interleaved 32-bit halves (crc1, crc2).
 *
 * INIT_CRC64 sets both halves to all-ones; FIN_CRC64 inverts both halves.
 *
 * COMP_CRC64 folds 'len' bytes starting at 'data' into 'crc': bytes at even
 * positions (0, 2, ...) update crc1 and bytes at odd positions update crc2,
 * each through crc_table[]; a trailing odd byte goes to crc1.
 *
 * COMP_CRC64 is wrapped in do { } while (0) so that "COMP_CRC64(...);" is a
 * single statement and is safe in an unbraced if/else.  Bytes are read as
 * unsigned char; because the table index is masked with 0xff this gives the
 * same result as reading plain char.  'crc' is evaluated more than once and
 * must be an lvalue without side effects.
 */
#define INIT_CRC64(crc)	((crc).crc1 = 0xffffffff, (crc).crc2 = 0xffffffff)
#define FIN_CRC64(crc)	((crc).crc1 ^= 0xffffffff, (crc).crc2 ^= 0xffffffff)
#define COMP_CRC64(crc, data, len) \
do { \
	uint32		crc64_c1_ = (crc).crc1; \
	uint32		crc64_c2_ = (crc).crc2; \
	const unsigned char *crc64_p_ = (const unsigned char *) (data); \
	uint32		crc64_n_ = (len); \
\
	while (crc64_n_ >= 2) \
	{ \
		crc64_c1_ = crc_table[(crc64_c1_ ^ *crc64_p_++) & 0xff] ^ (crc64_c1_ >> 8); \
		crc64_c2_ = crc_table[(crc64_c2_ ^ *crc64_p_++) & 0xff] ^ (crc64_c2_ >> 8); \
		crc64_n_ -= 2; \
	} \
	if (crc64_n_ > 0) \
		crc64_c1_ = crc_table[(crc64_c1_ ^ *crc64_p_++) & 0xff] ^ (crc64_c1_ >> 8); \
	(crc).crc1 = crc64_c1_; \
	(crc).crc2 = crc64_c2_; \
} while (0)
/* Forward declarations */
void SetRedoRecPtr ( void ) ;
void GetRedoRecPtr ( void ) ;
2000-04-12 19:17:23 +02:00
static void GetFreeXLBuffer ( void ) ;
static void XLogWrite ( char * buffer ) ;
2000-11-05 23:50:21 +01:00
static int XLogFileInit ( uint32 log , uint32 seg , bool * usexistent ) ;
2000-04-12 19:17:23 +02:00
static int XLogFileOpen ( uint32 log , uint32 seg , bool econt ) ;
static XLogRecord * ReadRecord ( XLogRecPtr * RecPtr , char * buffer ) ;
2000-11-25 21:33:54 +01:00
static void WriteControlFile ( void ) ;
static void ReadControlFile ( void ) ;
2000-04-12 19:17:23 +02:00
static char * str_time ( time_t tnow ) ;
2000-10-21 17:43:36 +02:00
static void xlog_outrec ( char * buf , XLogRecord * record ) ;
2000-04-12 19:17:23 +02:00
/*
 * Backend-local copies of the shared write/flush result and request.
 * They are refreshed from XLogCtl under info_lck or lgwr_lck and may be
 * stale in between.
 */
static XLgwrResult LgwrResult = { { 0 , 0 } , { 0 , 0 } } ;
static XLgwrRqst LgwrRqst = { { 0 , 0 } , { 0 , 0 } } ;
/*
 * Log segment currently open for writing in this backend: descriptor
 * (-1 if none), log file id, segment number, and current file offset.
 */
static int logFile = - 1 ;
static uint32 logId = 0 ;
static uint32 logSeg = 0 ;
static uint32 logOff = 0 ;
/* NOTE(review): the variables below are presumably the read-side state used by ReadRecord -- not used in the code visible here */
static XLogRecPtr ReadRecPtr ;
static XLogRecPtr EndRecPtr ;
static int readFile = - 1 ;
static uint32 readId = 0 ;
static uint32 readSeg = 0 ;
static uint32 readOff = 0 ;
static char readBuf [ BLCKSZ ] ;
static XLogRecord * nextRecord = NULL ;
1999-09-27 17:48:12 +02:00
2000-10-28 18:21:00 +02:00
/* While true, XLogFlush returns without doing anything */
static bool InRedo = false ;
1999-09-27 17:48:12 +02:00
/*
 * XLogInsert
 *
 * Append one log record for resource manager 'rmid' to the shared log
 * buffer cache.
 *
 * rmid:  resource manager id stored in the record; RM_XLOG_ID records are
 *        treated as non-transactional.
 * info:  record info byte.  The only value accepted in the XLR_INFO_MASK
 *        bits is XLOG_NO_TRAN, which marks the record non-transactional and
 *        is stripped before the byte is stored.
 * rdata: chain of data pieces.  Pieces whose 'buffer' is InvalidBuffer are
 *        always logged.  Pieces tied to a buffer are logged only if that
 *        page is not backed up; a page whose LSN is <= RedoRecPtr is backed
 *        up in full instead (header + BLCKSZ image).  At most two distinct
 *        buffers may be referenced.  The chain is modified: 'data' is set to
 *        NULL for pieces replaced by a backup block, backup pieces are
 *        linked to its tail, and data/len are advanced while copying.
 *
 * Returns the log position just past the (MAXALIGNed) end of the inserted
 * record.  In bootstrap mode, records of any rmid other than RM_XLOG_ID are
 * not logged at all and { 0, SizeOfXLogPHD } is returned.
 *
 * Side effects: MyLastRecPtr is set to the start of the inserted record;
 * for the first transactional record (MyLastRecPtr.xrecoff == 0) that
 * position is also stored in MyProc->logRec.
 *
 * Reports elog(STOP) for an invalid info mask, more than two buffers, or a
 * payload length of 0 or more than MAXLOGRECSZ.
 */
XLogRecPtr
2000-12-28 14:00:29 +01:00
XLogInsert ( RmgrId rmid , uint8 info , XLogRecData * rdata )
1999-09-27 17:48:12 +02:00
{
2000-11-30 02:47:33 +01:00
XLogCtlInsert * Insert = & XLogCtl - > Insert ;
XLogRecord * record ;
XLogSubRecord * subrecord ;
XLogRecPtr RecPtr ;
2000-12-28 14:00:29 +01:00
uint32 freespace ;
2000-11-30 02:47:33 +01:00
uint16 curridx ;
2000-12-28 14:00:29 +01:00
XLogRecData * rdt ;
/* Per referenced buffer (max 2): id, backup decision, chain pieces, header, page LSN, CRC */
Buffer dtbuf [ 2 ] = { InvalidBuffer , InvalidBuffer } ;
bool dtbuf_bkp [ 2 ] = { false , false } ;
XLogRecData dtbuf_rdt [ 4 ] ;
BkpBlock dtbuf_xlg [ 2 ] ;
XLogRecPtr dtbuf_lsn [ 2 ] ;
crc64 dtbuf_crc [ 2 ] ,
rdata_crc ;
uint32 len ;
unsigned i ;
2000-11-30 02:47:33 +01:00
bool updrqst = false ;
2000-12-28 14:00:29 +01:00
bool repeat = false ;
2000-11-30 02:47:33 +01:00
bool no_tran = ( rmid = = RM_XLOG_ID ) ? true : false ;
/* Validate and strip the flag bits passed in 'info' */
if ( info & XLR_INFO_MASK )
{
if ( ( info & XLR_INFO_MASK ) ! = XLOG_NO_TRAN )
elog ( STOP , " XLogInsert: invalid info mask %02X " ,
( info & XLR_INFO_MASK ) ) ;
no_tran = true ;
info & = ~ XLR_INFO_MASK ;
}
2000-11-21 10:39:57 +01:00
/* In bootstrap mode only RM_XLOG_ID records are logged */
if ( IsBootstrapProcessingMode ( ) & & rmid ! = RM_XLOG_ID )
2000-10-21 17:43:36 +02:00
{
RecPtr . xlogid = 0 ;
RecPtr . xrecoff = SizeOfXLogPHD ; /* start of 1st checkpoint record */
return ( RecPtr ) ;
}
2000-12-28 14:00:29 +01:00
/*
 * Pass 1, no lock held: add up the payload length and CRC and decide,
 * for each referenced buffer, whether its page must be backed up.
 * We come back here if RedoRecPtr turns out to have moved (see below).
 */
begin : ;
INIT_CRC64 ( rdata_crc ) ;
for ( len = 0 , rdt = rdata ; ; )
{
/* Data not tied to a buffer is always logged */
if ( rdt - > buffer = = InvalidBuffer )
{
len + = rdt - > len ;
COMP_CRC64 ( rdata_crc , rdt - > data , rdt - > len ) ;
if ( rdt - > next = = NULL )
break ;
rdt = rdt - > next ;
continue ;
}
for ( i = 0 ; i < 2 ; i + + )
{
/* Buffer already registered in slot i: reuse the earlier decision */
if ( rdt - > buffer = = dtbuf [ i ] )
{
if ( dtbuf_bkp [ i ] )
rdt - > data = NULL ;
else if ( rdt - > data )
{
len + = rdt - > len ;
COMP_CRC64 ( rdata_crc , rdt - > data , rdt - > len ) ;
}
break ;
}
/* Free slot: register the buffer; its LSN is read from the start of the page */
if ( dtbuf [ i ] = = InvalidBuffer )
{
dtbuf [ i ] = rdt - > buffer ;
dtbuf_lsn [ i ] = * ( ( XLogRecPtr * ) ( BufferGetBlock ( rdt - > buffer ) ) ) ;
/* Page LSN <= redo pointer: log the whole page instead of this piece */
if ( XLByteLE ( dtbuf_lsn [ i ] , RedoRecPtr ) )
{
crc64 crc ;
dtbuf_bkp [ i ] = true ;
rdt - > data = NULL ;
INIT_CRC64 ( crc ) ;
COMP_CRC64 ( crc , ( ( char * ) BufferGetBlock ( dtbuf [ i ] ) ) , BLCKSZ ) ;
dtbuf_crc [ i ] = crc ;
}
else if ( rdt - > data )
{
len + = rdt - > len ;
COMP_CRC64 ( rdata_crc , rdt - > data , rdt - > len ) ;
}
break ;
}
}
if ( i > = 2 )
elog ( STOP , " XLogInsert: can backup 2 blocks at most " ) ;
if ( rdt - > next = = NULL )
break ;
rdt = rdt - > next ;
}
if ( len = = 0 | | len > MAXLOGRECSZ )
elog ( STOP , " XLogInsert: invalid record len %u " , len ) ;
2000-12-03 11:27:29 +01:00
START_CRIT_CODE ;
1999-09-27 17:48:12 +02:00
/* obtain xlog insert lock */
if ( TAS ( & ( XLogCtl - > insert_lck ) ) ) /* busy */
{
bool do_lgwr = true ;
2000-12-28 14:00:29 +01:00
for ( i = 0 ; ; )
1999-09-27 17:48:12 +02:00
{
/* try to read LgwrResult while waiting for insert lock */
if ( ! TAS ( & ( XLogCtl - > info_lck ) ) )
{
LgwrRqst = XLogCtl - > LgwrRqst ;
LgwrResult = XLogCtl - > LgwrResult ;
S_UNLOCK ( & ( XLogCtl - > info_lck ) ) ;
2000-04-12 19:17:23 +02:00
1999-09-27 17:48:12 +02:00
/*
* If the cache is half filled then try to acquire the lgwr lock
* and do the LGWR work, but only once.
*/
2000-04-12 19:17:23 +02:00
if ( do_lgwr & &
( LgwrRqst . Write . xlogid ! = LgwrResult . Write . xlogid | |
( LgwrRqst . Write . xrecoff - LgwrResult . Write . xrecoff > =
XLogCtl - > XLogCacheByte / 2 ) ) )
1999-09-27 17:48:12 +02:00
{
if ( ! TAS ( & ( XLogCtl - > lgwr_lck ) ) )
{
/* Under lgwr_lck, Write.LgwrResult is the authoritative result */
LgwrResult = XLogCtl - > Write . LgwrResult ;
if ( ! TAS ( & ( XLogCtl - > info_lck ) ) )
{
LgwrRqst = XLogCtl - > LgwrRqst ;
S_UNLOCK ( & ( XLogCtl - > info_lck ) ) ;
}
if ( XLByteLT ( LgwrResult . Write , LgwrRqst . Write ) )
{
XLogWrite ( NULL ) ;
do_lgwr = false ;
}
S_UNLOCK ( & ( XLogCtl - > lgwr_lck ) ) ;
}
}
}
2000-12-29 22:31:21 +01:00
S_LOCK_SLEEP ( & ( XLogCtl - > insert_lck ) , i + + ) ;
1999-09-27 17:48:12 +02:00
if ( ! TAS ( & ( XLogCtl - > insert_lck ) ) )
break ;
}
}
2000-12-28 14:00:29 +01:00
/*
 * Race condition: RedoRecPtr may have changed since pass 1.  Re-read it
 * under the insert lock; if a buffer we decided not to back up now has
 * LSN <= RedoRecPtr, forget it, release the lock and redo pass 1.
 */
RedoRecPtr = Insert - > RedoRecPtr ;
repeat = false ;
for ( i = 0 ; i < 2 ; i + + )
{
if ( dtbuf [ i ] = = InvalidBuffer )
continue ;
if ( dtbuf_bkp [ i ] = = false & &
XLByteLE ( dtbuf_lsn [ i ] , RedoRecPtr ) )
{
dtbuf [ i ] = InvalidBuffer ;
repeat = true ;
}
}
if ( repeat )
{
S_UNLOCK ( & ( XLogCtl - > insert_lck ) ) ;
END_CRIT_CODE ;
goto begin ;
}
/*
 * Attach backup blocks to record data.  'rdt' still points at the last
 * piece of the chain from pass 1; each backed-up buffer adds two pieces:
 * its BkpBlock header and the BLCKSZ page image.  The block CRC (started
 * over the page in pass 1) is completed over the header fields from
 * 'node' onwards.
 */
for ( i = 0 ; i < 2 ; i + + )
{
if ( dtbuf [ i ] = = InvalidBuffer | | ! ( dtbuf_bkp [ i ] ) )
continue ;
info | = ( XLR_SET_BKP_BLOCK ( i ) ) ;
dtbuf_xlg [ i ] . node = BufferGetFileNode ( dtbuf [ i ] ) ;
dtbuf_xlg [ i ] . block = BufferGetBlockNumber ( dtbuf [ i ] ) ;
COMP_CRC64 ( dtbuf_crc [ i ] ,
( ( char * ) & ( dtbuf_xlg [ i ] ) + offsetof ( BkpBlock , node ) ) ,
( sizeof ( BkpBlock ) - offsetof ( BkpBlock , node ) ) ) ;
FIN_CRC64 ( dtbuf_crc [ i ] ) ;
dtbuf_xlg [ i ] . crc = dtbuf_crc [ i ] ;
rdt - > next = & ( dtbuf_rdt [ 2 * i ] ) ;
dtbuf_rdt [ 2 * i ] . data = ( char * ) & ( dtbuf_xlg [ i ] ) ;
dtbuf_rdt [ 2 * i ] . len = sizeof ( BkpBlock ) ;
len + = sizeof ( BkpBlock ) ;
rdt = dtbuf_rdt [ 2 * i ] . next = & ( dtbuf_rdt [ 2 * i + 1 ] ) ;
dtbuf_rdt [ 2 * i + 1 ] . data = ( char * ) ( BufferGetBlock ( dtbuf [ i ] ) ) ;
dtbuf_rdt [ 2 * i + 1 ] . len = BLCKSZ ;
len + = BLCKSZ ;
dtbuf_rdt [ 2 * i + 1 ] . next = NULL ;
}
/* Insert record: first make sure the current page has room for a record header */
2000-04-12 19:17:23 +02:00
freespace = ( ( char * ) Insert - > currpage ) + BLCKSZ - Insert - > currpos ;
1999-09-27 17:48:12 +02:00
if ( freespace < SizeOfXLogRecord )
{
curridx = NextBufIdx ( Insert - > curridx ) ;
/* Reuse the next page directly if it is already written out, else wait for / force a write */
if ( XLByteLE ( XLogCtl - > xlblocks [ curridx ] , LgwrResult . Write ) )
InitXLBuffer ( curridx ) ;
2000-04-12 19:17:23 +02:00
else
1999-09-27 17:48:12 +02:00
GetFreeXLBuffer ( ) ;
freespace = BLCKSZ - SizeOfXLogPHD ;
}
else
curridx = Insert - > curridx ;
freespace - = SizeOfXLogRecord ;
2000-04-12 19:17:23 +02:00
/* Fill in the record header in place */
record = ( XLogRecord * ) Insert - > currpos ;
1999-09-27 17:48:12 +02:00
record - > xl_prev = Insert - > PrevRecord ;
2000-11-30 02:47:33 +01:00
if ( no_tran )
1999-10-06 23:58:18 +02:00
{
record - > xl_xact_prev . xlogid = 0 ;
record - > xl_xact_prev . xrecoff = 0 ;
}
2000-11-30 02:47:33 +01:00
else
record - > xl_xact_prev = MyLastRecPtr ;
1999-09-27 17:48:12 +02:00
record - > xl_xid = GetCurrentTransactionId ( ) ;
2000-12-28 14:00:29 +01:00
record - > xl_len = len ;
record - > xl_info = info ;
1999-09-27 17:48:12 +02:00
record - > xl_rmid = rmid ;
2000-12-28 14:00:29 +01:00
/* The record CRC covers the logged data plus the header from xl_prev onwards */
COMP_CRC64 ( rdata_crc , ( ( char * ) record + offsetof ( XLogRecord , xl_prev ) ) ,
( SizeOfXLogRecord - offsetof ( XLogRecord , xl_prev ) ) ) ;
FIN_CRC64 ( rdata_crc ) ;
record - > xl_crc = rdata_crc ;
1999-09-27 17:48:12 +02:00
/* Log position of the start of this record (xlblocks[] holds each page's end position) */
RecPtr . xlogid = XLogCtl - > xlblocks [ curridx ] . xlogid ;
2000-04-12 19:17:23 +02:00
RecPtr . xrecoff =
XLogCtl - > xlblocks [ curridx ] . xrecoff - BLCKSZ +
Insert - > currpos - ( ( char * ) Insert - > currpage ) ;
2000-11-30 02:47:33 +01:00
/* First transactional record of this backend: publish its position in MyProc */
if ( MyLastRecPtr . xrecoff = = 0 & & ! no_tran )
1999-09-27 17:48:12 +02:00
{
SpinAcquire ( SInvalLock ) ;
MyProc - > logRec = RecPtr ;
SpinRelease ( SInvalLock ) ;
}
2000-10-21 17:43:36 +02:00
Insert - > PrevRecord = RecPtr ;
if ( XLOG_DEBUG )
{
char buf [ 8192 ] ;
sprintf ( buf , " INSERT @ %u/%u: " , RecPtr . xlogid , RecPtr . xrecoff ) ;
xlog_outrec ( buf , record ) ;
2000-12-28 14:00:29 +01:00
if ( rdata - > data ! = NULL )
2000-10-21 17:43:36 +02:00
{
strcat ( buf , " - " ) ;
2000-12-28 14:00:29 +01:00
RmgrTable [ record - > xl_rmid ] . rm_desc ( buf , record - > xl_info , rdata - > data ) ;
2000-10-21 17:43:36 +02:00
}
strcat ( buf , " \n " ) ;
write ( 2 , buf , strlen ( buf ) ) ;
}
2000-06-02 12:20:27 +02:00
MyLastRecPtr = RecPtr ; /* begin of record */
1999-09-27 17:48:12 +02:00
Insert - > currpos + = SizeOfXLogRecord ;
2000-12-28 14:00:29 +01:00
/*
 * Copy the data pieces into the buffer cache.  A piece that does not fit
 * in the current page is split; the remainder continues on the next page
 * after a sub-record header holding the number of bytes still to come.
 */
while ( len )
1999-09-27 17:48:12 +02:00
{
2000-12-28 14:00:29 +01:00
/* Skip pieces that were replaced by a backup block */
while ( rdata - > data = = NULL )
rdata = rdata - > next ;
if ( freespace > 0 )
1999-09-27 17:48:12 +02:00
{
2000-12-28 14:00:29 +01:00
if ( rdata - > len > freespace )
{
/* Fill the rest of this page and fall through to take the next buffer */
memcpy ( Insert - > currpos , rdata - > data , freespace ) ;
rdata - > data + = freespace ;
rdata - > len - = freespace ;
len - = freespace ;
}
else
{
memcpy ( Insert - > currpos , rdata - > data , rdata - > len ) ;
freespace - = rdata - > len ;
len - = rdata - > len ;
Insert - > currpos + = rdata - > len ;
rdata = rdata - > next ;
continue ;
}
1999-09-27 17:48:12 +02:00
}
2000-12-28 14:00:29 +01:00
/* Use next buffer */
1999-09-27 17:48:12 +02:00
curridx = NextBufIdx ( curridx ) ;
if ( XLByteLE ( XLogCtl - > xlblocks [ curridx ] , LgwrResult . Write ) )
{
InitXLBuffer ( curridx ) ;
updrqst = true ;
}
else
GetFreeXLBuffer ( ) ;
freespace = BLCKSZ - SizeOfXLogPHD - SizeOfXLogSubRecord ;
Insert - > currpage - > xlp_info | = XLP_FIRST_IS_SUBRECORD ;
2000-04-12 19:17:23 +02:00
subrecord = ( XLogSubRecord * ) Insert - > currpos ;
2000-12-28 14:00:29 +01:00
subrecord - > xl_len = len ;
1999-09-27 17:48:12 +02:00
Insert - > currpos + = SizeOfXLogSubRecord ;
}
2000-12-28 14:00:29 +01:00
/* Round the insert position up to a MAXALIGN boundary within the page */
Insert - > currpos = ( ( char * ) Insert - > currpage ) +
MAXALIGN ( Insert - > currpos - ( ( char * ) Insert - > currpage ) ) ;
2000-04-12 19:17:23 +02:00
freespace = ( ( char * ) Insert - > currpage ) + BLCKSZ - Insert - > currpos ;
2000-06-02 12:20:27 +02:00
/*
* The beginning of the next record will be stored as the LSN for
* the changed data page...
*/
RecPtr . xlogid = XLogCtl - > xlblocks [ curridx ] . xlogid ;
RecPtr . xrecoff =
XLogCtl - > xlblocks [ curridx ] . xrecoff - BLCKSZ +
Insert - > currpos - ( ( char * ) Insert - > currpage ) ;
2000-12-28 14:00:29 +01:00
/* Need to update global LgwrRqst if some block was filled up */
1999-09-27 17:48:12 +02:00
if ( freespace < SizeOfXLogRecord )
2000-12-28 14:00:29 +01:00
updrqst = true ; /* curridx is filled and available for writing out */
1999-09-27 17:48:12 +02:00
else
/* Current page is not full yet: request writing only through the previous page */
curridx = PrevBufIdx ( curridx ) ;
LgwrRqst . Write = XLogCtl - > xlblocks [ curridx ] ;
S_UNLOCK ( & ( XLogCtl - > insert_lck ) ) ;
if ( updrqst )
{
2000-12-29 22:31:21 +01:00
/* Publish the new write request, never moving the shared value backwards */
S_LOCK ( & ( XLogCtl - > info_lck ) ) ;
if ( XLByteLT ( XLogCtl - > LgwrRqst . Write , LgwrRqst . Write ) )
XLogCtl - > LgwrRqst . Write = LgwrRqst . Write ;
S_UNLOCK ( & ( XLogCtl - > info_lck ) ) ;
1999-09-27 17:48:12 +02:00
}
2000-12-03 11:27:29 +01:00
END_CRIT_CODE ;
1999-09-27 17:48:12 +02:00
return ( RecPtr ) ;
2000-04-12 19:17:23 +02:00
}
1999-09-27 17:48:12 +02:00
/*
 * XLogFlush
 *
 * Make sure the log is flushed to disk at least up to position 'record'.
 *
 * Returns immediately while InRedo is set, or when the backend-local
 * LgwrResult.Flush already covers 'record'.  Otherwise it loops: it reads
 * the shared result/request under info_lck, tries to snapshot the current
 * insert page under insert_lck, and tries to take lgwr_lck to do the
 * write/flush itself; between attempts it sleeps via S_LOCK_SLEEP.
 *
 * Reports elog(STOP) if the request is not satisfied after XLogWrite, or
 * if close()/fsync of the log file fails.
 */
void
XLogFlush ( XLogRecPtr record )
{
2000-04-12 19:17:23 +02:00
XLogRecPtr WriteRqst ;
/* Private copy of a partially filled insert page, handed to XLogWrite via usebuf */
char buffer [ BLCKSZ ] ;
char * usebuf = NULL ;
2000-12-29 22:31:21 +01:00
unsigned spins = 0 ;
2000-04-12 19:17:23 +02:00
bool force_lgwr = false ;
1999-09-27 17:48:12 +02:00
2000-10-28 18:21:00 +02:00
if ( XLOG_DEBUG )
{
fprintf ( stderr , " XLogFlush%s%s: rqst %u/%u; wrt %u/%u; flsh %u/%u \n " ,
( IsBootstrapProcessingMode ( ) ) ? " (bootstrap) " : " " ,
( InRedo ) ? " (redo) " : " " ,
record . xlogid , record . xrecoff ,
LgwrResult . Write . xlogid , LgwrResult . Write . xrecoff ,
LgwrResult . Flush . xlogid , LgwrResult . Flush . xrecoff ) ;
fflush ( stderr ) ;
}
2000-11-21 10:39:57 +01:00
if ( InRedo )
2000-10-28 18:21:00 +02:00
return ;
1999-09-27 17:48:12 +02:00
/* Fast path: already flushed according to our local copy */
if ( XLByteLE ( record , LgwrResult . Flush ) )
return ;
2000-12-03 11:27:29 +01:00
START_CRIT_CODE ;
1999-09-27 17:48:12 +02:00
WriteRqst = LgwrRqst . Write ;
2000-04-12 19:17:23 +02:00
for ( ; ; )
1999-09-27 17:48:12 +02:00
{
/* try to read LgwrResult */
if ( ! TAS ( & ( XLogCtl - > info_lck ) ) )
{
LgwrResult = XLogCtl - > LgwrResult ;
if ( XLByteLE ( record , LgwrResult . Flush ) )
{
S_UNLOCK ( & ( XLogCtl - > info_lck ) ) ;
2000-12-03 11:27:29 +01:00
END_CRIT_CODE ;
1999-09-27 17:48:12 +02:00
return ;
}
/* Register our flush request and pick up a newer shared write request */
if ( XLByteLT ( XLogCtl - > LgwrRqst . Flush , record ) )
XLogCtl - > LgwrRqst . Flush = record ;
if ( XLByteLT ( WriteRqst , XLogCtl - > LgwrRqst . Write ) )
{
WriteRqst = XLogCtl - > LgwrRqst . Write ;
usebuf = NULL ;
}
S_UNLOCK ( & ( XLogCtl - > info_lck ) ) ;
}
/* if something was added to log cache then try to flush this too */
if ( ! TAS ( & ( XLogCtl - > insert_lck ) ) )
{
2000-04-12 19:17:23 +02:00
XLogCtlInsert * Insert = & XLogCtl - > Insert ;
uint32 freespace =
( ( char * ) Insert - > currpage ) + BLCKSZ - Insert - > currpos ;
1999-09-27 17:48:12 +02:00
if ( freespace < SizeOfXLogRecord ) /* buffer is full */
{
usebuf = NULL ;
LgwrRqst . Write = WriteRqst = XLogCtl - > xlblocks [ Insert - > curridx ] ;
}
else
{
/* Page only partly filled: copy the used part, zero the rest, request up to currpos */
usebuf = buffer ;
memcpy ( usebuf , Insert - > currpage , BLCKSZ - freespace ) ;
memset ( usebuf + BLCKSZ - freespace , 0 , freespace ) ;
WriteRqst = XLogCtl - > xlblocks [ Insert - > curridx ] ;
2000-04-12 19:17:23 +02:00
WriteRqst . xrecoff = WriteRqst . xrecoff - BLCKSZ +
Insert - > currpos - ( ( char * ) Insert - > currpage ) ;
1999-09-27 17:48:12 +02:00
}
S_UNLOCK ( & ( XLogCtl - > insert_lck ) ) ;
force_lgwr = true ;
}
2000-04-12 19:17:23 +02:00
/* Try to write ourselves once we have a snapshot, or the write request is at least BLCKSZ past 'record' */
if ( force_lgwr | | WriteRqst . xlogid > record . xlogid | |
( WriteRqst . xlogid = = record . xlogid & &
1999-09-27 17:48:12 +02:00
WriteRqst . xrecoff > = record . xrecoff + BLCKSZ ) )
{
if ( ! TAS ( & ( XLogCtl - > lgwr_lck ) ) )
{
LgwrResult = XLogCtl - > Write . LgwrResult ;
if ( XLByteLE ( record , LgwrResult . Flush ) )
{
S_UNLOCK ( & ( XLogCtl - > lgwr_lck ) ) ;
2000-12-03 11:27:29 +01:00
END_CRIT_CODE ;
1999-09-27 17:48:12 +02:00
return ;
}
if ( XLByteLT ( LgwrResult . Write , WriteRqst ) )
{
LgwrRqst . Flush = LgwrRqst . Write = WriteRqst ;
XLogWrite ( usebuf ) ;
S_UNLOCK ( & ( XLogCtl - > lgwr_lck ) ) ;
if ( XLByteLT ( LgwrResult . Flush , record ) )
2000-11-21 23:27:26 +01:00
elog ( STOP , " XLogFlush: request is not satisfied " ) ;
2000-12-03 11:27:29 +01:00
END_CRIT_CODE ;
1999-09-27 17:48:12 +02:00
return ;
}
/* Already written but not yet flushed: leave the loop still holding lgwr_lck and fsync below */
break ;
}
}
2000-12-29 22:31:21 +01:00
S_LOCK_SLEEP ( & ( XLogCtl - > lgwr_lck ) , spins + + ) ;
1999-09-27 17:48:12 +02:00
}
2000-04-12 19:17:23 +02:00
/* lgwr_lck is held here.  Make sure the open file is the segment containing LgwrResult.Write */
if ( logFile > = 0 & & ( LgwrResult . Write . xlogid ! = logId | |
( LgwrResult . Write . xrecoff - 1 ) / XLogSegSize ! = logSeg ) )
1999-09-27 17:48:12 +02:00
{
if ( close ( logFile ) ! = 0 )
2000-11-21 23:27:26 +01:00
elog ( STOP , " close(logfile %u seg %u) failed: %m " ,
logId , logSeg ) ;
1999-09-27 17:48:12 +02:00
logFile = - 1 ;
}
if ( logFile < 0 )
{
logId = LgwrResult . Write . xlogid ;
logSeg = ( LgwrResult . Write . xrecoff - 1 ) / XLogSegSize ;
1999-10-06 23:58:18 +02:00
logOff = 0 ;
1999-09-27 17:48:12 +02:00
logFile = XLogFileOpen ( logId , logSeg , false ) ;
}
2000-12-08 23:21:33 +01:00
if ( pg_fsync ( logFile ) ! = 0 )
2000-11-21 23:27:26 +01:00
elog ( STOP , " fsync(logfile %u seg %u) failed: %m " ,
logId , logSeg ) ;
1999-09-27 17:48:12 +02:00
LgwrResult . Flush = LgwrResult . Write ;
2000-12-29 22:31:21 +01:00
/* Publish the new result; keep the shared write request from lagging behind it */
S_LOCK ( & ( XLogCtl - > info_lck ) ) ;
XLogCtl - > LgwrResult = LgwrResult ;
if ( XLByteLT ( XLogCtl - > LgwrRqst . Write , LgwrResult . Write ) )
XLogCtl - > LgwrRqst . Write = LgwrResult . Write ;
S_UNLOCK ( & ( XLogCtl - > info_lck ) ) ;
1999-09-27 17:48:12 +02:00
XLogCtl - > Write . LgwrResult = LgwrResult ;
S_UNLOCK ( & ( XLogCtl - > lgwr_lck ) ) ;
2000-12-03 11:27:29 +01:00
END_CRIT_CODE ;
1999-09-27 17:48:12 +02:00
return ;
}
/*
 * GetFreeXLBuffer
 *
 * Make the buffer page after Insert->curridx the current insert page,
 * first waiting until its old contents have been written out.  XLogInsert
 * calls this when its local LgwrResult does not show the next page as
 * written; per the comment in the body the insert lock is held here.
 *
 * Each pass publishes the end of the current page as the shared write
 * request and re-reads the shared result.  If the next page is still
 * unwritten it tries to take lgwr_lck and, if needed, writes the pages
 * itself with XLogWrite(NULL).  On success the latest result is saved in
 * Insert->LgwrResult and the page is set up with InitXLBuffer.
 *
 * NOTE(review): the definition uses an empty parameter list while the
 * prototype says (void).
 */
static void
GetFreeXLBuffer ( )
{
2000-04-12 19:17:23 +02:00
XLogCtlInsert * Insert = & XLogCtl - > Insert ;
XLogCtlWrite * Write = & XLogCtl - > Write ;
uint16 curridx = NextBufIdx ( Insert - > curridx ) ;
2000-12-29 22:31:21 +01:00
unsigned spins = 0 ;
1999-09-27 17:48:12 +02:00
/* Everything up to the end of the current insert page may be written */
LgwrRqst . Write = XLogCtl - > xlblocks [ Insert - > curridx ] ;
2000-04-12 19:17:23 +02:00
for ( ; ; )
1999-09-27 17:48:12 +02:00
{
if ( ! TAS ( & ( XLogCtl - > info_lck ) ) )
{
LgwrResult = XLogCtl - > LgwrResult ;
XLogCtl - > LgwrRqst . Write = LgwrRqst . Write ;
S_UNLOCK ( & ( XLogCtl - > info_lck ) ) ;
/* Someone else has already written the page we want to reuse */
if ( XLByteLE ( XLogCtl - > xlblocks [ curridx ] , LgwrResult . Write ) )
{
Insert - > LgwrResult = LgwrResult ;
InitXLBuffer ( curridx ) ;
return ;
}
}
2000-04-12 19:17:23 +02:00
1999-09-27 17:48:12 +02:00
/*
* info_lck was busy or LgwrResult is not yet updated. Try to acquire
* the lgwr lock and write full blocks.
*/
if ( ! TAS ( & ( XLogCtl - > lgwr_lck ) ) )
{
LgwrResult = Write - > LgwrResult ;
if ( XLByteLE ( XLogCtl - > xlblocks [ curridx ] , LgwrResult . Write ) )
{
S_UNLOCK ( & ( XLogCtl - > lgwr_lck ) ) ;
Insert - > LgwrResult = LgwrResult ;
InitXLBuffer ( curridx ) ;
return ;
}
2000-04-12 19:17:23 +02:00
/*
* Have to write buffers while holding insert lock - not
* good...
*/
XLogWrite ( NULL ) ;
S_UNLOCK ( & ( XLogCtl - > lgwr_lck ) ) ;
Insert - > LgwrResult = LgwrResult ;
InitXLBuffer ( curridx ) ;
return ;
}
2000-12-29 22:31:21 +01:00
S_LOCK_SLEEP ( & ( XLogCtl - > lgwr_lck ) , spins + + ) ;
1999-09-27 17:48:12 +02:00
}
}
/*
 * XLogWrite
 *
 * Write buffer pages to the log file until the backend-local
 * LgwrResult.Write reaches LgwrRqst.Write, then fsync if LgwrRqst.Flush is
 * covered by what has been written.  Every caller visible here holds
 * XLogCtl->lgwr_lck and has set the local LgwrRqst / LgwrResult first.
 *
 * buffer: NULL, or a private BLCKSZ copy of the partially filled current
 *         insert page.  It is written instead of the shared page when the
 *         write request ends inside the page at Write->curridx; in that
 *         case LgwrResult.Write becomes LgwrRqst.Write and Write->curridx
 *         is not advanced.
 *
 * When a page belongs to a different segment than the open file, the old
 * file is fsync'ed (if anything was written) and closed, the new segment
 * is opened or created with XLogFileInit, and the control file is updated
 * under ControlFileLockId.
 *
 * On return the shared LgwrResult (under info_lck) and Write->LgwrResult
 * are updated.  Reports elog(STOP) on any I/O failure or if nothing was
 * written.
 */
static void
XLogWrite ( char * buffer )
{
2000-04-12 19:17:23 +02:00
XLogCtlWrite * Write = & XLogCtl - > Write ;
char * from ;
/* Number of pages written by this call */
uint32 wcnt = 0 ;
2000-11-05 23:50:21 +01:00
bool usexistent ;
1999-09-27 17:48:12 +02:00
2000-04-12 19:17:23 +02:00
for ( ; XLByteLT ( LgwrResult . Write , LgwrRqst . Write ) ; )
1999-09-27 17:48:12 +02:00
{
/* xlblocks[] holds the end position of each page: assume the whole page gets written */
LgwrResult . Write = XLogCtl - > xlblocks [ Write - > curridx ] ;
2000-04-12 19:17:23 +02:00
/* Page lies in a different segment than the currently tracked one: switch files */
if ( LgwrResult . Write . xlogid ! = logId | |
1999-09-27 17:48:12 +02:00
( LgwrResult . Write . xrecoff - 1 ) / XLogSegSize ! = logSeg )
{
if ( wcnt > 0 )
{
/* Flush what this call wrote to the old segment and publish it if info_lck is free */
2000-12-08 23:21:33 +01:00
if ( pg_fsync ( logFile ) ! = 0 )
2000-11-21 23:27:26 +01:00
elog ( STOP , " fsync(logfile %u seg %u) failed: %m " ,
logId , logSeg ) ;
1999-09-27 17:48:12 +02:00
if ( LgwrResult . Write . xlogid ! = logId )
LgwrResult . Flush . xrecoff = XLogFileSize ;
else
LgwrResult . Flush . xrecoff = LgwrResult . Write . xrecoff - BLCKSZ ;
LgwrResult . Flush . xlogid = logId ;
if ( ! TAS ( & ( XLogCtl - > info_lck ) ) )
{
XLogCtl - > LgwrResult . Flush = LgwrResult . Flush ;
XLogCtl - > LgwrResult . Write = LgwrResult . Flush ;
if ( XLByteLT ( XLogCtl - > LgwrRqst . Write , LgwrResult . Flush ) )
XLogCtl - > LgwrRqst . Write = LgwrResult . Flush ;
if ( XLByteLT ( XLogCtl - > LgwrRqst . Flush , LgwrResult . Flush ) )
XLogCtl - > LgwrRqst . Flush = LgwrResult . Flush ;
S_UNLOCK ( & ( XLogCtl - > info_lck ) ) ;
}
}
if ( logFile > = 0 )
{
if ( close ( logFile ) ! = 0 )
2000-11-21 23:27:26 +01:00
elog ( STOP , " close(logfile %u seg %u) failed: %m " ,
logId , logSeg ) ;
1999-09-27 17:48:12 +02:00
logFile = - 1 ;
}
logId = LgwrResult . Write . xlogid ;
logSeg = ( LgwrResult . Write . xrecoff - 1 ) / XLogSegSize ;
1999-10-06 23:58:18 +02:00
logOff = 0 ;
1999-09-27 17:48:12 +02:00
SpinAcquire ( ControlFileLockId ) ;
2000-11-05 23:50:21 +01:00
/* create/use new log file */
usexistent = true ;
logFile = XLogFileInit ( logId , logSeg , & usexistent ) ;
1999-09-27 17:48:12 +02:00
/* Record the new current segment in the control file (logSeg is stored 1-based) */
ControlFile - > logId = logId ;
ControlFile - > logSeg = logSeg + 1 ;
ControlFile - > time = time ( NULL ) ;
UpdateControlFile ( ) ;
SpinRelease ( ControlFileLockId ) ;
2000-11-05 23:50:21 +01:00
if ( ! usexistent ) /* there was no file */
elog ( LOG , " XLogWrite: had to create new log file - "
" you probably should do checkpoints more often " ) ;
1999-09-27 17:48:12 +02:00
}
/* Right segment is tracked but no file is open in this backend */
if ( logFile < 0 )
{
logId = LgwrResult . Write . xlogid ;
logSeg = ( LgwrResult . Write . xrecoff - 1 ) / XLogSegSize ;
1999-10-06 23:58:18 +02:00
logOff = 0 ;
1999-09-27 17:48:12 +02:00
logFile = XLogFileOpen ( logId , logSeg , false ) ;
}
/* Seek only when the tracked file offset differs from the page's offset in the segment */
if ( logOff ! = ( LgwrResult . Write . xrecoff - BLCKSZ ) % XLogSegSize )
{
logOff = ( LgwrResult . Write . xrecoff - BLCKSZ ) % XLogSegSize ;
2000-04-12 19:17:23 +02:00
if ( lseek ( logFile , ( off_t ) logOff , SEEK_SET ) < 0 )
2000-11-21 23:27:26 +01:00
elog ( STOP , " lseek(logfile %u seg %u off %u) failed: %m " ,
logId , logSeg , logOff ) ;
1999-09-27 17:48:12 +02:00
}
/* Request ends inside this page: write the caller's private copy, if one was given */
if ( buffer ! = NULL & & XLByteLT ( LgwrRqst . Write , LgwrResult . Write ) )
from = buffer ;
else
from = XLogCtl - > pages + Write - > curridx * BLCKSZ ;
if ( write ( logFile , from , BLCKSZ ) ! = BLCKSZ )
2000-11-21 23:27:26 +01:00
elog ( STOP , " write(logfile %u seg %u off %u) failed: %m " ,
logId , logSeg , logOff ) ;
1999-09-27 17:48:12 +02:00
wcnt + + ;
logOff + = BLCKSZ ;
/* A shared page is done once written; a private copy only covers up to the request */
if ( from ! = buffer )
Write - > curridx = NextBufIdx ( Write - > curridx ) ;
else
LgwrResult . Write = LgwrRqst . Write ;
}
if ( wcnt = = 0 )
elog ( STOP , " XLogWrite: nothing written " ) ;
2000-04-12 19:17:23 +02:00
/* Flush if a flush was requested and everything requested for flush has been written */
if ( XLByteLT ( LgwrResult . Flush , LgwrRqst . Flush ) & &
1999-09-27 17:48:12 +02:00
XLByteLE ( LgwrRqst . Flush , LgwrResult . Write ) )
{
2000-12-08 23:21:33 +01:00
if ( pg_fsync ( logFile ) ! = 0 )
2000-11-21 23:27:26 +01:00
elog ( STOP , " fsync(logfile %u seg %u) failed: %m " ,
logId , logSeg ) ;
1999-09-27 17:48:12 +02:00
LgwrResult . Flush = LgwrResult . Write ;
}
2000-12-29 22:31:21 +01:00
/* Publish the result; keep the shared write request from lagging behind it */
S_LOCK ( & ( XLogCtl - > info_lck ) ) ;
XLogCtl - > LgwrResult = LgwrResult ;
if ( XLByteLT ( XLogCtl - > LgwrRqst . Write , LgwrResult . Write ) )
XLogCtl - > LgwrRqst . Write = LgwrResult . Write ;
S_UNLOCK ( & ( XLogCtl - > info_lck ) ) ;
1999-09-27 17:48:12 +02:00
Write - > LgwrResult = LgwrResult ;
}
static int
2000-11-05 23:50:21 +01:00
XLogFileInit ( uint32 log , uint32 seg , bool * usexistent )
1999-09-27 17:48:12 +02:00
{
2000-04-12 19:17:23 +02:00
char path [ MAXPGPATH ] ;
2000-11-05 23:50:21 +01:00
char tpath [ MAXPGPATH ] ;
2000-04-12 19:17:23 +02:00
int fd ;
1999-09-27 17:48:12 +02:00
XLogFileName ( path , log , seg ) ;
2000-11-05 23:50:21 +01:00
/*
* Try to use existent file ( checkpoint maker
* creates it sometime ) .
*/
if ( * usexistent )
{
fd = BasicOpenFile ( path , O_RDWR | PG_BINARY , S_IRUSR | S_IWUSR ) ;
if ( fd < 0 )
{
if ( errno ! = ENOENT )
2000-11-21 23:27:26 +01:00
elog ( STOP , " InitOpen(logfile %u seg %u) failed: %m " ,
logId , logSeg ) ;
2000-11-05 23:50:21 +01:00
}
else
return ( fd ) ;
* usexistent = false ;
}
XLogTempFileName ( tpath , log , seg ) ;
unlink ( tpath ) ;
1999-09-27 17:48:12 +02:00
unlink ( path ) ;
2000-11-05 23:50:21 +01:00
fd = BasicOpenFile ( tpath , O_RDWR | O_CREAT | O_EXCL | PG_BINARY , S_IRUSR | S_IWUSR ) ;
1999-09-27 17:48:12 +02:00
if ( fd < 0 )
2000-11-21 23:27:26 +01:00
elog ( STOP , " InitCreate(logfile %u seg %u) failed: %m " ,
logId , logSeg ) ;
1999-09-27 17:48:12 +02:00
if ( lseek ( fd , XLogSegSize - 1 , SEEK_SET ) ! = ( off_t ) ( XLogSegSize - 1 ) )
2000-11-21 23:27:26 +01:00
elog ( STOP , " lseek(logfile %u seg %u) failed: %m " ,
logId , logSeg ) ;
1999-09-27 17:48:12 +02:00
if ( write ( fd , " " , 1 ) ! = 1 )
2000-11-21 23:27:26 +01:00
elog ( STOP , " write(logfile %u seg %u) failed: %m " ,
logId , logSeg ) ;
1999-09-27 17:48:12 +02:00
2000-12-08 23:21:33 +01:00
if ( pg_fsync ( fd ) ! = 0 )
2000-11-21 23:27:26 +01:00
elog ( STOP , " fsync(logfile %u seg %u) failed: %m " ,
logId , logSeg ) ;
1999-09-27 17:48:12 +02:00
1999-10-06 23:58:18 +02:00
if ( lseek ( fd , 0 , SEEK_SET ) < 0 )
2000-11-21 23:27:26 +01:00
elog ( STOP , " lseek(logfile %u seg %u off %u) failed: %m " ,
log , seg , 0 ) ;
1999-10-06 23:58:18 +02:00
2000-11-05 23:50:21 +01:00
close ( fd ) ;
2000-11-27 06:36:12 +01:00
2000-12-18 19:45:05 +01:00
# ifndef __BEOS__
2000-11-27 06:36:12 +01:00
if ( link ( tpath , path ) < 0 )
2000-12-18 19:45:05 +01:00
# else
if ( rename ( tpath , path ) < 0 )
# endif
2000-11-27 06:36:12 +01:00
elog ( STOP , " InitRelink(logfile %u seg %u) failed: %m " ,
logId , logSeg ) ;
2000-11-05 23:50:21 +01:00
unlink ( tpath ) ;
fd = BasicOpenFile ( path , O_RDWR | PG_BINARY , S_IRUSR | S_IWUSR ) ;
if ( fd < 0 )
2000-11-21 23:27:26 +01:00
elog ( STOP , " InitReopen(logfile %u seg %u) failed: %m " ,
logId , logSeg ) ;
2000-11-05 23:50:21 +01:00
2000-04-12 19:17:23 +02:00
return ( fd ) ;
1999-09-27 17:48:12 +02:00
}
/*
 * XLogFileOpen
 *		Open the existing segment file for (log, seg) read/write and return
 *		its file descriptor.
 *
 * econt:	when true, a missing file (ENOENT) is only logged at LOG level
 *			and the negative descriptor is returned to the caller.  Every
 *			other failure is reported through elog(STOP).
 *
 * Diagnostics name the segment that was requested (log/seg), and the
 * elog(STOP) report is reachable so the failure reason is actually emitted.
 */
static int
XLogFileOpen(uint32 log, uint32 seg, bool econt)
{
	char		path[MAXPGPATH];
	int			fd;

	XLogFileName(path, log, seg);

	fd = BasicOpenFile(path, O_RDWR | PG_BINARY, S_IRUSR | S_IWUSR);
	if (fd < 0)
	{
		if (econt && errno == ENOENT)
		{
			elog(LOG, " open(logfile %u seg %u) failed: %m ",
				 log, seg);
			return (fd);
		}
		elog(STOP, " open(logfile %u seg %u) failed: %m ",
			 log, seg);
	}

	return (fd);
}
2000-11-05 23:50:21 +01:00
/*
 * (Re)move offline log files older or equal to passed one
 *
 * archdir:			archive destination; must be an empty string (see the
 *					Assert below), in which case matching files are unlinked.
 * _logId, _logSeg:	formatted into the name that directory entries of XLogDir
 *					are compared against.
 *
 * Only entries whose name is exactly 16 characters long and consists solely
 * of characters from the accepted set are considered; an entry is processed
 * when its name does not compare greater than the formatted name.
 * Failure to open or read the directory is reported through elog(STOP).
 */
static void
MoveOfflineLogs(char *archdir, uint32 _logId, uint32 _logSeg)
{
	DIR		   *xldir;
	struct dirent *xlde;
	char		lastoff[32];
	char		path[MAXPGPATH];

	Assert(archdir[0] == 0);	/* archiving is not implemented yet */

	xldir = opendir(XLogDir);
	if (xldir == NULL)
		elog(STOP, " MoveOfflineLogs: cannot open xlog dir: %m ");

	/* bounded formatting: never write past the end of lastoff */
	snprintf(lastoff, sizeof(lastoff), " %08X%08X ", _logId, _logSeg);

	/*
	 * readdir() returns NULL both at end of directory and on error; errno is
	 * kept at zero before each call so the two can be told apart afterwards.
	 */
	errno = 0;
	while ((xlde = readdir(xldir)) != NULL)
	{
		if (strlen(xlde->d_name) != 16 ||
			strspn(xlde->d_name, " 0123456789ABCDEF ") != 16)
			continue;
		if (strcmp(xlde->d_name, lastoff) > 0)
		{
			errno = 0;
			continue;
		}
		elog(LOG, " MoveOfflineLogs: %s %s ", (archdir[0]) ?
			 " archive " : " remove ", xlde->d_name);

		/* XLogDir plus the entry name may not fit: truncate, don't overflow */
		snprintf(path, sizeof(path), " %s%c%s ", XLogDir, SEP_CHAR, xlde->d_name);
		if (archdir[0] == 0)
			unlink(path);
		errno = 0;
	}
	if (errno)
		elog(STOP, " MoveOfflineLogs: cannot read xlog dir: %m ");
	closedir(xldir);
}
2000-12-28 14:00:29 +01:00
/*
 * RestoreBkpBlocks
 *		Copy the backup blocks attached to a log record back into their
 *		relation pages.
 *
 * Up to two backup blocks follow the record's xl_len bytes of data; which
 * ones are present is given by the XLR_SET_BKP_BLOCK(n) bits of xl_info.
 * Each one is a BkpBlock header followed by a BLCKSZ page image.  A
 * restored page is stamped with lsn and ThisStartUpID and written out.
 * Blocks whose relation or buffer cannot be obtained are skipped.
 */
static void
RestoreBkpBlocks(XLogRecord *record, XLogRecPtr lsn)
{
	char	   *cursor = (char *) XLogRecGetData(record) + record->xl_len;
	int			slot = 0;

	while (slot < 2)
	{
		if (record->xl_info & (XLR_SET_BKP_BLOCK(slot)))
		{
			BkpBlock	hdr;
			char	   *image;
			Relation	rel;

			/* the header may be unaligned inside the record: copy it out */
			memcpy((char *) &hdr, cursor, sizeof(BkpBlock));
			image = cursor + sizeof(BkpBlock);

			/* step over header and image whether or not we can restore */
			cursor = image + BLCKSZ;

			rel = XLogOpenRelation(true, record->xl_rmid, hdr.node);
			if (rel)
			{
				Buffer		buf = XLogReadBuffer(true, rel, hdr.block);

				if (BufferIsValid(buf))
				{
					Page		pg = (Page) BufferGetPage(buf);

					memcpy((char *) pg, image, BLCKSZ);
					PageSetLSN(pg, lsn);
					PageSetSUI(pg, ThisStartUpID);
					UnlockAndWriteBuffer(buf);
				}
			}
		}
		slot++;
	}
}
/*
 * RecordIsValid
 *		Verify the CRCs of a fully assembled log record.
 *
 * record:	the record; on entry xl_len covers the rmgr data plus any backup
 *			blocks flagged in xl_info.
 * recptr:	position of the record, used only in messages.
 * emode:	elog level for the diagnostics.
 *
 * Returns true when the rmgr-data CRC and the CRC of every attached backup
 * block match, false otherwise (after reporting via elog(emode, ...)).
 * When backup blocks are present and valid, record->xl_len is reduced to
 * the length of the rmgr data alone.
 */
static bool
RecordIsValid(XLogRecord *record, XLogRecPtr recptr, int emode)
{
	uint32		datalen = record->xl_len;
	crc64		calc;
	crc64		stored;
	char	   *bkp;
	int			n;

	/* subtract the space taken by each flagged backup block */
	n = 0;
	while (n < 2)
	{
		if (record->xl_info & (XLR_SET_BKP_BLOCK(n)))
		{
			if (datalen <= (sizeof(BkpBlock) + BLCKSZ))
			{
				elog(emode, " ReadRecord: record at %u/%u is too short to keep bkp block ",
					 recptr.xlogid, recptr.xrecoff);
				return (false);
			}
			datalen -= sizeof(BkpBlock);
			datalen -= BLCKSZ;
		}
		n++;
	}

	/* CRC of rmgr data, then of the header from xl_prev onwards */
	INIT_CRC64(calc);
	COMP_CRC64(calc, ((char *) XLogRecGetData(record)), datalen);
	COMP_CRC64(calc, ((char *) record + offsetof(XLogRecord, xl_prev)),
			   (SizeOfXLogRecord - offsetof(XLogRecord, xl_prev)));
	FIN_CRC64(calc);

	if (record->xl_crc.crc1 != calc.crc1 || record->xl_crc.crc2 != calc.crc2)
	{
		elog(emode, " ReadRecord: bad rmgr data CRC in record at %u/%u ",
			 recptr.xlogid, recptr.xrecoff);
		return (false);
	}

	/* nothing was subtracted: the record carries no backup blocks */
	if (datalen == record->xl_len)
		return (true);

	/* each backup block stores its own CRC at the start of its header */
	bkp = (char *) XLogRecGetData(record) + datalen;
	for (n = 1; n <= 2; n++)
	{
		if (!(record->xl_info & (XLR_SET_BKP_BLOCK(n - 1))))
			continue;

		INIT_CRC64(calc);
		COMP_CRC64(calc, (bkp + sizeof(BkpBlock)), BLCKSZ);
		COMP_CRC64(calc, (bkp + offsetof(BkpBlock, node)),
				   (sizeof(BkpBlock) - offsetof(BkpBlock, node)));
		FIN_CRC64(calc);

		memcpy((char *) &stored, bkp, sizeof(crc64));
		if (stored.crc1 != calc.crc1 || stored.crc2 != calc.crc2)
		{
			elog(emode, " ReadRecord: bad bkp block %d CRC in record at %u/%u ",
				 n, recptr.xlogid, recptr.xrecoff);
			return (false);
		}

		bkp += sizeof(BkpBlock);
		bkp += BLCKSZ;
	}

	record->xl_len = datalen;	/* !!! callers now see only the rmgr data */
	return (true);
}
2000-04-12 19:17:23 +02:00
/*
 * ReadRecord
 *	Read one log record.
 *
 * RecPtr:	position of the record to read, or NULL ("next mode") to read the
 *		record that follows EndRecPtr.  In next mode problems are reported
 *		at LOG level, otherwise at STOP level (see emode below).
 * buffer:	caller-supplied storage; a record that spans several pages is
 *		assembled here, and the zeroed header returned for an invalid
 *		record is written here as well.
 *
 * On success returns a pointer to the record (inside readBuf, or inside
 * buffer for a multi-page record) and updates ReadRecPtr, EndRecPtr and
 * nextRecord.  When the record is invalid control reaches
 * next_record_is_invalid, which zeroes the rest of the current segment,
 * updates the control file if needed, unlinks the following segment file
 * and returns a record whose header is all zeroes.
 *
 * readFile/readId/readSeg/readOff/readBuf, nextRecord, EndRecPtr and
 * ReadRecPtr are declared outside this function.
 */
static XLogRecord *
1999-10-06 23:58:18 +02:00
ReadRecord ( XLogRecPtr * RecPtr , char * buffer )
1999-09-27 17:48:12 +02:00
{
2000-04-12 19:17:23 +02:00
XLogRecord * record ;
XLogRecPtr tmpRecPtr = EndRecPtr ;
2000-12-28 14:00:29 +01:00
uint32 len ;
2000-04-12 19:17:23 +02:00
bool nextmode = ( RecPtr = = NULL ) ;
int emode = ( nextmode ) ? LOG : STOP ;
bool noBlck = false ;
1999-09-27 17:48:12 +02:00
1999-10-06 23:58:18 +02:00
/* next mode: continue from the end of the previously read record */
if ( nextmode )
1999-09-27 17:48:12 +02:00
{
1999-10-06 23:58:18 +02:00
RecPtr = & tmpRecPtr ;
/* the previous call already located the next record on the current page */
if ( nextRecord ! = NULL )
{
record = nextRecord ;
goto got_record ;
}
/* otherwise round up to the next page boundary (rolling over to the next log id) and skip the page header */
if ( tmpRecPtr . xrecoff % BLCKSZ ! = 0 )
tmpRecPtr . xrecoff + = ( BLCKSZ - tmpRecPtr . xrecoff % BLCKSZ ) ;
if ( tmpRecPtr . xrecoff > = XLogFileSize )
{
( tmpRecPtr . xlogid ) + + ;
tmpRecPtr . xrecoff = 0 ;
}
tmpRecPtr . xrecoff + = SizeOfXLogPHD ;
1999-09-27 17:48:12 +02:00
}
1999-10-06 23:58:18 +02:00
else if ( ! XRecOffIsValid ( RecPtr - > xrecoff ) )
elog ( STOP , " ReadRecord: invalid record offset in (%u, %u) " ,
2000-04-12 19:17:23 +02:00
RecPtr - > xlogid , RecPtr - > xrecoff ) ;
1999-09-27 17:48:12 +02:00
2000-04-12 19:17:23 +02:00
/* the currently open file belongs to a different segment: close it */
if ( readFile > = 0 & & ( RecPtr - > xlogid ! = readId | |
RecPtr - > xrecoff / XLogSegSize ! = readSeg ) )
1999-09-27 17:48:12 +02:00
{
1999-10-06 23:58:18 +02:00
close ( readFile ) ;
readFile = - 1 ;
1999-09-27 17:48:12 +02:00
}
1999-10-06 23:58:18 +02:00
readId = RecPtr - > xlogid ;
readSeg = RecPtr - > xrecoff / XLogSegSize ;
if ( readFile < 0 )
1999-09-27 17:48:12 +02:00
{
1999-10-06 23:58:18 +02:00
/* freshly opened file: readBuf holds no page of it yet */
noBlck = true ;
readFile = XLogFileOpen ( readId , readSeg , nextmode ) ;
if ( readFile < 0 )
goto next_record_is_invalid ;
1999-09-27 17:48:12 +02:00
}
1999-10-06 23:58:18 +02:00
/* (re)load the page containing the record unless readBuf already holds it; readOff counts pages here */
if ( noBlck | | readOff ! = ( RecPtr - > xrecoff % XLogSegSize ) / BLCKSZ )
1999-09-27 17:48:12 +02:00
{
readOff = ( RecPtr - > xrecoff % XLogSegSize ) / BLCKSZ ;
2000-04-12 19:17:23 +02:00
if ( lseek ( readFile , ( off_t ) ( readOff * BLCKSZ ) , SEEK_SET ) < 0 )
2000-11-21 23:27:26 +01:00
elog ( STOP , " ReadRecord: lseek(logfile %u seg %u off %u) failed: %m " ,
readId , readSeg , readOff ) ;
1999-09-27 17:48:12 +02:00
if ( read ( readFile , readBuf , BLCKSZ ) ! = BLCKSZ )
2000-11-21 23:27:26 +01:00
elog ( STOP , " ReadRecord: read(logfile %u seg %u off %u) failed: %m " ,
readId , readSeg , readOff ) ;
2000-04-12 19:17:23 +02:00
if ( ( ( XLogPageHeader ) readBuf ) - > xlp_magic ! = XLOG_PAGE_MAGIC )
1999-09-27 17:48:12 +02:00
{
elog ( emode , " ReadRecord: invalid magic number %u in logfile %u seg %u off %u " ,
2000-04-12 19:17:23 +02:00
( ( XLogPageHeader ) readBuf ) - > xlp_magic ,
readId , readSeg , readOff ) ;
1999-09-27 17:48:12 +02:00
goto next_record_is_invalid ;
}
}
2000-04-12 19:17:23 +02:00
/* a record cannot start right after the page header when the page is flagged as starting with a subrecord */
if ( ( ( ( XLogPageHeader ) readBuf ) - > xlp_info & XLP_FIRST_IS_SUBRECORD ) & &
1999-09-27 17:48:12 +02:00
RecPtr - > xrecoff % BLCKSZ = = SizeOfXLogPHD )
{
elog ( emode , " ReadRecord: subrecord is requested by (%u, %u) " ,
2000-04-12 19:17:23 +02:00
RecPtr - > xlogid , RecPtr - > xrecoff ) ;
1999-09-27 17:48:12 +02:00
goto next_record_is_invalid ;
}
2000-04-12 19:17:23 +02:00
record = ( XLogRecord * ) ( ( char * ) readBuf + RecPtr - > xrecoff % BLCKSZ ) ;
1999-09-27 17:48:12 +02:00
got_record : ;
2000-12-28 14:00:29 +01:00
/* sanity-check the header fields before trusting xl_len */
if ( record - > xl_len > _INTL_MAXLOGRECSZ )
1999-09-27 17:48:12 +02:00
{
2000-12-28 14:00:29 +01:00
elog ( emode , " ReadRecord: too long record len %u in (%u, %u) " ,
record - > xl_len , RecPtr - > xlogid , RecPtr - > xrecoff ) ;
1999-09-27 17:48:12 +02:00
goto next_record_is_invalid ;
}
if ( record - > xl_rmid > RM_MAX_ID )
{
elog ( emode , " ReadRecord: invalid resource managed id %u in (%u, %u) " ,
2000-04-12 19:17:23 +02:00
record - > xl_rmid , RecPtr - > xlogid , RecPtr - > xrecoff ) ;
1999-09-27 17:48:12 +02:00
goto next_record_is_invalid ;
}
nextRecord = NULL ;
2000-12-28 14:00:29 +01:00
/* len = bytes of record data that fit on this page after the record header */
len = BLCKSZ - RecPtr - > xrecoff % BLCKSZ - SizeOfXLogRecord ;
if ( record - > xl_len > len )
1999-09-27 17:48:12 +02:00
{
2000-12-28 14:00:29 +01:00
/* the record continues on following pages: assemble it in buffer */
XLogSubRecord * subrecord ;
uint32 gotlen = len ;
1999-09-27 17:48:12 +02:00
2000-12-28 14:00:29 +01:00
memcpy ( buffer , record , len + SizeOfXLogRecord ) ;
2000-04-12 19:17:23 +02:00
record = ( XLogRecord * ) buffer ;
2000-12-28 14:00:29 +01:00
/* from here on buffer points at the append position, not at the start of the caller's storage */
buffer + = len + SizeOfXLogRecord ;
2000-04-12 19:17:23 +02:00
for ( ; ; )
1999-09-27 17:48:12 +02:00
{
/* move to the next page, switching to the next segment (and log id) at the end of the file */
readOff + + ;
if ( readOff = = XLogSegSize / BLCKSZ )
{
readSeg + + ;
if ( readSeg = = XLogLastSeg )
{
readSeg = 0 ;
readId + + ;
}
close ( readFile ) ;
1999-10-06 23:58:18 +02:00
readOff = 0 ;
1999-09-27 17:48:12 +02:00
readFile = XLogFileOpen ( readId , readSeg , nextmode ) ;
if ( readFile < 0 )
goto next_record_is_invalid ;
}
if ( read ( readFile , readBuf , BLCKSZ ) ! = BLCKSZ )
2000-11-21 23:27:26 +01:00
elog ( STOP , " ReadRecord: read(logfile %u seg %u off %u) failed: %m " ,
readId , readSeg , readOff ) ;
2000-04-12 19:17:23 +02:00
if ( ( ( XLogPageHeader ) readBuf ) - > xlp_magic ! = XLOG_PAGE_MAGIC )
1999-09-27 17:48:12 +02:00
{
elog ( emode , " ReadRecord: invalid magic number %u in logfile %u seg %u off %u " ,
2000-04-12 19:17:23 +02:00
( ( XLogPageHeader ) readBuf ) - > xlp_magic ,
readId , readSeg , readOff ) ;
1999-09-27 17:48:12 +02:00
goto next_record_is_invalid ;
}
2000-04-12 19:17:23 +02:00
/* a continuation page must be flagged as starting with a subrecord */
if ( ! ( ( ( XLogPageHeader ) readBuf ) - > xlp_info & XLP_FIRST_IS_SUBRECORD ) )
1999-09-27 17:48:12 +02:00
{
elog ( emode , " ReadRecord: there is no subrecord flag in logfile %u seg %u off %u " ,
2000-04-12 19:17:23 +02:00
readId , readSeg , readOff ) ;
1999-09-27 17:48:12 +02:00
goto next_record_is_invalid ;
}
2000-04-12 19:17:23 +02:00
subrecord = ( XLogSubRecord * ) ( ( char * ) readBuf + SizeOfXLogPHD ) ;
2000-12-28 14:00:29 +01:00
/* the subrecord must be non-empty and must not exceed what the record still lacks */
if ( subrecord - > xl_len = = 0 | |
record - > xl_len < ( subrecord - > xl_len + gotlen ) )
1999-09-27 17:48:12 +02:00
{
elog ( emode , " ReadRecord: invalid subrecord len %u in logfile %u seg %u off %u " ,
2000-04-12 19:17:23 +02:00
subrecord - > xl_len , readId , readSeg , readOff ) ;
1999-09-27 17:48:12 +02:00
goto next_record_is_invalid ;
}
2000-12-28 14:00:29 +01:00
/* len = payload capacity of a continuation page */
len = BLCKSZ - SizeOfXLogPHD - SizeOfXLogSubRecord ;
if ( subrecord - > xl_len > len )
1999-09-27 17:48:12 +02:00
{
2000-12-28 14:00:29 +01:00
/* this page is filled completely and the record continues further */
memcpy ( buffer , ( char * ) subrecord + SizeOfXLogSubRecord , len ) ;
gotlen + = len ;
buffer + = len ;
continue ;
1999-09-27 17:48:12 +02:00
}
2000-12-28 14:00:29 +01:00
/* last piece: the assembled length must match the header exactly */
if ( record - > xl_len ! = ( subrecord - > xl_len + gotlen ) )
1999-09-27 17:48:12 +02:00
{
2000-12-28 14:00:29 +01:00
elog ( emode , " ReadRecord: invalid len %u of constracted record in logfile %u seg %u off %u " ,
subrecord - > xl_len + gotlen , readId , readSeg , readOff ) ;
goto next_record_is_invalid ;
1999-09-27 17:48:12 +02:00
}
2000-12-28 14:00:29 +01:00
memcpy ( buffer , ( char * ) subrecord + SizeOfXLogSubRecord , subrecord - > xl_len ) ;
1999-09-27 17:48:12 +02:00
break ;
}
2000-12-28 14:00:29 +01:00
if ( ! RecordIsValid ( record , * RecPtr , emode ) )
goto next_record_is_invalid ;
2000-10-21 17:43:36 +02:00
/* remember the following record if its header still fits on this page */
if ( BLCKSZ - SizeOfXLogRecord > = MAXALIGN ( subrecord - > xl_len ) +
2000-06-02 12:20:27 +02:00
SizeOfXLogPHD + SizeOfXLogSubRecord )
1999-09-27 17:48:12 +02:00
{
2000-06-02 12:20:27 +02:00
nextRecord = ( XLogRecord * ) ( ( char * ) subrecord +
2000-10-21 17:43:36 +02:00
MAXALIGN ( subrecord - > xl_len ) + SizeOfXLogSubRecord ) ;
1999-09-27 17:48:12 +02:00
}
EndRecPtr . xlogid = readId ;
2000-04-12 19:17:23 +02:00
EndRecPtr . xrecoff = readSeg * XLogSegSize + readOff * BLCKSZ +
2000-06-02 12:20:27 +02:00
SizeOfXLogPHD + SizeOfXLogSubRecord +
2000-10-21 17:43:36 +02:00
MAXALIGN ( subrecord - > xl_len ) ;
1999-09-27 17:48:12 +02:00
ReadRecPtr = * RecPtr ;
2000-04-12 19:17:23 +02:00
return ( record ) ;
1999-09-27 17:48:12 +02:00
}
2000-12-28 14:00:29 +01:00
/* the whole record lies on the current page */
if ( ! RecordIsValid ( record , * RecPtr , emode ) )
goto next_record_is_invalid ;
2000-10-21 17:43:36 +02:00
/* remember the following record if its header still fits on this page */
if ( BLCKSZ - SizeOfXLogRecord > = MAXALIGN ( record - > xl_len ) +
2000-06-02 12:20:27 +02:00
RecPtr - > xrecoff % BLCKSZ + SizeOfXLogRecord )
nextRecord = ( XLogRecord * ) ( ( char * ) record +
2000-10-21 17:43:36 +02:00
MAXALIGN ( record - > xl_len ) + SizeOfXLogRecord ) ;
1999-09-27 17:48:12 +02:00
EndRecPtr . xlogid = RecPtr - > xlogid ;
2000-06-02 12:20:27 +02:00
EndRecPtr . xrecoff = RecPtr - > xrecoff +
2000-10-21 17:43:36 +02:00
MAXALIGN ( record - > xl_len ) + SizeOfXLogRecord ;
1999-09-27 17:48:12 +02:00
ReadRecPtr = * RecPtr ;
2000-04-12 19:17:23 +02:00
return ( record ) ;
1999-09-27 17:48:12 +02:00
next_record_is_invalid : ;
/* NOTE(review): readFile can be negative here (XLogFileOpen failed above), so this close() may be handed an invalid descriptor */
close ( readFile ) ;
readFile = - 1 ;
nextRecord = NULL ;
/* NOTE(review): buffer may already have been advanced by the multi-page path, so the zeroed header is not necessarily at the start of the caller's storage -- confirm callers rely only on the returned pointer */
memset ( buffer , 0 , SizeOfXLogRecord ) ;
2000-04-12 19:17:23 +02:00
record = ( XLogRecord * ) buffer ;
1999-09-27 17:48:12 +02:00
/*
* If we assumed that next record began on the same page where
* previous one ended - zero end of page .
*/
if ( XLByteEQ ( tmpRecPtr , EndRecPtr ) )
{
2000-04-12 19:17:23 +02:00
Assert ( EndRecPtr . xrecoff % BLCKSZ > ( SizeOfXLogPHD + SizeOfXLogSubRecord ) & &
BLCKSZ - EndRecPtr . xrecoff % BLCKSZ > = SizeOfXLogRecord ) ;
1999-09-27 17:48:12 +02:00
readId = EndRecPtr . xlogid ;
readSeg = EndRecPtr . xrecoff / XLogSegSize ;
readOff = ( EndRecPtr . xrecoff % XLogSegSize ) / BLCKSZ ;
2000-02-15 04:00:37 +01:00
elog ( LOG , " Formatting logfile %u seg %u block %u at offset %u " ,
2000-04-12 19:17:23 +02:00
readId , readSeg , readOff , EndRecPtr . xrecoff % BLCKSZ ) ;
1999-09-27 17:48:12 +02:00
readFile = XLogFileOpen ( readId , readSeg , false ) ;
2000-04-12 19:17:23 +02:00
/* read the page, zero everything from EndRecPtr to the end of it, and write it back in place */
if ( lseek ( readFile , ( off_t ) ( readOff * BLCKSZ ) , SEEK_SET ) < 0 )
2000-11-21 23:27:26 +01:00
elog ( STOP , " ReadRecord: lseek(logfile %u seg %u off %u) failed: %m " ,
readId , readSeg , readOff ) ;
1999-09-27 17:48:12 +02:00
if ( read ( readFile , readBuf , BLCKSZ ) ! = BLCKSZ )
2000-11-21 23:27:26 +01:00
elog ( STOP , " ReadRecord: read(logfile %u seg %u off %u) failed: %m " ,
readId , readSeg , readOff ) ;
2000-04-12 19:17:23 +02:00
memset ( readBuf + EndRecPtr . xrecoff % BLCKSZ , 0 ,
BLCKSZ - EndRecPtr . xrecoff % BLCKSZ ) ;
if ( lseek ( readFile , ( off_t ) ( readOff * BLCKSZ ) , SEEK_SET ) < 0 )
2000-11-21 23:27:26 +01:00
elog ( STOP , " ReadRecord: lseek(logfile %u seg %u off %u) failed: %m " ,
readId , readSeg , readOff ) ;
1999-09-27 17:48:12 +02:00
if ( write ( readFile , readBuf , BLCKSZ ) ! = BLCKSZ )
2000-11-21 23:27:26 +01:00
elog ( STOP , " ReadRecord: write(logfile %u seg %u off %u) failed: %m " ,
readId , readSeg , readOff ) ;
1999-09-27 17:48:12 +02:00
readOff + + ;
}
else
{
2000-04-12 19:17:23 +02:00
Assert ( EndRecPtr . xrecoff % BLCKSZ = = 0 | |
BLCKSZ - EndRecPtr . xrecoff % BLCKSZ < SizeOfXLogRecord ) ;
1999-09-27 17:48:12 +02:00
readId = tmpRecPtr . xlogid ;
readSeg = tmpRecPtr . xrecoff / XLogSegSize ;
readOff = ( tmpRecPtr . xrecoff % XLogSegSize ) / BLCKSZ ;
1999-10-06 23:58:18 +02:00
Assert ( readOff > 0 ) ;
1999-09-27 17:48:12 +02:00
}
/* zero every remaining page of the segment, starting at page readOff */
if ( readOff > 0 )
{
1999-10-06 23:58:18 +02:00
if ( ! XLByteEQ ( tmpRecPtr , EndRecPtr ) )
2000-02-15 04:00:37 +01:00
elog ( LOG , " Formatting logfile %u seg %u block %u at offset 0 " ,
2000-04-12 19:17:23 +02:00
readId , readSeg , readOff ) ;
1999-09-27 17:48:12 +02:00
/* readOff switches from a page number to a byte offset here */
readOff * = BLCKSZ ;
memset ( readBuf , 0 , BLCKSZ ) ;
/* NOTE(review): when the branch above already opened readFile, that descriptor is overwritten here without being closed */
readFile = XLogFileOpen ( readId , readSeg , false ) ;
2000-04-12 19:17:23 +02:00
if ( lseek ( readFile , ( off_t ) readOff , SEEK_SET ) < 0 )
2000-11-21 23:27:26 +01:00
elog ( STOP , " ReadRecord: lseek(logfile %u seg %u off %u) failed: %m " ,
readId , readSeg , readOff ) ;
1999-09-27 17:48:12 +02:00
while ( readOff < XLogSegSize )
{
if ( write ( readFile , readBuf , BLCKSZ ) ! = BLCKSZ )
2000-11-21 23:27:26 +01:00
elog ( STOP , " ReadRecord: write(logfile %u seg %u off %u) failed: %m " ,
readId , readSeg , readOff ) ;
1999-09-27 17:48:12 +02:00
readOff + = BLCKSZ ;
}
}
/* flush the zeroed pages to disk before the control file is touched */
if ( readFile > = 0 )
{
2000-12-08 23:21:33 +01:00
if ( pg_fsync ( readFile ) < 0 )
2000-11-21 23:27:26 +01:00
elog ( STOP , " ReadRecord: fsync(logfile %u seg %u) failed: %m " ,
readId , readSeg ) ;
1999-09-27 17:48:12 +02:00
close ( readFile ) ;
readFile = - 1 ;
}
/* readSeg becomes the number of the segment following the one that contains EndRecPtr */
readId = EndRecPtr . xlogid ;
readSeg = ( EndRecPtr . xrecoff - 1 ) / XLogSegSize + 1 ;
elog ( LOG , " The last logId/logSeg is (%u, %u) " , readId , readSeg - 1 ) ;
if ( ControlFile - > logId ! = readId | | ControlFile - > logSeg ! = readSeg )
{
elog ( LOG , " Set logId/logSeg in control file " ) ;
ControlFile - > logId = readId ;
ControlFile - > logSeg = readSeg ;
ControlFile - > time = time ( NULL ) ;
UpdateControlFile ( ) ;
}
if ( readSeg = = XLogLastSeg )
{
readSeg = 0 ;
readId + + ;
}
/* remove the file of that following segment, if there is one */
{
2000-04-12 19:17:23 +02:00
char path [ MAXPGPATH ] ;
1999-09-27 17:48:12 +02:00
XLogFileName ( path , readId , readSeg ) ;
unlink ( path ) ;
}
2000-04-12 19:17:23 +02:00
return ( record ) ;
1999-09-27 17:48:12 +02:00
}
2000-11-25 21:33:54 +01:00
/*
* I / O routines for pg_control
*
* * ControlFile is a buffer in shared memory that holds an image of the
* contents of pg_control . WriteControlFile ( ) initializes pg_control
* given a preloaded buffer , ReadControlFile ( ) loads the buffer from
* the pg_control file ( during postmaster or standalone - backend startup ) ,
* and UpdateControlFile ( ) rewrites pg_control after we modify xlog state .
*
* For simplicity , WriteControlFile ( ) initializes the fields of pg_control
* that are related to checking backend / database compatibility , and
* ReadControlFile ( ) verifies they are correct . We could split out the
* I / O and compatibility - check functions , but there seems no need currently .
*/
/*
 * XLOGPathInit
 *		Fill in ControlFilePath and XLogDir; both are formatted from DataDir
 *		and limited to MAXPGPATH bytes.
 */
void
XLOGPathInit(void)
{
	snprintf(ControlFilePath, MAXPGPATH, " %s/global/pg_control ", DataDir);
	snprintf(XLogDir, MAXPGPATH, " %s/pg_xlog ", DataDir);
}
static void
WriteControlFile ( void )
{
int fd ;
char buffer [ BLCKSZ ] ;
# ifdef USE_LOCALE
char * localeptr ;
# endif
/*
* Initialize compatibility - check fields
*/
ControlFile - > blcksz = BLCKSZ ;
ControlFile - > relseg_size = RELSEG_SIZE ;
ControlFile - > catalog_version_no = CATALOG_VERSION_NO ;
# ifdef USE_LOCALE
localeptr = setlocale ( LC_COLLATE , NULL ) ;
if ( ! localeptr )
elog ( STOP , " Invalid LC_COLLATE setting " ) ;
StrNCpy ( ControlFile - > lc_collate , localeptr , LOCALE_NAME_BUFLEN ) ;
localeptr = setlocale ( LC_CTYPE , NULL ) ;
if ( ! localeptr )
elog ( STOP , " Invalid LC_CTYPE setting " ) ;
StrNCpy ( ControlFile - > lc_ctype , localeptr , LOCALE_NAME_BUFLEN ) ;
/*
* Issue warning notice if initdb ' ing in a locale that will not permit
* LIKE index optimization . This is not a clean place to do it , but
* I don ' t see a better place either . . .
*/
if ( ! locale_is_like_safe ( ) )
elog ( NOTICE , " Initializing database with %s collation order. "
" \n \t This locale setting will prevent use of index optimization for "
" \n \t LIKE and regexp searches. If you are concerned about speed of "
" \n \t such queries, you may wish to set LC_COLLATE to \" C \" and "
" \n \t re-initdb. For more information see the Administrator's Guide. " ,
ControlFile - > lc_collate ) ;
# else
strcpy ( ControlFile - > lc_collate , " C " ) ;
strcpy ( ControlFile - > lc_ctype , " C " ) ;
# endif
/*
* We write out BLCKSZ bytes into pg_control , zero - padding the
* excess over sizeof ( ControlFileData ) . This reduces the odds
* of premature - EOF errors when reading pg_control . We ' ll still
* fail when we check the contents of the file , but hopefully with
* a more specific error than " couldn't read pg_control " .
*/
if ( sizeof ( ControlFileData ) > BLCKSZ )
elog ( STOP , " sizeof(ControlFileData) is too large ... fix xlog.c " ) ;
2000-12-28 14:00:29 +01:00
INIT_CRC64 ( ControlFile - > crc ) ;
COMP_CRC64 ( ControlFile - > crc ,
( ( char * ) ControlFile + offsetof ( ControlFileData , logId ) ) ,
( sizeof ( ControlFileData ) - offsetof ( ControlFileData , logId ) ) ) ;
FIN_CRC64 ( ControlFile - > crc ) ;
2000-11-25 21:33:54 +01:00
memset ( buffer , 0 , BLCKSZ ) ;
memcpy ( buffer , ControlFile , sizeof ( ControlFileData ) ) ;
fd = BasicOpenFile ( ControlFilePath , O_RDWR | O_CREAT | O_EXCL | PG_BINARY , S_IRUSR | S_IWUSR ) ;
if ( fd < 0 )
elog ( STOP , " WriteControlFile failed to create control file (%s): %m " ,
ControlFilePath ) ;
if ( write ( fd , buffer , BLCKSZ ) ! = BLCKSZ )
elog ( STOP , " WriteControlFile failed to write control file: %m " ) ;
2000-12-08 23:21:33 +01:00
if ( pg_fsync ( fd ) ! = 0 )
2000-11-25 21:33:54 +01:00
elog ( STOP , " WriteControlFile failed to fsync control file: %m " ) ;
close ( fd ) ;
}
static void
ReadControlFile ( void )
{
2000-12-28 14:00:29 +01:00
crc64 crc ;
2000-11-25 21:33:54 +01:00
int fd ;
/*
* Read data . . .
*/
fd = BasicOpenFile ( ControlFilePath , O_RDWR | PG_BINARY , S_IRUSR | S_IWUSR ) ;
if ( fd < 0 )
elog ( STOP , " open( \" %s \" ) failed: %m " , ControlFilePath ) ;
if ( read ( fd , ControlFile , sizeof ( ControlFileData ) ) ! = sizeof ( ControlFileData ) )
elog ( STOP , " read( \" %s \" ) failed: %m " , ControlFilePath ) ;
close ( fd ) ;
2000-12-28 14:00:29 +01:00
INIT_CRC64 ( crc ) ;
COMP_CRC64 ( crc ,
( ( char * ) ControlFile + offsetof ( ControlFileData , logId ) ) ,
( sizeof ( ControlFileData ) - offsetof ( ControlFileData , logId ) ) ) ;
FIN_CRC64 ( crc ) ;
if ( crc . crc1 ! = ControlFile - > crc . crc1 | | crc . crc2 ! = ControlFile - > crc . crc2 )
elog ( STOP , " Invalid CRC in control file " ) ;
2000-11-25 21:33:54 +01:00
/*
* Do compatibility checking immediately . We do this here for 2 reasons :
*
* ( 1 ) if the database isn ' t compatible with the backend executable ,
* we want to abort before we can possibly do any damage ;
*
* ( 2 ) this code is executed in the postmaster , so the setlocale ( ) will
* propagate to forked backends , which aren ' t going to read this file
* for themselves . ( These locale settings are considered critical
* compatibility items because they can affect sort order of indexes . )
*/
if ( ControlFile - > blcksz ! = BLCKSZ )
elog ( STOP , " database was initialized with BLCKSZ %d, \n \t but the backend was compiled with BLCKSZ %d. \n \t looks like you need to initdb. " ,
ControlFile - > blcksz , BLCKSZ ) ;
if ( ControlFile - > relseg_size ! = RELSEG_SIZE )
elog ( STOP , " database was initialized with RELSEG_SIZE %d, \n \t but the backend was compiled with RELSEG_SIZE %d. \n \t looks like you need to initdb. " ,
ControlFile - > relseg_size , RELSEG_SIZE ) ;
if ( ControlFile - > catalog_version_no ! = CATALOG_VERSION_NO )
elog ( STOP , " database was initialized with CATALOG_VERSION_NO %d, \n \t but the backend was compiled with CATALOG_VERSION_NO %d. \n \t looks like you need to initdb. " ,
ControlFile - > catalog_version_no , CATALOG_VERSION_NO ) ;
# ifdef USE_LOCALE
if ( setlocale ( LC_COLLATE , ControlFile - > lc_collate ) = = NULL )
elog ( STOP , " database was initialized with LC_COLLATE '%s', \n \t which is not recognized by setlocale(). \n \t looks like you need to initdb. " ,
ControlFile - > lc_collate ) ;
if ( setlocale ( LC_CTYPE , ControlFile - > lc_ctype ) = = NULL )
elog ( STOP , " database was initialized with LC_CTYPE '%s', \n \t which is not recognized by setlocale(). \n \t looks like you need to initdb. " ,
ControlFile - > lc_ctype ) ;
# else
if ( strcmp ( ControlFile - > lc_collate , " C " ) ! = 0 | |
strcmp ( ControlFile - > lc_ctype , " C " ) ! = 0 )
elog ( STOP , " database was initialized with LC_COLLATE '%s' and LC_CTYPE '%s', \n \t but the backend was compiled without locale support. \n \t looks like you need to initdb or recompile. " ,
ControlFile - > lc_collate , ControlFile - > lc_ctype ) ;
# endif
}
1999-10-06 23:58:18 +02:00
void
2000-11-25 21:33:54 +01:00
UpdateControlFile ( void )
1999-10-06 23:58:18 +02:00
{
2000-04-12 19:17:23 +02:00
int fd ;
1999-10-06 23:58:18 +02:00
2000-12-28 14:00:29 +01:00
INIT_CRC64 ( ControlFile - > crc ) ;
COMP_CRC64 ( ControlFile - > crc ,
( ( char * ) ControlFile + offsetof ( ControlFileData , logId ) ) ,
( sizeof ( ControlFileData ) - offsetof ( ControlFileData , logId ) ) ) ;
FIN_CRC64 ( ControlFile - > crc ) ;
2000-06-02 17:57:44 +02:00
fd = BasicOpenFile ( ControlFilePath , O_RDWR | PG_BINARY , S_IRUSR | S_IWUSR ) ;
1999-10-06 23:58:18 +02:00
if ( fd < 0 )
2000-11-25 21:33:54 +01:00
elog ( STOP , " open( \" %s \" ) failed: %m " , ControlFilePath ) ;
1999-10-06 23:58:18 +02:00
2000-11-25 21:33:54 +01:00
if ( write ( fd , ControlFile , sizeof ( ControlFileData ) ) ! = sizeof ( ControlFileData ) )
2000-11-21 23:27:26 +01:00
elog ( STOP , " write(cntlfile) failed: %m " ) ;
1999-10-06 23:58:18 +02:00
2000-12-08 23:21:33 +01:00
if ( pg_fsync ( fd ) ! = 0 )
2000-11-21 23:27:26 +01:00
elog ( STOP , " fsync(cntlfile) failed: %m " ) ;
1999-10-06 23:58:18 +02:00
close ( fd ) ;
}
2000-11-25 21:33:54 +01:00
/*
* Management of shared memory for XLOG
*/
1999-10-06 23:58:18 +02:00
int
2000-11-21 22:16:06 +01:00
XLOGShmemSize ( void )
1999-10-06 23:58:18 +02:00
{
if ( XLOGbuffers < MinXLOGbuffers )
XLOGbuffers = MinXLOGbuffers ;
2000-04-12 19:17:23 +02:00
return ( sizeof ( XLogCtlData ) + BLCKSZ * XLOGbuffers +
2000-11-25 21:33:54 +01:00
sizeof ( XLogRecPtr ) * XLOGbuffers +
sizeof ( ControlFileData ) ) ;
1999-10-06 23:58:18 +02:00
}
void
XLOGShmemInit ( void )
{
2000-04-12 19:17:23 +02:00
bool found ;
1999-10-06 23:58:18 +02:00
2000-11-25 21:33:54 +01:00
/* this must agree with space requested by XLOGShmemSize() */
1999-10-06 23:58:18 +02:00
if ( XLOGbuffers < MinXLOGbuffers )
XLOGbuffers = MinXLOGbuffers ;
2000-04-12 19:17:23 +02:00
XLogCtl = ( XLogCtlData * )
ShmemInitStruct ( " XLOG Ctl " , sizeof ( XLogCtlData ) + BLCKSZ * XLOGbuffers +
1999-10-06 23:58:18 +02:00
sizeof ( XLogRecPtr ) * XLOGbuffers , & found ) ;
Assert ( ! found ) ;
2000-11-25 21:33:54 +01:00
ControlFile = ( ControlFileData * )
ShmemInitStruct ( " Control File " , sizeof ( ControlFileData ) , & found ) ;
Assert ( ! found ) ;
/*
* If we are not in bootstrap mode , pg_control should already exist .
* Read and validate it immediately ( see comments in ReadControlFile ( )
* for the reasons why ) .
*/
if ( ! IsBootstrapProcessingMode ( ) )
ReadControlFile ( ) ;
1999-10-06 23:58:18 +02:00
}
/*
* This func must be called ONCE on system install
*/
void
BootStrapXLOG ( )
{
2000-04-12 19:17:23 +02:00
CheckPoint checkPoint ;
2000-11-25 21:33:54 +01:00
char buffer [ BLCKSZ ] ;
2000-11-16 07:16:00 +01:00
bool usexistent = false ;
2000-04-12 19:17:23 +02:00
XLogPageHeader page = ( XLogPageHeader ) buffer ;
XLogRecord * record ;
2000-12-28 14:00:29 +01:00
crc64 crc ;
1999-10-06 23:58:18 +02:00
checkPoint . redo . xlogid = 0 ;
checkPoint . redo . xrecoff = SizeOfXLogPHD ;
checkPoint . undo = checkPoint . redo ;
checkPoint . nextXid = FirstTransactionId ;
2000-04-12 19:17:23 +02:00
checkPoint . nextOid = BootstrapObjectIdData ;
2000-10-21 17:43:36 +02:00
checkPoint . ThisStartUpID = 0 ;
2000-11-21 11:17:57 +01:00
checkPoint . Shutdown = true ;
1999-10-06 23:58:18 +02:00
2000-11-21 03:11:06 +01:00
ShmemVariableCache - > nextXid = checkPoint . nextXid ;
ShmemVariableCache - > nextOid = checkPoint . nextOid ;
ShmemVariableCache - > oidCount = 0 ;
1999-10-06 23:58:18 +02:00
memset ( buffer , 0 , BLCKSZ ) ;
page - > xlp_magic = XLOG_PAGE_MAGIC ;
page - > xlp_info = 0 ;
2000-04-12 19:17:23 +02:00
record = ( XLogRecord * ) ( ( char * ) page + SizeOfXLogPHD ) ;
record - > xl_prev . xlogid = 0 ;
record - > xl_prev . xrecoff = 0 ;
1999-10-06 23:58:18 +02:00
record - > xl_xact_prev = record - > xl_prev ;
record - > xl_xid = InvalidTransactionId ;
record - > xl_len = sizeof ( checkPoint ) ;
record - > xl_info = 0 ;
record - > xl_rmid = RM_XLOG_ID ;
2000-04-12 19:17:23 +02:00
memcpy ( ( char * ) record + SizeOfXLogRecord , & checkPoint , sizeof ( checkPoint ) ) ;
1999-10-06 23:58:18 +02:00
2000-12-28 14:00:29 +01:00
INIT_CRC64 ( crc ) ;
COMP_CRC64 ( crc , ( ( char * ) & checkPoint ) , sizeof ( checkPoint ) ) ;
COMP_CRC64 ( crc , ( ( char * ) record + offsetof ( XLogRecord , xl_prev ) ) ,
( SizeOfXLogRecord - offsetof ( XLogRecord , xl_prev ) ) ) ;
FIN_CRC64 ( crc ) ;
record - > xl_crc = crc ;
2000-11-05 23:50:21 +01:00
logFile = XLogFileInit ( 0 , 0 , & usexistent ) ;
2000-03-20 08:25:39 +01:00
1999-10-06 23:58:18 +02:00
if ( write ( logFile , buffer , BLCKSZ ) ! = BLCKSZ )
2000-11-21 23:27:26 +01:00
elog ( STOP , " BootStrapXLOG failed to write logfile: %m " ) ;
1999-10-06 23:58:18 +02:00
2000-12-08 23:21:33 +01:00
if ( pg_fsync ( logFile ) ! = 0 )
2000-11-21 23:27:26 +01:00
elog ( STOP , " BootStrapXLOG failed to fsync logfile: %m " ) ;
1999-10-06 23:58:18 +02:00
close ( logFile ) ;
logFile = - 1 ;
2000-11-25 21:33:54 +01:00
memset ( ControlFile , 0 , sizeof ( ControlFileData ) ) ;
1999-10-06 23:58:18 +02:00
ControlFile - > logId = 0 ;
ControlFile - > logSeg = 1 ;
ControlFile - > checkPoint = checkPoint . redo ;
ControlFile - > time = time ( NULL ) ;
ControlFile - > state = DB_SHUTDOWNED ;
2000-11-25 21:33:54 +01:00
/* some additional ControlFile fields are set in WriteControlFile() */
1999-10-06 23:58:18 +02:00
2000-11-25 21:33:54 +01:00
WriteControlFile ( ) ;
1999-10-06 23:58:18 +02:00
}
2000-04-12 19:17:23 +02:00
/*
 * str_time
 *
 * Format 'tnow' as local time with the pattern used below and return a
 * pointer to a static buffer holding the result.  The buffer is
 * overwritten by every call, so callers must consume the string before
 * calling str_time() again.
 *
 * If the time cannot be converted or formatted, an empty string is
 * returned instead of whatever happens to be in the buffer.
 */
static char *
str_time(time_t tnow)
{
	/*
	 * Generously sized: the pattern below expands to more characters than
	 * the former 20-byte buffer could hold together with the terminating
	 * NUL, in which case strftime() returns 0 and leaves the buffer
	 * contents unspecified.
	 */
	static char buf[128];
	struct tm  *tm = localtime(&tnow);

	/* localtime() may return NULL; strftime() returns 0 on overflow */
	if (tm == NULL ||
		strftime(buf, sizeof(buf),
				 " %Y-%m-%d %H:%M:%S ",
				 tm) == 0)
		buf[0] = '\0';

	return buf;
}
/*
 * StartupXLOG
 *
 * This func must be called ONCE on system startup.
 *
 * In order, it:
 *	- bumps CritSectionCount for the duration of the function;
 *	- initialises the XLogCtl bookkeeping fields and spinlocks;
 *	- re-reads the control file and sanity-checks its fields;
 *	- reads the checkpoint record the control file points at, validates
 *	  it, and seeds ShmemVariableCache, ThisStartUpID and RedoRecPtr;
 *	- decides whether recovery is needed and, if so, replays records
 *	  through the RmgrTable rm_redo callbacks (REDO);
 *	- re-reads the last record to prime the first XLOG buffer page and
 *	  the Lgwr request/result pointers;
 *	- if recovery ran, writes a checkpoint via CreateCheckPoint(true);
 *	- marks the control file DB_IN_PRODUCTION and advances ThisStartUpID.
 *
 * The UNDO pass is compiled out (#ifdef NOT_USED).
 */
void
StartupXLOG ( )
{
2000-04-12 19:17:23 +02:00
XLogCtlInsert * Insert ;
CheckPoint checkPoint ;
XLogRecPtr RecPtr ,
LastRec ;
XLogRecord * record ;
2000-12-28 14:00:29 +01:00
char buffer [ _INTL_MAXLOGRECSZ + SizeOfXLogRecord ] ;
2000-04-12 19:17:23 +02:00
2000-11-21 23:27:26 +01:00
elog ( LOG , " starting up " ) ;
2000-12-18 01:44:50 +01:00
CritSectionCount + + ;
1999-10-06 23:58:18 +02:00
2000-04-12 19:17:23 +02:00
/*
 * The xlblocks[] array is placed directly after the XLogCtlData struct,
 * and the page buffers directly after the XLOGbuffers xlblocks entries.
 */
XLogCtl - > xlblocks = ( XLogRecPtr * ) ( ( ( char * ) XLogCtl ) + sizeof ( XLogCtlData ) ) ;
XLogCtl - > pages = ( ( char * ) XLogCtl - > xlblocks + sizeof ( XLogRecPtr ) * XLOGbuffers ) ;
1999-10-06 23:58:18 +02:00
XLogCtl - > XLogCacheByte = BLCKSZ * XLOGbuffers ;
XLogCtl - > XLogCacheBlck = XLOGbuffers - 1 ;
memset ( XLogCtl - > xlblocks , 0 , sizeof ( XLogRecPtr ) * XLOGbuffers ) ;
/* Seed the shared request/result pointers from the current file-level values */
XLogCtl - > LgwrRqst = LgwrRqst ;
XLogCtl - > LgwrResult = LgwrResult ;
XLogCtl - > Insert . LgwrResult = LgwrResult ;
XLogCtl - > Insert . curridx = 0 ;
XLogCtl - > Insert . currpage = ( XLogPageHeader ) ( XLogCtl - > pages ) ;
XLogCtl - > Write . LgwrResult = LgwrResult ;
XLogCtl - > Write . curridx = 0 ;
S_INIT_LOCK ( & ( XLogCtl - > insert_lck ) ) ;
S_INIT_LOCK ( & ( XLogCtl - > info_lck ) ) ;
S_INIT_LOCK ( & ( XLogCtl - > lgwr_lck ) ) ;
2000-11-05 23:50:21 +01:00
S_INIT_LOCK ( & ( XLogCtl - > chkp_lck ) ) ;
1999-10-06 23:58:18 +02:00
/*
 * Read control file and check XLOG status looks valid.
 *
 * Note: in most control paths, *ControlFile is already valid and we
 * need not do ReadControlFile() here, but might as well do it to be sure.
 */
2000-11-25 21:33:54 +01:00
ReadControlFile ( ) ;
1999-10-06 23:58:18 +02:00
2000-04-12 19:17:23 +02:00
/* Refuse to continue if any control-file field is outside its expected range */
if ( ControlFile - > logSeg = = 0 | |
ControlFile - > time < = 0 | |
ControlFile - > state < DB_SHUTDOWNED | |
ControlFile - > state > DB_IN_PRODUCTION | |
1999-10-06 23:58:18 +02:00
! XRecOffIsValid ( ControlFile - > checkPoint . xrecoff ) )
2000-11-21 23:27:26 +01:00
elog ( STOP , " control file context is broken " ) ;
1999-10-06 23:58:18 +02:00
/*
 * Log the state the previous run left in the control file.
 * NOTE(review): "propably" in the DB_IN_RECOVERY message is a typo for
 * "probably"; it is a runtime string and is left untouched here.
 */
if ( ControlFile - > state = = DB_SHUTDOWNED )
2000-11-21 23:27:26 +01:00
elog ( LOG , " database system was shut down at %s " ,
2000-04-12 19:17:23 +02:00
str_time ( ControlFile - > time ) ) ;
1999-10-06 23:58:18 +02:00
else if ( ControlFile - > state = = DB_SHUTDOWNING )
2000-11-21 23:27:26 +01:00
elog ( LOG , " database system shutdown was interrupted at %s " ,
2000-04-12 19:17:23 +02:00
str_time ( ControlFile - > time ) ) ;
1999-10-06 23:58:18 +02:00
else if ( ControlFile - > state = = DB_IN_RECOVERY )
2000-11-21 23:27:26 +01:00
elog ( LOG , " database system was interrupted being in recovery at %s \n "
2000-04-12 19:17:23 +02:00
" \t This propably means that some data blocks are corrupted \n "
2000-11-21 23:27:26 +01:00
" \t and you will have to use last backup for recovery. " ,
2000-04-12 19:17:23 +02:00
str_time ( ControlFile - > time ) ) ;
1999-10-06 23:58:18 +02:00
else if ( ControlFile - > state = = DB_IN_PRODUCTION )
2000-11-21 23:27:26 +01:00
elog ( LOG , " database system was interrupted at %s " ,
2000-04-12 19:17:23 +02:00
str_time ( ControlFile - > time ) ) ;
1999-10-06 23:58:18 +02:00
/* Start from the checkpoint location recorded in the control file */
LastRec = RecPtr = ControlFile - > checkPoint ;
if ( ! XRecOffIsValid ( RecPtr . xrecoff ) )
elog ( STOP , " Invalid checkPoint in control file " ) ;
elog ( LOG , " CheckPoint record at (%u, %u) " , RecPtr . xlogid , RecPtr . xrecoff ) ;
/*
 * NOTE(review): the result of ReadRecord() is dereferenced without a
 * NULL check; presumably it does not return on failure when given an
 * explicit position - confirm against ReadRecord().
 */
record = ReadRecord ( & RecPtr , buffer ) ;
if ( record - > xl_rmid ! = RM_XLOG_ID )
elog ( STOP , " Invalid RMID in checkPoint record " ) ;
if ( record - > xl_len ! = sizeof ( checkPoint ) )
elog ( STOP , " Invalid length of checkPoint record " ) ;
2000-04-12 19:17:23 +02:00
/* The CheckPoint payload follows the fixed-size record header */
checkPoint = * ( ( CheckPoint * ) ( ( char * ) record + SizeOfXLogRecord ) ) ;
1999-10-06 23:58:18 +02:00
2000-10-24 11:56:23 +02:00
elog ( LOG , " Redo record at (%u, %u); Undo record at (%u, %u); Shutdown %s " ,
2000-04-12 19:17:23 +02:00
checkPoint . redo . xlogid , checkPoint . redo . xrecoff ,
2000-10-24 11:56:23 +02:00
checkPoint . undo . xlogid , checkPoint . undo . xrecoff ,
( checkPoint . Shutdown ) ? " TRUE " : " FALSE " ) ;
1999-10-06 23:58:18 +02:00
elog ( LOG , " NextTransactionId: %u; NextOid: %u " ,
2000-04-12 19:17:23 +02:00
checkPoint . nextXid , checkPoint . nextOid ) ;
if ( checkPoint . nextXid < FirstTransactionId | |
1999-10-06 23:58:18 +02:00
checkPoint . nextOid < BootstrapObjectIdData )
elog ( STOP , " Invalid NextTransactionId/NextOid " ) ;
/* Seed the shared xid/oid counters from the checkpoint */
ShmemVariableCache - > nextXid = checkPoint . nextXid ;
ShmemVariableCache - > nextOid = checkPoint . nextOid ;
2000-11-03 12:39:36 +01:00
ShmemVariableCache - > oidCount = 0 ;
1999-10-06 23:58:18 +02:00
2000-10-21 17:43:36 +02:00
ThisStartUpID = checkPoint . ThisStartUpID ;
2000-12-28 14:00:29 +01:00
RedoRecPtr = XLogCtl - > Insert . RedoRecPtr =
XLogCtl - > RedoRecPtr = checkPoint . redo ;
2000-10-21 17:43:36 +02:00
1999-10-06 23:58:18 +02:00
/*
 * Neither the redo nor the undo pointer may lie after the checkpoint
 * record itself.  An undo pointer with xrecoff == 0 is replaced by the
 * checkpoint record's own position.
 */
if ( XLByteLT ( RecPtr , checkPoint . redo ) )
elog ( STOP , " Invalid redo in checkPoint record " ) ;
if ( checkPoint . undo . xrecoff = = 0 )
checkPoint . undo = RecPtr ;
if ( XLByteLT ( RecPtr , checkPoint . undo ) )
elog ( STOP , " Invalid undo in checkPoint record " ) ;
2000-10-24 11:56:23 +02:00
/*
 * Recovery is required when the redo or undo pointer precedes the
 * checkpoint record (which is an error for a shutdown checkpoint or a
 * DB_SHUTDOWNED control file), or when the control file state is
 * anything other than DB_SHUTDOWNED.
 */
if ( XLByteLT ( checkPoint . undo , RecPtr ) | |
XLByteLT ( checkPoint . redo , RecPtr ) )
1999-10-06 23:58:18 +02:00
{
2000-10-24 11:56:23 +02:00
if ( checkPoint . Shutdown )
elog ( STOP , " Invalid Redo/Undo record in shutdown checkpoint " ) ;
1999-10-06 23:58:18 +02:00
if ( ControlFile - > state = = DB_SHUTDOWNED )
2000-11-21 23:27:26 +01:00
elog ( STOP , " Invalid Redo/Undo record in shut down state " ) ;
2000-10-28 18:21:00 +02:00
InRecovery = true ;
1999-10-06 23:58:18 +02:00
}
else if ( ControlFile - > state ! = DB_SHUTDOWNED )
2000-10-24 11:56:23 +02:00
{
2000-10-28 18:21:00 +02:00
InRecovery = true ;
2000-10-24 11:56:23 +02:00
}
1999-10-06 23:58:18 +02:00
2000-10-28 18:21:00 +02:00
/* REDO */
if ( InRecovery )
1999-10-06 23:58:18 +02:00
{
2000-11-21 23:27:26 +01:00
elog ( LOG , " database system was not properly shut down; "
" automatic recovery in progress... " ) ;
1999-10-06 23:58:18 +02:00
/* Record in the control file that recovery has started, before replaying */
ControlFile - > state = DB_IN_RECOVERY ;
ControlFile - > time = time ( NULL ) ;
UpdateControlFile ( ) ;
2000-10-24 11:56:23 +02:00
XLogOpenLogRelation ( ) ; /* open pg_log */
2000-10-28 18:21:00 +02:00
XLogInitRelationCache ( ) ;
2000-10-24 11:56:23 +02:00
1999-10-06 23:58:18 +02:00
/* Is REDO required ? */
if ( XLByteLT ( checkPoint . redo , RecPtr ) )
record = ReadRecord ( & ( checkPoint . redo ) , buffer ) ;
2000-12-28 14:00:29 +01:00
else /* read past CheckPoint record */
1999-10-06 23:58:18 +02:00
record = ReadRecord ( NULL , buffer ) ;
/* A record with xl_len == 0 is treated here as "nothing (more) to replay" */
if ( record - > xl_len ! = 0 )
{
2000-10-28 18:21:00 +02:00
InRedo = true ;
2000-11-21 23:27:26 +01:00
elog ( LOG , " redo starts at (%u, %u) " ,
2000-04-12 19:17:23 +02:00
ReadRecPtr . xlogid , ReadRecPtr . xrecoff ) ;
1999-10-06 23:58:18 +02:00
do
{
/* Keep nextXid ahead of every xid found in the log */
if ( record - > xl_xid > = ShmemVariableCache - > nextXid )
ShmemVariableCache - > nextXid = record - > xl_xid + 1 ;
2000-10-21 17:43:36 +02:00
/*
 * Debug dump of the record to file descriptor 2.
 * NOTE(review): buf is filled with unbounded sprintf/strcat and the
 * length of the rm_desc output is not checked in this function.
 */
if ( XLOG_DEBUG )
{
char buf [ 8192 ] ;
2000-10-23 06:10:24 +02:00
sprintf ( buf , " REDO @ %u/%u; LSN %u/%u: " ,
ReadRecPtr . xlogid , ReadRecPtr . xrecoff ,
EndRecPtr . xlogid , EndRecPtr . xrecoff ) ;
2000-10-21 17:43:36 +02:00
xlog_outrec ( buf , record ) ;
strcat ( buf , " - " ) ;
RmgrTable [ record - > xl_rmid ] . rm_desc ( buf ,
record - > xl_info , XLogRecGetData ( record ) ) ;
strcat ( buf , " \n " ) ;
write ( 2 , buf , strlen ( buf ) ) ;
}
2000-12-28 14:00:29 +01:00
/* Backup blocks flagged in xl_info are restored before the rmgr redo callback runs */
if ( record - > xl_info & ( XLR_BKP_BLOCK_1 | XLR_BKP_BLOCK_2 ) )
RestoreBkpBlocks ( record , EndRecPtr ) ;
1999-10-06 23:58:18 +02:00
/*
 * NOTE(review): xl_rmid indexes RmgrTable without a range check in
 * this function; presumably ReadRecord() validates it - confirm.
 */
RmgrTable [ record - > xl_rmid ] . rm_redo ( EndRecPtr , record ) ;
record = ReadRecord ( NULL , buffer ) ;
} while ( record - > xl_len ! = 0 ) ;
2000-11-21 23:27:26 +01:00
elog ( LOG , " redo done at (%u, %u) " ,
2000-04-12 19:17:23 +02:00
ReadRecPtr . xlogid , ReadRecPtr . xrecoff ) ;
1999-10-06 23:58:18 +02:00
LastRec = ReadRecPtr ;
2000-10-28 18:21:00 +02:00
InRedo = false ;
1999-10-06 23:58:18 +02:00
}
else
2000-11-21 23:27:26 +01:00
elog ( LOG , " redo is not required " ) ;
2000-10-28 18:21:00 +02:00
}
/*
 * Init xlog buffer cache: re-read the last record, then set up buffer 0
 * and the insert position from EndRecPtr.  The "xrecoff - 1" arithmetic
 * selects the segment/page that contains the byte just before EndRecPtr.
 */
record = ReadRecord ( & LastRec , buffer ) ;
logId = EndRecPtr . xlogid ;
logSeg = ( EndRecPtr . xrecoff - 1 ) / XLogSegSize ;
logOff = 0 ;
logFile = XLogFileOpen ( logId , logSeg , false ) ;
XLogCtl - > xlblocks [ 0 ] . xlogid = logId ;
XLogCtl - > xlblocks [ 0 ] . xrecoff =
( ( EndRecPtr . xrecoff - 1 ) / BLCKSZ + 1 ) * BLCKSZ ;
Insert = & XLogCtl - > Insert ;
/* presumably readBuf holds the page ReadRecord() read last - confirm */
memcpy ( ( char * ) ( Insert - > currpage ) , readBuf , BLCKSZ ) ;
Insert - > currpos = ( ( char * ) Insert - > currpage ) +
( EndRecPtr . xrecoff + BLCKSZ - XLogCtl - > xlblocks [ 0 ] . xrecoff ) ;
Insert - > PrevRecord = LastRec ;
/* Everything up to EndRecPtr counts as both written and flushed */
LgwrRqst . Write = LgwrRqst . Flush =
LgwrResult . Write = LgwrResult . Flush = EndRecPtr ;
XLogCtl - > Write . LgwrResult = LgwrResult ;
Insert - > LgwrResult = LgwrResult ;
XLogCtl - > LgwrRqst = LgwrRqst ;
XLogCtl - > LgwrResult = LgwrResult ;
1999-10-06 23:58:18 +02:00
2000-10-24 11:56:23 +02:00
/* The UNDO pass below is compiled out unless NOT_USED is defined */
# ifdef NOT_USED
2000-10-28 18:21:00 +02:00
/* UNDO */
if ( InRecovery )
{
1999-10-06 23:58:18 +02:00
RecPtr = ReadRecPtr ;
if ( XLByteLT ( checkPoint . undo , RecPtr ) )
{
2000-11-21 23:27:26 +01:00
elog ( LOG , " undo starts at (%u, %u) " ,
2000-04-12 19:17:23 +02:00
RecPtr . xlogid , RecPtr . xrecoff ) ;
1999-10-06 23:58:18 +02:00
/* Walk backwards via xl_prev, undoing records of transactions that did not commit */
do
{
record = ReadRecord ( & RecPtr , buffer ) ;
2000-04-12 19:17:23 +02:00
if ( TransactionIdIsValid ( record - > xl_xid ) & &
1999-10-06 23:58:18 +02:00
! TransactionIdDidCommit ( record - > xl_xid ) )
2000-07-04 03:49:44 +02:00
RmgrTable [ record - > xl_rmid ] . rm_undo ( EndRecPtr , record ) ;
1999-10-06 23:58:18 +02:00
RecPtr = record - > xl_prev ;
} while ( XLByteLE ( checkPoint . undo , RecPtr ) ) ;
2000-11-21 23:27:26 +01:00
elog ( LOG , " undo done at (%u, %u) " ,
2000-04-12 19:17:23 +02:00
ReadRecPtr . xlogid , ReadRecPtr . xrecoff ) ;
1999-10-06 23:58:18 +02:00
}
else
2000-11-21 23:27:26 +01:00
elog ( LOG , " undo is not required " ) ;
1999-10-06 23:58:18 +02:00
}
2000-10-28 18:21:00 +02:00
# endif
1999-10-06 23:58:18 +02:00
2000-10-28 18:21:00 +02:00
/* After recovery, write a checkpoint (shutdown = true) and drop the recovery relation cache */
if ( InRecovery )
1999-10-06 23:58:18 +02:00
{
CreateCheckPoint ( true ) ;
2000-10-28 18:21:00 +02:00
XLogCloseRelationCache ( ) ;
1999-10-06 23:58:18 +02:00
}
2000-10-28 18:21:00 +02:00
InRecovery = false ;
1999-10-06 23:58:18 +02:00
ControlFile - > state = DB_IN_PRODUCTION ;
ControlFile - > time = time ( NULL ) ;
UpdateControlFile ( ) ;
2000-10-21 17:43:36 +02:00
/* Advance the startup id and publish it in the shared XLogCtl structure */
ThisStartUpID + + ;
XLogCtl - > ThisStartUpID = ThisStartUpID ;
2000-11-21 23:27:26 +01:00
elog ( LOG , " database system is in production state " ) ;
2000-12-18 01:44:50 +01:00
CritSectionCount - - ;
1999-10-06 23:58:18 +02:00
return ;
}
2000-10-21 17:43:36 +02:00
/*
2000-12-28 14:00:29 +01:00
* Postmaster uses it to set ThisStartUpID & RedoRecPtr from
* XLogCtlData located in shmem after successful startup .
2000-10-21 17:43:36 +02:00
*/
void
SetThisStartUpID ( void )
{
ThisStartUpID = XLogCtl - > ThisStartUpID ;
2000-12-28 14:00:29 +01:00
RedoRecPtr = XLogCtl - > RedoRecPtr ;
}
/*
* CheckPoint - er called by postmaster creates copy of RedoRecPtr
* for postmaster in shmem . Postmaster uses GetRedoRecPtr after
* that to update its own copy of RedoRecPtr .
*/
void
SetRedoRecPtr ( void )
{
XLogCtl - > RedoRecPtr = RedoRecPtr ;
}
void
GetRedoRecPtr ( void )
{
RedoRecPtr = XLogCtl - > RedoRecPtr ;
2000-10-21 17:43:36 +02:00
}
1999-10-06 23:58:18 +02:00
/*
 * ShutdownXLOG
 *
 * This func must be called ONCE on system shutdown.
 *
 * Logs the start of shutdown, then, with CritSectionCount raised, calls
 * CreateDummyCaches() followed by CreateCheckPoint(true) to write the
 * shutdown checkpoint, and finally logs that the system is shut down.
 */
void
ShutdownXLOG ( )
{
2000-11-21 23:27:26 +01:00
elog ( LOG , " shutting down " ) ;
1999-10-06 23:58:18 +02:00
2000-12-18 01:44:50 +01:00
/* CritSectionCount is raised only around the cache setup and the checkpoint */
CritSectionCount + + ;
2000-11-09 12:26:00 +01:00
CreateDummyCaches ( ) ;
1999-10-06 23:58:18 +02:00
CreateCheckPoint ( true ) ;
2000-12-18 01:44:50 +01:00
CritSectionCount - - ;
1999-10-06 23:58:18 +02:00
2000-11-21 23:27:26 +01:00
elog ( LOG , " database system is shut down " ) ;
1999-10-06 23:58:18 +02:00
}
2000-11-05 23:50:21 +01:00
extern XLogRecPtr GetUndoRecPtr ( void ) ;
1999-09-27 17:48:12 +02:00
/*
 * CreateCheckPoint
 *
 * Write a checkpoint record into the XLOG, flush it, and record its
 * position in the control file.
 *
 * shutdown: true when the checkpoint closes the log (it is passed as true
 *		by StartupXLOG after recovery and by ShutdownXLOG).  In that mode
 *		the control file goes through DB_SHUTDOWNING to DB_SHUTDOWNED, and
 *		any active transaction or concurrent XLOG activity is a STOP
 *		error.  When false, nextOid is advanced by the cached oidCount and
 *		the next log segment may be created in advance.
 *
 * Must not be called when MyLastRecPtr.xrecoff is non-zero (reported as
 * "inside transaction block").  The whole body after that check runs
 * between START_CRIT_CODE and END_CRIT_CODE with chkp_lck held.
 */
void
CreateCheckPoint ( bool shutdown )
{
2000-04-12 19:17:23 +02:00
CheckPoint checkPoint ;
XLogRecPtr recptr ;
XLogCtlInsert * Insert = & XLogCtl - > Insert ;
2000-12-28 14:00:29 +01:00
XLogRecData rdata ;
2000-04-12 19:17:23 +02:00
uint32 freespace ;
uint16 curridx ;
2000-11-05 23:50:21 +01:00
uint32 _logId ;
uint32 _logSeg ;
char archdir [ MAXPGPATH ] ;
2000-12-29 22:31:21 +01:00
unsigned spins = 0 ;
2000-11-05 23:50:21 +01:00
if ( MyLastRecPtr . xrecoff ! = 0 )
elog ( ERROR , " CreateCheckPoint: cannot be called inside transaction block " ) ;
2000-12-03 11:27:29 +01:00
START_CRIT_CODE ;
2000-12-29 22:31:21 +01:00
/* Grab lock, using larger than normal sleep between tries (1 sec) */
2000-11-05 23:50:21 +01:00
while ( TAS ( & ( XLogCtl - > chkp_lck ) ) )
{
2000-12-29 22:31:21 +01:00
S_LOCK_SLEEP_INTERVAL ( & ( XLogCtl - > chkp_lck ) , spins + + , 1000000 ) ;
2000-11-05 23:50:21 +01:00
}
1999-09-27 17:48:12 +02:00
memset ( & checkPoint , 0 , sizeof ( checkPoint ) ) ;
/* For a shutdown checkpoint, note in the control file that shutdown has begun */
if ( shutdown )
{
ControlFile - > state = DB_SHUTDOWNING ;
ControlFile - > time = time ( NULL ) ;
UpdateControlFile ( ) ;
}
2000-10-21 17:43:36 +02:00
checkPoint . ThisStartUpID = ThisStartUpID ;
checkPoint . Shutdown = shutdown ;
1999-09-27 17:48:12 +02:00
/*
 * Get REDO record ptr: under insert_lck, the redo pointer is the current
 * insert position.  If fewer than SizeOfXLogRecord bytes remain on the
 * current page, the next buffer is set up first.
 */
2000-12-29 22:31:21 +01:00
S_LOCK ( & ( XLogCtl - > insert_lck ) ) ;
2000-04-12 19:17:23 +02:00
freespace = ( ( char * ) Insert - > currpage ) + BLCKSZ - Insert - > currpos ;
1999-09-27 17:48:12 +02:00
if ( freespace < SizeOfXLogRecord )
{
curridx = NextBufIdx ( Insert - > curridx ) ;
if ( XLByteLE ( XLogCtl - > xlblocks [ curridx ] , LgwrResult . Write ) )
InitXLBuffer ( curridx ) ;
2000-04-12 19:17:23 +02:00
else
1999-09-27 17:48:12 +02:00
GetFreeXLBuffer ( ) ;
/* NOTE(review): freespace is assigned here but not read again in this function */
freespace = BLCKSZ - SizeOfXLogPHD ;
}
else
curridx = Insert - > curridx ;
/*
 * xlblocks[curridx] is used as the end address of the page, so the
 * page start is that value minus BLCKSZ; add the offset of currpos
 * within the page.  presumably InitXLBuffer()/GetFreeXLBuffer() have
 * updated Insert->currpage/currpos for the new buffer - confirm.
 */
checkPoint . redo . xlogid = XLogCtl - > xlblocks [ curridx ] . xlogid ;
2000-04-12 19:17:23 +02:00
checkPoint . redo . xrecoff = XLogCtl - > xlblocks [ curridx ] . xrecoff - BLCKSZ +
Insert - > currpos - ( ( char * ) Insert - > currpage ) ;
2000-12-28 14:00:29 +01:00
RedoRecPtr = XLogCtl - > Insert . RedoRecPtr = checkPoint . redo ;
1999-09-27 17:48:12 +02:00
S_UNLOCK ( & ( XLogCtl - > insert_lck ) ) ;
/* Snapshot the next xid and next oid, each under its own spinlock */
SpinAcquire ( XidGenLockId ) ;
checkPoint . nextXid = ShmemVariableCache - > nextXid ;
SpinRelease ( XidGenLockId ) ;
SpinAcquire ( OidGenLockId ) ;
checkPoint . nextOid = ShmemVariableCache - > nextOid ;
2000-11-03 12:39:36 +01:00
/* Online checkpoint: skip past the oids counted in oidCount */
if ( ! shutdown )
checkPoint . nextOid + = ShmemVariableCache - > oidCount ;
1999-09-27 17:48:12 +02:00
SpinRelease ( OidGenLockId ) ;
1999-09-28 13:41:09 +02:00
FlushBufferPool ( ) ;
1999-09-27 17:48:12 +02:00
2000-10-24 11:56:23 +02:00
/* Get UNDO record ptr - should use oldest of PROC->logRec */
2000-11-05 23:50:21 +01:00
checkPoint . undo = GetUndoRecPtr ( ) ;
1999-09-27 17:48:12 +02:00
if ( shutdown & & checkPoint . undo . xrecoff ! = 0 )
elog ( STOP , " Active transaction while data base is shutting down " ) ;
2000-12-28 14:00:29 +01:00
/* Insert the checkpoint as a single data chunk not tied to any buffer */
rdata . buffer = InvalidBuffer ;
rdata . data = ( char * ) ( & checkPoint ) ;
rdata . len = sizeof ( checkPoint ) ;
rdata . next = NULL ;
recptr = XLogInsert ( RM_XLOG_ID , XLOG_CHECKPOINT , & rdata ) ;
1999-09-27 17:48:12 +02:00
/*
 * presumably XLogInsert() leaves the start of the record just written
 * in MyLastRecPtr, so a mismatch with redo means someone else inserted
 * in between - confirm against XLogInsert().
 */
if ( shutdown & & ! XLByteEQ ( checkPoint . redo , MyLastRecPtr ) )
elog ( STOP , " XLog concurrent activity while data base is shutting down " ) ;
XLogFlush ( recptr ) ;
SpinAcquire ( ControlFileLockId ) ;
if ( shutdown )
ControlFile - > state = DB_SHUTDOWNED ;
2000-11-05 23:50:21 +01:00
else /* create new log file */
{
/* Only when the checkpoint record lies in the last quarter of its segment */
if ( recptr . xrecoff % XLogSegSize > =
( uint32 ) ( 0.75 * XLogSegSize ) )
{
int lf ;
bool usexistent = true ;
_logId = recptr . xlogid ;
_logSeg = recptr . xrecoff / XLogSegSize ;
/* Step to the following segment, wrapping to the next log id after XLogLastSeg */
if ( _logSeg > = XLogLastSeg )
{
_logId + + ;
_logSeg = 0 ;
}
else
_logSeg + + ;
/* NOTE(review): the results of XLogFileInit() and close() are not checked here */
lf = XLogFileInit ( _logId , _logSeg , & usexistent ) ;
close ( lf ) ;
}
}
1999-09-27 17:48:12 +02:00
ControlFile - > checkPoint = MyLastRecPtr ;
2000-11-05 23:50:21 +01:00
/* Copy what the log-file cleanup below needs while the control file lock is still held */
_logId = ControlFile - > logId ;
_logSeg = ControlFile - > logSeg - 1 ;
/*
 * NOTE(review): unbounded strcpy; the size of ControlFile->archdir is
 * not visible here - confirm it cannot exceed MAXPGPATH.
 */
strcpy ( archdir , ControlFile - > archdir ) ;
1999-09-27 17:48:12 +02:00
ControlFile - > time = time ( NULL ) ;
UpdateControlFile ( ) ;
SpinRelease ( ControlFileLockId ) ;
2000-11-05 23:50:21 +01:00
/*
 * Delete offline log files. Get oldest online
 * log file from undo rec if it's valid.
 */
if ( checkPoint . undo . xrecoff ! = 0 )
{
_logId = checkPoint . undo . xlogid ;
_logSeg = checkPoint . undo . xrecoff / XLogSegSize ;
}
/* Step back one segment from the oldest one still needed; nothing to do at (0, 0) */
if ( _logId | | _logSeg )
{
if ( _logSeg )
_logSeg - - ;
else
{
/*
 * NOTE(review): when stepping back across a log id boundary the
 * segment is set to 0 rather than to the last segment of the
 * previous log id - confirm this is intended.
 */
_logId - - ;
_logSeg = 0 ;
}
MoveOfflineLogs ( archdir , _logId , _logSeg ) ;
}
S_UNLOCK ( & ( XLogCtl - > chkp_lck ) ) ;
MyLastRecPtr . xrecoff = 0 ; /* to avoid commit record */
2000-12-03 11:27:29 +01:00
END_CRIT_CODE ;
2000-11-05 23:50:21 +01:00
1999-09-27 17:48:12 +02:00
return ;
}
2000-10-21 17:43:36 +02:00
2000-11-03 12:39:36 +01:00
void XLogPutNextOid ( Oid nextOid ) ;
/*
 * XLogPutNextOid
 *
 * Write an XLOG_NEXTOID record whose payload is the given Oid.  The
 * position returned by XLogInsert() is deliberately ignored.
 */
void
XLogPutNextOid(Oid nextOid)
{
	XLogRecData oidChunk;

	/* single chunk, not associated with any buffer */
	oidChunk.next = NULL;
	oidChunk.len = sizeof(nextOid);
	oidChunk.data = (char *) &nextOid;
	oidChunk.buffer = InvalidBuffer;

	(void) XLogInsert(RM_XLOG_ID, XLOG_NEXTOID, &oidChunk);
}
2000-10-21 17:43:36 +02:00
/*
 * xlog_redo
 *
 * Redo callback for XLOG-owned records.  Only XLOG_NEXTOID records have
 * an effect: the shared nextOid counter is raised to the logged value if
 * it is currently lower.  All other record kinds are ignored.
 *
 * lsn is not used.
 */
void
xlog_redo(XLogRecPtr lsn, XLogRecord *record)
{
	Oid			loggedOid;

	/* strip the XLR_INFO_MASK bits before looking at the record kind */
	if ((uint8) (record->xl_info & ~XLR_INFO_MASK) != XLOG_NEXTOID)
		return;

	/* payload is copied out rather than dereferenced in place */
	memcpy(&loggedOid, XLogRecGetData(record), sizeof(Oid));

	if (loggedOid > ShmemVariableCache->nextOid)
		ShmemVariableCache->nextOid = loggedOid;
}
/*
 * xlog_undo
 *
 * Undo callback for XLOG-owned records.  The body is empty: neither lsn
 * nor record is examined and nothing is changed.
 */
void
xlog_undo ( XLogRecPtr lsn , XLogRecord * record )
{
}
/*
 * xlog_desc
 *
 * Append a textual description of an XLOG-owned record to the
 * NUL-terminated string already in buf.
 *
 * buf:		destination string; text is appended at its current end.  No
 *			length is passed in, so the caller must supply enough room.
 * xl_info:	record info byte; the XLR_INFO_MASK bits are ignored.
 * rec:		record payload (a CheckPoint for XLOG_CHECKPOINT, an Oid for
 *			XLOG_NEXTOID).
 */
void
xlog_desc(char *buf, uint8 xl_info, char *rec)
{
	uint8		kind = xl_info & ~XLR_INFO_MASK;
	char	   *tail = buf + strlen(buf);

	if (kind == XLOG_CHECKPOINT)
	{
		CheckPoint *cp = (CheckPoint *) rec;
		const char *mode;

		if (cp->Shutdown)
			mode = " shutdown ";
		else
			mode = " online ";

		sprintf(tail, " checkpoint: redo %u/%u; undo %u/%u; "
				" sui %u; xid %u; oid %u; %s ",
				cp->redo.xlogid, cp->redo.xrecoff,
				cp->undo.xlogid, cp->undo.xrecoff,
				cp->ThisStartUpID, cp->nextXid,
				cp->nextOid,
				mode);
		return;
	}

	if (kind == XLOG_NEXTOID)
	{
		Oid			loggedOid;

		/* payload is copied out rather than dereferenced in place */
		memcpy(&loggedOid, rec, sizeof(Oid));
		sprintf(tail, " nextOid: %u ", loggedOid);
		return;
	}

	/* any other record kind */
	strcpy(tail, " UNKNOWN ");
}
/*
 * xlog_outrec
 *
 * Append a one-line summary of a record header to the NUL-terminated
 * string already in buf: previous-record pointers, xid, the number of
 * backup-block flags set in xl_info (only when non-zero), and the name
 * of the owning resource manager.
 *
 * No buffer length is passed in, so the caller must supply enough room.
 */
static void
xlog_outrec(char *buf, XLogRecord *record)
{
	int			nbkp = 0;
	int			slot;

	sprintf(buf + strlen(buf), " prev %u/%u; xprev %u/%u; xid %u ",
			record->xl_prev.xlogid, record->xl_prev.xrecoff,
			record->xl_xact_prev.xlogid, record->xl_xact_prev.xrecoff,
			record->xl_xid);

	/* count how many of the two backup-block flags are set */
	for (slot = 0; slot < 2; slot++)
	{
		if (record->xl_info & (XLR_SET_BKP_BLOCK(slot)))
			nbkp++;
	}

	if (nbkp != 0)
		sprintf(buf + strlen(buf), " ; bkpb %d ", nbkp);

	sprintf(buf + strlen(buf), " : %s ",
			RmgrTable[record->xl_rmid].rm_name);
}