Tweak smgrblindwrt per advice from Vadim: add a parameter indicating
whether to do fsync or not, and if so (which should be seldom) just
do the fsync immediately.  This way we need not build data structures
in md.c/fd.c for blind writes.
This commit is contained in:
Tom Lane 2000-04-10 23:41:52 +00:00
parent a447ae2221
commit 2692d329eb
5 changed files with 95 additions and 135 deletions

View File

@ -8,7 +8,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/storage/buffer/bufmgr.c,v 1.78 2000/04/09 04:43:18 tgl Exp $ * $Header: /cvsroot/pgsql/src/backend/storage/buffer/bufmgr.c,v 1.79 2000/04/10 23:41:49 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
@ -1127,7 +1127,8 @@ BufferSync()
bufHdr->blind.relname, bufHdr->blind.relname,
bufdb, bufrel, bufdb, bufrel,
bufHdr->tag.blockNum, bufHdr->tag.blockNum,
(char *) MAKE_PTR(bufHdr->data)); (char *) MAKE_PTR(bufHdr->data),
true); /* must fsync */
} }
else else
{ {
@ -1529,7 +1530,8 @@ BufferReplace(BufferDesc *bufHdr)
status = smgrblindwrt(DEFAULT_SMGR, bufHdr->blind.dbname, status = smgrblindwrt(DEFAULT_SMGR, bufHdr->blind.dbname,
bufHdr->blind.relname, bufdb, bufrel, bufHdr->blind.relname, bufdb, bufrel,
bufHdr->tag.blockNum, bufHdr->tag.blockNum,
(char *) MAKE_PTR(bufHdr->data)); (char *) MAKE_PTR(bufHdr->data),
false); /* no fsync */
} }
#ifndef OPTIMIZE_SINGLE #ifndef OPTIMIZE_SINGLE
@ -1544,8 +1546,10 @@ BufferReplace(BufferDesc *bufHdr)
return FALSE; return FALSE;
/* If we had marked this buffer as needing to be fsync'd, we can forget /* If we had marked this buffer as needing to be fsync'd, we can forget
* about that, because it's now the storage manager's responsibility. * about that, because it's now the storage manager's responsibility
* (but only if we called smgrwrite, not smgrblindwrt).
*/ */
if (reln != (Relation) NULL)
ClearBufferDirtiedByMe(BufferDescriptorGetBuffer(bufHdr), bufHdr); ClearBufferDirtiedByMe(BufferDescriptorGetBuffer(bufHdr), bufHdr);
BufferFlushCount++; BufferFlushCount++;

View File

@ -8,7 +8,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/storage/smgr/md.c,v 1.65 2000/04/09 04:43:20 tgl Exp $ * $Header: /cvsroot/pgsql/src/backend/storage/smgr/md.c,v 1.66 2000/04/10 23:41:51 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
@ -48,11 +48,10 @@
typedef struct _MdfdVec typedef struct _MdfdVec
{ {
int mdfd_vfd; /* fd number in vfd pool */ int mdfd_vfd; /* fd number in vfd pool */
int mdfd_flags; /* free, temporary */ int mdfd_flags; /* fd status flags */
/* these are the assigned bits in mdfd_flags: */ /* these are the assigned bits in mdfd_flags: */
#define MDFD_FREE (1 << 0)/* unused entry */ #define MDFD_FREE (1 << 0)/* unused entry */
#define MDFD_TEMP (1 << 1)/* close this entry at transaction end */
int mdfd_lstbcnt; /* most recent block count */ int mdfd_lstbcnt; /* most recent block count */
int mdfd_nextFree; /* next free vector */ int mdfd_nextFree; /* next free vector */
@ -72,7 +71,7 @@ static void mdclose_fd(int fd);
static int _mdfd_getrelnfd(Relation reln); static int _mdfd_getrelnfd(Relation reln);
static MdfdVec *_mdfd_openseg(Relation reln, int segno, int oflags); static MdfdVec *_mdfd_openseg(Relation reln, int segno, int oflags);
static MdfdVec *_mdfd_getseg(Relation reln, int blkno); static MdfdVec *_mdfd_getseg(Relation reln, int blkno);
static MdfdVec *_mdfd_blind_getseg(char *dbname, char *relname, static int _mdfd_blind_getseg(char *dbname, char *relname,
Oid dbid, Oid relid, int blkno); Oid dbid, Oid relid, int blkno);
static int _fdvec_alloc(void); static int _fdvec_alloc(void);
static void _fdvec_free(int); static void _fdvec_free(int);
@ -572,7 +571,8 @@ mdflush(Relation reln, BlockNumber blocknum, char *buffer)
* *
* We have to be able to do this using only the name and OID of * We have to be able to do this using only the name and OID of
* the database and relation in which the block belongs. Otherwise * the database and relation in which the block belongs. Otherwise
* this is just like mdwrite(). * this is much like mdwrite(). If dofsync is TRUE, then we fsync
* the file, making it more like mdflush().
*/ */
int int
mdblindwrt(char *dbname, mdblindwrt(char *dbname,
@ -580,15 +580,16 @@ mdblindwrt(char *dbname,
Oid dbid, Oid dbid,
Oid relid, Oid relid,
BlockNumber blkno, BlockNumber blkno,
char *buffer) char *buffer,
bool dofsync)
{ {
int status; int status;
long seekpos; long seekpos;
MdfdVec *v; int fd;
v = _mdfd_blind_getseg(dbname, relname, dbid, relid, blkno); fd = _mdfd_blind_getseg(dbname, relname, dbid, relid, blkno);
if (v == NULL) if (fd < 0)
return SM_FAIL; return SM_FAIL;
#ifndef LET_OS_MANAGE_FILESIZE #ifndef LET_OS_MANAGE_FILESIZE
@ -601,11 +602,22 @@ mdblindwrt(char *dbname,
seekpos = (long) (BLCKSZ * (blkno)); seekpos = (long) (BLCKSZ * (blkno));
#endif #endif
if (FileSeek(v->mdfd_vfd, seekpos, SEEK_SET) != seekpos) if (lseek(fd, seekpos, SEEK_SET) != seekpos)
{
close(fd);
return SM_FAIL; return SM_FAIL;
}
status = SM_SUCCESS; status = SM_SUCCESS;
if (FileWrite(v->mdfd_vfd, buffer, BLCKSZ) != BLCKSZ)
/* write and optionally sync the block */
if (write(fd, buffer, BLCKSZ) != BLCKSZ)
status = SM_FAIL;
else if (dofsync &&
pg_fsync(fd) < 0)
status = SM_FAIL;
if (close(fd) < 0)
status = SM_FAIL; status = SM_FAIL;
return status; return status;
@ -633,7 +645,8 @@ mdmarkdirty(Relation reln, BlockNumber blkno)
* *
* We have to be able to do this using only the name and OID of * We have to be able to do this using only the name and OID of
* the database and relation in which the block belongs. Otherwise * the database and relation in which the block belongs. Otherwise
* this is just like mdmarkdirty(). * this is much like mdmarkdirty(). However, we do the fsync immediately
* rather than building md/fd datastructures to postpone it till later.
*/ */
int int
mdblindmarkdirty(char *dbname, mdblindmarkdirty(char *dbname,
@ -642,16 +655,23 @@ mdblindmarkdirty(char *dbname,
Oid relid, Oid relid,
BlockNumber blkno) BlockNumber blkno)
{ {
MdfdVec *v; int status;
int fd;
v = _mdfd_blind_getseg(dbname, relname, dbid, relid, blkno); fd = _mdfd_blind_getseg(dbname, relname, dbid, relid, blkno);
if (v == NULL) if (fd < 0)
return SM_FAIL; return SM_FAIL;
FileMarkDirty(v->mdfd_vfd); status = SM_SUCCESS;
return SM_SUCCESS; if (pg_fsync(fd) < 0)
status = SM_FAIL;
if (close(fd) < 0)
status = SM_FAIL;
return status;
} }
/* /*
@ -820,15 +840,7 @@ mdcommit()
v = &Md_fdvec[i]; v = &Md_fdvec[i];
if (v->mdfd_flags & MDFD_FREE) if (v->mdfd_flags & MDFD_FREE)
continue; continue;
if (v->mdfd_flags & MDFD_TEMP) /* Sync the file entry */
{
/* Sync and close the file */
mdclose_fd(i);
}
else
{
/* Sync, but keep the file entry */
#ifndef LET_OS_MANAGE_FILESIZE #ifndef LET_OS_MANAGE_FILESIZE
for ( ; v != (MdfdVec *) NULL; v = v->mdfd_chain) for ( ; v != (MdfdVec *) NULL; v = v->mdfd_chain)
#else #else
@ -839,7 +851,6 @@ mdcommit()
return SM_FAIL; return SM_FAIL;
} }
} }
}
return SM_SUCCESS; return SM_SUCCESS;
#endif /* XLOG */ #endif /* XLOG */
@ -854,21 +865,9 @@ mdcommit()
int int
mdabort() mdabort()
{ {
int i; /* We don't actually have to do anything here. fd.c will discard
MdfdVec *v; * fsync-needed bits in its AtEOXact_Files() routine.
*/
for (i = 0; i < CurFd; i++)
{
v = &Md_fdvec[i];
if (v->mdfd_flags & MDFD_FREE)
continue;
if (v->mdfd_flags & MDFD_TEMP)
{
/* Close the file */
mdclose_fd(i);
}
}
return SM_SUCCESS; return SM_SUCCESS;
} }
@ -1057,102 +1056,52 @@ _mdfd_getseg(Relation reln, int blkno)
return v; return v;
} }
/* Find the segment of the relation holding the specified block. /*
* This is the same as _mdfd_getseg() except that we must work * Find the segment of the relation holding the specified block.
* "blind" with no Relation struct.
* *
* NOTE: we have no easy way to tell whether a FD already exists for the * This performs the same work as _mdfd_getseg() except that we must work
* target relation, so we always make a new one. This should probably * "blind" with no Relation struct. We assume that we are not likely to
* be improved somehow, but I doubt it's a significant performance issue * touch the same relation again soon, so we do not create an FD entry for
* under normal circumstances. The FD is marked to be closed at end of xact * the relation --- we just open a kernel file descriptor which will be
* so that we don't accumulate a lot of dead FDs. * used and promptly closed. The return value is the kernel descriptor,
* or -1 on failure.
*/ */
static MdfdVec * static int
_mdfd_blind_getseg(char *dbname, char *relname, Oid dbid, Oid relid, _mdfd_blind_getseg(char *dbname, char *relname, Oid dbid, Oid relid,
int blkno) int blkno)
{ {
MdfdVec *v;
char *path; char *path;
int fd; int fd;
int vfd;
#ifndef LET_OS_MANAGE_FILESIZE #ifndef LET_OS_MANAGE_FILESIZE
int segno; int segno;
int targsegno;
#endif #endif
/* construct the path to the file and open it */ /* construct the path to the relation */
path = relpath_blind(dbname, relname, dbid, relid); path = relpath_blind(dbname, relname, dbid, relid);
#ifndef __CYGWIN32__
fd = FileNameOpenFile(path, O_RDWR, 0600);
#else
fd = FileNameOpenFile(path, O_RDWR | O_BINARY, 0600);
#endif
if (fd < 0)
return NULL;
vfd = _fdvec_alloc();
if (vfd < 0)
return NULL;
Md_fdvec[vfd].mdfd_vfd = fd;
Md_fdvec[vfd].mdfd_flags = MDFD_TEMP;
Md_fdvec[vfd].mdfd_lstbcnt = _mdnblocks(fd, BLCKSZ);
#ifndef LET_OS_MANAGE_FILESIZE #ifndef LET_OS_MANAGE_FILESIZE
Md_fdvec[vfd].mdfd_chain = (MdfdVec *) NULL; /* append the '.segno', if needed */
segno = blkno / RELSEG_SIZE;
#ifdef DIAGNOSTIC if (segno > 0)
if (Md_fdvec[vfd].mdfd_lstbcnt > RELSEG_SIZE)
elog(FATAL, "segment too big on relopen!");
#endif
targsegno = blkno / RELSEG_SIZE;
for (v = &Md_fdvec[vfd], segno = 1; segno <= targsegno; segno++)
{ {
char *segpath; char *segpath = (char *) palloc(strlen(path) + 12);
MdfdVec *newv;
MemoryContext oldcxt;
segpath = (char *) palloc(strlen(path) + 12);
sprintf(segpath, "%s.%d", path, segno); sprintf(segpath, "%s.%d", path, segno);
pfree(path);
path = segpath;
}
#endif
#ifndef __CYGWIN32__ #ifndef __CYGWIN32__
fd = FileNameOpenFile(segpath, O_RDWR | O_CREAT, 0600); fd = open(path, O_RDWR, 0600);
#else #else
fd = FileNameOpenFile(segpath, O_RDWR | O_BINARY | O_CREAT, 0600); fd = open(path, O_RDWR | O_BINARY, 0600);
#endif
pfree(segpath);
if (fd < 0)
return (MdfdVec *) NULL;
/* allocate an mdfdvec entry for it */
oldcxt = MemoryContextSwitchTo(MdCxt);
newv = (MdfdVec *) palloc(sizeof(MdfdVec));
MemoryContextSwitchTo(oldcxt);
/* fill the entry */
newv->mdfd_vfd = fd;
newv->mdfd_flags = MDFD_TEMP;
newv->mdfd_lstbcnt = _mdnblocks(fd, BLCKSZ);
newv->mdfd_chain = (MdfdVec *) NULL;
#ifdef DIAGNOSTIC
if (newv->mdfd_lstbcnt > RELSEG_SIZE)
elog(FATAL, "segment too big on open!");
#endif
v->mdfd_chain = newv;
v = newv;
}
#else
v = &Md_fdvec[vfd];
#endif #endif
pfree(path); pfree(path);
return v; return fd;
} }
static BlockNumber static BlockNumber

View File

@ -11,7 +11,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/storage/smgr/Attic/mm.c,v 1.18 2000/01/26 05:57:05 momjian Exp $ * $Header: /cvsroot/pgsql/src/backend/storage/smgr/Attic/mm.c,v 1.19 2000/04/10 23:41:51 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
@ -478,7 +478,8 @@ mmblindwrt(char *dbstr,
Oid dbid, Oid dbid,
Oid relid, Oid relid,
BlockNumber blkno, BlockNumber blkno,
char *buffer) char *buffer,
bool dofsync)
{ {
return SM_FAIL; return SM_FAIL;
} }

View File

@ -11,7 +11,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/storage/smgr/smgr.c,v 1.33 2000/04/09 04:43:20 tgl Exp $ * $Header: /cvsroot/pgsql/src/backend/storage/smgr/smgr.c,v 1.34 2000/04/10 23:41:52 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
@ -38,7 +38,8 @@ typedef struct f_smgr
char *buffer); char *buffer);
int (*smgr_blindwrt) (char *dbname, char *relname, int (*smgr_blindwrt) (char *dbname, char *relname,
Oid dbid, Oid relid, Oid dbid, Oid relid,
BlockNumber blkno, char *buffer); BlockNumber blkno, char *buffer,
bool dofsync);
int (*smgr_markdirty) (Relation reln, BlockNumber blkno); int (*smgr_markdirty) (Relation reln, BlockNumber blkno);
int (*smgr_blindmarkdirty) (char *dbname, char *relname, int (*smgr_blindmarkdirty) (char *dbname, char *relname,
Oid dbid, Oid relid, Oid dbid, Oid relid,
@ -293,7 +294,8 @@ smgrflush(int16 which, Relation reln, BlockNumber blocknum, char *buffer)
* this case, the buffer manager will call smgrblindwrt() with * this case, the buffer manager will call smgrblindwrt() with
* the name and OID of the database and the relation to which the * the name and OID of the database and the relation to which the
* buffer belongs. Every storage manager must be able to force * buffer belongs. Every storage manager must be able to force
* this page down to stable storage in this circumstance. * this page down to stable storage in this circumstance. The
* write should be synchronous if dofsync is true.
*/ */
int int
smgrblindwrt(int16 which, smgrblindwrt(int16 which,
@ -302,7 +304,8 @@ smgrblindwrt(int16 which,
Oid dbid, Oid dbid,
Oid relid, Oid relid,
BlockNumber blkno, BlockNumber blkno,
char *buffer) char *buffer,
bool dofsync)
{ {
char *dbstr; char *dbstr;
char *relstr; char *relstr;
@ -313,7 +316,7 @@ smgrblindwrt(int16 which,
relstr = pstrdup(relname); relstr = pstrdup(relname);
status = (*(smgrsw[which].smgr_blindwrt)) (dbstr, relstr, dbid, relid, status = (*(smgrsw[which].smgr_blindwrt)) (dbstr, relstr, dbid, relid,
blkno, buffer); blkno, buffer, dofsync);
if (status == SM_FAIL) if (status == SM_FAIL)
elog(ERROR, "cannot write block %d of %s [%s] blind", elog(ERROR, "cannot write block %d of %s [%s] blind",

View File

@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2000, PostgreSQL, Inc * Portions Copyright (c) 1996-2000, PostgreSQL, Inc
* Portions Copyright (c) 1994, Regents of the University of California * Portions Copyright (c) 1994, Regents of the University of California
* *
* $Id: smgr.h,v 1.18 2000/04/09 04:43:18 tgl Exp $ * $Id: smgr.h,v 1.19 2000/04/10 23:41:45 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
@ -37,7 +37,8 @@ extern int smgrflush(int16 which, Relation reln, BlockNumber blocknum,
char *buffer); char *buffer);
extern int smgrblindwrt(int16 which, char *dbname, char *relname, extern int smgrblindwrt(int16 which, char *dbname, char *relname,
Oid dbid, Oid relid, Oid dbid, Oid relid,
BlockNumber blkno, char *buffer); BlockNumber blkno, char *buffer,
bool dofsync);
extern int smgrmarkdirty(int16 which, Relation reln, BlockNumber blkno); extern int smgrmarkdirty(int16 which, Relation reln, BlockNumber blkno);
extern int smgrblindmarkdirty(int16 which, char *dbname, char *relname, extern int smgrblindmarkdirty(int16 which, char *dbname, char *relname,
Oid dbid, Oid relid, Oid dbid, Oid relid,
@ -62,7 +63,8 @@ extern int mdread(Relation reln, BlockNumber blocknum, char *buffer);
extern int mdwrite(Relation reln, BlockNumber blocknum, char *buffer); extern int mdwrite(Relation reln, BlockNumber blocknum, char *buffer);
extern int mdflush(Relation reln, BlockNumber blocknum, char *buffer); extern int mdflush(Relation reln, BlockNumber blocknum, char *buffer);
extern int mdblindwrt(char *dbname, char *relname, Oid dbid, Oid relid, extern int mdblindwrt(char *dbname, char *relname, Oid dbid, Oid relid,
BlockNumber blkno, char *buffer); BlockNumber blkno, char *buffer,
bool dofsync);
extern int mdmarkdirty(Relation reln, BlockNumber blkno); extern int mdmarkdirty(Relation reln, BlockNumber blkno);
extern int mdblindmarkdirty(char *dbname, char *relname, Oid dbid, Oid relid, extern int mdblindmarkdirty(char *dbname, char *relname, Oid dbid, Oid relid,
BlockNumber blkno); BlockNumber blkno);
@ -84,7 +86,8 @@ extern int mmread(Relation reln, BlockNumber blocknum, char *buffer);
extern int mmwrite(Relation reln, BlockNumber blocknum, char *buffer); extern int mmwrite(Relation reln, BlockNumber blocknum, char *buffer);
extern int mmflush(Relation reln, BlockNumber blocknum, char *buffer); extern int mmflush(Relation reln, BlockNumber blocknum, char *buffer);
extern int mmblindwrt(char *dbname, char *relname, Oid dbid, Oid relid, extern int mmblindwrt(char *dbname, char *relname, Oid dbid, Oid relid,
BlockNumber blkno, char *buffer); BlockNumber blkno, char *buffer,
bool dofsync);
extern int mmmarkdirty(Relation reln, BlockNumber blkno); extern int mmmarkdirty(Relation reln, BlockNumber blkno);
extern int mmblindmarkdirty(char *dbname, char *relname, Oid dbid, Oid relid, extern int mmblindmarkdirty(char *dbname, char *relname, Oid dbid, Oid relid,
BlockNumber blkno); BlockNumber blkno);