Fix sloppy handling of corner-case errors in fd.c.

Several places in fd.c had badly-thought-through handling of error returns from lseek() and close(). The fact that those would seldom fail on valid FDs is probably the reason we've not noticed this up to now; but if they did fail, we'd get quite confused. LruDelete and LruInsert actually just Assert'd that lseek never fails, which is pretty awful on its face. In LruDelete, we indeed can't throw an error, because that's likely to get called during error abort and so throwing an error would probably just lead to an infinite loop. But by the same token, throwing an error from the close() right after that was ill-advised, not to mention that it would've left the LRU state corrupted since we'd already unlinked the VFD from the list. I also noticed that really, most of the time, we should know the current seek position and it shouldn't be necessary to do an lseek here at all. As patched, if we don't have a seek position and an lseek attempt doesn't give us one, we'll close the file but then subsequent re-open attempts will fail (except in the somewhat-unlikely case that a FileSeek(SEEK_SET) call comes between and allows us to re-establish a known target seek position). This isn't great but it won't result in any state corruption. Meanwhile, having an Assert instead of an honest test in LruInsert is really dangerous: if that lseek failed, a subsequent read or write would read or write from the start of the file, not where the caller expected, leading to data corruption. In both LruDelete and FileClose, if close() fails, just LOG that and mark the VFD closed anyway. Possibly leaking an FD is preferable to getting into an infinite loop or corrupting the VFD list. Besides, as far as I can tell from the POSIX spec, it's unspecified whether or not the file has been closed, so treating it as still open could be the wrong thing anyhow. I also fixed a number of other places that were being sloppy about behaving correctly when the seekPos is unknown. Also, I changed FileSeek to return -1 with EINVAL for the cases where it detects a bad offset, rather than throwing a hard elog(ERROR). It seemed pretty inconsistent that some bad-offset cases would get a failure return while others got elog(ERROR). It was missing an offset validity check for the SEEK_CUR case on a closed file, too. Back-patch to all supported branches, since all this code is fundamentally identical in all of them. Discussion: https://postgr.es/m/2982.1487617365@sss.pgh.pa.us
2017-02-21 17:51:27 -05:00 · 2017-02-21 17:51:27 -05:00 · f97de05a14
parent 30820982b2
commit f97de05a14
1 changed files with 136 additions and 59 deletions
--- a/src/backend/storage/file/fd.c
+++ b/src/backend/storage/file/fd.c
@ -160,7 +160,14 @@ int			max_safe_fds = 32;	/* default if not changed */

 #define FileIsNotOpen(file) (VfdCache[file].fd == VFD_CLOSED)

+/*
+ * Note: a VFD's seekPos is normally always valid, but if for some reason
+ * an lseek() fails, it might become set to FileUnknownPos.  We can struggle
+ * along without knowing the seek position in many cases, but in some places
+ * we have to fail if we don't have it.
+ */
 #define FileUnknownPos ((off_t) -1)
+#define FilePosIsUnknown(pos) ((pos) < 0)

 /* these are the assigned bits in fdstate below: */
 #define FD_TEMPORARY		(1 << 0)	/* T = delete when closed */
@ -174,7 +181,7 @@ typedef struct vfd
 	File		nextFree;		/* link to next free VFD, if in freelist */
 	File		lruMoreRecently;	/* doubly linked recency-of-use list */
 	File		lruLessRecently;
-	off_t		seekPos;		/* current logical file position */
+	off_t		seekPos;		/* current logical file position, or -1 */
 	off_t		fileSize;		/* current size of file (0 if not temporary) */
 	char	   *fileName;		/* name of file, or NULL for unused VFD */
 	/* NB: fileName is malloc'd, and must be free'd when closing the VFD */
@ -967,19 +974,33 @@ LruDelete(File file)

 	vfdP = &VfdCache[file];

+	/*
+	 * Normally we should know the seek position, but if for some reason we
+	 * have lost track of it, try again to get it.  If we still can't get it,
+	 * we have a problem: we will be unable to restore the file seek position
+	 * when and if the file is re-opened.  But we can't really throw an error
+	 * and refuse to close the file, or activities such as transaction cleanup
+	 * will be broken.
+	 */
+	if (FilePosIsUnknown(vfdP->seekPos))
+	{
+		vfdP->seekPos = lseek(vfdP->fd, (off_t) 0, SEEK_CUR);
+		if (FilePosIsUnknown(vfdP->seekPos))
+			elog(LOG, "could not seek file \"%s\" before closing: %m",
+				 vfdP->fileName);
+	}
+
+	/*
+	 * Close the file.  We aren't expecting this to fail; if it does, better
+	 * to leak the FD than to mess up our internal state.
+	 */
+	if (close(vfdP->fd))
+		elog(LOG, "could not close file \"%s\": %m", vfdP->fileName);
+	vfdP->fd = VFD_CLOSED;
+	--nfile;
+
 	/* delete the vfd record from the LRU ring */
 	Delete(file);
-
-	/* save the seek position */
-	vfdP->seekPos = lseek(vfdP->fd, (off_t) 0, SEEK_CUR);
-	Assert(vfdP->seekPos != (off_t) -1);
-
-	/* close the file */
-	if (close(vfdP->fd))
-		elog(ERROR, "could not close file \"%s\": %m", vfdP->fileName);
-
-	--nfile;
-	vfdP->fd = VFD_CLOSED;
 }

 static void
@ -1030,22 +1051,39 @@ LruInsert(File file)
 								 vfdP->fileMode);
 		if (vfdP->fd < 0)
 		{
-			DO_DB(elog(LOG, "RE_OPEN FAILED: %d", errno));
+			DO_DB(elog(LOG, "re-open failed: %m"));
 			return -1;
 		}
 		else
 		{
-			DO_DB(elog(LOG, "RE_OPEN SUCCESS"));
 			++nfile;
 		}

-		/* seek to the right position */
+		/*
+		 * Seek to the right position.  We need no special case for seekPos
+		 * equal to FileUnknownPos, as lseek() will certainly reject that
+		 * (thus completing the logic noted in LruDelete() that we will fail
+		 * to re-open a file if we couldn't get its seek position before
+		 * closing).
+		 */
 		if (vfdP->seekPos != (off_t) 0)
 		{
-			off_t returnValue PG_USED_FOR_ASSERTS_ONLY;
+			if (lseek(vfdP->fd, vfdP->seekPos, SEEK_SET) < 0)
+			{
+				/*
+				 * If we fail to restore the seek position, treat it like an
+				 * open() failure.
+				 */
+				int			save_errno = errno;

-			returnValue = lseek(vfdP->fd, vfdP->seekPos, SEEK_SET);
-			Assert(returnValue != (off_t) -1);
+				elog(LOG, "could not seek file \"%s\" after re-opening: %m",
+					 vfdP->fileName);
+				(void) close(vfdP->fd);
+				vfdP->fd = VFD_CLOSED;
+				--nfile;
+				errno = save_errno;
+				return -1;
+			}
 		}
 	}

@ -1428,15 +1466,15 @@ FileClose(File file)

 	if (!FileIsNotOpen(file))
 	{
-		/* remove the file from the lru ring */
-		Delete(file);
-
 		/* close the file */
 		if (close(vfdP->fd))
-			elog(ERROR, "could not close file \"%s\": %m", vfdP->fileName);
+			elog(LOG, "could not close file \"%s\": %m", vfdP->fileName);

 		--nfile;
 		vfdP->fd = VFD_CLOSED;
+
+		/* remove the file from the lru ring */
+		Delete(file);
 	}

 	/*
@ -1566,6 +1604,7 @@ int
 FileRead(File file, char *buffer, int amount)
 {
 	int			returnCode;
+	Vfd		   *vfdP;

 	Assert(FileIsValid(file));

@ -1578,11 +1617,17 @@ FileRead(File file, char *buffer, int amount)
 	if (returnCode < 0)
 		return returnCode;

+	vfdP = &VfdCache[file];
+
 retry:
-	returnCode = read(VfdCache[file].fd, buffer, amount);
+	returnCode = read(vfdP->fd, buffer, amount);

 	if (returnCode >= 0)
-		VfdCache[file].seekPos += returnCode;
+	{
+		/* if seekPos is unknown, leave it that way */
+		if (!FilePosIsUnknown(vfdP->seekPos))
+			vfdP->seekPos += returnCode;
+	}
 	else
 	{
 		/*
@ -1611,7 +1656,7 @@ retry:
 			goto retry;

 		/* Trouble, so assume we don't know the file position anymore */
-		VfdCache[file].seekPos = FileUnknownPos;
+		vfdP->seekPos = FileUnknownPos;
 	}

 	return returnCode;
@ -1621,6 +1666,7 @@ int
 FileWrite(File file, char *buffer, int amount)
 {
 	int			returnCode;
+	Vfd		   *vfdP;

 	Assert(FileIsValid(file));

@ -1633,6 +1679,8 @@ FileWrite(File file, char *buffer, int amount)
 	if (returnCode < 0)
 		return returnCode;

+	vfdP = &VfdCache[file];
+
 	/*
 	 * If enforcing temp_file_limit and it's a temp file, check to see if the
 	 * write would overrun temp_file_limit, and throw error if so.  Note: it's
@ -1641,15 +1689,28 @@ FileWrite(File file, char *buffer, int amount)
 	 * message if we do that.  All current callers would just throw error
 	 * immediately anyway, so this is safe at present.
 	 */
-	if (temp_file_limit >= 0 && (VfdCache[file].fdstate & FD_TEMPORARY))
+	if (temp_file_limit >= 0 && (vfdP->fdstate & FD_TEMPORARY))
 	{
-		off_t		newPos = VfdCache[file].seekPos + amount;
+		off_t		newPos;

-		if (newPos > VfdCache[file].fileSize)
+		/*
+		 * Normally we should know the seek position, but if for some reason
+		 * we have lost track of it, try again to get it.  Here, it's fine to
+		 * throw an error if we still can't get it.
+		 */
+		if (FilePosIsUnknown(vfdP->seekPos))
+		{
+			vfdP->seekPos = lseek(vfdP->fd, (off_t) 0, SEEK_CUR);
+			if (FilePosIsUnknown(vfdP->seekPos))
+				elog(ERROR, "could not seek file \"%s\": %m", vfdP->fileName);
+		}
+
+		newPos = vfdP->seekPos + amount;
+		if (newPos > vfdP->fileSize)
 		{
 			uint64		newTotal = temporary_files_size;

-			newTotal += newPos - VfdCache[file].fileSize;
+			newTotal += newPos - vfdP->fileSize;
 			if (newTotal > (uint64) temp_file_limit * (uint64) 1024)
 				ereport(ERROR,
 						(errcode(ERRCODE_CONFIGURATION_LIMIT_EXCEEDED),
@ -1660,7 +1721,7 @@ FileWrite(File file, char *buffer, int amount)

 retry:
 	errno = 0;
-	returnCode = write(VfdCache[file].fd, buffer, amount);
+	returnCode = write(vfdP->fd, buffer, amount);

 	/* if write didn't set errno, assume problem is no disk space */
 	if (returnCode != amount && errno == 0)
@ -1668,17 +1729,25 @@ retry:

 	if (returnCode >= 0)
 	{
-		VfdCache[file].seekPos += returnCode;
+		/* if seekPos is unknown, leave it that way */
+		if (!FilePosIsUnknown(vfdP->seekPos))
+			vfdP->seekPos += returnCode;

-		/* maintain fileSize and temporary_files_size if it's a temp file */
-		if (VfdCache[file].fdstate & FD_TEMPORARY)
+		/*
+		 * Maintain fileSize and temporary_files_size if it's a temp file.
+		 *
+		 * If seekPos is -1 (unknown), this will do nothing; but we could only
+		 * get here in that state if we're not enforcing temporary_files_size,
+		 * so we don't care.
+		 */
+		if (vfdP->fdstate & FD_TEMPORARY)
 		{
-			off_t		newPos = VfdCache[file].seekPos;
+			off_t		newPos = vfdP->seekPos;

-			if (newPos > VfdCache[file].fileSize)
+			if (newPos > vfdP->fileSize)
 			{
-				temporary_files_size += newPos - VfdCache[file].fileSize;
-				VfdCache[file].fileSize = newPos;
+				temporary_files_size += newPos - vfdP->fileSize;
+				vfdP->fileSize = newPos;
 			}
 		}
 	}
@ -1706,7 +1775,7 @@ retry:
 			goto retry;

 		/* Trouble, so assume we don't know the file position anymore */
-		VfdCache[file].seekPos = FileUnknownPos;
+		vfdP->seekPos = FileUnknownPos;
 	}

 	return returnCode;
@ -1732,7 +1801,7 @@ FileSync(File file)
 off_t
 FileSeek(File file, off_t offset, int whence)
 {
-	int			returnCode;
+	Vfd		   *vfdP;

 	Assert(FileIsValid(file));

@ -1741,25 +1810,33 @@ FileSeek(File file, off_t offset, int whence)
 			   (int64) VfdCache[file].seekPos,
 			   (int64) offset, whence));

+	vfdP = &VfdCache[file];
+
 	if (FileIsNotOpen(file))
 	{
 		switch (whence)
 		{
 			case SEEK_SET:
 				if (offset < 0)
-					elog(ERROR, "invalid seek offset: " INT64_FORMAT,
-						 (int64) offset);
-				VfdCache[file].seekPos = offset;
+				{
+					errno = EINVAL;
+					return (off_t) -1;
+				}
+				vfdP->seekPos = offset;
 				break;
 			case SEEK_CUR:
-				VfdCache[file].seekPos += offset;
+				if (FilePosIsUnknown(vfdP->seekPos) ||
+					vfdP->seekPos + offset < 0)
+				{
+					errno = EINVAL;
+					return (off_t) -1;
+				}
+				vfdP->seekPos += offset;
 				break;
 			case SEEK_END:
-				returnCode = FileAccess(file);
-				if (returnCode < 0)
-					return returnCode;
-				VfdCache[file].seekPos = lseek(VfdCache[file].fd,
-											   offset, whence);
+				if (FileAccess(file) < 0)
+					return (off_t) -1;
+				vfdP->seekPos = lseek(vfdP->fd, offset, whence);
 				break;
 			default:
 				elog(ERROR, "invalid whence: %d", whence);
@ -1772,27 +1849,27 @@ FileSeek(File file, off_t offset, int whence)
 		{
 			case SEEK_SET:
 				if (offset < 0)
-					elog(ERROR, "invalid seek offset: " INT64_FORMAT,
-						 (int64) offset);
-				if (VfdCache[file].seekPos != offset)
-					VfdCache[file].seekPos = lseek(VfdCache[file].fd,
-												   offset, whence);
+				{
+					errno = EINVAL;
+					return (off_t) -1;
+				}
+				if (vfdP->seekPos != offset)
+					vfdP->seekPos = lseek(vfdP->fd, offset, whence);
 				break;
 			case SEEK_CUR:
-				if (offset != 0 || VfdCache[file].seekPos == FileUnknownPos)
-					VfdCache[file].seekPos = lseek(VfdCache[file].fd,
-												   offset, whence);
+				if (offset != 0 || FilePosIsUnknown(vfdP->seekPos))
+					vfdP->seekPos = lseek(vfdP->fd, offset, whence);
 				break;
 			case SEEK_END:
-				VfdCache[file].seekPos = lseek(VfdCache[file].fd,
-											   offset, whence);
+				vfdP->seekPos = lseek(vfdP->fd, offset, whence);
 				break;
 			default:
 				elog(ERROR, "invalid whence: %d", whence);
 				break;
 		}
 	}
-	return VfdCache[file].seekPos;
+
+	return vfdP->seekPos;
 }

 /*