Clean up assorted messiness around AllocateDir() usage.

This patch fixes a couple of low-probability bugs that could lead to
reporting an irrelevant errno value (and hence possibly a wrong SQLSTATE)
concerning directory-open or file-open failures.  It also fixes places
where we took shortcuts in reporting such errors, either by using elog
instead of ereport or by using ereport but forgetting to specify an
errcode.  And it eliminates a lot of just plain redundant error-handling
code.

In service of all this, export fd.c's formerly-static function
ReadDirExtended, so that external callers can make use of the coding
pattern

	dir = AllocateDir(path);
	while ((de = ReadDirExtended(dir, path, LOG)) != NULL)

if they'd like to treat directory-open failures as mere LOG conditions
rather than errors.  Also fix FreeDir to be a no-op if we reach it
with dir == NULL, as such a coding pattern would cause.

Then, remove code at many call sites that was throwing an error or log
message for AllocateDir failure, as ReadDir or ReadDirExtended can handle
that job just fine.  Aside from being a net code savings, this gets rid of
a lot of not-quite-up-to-snuff reports, as mentioned above.  (In some
places these changes result in replacing a custom error message such as
"could not open tablespace directory" with more generic wording "could not
open directory", but it was agreed that the custom wording buys little as
long as we report the directory name.)  In some other call sites where we
can't just remove code, change the error reports to be fully
project-style-compliant.

Also reorder code in restoreTwoPhaseData that was acquiring a lock
between AllocateDir and ReadDir; in the unlikely but surely not
impossible case that LWLockAcquire changes errno, AllocateDir failures
would be misreported.  There is no great value in opening the directory
before acquiring TwoPhaseStateLock, so just do it in the other order.

Also fix CheckXLogRemoved to guarantee that it preserves errno,
as quite a number of call sites are implicitly assuming.  (Again,
it's unlikely but I think not impossible that errno could change
during a SpinLockAcquire.  If so, this function was broken for its
own purposes as well as breaking callers.)

And change a few places that were using not-per-project-style messages,
such as "could not read directory" when "could not open directory" is
more correct.

Back-patch the exporting of ReadDirExtended, in case we have occasion
to back-patch some fix that makes use of it; it's not needed right now
but surely making it global is pretty harmless.  Also back-patch the
restoreTwoPhaseData and CheckXLogRemoved fixes.  The rest of this is
essentially cosmetic and need not get back-patched.

Michael Paquier, with a bit of additional work by me

Discussion: https://postgr.es/m/CAB7nPqRpOCxjiirHmebEFhXVTK7V5Jvw4bz82p7Oimtsm3TyZA@mail.gmail.com
This commit is contained in:
Tom Lane 2017-12-04 17:02:52 -05:00
parent ab6eaee884
commit 2069e6faa0
17 changed files with 102 additions and 153 deletions

View File

@ -319,7 +319,7 @@ pg_logdir_ls(PG_FUNCTION_ARGS)
if (!fctx->dirdesc)
ereport(ERROR,
(errcode_for_file_access(),
errmsg("could not read directory \"%s\": %m",
errmsg("could not open directory \"%s\": %m",
fctx->location)));
funcctx->user_fctx = fctx;

View File

@ -1735,8 +1735,8 @@ restoreTwoPhaseData(void)
DIR *cldir;
struct dirent *clde;
cldir = AllocateDir(TWOPHASE_DIR);
LWLockAcquire(TwoPhaseStateLock, LW_EXCLUSIVE);
cldir = AllocateDir(TWOPHASE_DIR);
while ((clde = ReadDir(cldir, TWOPHASE_DIR)) != NULL)
{
if (strlen(clde->d_name) == 8 &&

View File

@ -3764,10 +3764,16 @@ PreallocXlogFiles(XLogRecPtr endptr)
* existed while the server has been running, as this function always
* succeeds if no WAL segments have been removed since startup.
* 'tli' is only used in the error message.
*
* Note: this function guarantees to keep errno unchanged on return.
* This supports callers that use this to possibly deliver a better
* error message about a missing file, while still being able to throw
* a normal file-access error afterwards, if this does return.
*/
void
CheckXLogRemoved(XLogSegNo segno, TimeLineID tli)
{
int save_errno = errno;
XLogSegNo lastRemovedSegNo;
SpinLockAcquire(&XLogCtl->info_lck);
@ -3779,11 +3785,13 @@ CheckXLogRemoved(XLogSegNo segno, TimeLineID tli)
char filename[MAXFNAMELEN];
XLogFileName(filename, tli, segno, wal_segment_size);
errno = save_errno;
ereport(ERROR,
(errcode_for_file_access(),
errmsg("requested WAL segment %s has already been removed",
filename)));
}
errno = save_errno;
}
/*
@ -3837,13 +3845,6 @@ RemoveOldXlogFiles(XLogSegNo segno, XLogRecPtr PriorRedoPtr, XLogRecPtr endptr)
struct dirent *xlde;
char lastoff[MAXFNAMELEN];
xldir = AllocateDir(XLOGDIR);
if (xldir == NULL)
ereport(ERROR,
(errcode_for_file_access(),
errmsg("could not open write-ahead log directory \"%s\": %m",
XLOGDIR)));
/*
* Construct a filename of the last segment to be kept. The timeline ID
* doesn't matter, we ignore that in the comparison. (During recovery,
@ -3854,6 +3855,8 @@ RemoveOldXlogFiles(XLogSegNo segno, XLogRecPtr PriorRedoPtr, XLogRecPtr endptr)
elog(DEBUG2, "attempting to remove WAL segments older than log file %s",
lastoff);
xldir = AllocateDir(XLOGDIR);
while ((xlde = ReadDir(xldir, XLOGDIR)) != NULL)
{
/* Ignore files that are not XLOG segments */
@ -3912,13 +3915,6 @@ RemoveNonParentXlogFiles(XLogRecPtr switchpoint, TimeLineID newTLI)
XLByteToPrevSeg(switchpoint, endLogSegNo, wal_segment_size);
xldir = AllocateDir(XLOGDIR);
if (xldir == NULL)
ereport(ERROR,
(errcode_for_file_access(),
errmsg("could not open write-ahead log directory \"%s\": %m",
XLOGDIR)));
/*
* Construct a filename of the last segment to be kept.
*/
@ -3927,6 +3923,8 @@ RemoveNonParentXlogFiles(XLogRecPtr switchpoint, TimeLineID newTLI)
elog(DEBUG2, "attempting to remove WAL segments newer than log file %s",
switchseg);
xldir = AllocateDir(XLOGDIR);
while ((xlde = ReadDir(xldir, XLOGDIR)) != NULL)
{
/* Ignore files that are not XLOG segments */
@ -4108,11 +4106,6 @@ CleanupBackupHistory(void)
char path[MAXPGPATH + sizeof(XLOGDIR)];
xldir = AllocateDir(XLOGDIR);
if (xldir == NULL)
ereport(ERROR,
(errcode_for_file_access(),
errmsg("could not open write-ahead log directory \"%s\": %m",
XLOGDIR)));
while ((xlde = ReadDir(xldir, XLOGDIR)) != NULL)
{

View File

@ -89,7 +89,9 @@ pg_start_backup(PG_FUNCTION_ARGS)
dir = AllocateDir("pg_tblspc");
if (!dir)
ereport(ERROR,
(errmsg("could not open directory \"%s\": %m", "pg_tblspc")));
(errcode_for_file_access(),
errmsg("could not open directory \"%s\": %m",
"pg_tblspc")));
if (exclusive)
{

View File

@ -673,11 +673,6 @@ pgarch_readyXlog(char *xlog)
snprintf(XLogArchiveStatusDir, MAXPGPATH, XLOGDIR "/archive_status");
rldir = AllocateDir(XLogArchiveStatusDir);
if (rldir == NULL)
ereport(ERROR,
(errcode_for_file_access(),
errmsg("could not open archive status directory \"%s\": %m",
XLogArchiveStatusDir)));
while ((rlde = ReadDir(rldir, XLogArchiveStatusDir)) != NULL)
{

View File

@ -367,9 +367,6 @@ perform_base_backup(basebackup_options *opt, DIR *tblspcdir)
XLogFileName(lastoff, ThisTimeLineID, endsegno, wal_segment_size);
dir = AllocateDir("pg_wal");
if (!dir)
ereport(ERROR,
(errmsg("could not open directory \"%s\": %m", "pg_wal")));
while ((de = ReadDir(dir, "pg_wal")) != NULL)
{
/* Does it look like a WAL segment, and is it in the range? */
@ -713,7 +710,9 @@ SendBaseBackup(BaseBackupCmd *cmd)
dir = AllocateDir("pg_tblspc");
if (!dir)
ereport(ERROR,
(errmsg("could not open directory \"%s\": %m", "pg_tblspc")));
(errcode_for_file_access(),
errmsg("could not open directory \"%s\": %m",
"pg_tblspc")));
perform_base_backup(&opt, dir);

View File

@ -47,10 +47,6 @@ copydir(char *fromdir, char *todir, bool recurse)
errmsg("could not create directory \"%s\": %m", todir)));
xldir = AllocateDir(fromdir);
if (xldir == NULL)
ereport(ERROR,
(errcode_for_file_access(),
errmsg("could not open directory \"%s\": %m", fromdir)));
while ((xlde = ReadDir(xldir, fromdir)) != NULL)
{
@ -90,10 +86,6 @@ copydir(char *fromdir, char *todir, bool recurse)
return;
xldir = AllocateDir(todir);
if (xldir == NULL)
ereport(ERROR,
(errcode_for_file_access(),
errmsg("could not open directory \"%s\": %m", todir)));
while ((xlde = ReadDir(xldir, todir)) != NULL)
{

View File

@ -318,7 +318,6 @@ static int FileAccess(File file);
static File OpenTemporaryFileInTablespace(Oid tblspcOid, bool rejectError);
static bool reserveAllocatedDesc(void);
static int FreeDesc(AllocateDesc *desc);
static struct dirent *ReadDirExtended(DIR *dir, const char *dirname, int elevel);
static void AtProcExit_Files(int code, Datum arg);
static void CleanupTempFiles(bool isProcExit);
@ -2587,6 +2586,10 @@ CloseTransientFile(int fd)
* necessary to open the directory, and with closing it after an elog.
* When done, call FreeDir rather than closedir.
*
* Returns NULL, with errno set, on failure. Note that failure detection
* is commonly left to the following call of ReadDir or ReadDirExtended;
* see the comments for ReadDir.
*
* Ideally this should be the *only* direct call of opendir() in the backend.
*/
DIR *
@ -2649,8 +2652,8 @@ TryAgain:
* FreeDir(dir);
*
* since a NULL dir parameter is taken as indicating AllocateDir failed.
* (Make sure errno hasn't been changed since AllocateDir if you use this
* shortcut.)
* (Make sure errno isn't changed between AllocateDir and ReadDir if you
* use this shortcut.)
*
* The pathname passed to AllocateDir must be passed to this routine too,
* but it is only used for error reporting.
@ -2662,10 +2665,15 @@ ReadDir(DIR *dir, const char *dirname)
}
/*
* Alternate version that allows caller to specify the elevel for any
* error report. If elevel < ERROR, returns NULL on any error.
* Alternate version of ReadDir that allows caller to specify the elevel
* for any error report (whether it's reporting an initial failure of
* AllocateDir or a subsequent directory read failure).
*
* If elevel < ERROR, returns NULL after any error. With the normal coding
* pattern, this will result in falling out of the loop immediately as
* though the directory contained no (more) entries.
*/
static struct dirent *
struct dirent *
ReadDirExtended(DIR *dir, const char *dirname, int elevel)
{
struct dirent *dent;
@ -2695,14 +2703,22 @@ ReadDirExtended(DIR *dir, const char *dirname, int elevel)
/*
* Close a directory opened with AllocateDir.
*
* Note we do not check closedir's return value --- it is up to the caller
* to handle close errors.
* Returns closedir's return value (with errno set if it's not 0).
* Note we do not check the return value --- it is up to the caller
* to handle close errors if wanted.
*
* Does nothing if dir == NULL; we assume that directory open failure was
* already reported if desired.
*/
int
FreeDir(DIR *dir)
{
int i;
/* Nothing to do if AllocateDir failed */
if (dir == NULL)
return 0;
DO_DB(elog(LOG, "FreeDir: Allocated %d", numAllocatedDescs));
/* Remove dir from list of allocated dirs, if it's present */
@ -3043,9 +3059,10 @@ RemovePgTempFilesInDir(const char *tmpdirname, bool unlink_all)
{
/* anything except ENOENT is fishy */
if (errno != ENOENT)
elog(LOG,
"could not open temporary-files directory \"%s\": %m",
tmpdirname);
ereport(LOG,
(errcode_for_file_access(),
errmsg("could not open directory \"%s\": %m",
tmpdirname)));
return;
}
@ -3099,9 +3116,10 @@ RemovePgTempRelationFiles(const char *tsdirname)
{
/* anything except ENOENT is fishy */
if (errno != ENOENT)
elog(LOG,
"could not open tablespace directory \"%s\": %m",
tsdirname);
ereport(LOG,
(errcode_for_file_access(),
errmsg("could not open directory \"%s\": %m",
tsdirname)));
return;
}
@ -3136,16 +3154,8 @@ RemovePgTempRelationFilesInDbspace(const char *dbspacedirname)
char rm_path[MAXPGPATH * 2];
dbspace_dir = AllocateDir(dbspacedirname);
if (dbspace_dir == NULL)
{
/* we just saw this directory, so it really ought to be there */
elog(LOG,
"could not open dbspace directory \"%s\": %m",
dbspacedirname);
return;
}
while ((de = ReadDir(dbspace_dir, dbspacedirname)) != NULL)
while ((de = ReadDirExtended(dbspace_dir, dbspacedirname, LOG)) != NULL)
{
if (!looks_like_temp_rel_name(de->d_name))
continue;
@ -3310,13 +3320,6 @@ walkdir(const char *path,
struct dirent *de;
dir = AllocateDir(path);
if (dir == NULL)
{
ereport(elevel,
(errcode_for_file_access(),
errmsg("could not open directory \"%s\": %m", path)));
return;
}
while ((de = ReadDirExtended(dir, path, elevel)) != NULL)
{
@ -3356,9 +3359,11 @@ walkdir(const char *path,
/*
* It's important to fsync the destination directory itself as individual
* file fsyncs don't guarantee that the directory entry for the file is
* synced.
* synced. However, skip this if AllocateDir failed; the action function
* might not be robust against that.
*/
(*action) (path, true, elevel);
if (dir)
(*action) (path, true, elevel);
}

View File

@ -111,9 +111,10 @@ ResetUnloggedRelationsInTablespaceDir(const char *tsdirname, int op)
{
/* anything except ENOENT is fishy */
if (errno != ENOENT)
elog(LOG,
"could not open tablespace directory \"%s\": %m",
tsdirname);
ereport(LOG,
(errcode_for_file_access(),
errmsg("could not open directory \"%s\": %m",
tsdirname)));
return;
}
@ -164,9 +165,10 @@ ResetUnloggedRelationsInDbspaceDir(const char *dbspacedirname, int op)
dbspace_dir = AllocateDir(dbspacedirname);
if (dbspace_dir == NULL)
{
elog(LOG,
"could not open dbspace directory \"%s\": %m",
dbspacedirname);
ereport(LOG,
(errcode_for_file_access(),
errmsg("could not open directory \"%s\": %m",
dbspacedirname)));
return;
}
@ -226,9 +228,10 @@ ResetUnloggedRelationsInDbspaceDir(const char *dbspacedirname, int op)
dbspace_dir = AllocateDir(dbspacedirname);
if (dbspace_dir == NULL)
{
elog(LOG,
"could not open dbspace directory \"%s\": %m",
dbspacedirname);
ereport(LOG,
(errcode_for_file_access(),
errmsg("could not open directory \"%s\": %m",
dbspacedirname)));
hash_destroy(hash);
return;
}
@ -296,9 +299,10 @@ ResetUnloggedRelationsInDbspaceDir(const char *dbspacedirname, int op)
if (dbspace_dir == NULL)
{
/* we just saw this directory, so it really ought to be there */
elog(LOG,
"could not open dbspace directory \"%s\": %m",
dbspacedirname);
ereport(LOG,
(errcode_for_file_access(),
errmsg("could not open directory \"%s\": %m",
dbspacedirname)));
return;
}
@ -349,9 +353,10 @@ ResetUnloggedRelationsInDbspaceDir(const char *dbspacedirname, int op)
if (dbspace_dir == NULL)
{
/* we just saw this directory, so it really ought to be there */
elog(LOG,
"could not open dbspace directory \"%s\": %m",
dbspacedirname);
ereport(LOG,
(errcode_for_file_access(),
errmsg("could not open directory \"%s\": %m",
dbspacedirname)));
return;
}

View File

@ -294,14 +294,9 @@ dsm_cleanup_for_mmap(void)
DIR *dir;
struct dirent *dent;
/* Open the directory; can't use AllocateDir in postmaster. */
if ((dir = AllocateDir(PG_DYNSHMEM_DIR)) == NULL)
ereport(ERROR,
(errcode_for_file_access(),
errmsg("could not open directory \"%s\": %m",
PG_DYNSHMEM_DIR)));
/* Scan the directory for something with a name of the correct format. */
dir = AllocateDir(PG_DYNSHMEM_DIR);
/* Scan for something with a name of the correct format. */
while ((dent = ReadDir(dir, PG_DYNSHMEM_DIR)) != NULL)
{
if (strncmp(dent->d_name, PG_DYNSHMEM_MMAP_FILE_PREFIX,
@ -315,17 +310,9 @@ dsm_cleanup_for_mmap(void)
/* We found a matching file; so remove it. */
if (unlink(buf) != 0)
{
int save_errno;
save_errno = errno;
closedir(dir);
errno = save_errno;
ereport(ERROR,
(errcode_for_file_access(),
errmsg("could not remove file \"%s\": %m", buf)));
}
}
}

View File

@ -110,11 +110,6 @@ calculate_database_size(Oid dbOid)
/* Scan the non-default tablespaces */
snprintf(dirpath, MAXPGPATH, "pg_tblspc");
dirdesc = AllocateDir(dirpath);
if (!dirdesc)
ereport(ERROR,
(errcode_for_file_access(),
errmsg("could not open tablespace directory \"%s\": %m",
dirpath)));
while ((direntry = ReadDir(dirdesc, dirpath)) != NULL)
{

View File

@ -508,7 +508,7 @@ pg_ls_dir_files(FunctionCallInfo fcinfo, const char *dir)
if (!fctx->dirdesc)
ereport(ERROR,
(errcode_for_file_access(),
errmsg("could not read directory \"%s\": %m",
errmsg("could not open directory \"%s\": %m",
fctx->location)));
funcctx->user_fctx = fctx;

View File

@ -26,6 +26,7 @@
#include "catalog/pg_tablespace.h"
#include "catalog/pg_type.h"
#include "commands/dbcommands.h"
#include "commands/tablespace.h"
#include "common/keywords.h"
#include "funcapi.h"
#include "miscadmin.h"
@ -425,9 +426,9 @@ pg_tablespace_databases(PG_FUNCTION_ARGS)
while ((de = ReadDir(fctx->dirdesc, fctx->location)) != NULL)
{
char *subdir;
DIR *dirdesc;
Oid datOid = atooid(de->d_name);
char *subdir;
bool isempty;
/* this test skips . and .., but is awfully weak */
if (!datOid)
@ -436,16 +437,10 @@ pg_tablespace_databases(PG_FUNCTION_ARGS)
/* if database subdir is empty, don't report tablespace as used */
subdir = psprintf("%s/%s", fctx->location, de->d_name);
dirdesc = AllocateDir(subdir);
while ((de = ReadDir(dirdesc, subdir)) != NULL)
{
if (strcmp(de->d_name, ".") != 0 && strcmp(de->d_name, "..") != 0)
break;
}
FreeDir(dirdesc);
isempty = directory_is_empty(subdir);
pfree(subdir);
if (!de)
if (isempty)
continue; /* indeed, nothing in it */
SRF_RETURN_NEXT(funcctx, ObjectIdGetDatum(datOid));

View File

@ -6119,14 +6119,8 @@ RelationCacheInitFileRemove(void)
/* Scan the tablespace link directory to find non-default tablespaces */
dir = AllocateDir(tblspcdir);
if (dir == NULL)
{
elog(LOG, "could not open tablespace link directory \"%s\": %m",
tblspcdir);
return;
}
while ((de = ReadDir(dir, tblspcdir)) != NULL)
while ((de = ReadDirExtended(dir, tblspcdir, LOG)) != NULL)
{
if (strspn(de->d_name, "0123456789") == strlen(de->d_name))
{
@ -6150,14 +6144,8 @@ RelationCacheInitFileRemoveInDir(const char *tblspcpath)
/* Scan the tablespace directory to find per-database directories */
dir = AllocateDir(tblspcpath);
if (dir == NULL)
{
elog(LOG, "could not open tablespace directory \"%s\": %m",
tblspcpath);
return;
}
while ((de = ReadDir(dir, tblspcpath)) != NULL)
while ((de = ReadDirExtended(dir, tblspcpath, LOG)) != NULL)
{
if (strspn(de->d_name, "0123456789") == strlen(de->d_name))
{

View File

@ -1619,27 +1619,25 @@ DeleteAllExportedSnapshotFiles(void)
DIR *s_dir;
struct dirent *s_de;
if (!(s_dir = AllocateDir(SNAPSHOT_EXPORT_DIR)))
{
/*
* We really should have that directory in a sane cluster setup. But
* then again if we don't, it's not fatal enough to make it FATAL.
* Since we're running in the postmaster, LOG is our best bet.
*/
elog(LOG, "could not open directory \"%s\": %m", SNAPSHOT_EXPORT_DIR);
return;
}
/*
* Problems in reading the directory, or unlinking files, are reported at
* LOG level. Since we're running in the startup process, ERROR level
* would prevent database start, and it's not important enough for that.
*/
s_dir = AllocateDir(SNAPSHOT_EXPORT_DIR);
while ((s_de = ReadDir(s_dir, SNAPSHOT_EXPORT_DIR)) != NULL)
while ((s_de = ReadDirExtended(s_dir, SNAPSHOT_EXPORT_DIR, LOG)) != NULL)
{
if (strcmp(s_de->d_name, ".") == 0 ||
strcmp(s_de->d_name, "..") == 0)
continue;
snprintf(buf, sizeof(buf), SNAPSHOT_EXPORT_DIR "/%s", s_de->d_name);
/* Again, unlink failure is not worthy of FATAL */
if (unlink(buf))
elog(LOG, "could not unlink file \"%s\": %m", buf);
if (unlink(buf) != 0)
ereport(LOG,
(errcode_for_file_access(),
errmsg("could not remove file \"%s\": %m", buf)));
}
FreeDir(s_dir);

View File

@ -98,6 +98,8 @@ extern int ClosePipeStream(FILE *file);
/* Operations to allow use of the <dirent.h> library routines */
extern DIR *AllocateDir(const char *dirname);
extern struct dirent *ReadDir(DIR *dir, const char *dirname);
extern struct dirent *ReadDirExtended(DIR *dir, const char *dirname,
int elevel);
extern int FreeDir(DIR *dir);
/* Operations to allow use of a plain kernel FD, with automatic cleanup */

View File

@ -156,15 +156,8 @@ scan_directory_ci(const char *dirname, const char *fname, int fnamelen,
struct dirent *direntry;
dirdesc = AllocateDir(dirname);
if (!dirdesc)
{
ereport(LOG,
(errcode_for_file_access(),
errmsg("could not open directory \"%s\": %m", dirname)));
return false;
}
while ((direntry = ReadDir(dirdesc, dirname)) != NULL)
while ((direntry = ReadDirExtended(dirdesc, dirname, LOG)) != NULL)
{
/*
* Ignore . and .., plus any other "hidden" files. This is a security