Avoid holding a directory FD open across pg_ls_dir_files() calls.

This coding technique is undesirable because (a) it leaks the FD for
the rest of the transaction if the SRF is not run to completion, and
(b) allocated FDs are a scarce resource, but multiple interleaved
uses of the relevant functions could eat many such FDs.

In v11 and later, a query such as "SELECT pg_ls_waldir() LIMIT 1"
yields a warning about the leaked FD, and the only reason there's
no warning in earlier branches is that fd.c didn't whine about such
leaks before commit 9cb7db3f0.  Even disregarding the warning, it
wouldn't be too hard to run a backend out of FDs with careless use
of these SQL functions.

Hence, rewrite the function so that it reads the directory within
a single call, returning the results as a tuplestore rather than
via value-per-call mode.

There are half a dozen other built-in SRFs with similar problems,
but let's fix this one to start with, just to see if the buildfarm
finds anything wrong with the code.

In passing, fix bogus error report for stat() failure: it was
whining about the directory when it should be fingering the
individual file.  Doubtless a copy-and-paste error.

Back-patch to v10 where this function was added.

Justin Pryzby, with cosmetic tweaks and test cases by me

Discussion: https://postgr.es/m/20200308173103.GC1357@telsasoft.com
This commit is contained in:
Tom Lane 2020-03-11 15:27:59 -04:00
parent bf68b79e50
commit 085b6b6679
3 changed files with 122 additions and 49 deletions

View File

@ -523,75 +523,82 @@ pg_ls_dir_1arg(PG_FUNCTION_ARGS)
return pg_ls_dir(fcinfo);
}
/* Generic function to return a directory listing of files */
/*
* Generic function to return a directory listing of files.
*
* If the directory isn't there, silently return an empty set if missing_ok.
* Other unreadable-directory cases throw an error.
*/
static Datum
pg_ls_dir_files(FunctionCallInfo fcinfo, const char *dir, bool missing_ok)
{
FuncCallContext *funcctx;
ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo;
bool randomAccess;
TupleDesc tupdesc;
Tuplestorestate *tupstore;
DIR *dirdesc;
struct dirent *de;
directory_fctx *fctx;
MemoryContext oldcontext;
if (SRF_IS_FIRSTCALL())
/* check to see if caller supports us returning a tuplestore */
if (rsinfo == NULL || !IsA(rsinfo, ReturnSetInfo))
ereport(ERROR,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("set-valued function called in context that cannot accept a set")));
if (!(rsinfo->allowedModes & SFRM_Materialize))
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("materialize mode required, but it is not "
"allowed in this context")));
/* The tupdesc and tuplestore must be created in ecxt_per_query_memory */
oldcontext = MemoryContextSwitchTo(rsinfo->econtext->ecxt_per_query_memory);
if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE)
elog(ERROR, "return type must be a row type");
randomAccess = (rsinfo->allowedModes & SFRM_Materialize_Random) != 0;
tupstore = tuplestore_begin_heap(randomAccess, false, work_mem);
rsinfo->returnMode = SFRM_Materialize;
rsinfo->setResult = tupstore;
rsinfo->setDesc = tupdesc;
MemoryContextSwitchTo(oldcontext);
/*
* Now walk the directory. Note that we must do this within a single SRF
* call, not leave the directory open across multiple calls, since we
* can't count on the SRF being run to completion.
*/
dirdesc = AllocateDir(dir);
if (!dirdesc)
{
MemoryContext oldcontext;
TupleDesc tupdesc;
funcctx = SRF_FIRSTCALL_INIT();
oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
fctx = palloc(sizeof(directory_fctx));
tupdesc = CreateTemplateTupleDesc(3);
TupleDescInitEntry(tupdesc, (AttrNumber) 1, "name",
TEXTOID, -1, 0);
TupleDescInitEntry(tupdesc, (AttrNumber) 2, "size",
INT8OID, -1, 0);
TupleDescInitEntry(tupdesc, (AttrNumber) 3, "modification",
TIMESTAMPTZOID, -1, 0);
funcctx->tuple_desc = BlessTupleDesc(tupdesc);
fctx->location = pstrdup(dir);
fctx->dirdesc = AllocateDir(fctx->location);
if (!fctx->dirdesc)
/* Return empty tuplestore if appropriate */
if (missing_ok && errno == ENOENT)
{
if (missing_ok && errno == ENOENT)
{
MemoryContextSwitchTo(oldcontext);
SRF_RETURN_DONE(funcctx);
}
else
ereport(ERROR,
(errcode_for_file_access(),
errmsg("could not open directory \"%s\": %m",
fctx->location)));
tuplestore_donestoring(tupstore);
return (Datum) 0;
}
funcctx->user_fctx = fctx;
MemoryContextSwitchTo(oldcontext);
/* Otherwise, we can let ReadDir() throw the error */
}
funcctx = SRF_PERCALL_SETUP();
fctx = (directory_fctx *) funcctx->user_fctx;
while ((de = ReadDir(fctx->dirdesc, fctx->location)) != NULL)
while ((de = ReadDir(dirdesc, dir)) != NULL)
{
Datum values[3];
bool nulls[3];
char path[MAXPGPATH * 2];
struct stat attrib;
HeapTuple tuple;
/* Skip hidden files */
if (de->d_name[0] == '.')
continue;
/* Get the file info */
snprintf(path, sizeof(path), "%s/%s", fctx->location, de->d_name);
snprintf(path, sizeof(path), "%s/%s", dir, de->d_name);
if (stat(path, &attrib) < 0)
ereport(ERROR,
(errcode_for_file_access(),
errmsg("could not stat directory \"%s\": %m", dir)));
errmsg("could not stat file \"%s\": %m", path)));
/* Ignore anything but regular files */
if (!S_ISREG(attrib.st_mode))
@ -602,12 +609,12 @@ pg_ls_dir_files(FunctionCallInfo fcinfo, const char *dir, bool missing_ok)
values[2] = TimestampTzGetDatum(time_t_to_timestamptz(attrib.st_mtime));
memset(nulls, 0, sizeof(nulls));
tuple = heap_form_tuple(funcctx->tuple_desc, values, nulls);
SRF_RETURN_NEXT(funcctx, HeapTupleGetDatum(tuple));
tuplestore_putvalues(tupstore, tupdesc, values, nulls);
}
FreeDir(fctx->dirdesc);
SRF_RETURN_DONE(funcctx);
FreeDir(dirdesc);
tuplestore_donestoring(tupstore);
return (Datum) 0;
}
/* Function to return the list of files in the log directory */

View File

@ -133,6 +133,52 @@ ERROR: function num_nulls() does not exist
LINE 1: SELECT num_nulls();
^
HINT: No function matches the given name and argument types. You might need to add explicit type casts.
--
-- Test some built-in SRFs
--
-- The outputs of these are variable, so we can't just print their results
-- directly, but we can at least verify that the code doesn't fail.
--
select setting as segsize
from pg_settings where name = 'wal_segment_size'
\gset
select count(*) > 0 as ok from pg_ls_waldir();
ok
----
t
(1 row)
-- Test ProjectSet as well as FunctionScan
select count(*) > 0 as ok from (select pg_ls_waldir()) ss;
ok
----
t
(1 row)
-- Test not-run-to-completion cases.
select * from pg_ls_waldir() limit 0;
name | size | modification
------+------+--------------
(0 rows)
select count(*) > 0 as ok from (select * from pg_ls_waldir() limit 1) ss;
ok
----
t
(1 row)
select (pg_ls_waldir()).size = :segsize as ok limit 1;
ok
----
t
(1 row)
select count(*) >= 0 as ok from pg_ls_archive_statusdir();
ok
----
t
(1 row)
--
-- Test adding a support function to a subject function
--

View File

@ -30,6 +30,26 @@ SELECT num_nulls(VARIADIC '{}'::int[]);
SELECT num_nonnulls();
SELECT num_nulls();
--
-- Test some built-in SRFs
--
-- The outputs of these are variable, so we can't just print their results
-- directly, but we can at least verify that the code doesn't fail.
--
select setting as segsize
from pg_settings where name = 'wal_segment_size'
\gset
select count(*) > 0 as ok from pg_ls_waldir();
-- Test ProjectSet as well as FunctionScan
select count(*) > 0 as ok from (select pg_ls_waldir()) ss;
-- Test not-run-to-completion cases.
select * from pg_ls_waldir() limit 0;
select count(*) > 0 as ok from (select * from pg_ls_waldir() limit 1) ss;
select (pg_ls_waldir()).size = :segsize as ok limit 1;
select count(*) >= 0 as ok from pg_ls_archive_statusdir();
--
-- Test adding a support function to a subject function
--