From 63b51dfec18eb3c280adfa0f6e970946c441f574 Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Wed, 11 Mar 2020 15:27:59 -0400 Subject: [PATCH] Avoid holding a directory FD open across pg_ls_dir_files() calls. This coding technique is undesirable because (a) it leaks the FD for the rest of the transaction if the SRF is not run to completion, and (b) allocated FDs are a scarce resource, but multiple interleaved uses of the relevant functions could eat many such FDs. In v11 and later, a query such as "SELECT pg_ls_waldir() LIMIT 1" yields a warning about the leaked FD, and the only reason there's no warning in earlier branches is that fd.c didn't whine about such leaks before commit 9cb7db3f0. Even disregarding the warning, it wouldn't be too hard to run a backend out of FDs with careless use of these SQL functions. Hence, rewrite the function so that it reads the directory within a single call, returning the results as a tuplestore rather than via value-per-call mode. There are half a dozen other built-in SRFs with similar problems, but let's fix this one to start with, just to see if the buildfarm finds anything wrong with the code. In passing, fix bogus error report for stat() failure: it was whining about the directory when it should be fingering the individual file. Doubtless a copy-and-paste error. Back-patch to v10 where this function was added. Justin Pryzby, with cosmetic tweaks and test cases by me Discussion: https://postgr.es/m/20200308173103.GC1357@telsasoft.com --- src/backend/utils/adt/genfile.c | 83 ++++++++++---------- src/test/regress/expected/misc_functions.out | 40 ++++++++++ src/test/regress/sql/misc_functions.sql | 18 +++++ 3 files changed, 100 insertions(+), 41 deletions(-) diff --git a/src/backend/utils/adt/genfile.c b/src/backend/utils/adt/genfile.c index a97cbea248..a5822d6201 100644 --- a/src/backend/utils/adt/genfile.c +++ b/src/backend/utils/adt/genfile.c @@ -518,67 +518,68 @@ pg_ls_dir_1arg(PG_FUNCTION_ARGS) return pg_ls_dir(fcinfo); } -/* Generic function to return a directory listing of files */ +/* + * Generic function to return a directory listing of files. + */ static Datum pg_ls_dir_files(FunctionCallInfo fcinfo, const char *dir) { - FuncCallContext *funcctx; + ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo; + bool randomAccess; + TupleDesc tupdesc; + Tuplestorestate *tupstore; + DIR *dirdesc; struct dirent *de; - directory_fctx *fctx; + MemoryContext oldcontext; - if (SRF_IS_FIRSTCALL()) - { - MemoryContext oldcontext; - TupleDesc tupdesc; + /* check to see if caller supports us returning a tuplestore */ + if (rsinfo == NULL || !IsA(rsinfo, ReturnSetInfo)) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("set-valued function called in context that cannot accept a set"))); + if (!(rsinfo->allowedModes & SFRM_Materialize)) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("materialize mode required, but it is not " + "allowed in this context"))); - funcctx = SRF_FIRSTCALL_INIT(); - oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx); + /* The tupdesc and tuplestore must be created in ecxt_per_query_memory */ + oldcontext = MemoryContextSwitchTo(rsinfo->econtext->ecxt_per_query_memory); - fctx = palloc(sizeof(directory_fctx)); + if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE) + elog(ERROR, "return type must be a row type"); - tupdesc = CreateTemplateTupleDesc(3, false); - TupleDescInitEntry(tupdesc, (AttrNumber) 1, "name", - TEXTOID, -1, 0); - TupleDescInitEntry(tupdesc, (AttrNumber) 2, "size", - INT8OID, -1, 0); - TupleDescInitEntry(tupdesc, (AttrNumber) 3, "modification", - TIMESTAMPTZOID, -1, 0); - funcctx->tuple_desc = BlessTupleDesc(tupdesc); + randomAccess = (rsinfo->allowedModes & SFRM_Materialize_Random) != 0; + tupstore = tuplestore_begin_heap(randomAccess, false, work_mem); + rsinfo->returnMode = SFRM_Materialize; + rsinfo->setResult = tupstore; + rsinfo->setDesc = tupdesc; - fctx->location = pstrdup(dir); - fctx->dirdesc = AllocateDir(fctx->location); + MemoryContextSwitchTo(oldcontext); - if (!fctx->dirdesc) - ereport(ERROR, - (errcode_for_file_access(), - errmsg("could not open directory \"%s\": %m", - fctx->location))); - - funcctx->user_fctx = fctx; - MemoryContextSwitchTo(oldcontext); - } - - funcctx = SRF_PERCALL_SETUP(); - fctx = (directory_fctx *) funcctx->user_fctx; - - while ((de = ReadDir(fctx->dirdesc, fctx->location)) != NULL) + /* + * Now walk the directory. Note that we must do this within a single SRF + * call, not leave the directory open across multiple calls, since we + * can't count on the SRF being run to completion. + */ + dirdesc = AllocateDir(dir); + while ((de = ReadDir(dirdesc, dir)) != NULL) { Datum values[3]; bool nulls[3]; char path[MAXPGPATH * 2]; struct stat attrib; - HeapTuple tuple; /* Skip hidden files */ if (de->d_name[0] == '.') continue; /* Get the file info */ - snprintf(path, sizeof(path), "%s/%s", fctx->location, de->d_name); + snprintf(path, sizeof(path), "%s/%s", dir, de->d_name); if (stat(path, &attrib) < 0) ereport(ERROR, (errcode_for_file_access(), - errmsg("could not stat directory \"%s\": %m", dir))); + errmsg("could not stat file \"%s\": %m", path))); /* Ignore anything but regular files */ if (!S_ISREG(attrib.st_mode)) @@ -589,12 +590,12 @@ pg_ls_dir_files(FunctionCallInfo fcinfo, const char *dir) values[2] = TimestampTzGetDatum(time_t_to_timestamptz(attrib.st_mtime)); memset(nulls, 0, sizeof(nulls)); - tuple = heap_form_tuple(funcctx->tuple_desc, values, nulls); - SRF_RETURN_NEXT(funcctx, HeapTupleGetDatum(tuple)); + tuplestore_putvalues(tupstore, tupdesc, values, nulls); } - FreeDir(fctx->dirdesc); - SRF_RETURN_DONE(funcctx); + FreeDir(dirdesc); + tuplestore_donestoring(tupstore); + return (Datum) 0; } /* Function to return the list of files in the log directory */ diff --git a/src/test/regress/expected/misc_functions.out b/src/test/regress/expected/misc_functions.out index 130a0e4be3..a1ee4dbb3b 100644 --- a/src/test/regress/expected/misc_functions.out +++ b/src/test/regress/expected/misc_functions.out @@ -133,3 +133,43 @@ ERROR: function num_nulls() does not exist LINE 1: SELECT num_nulls(); ^ HINT: No function matches the given name and argument types. You might need to add explicit type casts. +-- +-- Test some built-in SRFs +-- +-- The outputs of these are variable, so we can't just print their results +-- directly, but we can at least verify that the code doesn't fail. +-- +select setting as segsize +from pg_settings where name = 'wal_segment_size' +\gset +select count(*) > 0 as ok from pg_ls_waldir(); + ok +---- + t +(1 row) + +-- Test ProjectSet as well as FunctionScan +select count(*) > 0 as ok from (select pg_ls_waldir()) ss; + ok +---- + t +(1 row) + +-- Test not-run-to-completion cases. +select * from pg_ls_waldir() limit 0; + name | size | modification +------+------+-------------- +(0 rows) + +select count(*) > 0 as ok from (select * from pg_ls_waldir() limit 1) ss; + ok +---- + t +(1 row) + +select (pg_ls_waldir()).size = :segsize as ok limit 1; + ok +---- + t +(1 row) + diff --git a/src/test/regress/sql/misc_functions.sql b/src/test/regress/sql/misc_functions.sql index 1a20c1f765..795587e70f 100644 --- a/src/test/regress/sql/misc_functions.sql +++ b/src/test/regress/sql/misc_functions.sql @@ -29,3 +29,21 @@ SELECT num_nulls(VARIADIC '{}'::int[]); -- should fail, one or more arguments is required SELECT num_nonnulls(); SELECT num_nulls(); + +-- +-- Test some built-in SRFs +-- +-- The outputs of these are variable, so we can't just print their results +-- directly, but we can at least verify that the code doesn't fail. +-- +select setting as segsize +from pg_settings where name = 'wal_segment_size' +\gset + +select count(*) > 0 as ok from pg_ls_waldir(); +-- Test ProjectSet as well as FunctionScan +select count(*) > 0 as ok from (select pg_ls_waldir()) ss; +-- Test not-run-to-completion cases. +select * from pg_ls_waldir() limit 0; +select count(*) > 0 as ok from (select * from pg_ls_waldir() limit 1) ss; +select (pg_ls_waldir()).size = :segsize as ok limit 1;