2005-08-12 05:25:13 +02:00
|
|
|
/*-------------------------------------------------------------------------
|
|
|
|
*
|
|
|
|
* genfile.c
|
2005-08-12 20:23:56 +02:00
|
|
|
* Functions for direct access to files
|
2005-08-12 05:25:13 +02:00
|
|
|
*
|
|
|
|
*
|
2024-01-04 02:49:05 +01:00
|
|
|
* Copyright (c) 2004-2024, PostgreSQL Global Development Group
|
2005-08-12 05:25:13 +02:00
|
|
|
*
|
|
|
|
* Author: Andreas Pflug <pgadmin@pse-consulting.de>
|
|
|
|
*
|
|
|
|
* IDENTIFICATION
|
2010-09-20 22:08:53 +02:00
|
|
|
* src/backend/utils/adt/genfile.c
|
2005-08-12 05:25:13 +02:00
|
|
|
*
|
|
|
|
*-------------------------------------------------------------------------
|
|
|
|
*/
|
|
|
|
#include "postgres.h"
|
|
|
|
|
|
|
|
#include <sys/file.h>
|
|
|
|
#include <sys/stat.h>
|
|
|
|
#include <unistd.h>
|
|
|
|
#include <dirent.h>
|
|
|
|
|
2012-08-30 22:15:44 +02:00
|
|
|
#include "access/htup_details.h"
|
2017-03-16 20:05:02 +01:00
|
|
|
#include "access/xlog_internal.h"
|
2018-04-06 20:47:10 +02:00
|
|
|
#include "catalog/pg_authid.h"
|
2018-10-05 02:21:48 +02:00
|
|
|
#include "catalog/pg_tablespace_d.h"
|
2005-08-12 05:25:13 +02:00
|
|
|
#include "catalog/pg_type.h"
|
|
|
|
#include "funcapi.h"
|
2010-01-05 02:29:36 +01:00
|
|
|
#include "mb/pg_wchar.h"
|
2005-08-12 20:23:56 +02:00
|
|
|
#include "miscadmin.h"
|
|
|
|
#include "postmaster/syslogger.h"
|
2021-11-23 11:29:42 +01:00
|
|
|
#include "replication/slot.h"
|
2005-08-12 20:23:56 +02:00
|
|
|
#include "storage/fd.h"
|
2020-03-10 10:22:52 +01:00
|
|
|
#include "utils/acl.h"
|
2005-08-12 20:23:56 +02:00
|
|
|
#include "utils/builtins.h"
|
|
|
|
#include "utils/memutils.h"
|
2018-10-05 02:21:48 +02:00
|
|
|
#include "utils/syscache.h"
|
2011-09-09 19:23:41 +02:00
|
|
|
#include "utils/timestamp.h"
|
2005-08-12 05:25:13 +02:00
|
|
|
|
2005-08-12 20:23:56 +02:00
|
|
|
|
2005-08-12 05:25:13 +02:00
|
|
|
/*
|
2006-11-06 04:06:41 +01:00
|
|
|
* Convert a "text" filename argument to C string, and check it's allowable.
|
2005-08-12 20:23:56 +02:00
|
|
|
*
|
2006-11-06 04:06:41 +01:00
|
|
|
* Filename may be absolute or relative to the DataDir, but we only allow
|
|
|
|
* absolute paths that match DataDir or Log_directory.
|
2018-04-06 20:47:10 +02:00
|
|
|
*
|
|
|
|
* This does a privilege check against the 'pg_read_server_files' role, so
|
|
|
|
* this function is really only appropriate for callers who are only checking
|
|
|
|
* 'read' access. Do not use this function if you are looking for a check
|
|
|
|
* for 'write' or 'program' access without updating it to access the type
|
|
|
|
* of check as an argument and checking the appropriate role membership.
|
2005-08-12 05:25:13 +02:00
|
|
|
*/
|
2005-08-12 20:23:56 +02:00
|
|
|
static char *
|
2006-11-06 04:06:41 +01:00
|
|
|
convert_and_check_filename(text *arg)
|
2005-08-12 05:25:13 +02:00
|
|
|
{
|
2008-03-25 23:42:46 +01:00
|
|
|
char *filename;
|
2005-08-12 05:25:13 +02:00
|
|
|
|
2008-03-25 23:42:46 +01:00
|
|
|
filename = text_to_cstring(arg);
|
2005-08-12 23:07:53 +02:00
|
|
|
canonicalize_path(filename); /* filename can change length here */
|
2005-08-12 05:25:13 +02:00
|
|
|
|
2018-04-06 20:47:10 +02:00
|
|
|
/*
|
2022-04-11 10:49:41 +02:00
|
|
|
* Roles with privileges of the 'pg_read_server_files' role are allowed to
|
|
|
|
* access any files on the server as the PG user, so no need to do any
|
|
|
|
* further checks here.
|
2018-04-06 20:47:10 +02:00
|
|
|
*/
|
2022-03-28 21:10:04 +02:00
|
|
|
if (has_privs_of_role(GetUserId(), ROLE_PG_READ_SERVER_FILES))
|
2018-04-06 20:47:10 +02:00
|
|
|
return filename;
|
|
|
|
|
2021-04-01 21:32:06 +02:00
|
|
|
/*
|
|
|
|
* User isn't a member of the pg_read_server_files role, so check if it's
|
|
|
|
* allowable
|
|
|
|
*/
|
2005-08-12 05:25:13 +02:00
|
|
|
if (is_absolute_path(filename))
|
|
|
|
{
|
2011-02-12 15:47:51 +01:00
|
|
|
/*
|
|
|
|
* Allow absolute paths if within DataDir or Log_directory, even
|
|
|
|
* though Log_directory might be outside DataDir.
|
|
|
|
*/
|
|
|
|
if (!path_is_prefix_of_path(DataDir, filename) &&
|
|
|
|
(!is_absolute_path(Log_directory) ||
|
|
|
|
!path_is_prefix_of_path(Log_directory, filename)))
|
|
|
|
ereport(ERROR,
|
2005-08-12 05:25:13 +02:00
|
|
|
(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
|
2020-01-30 17:32:04 +01:00
|
|
|
errmsg("absolute path not allowed")));
|
2005-08-12 05:25:13 +02:00
|
|
|
}
|
2011-02-12 15:47:51 +01:00
|
|
|
else if (!path_is_relative_and_below_cwd(filename))
|
|
|
|
ereport(ERROR,
|
|
|
|
(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
|
2023-03-16 17:04:08 +01:00
|
|
|
errmsg("path must be in or below the data directory")));
|
2011-02-12 15:47:51 +01:00
|
|
|
|
|
|
|
return filename;
|
2005-08-12 05:25:13 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
|
2005-08-12 20:23:56 +02:00
|
|
|
/*
|
2010-12-15 22:56:28 +01:00
|
|
|
* Read a section of a file, returning it as bytea
|
|
|
|
*
|
2011-02-08 22:08:41 +01:00
|
|
|
* Caller is responsible for all permissions checking.
|
|
|
|
*
|
|
|
|
* We read the whole of the file when bytes_to_read is negative.
|
2005-08-12 20:23:56 +02:00
|
|
|
*/
|
2015-06-28 20:35:46 +02:00
|
|
|
static bytea *
|
|
|
|
read_binary_file(const char *filename, int64 seek_offset, int64 bytes_to_read,
|
|
|
|
bool missing_ok)
|
2005-08-12 05:25:13 +02:00
|
|
|
{
|
2010-12-15 22:56:28 +01:00
|
|
|
bytea *buf;
|
Read until EOF vice stat-reported size in read_binary_file
read_binary_file(), used by SQL functions pg_read_file() and friends,
uses stat to determine file length to read, when not passed an explicit
length as an argument. This is problematic, for example, if the file
being read is a virtual file with a stat-reported length of zero.
Arrange to read until EOF, or StringInfo data string lenth limit, is
reached instead.
Original complaint and patch by me, with significant review, corrections,
advice, and code optimizations by Tom Lane. Backpatched to v11. Prior to
that only paths relative to the data and log dirs were allowed for files,
so no "zero length" files were reachable anyway.
Reviewed-By: Tom Lane
Discussion: https://postgr.es/m/flat/969b8d82-5bb2-5fa8-4eb1-f0e685c5d736%40joeconway.com
Backpatch-through: 11
2020-07-04 12:26:53 +02:00
|
|
|
size_t nbytes = 0;
|
2005-08-12 05:25:13 +02:00
|
|
|
FILE *file;
|
|
|
|
|
Read until EOF vice stat-reported size in read_binary_file
read_binary_file(), used by SQL functions pg_read_file() and friends,
uses stat to determine file length to read, when not passed an explicit
length as an argument. This is problematic, for example, if the file
being read is a virtual file with a stat-reported length of zero.
Arrange to read until EOF, or StringInfo data string lenth limit, is
reached instead.
Original complaint and patch by me, with significant review, corrections,
advice, and code optimizations by Tom Lane. Backpatched to v11. Prior to
that only paths relative to the data and log dirs were allowed for files,
so no "zero length" files were reachable anyway.
Reviewed-By: Tom Lane
Discussion: https://postgr.es/m/flat/969b8d82-5bb2-5fa8-4eb1-f0e685c5d736%40joeconway.com
Backpatch-through: 11
2020-07-04 12:26:53 +02:00
|
|
|
/* clamp request size to what we can actually deliver */
|
|
|
|
if (bytes_to_read > (int64) (MaxAllocSize - VARHDRSZ))
|
2010-12-15 22:56:28 +01:00
|
|
|
ereport(ERROR,
|
|
|
|
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
|
|
|
|
errmsg("requested length too large")));
|
|
|
|
|
2005-08-12 05:25:13 +02:00
|
|
|
if ((file = AllocateFile(filename, PG_BINARY_R)) == NULL)
|
2015-06-28 20:35:46 +02:00
|
|
|
{
|
|
|
|
if (missing_ok && errno == ENOENT)
|
|
|
|
return NULL;
|
|
|
|
else
|
|
|
|
ereport(ERROR,
|
|
|
|
(errcode_for_file_access(),
|
|
|
|
errmsg("could not open file \"%s\" for reading: %m",
|
|
|
|
filename)));
|
|
|
|
}
|
2005-08-12 05:25:13 +02:00
|
|
|
|
2005-08-12 20:23:56 +02:00
|
|
|
if (fseeko(file, (off_t) seek_offset,
|
|
|
|
(seek_offset >= 0) ? SEEK_SET : SEEK_END) != 0)
|
2005-08-12 05:25:13 +02:00
|
|
|
ereport(ERROR,
|
|
|
|
(errcode_for_file_access(),
|
2005-08-12 20:23:56 +02:00
|
|
|
errmsg("could not seek in file \"%s\": %m", filename)));
|
2005-08-12 05:25:13 +02:00
|
|
|
|
Read until EOF vice stat-reported size in read_binary_file
read_binary_file(), used by SQL functions pg_read_file() and friends,
uses stat to determine file length to read, when not passed an explicit
length as an argument. This is problematic, for example, if the file
being read is a virtual file with a stat-reported length of zero.
Arrange to read until EOF, or StringInfo data string lenth limit, is
reached instead.
Original complaint and patch by me, with significant review, corrections,
advice, and code optimizations by Tom Lane. Backpatched to v11. Prior to
that only paths relative to the data and log dirs were allowed for files,
so no "zero length" files were reachable anyway.
Reviewed-By: Tom Lane
Discussion: https://postgr.es/m/flat/969b8d82-5bb2-5fa8-4eb1-f0e685c5d736%40joeconway.com
Backpatch-through: 11
2020-07-04 12:26:53 +02:00
|
|
|
if (bytes_to_read >= 0)
|
|
|
|
{
|
|
|
|
/* If passed explicit read size just do it */
|
|
|
|
buf = (bytea *) palloc((Size) bytes_to_read + VARHDRSZ);
|
|
|
|
|
|
|
|
nbytes = fread(VARDATA(buf), 1, (size_t) bytes_to_read, file);
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
/* Negative read size, read rest of file */
|
|
|
|
StringInfoData sbuf;
|
|
|
|
|
|
|
|
initStringInfo(&sbuf);
|
|
|
|
/* Leave room in the buffer for the varlena length word */
|
|
|
|
sbuf.len += VARHDRSZ;
|
|
|
|
Assert(sbuf.len < sbuf.maxlen);
|
|
|
|
|
|
|
|
while (!(feof(file) || ferror(file)))
|
|
|
|
{
|
|
|
|
size_t rbytes;
|
|
|
|
|
|
|
|
/* Minimum amount to read at a time */
|
|
|
|
#define MIN_READ_SIZE 4096
|
|
|
|
|
|
|
|
/*
|
|
|
|
* If not at end of file, and sbuf.len is equal to MaxAllocSize -
|
|
|
|
* 1, then either the file is too large, or there is nothing left
|
|
|
|
* to read. Attempt to read one more byte to see if the end of
|
|
|
|
* file has been reached. If not, the file is too large; we'd
|
|
|
|
* rather give the error message for that ourselves.
|
|
|
|
*/
|
|
|
|
if (sbuf.len == MaxAllocSize - 1)
|
|
|
|
{
|
2020-07-17 15:16:13 +02:00
|
|
|
char rbuf[1];
|
2005-08-12 05:25:13 +02:00
|
|
|
|
2020-07-04 19:46:31 +02:00
|
|
|
if (fread(rbuf, 1, 1, file) != 0 || !feof(file))
|
Read until EOF vice stat-reported size in read_binary_file
read_binary_file(), used by SQL functions pg_read_file() and friends,
uses stat to determine file length to read, when not passed an explicit
length as an argument. This is problematic, for example, if the file
being read is a virtual file with a stat-reported length of zero.
Arrange to read until EOF, or StringInfo data string lenth limit, is
reached instead.
Original complaint and patch by me, with significant review, corrections,
advice, and code optimizations by Tom Lane. Backpatched to v11. Prior to
that only paths relative to the data and log dirs were allowed for files,
so no "zero length" files were reachable anyway.
Reviewed-By: Tom Lane
Discussion: https://postgr.es/m/flat/969b8d82-5bb2-5fa8-4eb1-f0e685c5d736%40joeconway.com
Backpatch-through: 11
2020-07-04 12:26:53 +02:00
|
|
|
ereport(ERROR,
|
|
|
|
(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
|
|
|
|
errmsg("file length too large")));
|
|
|
|
else
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* OK, ensure that we can read at least MIN_READ_SIZE */
|
|
|
|
enlargeStringInfo(&sbuf, MIN_READ_SIZE);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* stringinfo.c likes to allocate in powers of 2, so it's likely
|
|
|
|
* that much more space is available than we asked for. Use all
|
|
|
|
* of it, rather than making more fread calls than necessary.
|
|
|
|
*/
|
|
|
|
rbytes = fread(sbuf.data + sbuf.len, 1,
|
|
|
|
(size_t) (sbuf.maxlen - sbuf.len - 1), file);
|
|
|
|
sbuf.len += rbytes;
|
|
|
|
nbytes += rbytes;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Now we can commandeer the stringinfo's buffer as the result */
|
|
|
|
buf = (bytea *) sbuf.data;
|
|
|
|
}
|
2005-08-12 05:25:13 +02:00
|
|
|
|
2005-10-15 21:47:09 +02:00
|
|
|
if (ferror(file))
|
2005-08-12 05:25:13 +02:00
|
|
|
ereport(ERROR,
|
|
|
|
(errcode_for_file_access(),
|
2005-08-12 20:23:56 +02:00
|
|
|
errmsg("could not read file \"%s\": %m", filename)));
|
|
|
|
|
2007-02-28 00:48:10 +01:00
|
|
|
SET_VARSIZE(buf, nbytes + VARHDRSZ);
|
2005-08-12 05:25:13 +02:00
|
|
|
|
|
|
|
FreeFile(file);
|
2005-08-12 20:23:56 +02:00
|
|
|
|
2010-12-15 22:56:28 +01:00
|
|
|
return buf;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
2010-12-16 03:02:31 +01:00
|
|
|
* Similar to read_binary_file, but we verify that the contents are valid
|
2010-12-15 22:56:28 +01:00
|
|
|
* in the database encoding.
|
|
|
|
*/
|
|
|
|
static text *
|
2015-06-28 20:35:46 +02:00
|
|
|
read_text_file(const char *filename, int64 seek_offset, int64 bytes_to_read,
|
|
|
|
bool missing_ok)
|
2010-12-15 22:56:28 +01:00
|
|
|
{
|
2011-02-08 22:08:41 +01:00
|
|
|
bytea *buf;
|
|
|
|
|
2015-06-28 20:35:46 +02:00
|
|
|
buf = read_binary_file(filename, seek_offset, bytes_to_read, missing_ok);
|
2010-12-15 22:56:28 +01:00
|
|
|
|
2015-06-28 20:35:46 +02:00
|
|
|
if (buf != NULL)
|
|
|
|
{
|
|
|
|
/* Make sure the input is valid */
|
|
|
|
pg_verifymbstr(VARDATA(buf), VARSIZE(buf) - VARHDRSZ, false);
|
2010-12-15 22:56:28 +01:00
|
|
|
|
2015-06-28 20:35:46 +02:00
|
|
|
/* OK, we can cast it to text safely */
|
|
|
|
return (text *) buf;
|
|
|
|
}
|
|
|
|
else
|
|
|
|
return NULL;
|
2010-12-15 22:56:28 +01:00
|
|
|
}
|
|
|
|
|
2018-04-06 20:47:10 +02:00
|
|
|
/*
|
|
|
|
* Read a section of a file, returning it as text
|
|
|
|
*
|
|
|
|
* No superuser check done here- instead privileges are handled by the
|
|
|
|
* GRANT system.
|
2022-07-29 21:38:49 +02:00
|
|
|
*
|
|
|
|
* If read_to_eof is true, bytes_to_read must be -1, otherwise negative values
|
|
|
|
* are not allowed for bytes_to_read.
|
2018-04-06 20:47:10 +02:00
|
|
|
*/
|
2022-07-29 21:38:49 +02:00
|
|
|
static text *
|
|
|
|
pg_read_file_common(text *filename_t, int64 seek_offset, int64 bytes_to_read,
|
|
|
|
bool read_to_eof, bool missing_ok)
|
2018-04-06 20:47:10 +02:00
|
|
|
{
|
2022-07-29 21:38:49 +02:00
|
|
|
if (read_to_eof)
|
|
|
|
Assert(bytes_to_read == -1);
|
|
|
|
else if (bytes_to_read < 0)
|
|
|
|
ereport(ERROR,
|
|
|
|
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
|
|
|
|
errmsg("requested length cannot be negative")));
|
2011-02-08 22:08:41 +01:00
|
|
|
|
2022-07-29 21:38:49 +02:00
|
|
|
return read_text_file(convert_and_check_filename(filename_t),
|
|
|
|
seek_offset, bytes_to_read, missing_ok);
|
2010-12-15 22:56:28 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Read a section of a file, returning it as bytea
|
2022-07-29 21:38:49 +02:00
|
|
|
*
|
|
|
|
* Parameters are interpreted the same as pg_read_file_common().
|
2010-12-15 22:56:28 +01:00
|
|
|
*/
|
2022-07-29 21:38:49 +02:00
|
|
|
static bytea *
|
|
|
|
pg_read_binary_file_common(text *filename_t,
|
|
|
|
int64 seek_offset, int64 bytes_to_read,
|
|
|
|
bool read_to_eof, bool missing_ok)
|
2010-12-15 22:56:28 +01:00
|
|
|
{
|
2022-07-29 21:38:49 +02:00
|
|
|
if (read_to_eof)
|
|
|
|
Assert(bytes_to_read == -1);
|
|
|
|
else if (bytes_to_read < 0)
|
|
|
|
ereport(ERROR,
|
|
|
|
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
|
|
|
|
errmsg("requested length cannot be negative")));
|
2010-12-15 22:56:28 +01:00
|
|
|
|
2022-07-29 21:38:49 +02:00
|
|
|
return read_binary_file(convert_and_check_filename(filename_t),
|
|
|
|
seek_offset, bytes_to_read, missing_ok);
|
2010-12-15 22:56:28 +01:00
|
|
|
}
|
|
|
|
|
2015-06-28 20:35:46 +02:00
|
|
|
|
2010-12-15 22:56:28 +01:00
|
|
|
/*
|
2022-07-29 21:38:49 +02:00
|
|
|
* Wrapper functions for the variants of SQL functions pg_read_file() and
|
|
|
|
* pg_read_binary_file().
|
2015-06-28 20:35:46 +02:00
|
|
|
*
|
|
|
|
* These are necessary to pass the sanity check in opr_sanity, which checks
|
|
|
|
* that all built-in functions that share the implementing C function take
|
|
|
|
* the same number of arguments.
|
2010-12-15 22:56:28 +01:00
|
|
|
*/
|
|
|
|
Datum
|
2015-06-28 20:35:46 +02:00
|
|
|
pg_read_file_off_len(PG_FUNCTION_ARGS)
|
2010-12-15 22:56:28 +01:00
|
|
|
{
|
2022-07-29 21:38:49 +02:00
|
|
|
text *filename_t = PG_GETARG_TEXT_PP(0);
|
|
|
|
int64 seek_offset = PG_GETARG_INT64(1);
|
|
|
|
int64 bytes_to_read = PG_GETARG_INT64(2);
|
|
|
|
text *ret;
|
|
|
|
|
|
|
|
ret = pg_read_file_common(filename_t, seek_offset, bytes_to_read,
|
|
|
|
false, false);
|
|
|
|
if (!ret)
|
|
|
|
PG_RETURN_NULL();
|
|
|
|
|
|
|
|
PG_RETURN_TEXT_P(ret);
|
|
|
|
}
|
|
|
|
|
|
|
|
Datum
|
|
|
|
pg_read_file_off_len_missing(PG_FUNCTION_ARGS)
|
|
|
|
{
|
|
|
|
text *filename_t = PG_GETARG_TEXT_PP(0);
|
|
|
|
int64 seek_offset = PG_GETARG_INT64(1);
|
|
|
|
int64 bytes_to_read = PG_GETARG_INT64(2);
|
|
|
|
bool missing_ok = PG_GETARG_BOOL(3);
|
|
|
|
text *ret;
|
|
|
|
|
|
|
|
ret = pg_read_file_common(filename_t, seek_offset, bytes_to_read,
|
|
|
|
false, missing_ok);
|
|
|
|
|
|
|
|
if (!ret)
|
|
|
|
PG_RETURN_NULL();
|
|
|
|
|
|
|
|
PG_RETURN_TEXT_P(ret);
|
2015-06-28 20:35:46 +02:00
|
|
|
}
|
2011-02-08 22:08:41 +01:00
|
|
|
|
2015-06-28 20:35:46 +02:00
|
|
|
Datum
|
|
|
|
pg_read_file_all(PG_FUNCTION_ARGS)
|
|
|
|
{
|
2022-07-29 21:38:49 +02:00
|
|
|
text *filename_t = PG_GETARG_TEXT_PP(0);
|
|
|
|
text *ret;
|
|
|
|
|
|
|
|
ret = pg_read_file_common(filename_t, 0, -1, true, false);
|
|
|
|
|
|
|
|
if (!ret)
|
|
|
|
PG_RETURN_NULL();
|
|
|
|
|
|
|
|
PG_RETURN_TEXT_P(ret);
|
|
|
|
}
|
|
|
|
|
|
|
|
Datum
|
|
|
|
pg_read_file_all_missing(PG_FUNCTION_ARGS)
|
|
|
|
{
|
|
|
|
text *filename_t = PG_GETARG_TEXT_PP(0);
|
|
|
|
bool missing_ok = PG_GETARG_BOOL(1);
|
|
|
|
text *ret;
|
|
|
|
|
|
|
|
ret = pg_read_file_common(filename_t, 0, -1, true, missing_ok);
|
|
|
|
|
|
|
|
if (!ret)
|
|
|
|
PG_RETURN_NULL();
|
|
|
|
|
|
|
|
PG_RETURN_TEXT_P(ret);
|
2015-06-28 20:35:46 +02:00
|
|
|
}
|
2011-02-08 22:08:41 +01:00
|
|
|
|
2015-06-28 20:35:46 +02:00
|
|
|
Datum
|
|
|
|
pg_read_binary_file_off_len(PG_FUNCTION_ARGS)
|
|
|
|
{
|
2022-07-29 21:38:49 +02:00
|
|
|
text *filename_t = PG_GETARG_TEXT_PP(0);
|
|
|
|
int64 seek_offset = PG_GETARG_INT64(1);
|
|
|
|
int64 bytes_to_read = PG_GETARG_INT64(2);
|
|
|
|
text *ret;
|
|
|
|
|
|
|
|
ret = pg_read_binary_file_common(filename_t, seek_offset, bytes_to_read,
|
|
|
|
false, false);
|
|
|
|
if (!ret)
|
|
|
|
PG_RETURN_NULL();
|
|
|
|
|
|
|
|
PG_RETURN_BYTEA_P(ret);
|
|
|
|
}
|
|
|
|
|
|
|
|
Datum
|
|
|
|
pg_read_binary_file_off_len_missing(PG_FUNCTION_ARGS)
|
|
|
|
{
|
|
|
|
text *filename_t = PG_GETARG_TEXT_PP(0);
|
|
|
|
int64 seek_offset = PG_GETARG_INT64(1);
|
|
|
|
int64 bytes_to_read = PG_GETARG_INT64(2);
|
|
|
|
bool missing_ok = PG_GETARG_BOOL(3);
|
|
|
|
text *ret;
|
|
|
|
|
|
|
|
ret = pg_read_binary_file_common(filename_t, seek_offset, bytes_to_read,
|
|
|
|
false, missing_ok);
|
|
|
|
if (!ret)
|
|
|
|
PG_RETURN_NULL();
|
|
|
|
|
|
|
|
PG_RETURN_BYTEA_P(ret);
|
2015-06-28 20:35:46 +02:00
|
|
|
}
|
2010-12-15 22:56:28 +01:00
|
|
|
|
2015-06-28 20:35:46 +02:00
|
|
|
Datum
|
|
|
|
pg_read_binary_file_all(PG_FUNCTION_ARGS)
|
|
|
|
{
|
2022-07-29 21:38:49 +02:00
|
|
|
text *filename_t = PG_GETARG_TEXT_PP(0);
|
|
|
|
text *ret;
|
|
|
|
|
|
|
|
ret = pg_read_binary_file_common(filename_t, 0, -1, true, false);
|
|
|
|
|
|
|
|
if (!ret)
|
|
|
|
PG_RETURN_NULL();
|
|
|
|
|
|
|
|
PG_RETURN_BYTEA_P(ret);
|
|
|
|
}
|
|
|
|
|
|
|
|
Datum
|
|
|
|
pg_read_binary_file_all_missing(PG_FUNCTION_ARGS)
|
|
|
|
{
|
|
|
|
text *filename_t = PG_GETARG_TEXT_PP(0);
|
|
|
|
bool missing_ok = PG_GETARG_BOOL(1);
|
|
|
|
text *ret;
|
|
|
|
|
|
|
|
ret = pg_read_binary_file_common(filename_t, 0, -1, true, missing_ok);
|
|
|
|
|
|
|
|
if (!ret)
|
|
|
|
PG_RETURN_NULL();
|
|
|
|
|
|
|
|
PG_RETURN_BYTEA_P(ret);
|
2005-08-12 05:25:13 +02:00
|
|
|
}
|
|
|
|
|
2005-08-12 20:23:56 +02:00
|
|
|
/*
|
|
|
|
* stat a file
|
|
|
|
*/
|
|
|
|
Datum
|
|
|
|
pg_stat_file(PG_FUNCTION_ARGS)
|
2005-08-12 05:25:13 +02:00
|
|
|
{
|
2017-03-13 00:35:34 +01:00
|
|
|
text *filename_t = PG_GETARG_TEXT_PP(0);
|
2005-08-12 20:23:56 +02:00
|
|
|
char *filename;
|
2005-08-12 05:25:13 +02:00
|
|
|
struct stat fst;
|
2005-08-16 01:00:14 +02:00
|
|
|
Datum values[6];
|
|
|
|
bool isnull[6];
|
2005-08-12 05:25:13 +02:00
|
|
|
HeapTuple tuple;
|
2005-08-12 20:23:56 +02:00
|
|
|
TupleDesc tupdesc;
|
2015-06-28 20:35:46 +02:00
|
|
|
bool missing_ok = false;
|
2005-08-12 05:25:13 +02:00
|
|
|
|
2015-06-28 20:35:46 +02:00
|
|
|
/* check the optional argument */
|
|
|
|
if (PG_NARGS() == 2)
|
|
|
|
missing_ok = PG_GETARG_BOOL(1);
|
|
|
|
|
2006-11-06 04:06:41 +01:00
|
|
|
filename = convert_and_check_filename(filename_t);
|
2005-08-12 05:25:13 +02:00
|
|
|
|
|
|
|
if (stat(filename, &fst) < 0)
|
2015-06-28 20:35:46 +02:00
|
|
|
{
|
|
|
|
if (missing_ok && errno == ENOENT)
|
|
|
|
PG_RETURN_NULL();
|
|
|
|
else
|
|
|
|
ereport(ERROR,
|
|
|
|
(errcode_for_file_access(),
|
|
|
|
errmsg("could not stat file \"%s\": %m", filename)));
|
|
|
|
}
|
2005-08-12 20:23:56 +02:00
|
|
|
|
2005-08-13 21:02:34 +02:00
|
|
|
/*
|
|
|
|
* This record type had better match the output parameters declared for me
|
2006-11-24 22:18:42 +01:00
|
|
|
* in pg_proc.h.
|
2005-08-13 21:02:34 +02:00
|
|
|
*/
|
Remove WITH OIDS support, change oid catalog column visibility.
Previously tables declared WITH OIDS, including a significant fraction
of the catalog tables, stored the oid column not as a normal column,
but as part of the tuple header.
This special column was not shown by default, which was somewhat odd,
as it's often (consider e.g. pg_class.oid) one of the more important
parts of a row. Neither pg_dump nor COPY included the contents of the
oid column by default.
The fact that the oid column was not an ordinary column necessitated a
significant amount of special case code to support oid columns. That
already was painful for the existing, but upcoming work aiming to make
table storage pluggable, would have required expanding and duplicating
that "specialness" significantly.
WITH OIDS has been deprecated since 2005 (commit ff02d0a05280e0).
Remove it.
Removing includes:
- CREATE TABLE and ALTER TABLE syntax for declaring the table to be
WITH OIDS has been removed (WITH (oids[ = true]) will error out)
- pg_dump does not support dumping tables declared WITH OIDS and will
issue a warning when dumping one (and ignore the oid column).
- restoring an pg_dump archive with pg_restore will warn when
restoring a table with oid contents (and ignore the oid column)
- COPY will refuse to load binary dump that includes oids.
- pg_upgrade will error out when encountering tables declared WITH
OIDS, they have to be altered to remove the oid column first.
- Functionality to access the oid of the last inserted row (like
plpgsql's RESULT_OID, spi's SPI_lastoid, ...) has been removed.
The syntax for declaring a table WITHOUT OIDS (or WITH (oids = false)
for CREATE TABLE) is still supported. While that requires a bit of
support code, it seems unnecessary to break applications / dumps that
do not use oids, and are explicit about not using them.
The biggest user of WITH OID columns was postgres' catalog. This
commit changes all 'magic' oid columns to be columns that are normally
declared and stored. To reduce unnecessary query breakage all the
newly added columns are still named 'oid', even if a table's column
naming scheme would indicate 'reloid' or such. This obviously
requires adapting a lot code, mostly replacing oid access via
HeapTupleGetOid() with access to the underlying Form_pg_*->oid column.
The bootstrap process now assigns oids for all oid columns in
genbki.pl that do not have an explicit value (starting at the largest
oid previously used), only oids assigned later by oids will be above
FirstBootstrapObjectId. As the oid column now is a normal column the
special bootstrap syntax for oids has been removed.
Oids are not automatically assigned during insertion anymore, all
backend code explicitly assigns oids with GetNewOidWithIndex(). For
the rare case that insertions into the catalog via SQL are called for
the new pg_nextoid() function can be used (which only works on catalog
tables).
The fact that oid columns on system tables are now normal columns
means that they will be included in the set of columns expanded
by * (i.e. SELECT * FROM pg_class will now include the table's oid,
previously it did not). It'd not technically be hard to hide oid
column by default, but that'd mean confusing behavior would either
have to be carried forward forever, or it'd cause breakage down the
line.
While it's not unlikely that further adjustments are needed, the
scope/invasiveness of the patch makes it worthwhile to get merge this
now. It's painful to maintain externally, too complicated to commit
after the code code freeze, and a dependency of a number of other
patches.
Catversion bump, for obvious reasons.
Author: Andres Freund, with contributions by John Naylor
Discussion: https://postgr.es/m/20180930034810.ywp2c7awz7opzcfr@alap3.anarazel.de
2018-11-21 00:36:57 +01:00
|
|
|
tupdesc = CreateTemplateTupleDesc(6);
|
2005-08-12 20:23:56 +02:00
|
|
|
TupleDescInitEntry(tupdesc, (AttrNumber) 1,
|
2005-08-16 01:00:14 +02:00
|
|
|
"size", INT8OID, -1, 0);
|
2005-08-12 20:23:56 +02:00
|
|
|
TupleDescInitEntry(tupdesc, (AttrNumber) 2,
|
2005-08-16 01:00:14 +02:00
|
|
|
"access", TIMESTAMPTZOID, -1, 0);
|
2005-08-12 20:23:56 +02:00
|
|
|
TupleDescInitEntry(tupdesc, (AttrNumber) 3,
|
2005-08-16 01:00:14 +02:00
|
|
|
"modification", TIMESTAMPTZOID, -1, 0);
|
2005-08-12 20:23:56 +02:00
|
|
|
TupleDescInitEntry(tupdesc, (AttrNumber) 4,
|
2005-08-16 01:00:14 +02:00
|
|
|
"change", TIMESTAMPTZOID, -1, 0);
|
2005-08-12 20:23:56 +02:00
|
|
|
TupleDescInitEntry(tupdesc, (AttrNumber) 5,
|
2005-08-16 01:00:14 +02:00
|
|
|
"creation", TIMESTAMPTZOID, -1, 0);
|
|
|
|
TupleDescInitEntry(tupdesc, (AttrNumber) 6,
|
2005-08-12 20:23:56 +02:00
|
|
|
"isdir", BOOLOID, -1, 0);
|
|
|
|
BlessTupleDesc(tupdesc);
|
|
|
|
|
2005-08-16 01:00:14 +02:00
|
|
|
memset(isnull, false, sizeof(isnull));
|
|
|
|
|
2005-08-12 20:23:56 +02:00
|
|
|
values[0] = Int64GetDatum((int64) fst.st_size);
|
|
|
|
values[1] = TimestampTzGetDatum(time_t_to_timestamptz(fst.st_atime));
|
|
|
|
values[2] = TimestampTzGetDatum(time_t_to_timestamptz(fst.st_mtime));
|
2005-08-16 01:00:14 +02:00
|
|
|
/* Unix has file status change time, while Win32 has creation time */
|
|
|
|
#if !defined(WIN32) && !defined(__CYGWIN__)
|
2005-08-12 20:23:56 +02:00
|
|
|
values[3] = TimestampTzGetDatum(time_t_to_timestamptz(fst.st_ctime));
|
2005-08-16 01:00:14 +02:00
|
|
|
isnull[4] = true;
|
|
|
|
#else
|
|
|
|
isnull[3] = true;
|
|
|
|
values[4] = TimestampTzGetDatum(time_t_to_timestamptz(fst.st_ctime));
|
|
|
|
#endif
|
Fix a number of places that were making file-type tests infelicitously.
The places that did, eg,
(statbuf.st_mode & S_IFMT) == S_IFDIR
were correct, but there is no good reason not to use S_ISDIR() instead,
especially when that's what the other 90% of our code does. The places
that did, eg,
(statbuf.st_mode & S_IFDIR)
were flat out *wrong* and would fail in various platform-specific ways,
eg a symlink could be mistaken for a regular file on most Unixen.
The actual impact of this is probably small, since the problem cases
seem to always involve symlinks or sockets, which are unlikely to be
found in the directories that PG code might be scanning. But it's
clearly trouble waiting to happen, so patch all the way back anyway.
(There seem to be no occurrences of the mistake in 7.4.)
2008-03-31 03:31:43 +02:00
|
|
|
values[5] = BoolGetDatum(S_ISDIR(fst.st_mode));
|
2005-08-12 20:23:56 +02:00
|
|
|
|
|
|
|
tuple = heap_form_tuple(tupdesc, values, isnull);
|
2005-08-12 05:25:13 +02:00
|
|
|
|
2005-08-12 20:23:56 +02:00
|
|
|
pfree(filename);
|
2005-08-12 05:25:13 +02:00
|
|
|
|
2005-08-12 20:23:56 +02:00
|
|
|
PG_RETURN_DATUM(HeapTupleGetDatum(tuple));
|
2005-08-12 05:25:13 +02:00
|
|
|
}
|
|
|
|
|
2015-06-28 20:35:46 +02:00
|
|
|
/*
|
|
|
|
* stat a file (1 argument version)
|
|
|
|
*
|
|
|
|
* note: this wrapper is necessary to pass the sanity check in opr_sanity,
|
|
|
|
* which checks that all built-in functions that share the implementing C
|
|
|
|
* function take the same number of arguments
|
|
|
|
*/
|
|
|
|
Datum
|
|
|
|
pg_stat_file_1arg(PG_FUNCTION_ARGS)
|
|
|
|
{
|
|
|
|
return pg_stat_file(fcinfo);
|
|
|
|
}
|
2005-08-12 05:25:13 +02:00
|
|
|
|
2005-08-12 20:23:56 +02:00
|
|
|
/*
|
|
|
|
* List a directory (returns the filenames only)
|
|
|
|
*/
|
|
|
|
Datum
|
|
|
|
pg_ls_dir(PG_FUNCTION_ARGS)
|
2005-08-12 05:25:13 +02:00
|
|
|
{
|
2020-03-17 02:05:28 +01:00
|
|
|
ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo;
|
|
|
|
char *location;
|
|
|
|
bool missing_ok = false;
|
|
|
|
bool include_dot_dirs = false;
|
|
|
|
DIR *dirdesc;
|
2005-08-12 05:25:13 +02:00
|
|
|
struct dirent *de;
|
|
|
|
|
2020-03-17 02:05:28 +01:00
|
|
|
location = convert_and_check_filename(PG_GETARG_TEXT_PP(0));
|
|
|
|
|
|
|
|
/* check the optional arguments */
|
|
|
|
if (PG_NARGS() == 3)
|
2005-08-12 05:25:13 +02:00
|
|
|
{
|
2020-03-17 02:05:28 +01:00
|
|
|
if (!PG_ARGISNULL(1))
|
|
|
|
missing_ok = PG_GETARG_BOOL(1);
|
|
|
|
if (!PG_ARGISNULL(2))
|
|
|
|
include_dot_dirs = PG_GETARG_BOOL(2);
|
|
|
|
}
|
2015-06-28 20:35:46 +02:00
|
|
|
|
2022-10-18 03:22:35 +02:00
|
|
|
InitMaterializedSRF(fcinfo, MAT_SRF_USE_EXPECTED_DESC);
|
2005-08-12 05:25:13 +02:00
|
|
|
|
2020-03-17 02:05:28 +01:00
|
|
|
dirdesc = AllocateDir(location);
|
|
|
|
if (!dirdesc)
|
|
|
|
{
|
|
|
|
/* Return empty tuplestore if appropriate */
|
|
|
|
if (missing_ok && errno == ENOENT)
|
|
|
|
return (Datum) 0;
|
|
|
|
/* Otherwise, we can let ReadDir() throw the error */
|
2005-08-12 05:25:13 +02:00
|
|
|
}
|
|
|
|
|
2020-03-17 02:05:28 +01:00
|
|
|
while ((de = ReadDir(dirdesc, location)) != NULL)
|
2005-08-12 05:25:13 +02:00
|
|
|
{
|
2020-03-17 02:05:28 +01:00
|
|
|
Datum values[1];
|
|
|
|
bool nulls[1];
|
|
|
|
|
|
|
|
if (!include_dot_dirs &&
|
2015-06-28 20:35:46 +02:00
|
|
|
(strcmp(de->d_name, ".") == 0 ||
|
|
|
|
strcmp(de->d_name, "..") == 0))
|
2005-08-12 05:25:13 +02:00
|
|
|
continue;
|
|
|
|
|
2020-03-17 02:05:28 +01:00
|
|
|
values[0] = CStringGetTextDatum(de->d_name);
|
|
|
|
nulls[0] = false;
|
2005-08-12 05:25:13 +02:00
|
|
|
|
Create routine able to set single-call SRFs for Materialize mode
Set-returning functions that use the Materialize mode, creating a
tuplestore to include all the tuples returned in a set rather than doing
so in multiple calls, use roughly the same set of steps to prepare
ReturnSetInfo for this job:
- Check if ReturnSetInfo supports returning a tuplestore and if the
materialize mode is enabled.
- Create a tuplestore for all the tuples part of the returned set in the
per-query memory context, stored in ReturnSetInfo->setResult.
- Build a tuple descriptor mostly from get_call_result_type(), then
stored in ReturnSetInfo->setDesc. Note that there are some cases where
the SRF's tuple descriptor has to be the one specified by the function
caller.
This refactoring is done so as there are (well, should be) no behavior
changes in any of the in-core functions refactored, and the centralized
function that checks and sets up the function's ReturnSetInfo can be
controlled with a set of bits32 options. Two of them prove to be
necessary now:
- SRF_SINGLE_USE_EXPECTED to use expectedDesc as tuple descriptor, as
expected by the function's caller.
- SRF_SINGLE_BLESS to validate the tuple descriptor for the SRF.
The same initialization pattern is simplified in 28 places per my
count as of src/backend/, shaving up to ~900 lines of code. These
mostly come from the removal of the per-query initializations and the
sanity checks now grouped in a single location. There are more
locations that could be simplified in contrib/, that are left for a
follow-up cleanup.
fcc2817, 07daca5 and d61a361 have prepared the areas of the code related
to this change, to ease this refactoring.
Author: Melanie Plageman, Michael Paquier
Reviewed-by: Álvaro Herrera, Justin Pryzby
Discussion: https://postgr.es/m/CAAKRu_azyd1Z3W_r7Ou4sorTjRCs+PxeHw1CWJeXKofkE6TuZg@mail.gmail.com
2022-03-07 02:26:29 +01:00
|
|
|
tuplestore_putvalues(rsinfo->setResult, rsinfo->setDesc,
|
|
|
|
values, nulls);
|
2020-03-17 02:05:28 +01:00
|
|
|
}
|
2005-08-12 20:23:56 +02:00
|
|
|
|
2020-03-17 02:05:28 +01:00
|
|
|
FreeDir(dirdesc);
|
|
|
|
return (Datum) 0;
|
2005-08-12 05:25:13 +02:00
|
|
|
}
|
2015-06-28 20:35:46 +02:00
|
|
|
|
|
|
|
/*
|
|
|
|
* List a directory (1 argument version)
|
|
|
|
*
|
|
|
|
* note: this wrapper is necessary to pass the sanity check in opr_sanity,
|
|
|
|
* which checks that all built-in functions that share the implementing C
|
|
|
|
* function take the same number of arguments.
|
|
|
|
*/
|
|
|
|
Datum
|
|
|
|
pg_ls_dir_1arg(PG_FUNCTION_ARGS)
|
|
|
|
{
|
|
|
|
return pg_ls_dir(fcinfo);
|
|
|
|
}
|
2017-03-16 20:05:02 +01:00
|
|
|
|
Avoid holding a directory FD open across pg_ls_dir_files() calls.
This coding technique is undesirable because (a) it leaks the FD for
the rest of the transaction if the SRF is not run to completion, and
(b) allocated FDs are a scarce resource, but multiple interleaved
uses of the relevant functions could eat many such FDs.
In v11 and later, a query such as "SELECT pg_ls_waldir() LIMIT 1"
yields a warning about the leaked FD, and the only reason there's
no warning in earlier branches is that fd.c didn't whine about such
leaks before commit 9cb7db3f0. Even disregarding the warning, it
wouldn't be too hard to run a backend out of FDs with careless use
of these SQL functions.
Hence, rewrite the function so that it reads the directory within
a single call, returning the results as a tuplestore rather than
via value-per-call mode.
There are half a dozen other built-in SRFs with similar problems,
but let's fix this one to start with, just to see if the buildfarm
finds anything wrong with the code.
In passing, fix bogus error report for stat() failure: it was
whining about the directory when it should be fingering the
individual file. Doubtless a copy-and-paste error.
Back-patch to v10 where this function was added.
Justin Pryzby, with cosmetic tweaks and test cases by me
Discussion: https://postgr.es/m/20200308173103.GC1357@telsasoft.com
2020-03-11 20:27:59 +01:00
|
|
|
/*
|
|
|
|
* Generic function to return a directory listing of files.
|
|
|
|
*
|
|
|
|
* If the directory isn't there, silently return an empty set if missing_ok.
|
|
|
|
* Other unreadable-directory cases throw an error.
|
|
|
|
*/
|
2017-03-16 20:05:02 +01:00
|
|
|
static Datum
|
2018-10-05 02:21:48 +02:00
|
|
|
pg_ls_dir_files(FunctionCallInfo fcinfo, const char *dir, bool missing_ok)
|
2017-03-16 20:05:02 +01:00
|
|
|
{
|
Avoid holding a directory FD open across pg_ls_dir_files() calls.
This coding technique is undesirable because (a) it leaks the FD for
the rest of the transaction if the SRF is not run to completion, and
(b) allocated FDs are a scarce resource, but multiple interleaved
uses of the relevant functions could eat many such FDs.
In v11 and later, a query such as "SELECT pg_ls_waldir() LIMIT 1"
yields a warning about the leaked FD, and the only reason there's
no warning in earlier branches is that fd.c didn't whine about such
leaks before commit 9cb7db3f0. Even disregarding the warning, it
wouldn't be too hard to run a backend out of FDs with careless use
of these SQL functions.
Hence, rewrite the function so that it reads the directory within
a single call, returning the results as a tuplestore rather than
via value-per-call mode.
There are half a dozen other built-in SRFs with similar problems,
but let's fix this one to start with, just to see if the buildfarm
finds anything wrong with the code.
In passing, fix bogus error report for stat() failure: it was
whining about the directory when it should be fingering the
individual file. Doubtless a copy-and-paste error.
Back-patch to v10 where this function was added.
Justin Pryzby, with cosmetic tweaks and test cases by me
Discussion: https://postgr.es/m/20200308173103.GC1357@telsasoft.com
2020-03-11 20:27:59 +01:00
|
|
|
ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo;
|
|
|
|
DIR *dirdesc;
|
2017-03-16 20:05:02 +01:00
|
|
|
struct dirent *de;
|
|
|
|
|
2022-10-18 03:22:35 +02:00
|
|
|
InitMaterializedSRF(fcinfo, 0);
|
2017-03-16 20:05:02 +01:00
|
|
|
|
Avoid holding a directory FD open across pg_ls_dir_files() calls.
This coding technique is undesirable because (a) it leaks the FD for
the rest of the transaction if the SRF is not run to completion, and
(b) allocated FDs are a scarce resource, but multiple interleaved
uses of the relevant functions could eat many such FDs.
In v11 and later, a query such as "SELECT pg_ls_waldir() LIMIT 1"
yields a warning about the leaked FD, and the only reason there's
no warning in earlier branches is that fd.c didn't whine about such
leaks before commit 9cb7db3f0. Even disregarding the warning, it
wouldn't be too hard to run a backend out of FDs with careless use
of these SQL functions.
Hence, rewrite the function so that it reads the directory within
a single call, returning the results as a tuplestore rather than
via value-per-call mode.
There are half a dozen other built-in SRFs with similar problems,
but let's fix this one to start with, just to see if the buildfarm
finds anything wrong with the code.
In passing, fix bogus error report for stat() failure: it was
whining about the directory when it should be fingering the
individual file. Doubtless a copy-and-paste error.
Back-patch to v10 where this function was added.
Justin Pryzby, with cosmetic tweaks and test cases by me
Discussion: https://postgr.es/m/20200308173103.GC1357@telsasoft.com
2020-03-11 20:27:59 +01:00
|
|
|
/*
|
|
|
|
* Now walk the directory. Note that we must do this within a single SRF
|
|
|
|
* call, not leave the directory open across multiple calls, since we
|
|
|
|
* can't count on the SRF being run to completion.
|
|
|
|
*/
|
|
|
|
dirdesc = AllocateDir(dir);
|
|
|
|
if (!dirdesc)
|
|
|
|
{
|
|
|
|
/* Return empty tuplestore if appropriate */
|
|
|
|
if (missing_ok && errno == ENOENT)
|
|
|
|
return (Datum) 0;
|
|
|
|
/* Otherwise, we can let ReadDir() throw the error */
|
2017-03-16 20:05:02 +01:00
|
|
|
}
|
|
|
|
|
Avoid holding a directory FD open across pg_ls_dir_files() calls.
This coding technique is undesirable because (a) it leaks the FD for
the rest of the transaction if the SRF is not run to completion, and
(b) allocated FDs are a scarce resource, but multiple interleaved
uses of the relevant functions could eat many such FDs.
In v11 and later, a query such as "SELECT pg_ls_waldir() LIMIT 1"
yields a warning about the leaked FD, and the only reason there's
no warning in earlier branches is that fd.c didn't whine about such
leaks before commit 9cb7db3f0. Even disregarding the warning, it
wouldn't be too hard to run a backend out of FDs with careless use
of these SQL functions.
Hence, rewrite the function so that it reads the directory within
a single call, returning the results as a tuplestore rather than
via value-per-call mode.
There are half a dozen other built-in SRFs with similar problems,
but let's fix this one to start with, just to see if the buildfarm
finds anything wrong with the code.
In passing, fix bogus error report for stat() failure: it was
whining about the directory when it should be fingering the
individual file. Doubtless a copy-and-paste error.
Back-patch to v10 where this function was added.
Justin Pryzby, with cosmetic tweaks and test cases by me
Discussion: https://postgr.es/m/20200308173103.GC1357@telsasoft.com
2020-03-11 20:27:59 +01:00
|
|
|
while ((de = ReadDir(dirdesc, dir)) != NULL)
|
2017-03-16 20:05:02 +01:00
|
|
|
{
|
|
|
|
Datum values[3];
|
|
|
|
bool nulls[3];
|
2017-04-11 20:13:31 +02:00
|
|
|
char path[MAXPGPATH * 2];
|
2017-03-16 20:05:02 +01:00
|
|
|
struct stat attrib;
|
|
|
|
|
|
|
|
/* Skip hidden files */
|
|
|
|
if (de->d_name[0] == '.')
|
|
|
|
continue;
|
|
|
|
|
|
|
|
/* Get the file info */
|
Avoid holding a directory FD open across pg_ls_dir_files() calls.
This coding technique is undesirable because (a) it leaks the FD for
the rest of the transaction if the SRF is not run to completion, and
(b) allocated FDs are a scarce resource, but multiple interleaved
uses of the relevant functions could eat many such FDs.
In v11 and later, a query such as "SELECT pg_ls_waldir() LIMIT 1"
yields a warning about the leaked FD, and the only reason there's
no warning in earlier branches is that fd.c didn't whine about such
leaks before commit 9cb7db3f0. Even disregarding the warning, it
wouldn't be too hard to run a backend out of FDs with careless use
of these SQL functions.
Hence, rewrite the function so that it reads the directory within
a single call, returning the results as a tuplestore rather than
via value-per-call mode.
There are half a dozen other built-in SRFs with similar problems,
but let's fix this one to start with, just to see if the buildfarm
finds anything wrong with the code.
In passing, fix bogus error report for stat() failure: it was
whining about the directory when it should be fingering the
individual file. Doubtless a copy-and-paste error.
Back-patch to v10 where this function was added.
Justin Pryzby, with cosmetic tweaks and test cases by me
Discussion: https://postgr.es/m/20200308173103.GC1357@telsasoft.com
2020-03-11 20:27:59 +01:00
|
|
|
snprintf(path, sizeof(path), "%s/%s", dir, de->d_name);
|
2017-03-16 20:05:02 +01:00
|
|
|
if (stat(path, &attrib) < 0)
|
2020-03-31 18:57:55 +02:00
|
|
|
{
|
|
|
|
/* Ignore concurrently-deleted files, else complain */
|
|
|
|
if (errno == ENOENT)
|
|
|
|
continue;
|
2017-03-16 20:05:02 +01:00
|
|
|
ereport(ERROR,
|
|
|
|
(errcode_for_file_access(),
|
Avoid holding a directory FD open across pg_ls_dir_files() calls.
This coding technique is undesirable because (a) it leaks the FD for
the rest of the transaction if the SRF is not run to completion, and
(b) allocated FDs are a scarce resource, but multiple interleaved
uses of the relevant functions could eat many such FDs.
In v11 and later, a query such as "SELECT pg_ls_waldir() LIMIT 1"
yields a warning about the leaked FD, and the only reason there's
no warning in earlier branches is that fd.c didn't whine about such
leaks before commit 9cb7db3f0. Even disregarding the warning, it
wouldn't be too hard to run a backend out of FDs with careless use
of these SQL functions.
Hence, rewrite the function so that it reads the directory within
a single call, returning the results as a tuplestore rather than
via value-per-call mode.
There are half a dozen other built-in SRFs with similar problems,
but let's fix this one to start with, just to see if the buildfarm
finds anything wrong with the code.
In passing, fix bogus error report for stat() failure: it was
whining about the directory when it should be fingering the
individual file. Doubtless a copy-and-paste error.
Back-patch to v10 where this function was added.
Justin Pryzby, with cosmetic tweaks and test cases by me
Discussion: https://postgr.es/m/20200308173103.GC1357@telsasoft.com
2020-03-11 20:27:59 +01:00
|
|
|
errmsg("could not stat file \"%s\": %m", path)));
|
2020-03-31 18:57:55 +02:00
|
|
|
}
|
2017-03-16 20:05:02 +01:00
|
|
|
|
|
|
|
/* Ignore anything but regular files */
|
|
|
|
if (!S_ISREG(attrib.st_mode))
|
|
|
|
continue;
|
|
|
|
|
|
|
|
values[0] = CStringGetTextDatum(de->d_name);
|
|
|
|
values[1] = Int64GetDatum((int64) attrib.st_size);
|
|
|
|
values[2] = TimestampTzGetDatum(time_t_to_timestamptz(attrib.st_mtime));
|
|
|
|
memset(nulls, 0, sizeof(nulls));
|
|
|
|
|
Create routine able to set single-call SRFs for Materialize mode
Set-returning functions that use the Materialize mode, creating a
tuplestore to include all the tuples returned in a set rather than doing
so in multiple calls, use roughly the same set of steps to prepare
ReturnSetInfo for this job:
- Check if ReturnSetInfo supports returning a tuplestore and if the
materialize mode is enabled.
- Create a tuplestore for all the tuples part of the returned set in the
per-query memory context, stored in ReturnSetInfo->setResult.
- Build a tuple descriptor mostly from get_call_result_type(), then
stored in ReturnSetInfo->setDesc. Note that there are some cases where
the SRF's tuple descriptor has to be the one specified by the function
caller.
This refactoring is done so as there are (well, should be) no behavior
changes in any of the in-core functions refactored, and the centralized
function that checks and sets up the function's ReturnSetInfo can be
controlled with a set of bits32 options. Two of them prove to be
necessary now:
- SRF_SINGLE_USE_EXPECTED to use expectedDesc as tuple descriptor, as
expected by the function's caller.
- SRF_SINGLE_BLESS to validate the tuple descriptor for the SRF.
The same initialization pattern is simplified in 28 places per my
count as of src/backend/, shaving up to ~900 lines of code. These
mostly come from the removal of the per-query initializations and the
sanity checks now grouped in a single location. There are more
locations that could be simplified in contrib/, that are left for a
follow-up cleanup.
fcc2817, 07daca5 and d61a361 have prepared the areas of the code related
to this change, to ease this refactoring.
Author: Melanie Plageman, Michael Paquier
Reviewed-by: Álvaro Herrera, Justin Pryzby
Discussion: https://postgr.es/m/CAAKRu_azyd1Z3W_r7Ou4sorTjRCs+PxeHw1CWJeXKofkE6TuZg@mail.gmail.com
2022-03-07 02:26:29 +01:00
|
|
|
tuplestore_putvalues(rsinfo->setResult, rsinfo->setDesc, values, nulls);
|
2017-03-16 20:05:02 +01:00
|
|
|
}
|
|
|
|
|
Avoid holding a directory FD open across pg_ls_dir_files() calls.
This coding technique is undesirable because (a) it leaks the FD for
the rest of the transaction if the SRF is not run to completion, and
(b) allocated FDs are a scarce resource, but multiple interleaved
uses of the relevant functions could eat many such FDs.
In v11 and later, a query such as "SELECT pg_ls_waldir() LIMIT 1"
yields a warning about the leaked FD, and the only reason there's
no warning in earlier branches is that fd.c didn't whine about such
leaks before commit 9cb7db3f0. Even disregarding the warning, it
wouldn't be too hard to run a backend out of FDs with careless use
of these SQL functions.
Hence, rewrite the function so that it reads the directory within
a single call, returning the results as a tuplestore rather than
via value-per-call mode.
There are half a dozen other built-in SRFs with similar problems,
but let's fix this one to start with, just to see if the buildfarm
finds anything wrong with the code.
In passing, fix bogus error report for stat() failure: it was
whining about the directory when it should be fingering the
individual file. Doubtless a copy-and-paste error.
Back-patch to v10 where this function was added.
Justin Pryzby, with cosmetic tweaks and test cases by me
Discussion: https://postgr.es/m/20200308173103.GC1357@telsasoft.com
2020-03-11 20:27:59 +01:00
|
|
|
FreeDir(dirdesc);
|
|
|
|
return (Datum) 0;
|
2017-03-16 20:05:02 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
/* Function to return the list of files in the log directory */
|
|
|
|
Datum
|
|
|
|
pg_ls_logdir(PG_FUNCTION_ARGS)
|
|
|
|
{
|
2018-10-05 02:21:48 +02:00
|
|
|
return pg_ls_dir_files(fcinfo, Log_directory, false);
|
2017-03-16 20:05:02 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
/* Function to return the list of files in the WAL directory */
|
|
|
|
Datum
|
|
|
|
pg_ls_waldir(PG_FUNCTION_ARGS)
|
|
|
|
{
|
2018-10-05 02:21:48 +02:00
|
|
|
return pg_ls_dir_files(fcinfo, XLOGDIR, false);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Generic function to return the list of files in pgsql_tmp
|
|
|
|
*/
|
|
|
|
static Datum
|
|
|
|
pg_ls_tmpdir(FunctionCallInfo fcinfo, Oid tblspc)
|
|
|
|
{
|
|
|
|
char path[MAXPGPATH];
|
|
|
|
|
|
|
|
if (!SearchSysCacheExists1(TABLESPACEOID, ObjectIdGetDatum(tblspc)))
|
|
|
|
ereport(ERROR,
|
|
|
|
(errcode(ERRCODE_UNDEFINED_OBJECT),
|
|
|
|
errmsg("tablespace with OID %u does not exist",
|
|
|
|
tblspc)));
|
|
|
|
|
|
|
|
TempTablespacePath(path, tblspc);
|
|
|
|
return pg_ls_dir_files(fcinfo, path, true);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Function to return the list of temporary files in the pg_default tablespace's
|
|
|
|
* pgsql_tmp directory
|
|
|
|
*/
|
|
|
|
Datum
|
|
|
|
pg_ls_tmpdir_noargs(PG_FUNCTION_ARGS)
|
|
|
|
{
|
|
|
|
return pg_ls_tmpdir(fcinfo, DEFAULTTABLESPACE_OID);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Function to return the list of temporary files in the specified tablespace's
|
|
|
|
* pgsql_tmp directory
|
|
|
|
*/
|
|
|
|
Datum
|
|
|
|
pg_ls_tmpdir_1arg(PG_FUNCTION_ARGS)
|
|
|
|
{
|
|
|
|
return pg_ls_tmpdir(fcinfo, PG_GETARG_OID(0));
|
2017-03-16 20:05:02 +01:00
|
|
|
}
|
2018-10-09 15:29:09 +02:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Function to return the list of files in the WAL archive status directory.
|
|
|
|
*/
|
|
|
|
Datum
|
|
|
|
pg_ls_archive_statusdir(PG_FUNCTION_ARGS)
|
|
|
|
{
|
|
|
|
return pg_ls_dir_files(fcinfo, XLOGDIR "/archive_status", true);
|
|
|
|
}
|
2021-11-23 11:29:42 +01:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Function to return the list of files in the pg_logical/snapshots directory.
|
|
|
|
*/
|
|
|
|
Datum
|
|
|
|
pg_ls_logicalsnapdir(PG_FUNCTION_ARGS)
|
|
|
|
{
|
|
|
|
return pg_ls_dir_files(fcinfo, "pg_logical/snapshots", false);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Function to return the list of files in the pg_logical/mappings directory.
|
|
|
|
*/
|
|
|
|
Datum
|
|
|
|
pg_ls_logicalmapdir(PG_FUNCTION_ARGS)
|
|
|
|
{
|
|
|
|
return pg_ls_dir_files(fcinfo, "pg_logical/mappings", false);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Function to return the list of files in the pg_replslot/<replication_slot>
|
|
|
|
* directory.
|
|
|
|
*/
|
|
|
|
Datum
|
|
|
|
pg_ls_replslotdir(PG_FUNCTION_ARGS)
|
|
|
|
{
|
|
|
|
text *slotname_t;
|
|
|
|
char path[MAXPGPATH];
|
|
|
|
char *slotname;
|
|
|
|
|
|
|
|
slotname_t = PG_GETARG_TEXT_PP(0);
|
|
|
|
|
|
|
|
slotname = text_to_cstring(slotname_t);
|
|
|
|
|
|
|
|
if (!SearchNamedReplicationSlot(slotname, true))
|
|
|
|
ereport(ERROR,
|
|
|
|
(errcode(ERRCODE_UNDEFINED_OBJECT),
|
|
|
|
errmsg("replication slot \"%s\" does not exist",
|
|
|
|
slotname)));
|
|
|
|
|
|
|
|
snprintf(path, sizeof(path), "pg_replslot/%s", slotname);
|
|
|
|
return pg_ls_dir_files(fcinfo, path, false);
|
|
|
|
}
|