postgresql/src/backend/utils/adt/misc.c


/*-------------------------------------------------------------------------
*
* misc.c
*
*
* Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
*
* IDENTIFICATION
* src/backend/utils/adt/misc.c
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#include <sys/file.h>
#include <dirent.h>
#include <math.h>
#include <unistd.h>
#include "access/sysattr.h"
#include "access/table.h"
#include "catalog/catalog.h"
#include "catalog/pg_tablespace.h"
#include "catalog/pg_type.h"
#include "commands/dbcommands.h"
#include "commands/tablespace.h"
#include "common/keywords.h"
#include "funcapi.h"
#include "miscadmin.h"
#include "parser/scansup.h"
#include "pgstat.h"
#include "postmaster/syslogger.h"
#include "rewrite/rewriteHandler.h"
#include "storage/fd.h"
#include "tcop/tcopprot.h"
#include "utils/builtins.h"
#include "utils/lsyscache.h"
#include "utils/ruleutils.h"
#include "utils/timestamp.h"
/*
* Common subroutine for num_nulls() and num_nonnulls().
* Returns true if successful, false if function should return NULL.
* If successful, total argument count and number of nulls are
* returned into *nargs and *nulls.
*/
static bool
count_nulls(FunctionCallInfo fcinfo,
int32 *nargs, int32 *nulls)
{
int32 count = 0;
int i;
/* Did we get a VARIADIC array argument, or separate arguments? */
if (get_fn_expr_variadic(fcinfo->flinfo))
{
ArrayType *arr;
int ndims,
nitems,
*dims;
bits8 *bitmap;
Assert(PG_NARGS() == 1);
/*
* If we get a null as VARIADIC array argument, we can't say anything
* useful about the number of elements, so return NULL. This behavior
* is consistent with other variadic functions - see concat_internal.
*/
if (PG_ARGISNULL(0))
return false;
/*
* Non-null argument had better be an array. We assume that any call
* context that could let get_fn_expr_variadic return true will have
* checked that a VARIADIC-labeled parameter actually is an array. So
* it should be okay to just Assert that it's an array rather than
* doing a full-fledged error check.
*/
Assert(OidIsValid(get_base_element_type(get_fn_expr_argtype(fcinfo->flinfo, 0))));
/* OK, safe to fetch the array value */
arr = PG_GETARG_ARRAYTYPE_P(0);
/* Count the array elements */
ndims = ARR_NDIM(arr);
dims = ARR_DIMS(arr);
nitems = ArrayGetNItems(ndims, dims);
/* Count those that are NULL */
bitmap = ARR_NULLBITMAP(arr);
if (bitmap)
{
int bitmask = 1;
for (i = 0; i < nitems; i++)
{
if ((*bitmap & bitmask) == 0)
count++;
bitmask <<= 1;
if (bitmask == 0x100)
{
bitmap++;
bitmask = 1;
}
}
}
*nargs = nitems;
*nulls = count;
}
else
{
/* Separate arguments, so just count 'em */
for (i = 0; i < PG_NARGS(); i++)
{
if (PG_ARGISNULL(i))
count++;
}
*nargs = PG_NARGS();
*nulls = count;
}
return true;
}
/*
* num_nulls()
* Count the number of NULL arguments
*/
Datum
pg_num_nulls(PG_FUNCTION_ARGS)
{
int32 nargs,
nulls;
if (!count_nulls(fcinfo, &nargs, &nulls))
PG_RETURN_NULL();
PG_RETURN_INT32(nulls);
}
/*
* num_nonnulls()
* Count the number of non-NULL arguments
*/
Datum
pg_num_nonnulls(PG_FUNCTION_ARGS)
{
int32 nargs,
nulls;
if (!count_nulls(fcinfo, &nargs, &nulls))
PG_RETURN_NULL();
PG_RETURN_INT32(nargs - nulls);
}
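/*
 * Illustrative SQL usage for num_nulls()/num_nonnulls() (a sketch, not part
 * of the source proper): both the separate-argument and VARIADIC-array call
 * forms are handled by count_nulls() above.
 *
 *   SELECT num_nulls(1, NULL, 'two', NULL);              -- 2
 *   SELECT num_nonnulls(1, NULL, 'two', NULL);           -- 2
 *   SELECT num_nulls(VARIADIC ARRAY[1, NULL, 3, NULL]);  -- 2
 *   SELECT num_nulls(VARIADIC NULL::int[]);              -- NULL
 */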
/*
* current_database()
* Expose the current database to the user
*/
Datum
current_database(PG_FUNCTION_ARGS)
{
Name db;
db = (Name) palloc(NAMEDATALEN);
namestrcpy(db, get_database_name(MyDatabaseId));
PG_RETURN_NAME(db);
}
/*
* current_query()
* Expose the current query to the user (useful in stored procedures)
* We might want to use ActivePortal->sourceText someday.
*/
Datum
current_query(PG_FUNCTION_ARGS)
{
/* there is no easy way to access the more concise 'query_string' */
if (debug_query_string)
PG_RETURN_TEXT_P(cstring_to_text(debug_query_string));
else
PG_RETURN_NULL();
}
/* Function to find out which databases make use of a tablespace */
Datum
pg_tablespace_databases(PG_FUNCTION_ARGS)
{
Oid tablespaceOid = PG_GETARG_OID(0);
ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo;
bool randomAccess;
TupleDesc tupdesc;
Tuplestorestate *tupstore;
char *location;
DIR *dirdesc;
struct dirent *de;
MemoryContext oldcontext;
/* check to see if caller supports us returning a tuplestore */
if (rsinfo == NULL || !IsA(rsinfo, ReturnSetInfo))
ereport(ERROR,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("set-valued function called in context that cannot accept a set")));
if (!(rsinfo->allowedModes & SFRM_Materialize))
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("materialize mode required, but it is not allowed in this context")));
/* The tupdesc and tuplestore must be created in ecxt_per_query_memory */
oldcontext = MemoryContextSwitchTo(rsinfo->econtext->ecxt_per_query_memory);
tupdesc = CreateTemplateTupleDesc(1);
TupleDescInitEntry(tupdesc, (AttrNumber) 1, "pg_tablespace_databases",
OIDOID, -1, 0);
randomAccess = (rsinfo->allowedModes & SFRM_Materialize_Random) != 0;
tupstore = tuplestore_begin_heap(randomAccess, false, work_mem);
rsinfo->returnMode = SFRM_Materialize;
rsinfo->setResult = tupstore;
rsinfo->setDesc = tupdesc;
MemoryContextSwitchTo(oldcontext);
if (tablespaceOid == GLOBALTABLESPACE_OID)
{
ereport(WARNING,
(errmsg("global tablespace never has databases")));
/* return empty tuplestore */
return (Datum) 0;
}
if (tablespaceOid == DEFAULTTABLESPACE_OID)
location = psprintf("base");
else
location = psprintf("pg_tblspc/%u/%s", tablespaceOid,
TABLESPACE_VERSION_DIRECTORY);
dirdesc = AllocateDir(location);
if (!dirdesc)
{
/* the only expected error is ENOENT */
if (errno != ENOENT)
ereport(ERROR,
(errcode_for_file_access(),
errmsg("could not open directory \"%s\": %m",
location)));
ereport(WARNING,
(errmsg("%u is not a tablespace OID", tablespaceOid)));
/* return empty tuplestore */
return (Datum) 0;
}
while ((de = ReadDir(dirdesc, location)) != NULL)
{
Oid datOid = atooid(de->d_name);
char *subdir;
bool isempty;
Datum values[1];
bool nulls[1];
/* this test skips . and .., but is awfully weak */
if (!datOid)
continue;
/* if database subdir is empty, don't report tablespace as used */
subdir = psprintf("%s/%s", location, de->d_name);
isempty = directory_is_empty(subdir);
pfree(subdir);
if (isempty)
continue; /* indeed, nothing in it */
values[0] = ObjectIdGetDatum(datOid);
nulls[0] = false;
tuplestore_putvalues(tupstore, tupdesc, values, nulls);
}
FreeDir(dirdesc);
return (Datum) 0;
}
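/*
 * Illustrative SQL usage (a sketch; 'my_space' is a placeholder tablespace
 * name): list the OIDs of databases that have objects in a given tablespace.
 * The returned OIDs can be joined against pg_database.oid to obtain names.
 *
 *   SELECT pg_tablespace_databases(ts.oid) AS datoid
 *   FROM pg_tablespace ts
 *   WHERE ts.spcname = 'my_space';
 */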
/*
* pg_tablespace_location - get location for a tablespace
*/
Datum
pg_tablespace_location(PG_FUNCTION_ARGS)
{
Oid tablespaceOid = PG_GETARG_OID(0);
char sourcepath[MAXPGPATH];
char targetpath[MAXPGPATH];
int rllen;
/*
* It's useful to apply this function to pg_class.reltablespace, wherein
* zero means "the database's default tablespace". So, rather than
* throwing an error for zero, we choose to assume that's what is meant.
*/
if (tablespaceOid == InvalidOid)
tablespaceOid = MyDatabaseTableSpace;
/*
* Return empty string for the cluster's default tablespaces
*/
if (tablespaceOid == DEFAULTTABLESPACE_OID ||
tablespaceOid == GLOBALTABLESPACE_OID)
PG_RETURN_TEXT_P(cstring_to_text(""));
#if defined(HAVE_READLINK) || defined(WIN32)
/*
* Find the location of the tablespace by reading the symbolic link that
* is in pg_tblspc/<oid>.
*/
snprintf(sourcepath, sizeof(sourcepath), "pg_tblspc/%u", tablespaceOid);
rllen = readlink(sourcepath, targetpath, sizeof(targetpath));
if (rllen < 0)
ereport(ERROR,
(errcode_for_file_access(),
errmsg("could not read symbolic link \"%s\": %m",
sourcepath)));
if (rllen >= sizeof(targetpath))
ereport(ERROR,
(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
errmsg("symbolic link \"%s\" target is too long",
sourcepath)));
targetpath[rllen] = '\0';
PG_RETURN_TEXT_P(cstring_to_text(targetpath));
#else
ereport(ERROR,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("tablespaces are not supported on this platform")));
PG_RETURN_NULL();
#endif
}
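/*
 * Illustrative SQL usage (a sketch): show each tablespace's on-disk path.
 * The built-in pg_default and pg_global tablespaces report an empty string,
 * as noted above.
 *
 *   SELECT spcname, pg_tablespace_location(oid) FROM pg_tablespace;
 */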
/*
* pg_sleep - delay for N seconds
*/
Datum
pg_sleep(PG_FUNCTION_ARGS)
{
float8 secs = PG_GETARG_FLOAT8(0);
float8 endtime;
/*
* We sleep using WaitLatch, to ensure that we'll wake up promptly if an
* important signal (such as SIGALRM or SIGINT) arrives. Because
* WaitLatch's upper limit of delay is INT_MAX milliseconds, and the user
* might ask for more than that, we sleep for at most 10 minutes and then
* loop.
*
* By computing the intended stop time initially, we avoid accumulation of
* extra delay across multiple sleeps. This also ensures we won't delay
* less than the specified time when WaitLatch is terminated early by a
* non-query-canceling signal such as SIGHUP.
*/
#define GetNowFloat() ((float8) GetCurrentTimestamp() / 1000000.0)
endtime = GetNowFloat() + secs;
for (;;)
{
float8 delay;
long delay_ms;
CHECK_FOR_INTERRUPTS();
delay = endtime - GetNowFloat();
if (delay >= 600.0)
delay_ms = 600000;
else if (delay > 0.0)
delay_ms = (long) ceil(delay * 1000.0);
else
break;
(void) WaitLatch(MyLatch,
WL_LATCH_SET | WL_TIMEOUT | WL_EXIT_ON_PM_DEATH,
delay_ms,
WAIT_EVENT_PG_SLEEP);
ResetLatch(MyLatch);
}
PG_RETURN_VOID();
}
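/*
 * Illustrative SQL usage (a sketch): sleep for 1.5 seconds.  Because the
 * intended stop time is computed up front, repeated WaitLatch wakeups do not
 * stretch the total delay.
 *
 *   SELECT pg_sleep(1.5);
 */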
/* Function to return the list of grammar keywords */
Datum
pg_get_keywords(PG_FUNCTION_ARGS)
{
FuncCallContext *funcctx;
if (SRF_IS_FIRSTCALL())
{
MemoryContext oldcontext;
TupleDesc tupdesc;
funcctx = SRF_FIRSTCALL_INIT();
oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
tupdesc = CreateTemplateTupleDesc(3);
TupleDescInitEntry(tupdesc, (AttrNumber) 1, "word",
TEXTOID, -1, 0);
TupleDescInitEntry(tupdesc, (AttrNumber) 2, "catcode",
CHAROID, -1, 0);
TupleDescInitEntry(tupdesc, (AttrNumber) 3, "catdesc",
TEXTOID, -1, 0);
funcctx->attinmeta = TupleDescGetAttInMetadata(tupdesc);
MemoryContextSwitchTo(oldcontext);
}
funcctx = SRF_PERCALL_SETUP();
if (funcctx->call_cntr < ScanKeywords.num_keywords)
{
char *values[3];
HeapTuple tuple;
/* cast-away-const is ugly but alternatives aren't much better */
values[0] = unconstify(char *,
GetScanKeyword(funcctx->call_cntr,
&ScanKeywords));
switch (ScanKeywordCategories[funcctx->call_cntr])
{
case UNRESERVED_KEYWORD:
values[1] = "U";
values[2] = _("unreserved");
break;
case COL_NAME_KEYWORD:
values[1] = "C";
values[2] = _("unreserved (cannot be function or type name)");
break;
case TYPE_FUNC_NAME_KEYWORD:
values[1] = "T";
values[2] = _("reserved (can be function or type name)");
break;
case RESERVED_KEYWORD:
values[1] = "R";
values[2] = _("reserved");
break;
default: /* shouldn't be possible */
values[1] = NULL;
values[2] = NULL;
break;
}
tuple = BuildTupleFromCStrings(funcctx->attinmeta, values);
SRF_RETURN_NEXT(funcctx, HeapTupleGetDatum(tuple));
}
SRF_RETURN_DONE(funcctx);
}
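/*
 * Illustrative SQL usage (a sketch): list the fully reserved keywords, using
 * the catcode values assigned in the switch above.
 *
 *   SELECT word FROM pg_get_keywords() WHERE catcode = 'R';
 */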
/*
* Return the type of the argument.
*/
Datum
pg_typeof(PG_FUNCTION_ARGS)
{
PG_RETURN_OID(get_fn_expr_argtype(fcinfo->flinfo, 0));
}
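/*
 * Illustrative SQL usage (a sketch): the result is the argument's resolved
 * type, reported as a regtype.
 *
 *   SELECT pg_typeof(33), pg_typeof(now());
 *   -- integer | timestamp with time zone
 */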
/*
* Implementation of the COLLATE FOR expression; returns the collation
* of the argument.
*/
Datum
pg_collation_for(PG_FUNCTION_ARGS)
{
Oid typeid;
Oid collid;
typeid = get_fn_expr_argtype(fcinfo->flinfo, 0);
if (!typeid)
PG_RETURN_NULL();
if (!type_is_collatable(typeid) && typeid != UNKNOWNOID)
ereport(ERROR,
(errcode(ERRCODE_DATATYPE_MISMATCH),
errmsg("collations are not supported by type %s",
format_type_be(typeid))));
collid = PG_GET_COLLATION();
if (!collid)
PG_RETURN_NULL();
PG_RETURN_TEXT_P(cstring_to_text(generate_collation_name(collid)));
}
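/*
 * Illustrative SQL usage (a sketch): pg_collation_for() underlies the
 * COLLATION FOR (...) expression.
 *
 *   SELECT collation for ('foo'::text);          -- "default"
 *   SELECT pg_collation_for('foo' COLLATE "C");  -- "C"
 *   SELECT pg_collation_for(42);                 -- ERROR: integer is not collatable
 */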
/*
* pg_relation_is_updatable - determine which update events the specified
* relation supports.
*
* This relies on relation_is_updatable() in rewriteHandler.c, which see
* for additional information.
*/
Datum
pg_relation_is_updatable(PG_FUNCTION_ARGS)
{
Oid reloid = PG_GETARG_OID(0);
bool include_triggers = PG_GETARG_BOOL(1);
PG_RETURN_INT32(relation_is_updatable(reloid, NIL, include_triggers, NULL));
}
/*
* pg_column_is_updatable - determine whether a column is updatable
*
* This function encapsulates the decision about just what
* information_schema.columns.is_updatable actually means. It's not clear
* whether deletability of the column's relation should be required, so
* we want that decision in C code where we could change it without initdb.
*/
Datum
pg_column_is_updatable(PG_FUNCTION_ARGS)
{
Oid reloid = PG_GETARG_OID(0);
AttrNumber attnum = PG_GETARG_INT16(1);
AttrNumber col = attnum - FirstLowInvalidHeapAttributeNumber;
bool include_triggers = PG_GETARG_BOOL(2);
int events;
/* System columns are never updatable */
if (attnum <= 0)
PG_RETURN_BOOL(false);
events = relation_is_updatable(reloid, NIL, include_triggers,
bms_make_singleton(col));
/* We require both updatability and deletability of the relation */
#define REQ_EVENTS ((1 << CMD_UPDATE) | (1 << CMD_DELETE))
PG_RETURN_BOOL((events & REQ_EVENTS) == REQ_EVENTS);
}
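/*
 * Illustrative SQL usage (a sketch; 'my_view' is a placeholder): these
 * functions back the is_updatable columns of the information_schema views.
 *
 *   SELECT pg_relation_is_updatable('my_view'::regclass, false);
 *   -- bitmask of supported events
 *   SELECT pg_column_is_updatable('my_view'::regclass, 1::smallint, false);
 *   -- true only if both UPDATE and DELETE are supported, per REQ_EVENTS above
 */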
/*
* Is character a valid identifier start?
* Must match scan.l's {ident_start} character class.
*/
static bool
is_ident_start(unsigned char c)
{
/* Underscores and ASCII letters are OK */
if (c == '_')
return true;
if ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z'))
return true;
/* Any high-bit-set character is OK (might be part of a multibyte char) */
if (IS_HIGHBIT_SET(c))
return true;
return false;
}
/*
* Is character a valid identifier continuation?
* Must match scan.l's {ident_cont} character class.
*/
static bool
is_ident_cont(unsigned char c)
{
/* Can be digit or dollar sign ... */
if ((c >= '0' && c <= '9') || c == '$')
return true;
/* ... or an identifier start character */
return is_ident_start(c);
}
/*
* parse_ident - parse a SQL qualified identifier into separate identifiers.
* When strict mode is active (second parameter), then any chars after
* the last identifier are disallowed.
*/
Datum
parse_ident(PG_FUNCTION_ARGS)
{
text *qualname = PG_GETARG_TEXT_PP(0);
bool strict = PG_GETARG_BOOL(1);
char *qualname_str = text_to_cstring(qualname);
ArrayBuildState *astate = NULL;
char *nextp;
bool after_dot = false;
/*
* The code below scribbles on qualname_str in some cases, so we should
* reconvert qualname if we need to show the original string in error
* messages.
*/
nextp = qualname_str;
/* skip leading whitespace */
while (scanner_isspace(*nextp))
nextp++;
for (;;)
{
char *curname;
bool missing_ident = true;
if (*nextp == '"')
{
char *endp;
curname = nextp + 1;
for (;;)
{
endp = strchr(nextp + 1, '"');
if (endp == NULL)
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("string is not a valid identifier: \"%s\"",
text_to_cstring(qualname)),
errdetail("String has unclosed double quotes.")));
if (endp[1] != '"')
break;
memmove(endp, endp + 1, strlen(endp));
nextp = endp;
}
nextp = endp + 1;
*endp = '\0';
if (endp - curname == 0)
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("string is not a valid identifier: \"%s\"",
text_to_cstring(qualname)),
errdetail("Quoted identifier must not be empty.")));
astate = accumArrayResult(astate, CStringGetTextDatum(curname),
false, TEXTOID, CurrentMemoryContext);
missing_ident = false;
}
else if (is_ident_start((unsigned char) *nextp))
{
char *downname;
int len;
text *part;
curname = nextp++;
while (is_ident_cont((unsigned char) *nextp))
nextp++;
len = nextp - curname;
/*
* We don't implicitly truncate identifiers. This is useful for
* allowing the user to check for specific parts of the identifier
* being too long. It's easy enough for the user to get the
* truncated names by casting our output to name[].
*/
downname = downcase_identifier(curname, len, false, false);
part = cstring_to_text_with_len(downname, len);
astate = accumArrayResult(astate, PointerGetDatum(part), false,
TEXTOID, CurrentMemoryContext);
missing_ident = false;
}
if (missing_ident)
{
/* Different error messages based on where we failed. */
if (*nextp == '.')
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("string is not a valid identifier: \"%s\"",
text_to_cstring(qualname)),
errdetail("No valid identifier before \".\".")));
else if (after_dot)
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("string is not a valid identifier: \"%s\"",
text_to_cstring(qualname)),
errdetail("No valid identifier after \".\".")));
else
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("string is not a valid identifier: \"%s\"",
text_to_cstring(qualname))));
}
while (scanner_isspace(*nextp))
nextp++;
if (*nextp == '.')
{
after_dot = true;
nextp++;
while (scanner_isspace(*nextp))
nextp++;
}
else if (*nextp == '\0')
{
break;
}
else
{
if (strict)
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("string is not a valid identifier: \"%s\"",
text_to_cstring(qualname))));
break;
}
}
PG_RETURN_DATUM(makeArrayResult(astate, CurrentMemoryContext));
}
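/*
 * Illustrative SQL usage (a sketch): unquoted identifiers are downcased but
 * never truncated; quoted identifiers are kept as-is.  With strict mode
 * disabled, trailing characters after the last identifier are tolerated.
 *
 *   SELECT parse_ident('"SomeSchema".someTable');   -- {SomeSchema,sometable}
 *   SELECT parse_ident('foo.bar[]', false);         -- {foo,bar}
 */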
/*
* pg_current_logfile
*
* Report current log file used by log collector by scanning current_logfiles.
*/
Datum
pg_current_logfile(PG_FUNCTION_ARGS)
{
FILE *fd;
char lbuffer[MAXPGPATH];
char *logfmt;
char *log_filepath;
char *log_format = lbuffer;
char *nlpos;
/* The log format parameter is optional */
if (PG_NARGS() == 0 || PG_ARGISNULL(0))
logfmt = NULL;
else
{
logfmt = text_to_cstring(PG_GETARG_TEXT_PP(0));
if (strcmp(logfmt, "stderr") != 0 && strcmp(logfmt, "csvlog") != 0)
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("log format \"%s\" is not supported", logfmt),
errhint("The supported log formats are \"stderr\" and \"csvlog\".")));
}
fd = AllocateFile(LOG_METAINFO_DATAFILE, "r");
if (fd == NULL)
{
if (errno != ENOENT)
ereport(ERROR,
(errcode_for_file_access(),
errmsg("could not read file \"%s\": %m",
LOG_METAINFO_DATAFILE)));
PG_RETURN_NULL();
}
/*
* Read the file to gather current log filename(s) registered by the
* syslogger.
*/
while (fgets(lbuffer, sizeof(lbuffer), fd) != NULL)
{
/*
* Extract log format and log file path from the line; lbuffer ==
* log_format, they share storage.
*/
log_filepath = strchr(lbuffer, ' ');
if (log_filepath == NULL)
{
/* Uh oh. No space found, so file content is corrupted. */
elog(ERROR,
"missing space character in \"%s\"", LOG_METAINFO_DATAFILE);
break;
}
*log_filepath = '\0';
log_filepath++;
nlpos = strchr(log_filepath, '\n');
if (nlpos == NULL)
{
/* Uh oh. No newline found, so file content is corrupted. */
elog(ERROR,
"missing newline character in \"%s\"", LOG_METAINFO_DATAFILE);
break;
}
*nlpos = '\0';
if (logfmt == NULL || strcmp(logfmt, log_format) == 0)
{
FreeFile(fd);
PG_RETURN_TEXT_P(cstring_to_text(log_filepath));
}
}
/* Close the current log filename file. */
FreeFile(fd);
PG_RETURN_NULL();
}
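/*
 * Illustrative SQL usage (a sketch): the current_logfiles file only exists
 * while the logging collector is active; otherwise NULL is returned.
 *
 *   SELECT pg_current_logfile();          -- first format listed in current_logfiles
 *   SELECT pg_current_logfile('csvlog');  -- path of the current CSV log, if any
 */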
/*
* Report current log file used by log collector (1 argument version)
*
* note: this wrapper is necessary to pass the sanity check in opr_sanity,
* which checks that all built-in functions that share the implementing C
* function take the same number of arguments
*/
Datum
pg_current_logfile_1arg(PG_FUNCTION_ARGS)
{
return pg_current_logfile(fcinfo);
}
/*
* SQL wrapper around RelationGetReplicaIndex().
*/
Datum
pg_get_replica_identity_index(PG_FUNCTION_ARGS)
{
Oid reloid = PG_GETARG_OID(0);
Oid idxoid;
Relation rel;
rel = table_open(reloid, AccessShareLock);
idxoid = RelationGetReplicaIndex(rel);
table_close(rel, AccessShareLock);
if (OidIsValid(idxoid))
PG_RETURN_OID(idxoid);
else
PG_RETURN_NULL();
}
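/*
 * Illustrative SQL usage (a sketch; 'my_table' is a placeholder): returns the
 * index currently used as the table's replica identity, or NULL if none.
 *
 *   SELECT pg_get_replica_identity_index('my_table'::regclass);
 */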