postgresql/src/timezone/pgtz.c

Ignoring revisions in .git-blame-ignore-revs. Click here to bypass and see the normal blame view.

498 lines
13 KiB
C
Raw Normal View History

/*-------------------------------------------------------------------------
*
* pgtz.c
* Timezone Library Integration Functions
*
* Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
*
* IDENTIFICATION
2010-09-20 22:08:53 +02:00
* src/timezone/pgtz.c
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#include <ctype.h>
#include <fcntl.h>
#include <time.h>
#include "common/file_utils.h"
#include "datatype/timestamp.h"
#include "miscadmin.h"
#include "pgtz.h"
#include "storage/fd.h"
#include "utils/hsearch.h"
/* Current session timezone (controlled by TimeZone GUC) */
pg_tz *session_timezone = NULL;
/* Current log timezone (controlled by log_timezone GUC) */
pg_tz *log_timezone = NULL;
static bool scan_directory_ci(const char *dirname,
const char *fname, int fnamelen,
char *canonname, int canonnamelen);
/*
* Return full pathname of timezone data directory
*/
static const char *
pg_TZDIR(void)
{
#ifndef SYSTEMTZDIR
/* normal case: timezone stuff is under our share dir */
static bool done_tzdir = false;
static char tzdir[MAXPGPATH];
if (done_tzdir)
return tzdir;
get_share_path(my_exec_path, tzdir);
strlcpy(tzdir + strlen(tzdir), "/timezone", MAXPGPATH - strlen(tzdir));
done_tzdir = true;
return tzdir;
#else
/* we're configured to use system's timezone database */
return SYSTEMTZDIR;
#endif
}
/*
* Given a timezone name, open() the timezone data file. Return the
* file descriptor if successful, -1 if not.
*
* The input name is searched for case-insensitively (we assume that the
* timezone database does not contain case-equivalent names).
*
* If "canonname" is not NULL, then on success the canonical spelling of the
* given name is stored there (the buffer must be > TZ_STRLEN_MAX bytes!).
*/
int
pg_open_tzfile(const char *name, char *canonname)
{
const char *fname;
char fullname[MAXPGPATH];
int fullnamelen;
int orignamelen;
Improve performance of timezone loading, especially pg_timezone_names view. tzparse() would attempt to load the "posixrules" timezone database file on each call. That might seem like it would only be an issue when selecting a POSIX-style zone name rather than a zone defined in the timezone database, but it turns out that each zone definition file contains a POSIX-style zone string and tzload() will call tzparse() to parse that. Thus, when scanning the whole timezone file tree as we do in the pg_timezone_names view, "posixrules" was read repetitively for each zone definition file. Fix that by caching the file on first use within any given process. (We cache other zone definitions for the life of the process, so there seems little reason not to cache this one as well.) This probably won't help much in processes that never run pg_timezone_names, but even one additional SET of the timezone GUC would come out ahead. An even worse problem for pg_timezone_names is that pg_open_tzfile() has an inefficient way of identifying the canonical case of a zone name: it basically re-descends the directory tree to the zone file. That's not awful for an individual "SET timezone" operation, but it's pretty horrid when we're inspecting every zone in the database. And it's pointless too because we already know the canonical spelling, having just read it from the filesystem. Fix by teaching pg_open_tzfile() to avoid the directory search if it's not asked for the canonical name, and backfilling the proper result in pg_tzenumerate_next(). In combination these changes seem to make the pg_timezone_names view about 3x faster to read, for me. Since a scan of pg_timezone_names has up to now been one of the slowest queries in the regression tests, this should help some little bit for buildfarm cycle times. Back-patch to all supported branches, not so much because it's likely that users will care much about the view's performance as because tracking changes in the upstream IANA timezone code is really painful if we don't keep all the branches in sync. Discussion: https://postgr.es/m/27962.1493671706@sss.pgh.pa.us
2017-05-03 03:50:35 +02:00
/* Initialize fullname with base name of tzdata directory */
strlcpy(fullname, pg_TZDIR(), sizeof(fullname));
orignamelen = fullnamelen = strlen(fullname);
if (fullnamelen + 1 + strlen(name) >= MAXPGPATH)
return -1; /* not gonna fit */
/*
* If the caller doesn't need the canonical spelling, first just try to
* open the name as-is. This can be expected to succeed if the given name
* is already case-correct, or if the filesystem is case-insensitive; and
* we don't need to distinguish those situations if we aren't tasked with
* reporting the canonical spelling.
*/
if (canonname == NULL)
{
int result;
fullname[fullnamelen] = '/';
/* test above ensured this will fit: */
strcpy(fullname + fullnamelen + 1, name);
result = open(fullname, O_RDONLY | PG_BINARY, 0);
if (result >= 0)
return result;
/* If that didn't work, fall through to do it the hard way */
fullname[fullnamelen] = '\0';
Improve performance of timezone loading, especially pg_timezone_names view. tzparse() would attempt to load the "posixrules" timezone database file on each call. That might seem like it would only be an issue when selecting a POSIX-style zone name rather than a zone defined in the timezone database, but it turns out that each zone definition file contains a POSIX-style zone string and tzload() will call tzparse() to parse that. Thus, when scanning the whole timezone file tree as we do in the pg_timezone_names view, "posixrules" was read repetitively for each zone definition file. Fix that by caching the file on first use within any given process. (We cache other zone definitions for the life of the process, so there seems little reason not to cache this one as well.) This probably won't help much in processes that never run pg_timezone_names, but even one additional SET of the timezone GUC would come out ahead. An even worse problem for pg_timezone_names is that pg_open_tzfile() has an inefficient way of identifying the canonical case of a zone name: it basically re-descends the directory tree to the zone file. That's not awful for an individual "SET timezone" operation, but it's pretty horrid when we're inspecting every zone in the database. And it's pointless too because we already know the canonical spelling, having just read it from the filesystem. Fix by teaching pg_open_tzfile() to avoid the directory search if it's not asked for the canonical name, and backfilling the proper result in pg_tzenumerate_next(). In combination these changes seem to make the pg_timezone_names view about 3x faster to read, for me. Since a scan of pg_timezone_names has up to now been one of the slowest queries in the regression tests, this should help some little bit for buildfarm cycle times. Back-patch to all supported branches, not so much because it's likely that users will care much about the view's performance as because tracking changes in the upstream IANA timezone code is really painful if we don't keep all the branches in sync. Discussion: https://postgr.es/m/27962.1493671706@sss.pgh.pa.us
2017-05-03 03:50:35 +02:00
}
/*
* Loop to split the given name into directory levels; for each level,
* search using scan_directory_ci().
*/
fname = name;
for (;;)
{
const char *slashptr;
int fnamelen;
slashptr = strchr(fname, '/');
if (slashptr)
fnamelen = slashptr - fname;
else
fnamelen = strlen(fname);
if (!scan_directory_ci(fullname, fname, fnamelen,
fullname + fullnamelen + 1,
MAXPGPATH - fullnamelen - 1))
return -1;
fullname[fullnamelen++] = '/';
fullnamelen += strlen(fullname + fullnamelen);
if (slashptr)
fname = slashptr + 1;
else
break;
}
if (canonname)
strlcpy(canonname, fullname + orignamelen + 1, TZ_STRLEN_MAX + 1);
return open(fullname, O_RDONLY | PG_BINARY, 0);
}
/*
* Scan specified directory for a case-insensitive match to fname
* (of length fnamelen --- fname may not be null terminated!). If found,
* copy the actual filename into canonname and return true.
*/
static bool
scan_directory_ci(const char *dirname, const char *fname, int fnamelen,
char *canonname, int canonnamelen)
{
bool found = false;
DIR *dirdesc;
struct dirent *direntry;
dirdesc = AllocateDir(dirname);
Clean up assorted messiness around AllocateDir() usage. This patch fixes a couple of low-probability bugs that could lead to reporting an irrelevant errno value (and hence possibly a wrong SQLSTATE) concerning directory-open or file-open failures. It also fixes places where we took shortcuts in reporting such errors, either by using elog instead of ereport or by using ereport but forgetting to specify an errcode. And it eliminates a lot of just plain redundant error-handling code. In service of all this, export fd.c's formerly-static function ReadDirExtended, so that external callers can make use of the coding pattern dir = AllocateDir(path); while ((de = ReadDirExtended(dir, path, LOG)) != NULL) if they'd like to treat directory-open failures as mere LOG conditions rather than errors. Also fix FreeDir to be a no-op if we reach it with dir == NULL, as such a coding pattern would cause. Then, remove code at many call sites that was throwing an error or log message for AllocateDir failure, as ReadDir or ReadDirExtended can handle that job just fine. Aside from being a net code savings, this gets rid of a lot of not-quite-up-to-snuff reports, as mentioned above. (In some places these changes result in replacing a custom error message such as "could not open tablespace directory" with more generic wording "could not open directory", but it was agreed that the custom wording buys little as long as we report the directory name.) In some other call sites where we can't just remove code, change the error reports to be fully project-style-compliant. Also reorder code in restoreTwoPhaseData that was acquiring a lock between AllocateDir and ReadDir; in the unlikely but surely not impossible case that LWLockAcquire changes errno, AllocateDir failures would be misreported. There is no great value in opening the directory before acquiring TwoPhaseStateLock, so just do it in the other order. Also fix CheckXLogRemoved to guarantee that it preserves errno, as quite a number of call sites are implicitly assuming. (Again, it's unlikely but I think not impossible that errno could change during a SpinLockAcquire. If so, this function was broken for its own purposes as well as breaking callers.) And change a few places that were using not-per-project-style messages, such as "could not read directory" when "could not open directory" is more correct. Back-patch the exporting of ReadDirExtended, in case we have occasion to back-patch some fix that makes use of it; it's not needed right now but surely making it global is pretty harmless. Also back-patch the restoreTwoPhaseData and CheckXLogRemoved fixes. The rest of this is essentially cosmetic and need not get back-patched. Michael Paquier, with a bit of additional work by me Discussion: https://postgr.es/m/CAB7nPqRpOCxjiirHmebEFhXVTK7V5Jvw4bz82p7Oimtsm3TyZA@mail.gmail.com
2017-12-04 23:02:52 +01:00
while ((direntry = ReadDirExtended(dirdesc, dirname, LOG)) != NULL)
{
/*
* Ignore . and .., plus any other "hidden" files. This is a security
* measure to prevent access to files outside the timezone directory.
*/
if (direntry->d_name[0] == '.')
continue;
if (strlen(direntry->d_name) == fnamelen &&
pg_strncasecmp(direntry->d_name, fname, fnamelen) == 0)
{
/* Found our match */
strlcpy(canonname, direntry->d_name, canonnamelen);
found = true;
break;
}
}
FreeDir(dirdesc);
return found;
}
/*
* We keep loaded timezones in a hashtable so we don't have to
* load and parse the TZ definition file every time one is selected.
* Because we want timezone names to be found case-insensitively,
* the hash key is the uppercased name of the zone.
*/
typedef struct
{
/* tznameupper contains the all-upper-case name of the timezone */
char tznameupper[TZ_STRLEN_MAX + 1];
pg_tz tz;
} pg_tz_cache;
static HTAB *timezone_cache = NULL;
static bool
init_timezone_hashtable(void)
{
HASHCTL hash_ctl;
hash_ctl.keysize = TZ_STRLEN_MAX + 1;
hash_ctl.entrysize = sizeof(pg_tz_cache);
timezone_cache = hash_create("Timezones",
4,
&hash_ctl,
Improve hash_create()'s API for some added robustness. Invent a new flag bit HASH_STRINGS to specify C-string hashing, which was formerly the default; and add assertions insisting that exactly one of the bits HASH_STRINGS, HASH_BLOBS, and HASH_FUNCTION be set. This is in hopes of preventing recurrences of the type of oversight fixed in commit a1b8aa1e4 (i.e., mistakenly omitting HASH_BLOBS). Also, when HASH_STRINGS is specified, insist that the keysize be more than 8 bytes. This is a heuristic, but it should catch accidental use of HASH_STRINGS for integer or pointer keys. (Nearly all existing use-cases set the keysize to NAMEDATALEN or more, so there's little reason to think this restriction should be problematic.) Tweak hash_create() to insist that the HASH_ELEM flag be set, and remove the defaults it had for keysize and entrysize. Since those defaults were undocumented and basically useless, no callers omitted HASH_ELEM anyway. Also, remove memset's zeroing the HASHCTL parameter struct from those callers that had one. This has never been really necessary, and while it wasn't a bad coding convention it was confusing that some callers did it and some did not. We might as well save a few cycles by standardizing on "not". Also improve the documentation for hash_create(). In passing, improve reinit.c's usage of a hash table by storing the key as a binary Oid rather than a string; and, since that's a temporary hash table, allocate it in CurrentMemoryContext for neatness. Discussion: https://postgr.es/m/590625.1607878171@sss.pgh.pa.us
2020-12-15 17:38:53 +01:00
HASH_ELEM | HASH_STRINGS);
if (!timezone_cache)
return false;
return true;
}
/*
* Load a timezone from file or from cache.
* Does not verify that the timezone is acceptable!
*
* "GMT" is always interpreted as the tzparse() definition, without attempting
* to load a definition from the filesystem. This has a number of benefits:
* 1. It's guaranteed to succeed, so we don't have the failure mode wherein
* the bootstrap default timezone setting doesn't work (as could happen if
* the OS attempts to supply a leap-second-aware version of "GMT").
* 2. Because we aren't accessing the filesystem, we can safely initialize
* the "GMT" zone definition before my_exec_path is known.
* 3. It's quick enough that we don't waste much time when the bootstrap
* default timezone setting is later overridden from postgresql.conf.
*/
pg_tz *
pg_tzset(const char *tzname)
{
pg_tz_cache *tzp;
struct state tzstate;
char uppername[TZ_STRLEN_MAX + 1];
char canonname[TZ_STRLEN_MAX + 1];
char *p;
2005-10-15 04:49:52 +02:00
if (strlen(tzname) > TZ_STRLEN_MAX)
return NULL; /* not going to fit */
if (!timezone_cache)
if (!init_timezone_hashtable())
return NULL;
/*
* Upcase the given name to perform a case-insensitive hashtable search.
* (We could alternatively downcase it, but we prefer upcase so that we
* can get consistently upcased results from tzparse() in case the name is
* a POSIX-style timezone spec.)
*/
p = uppername;
while (*tzname)
*p++ = pg_toupper((unsigned char) *tzname++);
*p = '\0';
tzp = (pg_tz_cache *) hash_search(timezone_cache,
uppername,
HASH_FIND,
NULL);
if (tzp)
{
/* Timezone found in cache, nothing more to do */
return &tzp->tz;
}
/*
* "GMT" is always sent to tzparse(), as per discussion above.
*/
if (strcmp(uppername, "GMT") == 0)
{
if (!tzparse(uppername, &tzstate, true))
{
/* This really, really should not happen ... */
elog(ERROR, "could not initialize GMT time zone");
}
/* Use uppercase name as canonical */
strcpy(canonname, uppername);
}
else if (tzload(uppername, canonname, &tzstate, true) != 0)
{
if (uppername[0] == ':' || !tzparse(uppername, &tzstate, false))
{
/* Unknown timezone. Fail our call instead of loading GMT! */
return NULL;
}
/* For POSIX timezone specs, use uppercase name as canonical */
strcpy(canonname, uppername);
}
/* Save timezone in the cache */
tzp = (pg_tz_cache *) hash_search(timezone_cache,
uppername,
HASH_ENTER,
NULL);
2005-10-15 04:49:52 +02:00
/* hash_search already copied uppername into the hash key */
strcpy(tzp->tz.TZname, canonname);
memcpy(&tzp->tz.state, &tzstate, sizeof(tzstate));
return &tzp->tz;
}
Fix some odd behaviors when using a SQL-style simple GMT offset timezone. Formerly, when using a SQL-spec timezone setting with a fixed GMT offset (called a "brute force" timezone in the code), the session_timezone variable was not updated to match the nominal timezone; rather, all code was expected to ignore session_timezone if HasCTZSet was true. This is of course obviously fragile, though a search of the code finds only timeofday() failing to honor the rule. A bigger problem was that DetermineTimeZoneOffset() supposed that if its pg_tz parameter was pointer-equal to session_timezone, then HasCTZSet should override the parameter. This would cause datetime input containing an explicit zone name to be treated as referencing the brute-force zone instead, if the zone name happened to match the session timezone that had prevailed before installing the brute-force zone setting (as reported in bug #8572). The same malady could affect AT TIME ZONE operators. To fix, set up session_timezone so that it matches the brute-force zone specification, which we can do using the POSIX timezone definition syntax "<abbrev>offset", and get rid of the bogus lookaside check in DetermineTimeZoneOffset(). Aside from fixing the erroneous behavior in datetime parsing and AT TIME ZONE, this will cause the timeofday() function to print its result in the user-requested time zone rather than some previously-set zone. It might also affect results in third-party extensions, if there are any that make use of session_timezone without considering HasCTZSet, but in all cases the new behavior should be saner than before. Back-patch to all supported branches.
2013-11-01 17:13:18 +01:00
/*
* Load a fixed-GMT-offset timezone.
* This is used for SQL-spec SET TIME ZONE INTERVAL 'foo' cases.
* It's otherwise equivalent to pg_tzset().
*
* The GMT offset is specified in seconds, positive values meaning west of
* Greenwich (ie, POSIX not ISO sign convention). However, we use ISO
* sign convention in the displayable abbreviation for the zone.
*
* Caution: this can fail (return NULL) if the specified offset is outside
* the range allowed by the zic library.
Fix some odd behaviors when using a SQL-style simple GMT offset timezone. Formerly, when using a SQL-spec timezone setting with a fixed GMT offset (called a "brute force" timezone in the code), the session_timezone variable was not updated to match the nominal timezone; rather, all code was expected to ignore session_timezone if HasCTZSet was true. This is of course obviously fragile, though a search of the code finds only timeofday() failing to honor the rule. A bigger problem was that DetermineTimeZoneOffset() supposed that if its pg_tz parameter was pointer-equal to session_timezone, then HasCTZSet should override the parameter. This would cause datetime input containing an explicit zone name to be treated as referencing the brute-force zone instead, if the zone name happened to match the session timezone that had prevailed before installing the brute-force zone setting (as reported in bug #8572). The same malady could affect AT TIME ZONE operators. To fix, set up session_timezone so that it matches the brute-force zone specification, which we can do using the POSIX timezone definition syntax "<abbrev>offset", and get rid of the bogus lookaside check in DetermineTimeZoneOffset(). Aside from fixing the erroneous behavior in datetime parsing and AT TIME ZONE, this will cause the timeofday() function to print its result in the user-requested time zone rather than some previously-set zone. It might also affect results in third-party extensions, if there are any that make use of session_timezone without considering HasCTZSet, but in all cases the new behavior should be saner than before. Back-patch to all supported branches.
2013-11-01 17:13:18 +01:00
*/
pg_tz *
pg_tzset_offset(long gmtoffset)
{
long absoffset = (gmtoffset < 0) ? -gmtoffset : gmtoffset;
char offsetstr[64];
char tzname[128];
snprintf(offsetstr, sizeof(offsetstr),
"%02ld", absoffset / SECS_PER_HOUR);
absoffset %= SECS_PER_HOUR;
Fix some odd behaviors when using a SQL-style simple GMT offset timezone. Formerly, when using a SQL-spec timezone setting with a fixed GMT offset (called a "brute force" timezone in the code), the session_timezone variable was not updated to match the nominal timezone; rather, all code was expected to ignore session_timezone if HasCTZSet was true. This is of course obviously fragile, though a search of the code finds only timeofday() failing to honor the rule. A bigger problem was that DetermineTimeZoneOffset() supposed that if its pg_tz parameter was pointer-equal to session_timezone, then HasCTZSet should override the parameter. This would cause datetime input containing an explicit zone name to be treated as referencing the brute-force zone instead, if the zone name happened to match the session timezone that had prevailed before installing the brute-force zone setting (as reported in bug #8572). The same malady could affect AT TIME ZONE operators. To fix, set up session_timezone so that it matches the brute-force zone specification, which we can do using the POSIX timezone definition syntax "<abbrev>offset", and get rid of the bogus lookaside check in DetermineTimeZoneOffset(). Aside from fixing the erroneous behavior in datetime parsing and AT TIME ZONE, this will cause the timeofday() function to print its result in the user-requested time zone rather than some previously-set zone. It might also affect results in third-party extensions, if there are any that make use of session_timezone without considering HasCTZSet, but in all cases the new behavior should be saner than before. Back-patch to all supported branches.
2013-11-01 17:13:18 +01:00
if (absoffset != 0)
{
snprintf(offsetstr + strlen(offsetstr),
sizeof(offsetstr) - strlen(offsetstr),
":%02ld", absoffset / SECS_PER_MINUTE);
absoffset %= SECS_PER_MINUTE;
Fix some odd behaviors when using a SQL-style simple GMT offset timezone. Formerly, when using a SQL-spec timezone setting with a fixed GMT offset (called a "brute force" timezone in the code), the session_timezone variable was not updated to match the nominal timezone; rather, all code was expected to ignore session_timezone if HasCTZSet was true. This is of course obviously fragile, though a search of the code finds only timeofday() failing to honor the rule. A bigger problem was that DetermineTimeZoneOffset() supposed that if its pg_tz parameter was pointer-equal to session_timezone, then HasCTZSet should override the parameter. This would cause datetime input containing an explicit zone name to be treated as referencing the brute-force zone instead, if the zone name happened to match the session timezone that had prevailed before installing the brute-force zone setting (as reported in bug #8572). The same malady could affect AT TIME ZONE operators. To fix, set up session_timezone so that it matches the brute-force zone specification, which we can do using the POSIX timezone definition syntax "<abbrev>offset", and get rid of the bogus lookaside check in DetermineTimeZoneOffset(). Aside from fixing the erroneous behavior in datetime parsing and AT TIME ZONE, this will cause the timeofday() function to print its result in the user-requested time zone rather than some previously-set zone. It might also affect results in third-party extensions, if there are any that make use of session_timezone without considering HasCTZSet, but in all cases the new behavior should be saner than before. Back-patch to all supported branches.
2013-11-01 17:13:18 +01:00
if (absoffset != 0)
snprintf(offsetstr + strlen(offsetstr),
sizeof(offsetstr) - strlen(offsetstr),
":%02ld", absoffset);
}
if (gmtoffset > 0)
snprintf(tzname, sizeof(tzname), "<-%s>+%s",
offsetstr, offsetstr);
else
snprintf(tzname, sizeof(tzname), "<+%s>-%s",
offsetstr, offsetstr);
return pg_tzset(tzname);
}
/*
* Initialize timezone library
*
* This is called before GUC variable initialization begins. Its purpose
* is to ensure that log_timezone has a valid value before any logging GUC
* variables could become set to values that require elog.c to provide
* timestamps (e.g., log_line_prefix). We may as well initialize
* session_timezone to something valid, too.
*/
void
pg_timezone_initialize(void)
{
Split PGC_S_DEFAULT into two values, for true boot_val vs computed default. Failure to distinguish these cases is the real cause behind the recent reports of Windows builds crashing on 'infinity'::timestamp, which was directly due to failure to establish a value of timezone_abbreviations in postmaster child processes. The postmaster had the desired value, but write_one_nondefault_variable() didn't transmit it to backends. To fix that, invent a new value PGC_S_DYNAMIC_DEFAULT, and be sure to use that or PGC_S_ENV_VAR (as appropriate) for "default" settings that are computed during initialization. (We need both because there's at least one variable that could receive a value from either source.) This commit also fixes ProcessConfigFile's failure to restore the correct default value for certain GUC variables if they are set in postgresql.conf and then removed/commented out of the file. We have to recompute and reinstall the value for any GUC variable that could have received a value from PGC_S_DYNAMIC_DEFAULT or PGC_S_ENV_VAR sources, and there were a number of oversights. (That whole thing is a crock that needs to be redesigned, but not today.) However, I intentionally didn't make it work "exactly right" for the cases of timezone and log_timezone. The exactly right behavior would involve running select_default_timezone, which we'd have to do independently in each postgres process, causing the whole database to become entirely unresponsive for as much as several seconds. That didn't seem like a good idea, especially since the variable's removal from postgresql.conf might be just an accidental edit. Instead the behavior is to adopt the previously active setting as if it were default. Note that this patch creates an ABI break for extensions that use any of the PGC_S_XXX constants; they'll need to be recompiled.
2011-05-12 01:57:38 +02:00
/*
* We may not yet know where PGSHAREDIR is (in particular this is true in
* an EXEC_BACKEND subprocess). So use "GMT", which pg_tzset forces to be
* interpreted without reference to the filesystem. This corresponds to
* the bootstrap default for these variables in guc_tables.c, although in
* principle it could be different.
Split PGC_S_DEFAULT into two values, for true boot_val vs computed default. Failure to distinguish these cases is the real cause behind the recent reports of Windows builds crashing on 'infinity'::timestamp, which was directly due to failure to establish a value of timezone_abbreviations in postmaster child processes. The postmaster had the desired value, but write_one_nondefault_variable() didn't transmit it to backends. To fix that, invent a new value PGC_S_DYNAMIC_DEFAULT, and be sure to use that or PGC_S_ENV_VAR (as appropriate) for "default" settings that are computed during initialization. (We need both because there's at least one variable that could receive a value from either source.) This commit also fixes ProcessConfigFile's failure to restore the correct default value for certain GUC variables if they are set in postgresql.conf and then removed/commented out of the file. We have to recompute and reinstall the value for any GUC variable that could have received a value from PGC_S_DYNAMIC_DEFAULT or PGC_S_ENV_VAR sources, and there were a number of oversights. (That whole thing is a crock that needs to be redesigned, but not today.) However, I intentionally didn't make it work "exactly right" for the cases of timezone and log_timezone. The exactly right behavior would involve running select_default_timezone, which we'd have to do independently in each postgres process, causing the whole database to become entirely unresponsive for as much as several seconds. That didn't seem like a good idea, especially since the variable's removal from postgresql.conf might be just an accidental edit. Instead the behavior is to adopt the previously active setting as if it were default. Note that this patch creates an ABI break for extensions that use any of the PGC_S_XXX constants; they'll need to be recompiled.
2011-05-12 01:57:38 +02:00
*/
session_timezone = pg_tzset("GMT");
log_timezone = session_timezone;
}
/*
* Functions to enumerate available timezones
*
* Note that pg_tzenumerate_next() will return a pointer into the pg_tzenum
* structure, so the data is only valid up to the next call.
*
* All data is allocated using palloc in the current context.
*/
#define MAX_TZDIR_DEPTH 10
struct pg_tzenum
{
int baselen;
int depth;
DIR *dirdesc[MAX_TZDIR_DEPTH];
char *dirname[MAX_TZDIR_DEPTH];
struct pg_tz tz;
};
2006-10-04 02:30:14 +02:00
/* typedef pg_tzenum is declared in pgtime.h */
pg_tzenum *
pg_tzenumerate_start(void)
{
pg_tzenum *ret = (pg_tzenum *) palloc0(sizeof(pg_tzenum));
char *startdir = pstrdup(pg_TZDIR());
ret->baselen = strlen(startdir) + 1;
ret->depth = 0;
ret->dirname[0] = startdir;
ret->dirdesc[0] = AllocateDir(startdir);
if (!ret->dirdesc[0])
ereport(ERROR,
(errcode_for_file_access(),
errmsg("could not open directory \"%s\": %m", startdir)));
return ret;
}
void
pg_tzenumerate_end(pg_tzenum *dir)
{
while (dir->depth >= 0)
{
FreeDir(dir->dirdesc[dir->depth]);
pfree(dir->dirname[dir->depth]);
dir->depth--;
}
pfree(dir);
}
pg_tz *
pg_tzenumerate_next(pg_tzenum *dir)
{
while (dir->depth >= 0)
{
struct dirent *direntry;
char fullname[MAXPGPATH * 2];
direntry = ReadDir(dir->dirdesc[dir->depth], dir->dirname[dir->depth]);
if (!direntry)
{
/* End of this directory */
FreeDir(dir->dirdesc[dir->depth]);
pfree(dir->dirname[dir->depth]);
dir->depth--;
continue;
}
if (direntry->d_name[0] == '.')
continue;
snprintf(fullname, sizeof(fullname), "%s/%s",
dir->dirname[dir->depth], direntry->d_name);
if (get_dirent_type(fullname, direntry, true, ERROR) == PGFILETYPE_DIR)
{
/* Step into the subdirectory */
if (dir->depth >= MAX_TZDIR_DEPTH - 1)
ereport(ERROR,
(errmsg_internal("timezone directory stack overflow")));
dir->depth++;
dir->dirname[dir->depth] = pstrdup(fullname);
dir->dirdesc[dir->depth] = AllocateDir(fullname);
if (!dir->dirdesc[dir->depth])
ereport(ERROR,
(errcode_for_file_access(),
errmsg("could not open directory \"%s\": %m",
fullname)));
/* Start over reading in the new directory */
continue;
}
/*
* Load this timezone using tzload() not pg_tzset(), so we don't fill
Improve performance of timezone loading, especially pg_timezone_names view. tzparse() would attempt to load the "posixrules" timezone database file on each call. That might seem like it would only be an issue when selecting a POSIX-style zone name rather than a zone defined in the timezone database, but it turns out that each zone definition file contains a POSIX-style zone string and tzload() will call tzparse() to parse that. Thus, when scanning the whole timezone file tree as we do in the pg_timezone_names view, "posixrules" was read repetitively for each zone definition file. Fix that by caching the file on first use within any given process. (We cache other zone definitions for the life of the process, so there seems little reason not to cache this one as well.) This probably won't help much in processes that never run pg_timezone_names, but even one additional SET of the timezone GUC would come out ahead. An even worse problem for pg_timezone_names is that pg_open_tzfile() has an inefficient way of identifying the canonical case of a zone name: it basically re-descends the directory tree to the zone file. That's not awful for an individual "SET timezone" operation, but it's pretty horrid when we're inspecting every zone in the database. And it's pointless too because we already know the canonical spelling, having just read it from the filesystem. Fix by teaching pg_open_tzfile() to avoid the directory search if it's not asked for the canonical name, and backfilling the proper result in pg_tzenumerate_next(). In combination these changes seem to make the pg_timezone_names view about 3x faster to read, for me. Since a scan of pg_timezone_names has up to now been one of the slowest queries in the regression tests, this should help some little bit for buildfarm cycle times. Back-patch to all supported branches, not so much because it's likely that users will care much about the view's performance as because tracking changes in the upstream IANA timezone code is really painful if we don't keep all the branches in sync. Discussion: https://postgr.es/m/27962.1493671706@sss.pgh.pa.us
2017-05-03 03:50:35 +02:00
* the cache. Also, don't ask for the canonical spelling: we already
* know it, and pg_open_tzfile's way of finding it out is pretty
* inefficient.
*/
Improve performance of timezone loading, especially pg_timezone_names view. tzparse() would attempt to load the "posixrules" timezone database file on each call. That might seem like it would only be an issue when selecting a POSIX-style zone name rather than a zone defined in the timezone database, but it turns out that each zone definition file contains a POSIX-style zone string and tzload() will call tzparse() to parse that. Thus, when scanning the whole timezone file tree as we do in the pg_timezone_names view, "posixrules" was read repetitively for each zone definition file. Fix that by caching the file on first use within any given process. (We cache other zone definitions for the life of the process, so there seems little reason not to cache this one as well.) This probably won't help much in processes that never run pg_timezone_names, but even one additional SET of the timezone GUC would come out ahead. An even worse problem for pg_timezone_names is that pg_open_tzfile() has an inefficient way of identifying the canonical case of a zone name: it basically re-descends the directory tree to the zone file. That's not awful for an individual "SET timezone" operation, but it's pretty horrid when we're inspecting every zone in the database. And it's pointless too because we already know the canonical spelling, having just read it from the filesystem. Fix by teaching pg_open_tzfile() to avoid the directory search if it's not asked for the canonical name, and backfilling the proper result in pg_tzenumerate_next(). In combination these changes seem to make the pg_timezone_names view about 3x faster to read, for me. Since a scan of pg_timezone_names has up to now been one of the slowest queries in the regression tests, this should help some little bit for buildfarm cycle times. Back-patch to all supported branches, not so much because it's likely that users will care much about the view's performance as because tracking changes in the upstream IANA timezone code is really painful if we don't keep all the branches in sync. Discussion: https://postgr.es/m/27962.1493671706@sss.pgh.pa.us
2017-05-03 03:50:35 +02:00
if (tzload(fullname + dir->baselen, NULL, &dir->tz.state, true) != 0)
{
/* Zone could not be loaded, ignore it */
continue;
}
if (!pg_tz_acceptable(&dir->tz))
{
/* Ignore leap-second zones */
continue;
}
Improve performance of timezone loading, especially pg_timezone_names view. tzparse() would attempt to load the "posixrules" timezone database file on each call. That might seem like it would only be an issue when selecting a POSIX-style zone name rather than a zone defined in the timezone database, but it turns out that each zone definition file contains a POSIX-style zone string and tzload() will call tzparse() to parse that. Thus, when scanning the whole timezone file tree as we do in the pg_timezone_names view, "posixrules" was read repetitively for each zone definition file. Fix that by caching the file on first use within any given process. (We cache other zone definitions for the life of the process, so there seems little reason not to cache this one as well.) This probably won't help much in processes that never run pg_timezone_names, but even one additional SET of the timezone GUC would come out ahead. An even worse problem for pg_timezone_names is that pg_open_tzfile() has an inefficient way of identifying the canonical case of a zone name: it basically re-descends the directory tree to the zone file. That's not awful for an individual "SET timezone" operation, but it's pretty horrid when we're inspecting every zone in the database. And it's pointless too because we already know the canonical spelling, having just read it from the filesystem. Fix by teaching pg_open_tzfile() to avoid the directory search if it's not asked for the canonical name, and backfilling the proper result in pg_tzenumerate_next(). In combination these changes seem to make the pg_timezone_names view about 3x faster to read, for me. Since a scan of pg_timezone_names has up to now been one of the slowest queries in the regression tests, this should help some little bit for buildfarm cycle times. Back-patch to all supported branches, not so much because it's likely that users will care much about the view's performance as because tracking changes in the upstream IANA timezone code is really painful if we don't keep all the branches in sync. Discussion: https://postgr.es/m/27962.1493671706@sss.pgh.pa.us
2017-05-03 03:50:35 +02:00
/* OK, return the canonical zone name spelling. */
strlcpy(dir->tz.TZname, fullname + dir->baselen,
sizeof(dir->tz.TZname));
/* Timezone loaded OK. */
return &dir->tz;
}
/* Nothing more found */
return NULL;
}