postgresql/src/backend/utils/misc/tzparser.c

Ignoring revisions in .git-blame-ignore-revs. Click here to bypass and see the normal blame view.

485 lines
12 KiB
C
Raw Normal View History

/*-------------------------------------------------------------------------
*
* tzparser.c
* Functions for parsing timezone offset files
*
* Note: this code is invoked from the check_hook for the GUC variable
* timezone_abbreviations. Therefore, it should report problems using
* GUC_check_errmsg() and related functions, and try to avoid throwing
* elog(ERROR). This is not completely bulletproof at present --- in
* particular out-of-memory will throw an error. Could probably fix with
* PG_TRY if necessary.
*
*
* Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
2010-09-20 22:08:53 +02:00
* src/backend/utils/misc/tzparser.c
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#include <ctype.h>
#include "miscadmin.h"
#include "storage/fd.h"
#include "utils/guc.h"
#include "utils/memutils.h"
#include "utils/tzparser.h"
#define WHITESPACE " \t\n\r"
static bool validateTzEntry(tzEntry *tzentry);
static bool splitTzLine(const char *filename, int lineno,
char *line, tzEntry *tzentry);
static int addToArray(tzEntry **base, int *arraysize, int n,
tzEntry *entry, bool override);
static int ParseTzFile(const char *filename, int depth,
tzEntry **base, int *arraysize, int n);
/*
* Apply additional validation checks to a tzEntry
*
* Returns true if OK, else false
*/
static bool
validateTzEntry(tzEntry *tzentry)
{
unsigned char *p;
/*
* Check restrictions imposed by datetktbl storage format (see datetime.c)
*/
if (strlen(tzentry->abbrev) > TOKMAXLEN)
{
GUC_check_errmsg("time zone abbreviation \"%s\" is too long (maximum %d characters) in time zone file \"%s\", line %d",
tzentry->abbrev, TOKMAXLEN,
tzentry->filename, tzentry->lineno);
return false;
}
/*
* Sanity-check the offset: shouldn't exceed 14 hours
*/
if (tzentry->offset > 14 * 60 * 60 ||
tzentry->offset < -14 * 60 * 60)
{
GUC_check_errmsg("time zone offset %d is out of range in time zone file \"%s\", line %d",
tzentry->offset,
tzentry->filename, tzentry->lineno);
return false;
}
/*
* Convert abbrev to lowercase (must match datetime.c's conversion)
*/
for (p = (unsigned char *) tzentry->abbrev; *p; p++)
*p = pg_tolower(*p);
return true;
}
/*
Support timezone abbreviations that sometimes change. Up to now, PG has assumed that any given timezone abbreviation (such as "EDT") represents a constant GMT offset in the usage of any particular region; we had a way to configure what that offset was, but not for it to be changeable over time. But, as with most things horological, this view of the world is too simplistic: there are numerous regions that have at one time or another switched to a different GMT offset but kept using the same timezone abbreviation. Almost the entire Russian Federation did that a few years ago, and later this month they're going to do it again. And there are similar examples all over the world. To cope with this, invent the notion of a "dynamic timezone abbreviation", which is one that is referenced to a particular underlying timezone (as defined in the IANA timezone database) and means whatever it currently means in that zone. For zones that use or have used daylight-savings time, the standard and DST abbreviations continue to have the property that you can specify standard or DST time and get that time offset whether or not DST was theoretically in effect at the time. However, the abbreviations mean what they meant at the time in question (or most recently before that time) rather than being absolutely fixed. The standard abbreviation-list files have been changed to use this behavior for abbreviations that have actually varied in meaning since 1970. The old simple-numeric definitions are kept for abbreviations that have not changed, since they are a bit faster to resolve. While this is clearly a new feature, it seems necessary to back-patch it into all active branches, because otherwise use of Russian zone abbreviations is going to become even more problematic than it already was. This change supersedes the changes in commit 513d06ded et al to modify the fixed meanings of the Russian abbreviations; since we've not shipped that yet, this will avoid an undesirably incompatible (not to mention incorrect) change in behavior for timestamps between 2011 and 2014. This patch makes some cosmetic changes in ecpglib to keep its usage of datetime lookup tables as similar as possible to the backend code, but doesn't do anything about the increasingly obsolete set of timezone abbreviation definitions that are hard-wired into ecpglib. Whatever we do about that will likely not be appropriate material for back-patching. Also, a potential free() of a garbage pointer after an out-of-memory failure in ecpglib has been fixed. This patch also fixes pre-existing bugs in DetermineTimeZoneOffset() that caused it to produce unexpected results near a timezone transition, if both the "before" and "after" states are marked as standard time. We'd only ever thought about or tested transitions between standard and DST time, but that's not what's happening when a zone simply redefines their base GMT offset. In passing, update the SGML documentation to refer to the Olson/zoneinfo/ zic timezone database as the "IANA" database, since it's now being maintained under the auspices of IANA.
2014-10-16 21:22:10 +02:00
* Attempt to parse the line as a timezone abbrev spec
*
* Valid formats are:
* name zone
* name offset dst
*
* Returns true if OK, else false; data is stored in *tzentry
*/
static bool
splitTzLine(const char *filename, int lineno, char *line, tzEntry *tzentry)
{
char *abbrev;
char *offset;
char *offset_endptr;
char *remain;
char *is_dst;
tzentry->lineno = lineno;
tzentry->filename = filename;
abbrev = strtok(line, WHITESPACE);
if (!abbrev)
{
GUC_check_errmsg("missing time zone abbreviation in time zone file \"%s\", line %d",
filename, lineno);
return false;
}
Support timezone abbreviations that sometimes change. Up to now, PG has assumed that any given timezone abbreviation (such as "EDT") represents a constant GMT offset in the usage of any particular region; we had a way to configure what that offset was, but not for it to be changeable over time. But, as with most things horological, this view of the world is too simplistic: there are numerous regions that have at one time or another switched to a different GMT offset but kept using the same timezone abbreviation. Almost the entire Russian Federation did that a few years ago, and later this month they're going to do it again. And there are similar examples all over the world. To cope with this, invent the notion of a "dynamic timezone abbreviation", which is one that is referenced to a particular underlying timezone (as defined in the IANA timezone database) and means whatever it currently means in that zone. For zones that use or have used daylight-savings time, the standard and DST abbreviations continue to have the property that you can specify standard or DST time and get that time offset whether or not DST was theoretically in effect at the time. However, the abbreviations mean what they meant at the time in question (or most recently before that time) rather than being absolutely fixed. The standard abbreviation-list files have been changed to use this behavior for abbreviations that have actually varied in meaning since 1970. The old simple-numeric definitions are kept for abbreviations that have not changed, since they are a bit faster to resolve. While this is clearly a new feature, it seems necessary to back-patch it into all active branches, because otherwise use of Russian zone abbreviations is going to become even more problematic than it already was. This change supersedes the changes in commit 513d06ded et al to modify the fixed meanings of the Russian abbreviations; since we've not shipped that yet, this will avoid an undesirably incompatible (not to mention incorrect) change in behavior for timestamps between 2011 and 2014. This patch makes some cosmetic changes in ecpglib to keep its usage of datetime lookup tables as similar as possible to the backend code, but doesn't do anything about the increasingly obsolete set of timezone abbreviation definitions that are hard-wired into ecpglib. Whatever we do about that will likely not be appropriate material for back-patching. Also, a potential free() of a garbage pointer after an out-of-memory failure in ecpglib has been fixed. This patch also fixes pre-existing bugs in DetermineTimeZoneOffset() that caused it to produce unexpected results near a timezone transition, if both the "before" and "after" states are marked as standard time. We'd only ever thought about or tested transitions between standard and DST time, but that's not what's happening when a zone simply redefines their base GMT offset. In passing, update the SGML documentation to refer to the Olson/zoneinfo/ zic timezone database as the "IANA" database, since it's now being maintained under the auspices of IANA.
2014-10-16 21:22:10 +02:00
tzentry->abbrev = pstrdup(abbrev);
offset = strtok(NULL, WHITESPACE);
if (!offset)
{
GUC_check_errmsg("missing time zone offset in time zone file \"%s\", line %d",
filename, lineno);
return false;
}
Support timezone abbreviations that sometimes change. Up to now, PG has assumed that any given timezone abbreviation (such as "EDT") represents a constant GMT offset in the usage of any particular region; we had a way to configure what that offset was, but not for it to be changeable over time. But, as with most things horological, this view of the world is too simplistic: there are numerous regions that have at one time or another switched to a different GMT offset but kept using the same timezone abbreviation. Almost the entire Russian Federation did that a few years ago, and later this month they're going to do it again. And there are similar examples all over the world. To cope with this, invent the notion of a "dynamic timezone abbreviation", which is one that is referenced to a particular underlying timezone (as defined in the IANA timezone database) and means whatever it currently means in that zone. For zones that use or have used daylight-savings time, the standard and DST abbreviations continue to have the property that you can specify standard or DST time and get that time offset whether or not DST was theoretically in effect at the time. However, the abbreviations mean what they meant at the time in question (or most recently before that time) rather than being absolutely fixed. The standard abbreviation-list files have been changed to use this behavior for abbreviations that have actually varied in meaning since 1970. The old simple-numeric definitions are kept for abbreviations that have not changed, since they are a bit faster to resolve. While this is clearly a new feature, it seems necessary to back-patch it into all active branches, because otherwise use of Russian zone abbreviations is going to become even more problematic than it already was. This change supersedes the changes in commit 513d06ded et al to modify the fixed meanings of the Russian abbreviations; since we've not shipped that yet, this will avoid an undesirably incompatible (not to mention incorrect) change in behavior for timestamps between 2011 and 2014. This patch makes some cosmetic changes in ecpglib to keep its usage of datetime lookup tables as similar as possible to the backend code, but doesn't do anything about the increasingly obsolete set of timezone abbreviation definitions that are hard-wired into ecpglib. Whatever we do about that will likely not be appropriate material for back-patching. Also, a potential free() of a garbage pointer after an out-of-memory failure in ecpglib has been fixed. This patch also fixes pre-existing bugs in DetermineTimeZoneOffset() that caused it to produce unexpected results near a timezone transition, if both the "before" and "after" states are marked as standard time. We'd only ever thought about or tested transitions between standard and DST time, but that's not what's happening when a zone simply redefines their base GMT offset. In passing, update the SGML documentation to refer to the Olson/zoneinfo/ zic timezone database as the "IANA" database, since it's now being maintained under the auspices of IANA.
2014-10-16 21:22:10 +02:00
/* We assume zone names don't begin with a digit or sign */
if (isdigit((unsigned char) *offset) || *offset == '+' || *offset == '-')
{
Support timezone abbreviations that sometimes change. Up to now, PG has assumed that any given timezone abbreviation (such as "EDT") represents a constant GMT offset in the usage of any particular region; we had a way to configure what that offset was, but not for it to be changeable over time. But, as with most things horological, this view of the world is too simplistic: there are numerous regions that have at one time or another switched to a different GMT offset but kept using the same timezone abbreviation. Almost the entire Russian Federation did that a few years ago, and later this month they're going to do it again. And there are similar examples all over the world. To cope with this, invent the notion of a "dynamic timezone abbreviation", which is one that is referenced to a particular underlying timezone (as defined in the IANA timezone database) and means whatever it currently means in that zone. For zones that use or have used daylight-savings time, the standard and DST abbreviations continue to have the property that you can specify standard or DST time and get that time offset whether or not DST was theoretically in effect at the time. However, the abbreviations mean what they meant at the time in question (or most recently before that time) rather than being absolutely fixed. The standard abbreviation-list files have been changed to use this behavior for abbreviations that have actually varied in meaning since 1970. The old simple-numeric definitions are kept for abbreviations that have not changed, since they are a bit faster to resolve. While this is clearly a new feature, it seems necessary to back-patch it into all active branches, because otherwise use of Russian zone abbreviations is going to become even more problematic than it already was. This change supersedes the changes in commit 513d06ded et al to modify the fixed meanings of the Russian abbreviations; since we've not shipped that yet, this will avoid an undesirably incompatible (not to mention incorrect) change in behavior for timestamps between 2011 and 2014. This patch makes some cosmetic changes in ecpglib to keep its usage of datetime lookup tables as similar as possible to the backend code, but doesn't do anything about the increasingly obsolete set of timezone abbreviation definitions that are hard-wired into ecpglib. Whatever we do about that will likely not be appropriate material for back-patching. Also, a potential free() of a garbage pointer after an out-of-memory failure in ecpglib has been fixed. This patch also fixes pre-existing bugs in DetermineTimeZoneOffset() that caused it to produce unexpected results near a timezone transition, if both the "before" and "after" states are marked as standard time. We'd only ever thought about or tested transitions between standard and DST time, but that's not what's happening when a zone simply redefines their base GMT offset. In passing, update the SGML documentation to refer to the Olson/zoneinfo/ zic timezone database as the "IANA" database, since it's now being maintained under the auspices of IANA.
2014-10-16 21:22:10 +02:00
tzentry->zone = NULL;
tzentry->offset = strtol(offset, &offset_endptr, 10);
if (offset_endptr == offset || *offset_endptr != '\0')
{
GUC_check_errmsg("invalid number for time zone offset in time zone file \"%s\", line %d",
filename, lineno);
return false;
}
is_dst = strtok(NULL, WHITESPACE);
if (is_dst && pg_strcasecmp(is_dst, "D") == 0)
{
tzentry->is_dst = true;
remain = strtok(NULL, WHITESPACE);
}
else
{
/* there was no 'D' dst specifier */
tzentry->is_dst = false;
remain = is_dst;
}
}
else
{
Support timezone abbreviations that sometimes change. Up to now, PG has assumed that any given timezone abbreviation (such as "EDT") represents a constant GMT offset in the usage of any particular region; we had a way to configure what that offset was, but not for it to be changeable over time. But, as with most things horological, this view of the world is too simplistic: there are numerous regions that have at one time or another switched to a different GMT offset but kept using the same timezone abbreviation. Almost the entire Russian Federation did that a few years ago, and later this month they're going to do it again. And there are similar examples all over the world. To cope with this, invent the notion of a "dynamic timezone abbreviation", which is one that is referenced to a particular underlying timezone (as defined in the IANA timezone database) and means whatever it currently means in that zone. For zones that use or have used daylight-savings time, the standard and DST abbreviations continue to have the property that you can specify standard or DST time and get that time offset whether or not DST was theoretically in effect at the time. However, the abbreviations mean what they meant at the time in question (or most recently before that time) rather than being absolutely fixed. The standard abbreviation-list files have been changed to use this behavior for abbreviations that have actually varied in meaning since 1970. The old simple-numeric definitions are kept for abbreviations that have not changed, since they are a bit faster to resolve. While this is clearly a new feature, it seems necessary to back-patch it into all active branches, because otherwise use of Russian zone abbreviations is going to become even more problematic than it already was. This change supersedes the changes in commit 513d06ded et al to modify the fixed meanings of the Russian abbreviations; since we've not shipped that yet, this will avoid an undesirably incompatible (not to mention incorrect) change in behavior for timestamps between 2011 and 2014. This patch makes some cosmetic changes in ecpglib to keep its usage of datetime lookup tables as similar as possible to the backend code, but doesn't do anything about the increasingly obsolete set of timezone abbreviation definitions that are hard-wired into ecpglib. Whatever we do about that will likely not be appropriate material for back-patching. Also, a potential free() of a garbage pointer after an out-of-memory failure in ecpglib has been fixed. This patch also fixes pre-existing bugs in DetermineTimeZoneOffset() that caused it to produce unexpected results near a timezone transition, if both the "before" and "after" states are marked as standard time. We'd only ever thought about or tested transitions between standard and DST time, but that's not what's happening when a zone simply redefines their base GMT offset. In passing, update the SGML documentation to refer to the Olson/zoneinfo/ zic timezone database as the "IANA" database, since it's now being maintained under the auspices of IANA.
2014-10-16 21:22:10 +02:00
/*
* Assume entry is a zone name. We do not try to validate it by
* looking up the zone, because that would force loading of a lot of
* zones that probably will never be used in the current session.
*/
tzentry->zone = pstrdup(offset);
tzentry->offset = 0;
tzentry->is_dst = false;
Support timezone abbreviations that sometimes change. Up to now, PG has assumed that any given timezone abbreviation (such as "EDT") represents a constant GMT offset in the usage of any particular region; we had a way to configure what that offset was, but not for it to be changeable over time. But, as with most things horological, this view of the world is too simplistic: there are numerous regions that have at one time or another switched to a different GMT offset but kept using the same timezone abbreviation. Almost the entire Russian Federation did that a few years ago, and later this month they're going to do it again. And there are similar examples all over the world. To cope with this, invent the notion of a "dynamic timezone abbreviation", which is one that is referenced to a particular underlying timezone (as defined in the IANA timezone database) and means whatever it currently means in that zone. For zones that use or have used daylight-savings time, the standard and DST abbreviations continue to have the property that you can specify standard or DST time and get that time offset whether or not DST was theoretically in effect at the time. However, the abbreviations mean what they meant at the time in question (or most recently before that time) rather than being absolutely fixed. The standard abbreviation-list files have been changed to use this behavior for abbreviations that have actually varied in meaning since 1970. The old simple-numeric definitions are kept for abbreviations that have not changed, since they are a bit faster to resolve. While this is clearly a new feature, it seems necessary to back-patch it into all active branches, because otherwise use of Russian zone abbreviations is going to become even more problematic than it already was. This change supersedes the changes in commit 513d06ded et al to modify the fixed meanings of the Russian abbreviations; since we've not shipped that yet, this will avoid an undesirably incompatible (not to mention incorrect) change in behavior for timestamps between 2011 and 2014. This patch makes some cosmetic changes in ecpglib to keep its usage of datetime lookup tables as similar as possible to the backend code, but doesn't do anything about the increasingly obsolete set of timezone abbreviation definitions that are hard-wired into ecpglib. Whatever we do about that will likely not be appropriate material for back-patching. Also, a potential free() of a garbage pointer after an out-of-memory failure in ecpglib has been fixed. This patch also fixes pre-existing bugs in DetermineTimeZoneOffset() that caused it to produce unexpected results near a timezone transition, if both the "before" and "after" states are marked as standard time. We'd only ever thought about or tested transitions between standard and DST time, but that's not what's happening when a zone simply redefines their base GMT offset. In passing, update the SGML documentation to refer to the Olson/zoneinfo/ zic timezone database as the "IANA" database, since it's now being maintained under the auspices of IANA.
2014-10-16 21:22:10 +02:00
remain = strtok(NULL, WHITESPACE);
}
if (!remain) /* no more non-whitespace chars */
return true;
if (remain[0] != '#') /* must be a comment */
{
GUC_check_errmsg("invalid syntax in time zone file \"%s\", line %d",
filename, lineno);
return false;
}
return true;
}
/*
* Insert entry into sorted array
*
* *base: base address of array (changeable if must enlarge array)
* *arraysize: allocated length of array (changeable if must enlarge array)
* n: current number of valid elements in array
* entry: new data to insert
* override: true if OK to override
*
* Returns the new array length (new value for n), or -1 if error
*/
static int
addToArray(tzEntry **base, int *arraysize, int n,
tzEntry *entry, bool override)
{
tzEntry *arrayptr;
int low;
int high;
/*
* Search the array for a duplicate; as a useful side effect, the array is
* maintained in sorted order. We use strcmp() to ensure we match the
* sort order datetime.c expects.
*/
arrayptr = *base;
low = 0;
high = n - 1;
while (low <= high)
{
int mid = (low + high) >> 1;
tzEntry *midptr = arrayptr + mid;
int cmp;
cmp = strcmp(entry->abbrev, midptr->abbrev);
if (cmp < 0)
high = mid - 1;
else if (cmp > 0)
low = mid + 1;
else
{
/*
* Found a duplicate entry; complain unless it's the same.
*/
Support timezone abbreviations that sometimes change. Up to now, PG has assumed that any given timezone abbreviation (such as "EDT") represents a constant GMT offset in the usage of any particular region; we had a way to configure what that offset was, but not for it to be changeable over time. But, as with most things horological, this view of the world is too simplistic: there are numerous regions that have at one time or another switched to a different GMT offset but kept using the same timezone abbreviation. Almost the entire Russian Federation did that a few years ago, and later this month they're going to do it again. And there are similar examples all over the world. To cope with this, invent the notion of a "dynamic timezone abbreviation", which is one that is referenced to a particular underlying timezone (as defined in the IANA timezone database) and means whatever it currently means in that zone. For zones that use or have used daylight-savings time, the standard and DST abbreviations continue to have the property that you can specify standard or DST time and get that time offset whether or not DST was theoretically in effect at the time. However, the abbreviations mean what they meant at the time in question (or most recently before that time) rather than being absolutely fixed. The standard abbreviation-list files have been changed to use this behavior for abbreviations that have actually varied in meaning since 1970. The old simple-numeric definitions are kept for abbreviations that have not changed, since they are a bit faster to resolve. While this is clearly a new feature, it seems necessary to back-patch it into all active branches, because otherwise use of Russian zone abbreviations is going to become even more problematic than it already was. This change supersedes the changes in commit 513d06ded et al to modify the fixed meanings of the Russian abbreviations; since we've not shipped that yet, this will avoid an undesirably incompatible (not to mention incorrect) change in behavior for timestamps between 2011 and 2014. This patch makes some cosmetic changes in ecpglib to keep its usage of datetime lookup tables as similar as possible to the backend code, but doesn't do anything about the increasingly obsolete set of timezone abbreviation definitions that are hard-wired into ecpglib. Whatever we do about that will likely not be appropriate material for back-patching. Also, a potential free() of a garbage pointer after an out-of-memory failure in ecpglib has been fixed. This patch also fixes pre-existing bugs in DetermineTimeZoneOffset() that caused it to produce unexpected results near a timezone transition, if both the "before" and "after" states are marked as standard time. We'd only ever thought about or tested transitions between standard and DST time, but that's not what's happening when a zone simply redefines their base GMT offset. In passing, update the SGML documentation to refer to the Olson/zoneinfo/ zic timezone database as the "IANA" database, since it's now being maintained under the auspices of IANA.
2014-10-16 21:22:10 +02:00
if ((midptr->zone == NULL && entry->zone == NULL &&
midptr->offset == entry->offset &&
midptr->is_dst == entry->is_dst) ||
(midptr->zone != NULL && entry->zone != NULL &&
strcmp(midptr->zone, entry->zone) == 0))
{
/* return unchanged array */
return n;
}
if (override)
{
/* same abbrev but something is different, override */
Support timezone abbreviations that sometimes change. Up to now, PG has assumed that any given timezone abbreviation (such as "EDT") represents a constant GMT offset in the usage of any particular region; we had a way to configure what that offset was, but not for it to be changeable over time. But, as with most things horological, this view of the world is too simplistic: there are numerous regions that have at one time or another switched to a different GMT offset but kept using the same timezone abbreviation. Almost the entire Russian Federation did that a few years ago, and later this month they're going to do it again. And there are similar examples all over the world. To cope with this, invent the notion of a "dynamic timezone abbreviation", which is one that is referenced to a particular underlying timezone (as defined in the IANA timezone database) and means whatever it currently means in that zone. For zones that use or have used daylight-savings time, the standard and DST abbreviations continue to have the property that you can specify standard or DST time and get that time offset whether or not DST was theoretically in effect at the time. However, the abbreviations mean what they meant at the time in question (or most recently before that time) rather than being absolutely fixed. The standard abbreviation-list files have been changed to use this behavior for abbreviations that have actually varied in meaning since 1970. The old simple-numeric definitions are kept for abbreviations that have not changed, since they are a bit faster to resolve. While this is clearly a new feature, it seems necessary to back-patch it into all active branches, because otherwise use of Russian zone abbreviations is going to become even more problematic than it already was. This change supersedes the changes in commit 513d06ded et al to modify the fixed meanings of the Russian abbreviations; since we've not shipped that yet, this will avoid an undesirably incompatible (not to mention incorrect) change in behavior for timestamps between 2011 and 2014. This patch makes some cosmetic changes in ecpglib to keep its usage of datetime lookup tables as similar as possible to the backend code, but doesn't do anything about the increasingly obsolete set of timezone abbreviation definitions that are hard-wired into ecpglib. Whatever we do about that will likely not be appropriate material for back-patching. Also, a potential free() of a garbage pointer after an out-of-memory failure in ecpglib has been fixed. This patch also fixes pre-existing bugs in DetermineTimeZoneOffset() that caused it to produce unexpected results near a timezone transition, if both the "before" and "after" states are marked as standard time. We'd only ever thought about or tested transitions between standard and DST time, but that's not what's happening when a zone simply redefines their base GMT offset. In passing, update the SGML documentation to refer to the Olson/zoneinfo/ zic timezone database as the "IANA" database, since it's now being maintained under the auspices of IANA.
2014-10-16 21:22:10 +02:00
midptr->zone = entry->zone;
midptr->offset = entry->offset;
midptr->is_dst = entry->is_dst;
return n;
}
/* same abbrev but something is different, complain */
GUC_check_errmsg("time zone abbreviation \"%s\" is multiply defined",
entry->abbrev);
GUC_check_errdetail("Entry in time zone file \"%s\", line %d, conflicts with entry in file \"%s\", line %d.",
midptr->filename, midptr->lineno,
entry->filename, entry->lineno);
return -1;
}
}
/*
* No match, insert at position "low".
*/
if (n >= *arraysize)
{
*arraysize *= 2;
*base = (tzEntry *) repalloc(*base, *arraysize * sizeof(tzEntry));
}
arrayptr = *base + low;
memmove(arrayptr + 1, arrayptr, (n - low) * sizeof(tzEntry));
memcpy(arrayptr, entry, sizeof(tzEntry));
return n + 1;
}
/*
* Parse a single timezone abbrev file --- can recurse to handle @INCLUDE
*
* filename: user-specified file name (does not include path)
* depth: current recursion depth
* *base: array for results (changeable if must enlarge array)
* *arraysize: allocated length of array (changeable if must enlarge array)
* n: current number of valid elements in array
*
* Returns the new array length (new value for n), or -1 if error
*/
static int
ParseTzFile(const char *filename, int depth,
tzEntry **base, int *arraysize, int n)
{
char share_path[MAXPGPATH];
char file_path[MAXPGPATH];
FILE *tzFile;
char tzbuf[1024];
char *line;
tzEntry tzentry;
int lineno = 0;
bool override = false;
const char *p;
/*
* We enforce that the filename is all alpha characters. This may be
* overly restrictive, but we don't want to allow access to anything
* outside the timezonesets directory, so for instance '/' *must* be
* rejected.
*/
for (p = filename; *p; p++)
{
if (!isalpha((unsigned char) *p))
{
/* at level 0, just use guc.c's regular "invalid value" message */
if (depth > 0)
GUC_check_errmsg("invalid time zone file name \"%s\"",
filename);
return -1;
}
}
/*
* The maximal recursion depth is a pretty arbitrary setting. It is hard
* to imagine that someone needs more than 3 levels so stick with this
* conservative setting until someone complains.
*/
if (depth > 3)
{
GUC_check_errmsg("time zone file recursion limit exceeded in file \"%s\"",
filename);
return -1;
}
get_share_path(my_exec_path, share_path);
snprintf(file_path, sizeof(file_path), "%s/timezonesets/%s",
share_path, filename);
tzFile = AllocateFile(file_path, "r");
if (!tzFile)
{
/*
* Check to see if the problem is not the filename but the directory.
* This is worth troubling over because if the installation share/
* directory is missing or unreadable, this is likely to be the first
* place we notice a problem during postmaster startup.
*/
int save_errno = errno;
DIR *tzdir;
snprintf(file_path, sizeof(file_path), "%s/timezonesets",
share_path);
tzdir = AllocateDir(file_path);
if (tzdir == NULL)
{
GUC_check_errmsg("could not open directory \"%s\": %m",
file_path);
GUC_check_errhint("This may indicate an incomplete PostgreSQL installation, or that the file \"%s\" has been moved away from its proper location.",
my_exec_path);
return -1;
}
FreeDir(tzdir);
errno = save_errno;
/*
* otherwise, if file doesn't exist and it's level 0, guc.c's
* complaint is enough
*/
if (errno != ENOENT || depth > 0)
GUC_check_errmsg("could not read time zone file \"%s\": %m",
filename);
return -1;
}
while (!feof(tzFile))
{
lineno++;
if (fgets(tzbuf, sizeof(tzbuf), tzFile) == NULL)
{
if (ferror(tzFile))
{
GUC_check_errmsg("could not read time zone file \"%s\": %m",
filename);
n = -1;
break;
}
/* else we're at EOF after all */
break;
}
if (strlen(tzbuf) == sizeof(tzbuf) - 1)
{
/* the line is too long for tzbuf */
GUC_check_errmsg("line is too long in time zone file \"%s\", line %d",
filename, lineno);
n = -1;
break;
}
/* skip over whitespace */
line = tzbuf;
while (*line && isspace((unsigned char) *line))
line++;
if (*line == '\0') /* empty line */
continue;
if (*line == '#') /* comment line */
continue;
if (pg_strncasecmp(line, "@INCLUDE", strlen("@INCLUDE")) == 0)
{
/* pstrdup so we can use filename in result data structure */
char *includeFile = pstrdup(line + strlen("@INCLUDE"));
includeFile = strtok(includeFile, WHITESPACE);
if (!includeFile || !*includeFile)
{
GUC_check_errmsg("@INCLUDE without file name in time zone file \"%s\", line %d",
filename, lineno);
n = -1;
break;
}
n = ParseTzFile(includeFile, depth + 1,
base, arraysize, n);
if (n < 0)
break;
continue;
}
if (pg_strncasecmp(line, "@OVERRIDE", strlen("@OVERRIDE")) == 0)
{
override = true;
continue;
}
if (!splitTzLine(filename, lineno, line, &tzentry))
{
n = -1;
break;
}
if (!validateTzEntry(&tzentry))
{
n = -1;
break;
}
n = addToArray(base, arraysize, n, &tzentry, override);
if (n < 0)
break;
}
FreeFile(tzFile);
return n;
}
/*
* load_tzoffsets --- read and parse the specified timezone offset file
*
* On success, return a filled-in TimeZoneAbbrevTable, which must have been
* guc_malloc'd not palloc'd. On failure, return NULL, using GUC_check_errmsg
* and friends to give details of the problem.
*/
TimeZoneAbbrevTable *
load_tzoffsets(const char *filename)
{
TimeZoneAbbrevTable *result = NULL;
MemoryContext tmpContext;
MemoryContext oldContext;
tzEntry *array;
int arraysize;
int n;
/*
* Create a temp memory context to work in. This makes it easy to clean
* up afterwards.
*/
tmpContext = AllocSetContextCreate(CurrentMemoryContext,
"TZParserMemory",
Add macros to make AllocSetContextCreate() calls simpler and safer. I found that half a dozen (nearly 5%) of our AllocSetContextCreate calls had typos in the context-sizing parameters. While none of these led to especially significant problems, they did create minor inefficiencies, and it's now clear that expecting people to copy-and-paste those calls accurately is not a great idea. Let's reduce the risk of future errors by introducing single macros that encapsulate the common use-cases. Three such macros are enough to cover all but two special-purpose contexts; those two calls can be left as-is, I think. While this patch doesn't in itself improve matters for third-party extensions, it doesn't break anything for them either, and they can gradually adopt the simplified notation over time. In passing, change TopMemoryContext to use the default allocation parameters. Formerly it could only be extended 8K at a time. That was probably reasonable when this code was written; but nowadays we create many more contexts than we did then, so that it's not unusual to have a couple hundred K in TopMemoryContext, even without considering various dubious code that sticks other things there. There seems no good reason not to let it use growing blocks like most other contexts. Back-patch to 9.6, mostly because that's still close enough to HEAD that it's easy to do so, and keeping the branches in sync can be expected to avoid some future back-patching pain. The bugs fixed by these changes don't seem to be significant enough to justify fixing them further back. Discussion: <21072.1472321324@sss.pgh.pa.us>
2016-08-27 23:50:38 +02:00
ALLOCSET_SMALL_SIZES);
oldContext = MemoryContextSwitchTo(tmpContext);
/* Initialize array at a reasonable size */
arraysize = 128;
array = (tzEntry *) palloc(arraysize * sizeof(tzEntry));
/* Parse the file(s) */
n = ParseTzFile(filename, 0, &array, &arraysize, 0);
Support timezone abbreviations that sometimes change. Up to now, PG has assumed that any given timezone abbreviation (such as "EDT") represents a constant GMT offset in the usage of any particular region; we had a way to configure what that offset was, but not for it to be changeable over time. But, as with most things horological, this view of the world is too simplistic: there are numerous regions that have at one time or another switched to a different GMT offset but kept using the same timezone abbreviation. Almost the entire Russian Federation did that a few years ago, and later this month they're going to do it again. And there are similar examples all over the world. To cope with this, invent the notion of a "dynamic timezone abbreviation", which is one that is referenced to a particular underlying timezone (as defined in the IANA timezone database) and means whatever it currently means in that zone. For zones that use or have used daylight-savings time, the standard and DST abbreviations continue to have the property that you can specify standard or DST time and get that time offset whether or not DST was theoretically in effect at the time. However, the abbreviations mean what they meant at the time in question (or most recently before that time) rather than being absolutely fixed. The standard abbreviation-list files have been changed to use this behavior for abbreviations that have actually varied in meaning since 1970. The old simple-numeric definitions are kept for abbreviations that have not changed, since they are a bit faster to resolve. While this is clearly a new feature, it seems necessary to back-patch it into all active branches, because otherwise use of Russian zone abbreviations is going to become even more problematic than it already was. This change supersedes the changes in commit 513d06ded et al to modify the fixed meanings of the Russian abbreviations; since we've not shipped that yet, this will avoid an undesirably incompatible (not to mention incorrect) change in behavior for timestamps between 2011 and 2014. This patch makes some cosmetic changes in ecpglib to keep its usage of datetime lookup tables as similar as possible to the backend code, but doesn't do anything about the increasingly obsolete set of timezone abbreviation definitions that are hard-wired into ecpglib. Whatever we do about that will likely not be appropriate material for back-patching. Also, a potential free() of a garbage pointer after an out-of-memory failure in ecpglib has been fixed. This patch also fixes pre-existing bugs in DetermineTimeZoneOffset() that caused it to produce unexpected results near a timezone transition, if both the "before" and "after" states are marked as standard time. We'd only ever thought about or tested transitions between standard and DST time, but that's not what's happening when a zone simply redefines their base GMT offset. In passing, update the SGML documentation to refer to the Olson/zoneinfo/ zic timezone database as the "IANA" database, since it's now being maintained under the auspices of IANA.
2014-10-16 21:22:10 +02:00
/* If no errors so far, let datetime.c allocate memory & convert format */
if (n >= 0)
{
Support timezone abbreviations that sometimes change. Up to now, PG has assumed that any given timezone abbreviation (such as "EDT") represents a constant GMT offset in the usage of any particular region; we had a way to configure what that offset was, but not for it to be changeable over time. But, as with most things horological, this view of the world is too simplistic: there are numerous regions that have at one time or another switched to a different GMT offset but kept using the same timezone abbreviation. Almost the entire Russian Federation did that a few years ago, and later this month they're going to do it again. And there are similar examples all over the world. To cope with this, invent the notion of a "dynamic timezone abbreviation", which is one that is referenced to a particular underlying timezone (as defined in the IANA timezone database) and means whatever it currently means in that zone. For zones that use or have used daylight-savings time, the standard and DST abbreviations continue to have the property that you can specify standard or DST time and get that time offset whether or not DST was theoretically in effect at the time. However, the abbreviations mean what they meant at the time in question (or most recently before that time) rather than being absolutely fixed. The standard abbreviation-list files have been changed to use this behavior for abbreviations that have actually varied in meaning since 1970. The old simple-numeric definitions are kept for abbreviations that have not changed, since they are a bit faster to resolve. While this is clearly a new feature, it seems necessary to back-patch it into all active branches, because otherwise use of Russian zone abbreviations is going to become even more problematic than it already was. This change supersedes the changes in commit 513d06ded et al to modify the fixed meanings of the Russian abbreviations; since we've not shipped that yet, this will avoid an undesirably incompatible (not to mention incorrect) change in behavior for timestamps between 2011 and 2014. This patch makes some cosmetic changes in ecpglib to keep its usage of datetime lookup tables as similar as possible to the backend code, but doesn't do anything about the increasingly obsolete set of timezone abbreviation definitions that are hard-wired into ecpglib. Whatever we do about that will likely not be appropriate material for back-patching. Also, a potential free() of a garbage pointer after an out-of-memory failure in ecpglib has been fixed. This patch also fixes pre-existing bugs in DetermineTimeZoneOffset() that caused it to produce unexpected results near a timezone transition, if both the "before" and "after" states are marked as standard time. We'd only ever thought about or tested transitions between standard and DST time, but that's not what's happening when a zone simply redefines their base GMT offset. In passing, update the SGML documentation to refer to the Olson/zoneinfo/ zic timezone database as the "IANA" database, since it's now being maintained under the auspices of IANA.
2014-10-16 21:22:10 +02:00
result = ConvertTimeZoneAbbrevs(array, n);
if (!result)
GUC_check_errmsg("out of memory");
}
/* Clean up */
MemoryContextSwitchTo(oldContext);
MemoryContextDelete(tmpContext);
return result;
}