postgresql/src/backend/utils/adt/formatting.c

6624 lines
160 KiB
C
Raw Normal View History

/* -----------------------------------------------------------------------
* formatting.c
*
2010-09-20 22:08:53 +02:00
* src/backend/utils/adt/formatting.c
*
*
* Portions Copyright (c) 1999-2020, PostgreSQL Global Development Group
*
*
* TO_CHAR(); TO_TIMESTAMP(); TO_DATE(); TO_NUMBER();
*
* The PostgreSQL routines for a timestamp/int/float/numeric formatting,
* inspired by the Oracle TO_CHAR() / TO_DATE() / TO_NUMBER() routines.
*
*
* Cache & Memory:
* Routines use (itself) internal cache for format pictures.
*
* The cache uses a static buffer and is persistent across transactions. If
* the format-picture is bigger than the cache buffer, the parser is called
* always.
*
* NOTE for Number version:
* All in this version is implemented as keywords ( => not used
* suffixes), because a format picture is for *one* item (number)
* only. It not is as a timestamp version, where each keyword (can)
* has suffix.
*
* NOTE for Timestamp routines:
* In this module the POSIX 'struct tm' type is *not* used, but rather
* PgSQL type, which has tm_mon based on one (*non* zero) and
* year *not* based on 1900, but is used full year number.
2000-04-07 21:17:51 +02:00
* Module supports AD / BC / AM / PM.
*
* Supported types for to_char():
*
* Timestamp, Numeric, int4, int8, float4, float8
*
* Supported types for reverse conversion:
*
* Timestamp - to_timestamp()
* Date - to_date()
* Numeric - to_number()
*
*
* Karel Zak
*
* TODO
* - better number building (formatting) / parsing, now it isn't
* ideal code
* - use Assert()
* - add support for roman number to standard number conversion
* - add support for number spelling
* - add support for string to string formatting (we must be better
2001-03-22 05:01:46 +01:00
* than Oracle :-),
* to_char('Hello', 'X X X X X') -> 'H e l l o'
*
* -----------------------------------------------------------------------
*/
#ifdef DEBUG_TO_FROM_CHAR
#define DEBUG_elog_output DEBUG3
#endif
#include "postgres.h"
#include <ctype.h>
#include <unistd.h>
#include <math.h>
2000-01-26 07:33:49 +01:00
#include <float.h>
#include <limits.h>
/*
* towlower() and friends should be in <wctype.h>, but some pre-C99 systems
* declare them in <wchar.h>, so include that too.
*/
#include <wchar.h>
#ifdef HAVE_WCTYPE_H
#include <wctype.h>
#endif
#ifdef USE_ICU
#include <unicode/ustring.h>
#endif
#include "catalog/pg_collation.h"
#include "catalog/pg_type.h"
#include "mb/pg_wchar.h"
Clean up formatting.c's logic for matching constant strings. seq_search(), which is used to match input substrings to constants such as month and day names, had a lot of bizarre and unnecessary behaviors. It was mostly possible to avert our eyes from that before, but we don't want to duplicate those behaviors in the upcoming patch to allow recognition of non-English month and day names. So it's time to clean this up. In particular: * seq_search scribbled on the input string, which is a pretty dangerous thing to do, especially in the badly underdocumented way it was done here. Fortunately the input string is a temporary copy, but that was being made three subroutine levels away, making it something easy to break accidentally. The behavior is externally visible nonetheless, in the form of odd case-folding in error reports about unrecognized month/day names. The scribbling is evidently being done to save a few calls to pg_tolower, but that's such a cheap function (at least for ASCII data) that it's pretty pointless to worry about. In HEAD I switched it to be pg_ascii_tolower to ensure it is cheap in all cases; but there are corner cases in Turkish where this'd change behavior, so leave it as pg_tolower in the back branches. * seq_search insisted on knowing the case form (all-upper, all-lower, or initcap) of the constant strings, so that it didn't have to case-fold them to perform case-insensitive comparisons. This likewise seems like excessive micro-optimization, given that pg_tolower is certainly very cheap for ASCII data. It seems unsafe to assume that we know the case form that will come out of pg_locale.c for localized month/day names, so it's better just to define the comparison rule as "downcase all strings before comparing". (The choice between downcasing and upcasing is arbitrary so far as English is concerned, but it might not be in other locales, so follow citext's lead here.) * seq_search also had a parameter that'd cause it to report a match after a maximum number of characters, even if the constant string were longer than that. This was not actually used because no caller passed a value small enough to cut off a comparison. Replicating that behavior for localized month/day names seems expensive as well as useless, so let's get rid of that too. * from_char_seq_search used the maximum-length parameter to truncate the input string in error reports about not finding a matching name. This leads to rather confusing reports in many cases. Worse, it is outright dangerous if the input string isn't all-ASCII, because we risk truncating the string in the middle of a multibyte character. That'd lead either to delivering an illegible error message to the client, or to encoding-conversion failures that obscure the actual data problem. Get rid of that in favor of truncating at whitespace if any (a suggestion due to Alvaro Herrera). In addition to fixing these things, I const-ified the input string pointers of DCH_from_char and its subroutines, to make sure there aren't any other scribbling-on-input problems. The risk of generating a badly-encoded error message seems like enough of a bug to justify back-patching, so patch all supported branches. Discussion: https://postgr.es/m/29432.1579731087@sss.pgh.pa.us
2020-01-23 19:42:09 +01:00
#include "parser/scansup.h"
#include "utils/builtins.h"
2000-07-01 23:27:14 +02:00
#include "utils/date.h"
#include "utils/datetime.h"
#include "utils/float.h"
#include "utils/formatting.h"
2000-07-01 23:27:14 +02:00
#include "utils/int8.h"
#include "utils/memutils.h"
#include "utils/numeric.h"
2000-07-01 23:27:14 +02:00
#include "utils/pg_locale.h"
/* ----------
* Convenience macros for error handling
* ----------
*
* Two macros below help to handle errors in functions that take
* 'bool *have_error' argument. When this argument is not NULL, it's expected
* that function will suppress ereports when possible. Instead it should
* return some default value and set *have_error flag.
*
* RETURN_ERROR() macro intended to wrap ereport() calls. When have_error
* function argument is not NULL, then instead of ereport'ing we set
* *have_error flag and go to on_error label. It's supposed that jump
* resources will be freed and some 'default' value returned.
*
* CHECK_ERROR() jumps on_error label when *have_error flag is defined and set.
* It's supposed to be used for immediate exit from the function on error
* after call of another function with 'bool *have_error' argument.
*/
#define RETURN_ERROR(throw_error) \
do { \
if (have_error) \
{ \
*have_error = true; \
goto on_error; \
} \
else \
{ \
throw_error; \
} \
} while (0)
#define CHECK_ERROR \
do { \
if (have_error && *have_error) \
goto on_error; \
} while (0)
/* ----------
* Routines flags
* ----------
*/
#define DCH_FLAG 0x1 /* DATE-TIME flag */
#define NUM_FLAG 0x2 /* NUMBER flag */
#define STD_FLAG 0x4 /* STANDARD flag */
/* ----------
* KeyWord Index (ascii from position 32 (' ') to 126 (~))
* ----------
*/
2000-04-07 21:17:51 +02:00
#define KeyWord_INDEX_SIZE ('~' - ' ')
#define KeyWord_INDEX_FILTER(_c) ((_c) <= ' ' || (_c) >= '~' ? 0 : 1)
/* ----------
* Maximal length of one node
* ----------
*/
Phase 2 of pgindent updates. Change pg_bsd_indent to follow upstream rules for placement of comments to the right of code, and remove pgindent hack that caused comments following #endif to not obey the general rule. Commit e3860ffa4dd0dad0dd9eea4be9cc1412373a8c89 wasn't actually using the published version of pg_bsd_indent, but a hacked-up version that tried to minimize the amount of movement of comments to the right of code. The situation of interest is where such a comment has to be moved to the right of its default placement at column 33 because there's code there. BSD indent has always moved right in units of tab stops in such cases --- but in the previous incarnation, indent was working in 8-space tab stops, while now it knows we use 4-space tabs. So the net result is that in about half the cases, such comments are placed one tab stop left of before. This is better all around: it leaves more room on the line for comment text, and it means that in such cases the comment uniformly starts at the next 4-space tab stop after the code, rather than sometimes one and sometimes two tabs after. Also, ensure that comments following #endif are indented the same as comments following other preprocessor commands such as #else. That inconsistency turns out to have been self-inflicted damage from a poorly-thought-through post-indent "fixup" in pgindent. This patch is much less interesting than the first round of indent changes, but also bulkier, so I thought it best to separate the effects. Discussion: https://postgr.es/m/E1dAmxK-0006EE-1r@gemulon.postgresql.org Discussion: https://postgr.es/m/30527.1495162840@sss.pgh.pa.us
2017-06-21 21:18:54 +02:00
#define DCH_MAX_ITEM_SIZ 12 /* max localized day name */
#define NUM_MAX_ITEM_SIZ 8 /* roman number (RN has 15 chars) */
/* ----------
* Format parser structs
* ----------
*/
typedef struct
{
const char *name; /* suffix string */
2001-03-22 05:01:46 +01:00
int len, /* suffix length */
id, /* used in node->suffix */
type; /* prefix / postfix */
} KeySuffix;
/* ----------
* FromCharDateMode
* ----------
*
* This value is used to nominate one of several distinct (and mutually
* exclusive) date conventions that a keyword can belong to.
*/
typedef enum
{
FROM_CHAR_DATE_NONE = 0, /* Value does not affect date mode. */
FROM_CHAR_DATE_GREGORIAN, /* Gregorian (day, month, year) style date */
FROM_CHAR_DATE_ISOWEEK /* ISO 8601 week date */
} FromCharDateMode;
typedef struct
{
const char *name;
int len;
int id;
bool is_digit;
FromCharDateMode date_mode;
} KeyWord;
typedef struct
{
uint8 type; /* NODE_TYPE_XXX, see below */
char character[MAX_MULTIBYTE_CHAR_LEN + 1]; /* if type is CHAR */
uint8 suffix; /* keyword prefix/suffix code, if any */
const KeyWord *key; /* if type is ACTION */
} FormatNode;
#define NODE_TYPE_END 1
#define NODE_TYPE_ACTION 2
#define NODE_TYPE_CHAR 3
#define NODE_TYPE_SEPARATOR 4
#define NODE_TYPE_SPACE 5
#define SUFFTYPE_PREFIX 1
#define SUFFTYPE_POSTFIX 2
#define CLOCK_24_HOUR 0
#define CLOCK_12_HOUR 1
/* ----------
* Full months
* ----------
*/
static const char *const months_full[] = {
"January", "February", "March", "April", "May", "June", "July",
"August", "September", "October", "November", "December", NULL
};
static const char *const days_short[] = {
"Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat", NULL
};
/* ----------
* AD / BC
* ----------
* There is no 0 AD. Years go from 1 BC to 1 AD, so we make it
* positive and map year == -1 to year zero, and shift all negative
* years up one. For interval years, we just return the year.
*/
#define ADJUST_YEAR(year, is_interval) ((is_interval) ? (year) : ((year) <= 0 ? -((year) - 1) : (year)))
2000-04-07 21:17:51 +02:00
#define A_D_STR "A.D."
#define a_d_STR "a.d."
#define AD_STR "AD"
#define ad_STR "ad"
#define B_C_STR "B.C."
#define b_c_STR "b.c."
#define BC_STR "BC"
#define bc_STR "bc"
/*
* AD / BC strings for seq_search.
*
* These are given in two variants, a long form with periods and a standard
* form without.
*
* The array is laid out such that matches for AD have an even index, and
* matches for BC have an odd index. So the boolean value for BC is given by
* taking the array index of the match, modulo 2.
*/
static const char *const adbc_strings[] = {ad_STR, bc_STR, AD_STR, BC_STR, NULL};
static const char *const adbc_strings_long[] = {a_d_STR, b_c_STR, A_D_STR, B_C_STR, NULL};
2000-04-07 21:17:51 +02:00
/* ----------
* AM / PM
* ----------
*/
#define A_M_STR "A.M."
#define a_m_STR "a.m."
#define AM_STR "AM"
#define am_STR "am"
#define P_M_STR "P.M."
#define p_m_STR "p.m."
#define PM_STR "PM"
#define pm_STR "pm"
/*
* AM / PM strings for seq_search.
*
* These are given in two variants, a long form with periods and a standard
* form without.
*
* The array is laid out such that matches for AM have an even index, and
* matches for PM have an odd index. So the boolean value for PM is given by
* taking the array index of the match, modulo 2.
*/
static const char *const ampm_strings[] = {am_STR, pm_STR, AM_STR, PM_STR, NULL};
static const char *const ampm_strings_long[] = {a_m_STR, p_m_STR, A_M_STR, P_M_STR, NULL};
/* ----------
* Months in roman-numeral
* (Must be in reverse order for seq_search (in FROM_CHAR), because
* 'VIII' must have higher precedence than 'V')
* ----------
*/
static const char *const rm_months_upper[] =
{"XII", "XI", "X", "IX", "VIII", "VII", "VI", "V", "IV", "III", "II", "I", NULL};
2000-04-07 21:17:51 +02:00
static const char *const rm_months_lower[] =
{"xii", "xi", "x", "ix", "viii", "vii", "vi", "v", "iv", "iii", "ii", "i", NULL};
/* ----------
* Roman numbers
* ----------
*/
static const char *const rm1[] = {"I", "II", "III", "IV", "V", "VI", "VII", "VIII", "IX", NULL};
static const char *const rm10[] = {"X", "XX", "XXX", "XL", "L", "LX", "LXX", "LXXX", "XC", NULL};
static const char *const rm100[] = {"C", "CC", "CCC", "CD", "D", "DC", "DCC", "DCCC", "CM", NULL};
/* ----------
* Ordinal postfixes
* ----------
*/
static const char *const numTH[] = {"ST", "ND", "RD", "TH", NULL};
static const char *const numth[] = {"st", "nd", "rd", "th", NULL};
/* ----------
* Flags & Options:
* ----------
*/
#define TH_UPPER 1
#define TH_LOWER 2
/* ----------
* Number description struct
* ----------
*/
typedef struct
{
2001-03-22 05:01:46 +01:00
int pre, /* (count) numbers before decimal */
post, /* (count) numbers after decimal */
lsign, /* want locales sign */
flag, /* number parameters */
2001-03-22 05:01:46 +01:00
pre_lsign_num, /* tmp value for lsign */
multi, /* multiplier for 'V' */
zero_start, /* position of first zero */
zero_end, /* position of last zero */
need_locale; /* needs it locale */
} NUMDesc;
/* ----------
* Flags for NUMBER version
* ----------
*/
2003-03-27 17:35:31 +01:00
#define NUM_F_DECIMAL (1 << 1)
#define NUM_F_LDECIMAL (1 << 2)
#define NUM_F_ZERO (1 << 3)
2003-08-04 02:43:34 +02:00
#define NUM_F_BLANK (1 << 4)
2003-03-27 17:35:31 +01:00
#define NUM_F_FILLMODE (1 << 5)
2003-08-04 02:43:34 +02:00
#define NUM_F_LSIGN (1 << 6)
2003-03-27 17:35:31 +01:00
#define NUM_F_BRACKET (1 << 7)
2003-08-04 02:43:34 +02:00
#define NUM_F_MINUS (1 << 8)
#define NUM_F_PLUS (1 << 9)
2003-08-04 02:43:34 +02:00
#define NUM_F_ROMAN (1 << 10)
#define NUM_F_MULTI (1 << 11)
2003-08-04 02:43:34 +02:00
#define NUM_F_PLUS_POST (1 << 12)
2003-03-27 17:35:31 +01:00
#define NUM_F_MINUS_POST (1 << 13)
#define NUM_F_EEEE (1 << 14)
#define NUM_LSIGN_PRE (-1)
#define NUM_LSIGN_POST 1
#define NUM_LSIGN_NONE 0
/* ----------
* Tests
* ----------
*/
#define IS_DECIMAL(_f) ((_f)->flag & NUM_F_DECIMAL)
#define IS_LDECIMAL(_f) ((_f)->flag & NUM_F_LDECIMAL)
2001-03-22 05:01:46 +01:00
#define IS_ZERO(_f) ((_f)->flag & NUM_F_ZERO)
#define IS_BLANK(_f) ((_f)->flag & NUM_F_BLANK)
#define IS_FILLMODE(_f) ((_f)->flag & NUM_F_FILLMODE)
#define IS_BRACKET(_f) ((_f)->flag & NUM_F_BRACKET)
#define IS_MINUS(_f) ((_f)->flag & NUM_F_MINUS)
#define IS_LSIGN(_f) ((_f)->flag & NUM_F_LSIGN)
2001-03-22 05:01:46 +01:00
#define IS_PLUS(_f) ((_f)->flag & NUM_F_PLUS)
#define IS_ROMAN(_f) ((_f)->flag & NUM_F_ROMAN)
#define IS_MULTI(_f) ((_f)->flag & NUM_F_MULTI)
#define IS_EEEE(_f) ((_f)->flag & NUM_F_EEEE)
/* ----------
* Format picture cache
Rationalize format-picture caching logic in formatting.c. Add a validity flag to DCHCacheEntry and NUMCacheEntry entries, and do not set it true until after we've parsed the supplied format string. This allows dealing with possible errors while parsing the format without the baroque hack that was there before (which only covered errors within NUMDesc_prepare, anyway). We can get rid of the PG_TRY in NUMDesc_prepare, as well as last_NUMCacheEntry and NUM_cache_remove. (Essentially, this reverts commit ff783fbae in favor of a less fragile solution; the problems with that approach are well illustrated by later hacking such as 55f927a46.) In passing, define the size of these caches as DCH_CACHE_ENTRIES not DCH_CACHE_FIELDS + 1 (whoever thought that was a good definition?) and likewise for the NUM cache. Also const-ify format string parameters where convenient, and merge duplicated cache lookup logic. This is primarily driven by a proposed patch from Artur Zakirov, which introduced some ereport's into format string parsing for the datetime case. He proposed preventing the creation of invalid cache entries by parsing the format string first into a local-variable array, and then copying that to a cache entry. That seemed a bit ugly to me, and anyway randomly different from the way the identical problem had been solved for the numeric case. Let's make the two sets of code more similar not less so. I'm not sure whether we'll adopt the new error conditions Artur proposes, but this patch seems like good code cleanup and future-proofing in any case. The existing code is critically (and undocumented-ly) dependent on no elog being thrown out of several nontrivial functions, which is trouble waiting to happen, though it doesn't seem to be actively broken today. Discussion: <b2a39359-3282-b402-f4a3-057aae500ee7@postgrespro.ru>
2016-09-28 23:08:40 +02:00
*
* We will cache datetime format pictures up to DCH_CACHE_SIZE bytes long;
* likewise number format pictures up to NUM_CACHE_SIZE bytes long.
*
* For simplicity, the cache entries are fixed-size, so they allow for the
* worst case of a FormatNode for each byte in the picture string.
*
* The CACHE_SIZE constants are computed to make sizeof(DCHCacheEntry) and
* sizeof(NUMCacheEntry) be powers of 2, or just less than that, so that
* we don't waste too much space by palloc'ing them individually. Be sure
* to adjust those macros if you add fields to those structs.
*
* The max number of entries in each cache is DCH_CACHE_ENTRIES
Rationalize format-picture caching logic in formatting.c. Add a validity flag to DCHCacheEntry and NUMCacheEntry entries, and do not set it true until after we've parsed the supplied format string. This allows dealing with possible errors while parsing the format without the baroque hack that was there before (which only covered errors within NUMDesc_prepare, anyway). We can get rid of the PG_TRY in NUMDesc_prepare, as well as last_NUMCacheEntry and NUM_cache_remove. (Essentially, this reverts commit ff783fbae in favor of a less fragile solution; the problems with that approach are well illustrated by later hacking such as 55f927a46.) In passing, define the size of these caches as DCH_CACHE_ENTRIES not DCH_CACHE_FIELDS + 1 (whoever thought that was a good definition?) and likewise for the NUM cache. Also const-ify format string parameters where convenient, and merge duplicated cache lookup logic. This is primarily driven by a proposed patch from Artur Zakirov, which introduced some ereport's into format string parsing for the datetime case. He proposed preventing the creation of invalid cache entries by parsing the format string first into a local-variable array, and then copying that to a cache entry. That seemed a bit ugly to me, and anyway randomly different from the way the identical problem had been solved for the numeric case. Let's make the two sets of code more similar not less so. I'm not sure whether we'll adopt the new error conditions Artur proposes, but this patch seems like good code cleanup and future-proofing in any case. The existing code is critically (and undocumented-ly) dependent on no elog being thrown out of several nontrivial functions, which is trouble waiting to happen, though it doesn't seem to be actively broken today. Discussion: <b2a39359-3282-b402-f4a3-057aae500ee7@postgrespro.ru>
2016-09-28 23:08:40 +02:00
* resp. NUM_CACHE_ENTRIES.
* ----------
*/
#define DCH_CACHE_OVERHEAD \
MAXALIGN(sizeof(bool) + sizeof(int))
#define NUM_CACHE_OVERHEAD \
MAXALIGN(sizeof(bool) + sizeof(int) + sizeof(NUMDesc))
#define DCH_CACHE_SIZE \
((2048 - DCH_CACHE_OVERHEAD) / (sizeof(FormatNode) + sizeof(char)) - 1)
#define NUM_CACHE_SIZE \
((1024 - NUM_CACHE_OVERHEAD) / (sizeof(FormatNode) + sizeof(char)) - 1)
Rationalize format-picture caching logic in formatting.c. Add a validity flag to DCHCacheEntry and NUMCacheEntry entries, and do not set it true until after we've parsed the supplied format string. This allows dealing with possible errors while parsing the format without the baroque hack that was there before (which only covered errors within NUMDesc_prepare, anyway). We can get rid of the PG_TRY in NUMDesc_prepare, as well as last_NUMCacheEntry and NUM_cache_remove. (Essentially, this reverts commit ff783fbae in favor of a less fragile solution; the problems with that approach are well illustrated by later hacking such as 55f927a46.) In passing, define the size of these caches as DCH_CACHE_ENTRIES not DCH_CACHE_FIELDS + 1 (whoever thought that was a good definition?) and likewise for the NUM cache. Also const-ify format string parameters where convenient, and merge duplicated cache lookup logic. This is primarily driven by a proposed patch from Artur Zakirov, which introduced some ereport's into format string parsing for the datetime case. He proposed preventing the creation of invalid cache entries by parsing the format string first into a local-variable array, and then copying that to a cache entry. That seemed a bit ugly to me, and anyway randomly different from the way the identical problem had been solved for the numeric case. Let's make the two sets of code more similar not less so. I'm not sure whether we'll adopt the new error conditions Artur proposes, but this patch seems like good code cleanup and future-proofing in any case. The existing code is critically (and undocumented-ly) dependent on no elog being thrown out of several nontrivial functions, which is trouble waiting to happen, though it doesn't seem to be actively broken today. Discussion: <b2a39359-3282-b402-f4a3-057aae500ee7@postgrespro.ru>
2016-09-28 23:08:40 +02:00
#define DCH_CACHE_ENTRIES 20
#define NUM_CACHE_ENTRIES 20
typedef struct
{
FormatNode format[DCH_CACHE_SIZE + 1];
char str[DCH_CACHE_SIZE + 1];
bool std;
Rationalize format-picture caching logic in formatting.c. Add a validity flag to DCHCacheEntry and NUMCacheEntry entries, and do not set it true until after we've parsed the supplied format string. This allows dealing with possible errors while parsing the format without the baroque hack that was there before (which only covered errors within NUMDesc_prepare, anyway). We can get rid of the PG_TRY in NUMDesc_prepare, as well as last_NUMCacheEntry and NUM_cache_remove. (Essentially, this reverts commit ff783fbae in favor of a less fragile solution; the problems with that approach are well illustrated by later hacking such as 55f927a46.) In passing, define the size of these caches as DCH_CACHE_ENTRIES not DCH_CACHE_FIELDS + 1 (whoever thought that was a good definition?) and likewise for the NUM cache. Also const-ify format string parameters where convenient, and merge duplicated cache lookup logic. This is primarily driven by a proposed patch from Artur Zakirov, which introduced some ereport's into format string parsing for the datetime case. He proposed preventing the creation of invalid cache entries by parsing the format string first into a local-variable array, and then copying that to a cache entry. That seemed a bit ugly to me, and anyway randomly different from the way the identical problem had been solved for the numeric case. Let's make the two sets of code more similar not less so. I'm not sure whether we'll adopt the new error conditions Artur proposes, but this patch seems like good code cleanup and future-proofing in any case. The existing code is critically (and undocumented-ly) dependent on no elog being thrown out of several nontrivial functions, which is trouble waiting to happen, though it doesn't seem to be actively broken today. Discussion: <b2a39359-3282-b402-f4a3-057aae500ee7@postgrespro.ru>
2016-09-28 23:08:40 +02:00
bool valid;
2001-03-22 05:01:46 +01:00
int age;
} DCHCacheEntry;
typedef struct
{
FormatNode format[NUM_CACHE_SIZE + 1];
char str[NUM_CACHE_SIZE + 1];
Rationalize format-picture caching logic in formatting.c. Add a validity flag to DCHCacheEntry and NUMCacheEntry entries, and do not set it true until after we've parsed the supplied format string. This allows dealing with possible errors while parsing the format without the baroque hack that was there before (which only covered errors within NUMDesc_prepare, anyway). We can get rid of the PG_TRY in NUMDesc_prepare, as well as last_NUMCacheEntry and NUM_cache_remove. (Essentially, this reverts commit ff783fbae in favor of a less fragile solution; the problems with that approach are well illustrated by later hacking such as 55f927a46.) In passing, define the size of these caches as DCH_CACHE_ENTRIES not DCH_CACHE_FIELDS + 1 (whoever thought that was a good definition?) and likewise for the NUM cache. Also const-ify format string parameters where convenient, and merge duplicated cache lookup logic. This is primarily driven by a proposed patch from Artur Zakirov, which introduced some ereport's into format string parsing for the datetime case. He proposed preventing the creation of invalid cache entries by parsing the format string first into a local-variable array, and then copying that to a cache entry. That seemed a bit ugly to me, and anyway randomly different from the way the identical problem had been solved for the numeric case. Let's make the two sets of code more similar not less so. I'm not sure whether we'll adopt the new error conditions Artur proposes, but this patch seems like good code cleanup and future-proofing in any case. The existing code is critically (and undocumented-ly) dependent on no elog being thrown out of several nontrivial functions, which is trouble waiting to happen, though it doesn't seem to be actively broken today. Discussion: <b2a39359-3282-b402-f4a3-057aae500ee7@postgrespro.ru>
2016-09-28 23:08:40 +02:00
bool valid;
2001-03-22 05:01:46 +01:00
int age;
NUMDesc Num;
} NUMCacheEntry;
Rationalize format-picture caching logic in formatting.c. Add a validity flag to DCHCacheEntry and NUMCacheEntry entries, and do not set it true until after we've parsed the supplied format string. This allows dealing with possible errors while parsing the format without the baroque hack that was there before (which only covered errors within NUMDesc_prepare, anyway). We can get rid of the PG_TRY in NUMDesc_prepare, as well as last_NUMCacheEntry and NUM_cache_remove. (Essentially, this reverts commit ff783fbae in favor of a less fragile solution; the problems with that approach are well illustrated by later hacking such as 55f927a46.) In passing, define the size of these caches as DCH_CACHE_ENTRIES not DCH_CACHE_FIELDS + 1 (whoever thought that was a good definition?) and likewise for the NUM cache. Also const-ify format string parameters where convenient, and merge duplicated cache lookup logic. This is primarily driven by a proposed patch from Artur Zakirov, which introduced some ereport's into format string parsing for the datetime case. He proposed preventing the creation of invalid cache entries by parsing the format string first into a local-variable array, and then copying that to a cache entry. That seemed a bit ugly to me, and anyway randomly different from the way the identical problem had been solved for the numeric case. Let's make the two sets of code more similar not less so. I'm not sure whether we'll adopt the new error conditions Artur proposes, but this patch seems like good code cleanup and future-proofing in any case. The existing code is critically (and undocumented-ly) dependent on no elog being thrown out of several nontrivial functions, which is trouble waiting to happen, though it doesn't seem to be actively broken today. Discussion: <b2a39359-3282-b402-f4a3-057aae500ee7@postgrespro.ru>
2016-09-28 23:08:40 +02:00
/* global cache for date/time format pictures */
static DCHCacheEntry *DCHCache[DCH_CACHE_ENTRIES];
Rationalize format-picture caching logic in formatting.c. Add a validity flag to DCHCacheEntry and NUMCacheEntry entries, and do not set it true until after we've parsed the supplied format string. This allows dealing with possible errors while parsing the format without the baroque hack that was there before (which only covered errors within NUMDesc_prepare, anyway). We can get rid of the PG_TRY in NUMDesc_prepare, as well as last_NUMCacheEntry and NUM_cache_remove. (Essentially, this reverts commit ff783fbae in favor of a less fragile solution; the problems with that approach are well illustrated by later hacking such as 55f927a46.) In passing, define the size of these caches as DCH_CACHE_ENTRIES not DCH_CACHE_FIELDS + 1 (whoever thought that was a good definition?) and likewise for the NUM cache. Also const-ify format string parameters where convenient, and merge duplicated cache lookup logic. This is primarily driven by a proposed patch from Artur Zakirov, which introduced some ereport's into format string parsing for the datetime case. He proposed preventing the creation of invalid cache entries by parsing the format string first into a local-variable array, and then copying that to a cache entry. That seemed a bit ugly to me, and anyway randomly different from the way the identical problem had been solved for the numeric case. Let's make the two sets of code more similar not less so. I'm not sure whether we'll adopt the new error conditions Artur proposes, but this patch seems like good code cleanup and future-proofing in any case. The existing code is critically (and undocumented-ly) dependent on no elog being thrown out of several nontrivial functions, which is trouble waiting to happen, though it doesn't seem to be actively broken today. Discussion: <b2a39359-3282-b402-f4a3-057aae500ee7@postgrespro.ru>
2016-09-28 23:08:40 +02:00
static int n_DCHCache = 0; /* current number of entries */
static int DCHCounter = 0; /* aging-event counter */
Rationalize format-picture caching logic in formatting.c. Add a validity flag to DCHCacheEntry and NUMCacheEntry entries, and do not set it true until after we've parsed the supplied format string. This allows dealing with possible errors while parsing the format without the baroque hack that was there before (which only covered errors within NUMDesc_prepare, anyway). We can get rid of the PG_TRY in NUMDesc_prepare, as well as last_NUMCacheEntry and NUM_cache_remove. (Essentially, this reverts commit ff783fbae in favor of a less fragile solution; the problems with that approach are well illustrated by later hacking such as 55f927a46.) In passing, define the size of these caches as DCH_CACHE_ENTRIES not DCH_CACHE_FIELDS + 1 (whoever thought that was a good definition?) and likewise for the NUM cache. Also const-ify format string parameters where convenient, and merge duplicated cache lookup logic. This is primarily driven by a proposed patch from Artur Zakirov, which introduced some ereport's into format string parsing for the datetime case. He proposed preventing the creation of invalid cache entries by parsing the format string first into a local-variable array, and then copying that to a cache entry. That seemed a bit ugly to me, and anyway randomly different from the way the identical problem had been solved for the numeric case. Let's make the two sets of code more similar not less so. I'm not sure whether we'll adopt the new error conditions Artur proposes, but this patch seems like good code cleanup and future-proofing in any case. The existing code is critically (and undocumented-ly) dependent on no elog being thrown out of several nontrivial functions, which is trouble waiting to happen, though it doesn't seem to be actively broken today. Discussion: <b2a39359-3282-b402-f4a3-057aae500ee7@postgrespro.ru>
2016-09-28 23:08:40 +02:00
/* global cache for number format pictures */
static NUMCacheEntry *NUMCache[NUM_CACHE_ENTRIES];
Rationalize format-picture caching logic in formatting.c. Add a validity flag to DCHCacheEntry and NUMCacheEntry entries, and do not set it true until after we've parsed the supplied format string. This allows dealing with possible errors while parsing the format without the baroque hack that was there before (which only covered errors within NUMDesc_prepare, anyway). We can get rid of the PG_TRY in NUMDesc_prepare, as well as last_NUMCacheEntry and NUM_cache_remove. (Essentially, this reverts commit ff783fbae in favor of a less fragile solution; the problems with that approach are well illustrated by later hacking such as 55f927a46.) In passing, define the size of these caches as DCH_CACHE_ENTRIES not DCH_CACHE_FIELDS + 1 (whoever thought that was a good definition?) and likewise for the NUM cache. Also const-ify format string parameters where convenient, and merge duplicated cache lookup logic. This is primarily driven by a proposed patch from Artur Zakirov, which introduced some ereport's into format string parsing for the datetime case. He proposed preventing the creation of invalid cache entries by parsing the format string first into a local-variable array, and then copying that to a cache entry. That seemed a bit ugly to me, and anyway randomly different from the way the identical problem had been solved for the numeric case. Let's make the two sets of code more similar not less so. I'm not sure whether we'll adopt the new error conditions Artur proposes, but this patch seems like good code cleanup and future-proofing in any case. The existing code is critically (and undocumented-ly) dependent on no elog being thrown out of several nontrivial functions, which is trouble waiting to happen, though it doesn't seem to be actively broken today. Discussion: <b2a39359-3282-b402-f4a3-057aae500ee7@postgrespro.ru>
2016-09-28 23:08:40 +02:00
static int n_NUMCache = 0; /* current number of entries */
static int NUMCounter = 0; /* aging-event counter */
/* ----------
* For char->date/time conversion
* ----------
*/
2001-03-22 05:01:46 +01:00
typedef struct
{
FromCharDateMode mode;
2001-03-22 05:01:46 +01:00
int hh,
pm,
mi,
ss,
ssss,
d, /* stored as 1-7, Sunday = 1, 0 means missing */
2001-03-22 05:01:46 +01:00
dd,
ddd,
mm,
ms,
year,
2001-03-22 05:01:46 +01:00
bc,
ww,
w,
2001-03-22 05:01:46 +01:00
cc,
j,
us,
yysz, /* is it YY or YYYY ? */
clock, /* 12 or 24 hour clock? */
tzsign, /* +1, -1 or 0 if timezone info is absent */
tzh,
tzm,
ff; /* fractional precision */
} TmFromChar;
#define ZERO_tmfc(_X) memset(_X, 0, sizeof(TmFromChar))
/* ----------
* Debug
* ----------
*/
#ifdef DEBUG_TO_FROM_CHAR
#define DEBUG_TMFC(_X) \
elog(DEBUG_elog_output, "TMFC:\nmode %d\nhh %d\npm %d\nmi %d\nss %d\nssss %d\nd %d\ndd %d\nddd %d\nmm %d\nms: %d\nyear %d\nbc %d\nww %d\nw %d\ncc %d\nj %d\nus: %d\nyysz: %d\nclock: %d", \
(_X)->mode, (_X)->hh, (_X)->pm, (_X)->mi, (_X)->ss, (_X)->ssss, \
(_X)->d, (_X)->dd, (_X)->ddd, (_X)->mm, (_X)->ms, (_X)->year, \
(_X)->bc, (_X)->ww, (_X)->w, (_X)->cc, (_X)->j, (_X)->us, \
(_X)->yysz, (_X)->clock)
#define DEBUG_TM(_X) \
elog(DEBUG_elog_output, "TM:\nsec %d\nyear %d\nmin %d\nwday %d\nhour %d\nyday %d\nmday %d\nnisdst %d\nmon %d\n",\
(_X)->tm_sec, (_X)->tm_year,\
(_X)->tm_min, (_X)->tm_wday, (_X)->tm_hour, (_X)->tm_yday,\
(_X)->tm_mday, (_X)->tm_isdst, (_X)->tm_mon)
#else
#define DEBUG_TMFC(_X)
#define DEBUG_TM(_X)
#endif
/* ----------
* Datetime to char conversion
* ----------
*/
typedef struct TmToChar
{
2004-08-29 07:07:03 +02:00
struct pg_tm tm; /* classic 'tm' struct */
Support alternate storage scheme of 64-bit integer for date/time types. Use "--enable-integer-datetimes" in configuration to use this rather than the original float8 storage. I would recommend the integer-based storage for any platform on which it is available. We perhaps should make this the default for the production release. Change timezone(timestamptz) results to return timestamp rather than a character string. Formerly, we didn't have a way to represent timestamps with an explicit time zone other than freezing the info into a string. Now, we can reasonably omit the explicit time zone from the result and return a timestamp with values appropriate for the specified time zone. Much cleaner, and if you need the time zone in the result you can put it into a character string pretty easily anyway. Allow fractional seconds in date/time types even for dates prior to 1BC. Limit timestamp data types to 6 decimal places of precision. Just right for a micro-second storage of int8 date/time types, and reduces the number of places ad-hoc rounding was occuring for the float8-based types. Use lookup tables for precision/rounding calculations for timestamp and interval types. Formerly used pow() to calculate the desired value but with a more limited range there is no reason to not type in a lookup table. Should be *much* better performance, though formerly there were some optimizations to help minimize the number of times pow() was called. Define a HAVE_INT64_TIMESTAMP variable. Based on the configure option "--enable-integer-datetimes" and the existing internal INT64_IS_BUSTED. Add explicit date/interval operators and functions for addition and subtraction. Formerly relied on implicit type promotion from date to timestamp with time zone. Change timezone conversion functions for the timetz type from "timetz()" to "timezone()". This is consistant with other time zone coersion functions for other types. Bump the catalog version to 200204201. Fix up regression tests to reflect changes in fractional seconds representation for date/times in BC eras. All regression tests pass on my Linux box.
2002-04-21 21:52:18 +02:00
fsec_t fsec; /* fractional seconds */
const char *tzn; /* timezone */
} TmToChar;
#define tmtcTm(_X) (&(_X)->tm)
#define tmtcTzn(_X) ((_X)->tzn)
#define tmtcFsec(_X) ((_X)->fsec)
#define ZERO_tm(_X) \
do { \
(_X)->tm_sec = (_X)->tm_year = (_X)->tm_min = (_X)->tm_wday = \
(_X)->tm_hour = (_X)->tm_yday = (_X)->tm_isdst = 0; \
(_X)->tm_mday = (_X)->tm_mon = 1; \
(_X)->tm_zone = NULL; \
} while(0)
#define ZERO_tmtc(_X) \
do { \
ZERO_tm( tmtcTm(_X) ); \
tmtcFsec(_X) = 0; \
tmtcTzn(_X) = NULL; \
} while(0)
/*
* to_char(time) appears to to_char() as an interval, so this check
* is really for interval and time data types.
*/
#define INVALID_FOR_INTERVAL \
do { \
if (is_interval) \
ereport(ERROR, \
(errcode(ERRCODE_INVALID_DATETIME_FORMAT), \
errmsg("invalid format specification for an interval value"), \
errhint("Intervals are not tied to specific calendar dates."))); \
} while(0)
2005-10-15 04:49:52 +02:00
/*****************************************************************************
* KeyWord definitions
*****************************************************************************/
/* ----------
* Suffixes (FormatNode.suffix is an OR of these codes)
* ----------
*/
#define DCH_S_FM 0x01
#define DCH_S_TH 0x02
#define DCH_S_th 0x04
#define DCH_S_SP 0x08
#define DCH_S_TM 0x10
/* ----------
* Suffix tests
* ----------
*/
#define S_THth(_s) ((((_s) & DCH_S_TH) || ((_s) & DCH_S_th)) ? 1 : 0)
#define S_TH(_s) (((_s) & DCH_S_TH) ? 1 : 0)
#define S_th(_s) (((_s) & DCH_S_th) ? 1 : 0)
#define S_TH_TYPE(_s) (((_s) & DCH_S_TH) ? TH_UPPER : TH_LOWER)
/* Oracle toggles FM behavior, we don't; see docs. */
#define S_FM(_s) (((_s) & DCH_S_FM) ? 1 : 0)
#define S_SP(_s) (((_s) & DCH_S_SP) ? 1 : 0)
#define S_TM(_s) (((_s) & DCH_S_TM) ? 1 : 0)
/* ----------
* Suffixes definition for DATE-TIME TO/FROM CHAR
* ----------
*/
#define TM_SUFFIX_LEN 2
static const KeySuffix DCH_suff[] = {
{"FM", 2, DCH_S_FM, SUFFTYPE_PREFIX},
{"fm", 2, DCH_S_FM, SUFFTYPE_PREFIX},
{"TM", TM_SUFFIX_LEN, DCH_S_TM, SUFFTYPE_PREFIX},
{"tm", 2, DCH_S_TM, SUFFTYPE_PREFIX},
{"TH", 2, DCH_S_TH, SUFFTYPE_POSTFIX},
{"th", 2, DCH_S_th, SUFFTYPE_POSTFIX},
{"SP", 2, DCH_S_SP, SUFFTYPE_POSTFIX},
/* last */
{NULL, 0, 0, 0}
};
/* ----------
* Format-pictures (KeyWord).
*
* The KeyWord field; alphabetic sorted, *BUT* strings alike is sorted
* complicated -to-> easy:
*
* (example: "DDD","DD","Day","D" )
*
* (this specific sort needs the algorithm for sequential search for strings,
* which not has exact end; -> How keyword is in "HH12blabla" ? - "HH"
* or "HH12"? You must first try "HH12", because "HH" is in string, but
* it is not good.
*
* (!)
* - Position for the keyword is similar as position in the enum DCH/NUM_poz.
* (!)
*
* For fast search is used the 'int index[]', index is ascii table from position
* 32 (' ') to 126 (~), in this index is DCH_ / NUM_ enums for each ASCII
* position or -1 if char is not used in the KeyWord. Search example for
* string "MM":
* 1) see in index to index['M' - 32],
* 2) take keywords position (enum DCH_MI) from index
* 3) run sequential search in keywords[] from this position
*
* ----------
*/
typedef enum
{
DCH_A_D,
DCH_A_M,
DCH_AD,
DCH_AM,
DCH_B_C,
DCH_BC,
DCH_CC,
DCH_DAY,
DCH_DDD,
DCH_DD,
DCH_DY,
DCH_Day,
DCH_Dy,
DCH_D,
DCH_FF1,
DCH_FF2,
DCH_FF3,
DCH_FF4,
DCH_FF5,
DCH_FF6,
DCH_FX, /* global suffix */
DCH_HH24,
DCH_HH12,
DCH_HH,
DCH_IDDD,
DCH_ID,
DCH_IW,
DCH_IYYY,
DCH_IYY,
DCH_IY,
DCH_I,
DCH_J,
DCH_MI,
DCH_MM,
DCH_MONTH,
DCH_MON,
DCH_MS,
DCH_Month,
DCH_Mon,
DCH_OF,
DCH_P_M,
DCH_PM,
DCH_Q,
DCH_RM,
DCH_SSSSS,
DCH_SSSS,
DCH_SS,
DCH_TZH,
DCH_TZM,
DCH_TZ,
DCH_US,
DCH_WW,
DCH_W,
DCH_Y_YYY,
DCH_YYYY,
DCH_YYY,
DCH_YY,
DCH_Y,
DCH_a_d,
DCH_a_m,
DCH_ad,
DCH_am,
DCH_b_c,
DCH_bc,
DCH_cc,
DCH_day,
DCH_ddd,
DCH_dd,
DCH_dy,
DCH_d,
DCH_ff1,
DCH_ff2,
DCH_ff3,
DCH_ff4,
DCH_ff5,
DCH_ff6,
DCH_fx,
DCH_hh24,
DCH_hh12,
DCH_hh,
DCH_iddd,
DCH_id,
DCH_iw,
DCH_iyyy,
DCH_iyy,
DCH_iy,
DCH_i,
DCH_j,
DCH_mi,
DCH_mm,
DCH_month,
DCH_mon,
DCH_ms,
DCH_p_m,
DCH_pm,
DCH_q,
DCH_rm,
DCH_sssss,
DCH_ssss,
DCH_ss,
DCH_tz,
DCH_us,
DCH_ww,
DCH_w,
DCH_y_yyy,
DCH_yyyy,
DCH_yyy,
DCH_yy,
DCH_y,
2000-04-07 21:17:51 +02:00
/* last */
_DCH_last_
2017-06-21 20:39:04 +02:00
} DCH_poz;
typedef enum
{
NUM_COMMA,
NUM_DEC,
NUM_0,
NUM_9,
NUM_B,
NUM_C,
NUM_D,
NUM_E,
NUM_FM,
NUM_G,
NUM_L,
NUM_MI,
NUM_PL,
NUM_PR,
NUM_RN,
NUM_SG,
NUM_SP,
NUM_S,
NUM_TH,
NUM_V,
NUM_b,
NUM_c,
NUM_d,
NUM_e,
NUM_fm,
NUM_g,
NUM_l,
NUM_mi,
NUM_pl,
NUM_pr,
NUM_rn,
NUM_sg,
NUM_sp,
NUM_s,
NUM_th,
NUM_v,
/* last */
_NUM_last_
2017-06-21 20:39:04 +02:00
} NUM_poz;
/* ----------
* KeyWords for DATE-TIME version
* ----------
*/
static const KeyWord DCH_keywords[] = {
/* name, len, id, is_digit, date_mode */
{"A.D.", 4, DCH_A_D, false, FROM_CHAR_DATE_NONE}, /* A */
{"A.M.", 4, DCH_A_M, false, FROM_CHAR_DATE_NONE},
{"AD", 2, DCH_AD, false, FROM_CHAR_DATE_NONE},
{"AM", 2, DCH_AM, false, FROM_CHAR_DATE_NONE},
{"B.C.", 4, DCH_B_C, false, FROM_CHAR_DATE_NONE}, /* B */
{"BC", 2, DCH_BC, false, FROM_CHAR_DATE_NONE},
{"CC", 2, DCH_CC, true, FROM_CHAR_DATE_NONE}, /* C */
{"DAY", 3, DCH_DAY, false, FROM_CHAR_DATE_NONE}, /* D */
{"DDD", 3, DCH_DDD, true, FROM_CHAR_DATE_GREGORIAN},
{"DD", 2, DCH_DD, true, FROM_CHAR_DATE_GREGORIAN},
{"DY", 2, DCH_DY, false, FROM_CHAR_DATE_NONE},
{"Day", 3, DCH_Day, false, FROM_CHAR_DATE_NONE},
{"Dy", 2, DCH_Dy, false, FROM_CHAR_DATE_NONE},
{"D", 1, DCH_D, true, FROM_CHAR_DATE_GREGORIAN},
{"FF1", 3, DCH_FF1, false, FROM_CHAR_DATE_NONE}, /* F */
{"FF2", 3, DCH_FF2, false, FROM_CHAR_DATE_NONE},
{"FF3", 3, DCH_FF3, false, FROM_CHAR_DATE_NONE},
{"FF4", 3, DCH_FF4, false, FROM_CHAR_DATE_NONE},
{"FF5", 3, DCH_FF5, false, FROM_CHAR_DATE_NONE},
{"FF6", 3, DCH_FF6, false, FROM_CHAR_DATE_NONE},
{"FX", 2, DCH_FX, false, FROM_CHAR_DATE_NONE},
{"HH24", 4, DCH_HH24, true, FROM_CHAR_DATE_NONE}, /* H */
{"HH12", 4, DCH_HH12, true, FROM_CHAR_DATE_NONE},
{"HH", 2, DCH_HH, true, FROM_CHAR_DATE_NONE},
{"IDDD", 4, DCH_IDDD, true, FROM_CHAR_DATE_ISOWEEK}, /* I */
{"ID", 2, DCH_ID, true, FROM_CHAR_DATE_ISOWEEK},
{"IW", 2, DCH_IW, true, FROM_CHAR_DATE_ISOWEEK},
{"IYYY", 4, DCH_IYYY, true, FROM_CHAR_DATE_ISOWEEK},
{"IYY", 3, DCH_IYY, true, FROM_CHAR_DATE_ISOWEEK},
{"IY", 2, DCH_IY, true, FROM_CHAR_DATE_ISOWEEK},
{"I", 1, DCH_I, true, FROM_CHAR_DATE_ISOWEEK},
{"J", 1, DCH_J, true, FROM_CHAR_DATE_NONE}, /* J */
{"MI", 2, DCH_MI, true, FROM_CHAR_DATE_NONE}, /* M */
{"MM", 2, DCH_MM, true, FROM_CHAR_DATE_GREGORIAN},
{"MONTH", 5, DCH_MONTH, false, FROM_CHAR_DATE_GREGORIAN},
{"MON", 3, DCH_MON, false, FROM_CHAR_DATE_GREGORIAN},
{"MS", 2, DCH_MS, true, FROM_CHAR_DATE_NONE},
{"Month", 5, DCH_Month, false, FROM_CHAR_DATE_GREGORIAN},
{"Mon", 3, DCH_Mon, false, FROM_CHAR_DATE_GREGORIAN},
{"OF", 2, DCH_OF, false, FROM_CHAR_DATE_NONE}, /* O */
{"P.M.", 4, DCH_P_M, false, FROM_CHAR_DATE_NONE}, /* P */
{"PM", 2, DCH_PM, false, FROM_CHAR_DATE_NONE},
{"Q", 1, DCH_Q, true, FROM_CHAR_DATE_NONE}, /* Q */
{"RM", 2, DCH_RM, false, FROM_CHAR_DATE_GREGORIAN}, /* R */
{"SSSSS", 5, DCH_SSSS, true, FROM_CHAR_DATE_NONE}, /* S */
{"SSSS", 4, DCH_SSSS, true, FROM_CHAR_DATE_NONE},
{"SS", 2, DCH_SS, true, FROM_CHAR_DATE_NONE},
{"TZH", 3, DCH_TZH, false, FROM_CHAR_DATE_NONE}, /* T */
{"TZM", 3, DCH_TZM, true, FROM_CHAR_DATE_NONE},
{"TZ", 2, DCH_TZ, false, FROM_CHAR_DATE_NONE},
{"US", 2, DCH_US, true, FROM_CHAR_DATE_NONE}, /* U */
{"WW", 2, DCH_WW, true, FROM_CHAR_DATE_GREGORIAN}, /* W */
{"W", 1, DCH_W, true, FROM_CHAR_DATE_GREGORIAN},
{"Y,YYY", 5, DCH_Y_YYY, true, FROM_CHAR_DATE_GREGORIAN}, /* Y */
{"YYYY", 4, DCH_YYYY, true, FROM_CHAR_DATE_GREGORIAN},
{"YYY", 3, DCH_YYY, true, FROM_CHAR_DATE_GREGORIAN},
{"YY", 2, DCH_YY, true, FROM_CHAR_DATE_GREGORIAN},
{"Y", 1, DCH_Y, true, FROM_CHAR_DATE_GREGORIAN},
{"a.d.", 4, DCH_a_d, false, FROM_CHAR_DATE_NONE}, /* a */
{"a.m.", 4, DCH_a_m, false, FROM_CHAR_DATE_NONE},
{"ad", 2, DCH_ad, false, FROM_CHAR_DATE_NONE},
{"am", 2, DCH_am, false, FROM_CHAR_DATE_NONE},
{"b.c.", 4, DCH_b_c, false, FROM_CHAR_DATE_NONE}, /* b */
{"bc", 2, DCH_bc, false, FROM_CHAR_DATE_NONE},
{"cc", 2, DCH_CC, true, FROM_CHAR_DATE_NONE}, /* c */
{"day", 3, DCH_day, false, FROM_CHAR_DATE_NONE}, /* d */
{"ddd", 3, DCH_DDD, true, FROM_CHAR_DATE_GREGORIAN},
{"dd", 2, DCH_DD, true, FROM_CHAR_DATE_GREGORIAN},
{"dy", 2, DCH_dy, false, FROM_CHAR_DATE_NONE},
{"d", 1, DCH_D, true, FROM_CHAR_DATE_GREGORIAN},
{"ff1", 3, DCH_FF1, false, FROM_CHAR_DATE_NONE}, /* f */
{"ff2", 3, DCH_FF2, false, FROM_CHAR_DATE_NONE},
{"ff3", 3, DCH_FF3, false, FROM_CHAR_DATE_NONE},
{"ff4", 3, DCH_FF4, false, FROM_CHAR_DATE_NONE},
{"ff5", 3, DCH_FF5, false, FROM_CHAR_DATE_NONE},
{"ff6", 3, DCH_FF6, false, FROM_CHAR_DATE_NONE},
{"fx", 2, DCH_FX, false, FROM_CHAR_DATE_NONE},
{"hh24", 4, DCH_HH24, true, FROM_CHAR_DATE_NONE}, /* h */
{"hh12", 4, DCH_HH12, true, FROM_CHAR_DATE_NONE},
{"hh", 2, DCH_HH, true, FROM_CHAR_DATE_NONE},
{"iddd", 4, DCH_IDDD, true, FROM_CHAR_DATE_ISOWEEK}, /* i */
{"id", 2, DCH_ID, true, FROM_CHAR_DATE_ISOWEEK},
{"iw", 2, DCH_IW, true, FROM_CHAR_DATE_ISOWEEK},
{"iyyy", 4, DCH_IYYY, true, FROM_CHAR_DATE_ISOWEEK},
{"iyy", 3, DCH_IYY, true, FROM_CHAR_DATE_ISOWEEK},
{"iy", 2, DCH_IY, true, FROM_CHAR_DATE_ISOWEEK},
{"i", 1, DCH_I, true, FROM_CHAR_DATE_ISOWEEK},
{"j", 1, DCH_J, true, FROM_CHAR_DATE_NONE}, /* j */
{"mi", 2, DCH_MI, true, FROM_CHAR_DATE_NONE}, /* m */
{"mm", 2, DCH_MM, true, FROM_CHAR_DATE_GREGORIAN},
{"month", 5, DCH_month, false, FROM_CHAR_DATE_GREGORIAN},
{"mon", 3, DCH_mon, false, FROM_CHAR_DATE_GREGORIAN},
{"ms", 2, DCH_MS, true, FROM_CHAR_DATE_NONE},
{"p.m.", 4, DCH_p_m, false, FROM_CHAR_DATE_NONE}, /* p */
{"pm", 2, DCH_pm, false, FROM_CHAR_DATE_NONE},
{"q", 1, DCH_Q, true, FROM_CHAR_DATE_NONE}, /* q */
{"rm", 2, DCH_rm, false, FROM_CHAR_DATE_GREGORIAN}, /* r */
{"sssss", 5, DCH_SSSS, true, FROM_CHAR_DATE_NONE}, /* s */
{"ssss", 4, DCH_SSSS, true, FROM_CHAR_DATE_NONE},
{"ss", 2, DCH_SS, true, FROM_CHAR_DATE_NONE},
{"tz", 2, DCH_tz, false, FROM_CHAR_DATE_NONE}, /* t */
{"us", 2, DCH_US, true, FROM_CHAR_DATE_NONE}, /* u */
{"ww", 2, DCH_WW, true, FROM_CHAR_DATE_GREGORIAN}, /* w */
{"w", 1, DCH_W, true, FROM_CHAR_DATE_GREGORIAN},
{"y,yyy", 5, DCH_Y_YYY, true, FROM_CHAR_DATE_GREGORIAN}, /* y */
{"yyyy", 4, DCH_YYYY, true, FROM_CHAR_DATE_GREGORIAN},
{"yyy", 3, DCH_YYY, true, FROM_CHAR_DATE_GREGORIAN},
{"yy", 2, DCH_YY, true, FROM_CHAR_DATE_GREGORIAN},
{"y", 1, DCH_Y, true, FROM_CHAR_DATE_GREGORIAN},
/* last */
{NULL, 0, 0, 0, 0}
};
/* ----------
* KeyWords for NUMBER version
*
* The is_digit and date_mode fields are not relevant here.
* ----------
*/
static const KeyWord NUM_keywords[] = {
/* name, len, id is in Index */
{",", 1, NUM_COMMA}, /* , */
{".", 1, NUM_DEC}, /* . */
{"0", 1, NUM_0}, /* 0 */
{"9", 1, NUM_9}, /* 9 */
{"B", 1, NUM_B}, /* B */
{"C", 1, NUM_C}, /* C */
{"D", 1, NUM_D}, /* D */
{"EEEE", 4, NUM_E}, /* E */
{"FM", 2, NUM_FM}, /* F */
{"G", 1, NUM_G}, /* G */
{"L", 1, NUM_L}, /* L */
{"MI", 2, NUM_MI}, /* M */
{"PL", 2, NUM_PL}, /* P */
{"PR", 2, NUM_PR},
{"RN", 2, NUM_RN}, /* R */
{"SG", 2, NUM_SG}, /* S */
{"SP", 2, NUM_SP},
{"S", 1, NUM_S},
{"TH", 2, NUM_TH}, /* T */
{"V", 1, NUM_V}, /* V */
{"b", 1, NUM_B}, /* b */
{"c", 1, NUM_C}, /* c */
{"d", 1, NUM_D}, /* d */
{"eeee", 4, NUM_E}, /* e */
{"fm", 2, NUM_FM}, /* f */
{"g", 1, NUM_G}, /* g */
{"l", 1, NUM_L}, /* l */
{"mi", 2, NUM_MI}, /* m */
{"pl", 2, NUM_PL}, /* p */
{"pr", 2, NUM_PR},
{"rn", 2, NUM_rn}, /* r */
{"sg", 2, NUM_SG}, /* s */
{"sp", 2, NUM_SP},
{"s", 1, NUM_S},
{"th", 2, NUM_th}, /* t */
{"v", 1, NUM_V}, /* v */
/* last */
{NULL, 0, 0}
};
/* ----------
* KeyWords index for DATE-TIME version
* ----------
*/
2005-10-15 04:49:52 +02:00
static const int DCH_index[KeyWord_INDEX_SIZE] = {
/*
0 1 2 3 4 5 6 7 8 9
*/
/*---- first 0..31 chars are skipped ----*/
-1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, DCH_A_D, DCH_B_C, DCH_CC, DCH_DAY, -1,
DCH_FF1, -1, DCH_HH24, DCH_IDDD, DCH_J, -1, -1, DCH_MI, -1, DCH_OF,
DCH_P_M, DCH_Q, DCH_RM, DCH_SSSSS, DCH_TZH, DCH_US, -1, DCH_WW, -1, DCH_Y_YYY,
-1, -1, -1, -1, -1, -1, -1, DCH_a_d, DCH_b_c, DCH_cc,
DCH_day, -1, DCH_ff1, -1, DCH_hh24, DCH_iddd, DCH_j, -1, -1, DCH_mi,
-1, -1, DCH_p_m, DCH_q, DCH_rm, DCH_sssss, DCH_tz, DCH_us, -1, DCH_ww,
-1, DCH_y_yyy, -1, -1, -1, -1
/*---- chars over 126 are skipped ----*/
};
/* ----------
* KeyWords index for NUMBER version
* ----------
*/
2005-10-15 04:49:52 +02:00
static const int NUM_index[KeyWord_INDEX_SIZE] = {
/*
0 1 2 3 4 5 6 7 8 9
*/
/*---- first 0..31 chars are skipped ----*/
-1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, NUM_COMMA, -1, NUM_DEC, -1, NUM_0, -1,
-1, -1, -1, -1, -1, -1, -1, NUM_9, -1, -1,
-1, -1, -1, -1, -1, -1, NUM_B, NUM_C, NUM_D, NUM_E,
NUM_FM, NUM_G, -1, -1, -1, -1, NUM_L, NUM_MI, -1, -1,
NUM_PL, -1, NUM_RN, NUM_SG, NUM_TH, -1, NUM_V, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, NUM_b, NUM_c,
NUM_d, NUM_e, NUM_fm, NUM_g, -1, -1, -1, -1, NUM_l, NUM_mi,
-1, -1, NUM_pl, -1, NUM_rn, NUM_sg, NUM_th, -1, NUM_v, -1,
-1, -1, -1, -1, -1, -1
/*---- chars over 126 are skipped ----*/
};
/* ----------
* Number processor struct
* ----------
*/
typedef struct NUMProc
{
bool is_to_char;
NUMDesc *Num; /* number description */
2001-03-22 05:01:46 +01:00
int sign, /* '-' or '+' */
sign_wrote, /* was sign write */
num_count, /* number of write digits */
num_in, /* is inside number */
num_curr, /* current position in number */
2015-05-24 03:35:49 +02:00
out_pre_spaces, /* spaces before first digit */
2001-03-22 05:01:46 +01:00
read_dec, /* to_number - was read dec. point */
read_post, /* to_number - number of dec. digit */
read_pre; /* to_number - number non-dec. digit */
2001-03-22 05:01:46 +01:00
char *number, /* string with number */
*number_p, /* pointer to current number position */
2001-03-22 05:01:46 +01:00
*inout, /* in / out buffer */
*inout_p, /* pointer to current inout position */
2005-10-15 04:49:52 +02:00
*last_relevant, /* last relevant number after decimal point */
2001-03-22 05:01:46 +01:00
*L_negative_sign, /* Locale */
2001-03-22 05:01:46 +01:00
*L_positive_sign,
*decimal,
*L_thousands_sep,
*L_currency_symbol;
} NUMProc;
/* Return flags for DCH_from_char() */
#define DCH_DATED 0x01
#define DCH_TIMED 0x02
#define DCH_ZONED 0x04
/* ----------
* Functions
* ----------
*/
Rationalize format-picture caching logic in formatting.c. Add a validity flag to DCHCacheEntry and NUMCacheEntry entries, and do not set it true until after we've parsed the supplied format string. This allows dealing with possible errors while parsing the format without the baroque hack that was there before (which only covered errors within NUMDesc_prepare, anyway). We can get rid of the PG_TRY in NUMDesc_prepare, as well as last_NUMCacheEntry and NUM_cache_remove. (Essentially, this reverts commit ff783fbae in favor of a less fragile solution; the problems with that approach are well illustrated by later hacking such as 55f927a46.) In passing, define the size of these caches as DCH_CACHE_ENTRIES not DCH_CACHE_FIELDS + 1 (whoever thought that was a good definition?) and likewise for the NUM cache. Also const-ify format string parameters where convenient, and merge duplicated cache lookup logic. This is primarily driven by a proposed patch from Artur Zakirov, which introduced some ereport's into format string parsing for the datetime case. He proposed preventing the creation of invalid cache entries by parsing the format string first into a local-variable array, and then copying that to a cache entry. That seemed a bit ugly to me, and anyway randomly different from the way the identical problem had been solved for the numeric case. Let's make the two sets of code more similar not less so. I'm not sure whether we'll adopt the new error conditions Artur proposes, but this patch seems like good code cleanup and future-proofing in any case. The existing code is critically (and undocumented-ly) dependent on no elog being thrown out of several nontrivial functions, which is trouble waiting to happen, though it doesn't seem to be actively broken today. Discussion: <b2a39359-3282-b402-f4a3-057aae500ee7@postgrespro.ru>
2016-09-28 23:08:40 +02:00
static const KeyWord *index_seq_search(const char *str, const KeyWord *kw,
const int *index);
Rationalize format-picture caching logic in formatting.c. Add a validity flag to DCHCacheEntry and NUMCacheEntry entries, and do not set it true until after we've parsed the supplied format string. This allows dealing with possible errors while parsing the format without the baroque hack that was there before (which only covered errors within NUMDesc_prepare, anyway). We can get rid of the PG_TRY in NUMDesc_prepare, as well as last_NUMCacheEntry and NUM_cache_remove. (Essentially, this reverts commit ff783fbae in favor of a less fragile solution; the problems with that approach are well illustrated by later hacking such as 55f927a46.) In passing, define the size of these caches as DCH_CACHE_ENTRIES not DCH_CACHE_FIELDS + 1 (whoever thought that was a good definition?) and likewise for the NUM cache. Also const-ify format string parameters where convenient, and merge duplicated cache lookup logic. This is primarily driven by a proposed patch from Artur Zakirov, which introduced some ereport's into format string parsing for the datetime case. He proposed preventing the creation of invalid cache entries by parsing the format string first into a local-variable array, and then copying that to a cache entry. That seemed a bit ugly to me, and anyway randomly different from the way the identical problem had been solved for the numeric case. Let's make the two sets of code more similar not less so. I'm not sure whether we'll adopt the new error conditions Artur proposes, but this patch seems like good code cleanup and future-proofing in any case. The existing code is critically (and undocumented-ly) dependent on no elog being thrown out of several nontrivial functions, which is trouble waiting to happen, though it doesn't seem to be actively broken today. Discussion: <b2a39359-3282-b402-f4a3-057aae500ee7@postgrespro.ru>
2016-09-28 23:08:40 +02:00
static const KeySuffix *suff_search(const char *str, const KeySuffix *suf, int type);
static bool is_separator_char(const char *str);
static void NUMDesc_prepare(NUMDesc *num, FormatNode *n);
Rationalize format-picture caching logic in formatting.c. Add a validity flag to DCHCacheEntry and NUMCacheEntry entries, and do not set it true until after we've parsed the supplied format string. This allows dealing with possible errors while parsing the format without the baroque hack that was there before (which only covered errors within NUMDesc_prepare, anyway). We can get rid of the PG_TRY in NUMDesc_prepare, as well as last_NUMCacheEntry and NUM_cache_remove. (Essentially, this reverts commit ff783fbae in favor of a less fragile solution; the problems with that approach are well illustrated by later hacking such as 55f927a46.) In passing, define the size of these caches as DCH_CACHE_ENTRIES not DCH_CACHE_FIELDS + 1 (whoever thought that was a good definition?) and likewise for the NUM cache. Also const-ify format string parameters where convenient, and merge duplicated cache lookup logic. This is primarily driven by a proposed patch from Artur Zakirov, which introduced some ereport's into format string parsing for the datetime case. He proposed preventing the creation of invalid cache entries by parsing the format string first into a local-variable array, and then copying that to a cache entry. That seemed a bit ugly to me, and anyway randomly different from the way the identical problem had been solved for the numeric case. Let's make the two sets of code more similar not less so. I'm not sure whether we'll adopt the new error conditions Artur proposes, but this patch seems like good code cleanup and future-proofing in any case. The existing code is critically (and undocumented-ly) dependent on no elog being thrown out of several nontrivial functions, which is trouble waiting to happen, though it doesn't seem to be actively broken today. Discussion: <b2a39359-3282-b402-f4a3-057aae500ee7@postgrespro.ru>
2016-09-28 23:08:40 +02:00
static void parse_format(FormatNode *node, const char *str, const KeyWord *kw,
const KeySuffix *suf, const int *index, uint32 flags, NUMDesc *Num);
static void DCH_to_char(FormatNode *node, bool is_interval,
TmToChar *in, char *out, Oid collid);
Clean up formatting.c's logic for matching constant strings. seq_search(), which is used to match input substrings to constants such as month and day names, had a lot of bizarre and unnecessary behaviors. It was mostly possible to avert our eyes from that before, but we don't want to duplicate those behaviors in the upcoming patch to allow recognition of non-English month and day names. So it's time to clean this up. In particular: * seq_search scribbled on the input string, which is a pretty dangerous thing to do, especially in the badly underdocumented way it was done here. Fortunately the input string is a temporary copy, but that was being made three subroutine levels away, making it something easy to break accidentally. The behavior is externally visible nonetheless, in the form of odd case-folding in error reports about unrecognized month/day names. The scribbling is evidently being done to save a few calls to pg_tolower, but that's such a cheap function (at least for ASCII data) that it's pretty pointless to worry about. In HEAD I switched it to be pg_ascii_tolower to ensure it is cheap in all cases; but there are corner cases in Turkish where this'd change behavior, so leave it as pg_tolower in the back branches. * seq_search insisted on knowing the case form (all-upper, all-lower, or initcap) of the constant strings, so that it didn't have to case-fold them to perform case-insensitive comparisons. This likewise seems like excessive micro-optimization, given that pg_tolower is certainly very cheap for ASCII data. It seems unsafe to assume that we know the case form that will come out of pg_locale.c for localized month/day names, so it's better just to define the comparison rule as "downcase all strings before comparing". (The choice between downcasing and upcasing is arbitrary so far as English is concerned, but it might not be in other locales, so follow citext's lead here.) * seq_search also had a parameter that'd cause it to report a match after a maximum number of characters, even if the constant string were longer than that. This was not actually used because no caller passed a value small enough to cut off a comparison. Replicating that behavior for localized month/day names seems expensive as well as useless, so let's get rid of that too. * from_char_seq_search used the maximum-length parameter to truncate the input string in error reports about not finding a matching name. This leads to rather confusing reports in many cases. Worse, it is outright dangerous if the input string isn't all-ASCII, because we risk truncating the string in the middle of a multibyte character. That'd lead either to delivering an illegible error message to the client, or to encoding-conversion failures that obscure the actual data problem. Get rid of that in favor of truncating at whitespace if any (a suggestion due to Alvaro Herrera). In addition to fixing these things, I const-ified the input string pointers of DCH_from_char and its subroutines, to make sure there aren't any other scribbling-on-input problems. The risk of generating a badly-encoded error message seems like enough of a bug to justify back-patching, so patch all supported branches. Discussion: https://postgr.es/m/29432.1579731087@sss.pgh.pa.us
2020-01-23 19:42:09 +01:00
static void DCH_from_char(FormatNode *node, const char *in, TmFromChar *out,
Oid collid, bool std, bool *have_error);
#ifdef DEBUG_TO_FROM_CHAR
static void dump_index(const KeyWord *k, const int *index);
static void dump_node(FormatNode *node, int max);
#endif
static const char *get_th(char *num, int type);
static char *str_numth(char *dest, char *num, int type);
static int adjust_partial_year_to_2020(int year);
Clean up formatting.c's logic for matching constant strings. seq_search(), which is used to match input substrings to constants such as month and day names, had a lot of bizarre and unnecessary behaviors. It was mostly possible to avert our eyes from that before, but we don't want to duplicate those behaviors in the upcoming patch to allow recognition of non-English month and day names. So it's time to clean this up. In particular: * seq_search scribbled on the input string, which is a pretty dangerous thing to do, especially in the badly underdocumented way it was done here. Fortunately the input string is a temporary copy, but that was being made three subroutine levels away, making it something easy to break accidentally. The behavior is externally visible nonetheless, in the form of odd case-folding in error reports about unrecognized month/day names. The scribbling is evidently being done to save a few calls to pg_tolower, but that's such a cheap function (at least for ASCII data) that it's pretty pointless to worry about. In HEAD I switched it to be pg_ascii_tolower to ensure it is cheap in all cases; but there are corner cases in Turkish where this'd change behavior, so leave it as pg_tolower in the back branches. * seq_search insisted on knowing the case form (all-upper, all-lower, or initcap) of the constant strings, so that it didn't have to case-fold them to perform case-insensitive comparisons. This likewise seems like excessive micro-optimization, given that pg_tolower is certainly very cheap for ASCII data. It seems unsafe to assume that we know the case form that will come out of pg_locale.c for localized month/day names, so it's better just to define the comparison rule as "downcase all strings before comparing". (The choice between downcasing and upcasing is arbitrary so far as English is concerned, but it might not be in other locales, so follow citext's lead here.) * seq_search also had a parameter that'd cause it to report a match after a maximum number of characters, even if the constant string were longer than that. This was not actually used because no caller passed a value small enough to cut off a comparison. Replicating that behavior for localized month/day names seems expensive as well as useless, so let's get rid of that too. * from_char_seq_search used the maximum-length parameter to truncate the input string in error reports about not finding a matching name. This leads to rather confusing reports in many cases. Worse, it is outright dangerous if the input string isn't all-ASCII, because we risk truncating the string in the middle of a multibyte character. That'd lead either to delivering an illegible error message to the client, or to encoding-conversion failures that obscure the actual data problem. Get rid of that in favor of truncating at whitespace if any (a suggestion due to Alvaro Herrera). In addition to fixing these things, I const-ified the input string pointers of DCH_from_char and its subroutines, to make sure there aren't any other scribbling-on-input problems. The risk of generating a badly-encoded error message seems like enough of a bug to justify back-patching, so patch all supported branches. Discussion: https://postgr.es/m/29432.1579731087@sss.pgh.pa.us
2020-01-23 19:42:09 +01:00
static int strspace_len(const char *str);
static void from_char_set_mode(TmFromChar *tmfc, const FromCharDateMode mode,
bool *have_error);
static void from_char_set_int(int *dest, const int value, const FormatNode *node,
bool *have_error);
Clean up formatting.c's logic for matching constant strings. seq_search(), which is used to match input substrings to constants such as month and day names, had a lot of bizarre and unnecessary behaviors. It was mostly possible to avert our eyes from that before, but we don't want to duplicate those behaviors in the upcoming patch to allow recognition of non-English month and day names. So it's time to clean this up. In particular: * seq_search scribbled on the input string, which is a pretty dangerous thing to do, especially in the badly underdocumented way it was done here. Fortunately the input string is a temporary copy, but that was being made three subroutine levels away, making it something easy to break accidentally. The behavior is externally visible nonetheless, in the form of odd case-folding in error reports about unrecognized month/day names. The scribbling is evidently being done to save a few calls to pg_tolower, but that's such a cheap function (at least for ASCII data) that it's pretty pointless to worry about. In HEAD I switched it to be pg_ascii_tolower to ensure it is cheap in all cases; but there are corner cases in Turkish where this'd change behavior, so leave it as pg_tolower in the back branches. * seq_search insisted on knowing the case form (all-upper, all-lower, or initcap) of the constant strings, so that it didn't have to case-fold them to perform case-insensitive comparisons. This likewise seems like excessive micro-optimization, given that pg_tolower is certainly very cheap for ASCII data. It seems unsafe to assume that we know the case form that will come out of pg_locale.c for localized month/day names, so it's better just to define the comparison rule as "downcase all strings before comparing". (The choice between downcasing and upcasing is arbitrary so far as English is concerned, but it might not be in other locales, so follow citext's lead here.) * seq_search also had a parameter that'd cause it to report a match after a maximum number of characters, even if the constant string were longer than that. This was not actually used because no caller passed a value small enough to cut off a comparison. Replicating that behavior for localized month/day names seems expensive as well as useless, so let's get rid of that too. * from_char_seq_search used the maximum-length parameter to truncate the input string in error reports about not finding a matching name. This leads to rather confusing reports in many cases. Worse, it is outright dangerous if the input string isn't all-ASCII, because we risk truncating the string in the middle of a multibyte character. That'd lead either to delivering an illegible error message to the client, or to encoding-conversion failures that obscure the actual data problem. Get rid of that in favor of truncating at whitespace if any (a suggestion due to Alvaro Herrera). In addition to fixing these things, I const-ified the input string pointers of DCH_from_char and its subroutines, to make sure there aren't any other scribbling-on-input problems. The risk of generating a badly-encoded error message seems like enough of a bug to justify back-patching, so patch all supported branches. Discussion: https://postgr.es/m/29432.1579731087@sss.pgh.pa.us
2020-01-23 19:42:09 +01:00
static int from_char_parse_int_len(int *dest, const char **src, const int len,
FormatNode *node, bool *have_error);
Clean up formatting.c's logic for matching constant strings. seq_search(), which is used to match input substrings to constants such as month and day names, had a lot of bizarre and unnecessary behaviors. It was mostly possible to avert our eyes from that before, but we don't want to duplicate those behaviors in the upcoming patch to allow recognition of non-English month and day names. So it's time to clean this up. In particular: * seq_search scribbled on the input string, which is a pretty dangerous thing to do, especially in the badly underdocumented way it was done here. Fortunately the input string is a temporary copy, but that was being made three subroutine levels away, making it something easy to break accidentally. The behavior is externally visible nonetheless, in the form of odd case-folding in error reports about unrecognized month/day names. The scribbling is evidently being done to save a few calls to pg_tolower, but that's such a cheap function (at least for ASCII data) that it's pretty pointless to worry about. In HEAD I switched it to be pg_ascii_tolower to ensure it is cheap in all cases; but there are corner cases in Turkish where this'd change behavior, so leave it as pg_tolower in the back branches. * seq_search insisted on knowing the case form (all-upper, all-lower, or initcap) of the constant strings, so that it didn't have to case-fold them to perform case-insensitive comparisons. This likewise seems like excessive micro-optimization, given that pg_tolower is certainly very cheap for ASCII data. It seems unsafe to assume that we know the case form that will come out of pg_locale.c for localized month/day names, so it's better just to define the comparison rule as "downcase all strings before comparing". (The choice between downcasing and upcasing is arbitrary so far as English is concerned, but it might not be in other locales, so follow citext's lead here.) * seq_search also had a parameter that'd cause it to report a match after a maximum number of characters, even if the constant string were longer than that. This was not actually used because no caller passed a value small enough to cut off a comparison. Replicating that behavior for localized month/day names seems expensive as well as useless, so let's get rid of that too. * from_char_seq_search used the maximum-length parameter to truncate the input string in error reports about not finding a matching name. This leads to rather confusing reports in many cases. Worse, it is outright dangerous if the input string isn't all-ASCII, because we risk truncating the string in the middle of a multibyte character. That'd lead either to delivering an illegible error message to the client, or to encoding-conversion failures that obscure the actual data problem. Get rid of that in favor of truncating at whitespace if any (a suggestion due to Alvaro Herrera). In addition to fixing these things, I const-ified the input string pointers of DCH_from_char and its subroutines, to make sure there aren't any other scribbling-on-input problems. The risk of generating a badly-encoded error message seems like enough of a bug to justify back-patching, so patch all supported branches. Discussion: https://postgr.es/m/29432.1579731087@sss.pgh.pa.us
2020-01-23 19:42:09 +01:00
static int from_char_parse_int(int *dest, const char **src, FormatNode *node,
bool *have_error);
static int seq_search_ascii(const char *name, const char *const *array, int *len);
static int seq_search_localized(const char *name, char **array, int *len,
Oid collid);
Clean up formatting.c's logic for matching constant strings. seq_search(), which is used to match input substrings to constants such as month and day names, had a lot of bizarre and unnecessary behaviors. It was mostly possible to avert our eyes from that before, but we don't want to duplicate those behaviors in the upcoming patch to allow recognition of non-English month and day names. So it's time to clean this up. In particular: * seq_search scribbled on the input string, which is a pretty dangerous thing to do, especially in the badly underdocumented way it was done here. Fortunately the input string is a temporary copy, but that was being made three subroutine levels away, making it something easy to break accidentally. The behavior is externally visible nonetheless, in the form of odd case-folding in error reports about unrecognized month/day names. The scribbling is evidently being done to save a few calls to pg_tolower, but that's such a cheap function (at least for ASCII data) that it's pretty pointless to worry about. In HEAD I switched it to be pg_ascii_tolower to ensure it is cheap in all cases; but there are corner cases in Turkish where this'd change behavior, so leave it as pg_tolower in the back branches. * seq_search insisted on knowing the case form (all-upper, all-lower, or initcap) of the constant strings, so that it didn't have to case-fold them to perform case-insensitive comparisons. This likewise seems like excessive micro-optimization, given that pg_tolower is certainly very cheap for ASCII data. It seems unsafe to assume that we know the case form that will come out of pg_locale.c for localized month/day names, so it's better just to define the comparison rule as "downcase all strings before comparing". (The choice between downcasing and upcasing is arbitrary so far as English is concerned, but it might not be in other locales, so follow citext's lead here.) * seq_search also had a parameter that'd cause it to report a match after a maximum number of characters, even if the constant string were longer than that. This was not actually used because no caller passed a value small enough to cut off a comparison. Replicating that behavior for localized month/day names seems expensive as well as useless, so let's get rid of that too. * from_char_seq_search used the maximum-length parameter to truncate the input string in error reports about not finding a matching name. This leads to rather confusing reports in many cases. Worse, it is outright dangerous if the input string isn't all-ASCII, because we risk truncating the string in the middle of a multibyte character. That'd lead either to delivering an illegible error message to the client, or to encoding-conversion failures that obscure the actual data problem. Get rid of that in favor of truncating at whitespace if any (a suggestion due to Alvaro Herrera). In addition to fixing these things, I const-ified the input string pointers of DCH_from_char and its subroutines, to make sure there aren't any other scribbling-on-input problems. The risk of generating a badly-encoded error message seems like enough of a bug to justify back-patching, so patch all supported branches. Discussion: https://postgr.es/m/29432.1579731087@sss.pgh.pa.us
2020-01-23 19:42:09 +01:00
static int from_char_seq_search(int *dest, const char **src,
const char *const *array,
char **localized_array, Oid collid,
FormatNode *node, bool *have_error);
static void do_to_timestamp(text *date_txt, text *fmt, Oid collid, bool std,
struct pg_tm *tm, fsec_t *fsec, int *fprec,
uint32 *flags, bool *have_error);
static char *fill_str(char *str, int c, int max);
static FormatNode *NUM_cache(int len, NUMDesc *Num, text *pars_str, bool *shouldFree);
static char *int_to_roman(int number);
static void NUM_prepare_locale(NUMProc *Np);
static char *get_last_relevant_decnum(char *num);
static void NUM_numpart_from_char(NUMProc *Np, int id, int input_len);
static void NUM_numpart_to_char(NUMProc *Np, int id);
static char *NUM_processor(FormatNode *node, NUMDesc *Num, char *inout,
char *number, int input_len, int to_char_out_pre_spaces,
int sign, bool is_to_char, Oid collid);
static DCHCacheEntry *DCH_cache_getnew(const char *str, bool std);
static DCHCacheEntry *DCH_cache_search(const char *str, bool std);
static DCHCacheEntry *DCH_cache_fetch(const char *str, bool std);
Rationalize format-picture caching logic in formatting.c. Add a validity flag to DCHCacheEntry and NUMCacheEntry entries, and do not set it true until after we've parsed the supplied format string. This allows dealing with possible errors while parsing the format without the baroque hack that was there before (which only covered errors within NUMDesc_prepare, anyway). We can get rid of the PG_TRY in NUMDesc_prepare, as well as last_NUMCacheEntry and NUM_cache_remove. (Essentially, this reverts commit ff783fbae in favor of a less fragile solution; the problems with that approach are well illustrated by later hacking such as 55f927a46.) In passing, define the size of these caches as DCH_CACHE_ENTRIES not DCH_CACHE_FIELDS + 1 (whoever thought that was a good definition?) and likewise for the NUM cache. Also const-ify format string parameters where convenient, and merge duplicated cache lookup logic. This is primarily driven by a proposed patch from Artur Zakirov, which introduced some ereport's into format string parsing for the datetime case. He proposed preventing the creation of invalid cache entries by parsing the format string first into a local-variable array, and then copying that to a cache entry. That seemed a bit ugly to me, and anyway randomly different from the way the identical problem had been solved for the numeric case. Let's make the two sets of code more similar not less so. I'm not sure whether we'll adopt the new error conditions Artur proposes, but this patch seems like good code cleanup and future-proofing in any case. The existing code is critically (and undocumented-ly) dependent on no elog being thrown out of several nontrivial functions, which is trouble waiting to happen, though it doesn't seem to be actively broken today. Discussion: <b2a39359-3282-b402-f4a3-057aae500ee7@postgrespro.ru>
2016-09-28 23:08:40 +02:00
static NUMCacheEntry *NUM_cache_getnew(const char *str);
static NUMCacheEntry *NUM_cache_search(const char *str);
static NUMCacheEntry *NUM_cache_fetch(const char *str);
/* ----------
* Fast sequential search, use index for data selection which
* go to seq. cycle (it is very fast for unwanted strings)
* (can't be used binary search in format parsing)
* ----------
*/
static const KeyWord *
Rationalize format-picture caching logic in formatting.c. Add a validity flag to DCHCacheEntry and NUMCacheEntry entries, and do not set it true until after we've parsed the supplied format string. This allows dealing with possible errors while parsing the format without the baroque hack that was there before (which only covered errors within NUMDesc_prepare, anyway). We can get rid of the PG_TRY in NUMDesc_prepare, as well as last_NUMCacheEntry and NUM_cache_remove. (Essentially, this reverts commit ff783fbae in favor of a less fragile solution; the problems with that approach are well illustrated by later hacking such as 55f927a46.) In passing, define the size of these caches as DCH_CACHE_ENTRIES not DCH_CACHE_FIELDS + 1 (whoever thought that was a good definition?) and likewise for the NUM cache. Also const-ify format string parameters where convenient, and merge duplicated cache lookup logic. This is primarily driven by a proposed patch from Artur Zakirov, which introduced some ereport's into format string parsing for the datetime case. He proposed preventing the creation of invalid cache entries by parsing the format string first into a local-variable array, and then copying that to a cache entry. That seemed a bit ugly to me, and anyway randomly different from the way the identical problem had been solved for the numeric case. Let's make the two sets of code more similar not less so. I'm not sure whether we'll adopt the new error conditions Artur proposes, but this patch seems like good code cleanup and future-proofing in any case. The existing code is critically (and undocumented-ly) dependent on no elog being thrown out of several nontrivial functions, which is trouble waiting to happen, though it doesn't seem to be actively broken today. Discussion: <b2a39359-3282-b402-f4a3-057aae500ee7@postgrespro.ru>
2016-09-28 23:08:40 +02:00
index_seq_search(const char *str, const KeyWord *kw, const int *index)
{
int poz;
if (!KeyWord_INDEX_FILTER(*str))
return NULL;
if ((poz = *(index + (*str - ' '))) > -1)
{
const KeyWord *k = kw + poz;
do
{
if (strncmp(str, k->name, k->len) == 0)
return k;
k++;
if (!k->name)
return NULL;
} while (*str == *k->name);
}
return NULL;
}
static const KeySuffix *
Rationalize format-picture caching logic in formatting.c. Add a validity flag to DCHCacheEntry and NUMCacheEntry entries, and do not set it true until after we've parsed the supplied format string. This allows dealing with possible errors while parsing the format without the baroque hack that was there before (which only covered errors within NUMDesc_prepare, anyway). We can get rid of the PG_TRY in NUMDesc_prepare, as well as last_NUMCacheEntry and NUM_cache_remove. (Essentially, this reverts commit ff783fbae in favor of a less fragile solution; the problems with that approach are well illustrated by later hacking such as 55f927a46.) In passing, define the size of these caches as DCH_CACHE_ENTRIES not DCH_CACHE_FIELDS + 1 (whoever thought that was a good definition?) and likewise for the NUM cache. Also const-ify format string parameters where convenient, and merge duplicated cache lookup logic. This is primarily driven by a proposed patch from Artur Zakirov, which introduced some ereport's into format string parsing for the datetime case. He proposed preventing the creation of invalid cache entries by parsing the format string first into a local-variable array, and then copying that to a cache entry. That seemed a bit ugly to me, and anyway randomly different from the way the identical problem had been solved for the numeric case. Let's make the two sets of code more similar not less so. I'm not sure whether we'll adopt the new error conditions Artur proposes, but this patch seems like good code cleanup and future-proofing in any case. The existing code is critically (and undocumented-ly) dependent on no elog being thrown out of several nontrivial functions, which is trouble waiting to happen, though it doesn't seem to be actively broken today. Discussion: <b2a39359-3282-b402-f4a3-057aae500ee7@postgrespro.ru>
2016-09-28 23:08:40 +02:00
suff_search(const char *str, const KeySuffix *suf, int type)
{
const KeySuffix *s;
for (s = suf; s->name != NULL; s++)
{
if (s->type != type)
continue;
if (strncmp(str, s->name, s->len) == 0)
return s;
}
return NULL;
}
static bool
is_separator_char(const char *str)
{
/* ASCII printable character, but not letter or digit */
return (*str > 0x20 && *str < 0x7F &&
!(*str >= 'A' && *str <= 'Z') &&
!(*str >= 'a' && *str <= 'z') &&
!(*str >= '0' && *str <= '9'));
}
/* ----------
* Prepare NUMDesc (number description struct) via FormatNode struct
* ----------
*/
static void
NUMDesc_prepare(NUMDesc *num, FormatNode *n)
{
if (n->type != NODE_TYPE_ACTION)
return;
Rationalize format-picture caching logic in formatting.c. Add a validity flag to DCHCacheEntry and NUMCacheEntry entries, and do not set it true until after we've parsed the supplied format string. This allows dealing with possible errors while parsing the format without the baroque hack that was there before (which only covered errors within NUMDesc_prepare, anyway). We can get rid of the PG_TRY in NUMDesc_prepare, as well as last_NUMCacheEntry and NUM_cache_remove. (Essentially, this reverts commit ff783fbae in favor of a less fragile solution; the problems with that approach are well illustrated by later hacking such as 55f927a46.) In passing, define the size of these caches as DCH_CACHE_ENTRIES not DCH_CACHE_FIELDS + 1 (whoever thought that was a good definition?) and likewise for the NUM cache. Also const-ify format string parameters where convenient, and merge duplicated cache lookup logic. This is primarily driven by a proposed patch from Artur Zakirov, which introduced some ereport's into format string parsing for the datetime case. He proposed preventing the creation of invalid cache entries by parsing the format string first into a local-variable array, and then copying that to a cache entry. That seemed a bit ugly to me, and anyway randomly different from the way the identical problem had been solved for the numeric case. Let's make the two sets of code more similar not less so. I'm not sure whether we'll adopt the new error conditions Artur proposes, but this patch seems like good code cleanup and future-proofing in any case. The existing code is critically (and undocumented-ly) dependent on no elog being thrown out of several nontrivial functions, which is trouble waiting to happen, though it doesn't seem to be actively broken today. Discussion: <b2a39359-3282-b402-f4a3-057aae500ee7@postgrespro.ru>
2016-09-28 23:08:40 +02:00
if (IS_EEEE(num) && n->key->id != NUM_E)
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("\"EEEE\" must be the last pattern used")));
Rationalize format-picture caching logic in formatting.c. Add a validity flag to DCHCacheEntry and NUMCacheEntry entries, and do not set it true until after we've parsed the supplied format string. This allows dealing with possible errors while parsing the format without the baroque hack that was there before (which only covered errors within NUMDesc_prepare, anyway). We can get rid of the PG_TRY in NUMDesc_prepare, as well as last_NUMCacheEntry and NUM_cache_remove. (Essentially, this reverts commit ff783fbae in favor of a less fragile solution; the problems with that approach are well illustrated by later hacking such as 55f927a46.) In passing, define the size of these caches as DCH_CACHE_ENTRIES not DCH_CACHE_FIELDS + 1 (whoever thought that was a good definition?) and likewise for the NUM cache. Also const-ify format string parameters where convenient, and merge duplicated cache lookup logic. This is primarily driven by a proposed patch from Artur Zakirov, which introduced some ereport's into format string parsing for the datetime case. He proposed preventing the creation of invalid cache entries by parsing the format string first into a local-variable array, and then copying that to a cache entry. That seemed a bit ugly to me, and anyway randomly different from the way the identical problem had been solved for the numeric case. Let's make the two sets of code more similar not less so. I'm not sure whether we'll adopt the new error conditions Artur proposes, but this patch seems like good code cleanup and future-proofing in any case. The existing code is critically (and undocumented-ly) dependent on no elog being thrown out of several nontrivial functions, which is trouble waiting to happen, though it doesn't seem to be actively broken today. Discussion: <b2a39359-3282-b402-f4a3-057aae500ee7@postgrespro.ru>
2016-09-28 23:08:40 +02:00
switch (n->key->id)
{
case NUM_9:
if (IS_BRACKET(num))
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("\"9\" must be ahead of \"PR\"")));
if (IS_MULTI(num))
{
++num->multi;
2010-02-26 03:01:40 +01:00
break;
Rationalize format-picture caching logic in formatting.c. Add a validity flag to DCHCacheEntry and NUMCacheEntry entries, and do not set it true until after we've parsed the supplied format string. This allows dealing with possible errors while parsing the format without the baroque hack that was there before (which only covered errors within NUMDesc_prepare, anyway). We can get rid of the PG_TRY in NUMDesc_prepare, as well as last_NUMCacheEntry and NUM_cache_remove. (Essentially, this reverts commit ff783fbae in favor of a less fragile solution; the problems with that approach are well illustrated by later hacking such as 55f927a46.) In passing, define the size of these caches as DCH_CACHE_ENTRIES not DCH_CACHE_FIELDS + 1 (whoever thought that was a good definition?) and likewise for the NUM cache. Also const-ify format string parameters where convenient, and merge duplicated cache lookup logic. This is primarily driven by a proposed patch from Artur Zakirov, which introduced some ereport's into format string parsing for the datetime case. He proposed preventing the creation of invalid cache entries by parsing the format string first into a local-variable array, and then copying that to a cache entry. That seemed a bit ugly to me, and anyway randomly different from the way the identical problem had been solved for the numeric case. Let's make the two sets of code more similar not less so. I'm not sure whether we'll adopt the new error conditions Artur proposes, but this patch seems like good code cleanup and future-proofing in any case. The existing code is critically (and undocumented-ly) dependent on no elog being thrown out of several nontrivial functions, which is trouble waiting to happen, though it doesn't seem to be actively broken today. Discussion: <b2a39359-3282-b402-f4a3-057aae500ee7@postgrespro.ru>
2016-09-28 23:08:40 +02:00
}
if (IS_DECIMAL(num))
++num->post;
else
++num->pre;
break;
Rationalize format-picture caching logic in formatting.c. Add a validity flag to DCHCacheEntry and NUMCacheEntry entries, and do not set it true until after we've parsed the supplied format string. This allows dealing with possible errors while parsing the format without the baroque hack that was there before (which only covered errors within NUMDesc_prepare, anyway). We can get rid of the PG_TRY in NUMDesc_prepare, as well as last_NUMCacheEntry and NUM_cache_remove. (Essentially, this reverts commit ff783fbae in favor of a less fragile solution; the problems with that approach are well illustrated by later hacking such as 55f927a46.) In passing, define the size of these caches as DCH_CACHE_ENTRIES not DCH_CACHE_FIELDS + 1 (whoever thought that was a good definition?) and likewise for the NUM cache. Also const-ify format string parameters where convenient, and merge duplicated cache lookup logic. This is primarily driven by a proposed patch from Artur Zakirov, which introduced some ereport's into format string parsing for the datetime case. He proposed preventing the creation of invalid cache entries by parsing the format string first into a local-variable array, and then copying that to a cache entry. That seemed a bit ugly to me, and anyway randomly different from the way the identical problem had been solved for the numeric case. Let's make the two sets of code more similar not less so. I'm not sure whether we'll adopt the new error conditions Artur proposes, but this patch seems like good code cleanup and future-proofing in any case. The existing code is critically (and undocumented-ly) dependent on no elog being thrown out of several nontrivial functions, which is trouble waiting to happen, though it doesn't seem to be actively broken today. Discussion: <b2a39359-3282-b402-f4a3-057aae500ee7@postgrespro.ru>
2016-09-28 23:08:40 +02:00
case NUM_0:
if (IS_BRACKET(num))
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("\"0\" must be ahead of \"PR\"")));
if (!IS_ZERO(num) && !IS_DECIMAL(num))
{
num->flag |= NUM_F_ZERO;
num->zero_start = num->pre + 1;
}
if (!IS_DECIMAL(num))
++num->pre;
else
++num->post;
num->zero_end = num->pre + num->post;
break;
case NUM_B:
if (num->pre == 0 && num->post == 0 && (!IS_ZERO(num)))
num->flag |= NUM_F_BLANK;
break;
case NUM_D:
num->flag |= NUM_F_LDECIMAL;
num->need_locale = true;
Rationalize format-picture caching logic in formatting.c. Add a validity flag to DCHCacheEntry and NUMCacheEntry entries, and do not set it true until after we've parsed the supplied format string. This allows dealing with possible errors while parsing the format without the baroque hack that was there before (which only covered errors within NUMDesc_prepare, anyway). We can get rid of the PG_TRY in NUMDesc_prepare, as well as last_NUMCacheEntry and NUM_cache_remove. (Essentially, this reverts commit ff783fbae in favor of a less fragile solution; the problems with that approach are well illustrated by later hacking such as 55f927a46.) In passing, define the size of these caches as DCH_CACHE_ENTRIES not DCH_CACHE_FIELDS + 1 (whoever thought that was a good definition?) and likewise for the NUM cache. Also const-ify format string parameters where convenient, and merge duplicated cache lookup logic. This is primarily driven by a proposed patch from Artur Zakirov, which introduced some ereport's into format string parsing for the datetime case. He proposed preventing the creation of invalid cache entries by parsing the format string first into a local-variable array, and then copying that to a cache entry. That seemed a bit ugly to me, and anyway randomly different from the way the identical problem had been solved for the numeric case. Let's make the two sets of code more similar not less so. I'm not sure whether we'll adopt the new error conditions Artur proposes, but this patch seems like good code cleanup and future-proofing in any case. The existing code is critically (and undocumented-ly) dependent on no elog being thrown out of several nontrivial functions, which is trouble waiting to happen, though it doesn't seem to be actively broken today. Discussion: <b2a39359-3282-b402-f4a3-057aae500ee7@postgrespro.ru>
2016-09-28 23:08:40 +02:00
/* FALLTHROUGH */
case NUM_DEC:
if (IS_DECIMAL(num))
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("multiple decimal points")));
if (IS_MULTI(num))
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("cannot use \"V\" and decimal point together")));
Rationalize format-picture caching logic in formatting.c. Add a validity flag to DCHCacheEntry and NUMCacheEntry entries, and do not set it true until after we've parsed the supplied format string. This allows dealing with possible errors while parsing the format without the baroque hack that was there before (which only covered errors within NUMDesc_prepare, anyway). We can get rid of the PG_TRY in NUMDesc_prepare, as well as last_NUMCacheEntry and NUM_cache_remove. (Essentially, this reverts commit ff783fbae in favor of a less fragile solution; the problems with that approach are well illustrated by later hacking such as 55f927a46.) In passing, define the size of these caches as DCH_CACHE_ENTRIES not DCH_CACHE_FIELDS + 1 (whoever thought that was a good definition?) and likewise for the NUM cache. Also const-ify format string parameters where convenient, and merge duplicated cache lookup logic. This is primarily driven by a proposed patch from Artur Zakirov, which introduced some ereport's into format string parsing for the datetime case. He proposed preventing the creation of invalid cache entries by parsing the format string first into a local-variable array, and then copying that to a cache entry. That seemed a bit ugly to me, and anyway randomly different from the way the identical problem had been solved for the numeric case. Let's make the two sets of code more similar not less so. I'm not sure whether we'll adopt the new error conditions Artur proposes, but this patch seems like good code cleanup and future-proofing in any case. The existing code is critically (and undocumented-ly) dependent on no elog being thrown out of several nontrivial functions, which is trouble waiting to happen, though it doesn't seem to be actively broken today. Discussion: <b2a39359-3282-b402-f4a3-057aae500ee7@postgrespro.ru>
2016-09-28 23:08:40 +02:00
num->flag |= NUM_F_DECIMAL;
break;
Rationalize format-picture caching logic in formatting.c. Add a validity flag to DCHCacheEntry and NUMCacheEntry entries, and do not set it true until after we've parsed the supplied format string. This allows dealing with possible errors while parsing the format without the baroque hack that was there before (which only covered errors within NUMDesc_prepare, anyway). We can get rid of the PG_TRY in NUMDesc_prepare, as well as last_NUMCacheEntry and NUM_cache_remove. (Essentially, this reverts commit ff783fbae in favor of a less fragile solution; the problems with that approach are well illustrated by later hacking such as 55f927a46.) In passing, define the size of these caches as DCH_CACHE_ENTRIES not DCH_CACHE_FIELDS + 1 (whoever thought that was a good definition?) and likewise for the NUM cache. Also const-ify format string parameters where convenient, and merge duplicated cache lookup logic. This is primarily driven by a proposed patch from Artur Zakirov, which introduced some ereport's into format string parsing for the datetime case. He proposed preventing the creation of invalid cache entries by parsing the format string first into a local-variable array, and then copying that to a cache entry. That seemed a bit ugly to me, and anyway randomly different from the way the identical problem had been solved for the numeric case. Let's make the two sets of code more similar not less so. I'm not sure whether we'll adopt the new error conditions Artur proposes, but this patch seems like good code cleanup and future-proofing in any case. The existing code is critically (and undocumented-ly) dependent on no elog being thrown out of several nontrivial functions, which is trouble waiting to happen, though it doesn't seem to be actively broken today. Discussion: <b2a39359-3282-b402-f4a3-057aae500ee7@postgrespro.ru>
2016-09-28 23:08:40 +02:00
case NUM_FM:
num->flag |= NUM_F_FILLMODE;
break;
2010-02-26 03:01:40 +01:00
Rationalize format-picture caching logic in formatting.c. Add a validity flag to DCHCacheEntry and NUMCacheEntry entries, and do not set it true until after we've parsed the supplied format string. This allows dealing with possible errors while parsing the format without the baroque hack that was there before (which only covered errors within NUMDesc_prepare, anyway). We can get rid of the PG_TRY in NUMDesc_prepare, as well as last_NUMCacheEntry and NUM_cache_remove. (Essentially, this reverts commit ff783fbae in favor of a less fragile solution; the problems with that approach are well illustrated by later hacking such as 55f927a46.) In passing, define the size of these caches as DCH_CACHE_ENTRIES not DCH_CACHE_FIELDS + 1 (whoever thought that was a good definition?) and likewise for the NUM cache. Also const-ify format string parameters where convenient, and merge duplicated cache lookup logic. This is primarily driven by a proposed patch from Artur Zakirov, which introduced some ereport's into format string parsing for the datetime case. He proposed preventing the creation of invalid cache entries by parsing the format string first into a local-variable array, and then copying that to a cache entry. That seemed a bit ugly to me, and anyway randomly different from the way the identical problem had been solved for the numeric case. Let's make the two sets of code more similar not less so. I'm not sure whether we'll adopt the new error conditions Artur proposes, but this patch seems like good code cleanup and future-proofing in any case. The existing code is critically (and undocumented-ly) dependent on no elog being thrown out of several nontrivial functions, which is trouble waiting to happen, though it doesn't seem to be actively broken today. Discussion: <b2a39359-3282-b402-f4a3-057aae500ee7@postgrespro.ru>
2016-09-28 23:08:40 +02:00
case NUM_S:
if (IS_LSIGN(num))
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("cannot use \"S\" twice")));
if (IS_PLUS(num) || IS_MINUS(num) || IS_BRACKET(num))
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("cannot use \"S\" and \"PL\"/\"MI\"/\"SG\"/\"PR\" together")));
if (!IS_DECIMAL(num))
{
num->lsign = NUM_LSIGN_PRE;
num->pre_lsign_num = num->pre;
num->need_locale = true;
Rationalize format-picture caching logic in formatting.c. Add a validity flag to DCHCacheEntry and NUMCacheEntry entries, and do not set it true until after we've parsed the supplied format string. This allows dealing with possible errors while parsing the format without the baroque hack that was there before (which only covered errors within NUMDesc_prepare, anyway). We can get rid of the PG_TRY in NUMDesc_prepare, as well as last_NUMCacheEntry and NUM_cache_remove. (Essentially, this reverts commit ff783fbae in favor of a less fragile solution; the problems with that approach are well illustrated by later hacking such as 55f927a46.) In passing, define the size of these caches as DCH_CACHE_ENTRIES not DCH_CACHE_FIELDS + 1 (whoever thought that was a good definition?) and likewise for the NUM cache. Also const-ify format string parameters where convenient, and merge duplicated cache lookup logic. This is primarily driven by a proposed patch from Artur Zakirov, which introduced some ereport's into format string parsing for the datetime case. He proposed preventing the creation of invalid cache entries by parsing the format string first into a local-variable array, and then copying that to a cache entry. That seemed a bit ugly to me, and anyway randomly different from the way the identical problem had been solved for the numeric case. Let's make the two sets of code more similar not less so. I'm not sure whether we'll adopt the new error conditions Artur proposes, but this patch seems like good code cleanup and future-proofing in any case. The existing code is critically (and undocumented-ly) dependent on no elog being thrown out of several nontrivial functions, which is trouble waiting to happen, though it doesn't seem to be actively broken today. Discussion: <b2a39359-3282-b402-f4a3-057aae500ee7@postgrespro.ru>
2016-09-28 23:08:40 +02:00
num->flag |= NUM_F_LSIGN;
}
else if (num->lsign == NUM_LSIGN_NONE)
{
num->lsign = NUM_LSIGN_POST;
num->need_locale = true;
Rationalize format-picture caching logic in formatting.c. Add a validity flag to DCHCacheEntry and NUMCacheEntry entries, and do not set it true until after we've parsed the supplied format string. This allows dealing with possible errors while parsing the format without the baroque hack that was there before (which only covered errors within NUMDesc_prepare, anyway). We can get rid of the PG_TRY in NUMDesc_prepare, as well as last_NUMCacheEntry and NUM_cache_remove. (Essentially, this reverts commit ff783fbae in favor of a less fragile solution; the problems with that approach are well illustrated by later hacking such as 55f927a46.) In passing, define the size of these caches as DCH_CACHE_ENTRIES not DCH_CACHE_FIELDS + 1 (whoever thought that was a good definition?) and likewise for the NUM cache. Also const-ify format string parameters where convenient, and merge duplicated cache lookup logic. This is primarily driven by a proposed patch from Artur Zakirov, which introduced some ereport's into format string parsing for the datetime case. He proposed preventing the creation of invalid cache entries by parsing the format string first into a local-variable array, and then copying that to a cache entry. That seemed a bit ugly to me, and anyway randomly different from the way the identical problem had been solved for the numeric case. Let's make the two sets of code more similar not less so. I'm not sure whether we'll adopt the new error conditions Artur proposes, but this patch seems like good code cleanup and future-proofing in any case. The existing code is critically (and undocumented-ly) dependent on no elog being thrown out of several nontrivial functions, which is trouble waiting to happen, though it doesn't seem to be actively broken today. Discussion: <b2a39359-3282-b402-f4a3-057aae500ee7@postgrespro.ru>
2016-09-28 23:08:40 +02:00
num->flag |= NUM_F_LSIGN;
}
break;
Rationalize format-picture caching logic in formatting.c. Add a validity flag to DCHCacheEntry and NUMCacheEntry entries, and do not set it true until after we've parsed the supplied format string. This allows dealing with possible errors while parsing the format without the baroque hack that was there before (which only covered errors within NUMDesc_prepare, anyway). We can get rid of the PG_TRY in NUMDesc_prepare, as well as last_NUMCacheEntry and NUM_cache_remove. (Essentially, this reverts commit ff783fbae in favor of a less fragile solution; the problems with that approach are well illustrated by later hacking such as 55f927a46.) In passing, define the size of these caches as DCH_CACHE_ENTRIES not DCH_CACHE_FIELDS + 1 (whoever thought that was a good definition?) and likewise for the NUM cache. Also const-ify format string parameters where convenient, and merge duplicated cache lookup logic. This is primarily driven by a proposed patch from Artur Zakirov, which introduced some ereport's into format string parsing for the datetime case. He proposed preventing the creation of invalid cache entries by parsing the format string first into a local-variable array, and then copying that to a cache entry. That seemed a bit ugly to me, and anyway randomly different from the way the identical problem had been solved for the numeric case. Let's make the two sets of code more similar not less so. I'm not sure whether we'll adopt the new error conditions Artur proposes, but this patch seems like good code cleanup and future-proofing in any case. The existing code is critically (and undocumented-ly) dependent on no elog being thrown out of several nontrivial functions, which is trouble waiting to happen, though it doesn't seem to be actively broken today. Discussion: <b2a39359-3282-b402-f4a3-057aae500ee7@postgrespro.ru>
2016-09-28 23:08:40 +02:00
case NUM_MI:
if (IS_LSIGN(num))
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("cannot use \"S\" and \"MI\" together")));
num->flag |= NUM_F_MINUS;
if (IS_DECIMAL(num))
num->flag |= NUM_F_MINUS_POST;
break;
case NUM_PL:
if (IS_LSIGN(num))
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("cannot use \"S\" and \"PL\" together")));
num->flag |= NUM_F_PLUS;
if (IS_DECIMAL(num))
num->flag |= NUM_F_PLUS_POST;
break;
case NUM_SG:
if (IS_LSIGN(num))
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("cannot use \"S\" and \"SG\" together")));
num->flag |= NUM_F_MINUS;
num->flag |= NUM_F_PLUS;
break;
case NUM_PR:
if (IS_LSIGN(num) || IS_PLUS(num) || IS_MINUS(num))
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("cannot use \"PR\" and \"S\"/\"PL\"/\"MI\"/\"SG\" together")));
num->flag |= NUM_F_BRACKET;
break;
case NUM_rn:
case NUM_RN:
num->flag |= NUM_F_ROMAN;
break;
case NUM_L:
case NUM_G:
num->need_locale = true;
Rationalize format-picture caching logic in formatting.c. Add a validity flag to DCHCacheEntry and NUMCacheEntry entries, and do not set it true until after we've parsed the supplied format string. This allows dealing with possible errors while parsing the format without the baroque hack that was there before (which only covered errors within NUMDesc_prepare, anyway). We can get rid of the PG_TRY in NUMDesc_prepare, as well as last_NUMCacheEntry and NUM_cache_remove. (Essentially, this reverts commit ff783fbae in favor of a less fragile solution; the problems with that approach are well illustrated by later hacking such as 55f927a46.) In passing, define the size of these caches as DCH_CACHE_ENTRIES not DCH_CACHE_FIELDS + 1 (whoever thought that was a good definition?) and likewise for the NUM cache. Also const-ify format string parameters where convenient, and merge duplicated cache lookup logic. This is primarily driven by a proposed patch from Artur Zakirov, which introduced some ereport's into format string parsing for the datetime case. He proposed preventing the creation of invalid cache entries by parsing the format string first into a local-variable array, and then copying that to a cache entry. That seemed a bit ugly to me, and anyway randomly different from the way the identical problem had been solved for the numeric case. Let's make the two sets of code more similar not less so. I'm not sure whether we'll adopt the new error conditions Artur proposes, but this patch seems like good code cleanup and future-proofing in any case. The existing code is critically (and undocumented-ly) dependent on no elog being thrown out of several nontrivial functions, which is trouble waiting to happen, though it doesn't seem to be actively broken today. Discussion: <b2a39359-3282-b402-f4a3-057aae500ee7@postgrespro.ru>
2016-09-28 23:08:40 +02:00
break;
case NUM_V:
if (IS_DECIMAL(num))
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("cannot use \"V\" and decimal point together")));
Rationalize format-picture caching logic in formatting.c. Add a validity flag to DCHCacheEntry and NUMCacheEntry entries, and do not set it true until after we've parsed the supplied format string. This allows dealing with possible errors while parsing the format without the baroque hack that was there before (which only covered errors within NUMDesc_prepare, anyway). We can get rid of the PG_TRY in NUMDesc_prepare, as well as last_NUMCacheEntry and NUM_cache_remove. (Essentially, this reverts commit ff783fbae in favor of a less fragile solution; the problems with that approach are well illustrated by later hacking such as 55f927a46.) In passing, define the size of these caches as DCH_CACHE_ENTRIES not DCH_CACHE_FIELDS + 1 (whoever thought that was a good definition?) and likewise for the NUM cache. Also const-ify format string parameters where convenient, and merge duplicated cache lookup logic. This is primarily driven by a proposed patch from Artur Zakirov, which introduced some ereport's into format string parsing for the datetime case. He proposed preventing the creation of invalid cache entries by parsing the format string first into a local-variable array, and then copying that to a cache entry. That seemed a bit ugly to me, and anyway randomly different from the way the identical problem had been solved for the numeric case. Let's make the two sets of code more similar not less so. I'm not sure whether we'll adopt the new error conditions Artur proposes, but this patch seems like good code cleanup and future-proofing in any case. The existing code is critically (and undocumented-ly) dependent on no elog being thrown out of several nontrivial functions, which is trouble waiting to happen, though it doesn't seem to be actively broken today. Discussion: <b2a39359-3282-b402-f4a3-057aae500ee7@postgrespro.ru>
2016-09-28 23:08:40 +02:00
num->flag |= NUM_F_MULTI;
break;
case NUM_E:
if (IS_EEEE(num))
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("cannot use \"EEEE\" twice")));
if (IS_BLANK(num) || IS_FILLMODE(num) || IS_LSIGN(num) ||
IS_BRACKET(num) || IS_MINUS(num) || IS_PLUS(num) ||
IS_ROMAN(num) || IS_MULTI(num))
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("\"EEEE\" is incompatible with other formats"),
Rationalize format-picture caching logic in formatting.c. Add a validity flag to DCHCacheEntry and NUMCacheEntry entries, and do not set it true until after we've parsed the supplied format string. This allows dealing with possible errors while parsing the format without the baroque hack that was there before (which only covered errors within NUMDesc_prepare, anyway). We can get rid of the PG_TRY in NUMDesc_prepare, as well as last_NUMCacheEntry and NUM_cache_remove. (Essentially, this reverts commit ff783fbae in favor of a less fragile solution; the problems with that approach are well illustrated by later hacking such as 55f927a46.) In passing, define the size of these caches as DCH_CACHE_ENTRIES not DCH_CACHE_FIELDS + 1 (whoever thought that was a good definition?) and likewise for the NUM cache. Also const-ify format string parameters where convenient, and merge duplicated cache lookup logic. This is primarily driven by a proposed patch from Artur Zakirov, which introduced some ereport's into format string parsing for the datetime case. He proposed preventing the creation of invalid cache entries by parsing the format string first into a local-variable array, and then copying that to a cache entry. That seemed a bit ugly to me, and anyway randomly different from the way the identical problem had been solved for the numeric case. Let's make the two sets of code more similar not less so. I'm not sure whether we'll adopt the new error conditions Artur proposes, but this patch seems like good code cleanup and future-proofing in any case. The existing code is critically (and undocumented-ly) dependent on no elog being thrown out of several nontrivial functions, which is trouble waiting to happen, though it doesn't seem to be actively broken today. Discussion: <b2a39359-3282-b402-f4a3-057aae500ee7@postgrespro.ru>
2016-09-28 23:08:40 +02:00
errdetail("\"EEEE\" may only be used together with digit and decimal point patterns.")));
num->flag |= NUM_F_EEEE;
break;
}
}
/* ----------
* Format parser, search small keywords and keyword's suffixes, and make
* format-node tree.
*
* for DATE-TIME & NUMBER version
* ----------
*/
static void
Rationalize format-picture caching logic in formatting.c. Add a validity flag to DCHCacheEntry and NUMCacheEntry entries, and do not set it true until after we've parsed the supplied format string. This allows dealing with possible errors while parsing the format without the baroque hack that was there before (which only covered errors within NUMDesc_prepare, anyway). We can get rid of the PG_TRY in NUMDesc_prepare, as well as last_NUMCacheEntry and NUM_cache_remove. (Essentially, this reverts commit ff783fbae in favor of a less fragile solution; the problems with that approach are well illustrated by later hacking such as 55f927a46.) In passing, define the size of these caches as DCH_CACHE_ENTRIES not DCH_CACHE_FIELDS + 1 (whoever thought that was a good definition?) and likewise for the NUM cache. Also const-ify format string parameters where convenient, and merge duplicated cache lookup logic. This is primarily driven by a proposed patch from Artur Zakirov, which introduced some ereport's into format string parsing for the datetime case. He proposed preventing the creation of invalid cache entries by parsing the format string first into a local-variable array, and then copying that to a cache entry. That seemed a bit ugly to me, and anyway randomly different from the way the identical problem had been solved for the numeric case. Let's make the two sets of code more similar not less so. I'm not sure whether we'll adopt the new error conditions Artur proposes, but this patch seems like good code cleanup and future-proofing in any case. The existing code is critically (and undocumented-ly) dependent on no elog being thrown out of several nontrivial functions, which is trouble waiting to happen, though it doesn't seem to be actively broken today. Discussion: <b2a39359-3282-b402-f4a3-057aae500ee7@postgrespro.ru>
2016-09-28 23:08:40 +02:00
parse_format(FormatNode *node, const char *str, const KeyWord *kw,
const KeySuffix *suf, const int *index, uint32 flags, NUMDesc *Num)
{
2001-03-22 05:01:46 +01:00
FormatNode *n;
#ifdef DEBUG_TO_FROM_CHAR
elog(DEBUG_elog_output, "to_char/number(): run parser");
#endif
n = node;
while (*str)
{
2017-11-18 18:16:37 +01:00
int suffix = 0;
const KeySuffix *s;
/*
* Prefix
*/
if ((flags & DCH_FLAG) &&
2017-11-18 18:16:37 +01:00
(s = suff_search(str, suf, SUFFTYPE_PREFIX)) != NULL)
{
suffix |= s->id;
if (s->len)
str += s->len;
}
/*
* Keyword
*/
if (*str && (n->key = index_seq_search(str, kw, index)) != NULL)
{
n->type = NODE_TYPE_ACTION;
2017-11-18 18:16:37 +01:00
n->suffix = suffix;
if (n->key->len)
str += n->key->len;
/*
* NUM version: Prepare global NUMDesc struct
*/
if (flags & NUM_FLAG)
NUMDesc_prepare(Num, n);
/*
* Postfix
*/
if ((flags & DCH_FLAG) && *str &&
2017-11-18 18:16:37 +01:00
(s = suff_search(str, suf, SUFFTYPE_POSTFIX)) != NULL)
{
2017-11-18 18:16:37 +01:00
n->suffix |= s->id;
if (s->len)
str += s->len;
}
2017-11-18 18:16:37 +01:00
n++;
}
else if (*str)
{
int chlen;
if (flags & STD_FLAG)
{
/*
* Standard mode, allow only following separators: "-./,':; "
*/
if (strchr("-./,':; ", *str) == NULL)
ereport(ERROR,
(errcode(ERRCODE_INVALID_DATETIME_FORMAT),
errmsg("invalid datetime format separator: \"%s\"",
pnstrdup(str, pg_mblen(str)))));
if (*str == ' ')
n->type = NODE_TYPE_SPACE;
else
n->type = NODE_TYPE_SEPARATOR;
n->character[0] = *str;
n->character[1] = '\0';
n->key = NULL;
n->suffix = 0;
n++;
str++;
}
else if (*str == '"')
{
/*
* Process double-quoted literal string, if any
*/
str++;
while (*str)
{
2017-11-18 18:16:37 +01:00
if (*str == '"')
{
str++;
break;
}
2017-11-18 18:16:37 +01:00
/* backslash quotes the next character, if any */
if (*str == '\\' && *(str + 1))
str++;
chlen = pg_mblen(str);
n->type = NODE_TYPE_CHAR;
memcpy(n->character, str, chlen);
n->character[chlen] = '\0';
n->key = NULL;
n->suffix = 0;
2017-11-18 18:16:37 +01:00
n++;
str += chlen;
}
}
2017-11-18 18:16:37 +01:00
else
{
2017-11-18 18:16:37 +01:00
/*
* Outside double-quoted strings, backslash is only special if
* it immediately precedes a double quote.
*/
if (*str == '\\' && *(str + 1) == '"')
str++;
chlen = pg_mblen(str);
if ((flags & DCH_FLAG) && is_separator_char(str))
n->type = NODE_TYPE_SEPARATOR;
else if (isspace((unsigned char) *str))
n->type = NODE_TYPE_SPACE;
else
n->type = NODE_TYPE_CHAR;
memcpy(n->character, str, chlen);
n->character[chlen] = '\0';
n->key = NULL;
2017-11-18 18:16:37 +01:00
n->suffix = 0;
n++;
str += chlen;
}
}
}
n->type = NODE_TYPE_END;
n->suffix = 0;
}
/* ----------
* DEBUG: Dump the FormatNode Tree (debug)
* ----------
*/
#ifdef DEBUG_TO_FROM_CHAR
#define DUMP_THth(_suf) (S_TH(_suf) ? "TH" : (S_th(_suf) ? "th" : " "))
#define DUMP_FM(_suf) (S_FM(_suf) ? "FM" : " ")
static void
dump_node(FormatNode *node, int max)
{
FormatNode *n;
int a;
elog(DEBUG_elog_output, "to_from-char(): DUMP FORMAT");
for (a = 0, n = node; a <= max; n++, a++)
{
if (n->type == NODE_TYPE_ACTION)
elog(DEBUG_elog_output, "%d:\t NODE_TYPE_ACTION '%s'\t(%s,%s)",
2005-10-15 04:49:52 +02:00
a, n->key->name, DUMP_THth(n->suffix), DUMP_FM(n->suffix));
else if (n->type == NODE_TYPE_CHAR)
elog(DEBUG_elog_output, "%d:\t NODE_TYPE_CHAR '%s'",
a, n->character);
else if (n->type == NODE_TYPE_END)
{
elog(DEBUG_elog_output, "%d:\t NODE_TYPE_END", a);
return;
}
else
elog(DEBUG_elog_output, "%d:\t unknown NODE!", a);
}
}
Phase 2 of pgindent updates. Change pg_bsd_indent to follow upstream rules for placement of comments to the right of code, and remove pgindent hack that caused comments following #endif to not obey the general rule. Commit e3860ffa4dd0dad0dd9eea4be9cc1412373a8c89 wasn't actually using the published version of pg_bsd_indent, but a hacked-up version that tried to minimize the amount of movement of comments to the right of code. The situation of interest is where such a comment has to be moved to the right of its default placement at column 33 because there's code there. BSD indent has always moved right in units of tab stops in such cases --- but in the previous incarnation, indent was working in 8-space tab stops, while now it knows we use 4-space tabs. So the net result is that in about half the cases, such comments are placed one tab stop left of before. This is better all around: it leaves more room on the line for comment text, and it means that in such cases the comment uniformly starts at the next 4-space tab stop after the code, rather than sometimes one and sometimes two tabs after. Also, ensure that comments following #endif are indented the same as comments following other preprocessor commands such as #else. That inconsistency turns out to have been self-inflicted damage from a poorly-thought-through post-indent "fixup" in pgindent. This patch is much less interesting than the first round of indent changes, but also bulkier, so I thought it best to separate the effects. Discussion: https://postgr.es/m/E1dAmxK-0006EE-1r@gemulon.postgresql.org Discussion: https://postgr.es/m/30527.1495162840@sss.pgh.pa.us
2017-06-21 21:18:54 +02:00
#endif /* DEBUG */
/*****************************************************************************
* Private utils
*****************************************************************************/
/* ----------
* Return ST/ND/RD/TH for simple (1..9) numbers
* type --> 0 upper, 1 lower
* ----------
*/
static const char *
get_th(char *num, int type)
{
2001-03-22 05:01:46 +01:00
int len = strlen(num),
last,
seclast;
last = *(num + (len - 1));
if (!isdigit((unsigned char) last))
ereport(ERROR,
(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
errmsg("\"%s\" is not a number", num)));
2000-07-01 23:27:14 +02:00
/*
2001-03-22 05:01:46 +01:00
* All "teens" (<x>1[0-9]) get 'TH/th', while <x>[02-9][123] still get
* 'ST/st', 'ND/nd', 'RD/rd', respectively
2000-07-01 23:27:14 +02:00
*/
2001-03-22 05:01:46 +01:00
if ((len > 1) && ((seclast = num[len - 2]) == '1'))
last = 0;
switch (last)
{
case '1':
if (type == TH_UPPER)
return numTH[0];
return numth[0];
case '2':
if (type == TH_UPPER)
return numTH[1];
return numth[1];
case '3':
if (type == TH_UPPER)
return numTH[2];
return numth[2];
default:
if (type == TH_UPPER)
return numTH[3];
return numth[3];
}
}
/* ----------
* Convert string-number to ordinal string-number
* type --> 0 upper, 1 lower
* ----------
*/
static char *
str_numth(char *dest, char *num, int type)
{
if (dest != num)
strcpy(dest, num);
strcat(dest, get_th(num, type));
return dest;
}
/*****************************************************************************
* upper/lower/initcap functions
*****************************************************************************/
#ifdef USE_ICU
typedef int32_t (*ICU_Convert_Func) (UChar *dest, int32_t destCapacity,
2017-06-21 20:39:04 +02:00
const UChar *src, int32_t srcLength,
const char *locale,
UErrorCode *pErrorCode);
static int32_t
icu_convert_case(ICU_Convert_Func func, pg_locale_t mylocale,
UChar **buff_dest, UChar *buff_source, int32_t len_source)
{
UErrorCode status;
int32_t len_dest;
len_dest = len_source; /* try first with same length */
*buff_dest = palloc(len_dest * sizeof(**buff_dest));
status = U_ZERO_ERROR;
len_dest = func(*buff_dest, len_dest, buff_source, len_source,
mylocale->info.icu.locale, &status);
if (status == U_BUFFER_OVERFLOW_ERROR)
{
/* try again with adjusted length */
pfree(*buff_dest);
*buff_dest = palloc(len_dest * sizeof(**buff_dest));
status = U_ZERO_ERROR;
len_dest = func(*buff_dest, len_dest, buff_source, len_source,
mylocale->info.icu.locale, &status);
}
if (U_FAILURE(status))
ereport(ERROR,
(errmsg("case conversion failed: %s", u_errorName(status))));
return len_dest;
}
static int32_t
u_strToTitle_default_BI(UChar *dest, int32_t destCapacity,
const UChar *src, int32_t srcLength,
const char *locale,
UErrorCode *pErrorCode)
{
return u_strToTitle(dest, destCapacity, src, srcLength,
NULL, locale, pErrorCode);
}
Phase 2 of pgindent updates. Change pg_bsd_indent to follow upstream rules for placement of comments to the right of code, and remove pgindent hack that caused comments following #endif to not obey the general rule. Commit e3860ffa4dd0dad0dd9eea4be9cc1412373a8c89 wasn't actually using the published version of pg_bsd_indent, but a hacked-up version that tried to minimize the amount of movement of comments to the right of code. The situation of interest is where such a comment has to be moved to the right of its default placement at column 33 because there's code there. BSD indent has always moved right in units of tab stops in such cases --- but in the previous incarnation, indent was working in 8-space tab stops, while now it knows we use 4-space tabs. So the net result is that in about half the cases, such comments are placed one tab stop left of before. This is better all around: it leaves more room on the line for comment text, and it means that in such cases the comment uniformly starts at the next 4-space tab stop after the code, rather than sometimes one and sometimes two tabs after. Also, ensure that comments following #endif are indented the same as comments following other preprocessor commands such as #else. That inconsistency turns out to have been self-inflicted damage from a poorly-thought-through post-indent "fixup" in pgindent. This patch is much less interesting than the first round of indent changes, but also bulkier, so I thought it best to separate the effects. Discussion: https://postgr.es/m/E1dAmxK-0006EE-1r@gemulon.postgresql.org Discussion: https://postgr.es/m/30527.1495162840@sss.pgh.pa.us
2017-06-21 21:18:54 +02:00
#endif /* USE_ICU */
/*
* If the system provides the needed functions for wide-character manipulation
* (which are all standardized by C99), then we implement upper/lower/initcap
* using wide-character functions, if necessary. Otherwise we use the
* traditional <ctype.h> functions, which of course will not work as desired
* in multibyte character sets. Note that in either case we are effectively
* assuming that the database character encoding matches the encoding implied
* by LC_CTYPE.
*
* If the system provides locale_t and associated functions (which are
* standardized by Open Group's XBD), we can support collations that are
* neither default nor C. The code is written to handle both combinations
* of have-wide-characters and have-locale_t, though it's rather unlikely
* a platform would have the latter without the former.
*/
/*
* collation-aware, wide-character-aware lower function
*
* We pass the number of bytes so we can pass varlena and char*
* to this function. The result is a palloc'd, null-terminated string.
*/
char *
str_tolower(const char *buff, size_t nbytes, Oid collid)
{
char *result;
if (!buff)
return NULL;
/* C/POSIX collations use this path regardless of database encoding */
if (lc_ctype_is_c(collid))
{
result = asc_tolower(buff, nbytes);
}
else
{
2011-04-10 17:42:00 +02:00
pg_locale_t mylocale = 0;
if (collid != DEFAULT_COLLATION_OID)
{
if (!OidIsValid(collid))
{
/*
* This typically means that the parser could not resolve a
* conflict of implicit collations, so report it that way.
*/
ereport(ERROR,
(errcode(ERRCODE_INDETERMINATE_COLLATION),
errmsg("could not determine which collation to use for %s function",
"lower()"),
errhint("Use the COLLATE clause to set the collation explicitly.")));
}
mylocale = pg_newlocale_from_collation(collid);
}
#ifdef USE_ICU
if (mylocale && mylocale->provider == COLLPROVIDER_ICU)
{
int32_t len_uchar;
int32_t len_conv;
UChar *buff_uchar;
UChar *buff_conv;
len_uchar = icu_to_uchar(&buff_uchar, buff, nbytes);
len_conv = icu_convert_case(u_strToLower, mylocale,
&buff_conv, buff_uchar, len_uchar);
icu_from_uchar(&result, buff_conv, len_conv);
Fix memory leakage in ICU encoding conversion, and other code review. Callers of icu_to_uchar() neglected to pfree the result string when done with it. This results in catastrophic memory leaks in varstr_cmp(), because of our prevailing assumption that btree comparison functions don't leak memory. For safety, make all the call sites clean up leaks, though I suspect that we could get away without it in formatting.c. I audited callers of icu_from_uchar() as well, but found no places that seemed to have a comparable issue. Add function API specifications for icu_to_uchar() and icu_from_uchar(); the lack of any thought-through specification is perhaps not unrelated to the existence of this bug in the first place. Fix icu_to_uchar() to guarantee a nul-terminated result; although no existing caller appears to care, the fact that it would have been nul-terminated except in extreme corner cases seems ideally designed to bite someone on the rear someday. Fix ucnv_fromUChars() destCapacity argument --- in the worst case, that could perhaps have led to a non-nul-terminated result, too. Fix icu_from_uchar() to have a more reasonable definition of the function result --- no callers are actually paying attention, so this isn't a live bug, but it's certainly sloppily designed. Const-ify icu_from_uchar()'s input string for consistency. That is not the end of what needs to be done to these functions, but it's as much as I have the patience for right now. Discussion: https://postgr.es/m/1955.1498181798@sss.pgh.pa.us
2017-06-23 18:22:06 +02:00
pfree(buff_uchar);
pfree(buff_conv);
}
else
#endif
{
if (pg_database_encoding_max_length() > 1)
{
wchar_t *workspace;
size_t curr_char;
size_t result_size;
/* Overflow paranoia */
if ((nbytes + 1) > (INT_MAX / sizeof(wchar_t)))
ereport(ERROR,
(errcode(ERRCODE_OUT_OF_MEMORY),
errmsg("out of memory")));
/* Output workspace cannot have more codes than input bytes */
workspace = (wchar_t *) palloc((nbytes + 1) * sizeof(wchar_t));
char2wchar(workspace, nbytes + 1, buff, nbytes, mylocale);
for (curr_char = 0; workspace[curr_char] != 0; curr_char++)
{
#ifdef HAVE_LOCALE_T
if (mylocale)
workspace[curr_char] = towlower_l(workspace[curr_char], mylocale->info.lt);
else
#endif
workspace[curr_char] = towlower(workspace[curr_char]);
}
/*
* Make result large enough; case change might change number
* of bytes
*/
result_size = curr_char * pg_database_encoding_max_length() + 1;
result = palloc(result_size);
wchar2char(result, workspace, result_size, mylocale);
pfree(workspace);
}
else
{
char *p;
result = pnstrdup(buff, nbytes);
/*
* Note: we assume that tolower_l() will not be so broken as
* to need an isupper_l() guard test. When using the default
* collation, we apply the traditional Postgres behavior that
* forces ASCII-style treatment of I/i, but in non-default
* collations you get exactly what the collation says.
*/
for (p = result; *p; p++)
{
#ifdef HAVE_LOCALE_T
if (mylocale)
*p = tolower_l((unsigned char) *p, mylocale->info.lt);
else
#endif
*p = pg_tolower((unsigned char) *p);
}
}
}
}
return result;
}
/*
* collation-aware, wide-character-aware upper function
*
* We pass the number of bytes so we can pass varlena and char*
* to this function. The result is a palloc'd, null-terminated string.
*/
char *
str_toupper(const char *buff, size_t nbytes, Oid collid)
{
char *result;
if (!buff)
return NULL;
/* C/POSIX collations use this path regardless of database encoding */
if (lc_ctype_is_c(collid))
{
result = asc_toupper(buff, nbytes);
}
else
{
2011-04-10 17:42:00 +02:00
pg_locale_t mylocale = 0;
if (collid != DEFAULT_COLLATION_OID)
{
if (!OidIsValid(collid))
{
/*
* This typically means that the parser could not resolve a
* conflict of implicit collations, so report it that way.
*/
ereport(ERROR,
(errcode(ERRCODE_INDETERMINATE_COLLATION),
errmsg("could not determine which collation to use for %s function",
"upper()"),
errhint("Use the COLLATE clause to set the collation explicitly.")));
}
mylocale = pg_newlocale_from_collation(collid);
}
#ifdef USE_ICU
if (mylocale && mylocale->provider == COLLPROVIDER_ICU)
{
int32_t len_uchar,
len_conv;
UChar *buff_uchar;
UChar *buff_conv;
len_uchar = icu_to_uchar(&buff_uchar, buff, nbytes);
len_conv = icu_convert_case(u_strToUpper, mylocale,
&buff_conv, buff_uchar, len_uchar);
icu_from_uchar(&result, buff_conv, len_conv);
Fix memory leakage in ICU encoding conversion, and other code review. Callers of icu_to_uchar() neglected to pfree the result string when done with it. This results in catastrophic memory leaks in varstr_cmp(), because of our prevailing assumption that btree comparison functions don't leak memory. For safety, make all the call sites clean up leaks, though I suspect that we could get away without it in formatting.c. I audited callers of icu_from_uchar() as well, but found no places that seemed to have a comparable issue. Add function API specifications for icu_to_uchar() and icu_from_uchar(); the lack of any thought-through specification is perhaps not unrelated to the existence of this bug in the first place. Fix icu_to_uchar() to guarantee a nul-terminated result; although no existing caller appears to care, the fact that it would have been nul-terminated except in extreme corner cases seems ideally designed to bite someone on the rear someday. Fix ucnv_fromUChars() destCapacity argument --- in the worst case, that could perhaps have led to a non-nul-terminated result, too. Fix icu_from_uchar() to have a more reasonable definition of the function result --- no callers are actually paying attention, so this isn't a live bug, but it's certainly sloppily designed. Const-ify icu_from_uchar()'s input string for consistency. That is not the end of what needs to be done to these functions, but it's as much as I have the patience for right now. Discussion: https://postgr.es/m/1955.1498181798@sss.pgh.pa.us
2017-06-23 18:22:06 +02:00
pfree(buff_uchar);
pfree(buff_conv);
}
else
#endif
{
if (pg_database_encoding_max_length() > 1)
{
wchar_t *workspace;
size_t curr_char;
size_t result_size;
/* Overflow paranoia */
if ((nbytes + 1) > (INT_MAX / sizeof(wchar_t)))
ereport(ERROR,
(errcode(ERRCODE_OUT_OF_MEMORY),
errmsg("out of memory")));
/* Output workspace cannot have more codes than input bytes */
workspace = (wchar_t *) palloc((nbytes + 1) * sizeof(wchar_t));
char2wchar(workspace, nbytes + 1, buff, nbytes, mylocale);
for (curr_char = 0; workspace[curr_char] != 0; curr_char++)
{
#ifdef HAVE_LOCALE_T
if (mylocale)
workspace[curr_char] = towupper_l(workspace[curr_char], mylocale->info.lt);
else
#endif
workspace[curr_char] = towupper(workspace[curr_char]);
}
/*
* Make result large enough; case change might change number
* of bytes
*/
result_size = curr_char * pg_database_encoding_max_length() + 1;
result = palloc(result_size);
wchar2char(result, workspace, result_size, mylocale);
pfree(workspace);
}
else
{
char *p;
result = pnstrdup(buff, nbytes);
/*
* Note: we assume that toupper_l() will not be so broken as
* to need an islower_l() guard test. When using the default
* collation, we apply the traditional Postgres behavior that
* forces ASCII-style treatment of I/i, but in non-default
* collations you get exactly what the collation says.
*/
for (p = result; *p; p++)
{
#ifdef HAVE_LOCALE_T
if (mylocale)
*p = toupper_l((unsigned char) *p, mylocale->info.lt);
else
#endif
*p = pg_toupper((unsigned char) *p);
}
}
}
}
return result;
}
/*
* collation-aware, wide-character-aware initcap function
*
* We pass the number of bytes so we can pass varlena and char*
* to this function. The result is a palloc'd, null-terminated string.
*/
char *
str_initcap(const char *buff, size_t nbytes, Oid collid)
{
char *result;
int wasalnum = false;
if (!buff)
return NULL;
/* C/POSIX collations use this path regardless of database encoding */
if (lc_ctype_is_c(collid))
{
result = asc_initcap(buff, nbytes);
}
else
{
2011-04-10 17:42:00 +02:00
pg_locale_t mylocale = 0;
if (collid != DEFAULT_COLLATION_OID)
{
if (!OidIsValid(collid))
{
/*
* This typically means that the parser could not resolve a
* conflict of implicit collations, so report it that way.
*/
ereport(ERROR,
(errcode(ERRCODE_INDETERMINATE_COLLATION),
errmsg("could not determine which collation to use for %s function",
"initcap()"),
errhint("Use the COLLATE clause to set the collation explicitly.")));
}
mylocale = pg_newlocale_from_collation(collid);
}
#ifdef USE_ICU
if (mylocale && mylocale->provider == COLLPROVIDER_ICU)
{
int32_t len_uchar,
len_conv;
UChar *buff_uchar;
UChar *buff_conv;
len_uchar = icu_to_uchar(&buff_uchar, buff, nbytes);
len_conv = icu_convert_case(u_strToTitle_default_BI, mylocale,
&buff_conv, buff_uchar, len_uchar);
icu_from_uchar(&result, buff_conv, len_conv);
Fix memory leakage in ICU encoding conversion, and other code review. Callers of icu_to_uchar() neglected to pfree the result string when done with it. This results in catastrophic memory leaks in varstr_cmp(), because of our prevailing assumption that btree comparison functions don't leak memory. For safety, make all the call sites clean up leaks, though I suspect that we could get away without it in formatting.c. I audited callers of icu_from_uchar() as well, but found no places that seemed to have a comparable issue. Add function API specifications for icu_to_uchar() and icu_from_uchar(); the lack of any thought-through specification is perhaps not unrelated to the existence of this bug in the first place. Fix icu_to_uchar() to guarantee a nul-terminated result; although no existing caller appears to care, the fact that it would have been nul-terminated except in extreme corner cases seems ideally designed to bite someone on the rear someday. Fix ucnv_fromUChars() destCapacity argument --- in the worst case, that could perhaps have led to a non-nul-terminated result, too. Fix icu_from_uchar() to have a more reasonable definition of the function result --- no callers are actually paying attention, so this isn't a live bug, but it's certainly sloppily designed. Const-ify icu_from_uchar()'s input string for consistency. That is not the end of what needs to be done to these functions, but it's as much as I have the patience for right now. Discussion: https://postgr.es/m/1955.1498181798@sss.pgh.pa.us
2017-06-23 18:22:06 +02:00
pfree(buff_uchar);
pfree(buff_conv);
}
else
#endif
{
if (pg_database_encoding_max_length() > 1)
{
wchar_t *workspace;
size_t curr_char;
size_t result_size;
/* Overflow paranoia */
if ((nbytes + 1) > (INT_MAX / sizeof(wchar_t)))
ereport(ERROR,
(errcode(ERRCODE_OUT_OF_MEMORY),
errmsg("out of memory")));
/* Output workspace cannot have more codes than input bytes */
workspace = (wchar_t *) palloc((nbytes + 1) * sizeof(wchar_t));
char2wchar(workspace, nbytes + 1, buff, nbytes, mylocale);
for (curr_char = 0; workspace[curr_char] != 0; curr_char++)
{
#ifdef HAVE_LOCALE_T
if (mylocale)
{
if (wasalnum)
workspace[curr_char] = towlower_l(workspace[curr_char], mylocale->info.lt);
else
workspace[curr_char] = towupper_l(workspace[curr_char], mylocale->info.lt);
wasalnum = iswalnum_l(workspace[curr_char], mylocale->info.lt);
}
else
#endif
{
if (wasalnum)
workspace[curr_char] = towlower(workspace[curr_char]);
else
workspace[curr_char] = towupper(workspace[curr_char]);
wasalnum = iswalnum(workspace[curr_char]);
}
}
/*
* Make result large enough; case change might change number
* of bytes
*/
result_size = curr_char * pg_database_encoding_max_length() + 1;
result = palloc(result_size);
wchar2char(result, workspace, result_size, mylocale);
pfree(workspace);
}
else
{
char *p;
result = pnstrdup(buff, nbytes);
/*
* Note: we assume that toupper_l()/tolower_l() will not be so
* broken as to need guard tests. When using the default
* collation, we apply the traditional Postgres behavior that
* forces ASCII-style treatment of I/i, but in non-default
* collations you get exactly what the collation says.
*/
for (p = result; *p; p++)
{
#ifdef HAVE_LOCALE_T
if (mylocale)
{
if (wasalnum)
*p = tolower_l((unsigned char) *p, mylocale->info.lt);
else
*p = toupper_l((unsigned char) *p, mylocale->info.lt);
wasalnum = isalnum_l((unsigned char) *p, mylocale->info.lt);
}
else
#endif
{
if (wasalnum)
*p = pg_tolower((unsigned char) *p);
else
*p = pg_toupper((unsigned char) *p);
wasalnum = isalnum((unsigned char) *p);
}
}
}
}
}
return result;
}
/*
* ASCII-only lower function
*
* We pass the number of bytes so we can pass varlena and char*
* to this function. The result is a palloc'd, null-terminated string.
*/
char *
asc_tolower(const char *buff, size_t nbytes)
{
char *result;
char *p;
if (!buff)
return NULL;
result = pnstrdup(buff, nbytes);
for (p = result; *p; p++)
*p = pg_ascii_tolower((unsigned char) *p);
return result;
}
/*
* ASCII-only upper function
*
* We pass the number of bytes so we can pass varlena and char*
* to this function. The result is a palloc'd, null-terminated string.
*/
char *
asc_toupper(const char *buff, size_t nbytes)
{
char *result;
char *p;
if (!buff)
return NULL;
result = pnstrdup(buff, nbytes);
for (p = result; *p; p++)
*p = pg_ascii_toupper((unsigned char) *p);
return result;
}
/*
* ASCII-only initcap function
*
* We pass the number of bytes so we can pass varlena and char*
* to this function. The result is a palloc'd, null-terminated string.
*/
char *
asc_initcap(const char *buff, size_t nbytes)
{
char *result;
char *p;
int wasalnum = false;
if (!buff)
return NULL;
result = pnstrdup(buff, nbytes);
for (p = result; *p; p++)
{
char c;
if (wasalnum)
*p = c = pg_ascii_tolower((unsigned char) *p);
else
*p = c = pg_ascii_toupper((unsigned char) *p);
/* we don't trust isalnum() here */
wasalnum = ((c >= 'A' && c <= 'Z') ||
(c >= 'a' && c <= 'z') ||
(c >= '0' && c <= '9'));
}
return result;
}
/* convenience routines for when the input is null-terminated */
static char *
str_tolower_z(const char *buff, Oid collid)
{
return str_tolower(buff, strlen(buff), collid);
}
static char *
str_toupper_z(const char *buff, Oid collid)
{
return str_toupper(buff, strlen(buff), collid);
}
static char *
str_initcap_z(const char *buff, Oid collid)
{
return str_initcap(buff, strlen(buff), collid);
}
static char *
asc_tolower_z(const char *buff)
{
return asc_tolower(buff, strlen(buff));
}
static char *
asc_toupper_z(const char *buff)
{
return asc_toupper(buff, strlen(buff));
}
/* asc_initcap_z is not currently needed */
/* ----------
* Skip TM / th in FROM_CHAR
*
* If S_THth is on, skip two chars, assuming there are two available
* ----------
*/
#define SKIP_THth(ptr, _suf) \
do { \
if (S_THth(_suf)) \
{ \
if (*(ptr)) (ptr) += pg_mblen(ptr); \
if (*(ptr)) (ptr) += pg_mblen(ptr); \
} \
} while (0)
#ifdef DEBUG_TO_FROM_CHAR
/* -----------
* DEBUG: Call for debug and for index checking; (Show ASCII char
* and defined keyword for each used position
* ----------
*/
static void
dump_index(const KeyWord *k, const int *index)
{
2001-03-22 05:01:46 +01:00
int i,
count = 0,
free_i = 0;
elog(DEBUG_elog_output, "TO-FROM_CHAR: Dump KeyWord Index:");
for (i = 0; i < KeyWord_INDEX_SIZE; i++)
{
if (index[i] != -1)
{
elog(DEBUG_elog_output, "\t%c: %s, ", i + 32, k[index[i]].name);
count++;
}
else
{
free_i++;
elog(DEBUG_elog_output, "\t(%d) %c %d", i, i + 32, index[i]);
}
}
elog(DEBUG_elog_output, "\n\t\tUsed positions: %d,\n\t\tFree positions: %d",
count, free_i);
}
Phase 2 of pgindent updates. Change pg_bsd_indent to follow upstream rules for placement of comments to the right of code, and remove pgindent hack that caused comments following #endif to not obey the general rule. Commit e3860ffa4dd0dad0dd9eea4be9cc1412373a8c89 wasn't actually using the published version of pg_bsd_indent, but a hacked-up version that tried to minimize the amount of movement of comments to the right of code. The situation of interest is where such a comment has to be moved to the right of its default placement at column 33 because there's code there. BSD indent has always moved right in units of tab stops in such cases --- but in the previous incarnation, indent was working in 8-space tab stops, while now it knows we use 4-space tabs. So the net result is that in about half the cases, such comments are placed one tab stop left of before. This is better all around: it leaves more room on the line for comment text, and it means that in such cases the comment uniformly starts at the next 4-space tab stop after the code, rather than sometimes one and sometimes two tabs after. Also, ensure that comments following #endif are indented the same as comments following other preprocessor commands such as #else. That inconsistency turns out to have been self-inflicted damage from a poorly-thought-through post-indent "fixup" in pgindent. This patch is much less interesting than the first round of indent changes, but also bulkier, so I thought it best to separate the effects. Discussion: https://postgr.es/m/E1dAmxK-0006EE-1r@gemulon.postgresql.org Discussion: https://postgr.es/m/30527.1495162840@sss.pgh.pa.us
2017-06-21 21:18:54 +02:00
#endif /* DEBUG */
/* ----------
* Return true if next format picture is not digit value
* ----------
*/
static bool
is_next_separator(FormatNode *n)
{
if (n->type == NODE_TYPE_END)
return false;
2001-03-22 05:01:46 +01:00
if (n->type == NODE_TYPE_ACTION && S_THth(n->suffix))
return true;
2001-03-22 05:01:46 +01:00
/*
* Next node
*/
2001-03-22 05:01:46 +01:00
n++;
/* end of format string is treated like a non-digit separator */
if (n->type == NODE_TYPE_END)
return true;
2001-03-22 05:01:46 +01:00
if (n->type == NODE_TYPE_ACTION)
{
if (n->key->is_digit)
return false;
2001-03-22 05:01:46 +01:00
return true;
2001-03-22 05:01:46 +01:00
}
else if (n->character[1] == '\0' &&
isdigit((unsigned char) n->character[0]))
return false;
2001-03-22 05:01:46 +01:00
return true; /* some non-digit input (separator) */
}
static int
adjust_partial_year_to_2020(int year)
{
/*
* Adjust all dates toward 2020; this is effectively what happens when we
* assume '70' is 1970 and '69' is 2069.
*/
/* Force 0-69 into the 2000's */
if (year < 70)
return year + 2000;
/* Force 70-99 into the 1900's */
else if (year < 100)
return year + 1900;
/* Force 100-519 into the 2000's */
else if (year < 520)
return year + 2000;
/* Force 520-999 into the 1000's */
else if (year < 1000)
return year + 1000;
else
return year;
}
static int
Clean up formatting.c's logic for matching constant strings. seq_search(), which is used to match input substrings to constants such as month and day names, had a lot of bizarre and unnecessary behaviors. It was mostly possible to avert our eyes from that before, but we don't want to duplicate those behaviors in the upcoming patch to allow recognition of non-English month and day names. So it's time to clean this up. In particular: * seq_search scribbled on the input string, which is a pretty dangerous thing to do, especially in the badly underdocumented way it was done here. Fortunately the input string is a temporary copy, but that was being made three subroutine levels away, making it something easy to break accidentally. The behavior is externally visible nonetheless, in the form of odd case-folding in error reports about unrecognized month/day names. The scribbling is evidently being done to save a few calls to pg_tolower, but that's such a cheap function (at least for ASCII data) that it's pretty pointless to worry about. In HEAD I switched it to be pg_ascii_tolower to ensure it is cheap in all cases; but there are corner cases in Turkish where this'd change behavior, so leave it as pg_tolower in the back branches. * seq_search insisted on knowing the case form (all-upper, all-lower, or initcap) of the constant strings, so that it didn't have to case-fold them to perform case-insensitive comparisons. This likewise seems like excessive micro-optimization, given that pg_tolower is certainly very cheap for ASCII data. It seems unsafe to assume that we know the case form that will come out of pg_locale.c for localized month/day names, so it's better just to define the comparison rule as "downcase all strings before comparing". (The choice between downcasing and upcasing is arbitrary so far as English is concerned, but it might not be in other locales, so follow citext's lead here.) * seq_search also had a parameter that'd cause it to report a match after a maximum number of characters, even if the constant string were longer than that. This was not actually used because no caller passed a value small enough to cut off a comparison. Replicating that behavior for localized month/day names seems expensive as well as useless, so let's get rid of that too. * from_char_seq_search used the maximum-length parameter to truncate the input string in error reports about not finding a matching name. This leads to rather confusing reports in many cases. Worse, it is outright dangerous if the input string isn't all-ASCII, because we risk truncating the string in the middle of a multibyte character. That'd lead either to delivering an illegible error message to the client, or to encoding-conversion failures that obscure the actual data problem. Get rid of that in favor of truncating at whitespace if any (a suggestion due to Alvaro Herrera). In addition to fixing these things, I const-ified the input string pointers of DCH_from_char and its subroutines, to make sure there aren't any other scribbling-on-input problems. The risk of generating a badly-encoded error message seems like enough of a bug to justify back-patching, so patch all supported branches. Discussion: https://postgr.es/m/29432.1579731087@sss.pgh.pa.us
2020-01-23 19:42:09 +01:00
strspace_len(const char *str)
{
int len = 0;
while (*str && isspace((unsigned char) *str))
{
str++;
len++;
}
return len;
}
/*
* Set the date mode of a from-char conversion.
*
* Puke if the date mode has already been set, and the caller attempts to set
* it to a conflicting mode.
*
* If 'have_error' is NULL, then errors are thrown, else '*have_error' is set.
*/
static void
from_char_set_mode(TmFromChar *tmfc, const FromCharDateMode mode, bool *have_error)
{
if (mode != FROM_CHAR_DATE_NONE)
{
if (tmfc->mode == FROM_CHAR_DATE_NONE)
tmfc->mode = mode;
else if (tmfc->mode != mode)
RETURN_ERROR(ereport(ERROR,
(errcode(ERRCODE_INVALID_DATETIME_FORMAT),
errmsg("invalid combination of date conventions"),
errhint("Do not mix Gregorian and ISO week date "
"conventions in a formatting template."))));
}
on_error:
return;
}
/*
* Set the integer pointed to by 'dest' to the given value.
*
* Puke if the destination integer has previously been set to some other
* non-zero value.
*
* If 'have_error' is NULL, then errors are thrown, else '*have_error' is set.
*/
static void
from_char_set_int(int *dest, const int value, const FormatNode *node,
bool *have_error)
{
if (*dest != 0 && *dest != value)
RETURN_ERROR(ereport(ERROR,
(errcode(ERRCODE_INVALID_DATETIME_FORMAT),
errmsg("conflicting values for \"%s\" field in "
"formatting string",
node->key->name),
errdetail("This value contradicts a previous setting "
"for the same field type."))));
*dest = value;
on_error:
return;
}
/*
* Read a single integer from the source string, into the int pointed to by
* 'dest'. If 'dest' is NULL, the result is discarded.
*
* In fixed-width mode (the node does not have the FM suffix), consume at most
* 'len' characters. However, any leading whitespace isn't counted in 'len'.
*
* We use strtol() to recover the integer value from the source string, in
* accordance with the given FormatNode.
*
* If the conversion completes successfully, src will have been advanced to
* point at the character immediately following the last character used in the
* conversion.
*
* Return the number of characters consumed.
*
* Note that from_char_parse_int() provides a more convenient wrapper where
* the length of the field is the same as the length of the format keyword (as
* with DD and MI).
*
* If 'have_error' is NULL, then errors are thrown, else '*have_error' is set
* and -1 is returned.
*/
static int
Clean up formatting.c's logic for matching constant strings. seq_search(), which is used to match input substrings to constants such as month and day names, had a lot of bizarre and unnecessary behaviors. It was mostly possible to avert our eyes from that before, but we don't want to duplicate those behaviors in the upcoming patch to allow recognition of non-English month and day names. So it's time to clean this up. In particular: * seq_search scribbled on the input string, which is a pretty dangerous thing to do, especially in the badly underdocumented way it was done here. Fortunately the input string is a temporary copy, but that was being made three subroutine levels away, making it something easy to break accidentally. The behavior is externally visible nonetheless, in the form of odd case-folding in error reports about unrecognized month/day names. The scribbling is evidently being done to save a few calls to pg_tolower, but that's such a cheap function (at least for ASCII data) that it's pretty pointless to worry about. In HEAD I switched it to be pg_ascii_tolower to ensure it is cheap in all cases; but there are corner cases in Turkish where this'd change behavior, so leave it as pg_tolower in the back branches. * seq_search insisted on knowing the case form (all-upper, all-lower, or initcap) of the constant strings, so that it didn't have to case-fold them to perform case-insensitive comparisons. This likewise seems like excessive micro-optimization, given that pg_tolower is certainly very cheap for ASCII data. It seems unsafe to assume that we know the case form that will come out of pg_locale.c for localized month/day names, so it's better just to define the comparison rule as "downcase all strings before comparing". (The choice between downcasing and upcasing is arbitrary so far as English is concerned, but it might not be in other locales, so follow citext's lead here.) * seq_search also had a parameter that'd cause it to report a match after a maximum number of characters, even if the constant string were longer than that. This was not actually used because no caller passed a value small enough to cut off a comparison. Replicating that behavior for localized month/day names seems expensive as well as useless, so let's get rid of that too. * from_char_seq_search used the maximum-length parameter to truncate the input string in error reports about not finding a matching name. This leads to rather confusing reports in many cases. Worse, it is outright dangerous if the input string isn't all-ASCII, because we risk truncating the string in the middle of a multibyte character. That'd lead either to delivering an illegible error message to the client, or to encoding-conversion failures that obscure the actual data problem. Get rid of that in favor of truncating at whitespace if any (a suggestion due to Alvaro Herrera). In addition to fixing these things, I const-ified the input string pointers of DCH_from_char and its subroutines, to make sure there aren't any other scribbling-on-input problems. The risk of generating a badly-encoded error message seems like enough of a bug to justify back-patching, so patch all supported branches. Discussion: https://postgr.es/m/29432.1579731087@sss.pgh.pa.us
2020-01-23 19:42:09 +01:00
from_char_parse_int_len(int *dest, const char **src, const int len, FormatNode *node,
bool *have_error)
{
long result;
char copy[DCH_MAX_ITEM_SIZ + 1];
Clean up formatting.c's logic for matching constant strings. seq_search(), which is used to match input substrings to constants such as month and day names, had a lot of bizarre and unnecessary behaviors. It was mostly possible to avert our eyes from that before, but we don't want to duplicate those behaviors in the upcoming patch to allow recognition of non-English month and day names. So it's time to clean this up. In particular: * seq_search scribbled on the input string, which is a pretty dangerous thing to do, especially in the badly underdocumented way it was done here. Fortunately the input string is a temporary copy, but that was being made three subroutine levels away, making it something easy to break accidentally. The behavior is externally visible nonetheless, in the form of odd case-folding in error reports about unrecognized month/day names. The scribbling is evidently being done to save a few calls to pg_tolower, but that's such a cheap function (at least for ASCII data) that it's pretty pointless to worry about. In HEAD I switched it to be pg_ascii_tolower to ensure it is cheap in all cases; but there are corner cases in Turkish where this'd change behavior, so leave it as pg_tolower in the back branches. * seq_search insisted on knowing the case form (all-upper, all-lower, or initcap) of the constant strings, so that it didn't have to case-fold them to perform case-insensitive comparisons. This likewise seems like excessive micro-optimization, given that pg_tolower is certainly very cheap for ASCII data. It seems unsafe to assume that we know the case form that will come out of pg_locale.c for localized month/day names, so it's better just to define the comparison rule as "downcase all strings before comparing". (The choice between downcasing and upcasing is arbitrary so far as English is concerned, but it might not be in other locales, so follow citext's lead here.) * seq_search also had a parameter that'd cause it to report a match after a maximum number of characters, even if the constant string were longer than that. This was not actually used because no caller passed a value small enough to cut off a comparison. Replicating that behavior for localized month/day names seems expensive as well as useless, so let's get rid of that too. * from_char_seq_search used the maximum-length parameter to truncate the input string in error reports about not finding a matching name. This leads to rather confusing reports in many cases. Worse, it is outright dangerous if the input string isn't all-ASCII, because we risk truncating the string in the middle of a multibyte character. That'd lead either to delivering an illegible error message to the client, or to encoding-conversion failures that obscure the actual data problem. Get rid of that in favor of truncating at whitespace if any (a suggestion due to Alvaro Herrera). In addition to fixing these things, I const-ified the input string pointers of DCH_from_char and its subroutines, to make sure there aren't any other scribbling-on-input problems. The risk of generating a badly-encoded error message seems like enough of a bug to justify back-patching, so patch all supported branches. Discussion: https://postgr.es/m/29432.1579731087@sss.pgh.pa.us
2020-01-23 19:42:09 +01:00
const char *init = *src;
int used;
/*
* Skip any whitespace before parsing the integer.
*/
*src += strspace_len(*src);
Assert(len <= DCH_MAX_ITEM_SIZ);
used = (int) strlcpy(copy, *src, len + 1);
if (S_FM(node->suffix) || is_next_separator(node))
{
/*
* This node is in Fill Mode, or the next node is known to be a
* non-digit value, so we just slurp as many characters as we can get.
*/
Clean up formatting.c's logic for matching constant strings. seq_search(), which is used to match input substrings to constants such as month and day names, had a lot of bizarre and unnecessary behaviors. It was mostly possible to avert our eyes from that before, but we don't want to duplicate those behaviors in the upcoming patch to allow recognition of non-English month and day names. So it's time to clean this up. In particular: * seq_search scribbled on the input string, which is a pretty dangerous thing to do, especially in the badly underdocumented way it was done here. Fortunately the input string is a temporary copy, but that was being made three subroutine levels away, making it something easy to break accidentally. The behavior is externally visible nonetheless, in the form of odd case-folding in error reports about unrecognized month/day names. The scribbling is evidently being done to save a few calls to pg_tolower, but that's such a cheap function (at least for ASCII data) that it's pretty pointless to worry about. In HEAD I switched it to be pg_ascii_tolower to ensure it is cheap in all cases; but there are corner cases in Turkish where this'd change behavior, so leave it as pg_tolower in the back branches. * seq_search insisted on knowing the case form (all-upper, all-lower, or initcap) of the constant strings, so that it didn't have to case-fold them to perform case-insensitive comparisons. This likewise seems like excessive micro-optimization, given that pg_tolower is certainly very cheap for ASCII data. It seems unsafe to assume that we know the case form that will come out of pg_locale.c for localized month/day names, so it's better just to define the comparison rule as "downcase all strings before comparing". (The choice between downcasing and upcasing is arbitrary so far as English is concerned, but it might not be in other locales, so follow citext's lead here.) * seq_search also had a parameter that'd cause it to report a match after a maximum number of characters, even if the constant string were longer than that. This was not actually used because no caller passed a value small enough to cut off a comparison. Replicating that behavior for localized month/day names seems expensive as well as useless, so let's get rid of that too. * from_char_seq_search used the maximum-length parameter to truncate the input string in error reports about not finding a matching name. This leads to rather confusing reports in many cases. Worse, it is outright dangerous if the input string isn't all-ASCII, because we risk truncating the string in the middle of a multibyte character. That'd lead either to delivering an illegible error message to the client, or to encoding-conversion failures that obscure the actual data problem. Get rid of that in favor of truncating at whitespace if any (a suggestion due to Alvaro Herrera). In addition to fixing these things, I const-ified the input string pointers of DCH_from_char and its subroutines, to make sure there aren't any other scribbling-on-input problems. The risk of generating a badly-encoded error message seems like enough of a bug to justify back-patching, so patch all supported branches. Discussion: https://postgr.es/m/29432.1579731087@sss.pgh.pa.us
2020-01-23 19:42:09 +01:00
char *endptr;
errno = 0;
Clean up formatting.c's logic for matching constant strings. seq_search(), which is used to match input substrings to constants such as month and day names, had a lot of bizarre and unnecessary behaviors. It was mostly possible to avert our eyes from that before, but we don't want to duplicate those behaviors in the upcoming patch to allow recognition of non-English month and day names. So it's time to clean this up. In particular: * seq_search scribbled on the input string, which is a pretty dangerous thing to do, especially in the badly underdocumented way it was done here. Fortunately the input string is a temporary copy, but that was being made three subroutine levels away, making it something easy to break accidentally. The behavior is externally visible nonetheless, in the form of odd case-folding in error reports about unrecognized month/day names. The scribbling is evidently being done to save a few calls to pg_tolower, but that's such a cheap function (at least for ASCII data) that it's pretty pointless to worry about. In HEAD I switched it to be pg_ascii_tolower to ensure it is cheap in all cases; but there are corner cases in Turkish where this'd change behavior, so leave it as pg_tolower in the back branches. * seq_search insisted on knowing the case form (all-upper, all-lower, or initcap) of the constant strings, so that it didn't have to case-fold them to perform case-insensitive comparisons. This likewise seems like excessive micro-optimization, given that pg_tolower is certainly very cheap for ASCII data. It seems unsafe to assume that we know the case form that will come out of pg_locale.c for localized month/day names, so it's better just to define the comparison rule as "downcase all strings before comparing". (The choice between downcasing and upcasing is arbitrary so far as English is concerned, but it might not be in other locales, so follow citext's lead here.) * seq_search also had a parameter that'd cause it to report a match after a maximum number of characters, even if the constant string were longer than that. This was not actually used because no caller passed a value small enough to cut off a comparison. Replicating that behavior for localized month/day names seems expensive as well as useless, so let's get rid of that too. * from_char_seq_search used the maximum-length parameter to truncate the input string in error reports about not finding a matching name. This leads to rather confusing reports in many cases. Worse, it is outright dangerous if the input string isn't all-ASCII, because we risk truncating the string in the middle of a multibyte character. That'd lead either to delivering an illegible error message to the client, or to encoding-conversion failures that obscure the actual data problem. Get rid of that in favor of truncating at whitespace if any (a suggestion due to Alvaro Herrera). In addition to fixing these things, I const-ified the input string pointers of DCH_from_char and its subroutines, to make sure there aren't any other scribbling-on-input problems. The risk of generating a badly-encoded error message seems like enough of a bug to justify back-patching, so patch all supported branches. Discussion: https://postgr.es/m/29432.1579731087@sss.pgh.pa.us
2020-01-23 19:42:09 +01:00
result = strtol(init, &endptr, 10);
*src = endptr;
}
else
{
/*
* We need to pull exactly the number of characters given in 'len' out
* of the string, and convert those.
*/
char *last;
if (used < len)
RETURN_ERROR(ereport(ERROR,
(errcode(ERRCODE_INVALID_DATETIME_FORMAT),
errmsg("source string too short for \"%s\" "
"formatting field",
node->key->name),
errdetail("Field requires %d characters, "
"but only %d remain.",
len, used),
errhint("If your source string is not fixed-width, "
"try using the \"FM\" modifier."))));
errno = 0;
result = strtol(copy, &last, 10);
used = last - copy;
if (used > 0 && used < len)
RETURN_ERROR(ereport(ERROR,
(errcode(ERRCODE_INVALID_DATETIME_FORMAT),
errmsg("invalid value \"%s\" for \"%s\"",
copy, node->key->name),
errdetail("Field requires %d characters, "
"but only %d could be parsed.",
len, used),
errhint("If your source string is not fixed-width, "
"try using the \"FM\" modifier."))));
*src += used;
}
if (*src == init)
RETURN_ERROR(ereport(ERROR,
(errcode(ERRCODE_INVALID_DATETIME_FORMAT),
errmsg("invalid value \"%s\" for \"%s\"",
copy, node->key->name),
errdetail("Value must be an integer."))));
if (errno == ERANGE || result < INT_MIN || result > INT_MAX)
RETURN_ERROR(ereport(ERROR,
(errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
errmsg("value for \"%s\" in source string is out of range",
node->key->name),
errdetail("Value must be in the range %d to %d.",
INT_MIN, INT_MAX))));
if (dest != NULL)
{
from_char_set_int(dest, (int) result, node, have_error);
CHECK_ERROR;
}
return *src - init;
on_error:
return -1;
}
/*
* Call from_char_parse_int_len(), using the length of the format keyword as
* the expected length of the field.
*
* Don't call this function if the field differs in length from the format
* keyword (as with HH24; the keyword length is 4, but the field length is 2).
* In such cases, call from_char_parse_int_len() instead to specify the
* required length explicitly.
*/
static int
Clean up formatting.c's logic for matching constant strings. seq_search(), which is used to match input substrings to constants such as month and day names, had a lot of bizarre and unnecessary behaviors. It was mostly possible to avert our eyes from that before, but we don't want to duplicate those behaviors in the upcoming patch to allow recognition of non-English month and day names. So it's time to clean this up. In particular: * seq_search scribbled on the input string, which is a pretty dangerous thing to do, especially in the badly underdocumented way it was done here. Fortunately the input string is a temporary copy, but that was being made three subroutine levels away, making it something easy to break accidentally. The behavior is externally visible nonetheless, in the form of odd case-folding in error reports about unrecognized month/day names. The scribbling is evidently being done to save a few calls to pg_tolower, but that's such a cheap function (at least for ASCII data) that it's pretty pointless to worry about. In HEAD I switched it to be pg_ascii_tolower to ensure it is cheap in all cases; but there are corner cases in Turkish where this'd change behavior, so leave it as pg_tolower in the back branches. * seq_search insisted on knowing the case form (all-upper, all-lower, or initcap) of the constant strings, so that it didn't have to case-fold them to perform case-insensitive comparisons. This likewise seems like excessive micro-optimization, given that pg_tolower is certainly very cheap for ASCII data. It seems unsafe to assume that we know the case form that will come out of pg_locale.c for localized month/day names, so it's better just to define the comparison rule as "downcase all strings before comparing". (The choice between downcasing and upcasing is arbitrary so far as English is concerned, but it might not be in other locales, so follow citext's lead here.) * seq_search also had a parameter that'd cause it to report a match after a maximum number of characters, even if the constant string were longer than that. This was not actually used because no caller passed a value small enough to cut off a comparison. Replicating that behavior for localized month/day names seems expensive as well as useless, so let's get rid of that too. * from_char_seq_search used the maximum-length parameter to truncate the input string in error reports about not finding a matching name. This leads to rather confusing reports in many cases. Worse, it is outright dangerous if the input string isn't all-ASCII, because we risk truncating the string in the middle of a multibyte character. That'd lead either to delivering an illegible error message to the client, or to encoding-conversion failures that obscure the actual data problem. Get rid of that in favor of truncating at whitespace if any (a suggestion due to Alvaro Herrera). In addition to fixing these things, I const-ified the input string pointers of DCH_from_char and its subroutines, to make sure there aren't any other scribbling-on-input problems. The risk of generating a badly-encoded error message seems like enough of a bug to justify back-patching, so patch all supported branches. Discussion: https://postgr.es/m/29432.1579731087@sss.pgh.pa.us
2020-01-23 19:42:09 +01:00
from_char_parse_int(int *dest, const char **src, FormatNode *node, bool *have_error)
{
return from_char_parse_int_len(dest, src, node->key->len, node, have_error);
}
Clean up formatting.c's logic for matching constant strings. seq_search(), which is used to match input substrings to constants such as month and day names, had a lot of bizarre and unnecessary behaviors. It was mostly possible to avert our eyes from that before, but we don't want to duplicate those behaviors in the upcoming patch to allow recognition of non-English month and day names. So it's time to clean this up. In particular: * seq_search scribbled on the input string, which is a pretty dangerous thing to do, especially in the badly underdocumented way it was done here. Fortunately the input string is a temporary copy, but that was being made three subroutine levels away, making it something easy to break accidentally. The behavior is externally visible nonetheless, in the form of odd case-folding in error reports about unrecognized month/day names. The scribbling is evidently being done to save a few calls to pg_tolower, but that's such a cheap function (at least for ASCII data) that it's pretty pointless to worry about. In HEAD I switched it to be pg_ascii_tolower to ensure it is cheap in all cases; but there are corner cases in Turkish where this'd change behavior, so leave it as pg_tolower in the back branches. * seq_search insisted on knowing the case form (all-upper, all-lower, or initcap) of the constant strings, so that it didn't have to case-fold them to perform case-insensitive comparisons. This likewise seems like excessive micro-optimization, given that pg_tolower is certainly very cheap for ASCII data. It seems unsafe to assume that we know the case form that will come out of pg_locale.c for localized month/day names, so it's better just to define the comparison rule as "downcase all strings before comparing". (The choice between downcasing and upcasing is arbitrary so far as English is concerned, but it might not be in other locales, so follow citext's lead here.) * seq_search also had a parameter that'd cause it to report a match after a maximum number of characters, even if the constant string were longer than that. This was not actually used because no caller passed a value small enough to cut off a comparison. Replicating that behavior for localized month/day names seems expensive as well as useless, so let's get rid of that too. * from_char_seq_search used the maximum-length parameter to truncate the input string in error reports about not finding a matching name. This leads to rather confusing reports in many cases. Worse, it is outright dangerous if the input string isn't all-ASCII, because we risk truncating the string in the middle of a multibyte character. That'd lead either to delivering an illegible error message to the client, or to encoding-conversion failures that obscure the actual data problem. Get rid of that in favor of truncating at whitespace if any (a suggestion due to Alvaro Herrera). In addition to fixing these things, I const-ified the input string pointers of DCH_from_char and its subroutines, to make sure there aren't any other scribbling-on-input problems. The risk of generating a badly-encoded error message seems like enough of a bug to justify back-patching, so patch all supported branches. Discussion: https://postgr.es/m/29432.1579731087@sss.pgh.pa.us
2020-01-23 19:42:09 +01:00
/*
* Sequentially search null-terminated "array" for a case-insensitive match
* to the initial character(s) of "name".
*
* Returns array index of match, or -1 for no match.
*
* *len is set to the length of the match, or 0 for no match.
*
* Case-insensitivity is defined per pg_ascii_tolower, so this is only
* suitable for comparisons to ASCII strings.
*/
static int
seq_search_ascii(const char *name, const char *const *array, int *len)
{
Clean up formatting.c's logic for matching constant strings. seq_search(), which is used to match input substrings to constants such as month and day names, had a lot of bizarre and unnecessary behaviors. It was mostly possible to avert our eyes from that before, but we don't want to duplicate those behaviors in the upcoming patch to allow recognition of non-English month and day names. So it's time to clean this up. In particular: * seq_search scribbled on the input string, which is a pretty dangerous thing to do, especially in the badly underdocumented way it was done here. Fortunately the input string is a temporary copy, but that was being made three subroutine levels away, making it something easy to break accidentally. The behavior is externally visible nonetheless, in the form of odd case-folding in error reports about unrecognized month/day names. The scribbling is evidently being done to save a few calls to pg_tolower, but that's such a cheap function (at least for ASCII data) that it's pretty pointless to worry about. In HEAD I switched it to be pg_ascii_tolower to ensure it is cheap in all cases; but there are corner cases in Turkish where this'd change behavior, so leave it as pg_tolower in the back branches. * seq_search insisted on knowing the case form (all-upper, all-lower, or initcap) of the constant strings, so that it didn't have to case-fold them to perform case-insensitive comparisons. This likewise seems like excessive micro-optimization, given that pg_tolower is certainly very cheap for ASCII data. It seems unsafe to assume that we know the case form that will come out of pg_locale.c for localized month/day names, so it's better just to define the comparison rule as "downcase all strings before comparing". (The choice between downcasing and upcasing is arbitrary so far as English is concerned, but it might not be in other locales, so follow citext's lead here.) * seq_search also had a parameter that'd cause it to report a match after a maximum number of characters, even if the constant string were longer than that. This was not actually used because no caller passed a value small enough to cut off a comparison. Replicating that behavior for localized month/day names seems expensive as well as useless, so let's get rid of that too. * from_char_seq_search used the maximum-length parameter to truncate the input string in error reports about not finding a matching name. This leads to rather confusing reports in many cases. Worse, it is outright dangerous if the input string isn't all-ASCII, because we risk truncating the string in the middle of a multibyte character. That'd lead either to delivering an illegible error message to the client, or to encoding-conversion failures that obscure the actual data problem. Get rid of that in favor of truncating at whitespace if any (a suggestion due to Alvaro Herrera). In addition to fixing these things, I const-ified the input string pointers of DCH_from_char and its subroutines, to make sure there aren't any other scribbling-on-input problems. The risk of generating a badly-encoded error message seems like enough of a bug to justify back-patching, so patch all supported branches. Discussion: https://postgr.es/m/29432.1579731087@sss.pgh.pa.us
2020-01-23 19:42:09 +01:00
unsigned char firstc;
2017-06-21 20:39:04 +02:00
const char *const *a;
*len = 0;
Clean up formatting.c's logic for matching constant strings. seq_search(), which is used to match input substrings to constants such as month and day names, had a lot of bizarre and unnecessary behaviors. It was mostly possible to avert our eyes from that before, but we don't want to duplicate those behaviors in the upcoming patch to allow recognition of non-English month and day names. So it's time to clean this up. In particular: * seq_search scribbled on the input string, which is a pretty dangerous thing to do, especially in the badly underdocumented way it was done here. Fortunately the input string is a temporary copy, but that was being made three subroutine levels away, making it something easy to break accidentally. The behavior is externally visible nonetheless, in the form of odd case-folding in error reports about unrecognized month/day names. The scribbling is evidently being done to save a few calls to pg_tolower, but that's such a cheap function (at least for ASCII data) that it's pretty pointless to worry about. In HEAD I switched it to be pg_ascii_tolower to ensure it is cheap in all cases; but there are corner cases in Turkish where this'd change behavior, so leave it as pg_tolower in the back branches. * seq_search insisted on knowing the case form (all-upper, all-lower, or initcap) of the constant strings, so that it didn't have to case-fold them to perform case-insensitive comparisons. This likewise seems like excessive micro-optimization, given that pg_tolower is certainly very cheap for ASCII data. It seems unsafe to assume that we know the case form that will come out of pg_locale.c for localized month/day names, so it's better just to define the comparison rule as "downcase all strings before comparing". (The choice between downcasing and upcasing is arbitrary so far as English is concerned, but it might not be in other locales, so follow citext's lead here.) * seq_search also had a parameter that'd cause it to report a match after a maximum number of characters, even if the constant string were longer than that. This was not actually used because no caller passed a value small enough to cut off a comparison. Replicating that behavior for localized month/day names seems expensive as well as useless, so let's get rid of that too. * from_char_seq_search used the maximum-length parameter to truncate the input string in error reports about not finding a matching name. This leads to rather confusing reports in many cases. Worse, it is outright dangerous if the input string isn't all-ASCII, because we risk truncating the string in the middle of a multibyte character. That'd lead either to delivering an illegible error message to the client, or to encoding-conversion failures that obscure the actual data problem. Get rid of that in favor of truncating at whitespace if any (a suggestion due to Alvaro Herrera). In addition to fixing these things, I const-ified the input string pointers of DCH_from_char and its subroutines, to make sure there aren't any other scribbling-on-input problems. The risk of generating a badly-encoded error message seems like enough of a bug to justify back-patching, so patch all supported branches. Discussion: https://postgr.es/m/29432.1579731087@sss.pgh.pa.us
2020-01-23 19:42:09 +01:00
/* empty string can't match anything */
if (!*name)
return -1;
Clean up formatting.c's logic for matching constant strings. seq_search(), which is used to match input substrings to constants such as month and day names, had a lot of bizarre and unnecessary behaviors. It was mostly possible to avert our eyes from that before, but we don't want to duplicate those behaviors in the upcoming patch to allow recognition of non-English month and day names. So it's time to clean this up. In particular: * seq_search scribbled on the input string, which is a pretty dangerous thing to do, especially in the badly underdocumented way it was done here. Fortunately the input string is a temporary copy, but that was being made three subroutine levels away, making it something easy to break accidentally. The behavior is externally visible nonetheless, in the form of odd case-folding in error reports about unrecognized month/day names. The scribbling is evidently being done to save a few calls to pg_tolower, but that's such a cheap function (at least for ASCII data) that it's pretty pointless to worry about. In HEAD I switched it to be pg_ascii_tolower to ensure it is cheap in all cases; but there are corner cases in Turkish where this'd change behavior, so leave it as pg_tolower in the back branches. * seq_search insisted on knowing the case form (all-upper, all-lower, or initcap) of the constant strings, so that it didn't have to case-fold them to perform case-insensitive comparisons. This likewise seems like excessive micro-optimization, given that pg_tolower is certainly very cheap for ASCII data. It seems unsafe to assume that we know the case form that will come out of pg_locale.c for localized month/day names, so it's better just to define the comparison rule as "downcase all strings before comparing". (The choice between downcasing and upcasing is arbitrary so far as English is concerned, but it might not be in other locales, so follow citext's lead here.) * seq_search also had a parameter that'd cause it to report a match after a maximum number of characters, even if the constant string were longer than that. This was not actually used because no caller passed a value small enough to cut off a comparison. Replicating that behavior for localized month/day names seems expensive as well as useless, so let's get rid of that too. * from_char_seq_search used the maximum-length parameter to truncate the input string in error reports about not finding a matching name. This leads to rather confusing reports in many cases. Worse, it is outright dangerous if the input string isn't all-ASCII, because we risk truncating the string in the middle of a multibyte character. That'd lead either to delivering an illegible error message to the client, or to encoding-conversion failures that obscure the actual data problem. Get rid of that in favor of truncating at whitespace if any (a suggestion due to Alvaro Herrera). In addition to fixing these things, I const-ified the input string pointers of DCH_from_char and its subroutines, to make sure there aren't any other scribbling-on-input problems. The risk of generating a badly-encoded error message seems like enough of a bug to justify back-patching, so patch all supported branches. Discussion: https://postgr.es/m/29432.1579731087@sss.pgh.pa.us
2020-01-23 19:42:09 +01:00
/* we handle first char specially to gain some speed */
firstc = pg_ascii_tolower((unsigned char) *name);
Clean up formatting.c's logic for matching constant strings. seq_search(), which is used to match input substrings to constants such as month and day names, had a lot of bizarre and unnecessary behaviors. It was mostly possible to avert our eyes from that before, but we don't want to duplicate those behaviors in the upcoming patch to allow recognition of non-English month and day names. So it's time to clean this up. In particular: * seq_search scribbled on the input string, which is a pretty dangerous thing to do, especially in the badly underdocumented way it was done here. Fortunately the input string is a temporary copy, but that was being made three subroutine levels away, making it something easy to break accidentally. The behavior is externally visible nonetheless, in the form of odd case-folding in error reports about unrecognized month/day names. The scribbling is evidently being done to save a few calls to pg_tolower, but that's such a cheap function (at least for ASCII data) that it's pretty pointless to worry about. In HEAD I switched it to be pg_ascii_tolower to ensure it is cheap in all cases; but there are corner cases in Turkish where this'd change behavior, so leave it as pg_tolower in the back branches. * seq_search insisted on knowing the case form (all-upper, all-lower, or initcap) of the constant strings, so that it didn't have to case-fold them to perform case-insensitive comparisons. This likewise seems like excessive micro-optimization, given that pg_tolower is certainly very cheap for ASCII data. It seems unsafe to assume that we know the case form that will come out of pg_locale.c for localized month/day names, so it's better just to define the comparison rule as "downcase all strings before comparing". (The choice between downcasing and upcasing is arbitrary so far as English is concerned, but it might not be in other locales, so follow citext's lead here.) * seq_search also had a parameter that'd cause it to report a match after a maximum number of characters, even if the constant string were longer than that. This was not actually used because no caller passed a value small enough to cut off a comparison. Replicating that behavior for localized month/day names seems expensive as well as useless, so let's get rid of that too. * from_char_seq_search used the maximum-length parameter to truncate the input string in error reports about not finding a matching name. This leads to rather confusing reports in many cases. Worse, it is outright dangerous if the input string isn't all-ASCII, because we risk truncating the string in the middle of a multibyte character. That'd lead either to delivering an illegible error message to the client, or to encoding-conversion failures that obscure the actual data problem. Get rid of that in favor of truncating at whitespace if any (a suggestion due to Alvaro Herrera). In addition to fixing these things, I const-ified the input string pointers of DCH_from_char and its subroutines, to make sure there aren't any other scribbling-on-input problems. The risk of generating a badly-encoded error message seems like enough of a bug to justify back-patching, so patch all supported branches. Discussion: https://postgr.es/m/29432.1579731087@sss.pgh.pa.us
2020-01-23 19:42:09 +01:00
for (a = array; *a != NULL; a++)
{
Clean up formatting.c's logic for matching constant strings. seq_search(), which is used to match input substrings to constants such as month and day names, had a lot of bizarre and unnecessary behaviors. It was mostly possible to avert our eyes from that before, but we don't want to duplicate those behaviors in the upcoming patch to allow recognition of non-English month and day names. So it's time to clean this up. In particular: * seq_search scribbled on the input string, which is a pretty dangerous thing to do, especially in the badly underdocumented way it was done here. Fortunately the input string is a temporary copy, but that was being made three subroutine levels away, making it something easy to break accidentally. The behavior is externally visible nonetheless, in the form of odd case-folding in error reports about unrecognized month/day names. The scribbling is evidently being done to save a few calls to pg_tolower, but that's such a cheap function (at least for ASCII data) that it's pretty pointless to worry about. In HEAD I switched it to be pg_ascii_tolower to ensure it is cheap in all cases; but there are corner cases in Turkish where this'd change behavior, so leave it as pg_tolower in the back branches. * seq_search insisted on knowing the case form (all-upper, all-lower, or initcap) of the constant strings, so that it didn't have to case-fold them to perform case-insensitive comparisons. This likewise seems like excessive micro-optimization, given that pg_tolower is certainly very cheap for ASCII data. It seems unsafe to assume that we know the case form that will come out of pg_locale.c for localized month/day names, so it's better just to define the comparison rule as "downcase all strings before comparing". (The choice between downcasing and upcasing is arbitrary so far as English is concerned, but it might not be in other locales, so follow citext's lead here.) * seq_search also had a parameter that'd cause it to report a match after a maximum number of characters, even if the constant string were longer than that. This was not actually used because no caller passed a value small enough to cut off a comparison. Replicating that behavior for localized month/day names seems expensive as well as useless, so let's get rid of that too. * from_char_seq_search used the maximum-length parameter to truncate the input string in error reports about not finding a matching name. This leads to rather confusing reports in many cases. Worse, it is outright dangerous if the input string isn't all-ASCII, because we risk truncating the string in the middle of a multibyte character. That'd lead either to delivering an illegible error message to the client, or to encoding-conversion failures that obscure the actual data problem. Get rid of that in favor of truncating at whitespace if any (a suggestion due to Alvaro Herrera). In addition to fixing these things, I const-ified the input string pointers of DCH_from_char and its subroutines, to make sure there aren't any other scribbling-on-input problems. The risk of generating a badly-encoded error message seems like enough of a bug to justify back-patching, so patch all supported branches. Discussion: https://postgr.es/m/29432.1579731087@sss.pgh.pa.us
2020-01-23 19:42:09 +01:00
const char *p;
const char *n;
/* compare first chars */
Clean up formatting.c's logic for matching constant strings. seq_search(), which is used to match input substrings to constants such as month and day names, had a lot of bizarre and unnecessary behaviors. It was mostly possible to avert our eyes from that before, but we don't want to duplicate those behaviors in the upcoming patch to allow recognition of non-English month and day names. So it's time to clean this up. In particular: * seq_search scribbled on the input string, which is a pretty dangerous thing to do, especially in the badly underdocumented way it was done here. Fortunately the input string is a temporary copy, but that was being made three subroutine levels away, making it something easy to break accidentally. The behavior is externally visible nonetheless, in the form of odd case-folding in error reports about unrecognized month/day names. The scribbling is evidently being done to save a few calls to pg_tolower, but that's such a cheap function (at least for ASCII data) that it's pretty pointless to worry about. In HEAD I switched it to be pg_ascii_tolower to ensure it is cheap in all cases; but there are corner cases in Turkish where this'd change behavior, so leave it as pg_tolower in the back branches. * seq_search insisted on knowing the case form (all-upper, all-lower, or initcap) of the constant strings, so that it didn't have to case-fold them to perform case-insensitive comparisons. This likewise seems like excessive micro-optimization, given that pg_tolower is certainly very cheap for ASCII data. It seems unsafe to assume that we know the case form that will come out of pg_locale.c for localized month/day names, so it's better just to define the comparison rule as "downcase all strings before comparing". (The choice between downcasing and upcasing is arbitrary so far as English is concerned, but it might not be in other locales, so follow citext's lead here.) * seq_search also had a parameter that'd cause it to report a match after a maximum number of characters, even if the constant string were longer than that. This was not actually used because no caller passed a value small enough to cut off a comparison. Replicating that behavior for localized month/day names seems expensive as well as useless, so let's get rid of that too. * from_char_seq_search used the maximum-length parameter to truncate the input string in error reports about not finding a matching name. This leads to rather confusing reports in many cases. Worse, it is outright dangerous if the input string isn't all-ASCII, because we risk truncating the string in the middle of a multibyte character. That'd lead either to delivering an illegible error message to the client, or to encoding-conversion failures that obscure the actual data problem. Get rid of that in favor of truncating at whitespace if any (a suggestion due to Alvaro Herrera). In addition to fixing these things, I const-ified the input string pointers of DCH_from_char and its subroutines, to make sure there aren't any other scribbling-on-input problems. The risk of generating a badly-encoded error message seems like enough of a bug to justify back-patching, so patch all supported branches. Discussion: https://postgr.es/m/29432.1579731087@sss.pgh.pa.us
2020-01-23 19:42:09 +01:00
if (pg_ascii_tolower((unsigned char) **a) != firstc)
continue;
Clean up formatting.c's logic for matching constant strings. seq_search(), which is used to match input substrings to constants such as month and day names, had a lot of bizarre and unnecessary behaviors. It was mostly possible to avert our eyes from that before, but we don't want to duplicate those behaviors in the upcoming patch to allow recognition of non-English month and day names. So it's time to clean this up. In particular: * seq_search scribbled on the input string, which is a pretty dangerous thing to do, especially in the badly underdocumented way it was done here. Fortunately the input string is a temporary copy, but that was being made three subroutine levels away, making it something easy to break accidentally. The behavior is externally visible nonetheless, in the form of odd case-folding in error reports about unrecognized month/day names. The scribbling is evidently being done to save a few calls to pg_tolower, but that's such a cheap function (at least for ASCII data) that it's pretty pointless to worry about. In HEAD I switched it to be pg_ascii_tolower to ensure it is cheap in all cases; but there are corner cases in Turkish where this'd change behavior, so leave it as pg_tolower in the back branches. * seq_search insisted on knowing the case form (all-upper, all-lower, or initcap) of the constant strings, so that it didn't have to case-fold them to perform case-insensitive comparisons. This likewise seems like excessive micro-optimization, given that pg_tolower is certainly very cheap for ASCII data. It seems unsafe to assume that we know the case form that will come out of pg_locale.c for localized month/day names, so it's better just to define the comparison rule as "downcase all strings before comparing". (The choice between downcasing and upcasing is arbitrary so far as English is concerned, but it might not be in other locales, so follow citext's lead here.) * seq_search also had a parameter that'd cause it to report a match after a maximum number of characters, even if the constant string were longer than that. This was not actually used because no caller passed a value small enough to cut off a comparison. Replicating that behavior for localized month/day names seems expensive as well as useless, so let's get rid of that too. * from_char_seq_search used the maximum-length parameter to truncate the input string in error reports about not finding a matching name. This leads to rather confusing reports in many cases. Worse, it is outright dangerous if the input string isn't all-ASCII, because we risk truncating the string in the middle of a multibyte character. That'd lead either to delivering an illegible error message to the client, or to encoding-conversion failures that obscure the actual data problem. Get rid of that in favor of truncating at whitespace if any (a suggestion due to Alvaro Herrera). In addition to fixing these things, I const-ified the input string pointers of DCH_from_char and its subroutines, to make sure there aren't any other scribbling-on-input problems. The risk of generating a badly-encoded error message seems like enough of a bug to justify back-patching, so patch all supported branches. Discussion: https://postgr.es/m/29432.1579731087@sss.pgh.pa.us
2020-01-23 19:42:09 +01:00
/* compare rest of string */
for (p = *a + 1, n = name + 1;; p++, n++)
{
Clean up formatting.c's logic for matching constant strings. seq_search(), which is used to match input substrings to constants such as month and day names, had a lot of bizarre and unnecessary behaviors. It was mostly possible to avert our eyes from that before, but we don't want to duplicate those behaviors in the upcoming patch to allow recognition of non-English month and day names. So it's time to clean this up. In particular: * seq_search scribbled on the input string, which is a pretty dangerous thing to do, especially in the badly underdocumented way it was done here. Fortunately the input string is a temporary copy, but that was being made three subroutine levels away, making it something easy to break accidentally. The behavior is externally visible nonetheless, in the form of odd case-folding in error reports about unrecognized month/day names. The scribbling is evidently being done to save a few calls to pg_tolower, but that's such a cheap function (at least for ASCII data) that it's pretty pointless to worry about. In HEAD I switched it to be pg_ascii_tolower to ensure it is cheap in all cases; but there are corner cases in Turkish where this'd change behavior, so leave it as pg_tolower in the back branches. * seq_search insisted on knowing the case form (all-upper, all-lower, or initcap) of the constant strings, so that it didn't have to case-fold them to perform case-insensitive comparisons. This likewise seems like excessive micro-optimization, given that pg_tolower is certainly very cheap for ASCII data. It seems unsafe to assume that we know the case form that will come out of pg_locale.c for localized month/day names, so it's better just to define the comparison rule as "downcase all strings before comparing". (The choice between downcasing and upcasing is arbitrary so far as English is concerned, but it might not be in other locales, so follow citext's lead here.) * seq_search also had a parameter that'd cause it to report a match after a maximum number of characters, even if the constant string were longer than that. This was not actually used because no caller passed a value small enough to cut off a comparison. Replicating that behavior for localized month/day names seems expensive as well as useless, so let's get rid of that too. * from_char_seq_search used the maximum-length parameter to truncate the input string in error reports about not finding a matching name. This leads to rather confusing reports in many cases. Worse, it is outright dangerous if the input string isn't all-ASCII, because we risk truncating the string in the middle of a multibyte character. That'd lead either to delivering an illegible error message to the client, or to encoding-conversion failures that obscure the actual data problem. Get rid of that in favor of truncating at whitespace if any (a suggestion due to Alvaro Herrera). In addition to fixing these things, I const-ified the input string pointers of DCH_from_char and its subroutines, to make sure there aren't any other scribbling-on-input problems. The risk of generating a badly-encoded error message seems like enough of a bug to justify back-patching, so patch all supported branches. Discussion: https://postgr.es/m/29432.1579731087@sss.pgh.pa.us
2020-01-23 19:42:09 +01:00
/* return success if we matched whole array entry */
if (*p == '\0')
{
Clean up formatting.c's logic for matching constant strings. seq_search(), which is used to match input substrings to constants such as month and day names, had a lot of bizarre and unnecessary behaviors. It was mostly possible to avert our eyes from that before, but we don't want to duplicate those behaviors in the upcoming patch to allow recognition of non-English month and day names. So it's time to clean this up. In particular: * seq_search scribbled on the input string, which is a pretty dangerous thing to do, especially in the badly underdocumented way it was done here. Fortunately the input string is a temporary copy, but that was being made three subroutine levels away, making it something easy to break accidentally. The behavior is externally visible nonetheless, in the form of odd case-folding in error reports about unrecognized month/day names. The scribbling is evidently being done to save a few calls to pg_tolower, but that's such a cheap function (at least for ASCII data) that it's pretty pointless to worry about. In HEAD I switched it to be pg_ascii_tolower to ensure it is cheap in all cases; but there are corner cases in Turkish where this'd change behavior, so leave it as pg_tolower in the back branches. * seq_search insisted on knowing the case form (all-upper, all-lower, or initcap) of the constant strings, so that it didn't have to case-fold them to perform case-insensitive comparisons. This likewise seems like excessive micro-optimization, given that pg_tolower is certainly very cheap for ASCII data. It seems unsafe to assume that we know the case form that will come out of pg_locale.c for localized month/day names, so it's better just to define the comparison rule as "downcase all strings before comparing". (The choice between downcasing and upcasing is arbitrary so far as English is concerned, but it might not be in other locales, so follow citext's lead here.) * seq_search also had a parameter that'd cause it to report a match after a maximum number of characters, even if the constant string were longer than that. This was not actually used because no caller passed a value small enough to cut off a comparison. Replicating that behavior for localized month/day names seems expensive as well as useless, so let's get rid of that too. * from_char_seq_search used the maximum-length parameter to truncate the input string in error reports about not finding a matching name. This leads to rather confusing reports in many cases. Worse, it is outright dangerous if the input string isn't all-ASCII, because we risk truncating the string in the middle of a multibyte character. That'd lead either to delivering an illegible error message to the client, or to encoding-conversion failures that obscure the actual data problem. Get rid of that in favor of truncating at whitespace if any (a suggestion due to Alvaro Herrera). In addition to fixing these things, I const-ified the input string pointers of DCH_from_char and its subroutines, to make sure there aren't any other scribbling-on-input problems. The risk of generating a badly-encoded error message seems like enough of a bug to justify back-patching, so patch all supported branches. Discussion: https://postgr.es/m/29432.1579731087@sss.pgh.pa.us
2020-01-23 19:42:09 +01:00
*len = n - name;
return a - array;
}
Clean up formatting.c's logic for matching constant strings. seq_search(), which is used to match input substrings to constants such as month and day names, had a lot of bizarre and unnecessary behaviors. It was mostly possible to avert our eyes from that before, but we don't want to duplicate those behaviors in the upcoming patch to allow recognition of non-English month and day names. So it's time to clean this up. In particular: * seq_search scribbled on the input string, which is a pretty dangerous thing to do, especially in the badly underdocumented way it was done here. Fortunately the input string is a temporary copy, but that was being made three subroutine levels away, making it something easy to break accidentally. The behavior is externally visible nonetheless, in the form of odd case-folding in error reports about unrecognized month/day names. The scribbling is evidently being done to save a few calls to pg_tolower, but that's such a cheap function (at least for ASCII data) that it's pretty pointless to worry about. In HEAD I switched it to be pg_ascii_tolower to ensure it is cheap in all cases; but there are corner cases in Turkish where this'd change behavior, so leave it as pg_tolower in the back branches. * seq_search insisted on knowing the case form (all-upper, all-lower, or initcap) of the constant strings, so that it didn't have to case-fold them to perform case-insensitive comparisons. This likewise seems like excessive micro-optimization, given that pg_tolower is certainly very cheap for ASCII data. It seems unsafe to assume that we know the case form that will come out of pg_locale.c for localized month/day names, so it's better just to define the comparison rule as "downcase all strings before comparing". (The choice between downcasing and upcasing is arbitrary so far as English is concerned, but it might not be in other locales, so follow citext's lead here.) * seq_search also had a parameter that'd cause it to report a match after a maximum number of characters, even if the constant string were longer than that. This was not actually used because no caller passed a value small enough to cut off a comparison. Replicating that behavior for localized month/day names seems expensive as well as useless, so let's get rid of that too. * from_char_seq_search used the maximum-length parameter to truncate the input string in error reports about not finding a matching name. This leads to rather confusing reports in many cases. Worse, it is outright dangerous if the input string isn't all-ASCII, because we risk truncating the string in the middle of a multibyte character. That'd lead either to delivering an illegible error message to the client, or to encoding-conversion failures that obscure the actual data problem. Get rid of that in favor of truncating at whitespace if any (a suggestion due to Alvaro Herrera). In addition to fixing these things, I const-ified the input string pointers of DCH_from_char and its subroutines, to make sure there aren't any other scribbling-on-input problems. The risk of generating a badly-encoded error message seems like enough of a bug to justify back-patching, so patch all supported branches. Discussion: https://postgr.es/m/29432.1579731087@sss.pgh.pa.us
2020-01-23 19:42:09 +01:00
/* else, must have another character in "name" ... */
if (*n == '\0')
break;
Clean up formatting.c's logic for matching constant strings. seq_search(), which is used to match input substrings to constants such as month and day names, had a lot of bizarre and unnecessary behaviors. It was mostly possible to avert our eyes from that before, but we don't want to duplicate those behaviors in the upcoming patch to allow recognition of non-English month and day names. So it's time to clean this up. In particular: * seq_search scribbled on the input string, which is a pretty dangerous thing to do, especially in the badly underdocumented way it was done here. Fortunately the input string is a temporary copy, but that was being made three subroutine levels away, making it something easy to break accidentally. The behavior is externally visible nonetheless, in the form of odd case-folding in error reports about unrecognized month/day names. The scribbling is evidently being done to save a few calls to pg_tolower, but that's such a cheap function (at least for ASCII data) that it's pretty pointless to worry about. In HEAD I switched it to be pg_ascii_tolower to ensure it is cheap in all cases; but there are corner cases in Turkish where this'd change behavior, so leave it as pg_tolower in the back branches. * seq_search insisted on knowing the case form (all-upper, all-lower, or initcap) of the constant strings, so that it didn't have to case-fold them to perform case-insensitive comparisons. This likewise seems like excessive micro-optimization, given that pg_tolower is certainly very cheap for ASCII data. It seems unsafe to assume that we know the case form that will come out of pg_locale.c for localized month/day names, so it's better just to define the comparison rule as "downcase all strings before comparing". (The choice between downcasing and upcasing is arbitrary so far as English is concerned, but it might not be in other locales, so follow citext's lead here.) * seq_search also had a parameter that'd cause it to report a match after a maximum number of characters, even if the constant string were longer than that. This was not actually used because no caller passed a value small enough to cut off a comparison. Replicating that behavior for localized month/day names seems expensive as well as useless, so let's get rid of that too. * from_char_seq_search used the maximum-length parameter to truncate the input string in error reports about not finding a matching name. This leads to rather confusing reports in many cases. Worse, it is outright dangerous if the input string isn't all-ASCII, because we risk truncating the string in the middle of a multibyte character. That'd lead either to delivering an illegible error message to the client, or to encoding-conversion failures that obscure the actual data problem. Get rid of that in favor of truncating at whitespace if any (a suggestion due to Alvaro Herrera). In addition to fixing these things, I const-ified the input string pointers of DCH_from_char and its subroutines, to make sure there aren't any other scribbling-on-input problems. The risk of generating a badly-encoded error message seems like enough of a bug to justify back-patching, so patch all supported branches. Discussion: https://postgr.es/m/29432.1579731087@sss.pgh.pa.us
2020-01-23 19:42:09 +01:00
/* ... and it must match */
if (pg_ascii_tolower((unsigned char) *p) !=
pg_ascii_tolower((unsigned char) *n))
break;
}
}
return -1;
}
/*
* Sequentially search an array of possibly non-English words for
* a case-insensitive match to the initial character(s) of "name".
*
* This has the same API as seq_search_ascii(), but we use a more general
* case-folding transformation to achieve case-insensitivity. Case folding
* is done per the rules of the collation identified by "collid".
*
* The array is treated as const, but we don't declare it that way because
* the arrays exported by pg_locale.c aren't const.
*/
static int
seq_search_localized(const char *name, char **array, int *len, Oid collid)
{
char **a;
char *upper_name;
char *lower_name;
*len = 0;
/* empty string can't match anything */
if (!*name)
return -1;
/*
* The case-folding processing done below is fairly expensive, so before
* doing that, make a quick pass to see if there is an exact match.
*/
for (a = array; *a != NULL; a++)
{
int element_len = strlen(*a);
if (strncmp(name, *a, element_len) == 0)
{
*len = element_len;
return a - array;
}
}
/*
* Fold to upper case, then to lower case, so that we can match reliably
* even in languages in which case conversions are not injective.
*/
upper_name = str_toupper(unconstify(char *, name), strlen(name), collid);
lower_name = str_tolower(upper_name, strlen(upper_name), collid);
pfree(upper_name);
for (a = array; *a != NULL; a++)
{
char *upper_element;
char *lower_element;
int element_len;
/* Likewise upper/lower-case array element */
upper_element = str_toupper(*a, strlen(*a), collid);
lower_element = str_tolower(upper_element, strlen(upper_element),
collid);
pfree(upper_element);
element_len = strlen(lower_element);
/* Match? */
if (strncmp(lower_name, lower_element, element_len) == 0)
{
*len = element_len;
pfree(lower_element);
pfree(lower_name);
return a - array;
}
pfree(lower_element);
}
pfree(lower_name);
return -1;
}
/*
* Perform a sequential search in 'array' (or 'localized_array', if that's
* not NULL) for an entry matching the first character(s) of the 'src'
* string case-insensitively.
*
* The 'array' is presumed to be English words (all-ASCII), but
* if 'localized_array' is supplied, that might be non-English
* so we need a more expensive case-folding transformation
* (which will follow the rules of the collation 'collid').
*
* If a match is found, copy the array index of the match into the integer
* pointed to by 'dest', advance 'src' to the end of the part of the string
* which matched, and return the number of characters consumed.
*
* If the string doesn't match, throw an error if 'have_error' is NULL,
* otherwise set '*have_error' and return -1.
Clean up formatting.c's logic for matching constant strings. seq_search(), which is used to match input substrings to constants such as month and day names, had a lot of bizarre and unnecessary behaviors. It was mostly possible to avert our eyes from that before, but we don't want to duplicate those behaviors in the upcoming patch to allow recognition of non-English month and day names. So it's time to clean this up. In particular: * seq_search scribbled on the input string, which is a pretty dangerous thing to do, especially in the badly underdocumented way it was done here. Fortunately the input string is a temporary copy, but that was being made three subroutine levels away, making it something easy to break accidentally. The behavior is externally visible nonetheless, in the form of odd case-folding in error reports about unrecognized month/day names. The scribbling is evidently being done to save a few calls to pg_tolower, but that's such a cheap function (at least for ASCII data) that it's pretty pointless to worry about. In HEAD I switched it to be pg_ascii_tolower to ensure it is cheap in all cases; but there are corner cases in Turkish where this'd change behavior, so leave it as pg_tolower in the back branches. * seq_search insisted on knowing the case form (all-upper, all-lower, or initcap) of the constant strings, so that it didn't have to case-fold them to perform case-insensitive comparisons. This likewise seems like excessive micro-optimization, given that pg_tolower is certainly very cheap for ASCII data. It seems unsafe to assume that we know the case form that will come out of pg_locale.c for localized month/day names, so it's better just to define the comparison rule as "downcase all strings before comparing". (The choice between downcasing and upcasing is arbitrary so far as English is concerned, but it might not be in other locales, so follow citext's lead here.) * seq_search also had a parameter that'd cause it to report a match after a maximum number of characters, even if the constant string were longer than that. This was not actually used because no caller passed a value small enough to cut off a comparison. Replicating that behavior for localized month/day names seems expensive as well as useless, so let's get rid of that too. * from_char_seq_search used the maximum-length parameter to truncate the input string in error reports about not finding a matching name. This leads to rather confusing reports in many cases. Worse, it is outright dangerous if the input string isn't all-ASCII, because we risk truncating the string in the middle of a multibyte character. That'd lead either to delivering an illegible error message to the client, or to encoding-conversion failures that obscure the actual data problem. Get rid of that in favor of truncating at whitespace if any (a suggestion due to Alvaro Herrera). In addition to fixing these things, I const-ified the input string pointers of DCH_from_char and its subroutines, to make sure there aren't any other scribbling-on-input problems. The risk of generating a badly-encoded error message seems like enough of a bug to justify back-patching, so patch all supported branches. Discussion: https://postgr.es/m/29432.1579731087@sss.pgh.pa.us
2020-01-23 19:42:09 +01:00
*
* 'node' is used only for error reports: node->key->name identifies the
* field type we were searching for.
*/
static int
Clean up formatting.c's logic for matching constant strings. seq_search(), which is used to match input substrings to constants such as month and day names, had a lot of bizarre and unnecessary behaviors. It was mostly possible to avert our eyes from that before, but we don't want to duplicate those behaviors in the upcoming patch to allow recognition of non-English month and day names. So it's time to clean this up. In particular: * seq_search scribbled on the input string, which is a pretty dangerous thing to do, especially in the badly underdocumented way it was done here. Fortunately the input string is a temporary copy, but that was being made three subroutine levels away, making it something easy to break accidentally. The behavior is externally visible nonetheless, in the form of odd case-folding in error reports about unrecognized month/day names. The scribbling is evidently being done to save a few calls to pg_tolower, but that's such a cheap function (at least for ASCII data) that it's pretty pointless to worry about. In HEAD I switched it to be pg_ascii_tolower to ensure it is cheap in all cases; but there are corner cases in Turkish where this'd change behavior, so leave it as pg_tolower in the back branches. * seq_search insisted on knowing the case form (all-upper, all-lower, or initcap) of the constant strings, so that it didn't have to case-fold them to perform case-insensitive comparisons. This likewise seems like excessive micro-optimization, given that pg_tolower is certainly very cheap for ASCII data. It seems unsafe to assume that we know the case form that will come out of pg_locale.c for localized month/day names, so it's better just to define the comparison rule as "downcase all strings before comparing". (The choice between downcasing and upcasing is arbitrary so far as English is concerned, but it might not be in other locales, so follow citext's lead here.) * seq_search also had a parameter that'd cause it to report a match after a maximum number of characters, even if the constant string were longer than that. This was not actually used because no caller passed a value small enough to cut off a comparison. Replicating that behavior for localized month/day names seems expensive as well as useless, so let's get rid of that too. * from_char_seq_search used the maximum-length parameter to truncate the input string in error reports about not finding a matching name. This leads to rather confusing reports in many cases. Worse, it is outright dangerous if the input string isn't all-ASCII, because we risk truncating the string in the middle of a multibyte character. That'd lead either to delivering an illegible error message to the client, or to encoding-conversion failures that obscure the actual data problem. Get rid of that in favor of truncating at whitespace if any (a suggestion due to Alvaro Herrera). In addition to fixing these things, I const-ified the input string pointers of DCH_from_char and its subroutines, to make sure there aren't any other scribbling-on-input problems. The risk of generating a badly-encoded error message seems like enough of a bug to justify back-patching, so patch all supported branches. Discussion: https://postgr.es/m/29432.1579731087@sss.pgh.pa.us
2020-01-23 19:42:09 +01:00
from_char_seq_search(int *dest, const char **src, const char *const *array,
char **localized_array, Oid collid,
Clean up formatting.c's logic for matching constant strings. seq_search(), which is used to match input substrings to constants such as month and day names, had a lot of bizarre and unnecessary behaviors. It was mostly possible to avert our eyes from that before, but we don't want to duplicate those behaviors in the upcoming patch to allow recognition of non-English month and day names. So it's time to clean this up. In particular: * seq_search scribbled on the input string, which is a pretty dangerous thing to do, especially in the badly underdocumented way it was done here. Fortunately the input string is a temporary copy, but that was being made three subroutine levels away, making it something easy to break accidentally. The behavior is externally visible nonetheless, in the form of odd case-folding in error reports about unrecognized month/day names. The scribbling is evidently being done to save a few calls to pg_tolower, but that's such a cheap function (at least for ASCII data) that it's pretty pointless to worry about. In HEAD I switched it to be pg_ascii_tolower to ensure it is cheap in all cases; but there are corner cases in Turkish where this'd change behavior, so leave it as pg_tolower in the back branches. * seq_search insisted on knowing the case form (all-upper, all-lower, or initcap) of the constant strings, so that it didn't have to case-fold them to perform case-insensitive comparisons. This likewise seems like excessive micro-optimization, given that pg_tolower is certainly very cheap for ASCII data. It seems unsafe to assume that we know the case form that will come out of pg_locale.c for localized month/day names, so it's better just to define the comparison rule as "downcase all strings before comparing". (The choice between downcasing and upcasing is arbitrary so far as English is concerned, but it might not be in other locales, so follow citext's lead here.) * seq_search also had a parameter that'd cause it to report a match after a maximum number of characters, even if the constant string were longer than that. This was not actually used because no caller passed a value small enough to cut off a comparison. Replicating that behavior for localized month/day names seems expensive as well as useless, so let's get rid of that too. * from_char_seq_search used the maximum-length parameter to truncate the input string in error reports about not finding a matching name. This leads to rather confusing reports in many cases. Worse, it is outright dangerous if the input string isn't all-ASCII, because we risk truncating the string in the middle of a multibyte character. That'd lead either to delivering an illegible error message to the client, or to encoding-conversion failures that obscure the actual data problem. Get rid of that in favor of truncating at whitespace if any (a suggestion due to Alvaro Herrera). In addition to fixing these things, I const-ified the input string pointers of DCH_from_char and its subroutines, to make sure there aren't any other scribbling-on-input problems. The risk of generating a badly-encoded error message seems like enough of a bug to justify back-patching, so patch all supported branches. Discussion: https://postgr.es/m/29432.1579731087@sss.pgh.pa.us
2020-01-23 19:42:09 +01:00
FormatNode *node, bool *have_error)
{
int len;
if (localized_array == NULL)
*dest = seq_search_ascii(*src, array, &len);
else
*dest = seq_search_localized(*src, localized_array, &len, collid);
Clean up formatting.c's logic for matching constant strings. seq_search(), which is used to match input substrings to constants such as month and day names, had a lot of bizarre and unnecessary behaviors. It was mostly possible to avert our eyes from that before, but we don't want to duplicate those behaviors in the upcoming patch to allow recognition of non-English month and day names. So it's time to clean this up. In particular: * seq_search scribbled on the input string, which is a pretty dangerous thing to do, especially in the badly underdocumented way it was done here. Fortunately the input string is a temporary copy, but that was being made three subroutine levels away, making it something easy to break accidentally. The behavior is externally visible nonetheless, in the form of odd case-folding in error reports about unrecognized month/day names. The scribbling is evidently being done to save a few calls to pg_tolower, but that's such a cheap function (at least for ASCII data) that it's pretty pointless to worry about. In HEAD I switched it to be pg_ascii_tolower to ensure it is cheap in all cases; but there are corner cases in Turkish where this'd change behavior, so leave it as pg_tolower in the back branches. * seq_search insisted on knowing the case form (all-upper, all-lower, or initcap) of the constant strings, so that it didn't have to case-fold them to perform case-insensitive comparisons. This likewise seems like excessive micro-optimization, given that pg_tolower is certainly very cheap for ASCII data. It seems unsafe to assume that we know the case form that will come out of pg_locale.c for localized month/day names, so it's better just to define the comparison rule as "downcase all strings before comparing". (The choice between downcasing and upcasing is arbitrary so far as English is concerned, but it might not be in other locales, so follow citext's lead here.) * seq_search also had a parameter that'd cause it to report a match after a maximum number of characters, even if the constant string were longer than that. This was not actually used because no caller passed a value small enough to cut off a comparison. Replicating that behavior for localized month/day names seems expensive as well as useless, so let's get rid of that too. * from_char_seq_search used the maximum-length parameter to truncate the input string in error reports about not finding a matching name. This leads to rather confusing reports in many cases. Worse, it is outright dangerous if the input string isn't all-ASCII, because we risk truncating the string in the middle of a multibyte character. That'd lead either to delivering an illegible error message to the client, or to encoding-conversion failures that obscure the actual data problem. Get rid of that in favor of truncating at whitespace if any (a suggestion due to Alvaro Herrera). In addition to fixing these things, I const-ified the input string pointers of DCH_from_char and its subroutines, to make sure there aren't any other scribbling-on-input problems. The risk of generating a badly-encoded error message seems like enough of a bug to justify back-patching, so patch all supported branches. Discussion: https://postgr.es/m/29432.1579731087@sss.pgh.pa.us
2020-01-23 19:42:09 +01:00
if (len <= 0)
{
Clean up formatting.c's logic for matching constant strings. seq_search(), which is used to match input substrings to constants such as month and day names, had a lot of bizarre and unnecessary behaviors. It was mostly possible to avert our eyes from that before, but we don't want to duplicate those behaviors in the upcoming patch to allow recognition of non-English month and day names. So it's time to clean this up. In particular: * seq_search scribbled on the input string, which is a pretty dangerous thing to do, especially in the badly underdocumented way it was done here. Fortunately the input string is a temporary copy, but that was being made three subroutine levels away, making it something easy to break accidentally. The behavior is externally visible nonetheless, in the form of odd case-folding in error reports about unrecognized month/day names. The scribbling is evidently being done to save a few calls to pg_tolower, but that's such a cheap function (at least for ASCII data) that it's pretty pointless to worry about. In HEAD I switched it to be pg_ascii_tolower to ensure it is cheap in all cases; but there are corner cases in Turkish where this'd change behavior, so leave it as pg_tolower in the back branches. * seq_search insisted on knowing the case form (all-upper, all-lower, or initcap) of the constant strings, so that it didn't have to case-fold them to perform case-insensitive comparisons. This likewise seems like excessive micro-optimization, given that pg_tolower is certainly very cheap for ASCII data. It seems unsafe to assume that we know the case form that will come out of pg_locale.c for localized month/day names, so it's better just to define the comparison rule as "downcase all strings before comparing". (The choice between downcasing and upcasing is arbitrary so far as English is concerned, but it might not be in other locales, so follow citext's lead here.) * seq_search also had a parameter that'd cause it to report a match after a maximum number of characters, even if the constant string were longer than that. This was not actually used because no caller passed a value small enough to cut off a comparison. Replicating that behavior for localized month/day names seems expensive as well as useless, so let's get rid of that too. * from_char_seq_search used the maximum-length parameter to truncate the input string in error reports about not finding a matching name. This leads to rather confusing reports in many cases. Worse, it is outright dangerous if the input string isn't all-ASCII, because we risk truncating the string in the middle of a multibyte character. That'd lead either to delivering an illegible error message to the client, or to encoding-conversion failures that obscure the actual data problem. Get rid of that in favor of truncating at whitespace if any (a suggestion due to Alvaro Herrera). In addition to fixing these things, I const-ified the input string pointers of DCH_from_char and its subroutines, to make sure there aren't any other scribbling-on-input problems. The risk of generating a badly-encoded error message seems like enough of a bug to justify back-patching, so patch all supported branches. Discussion: https://postgr.es/m/29432.1579731087@sss.pgh.pa.us
2020-01-23 19:42:09 +01:00
/*
* In the error report, truncate the string at the next whitespace (if
* any) to avoid including irrelevant data.
*/
char *copy = pstrdup(*src);
char *c;
Clean up formatting.c's logic for matching constant strings. seq_search(), which is used to match input substrings to constants such as month and day names, had a lot of bizarre and unnecessary behaviors. It was mostly possible to avert our eyes from that before, but we don't want to duplicate those behaviors in the upcoming patch to allow recognition of non-English month and day names. So it's time to clean this up. In particular: * seq_search scribbled on the input string, which is a pretty dangerous thing to do, especially in the badly underdocumented way it was done here. Fortunately the input string is a temporary copy, but that was being made three subroutine levels away, making it something easy to break accidentally. The behavior is externally visible nonetheless, in the form of odd case-folding in error reports about unrecognized month/day names. The scribbling is evidently being done to save a few calls to pg_tolower, but that's such a cheap function (at least for ASCII data) that it's pretty pointless to worry about. In HEAD I switched it to be pg_ascii_tolower to ensure it is cheap in all cases; but there are corner cases in Turkish where this'd change behavior, so leave it as pg_tolower in the back branches. * seq_search insisted on knowing the case form (all-upper, all-lower, or initcap) of the constant strings, so that it didn't have to case-fold them to perform case-insensitive comparisons. This likewise seems like excessive micro-optimization, given that pg_tolower is certainly very cheap for ASCII data. It seems unsafe to assume that we know the case form that will come out of pg_locale.c for localized month/day names, so it's better just to define the comparison rule as "downcase all strings before comparing". (The choice between downcasing and upcasing is arbitrary so far as English is concerned, but it might not be in other locales, so follow citext's lead here.) * seq_search also had a parameter that'd cause it to report a match after a maximum number of characters, even if the constant string were longer than that. This was not actually used because no caller passed a value small enough to cut off a comparison. Replicating that behavior for localized month/day names seems expensive as well as useless, so let's get rid of that too. * from_char_seq_search used the maximum-length parameter to truncate the input string in error reports about not finding a matching name. This leads to rather confusing reports in many cases. Worse, it is outright dangerous if the input string isn't all-ASCII, because we risk truncating the string in the middle of a multibyte character. That'd lead either to delivering an illegible error message to the client, or to encoding-conversion failures that obscure the actual data problem. Get rid of that in favor of truncating at whitespace if any (a suggestion due to Alvaro Herrera). In addition to fixing these things, I const-ified the input string pointers of DCH_from_char and its subroutines, to make sure there aren't any other scribbling-on-input problems. The risk of generating a badly-encoded error message seems like enough of a bug to justify back-patching, so patch all supported branches. Discussion: https://postgr.es/m/29432.1579731087@sss.pgh.pa.us
2020-01-23 19:42:09 +01:00
for (c = copy; *c; c++)
{
if (scanner_isspace(*c))
{
*c = '\0';
break;
}
}
RETURN_ERROR(ereport(ERROR,
(errcode(ERRCODE_INVALID_DATETIME_FORMAT),
errmsg("invalid value \"%s\" for \"%s\"",
copy, node->key->name),
errdetail("The given value did not match any of "
"the allowed values for this field."))));
}
*src += len;
return len;
on_error:
return -1;
}
/* ----------
* Process a TmToChar struct as denoted by a list of FormatNodes.
* The formatted data is written to the string pointed to by 'out'.
* ----------
*/
static void
DCH_to_char(FormatNode *node, bool is_interval, TmToChar *in, char *out, Oid collid)
{
FormatNode *n;
char *s;
struct pg_tm *tm = &in->tm;
int i;
/* cache localized days and months */
cache_locale_time();
s = out;
for (n = node; n->type != NODE_TYPE_END; n++)
{
if (n->type != NODE_TYPE_ACTION)
{
strcpy(s, n->character);
s += strlen(s);
continue;
}
switch (n->key->id)
{
case DCH_A_M:
case DCH_P_M:
strcpy(s, (tm->tm_hour % HOURS_PER_DAY >= HOURS_PER_DAY / 2)
? P_M_STR : A_M_STR);
s += strlen(s);
break;
case DCH_AM:
case DCH_PM:
strcpy(s, (tm->tm_hour % HOURS_PER_DAY >= HOURS_PER_DAY / 2)
? PM_STR : AM_STR);
s += strlen(s);
break;
case DCH_a_m:
case DCH_p_m:
strcpy(s, (tm->tm_hour % HOURS_PER_DAY >= HOURS_PER_DAY / 2)
? p_m_STR : a_m_STR);
s += strlen(s);
break;
case DCH_am:
case DCH_pm:
strcpy(s, (tm->tm_hour % HOURS_PER_DAY >= HOURS_PER_DAY / 2)
? pm_STR : am_STR);
s += strlen(s);
break;
case DCH_HH:
case DCH_HH12:
2010-02-26 03:01:40 +01:00
/*
* display time as shown on a 12-hour clock, even for
* intervals
*/
sprintf(s, "%0*d", S_FM(n->suffix) ? 0 : (tm->tm_hour >= 0) ? 2 : 3,
tm->tm_hour % (HOURS_PER_DAY / 2) == 0 ? HOURS_PER_DAY / 2 :
tm->tm_hour % (HOURS_PER_DAY / 2));
if (S_THth(n->suffix))
str_numth(s, s, S_TH_TYPE(n->suffix));
s += strlen(s);
break;
case DCH_HH24:
sprintf(s, "%0*d", S_FM(n->suffix) ? 0 : (tm->tm_hour >= 0) ? 2 : 3,
tm->tm_hour);
if (S_THth(n->suffix))
str_numth(s, s, S_TH_TYPE(n->suffix));
s += strlen(s);
break;
case DCH_MI:
sprintf(s, "%0*d", S_FM(n->suffix) ? 0 : (tm->tm_min >= 0) ? 2 : 3,
tm->tm_min);
if (S_THth(n->suffix))
str_numth(s, s, S_TH_TYPE(n->suffix));
s += strlen(s);
break;
case DCH_SS:
sprintf(s, "%0*d", S_FM(n->suffix) ? 0 : (tm->tm_sec >= 0) ? 2 : 3,
tm->tm_sec);
if (S_THth(n->suffix))
str_numth(s, s, S_TH_TYPE(n->suffix));
s += strlen(s);
break;
#define DCH_to_char_fsec(frac_fmt, frac_val) \
sprintf(s, frac_fmt, (int) (frac_val)); \
if (S_THth(n->suffix)) \
str_numth(s, s, S_TH_TYPE(n->suffix)); \
s += strlen(s)
case DCH_FF1: /* tenth of second */
DCH_to_char_fsec("%01d", in->fsec / 100000);
break;
case DCH_FF2: /* hundredth of second */
DCH_to_char_fsec("%02d", in->fsec / 10000);
break;
case DCH_FF3:
case DCH_MS: /* millisecond */
DCH_to_char_fsec("%03d", in->fsec / 1000);
break;
case DCH_FF4: /* tenth of a millisecond */
DCH_to_char_fsec("%04d", in->fsec / 100);
break;
case DCH_FF5: /* hundredth of a millisecond */
DCH_to_char_fsec("%05d", in->fsec / 10);
break;
case DCH_FF6:
case DCH_US: /* microsecond */
DCH_to_char_fsec("%06d", in->fsec);
break;
#undef DCH_to_char_fsec
case DCH_SSSS:
sprintf(s, "%d", tm->tm_hour * SECS_PER_HOUR +
tm->tm_min * SECS_PER_MINUTE +
tm->tm_sec);
if (S_THth(n->suffix))
str_numth(s, s, S_TH_TYPE(n->suffix));
s += strlen(s);
break;
case DCH_tz:
INVALID_FOR_INTERVAL;
if (tmtcTzn(in))
{
/* We assume here that timezone names aren't localized */
char *p = asc_tolower_z(tmtcTzn(in));
strcpy(s, p);
pfree(p);
s += strlen(s);
}
break;
case DCH_TZ:
INVALID_FOR_INTERVAL;
if (tmtcTzn(in))
{
strcpy(s, tmtcTzn(in));
s += strlen(s);
}
break;
case DCH_TZH:
INVALID_FOR_INTERVAL;
sprintf(s, "%c%02d",
(tm->tm_gmtoff >= 0) ? '+' : '-',
abs((int) tm->tm_gmtoff) / SECS_PER_HOUR);
s += strlen(s);
break;
case DCH_TZM:
INVALID_FOR_INTERVAL;
sprintf(s, "%02d",
(abs((int) tm->tm_gmtoff) % SECS_PER_HOUR) / SECS_PER_MINUTE);
s += strlen(s);
break;
case DCH_OF:
INVALID_FOR_INTERVAL;
sprintf(s, "%c%0*d",
(tm->tm_gmtoff >= 0) ? '+' : '-',
S_FM(n->suffix) ? 0 : 2,
abs((int) tm->tm_gmtoff) / SECS_PER_HOUR);
s += strlen(s);
if (abs((int) tm->tm_gmtoff) % SECS_PER_HOUR != 0)
{
sprintf(s, ":%02d",
(abs((int) tm->tm_gmtoff) % SECS_PER_HOUR) / SECS_PER_MINUTE);
s += strlen(s);
}
break;
case DCH_A_D:
case DCH_B_C:
INVALID_FOR_INTERVAL;
strcpy(s, (tm->tm_year <= 0 ? B_C_STR : A_D_STR));
s += strlen(s);
break;
case DCH_AD:
case DCH_BC:
INVALID_FOR_INTERVAL;
strcpy(s, (tm->tm_year <= 0 ? BC_STR : AD_STR));
s += strlen(s);
break;
case DCH_a_d:
case DCH_b_c:
INVALID_FOR_INTERVAL;
strcpy(s, (tm->tm_year <= 0 ? b_c_STR : a_d_STR));
s += strlen(s);
break;
case DCH_ad:
case DCH_bc:
INVALID_FOR_INTERVAL;
strcpy(s, (tm->tm_year <= 0 ? bc_STR : ad_STR));
s += strlen(s);
break;
case DCH_MONTH:
INVALID_FOR_INTERVAL;
if (!tm->tm_mon)
break;
if (S_TM(n->suffix))
{
2015-05-24 03:35:49 +02:00
char *str = str_toupper_z(localized_full_months[tm->tm_mon - 1], collid);
if (strlen(str) <= (n->key->len + TM_SUFFIX_LEN) * DCH_MAX_ITEM_SIZ)
strcpy(s, str);
else
ereport(ERROR,
(errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
errmsg("localized string format value too long")));
}
else
sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -9,
asc_toupper_z(months_full[tm->tm_mon - 1]));
s += strlen(s);
break;
case DCH_Month:
INVALID_FOR_INTERVAL;
if (!tm->tm_mon)
break;
if (S_TM(n->suffix))
{
2015-05-24 03:35:49 +02:00
char *str = str_initcap_z(localized_full_months[tm->tm_mon - 1], collid);
if (strlen(str) <= (n->key->len + TM_SUFFIX_LEN) * DCH_MAX_ITEM_SIZ)
strcpy(s, str);
else
ereport(ERROR,
(errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
errmsg("localized string format value too long")));
}
else
sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -9,
months_full[tm->tm_mon - 1]);
s += strlen(s);
break;
case DCH_month:
INVALID_FOR_INTERVAL;
if (!tm->tm_mon)
break;
if (S_TM(n->suffix))
{
2015-05-24 03:35:49 +02:00
char *str = str_tolower_z(localized_full_months[tm->tm_mon - 1], collid);
if (strlen(str) <= (n->key->len + TM_SUFFIX_LEN) * DCH_MAX_ITEM_SIZ)
strcpy(s, str);
else
ereport(ERROR,
(errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
errmsg("localized string format value too long")));
}
else
sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -9,
asc_tolower_z(months_full[tm->tm_mon - 1]));
s += strlen(s);
break;
case DCH_MON:
INVALID_FOR_INTERVAL;
if (!tm->tm_mon)
break;
if (S_TM(n->suffix))
{
2015-05-24 03:35:49 +02:00
char *str = str_toupper_z(localized_abbrev_months[tm->tm_mon - 1], collid);
if (strlen(str) <= (n->key->len + TM_SUFFIX_LEN) * DCH_MAX_ITEM_SIZ)
strcpy(s, str);
else
ereport(ERROR,
(errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
errmsg("localized string format value too long")));
}
else
strcpy(s, asc_toupper_z(months[tm->tm_mon - 1]));
s += strlen(s);
break;
case DCH_Mon:
INVALID_FOR_INTERVAL;
if (!tm->tm_mon)
break;
if (S_TM(n->suffix))
{
2015-05-24 03:35:49 +02:00
char *str = str_initcap_z(localized_abbrev_months[tm->tm_mon - 1], collid);
if (strlen(str) <= (n->key->len + TM_SUFFIX_LEN) * DCH_MAX_ITEM_SIZ)
strcpy(s, str);
else
ereport(ERROR,
(errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
errmsg("localized string format value too long")));
}
else
strcpy(s, months[tm->tm_mon - 1]);
s += strlen(s);
break;
case DCH_mon:
INVALID_FOR_INTERVAL;
if (!tm->tm_mon)
break;
if (S_TM(n->suffix))
{
2015-05-24 03:35:49 +02:00
char *str = str_tolower_z(localized_abbrev_months[tm->tm_mon - 1], collid);
if (strlen(str) <= (n->key->len + TM_SUFFIX_LEN) * DCH_MAX_ITEM_SIZ)
strcpy(s, str);
else
ereport(ERROR,
(errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
errmsg("localized string format value too long")));
}
else
strcpy(s, asc_tolower_z(months[tm->tm_mon - 1]));
s += strlen(s);
break;
case DCH_MM:
sprintf(s, "%0*d", S_FM(n->suffix) ? 0 : (tm->tm_mon >= 0) ? 2 : 3,
tm->tm_mon);
if (S_THth(n->suffix))
str_numth(s, s, S_TH_TYPE(n->suffix));
s += strlen(s);
break;
case DCH_DAY:
INVALID_FOR_INTERVAL;
if (S_TM(n->suffix))
{
2015-05-24 03:35:49 +02:00
char *str = str_toupper_z(localized_full_days[tm->tm_wday], collid);
if (strlen(str) <= (n->key->len + TM_SUFFIX_LEN) * DCH_MAX_ITEM_SIZ)
strcpy(s, str);
else
ereport(ERROR,
(errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
errmsg("localized string format value too long")));
}
else
sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -9,
asc_toupper_z(days[tm->tm_wday]));
s += strlen(s);
break;
case DCH_Day:
INVALID_FOR_INTERVAL;
if (S_TM(n->suffix))
{
2015-05-24 03:35:49 +02:00
char *str = str_initcap_z(localized_full_days[tm->tm_wday], collid);
if (strlen(str) <= (n->key->len + TM_SUFFIX_LEN) * DCH_MAX_ITEM_SIZ)
strcpy(s, str);
else
ereport(ERROR,
(errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
errmsg("localized string format value too long")));
}
else
sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -9,
days[tm->tm_wday]);
s += strlen(s);
break;
case DCH_day:
INVALID_FOR_INTERVAL;
if (S_TM(n->suffix))
{
2015-05-24 03:35:49 +02:00
char *str = str_tolower_z(localized_full_days[tm->tm_wday], collid);
if (strlen(str) <= (n->key->len + TM_SUFFIX_LEN) * DCH_MAX_ITEM_SIZ)
strcpy(s, str);
else
ereport(ERROR,
(errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
errmsg("localized string format value too long")));
}
else
sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -9,
asc_tolower_z(days[tm->tm_wday]));
s += strlen(s);
break;
case DCH_DY:
INVALID_FOR_INTERVAL;
if (S_TM(n->suffix))
{
2015-05-24 03:35:49 +02:00
char *str = str_toupper_z(localized_abbrev_days[tm->tm_wday], collid);
if (strlen(str) <= (n->key->len + TM_SUFFIX_LEN) * DCH_MAX_ITEM_SIZ)
strcpy(s, str);
else
ereport(ERROR,
(errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
errmsg("localized string format value too long")));
}
else
strcpy(s, asc_toupper_z(days_short[tm->tm_wday]));
s += strlen(s);
break;
case DCH_Dy:
INVALID_FOR_INTERVAL;
if (S_TM(n->suffix))
{
2015-05-24 03:35:49 +02:00
char *str = str_initcap_z(localized_abbrev_days[tm->tm_wday], collid);
if (strlen(str) <= (n->key->len + TM_SUFFIX_LEN) * DCH_MAX_ITEM_SIZ)
strcpy(s, str);
else
ereport(ERROR,
(errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
errmsg("localized string format value too long")));
}
2001-03-22 05:01:46 +01:00
else
strcpy(s, days_short[tm->tm_wday]);
s += strlen(s);
break;
case DCH_dy:
INVALID_FOR_INTERVAL;
if (S_TM(n->suffix))
{
2015-05-24 03:35:49 +02:00
char *str = str_tolower_z(localized_abbrev_days[tm->tm_wday], collid);
if (strlen(str) <= (n->key->len + TM_SUFFIX_LEN) * DCH_MAX_ITEM_SIZ)
strcpy(s, str);
else
ereport(ERROR,
(errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
errmsg("localized string format value too long")));
}
else
strcpy(s, asc_tolower_z(days_short[tm->tm_wday]));
s += strlen(s);
break;
case DCH_DDD:
case DCH_IDDD:
sprintf(s, "%0*d", S_FM(n->suffix) ? 0 : 3,
(n->key->id == DCH_DDD) ?
tm->tm_yday :
date2isoyearday(tm->tm_year, tm->tm_mon, tm->tm_mday));
if (S_THth(n->suffix))
str_numth(s, s, S_TH_TYPE(n->suffix));
s += strlen(s);
break;
case DCH_DD:
sprintf(s, "%0*d", S_FM(n->suffix) ? 0 : 2, tm->tm_mday);
if (S_THth(n->suffix))
str_numth(s, s, S_TH_TYPE(n->suffix));
s += strlen(s);
break;
case DCH_D:
INVALID_FOR_INTERVAL;
sprintf(s, "%d", tm->tm_wday + 1);
if (S_THth(n->suffix))
str_numth(s, s, S_TH_TYPE(n->suffix));
s += strlen(s);
break;
case DCH_ID:
INVALID_FOR_INTERVAL;
sprintf(s, "%d", (tm->tm_wday == 0) ? 7 : tm->tm_wday);
if (S_THth(n->suffix))
str_numth(s, s, S_TH_TYPE(n->suffix));
s += strlen(s);
break;
case DCH_WW:
sprintf(s, "%0*d", S_FM(n->suffix) ? 0 : 2,
(tm->tm_yday - 1) / 7 + 1);
if (S_THth(n->suffix))
str_numth(s, s, S_TH_TYPE(n->suffix));
s += strlen(s);
break;
case DCH_IW:
sprintf(s, "%0*d", S_FM(n->suffix) ? 0 : 2,
date2isoweek(tm->tm_year, tm->tm_mon, tm->tm_mday));
if (S_THth(n->suffix))
str_numth(s, s, S_TH_TYPE(n->suffix));
s += strlen(s);
break;
case DCH_Q:
if (!tm->tm_mon)
break;
sprintf(s, "%d", (tm->tm_mon - 1) / 3 + 1);
if (S_THth(n->suffix))
str_numth(s, s, S_TH_TYPE(n->suffix));
s += strlen(s);
break;
case DCH_CC:
if (is_interval) /* straight calculation */
i = tm->tm_year / 100;
else
{
if (tm->tm_year > 0)
/* Century 20 == 1901 - 2000 */
i = (tm->tm_year - 1) / 100 + 1;
else
/* Century 6BC == 600BC - 501BC */
i = tm->tm_year / 100 - 1;
}
if (i <= 99 && i >= -99)
sprintf(s, "%0*d", S_FM(n->suffix) ? 0 : (i >= 0) ? 2 : 3, i);
else
sprintf(s, "%d", i);
if (S_THth(n->suffix))
str_numth(s, s, S_TH_TYPE(n->suffix));
s += strlen(s);
break;
case DCH_Y_YYY:
i = ADJUST_YEAR(tm->tm_year, is_interval) / 1000;
sprintf(s, "%d,%03d", i,
ADJUST_YEAR(tm->tm_year, is_interval) - (i * 1000));
if (S_THth(n->suffix))
str_numth(s, s, S_TH_TYPE(n->suffix));
s += strlen(s);
break;
case DCH_YYYY:
case DCH_IYYY:
sprintf(s, "%0*d",
S_FM(n->suffix) ? 0 :
(ADJUST_YEAR(tm->tm_year, is_interval) >= 0) ? 4 : 5,
(n->key->id == DCH_YYYY ?
ADJUST_YEAR(tm->tm_year, is_interval) :
ADJUST_YEAR(date2isoyear(tm->tm_year,
tm->tm_mon,
tm->tm_mday),
is_interval)));
if (S_THth(n->suffix))
str_numth(s, s, S_TH_TYPE(n->suffix));
s += strlen(s);
break;
case DCH_YYY:
case DCH_IYY:
sprintf(s, "%0*d",
S_FM(n->suffix) ? 0 :
(ADJUST_YEAR(tm->tm_year, is_interval) >= 0) ? 3 : 4,
(n->key->id == DCH_YYY ?
ADJUST_YEAR(tm->tm_year, is_interval) :
ADJUST_YEAR(date2isoyear(tm->tm_year,
tm->tm_mon,
tm->tm_mday),
is_interval)) % 1000);
if (S_THth(n->suffix))
str_numth(s, s, S_TH_TYPE(n->suffix));
s += strlen(s);
break;
case DCH_YY:
case DCH_IY:
sprintf(s, "%0*d",
S_FM(n->suffix) ? 0 :
(ADJUST_YEAR(tm->tm_year, is_interval) >= 0) ? 2 : 3,
(n->key->id == DCH_YY ?
ADJUST_YEAR(tm->tm_year, is_interval) :
ADJUST_YEAR(date2isoyear(tm->tm_year,
tm->tm_mon,
tm->tm_mday),
is_interval)) % 100);
if (S_THth(n->suffix))
str_numth(s, s, S_TH_TYPE(n->suffix));
s += strlen(s);
break;
case DCH_Y:
case DCH_I:
sprintf(s, "%1d",
(n->key->id == DCH_Y ?
ADJUST_YEAR(tm->tm_year, is_interval) :
ADJUST_YEAR(date2isoyear(tm->tm_year,
tm->tm_mon,
tm->tm_mday),
is_interval)) % 10);
if (S_THth(n->suffix))
str_numth(s, s, S_TH_TYPE(n->suffix));
s += strlen(s);
break;
case DCH_RM:
if (!tm->tm_mon)
break;
sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -4,
rm_months_upper[MONTHS_PER_YEAR - tm->tm_mon]);
s += strlen(s);
break;
case DCH_rm:
if (!tm->tm_mon)
break;
sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -4,
rm_months_lower[MONTHS_PER_YEAR - tm->tm_mon]);
s += strlen(s);
break;
case DCH_W:
sprintf(s, "%d", (tm->tm_mday - 1) / 7 + 1);
if (S_THth(n->suffix))
str_numth(s, s, S_TH_TYPE(n->suffix));
s += strlen(s);
break;
case DCH_J:
sprintf(s, "%d", date2j(tm->tm_year, tm->tm_mon, tm->tm_mday));
if (S_THth(n->suffix))
str_numth(s, s, S_TH_TYPE(n->suffix));
s += strlen(s);
break;
}
}
*s = '\0';
}
/*
* Process the string 'in' as denoted by the array of FormatNodes 'node[]'.
* The TmFromChar struct pointed to by 'out' is populated with the results.
*
* 'collid' identifies the collation to use, if needed.
* 'std' specifies standard parsing mode.
* If 'have_error' is NULL, then errors are thrown, else '*have_error' is set.
*
* Note: we currently don't have any to_interval() function, so there
* is no need here for INVALID_FOR_INTERVAL checks.
*/
static void
DCH_from_char(FormatNode *node, const char *in, TmFromChar *out,
Oid collid, bool std, bool *have_error)
{
FormatNode *n;
Clean up formatting.c's logic for matching constant strings. seq_search(), which is used to match input substrings to constants such as month and day names, had a lot of bizarre and unnecessary behaviors. It was mostly possible to avert our eyes from that before, but we don't want to duplicate those behaviors in the upcoming patch to allow recognition of non-English month and day names. So it's time to clean this up. In particular: * seq_search scribbled on the input string, which is a pretty dangerous thing to do, especially in the badly underdocumented way it was done here. Fortunately the input string is a temporary copy, but that was being made three subroutine levels away, making it something easy to break accidentally. The behavior is externally visible nonetheless, in the form of odd case-folding in error reports about unrecognized month/day names. The scribbling is evidently being done to save a few calls to pg_tolower, but that's such a cheap function (at least for ASCII data) that it's pretty pointless to worry about. In HEAD I switched it to be pg_ascii_tolower to ensure it is cheap in all cases; but there are corner cases in Turkish where this'd change behavior, so leave it as pg_tolower in the back branches. * seq_search insisted on knowing the case form (all-upper, all-lower, or initcap) of the constant strings, so that it didn't have to case-fold them to perform case-insensitive comparisons. This likewise seems like excessive micro-optimization, given that pg_tolower is certainly very cheap for ASCII data. It seems unsafe to assume that we know the case form that will come out of pg_locale.c for localized month/day names, so it's better just to define the comparison rule as "downcase all strings before comparing". (The choice between downcasing and upcasing is arbitrary so far as English is concerned, but it might not be in other locales, so follow citext's lead here.) * seq_search also had a parameter that'd cause it to report a match after a maximum number of characters, even if the constant string were longer than that. This was not actually used because no caller passed a value small enough to cut off a comparison. Replicating that behavior for localized month/day names seems expensive as well as useless, so let's get rid of that too. * from_char_seq_search used the maximum-length parameter to truncate the input string in error reports about not finding a matching name. This leads to rather confusing reports in many cases. Worse, it is outright dangerous if the input string isn't all-ASCII, because we risk truncating the string in the middle of a multibyte character. That'd lead either to delivering an illegible error message to the client, or to encoding-conversion failures that obscure the actual data problem. Get rid of that in favor of truncating at whitespace if any (a suggestion due to Alvaro Herrera). In addition to fixing these things, I const-ified the input string pointers of DCH_from_char and its subroutines, to make sure there aren't any other scribbling-on-input problems. The risk of generating a badly-encoded error message seems like enough of a bug to justify back-patching, so patch all supported branches. Discussion: https://postgr.es/m/29432.1579731087@sss.pgh.pa.us
2020-01-23 19:42:09 +01:00
const char *s;
int len,
value;
bool fx_mode = std;
/* number of extra skipped characters (more than given in format string) */
int extra_skip = 0;
/* cache localized days and months */
cache_locale_time();
for (n = node, s = in; n->type != NODE_TYPE_END && *s != '\0'; n++)
{
/*
* Ignore spaces at the beginning of the string and before fields when
* not in FX (fixed width) mode.
*/
if (!fx_mode && (n->type != NODE_TYPE_ACTION || n->key->id != DCH_FX) &&
(n->type == NODE_TYPE_ACTION || n == node))
{
while (*s != '\0' && isspace((unsigned char) *s))
{
s++;
extra_skip++;
}
}
if (n->type == NODE_TYPE_SPACE || n->type == NODE_TYPE_SEPARATOR)
{
if (std)
{
/*
* Standard mode requires strict matching between format
* string separators/spaces and input string.
*/
Assert(n->character[0] && !n->character[1]);
if (*s == n->character[0])
s++;
else
RETURN_ERROR(ereport(ERROR,
(errcode(ERRCODE_INVALID_DATETIME_FORMAT),
errmsg("unmatched format separator \"%c\"",
n->character[0]))));
}
else if (!fx_mode)
{
/*
* In non FX (fixed format) mode one format string space or
* separator match to one space or separator in input string.
* Or match nothing if there is no space or separator in the
* current position of input string.
*/
extra_skip--;
if (isspace((unsigned char) *s) || is_separator_char(s))
{
s++;
extra_skip++;
}
}
else
{
/*
* In FX mode, on format string space or separator we consume
* exactly one character from input string. Notice we don't
* insist that the consumed character match the format's
* character.
*/
s += pg_mblen(s);
}
continue;
}
else if (n->type != NODE_TYPE_ACTION)
{
/*
* Text character, so consume one character from input string.
* Notice we don't insist that the consumed character match the
* format's character.
*/
if (!fx_mode)
{
/*
* In non FX mode we might have skipped some extra characters
* (more than specified in format string) before. In this
* case we don't skip input string character, because it might
* be part of field.
*/
if (extra_skip > 0)
extra_skip--;
else
s += pg_mblen(s);
}
else
{
s += pg_mblen(s);
}
continue;
}
from_char_set_mode(out, n->key->date_mode, have_error);
CHECK_ERROR;
switch (n->key->id)
{
case DCH_FX:
fx_mode = true;
break;
case DCH_A_M:
case DCH_P_M:
case DCH_a_m:
case DCH_p_m:
from_char_seq_search(&value, &s, ampm_strings_long,
NULL, InvalidOid,
Clean up formatting.c's logic for matching constant strings. seq_search(), which is used to match input substrings to constants such as month and day names, had a lot of bizarre and unnecessary behaviors. It was mostly possible to avert our eyes from that before, but we don't want to duplicate those behaviors in the upcoming patch to allow recognition of non-English month and day names. So it's time to clean this up. In particular: * seq_search scribbled on the input string, which is a pretty dangerous thing to do, especially in the badly underdocumented way it was done here. Fortunately the input string is a temporary copy, but that was being made three subroutine levels away, making it something easy to break accidentally. The behavior is externally visible nonetheless, in the form of odd case-folding in error reports about unrecognized month/day names. The scribbling is evidently being done to save a few calls to pg_tolower, but that's such a cheap function (at least for ASCII data) that it's pretty pointless to worry about. In HEAD I switched it to be pg_ascii_tolower to ensure it is cheap in all cases; but there are corner cases in Turkish where this'd change behavior, so leave it as pg_tolower in the back branches. * seq_search insisted on knowing the case form (all-upper, all-lower, or initcap) of the constant strings, so that it didn't have to case-fold them to perform case-insensitive comparisons. This likewise seems like excessive micro-optimization, given that pg_tolower is certainly very cheap for ASCII data. It seems unsafe to assume that we know the case form that will come out of pg_locale.c for localized month/day names, so it's better just to define the comparison rule as "downcase all strings before comparing". (The choice between downcasing and upcasing is arbitrary so far as English is concerned, but it might not be in other locales, so follow citext's lead here.) * seq_search also had a parameter that'd cause it to report a match after a maximum number of characters, even if the constant string were longer than that. This was not actually used because no caller passed a value small enough to cut off a comparison. Replicating that behavior for localized month/day names seems expensive as well as useless, so let's get rid of that too. * from_char_seq_search used the maximum-length parameter to truncate the input string in error reports about not finding a matching name. This leads to rather confusing reports in many cases. Worse, it is outright dangerous if the input string isn't all-ASCII, because we risk truncating the string in the middle of a multibyte character. That'd lead either to delivering an illegible error message to the client, or to encoding-conversion failures that obscure the actual data problem. Get rid of that in favor of truncating at whitespace if any (a suggestion due to Alvaro Herrera). In addition to fixing these things, I const-ified the input string pointers of DCH_from_char and its subroutines, to make sure there aren't any other scribbling-on-input problems. The risk of generating a badly-encoded error message seems like enough of a bug to justify back-patching, so patch all supported branches. Discussion: https://postgr.es/m/29432.1579731087@sss.pgh.pa.us
2020-01-23 19:42:09 +01:00
n, have_error);
CHECK_ERROR;
from_char_set_int(&out->pm, value % 2, n, have_error);
CHECK_ERROR;
out->clock = CLOCK_12_HOUR;
break;
case DCH_AM:
case DCH_PM:
case DCH_am:
case DCH_pm:
from_char_seq_search(&value, &s, ampm_strings,
NULL, InvalidOid,
Clean up formatting.c's logic for matching constant strings. seq_search(), which is used to match input substrings to constants such as month and day names, had a lot of bizarre and unnecessary behaviors. It was mostly possible to avert our eyes from that before, but we don't want to duplicate those behaviors in the upcoming patch to allow recognition of non-English month and day names. So it's time to clean this up. In particular: * seq_search scribbled on the input string, which is a pretty dangerous thing to do, especially in the badly underdocumented way it was done here. Fortunately the input string is a temporary copy, but that was being made three subroutine levels away, making it something easy to break accidentally. The behavior is externally visible nonetheless, in the form of odd case-folding in error reports about unrecognized month/day names. The scribbling is evidently being done to save a few calls to pg_tolower, but that's such a cheap function (at least for ASCII data) that it's pretty pointless to worry about. In HEAD I switched it to be pg_ascii_tolower to ensure it is cheap in all cases; but there are corner cases in Turkish where this'd change behavior, so leave it as pg_tolower in the back branches. * seq_search insisted on knowing the case form (all-upper, all-lower, or initcap) of the constant strings, so that it didn't have to case-fold them to perform case-insensitive comparisons. This likewise seems like excessive micro-optimization, given that pg_tolower is certainly very cheap for ASCII data. It seems unsafe to assume that we know the case form that will come out of pg_locale.c for localized month/day names, so it's better just to define the comparison rule as "downcase all strings before comparing". (The choice between downcasing and upcasing is arbitrary so far as English is concerned, but it might not be in other locales, so follow citext's lead here.) * seq_search also had a parameter that'd cause it to report a match after a maximum number of characters, even if the constant string were longer than that. This was not actually used because no caller passed a value small enough to cut off a comparison. Replicating that behavior for localized month/day names seems expensive as well as useless, so let's get rid of that too. * from_char_seq_search used the maximum-length parameter to truncate the input string in error reports about not finding a matching name. This leads to rather confusing reports in many cases. Worse, it is outright dangerous if the input string isn't all-ASCII, because we risk truncating the string in the middle of a multibyte character. That'd lead either to delivering an illegible error message to the client, or to encoding-conversion failures that obscure the actual data problem. Get rid of that in favor of truncating at whitespace if any (a suggestion due to Alvaro Herrera). In addition to fixing these things, I const-ified the input string pointers of DCH_from_char and its subroutines, to make sure there aren't any other scribbling-on-input problems. The risk of generating a badly-encoded error message seems like enough of a bug to justify back-patching, so patch all supported branches. Discussion: https://postgr.es/m/29432.1579731087@sss.pgh.pa.us
2020-01-23 19:42:09 +01:00
n, have_error);
CHECK_ERROR;
from_char_set_int(&out->pm, value % 2, n, have_error);
CHECK_ERROR;
out->clock = CLOCK_12_HOUR;
break;
case DCH_HH:
case DCH_HH12:
from_char_parse_int_len(&out->hh, &s, 2, n, have_error);
CHECK_ERROR;
out->clock = CLOCK_12_HOUR;
SKIP_THth(s, n->suffix);
break;
case DCH_HH24:
from_char_parse_int_len(&out->hh, &s, 2, n, have_error);
CHECK_ERROR;
SKIP_THth(s, n->suffix);
break;
case DCH_MI:
from_char_parse_int(&out->mi, &s, n, have_error);
CHECK_ERROR;
SKIP_THth(s, n->suffix);
break;
case DCH_SS:
from_char_parse_int(&out->ss, &s, n, have_error);
CHECK_ERROR;
SKIP_THth(s, n->suffix);
break;
case DCH_MS: /* millisecond */
len = from_char_parse_int_len(&out->ms, &s, 3, n, have_error);
CHECK_ERROR;
2007-11-15 22:14:46 +01:00
/*
* 25 is 0.25 and 250 is 0.25 too; 025 is 0.025 and not 0.25
*/
out->ms *= len == 1 ? 100 :
len == 2 ? 10 : 1;
SKIP_THth(s, n->suffix);
break;
case DCH_FF1:
case DCH_FF2:
case DCH_FF3:
case DCH_FF4:
case DCH_FF5:
case DCH_FF6:
out->ff = n->key->id - DCH_FF1 + 1;
/* FALLTHROUGH */
case DCH_US: /* microsecond */
len = from_char_parse_int_len(&out->us, &s,
n->key->id == DCH_US ? 6 :
out->ff, n, have_error);
CHECK_ERROR;
out->us *= len == 1 ? 100000 :
len == 2 ? 10000 :
len == 3 ? 1000 :
len == 4 ? 100 :
len == 5 ? 10 : 1;
SKIP_THth(s, n->suffix);
break;
case DCH_SSSS:
from_char_parse_int(&out->ssss, &s, n, have_error);
CHECK_ERROR;
SKIP_THth(s, n->suffix);
break;
case DCH_tz:
case DCH_TZ:
case DCH_OF:
RETURN_ERROR(ereport(ERROR,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("formatting field \"%s\" is only supported in to_char",
n->key->name))));
CHECK_ERROR;
break;
case DCH_TZH:
/*
* Value of TZH might be negative. And the issue is that we
* might swallow minus sign as the separator. So, if we have
* skipped more characters than specified in the format
* string, then we consider prepending last skipped minus to
* TZH.
*/
if (*s == '+' || *s == '-' || *s == ' ')
{
out->tzsign = *s == '-' ? -1 : +1;
s++;
}
else
{
if (extra_skip > 0 && *(s - 1) == '-')
out->tzsign = -1;
else
out->tzsign = +1;
}
from_char_parse_int_len(&out->tzh, &s, 2, n, have_error);
CHECK_ERROR;
break;
case DCH_TZM:
/* assign positive timezone sign if TZH was not seen before */
if (!out->tzsign)
out->tzsign = +1;
from_char_parse_int_len(&out->tzm, &s, 2, n, have_error);
CHECK_ERROR;
break;
case DCH_A_D:
case DCH_B_C:
case DCH_a_d:
case DCH_b_c:
from_char_seq_search(&value, &s, adbc_strings_long,
NULL, InvalidOid,
Clean up formatting.c's logic for matching constant strings. seq_search(), which is used to match input substrings to constants such as month and day names, had a lot of bizarre and unnecessary behaviors. It was mostly possible to avert our eyes from that before, but we don't want to duplicate those behaviors in the upcoming patch to allow recognition of non-English month and day names. So it's time to clean this up. In particular: * seq_search scribbled on the input string, which is a pretty dangerous thing to do, especially in the badly underdocumented way it was done here. Fortunately the input string is a temporary copy, but that was being made three subroutine levels away, making it something easy to break accidentally. The behavior is externally visible nonetheless, in the form of odd case-folding in error reports about unrecognized month/day names. The scribbling is evidently being done to save a few calls to pg_tolower, but that's such a cheap function (at least for ASCII data) that it's pretty pointless to worry about. In HEAD I switched it to be pg_ascii_tolower to ensure it is cheap in all cases; but there are corner cases in Turkish where this'd change behavior, so leave it as pg_tolower in the back branches. * seq_search insisted on knowing the case form (all-upper, all-lower, or initcap) of the constant strings, so that it didn't have to case-fold them to perform case-insensitive comparisons. This likewise seems like excessive micro-optimization, given that pg_tolower is certainly very cheap for ASCII data. It seems unsafe to assume that we know the case form that will come out of pg_locale.c for localized month/day names, so it's better just to define the comparison rule as "downcase all strings before comparing". (The choice between downcasing and upcasing is arbitrary so far as English is concerned, but it might not be in other locales, so follow citext's lead here.) * seq_search also had a parameter that'd cause it to report a match after a maximum number of characters, even if the constant string were longer than that. This was not actually used because no caller passed a value small enough to cut off a comparison. Replicating that behavior for localized month/day names seems expensive as well as useless, so let's get rid of that too. * from_char_seq_search used the maximum-length parameter to truncate the input string in error reports about not finding a matching name. This leads to rather confusing reports in many cases. Worse, it is outright dangerous if the input string isn't all-ASCII, because we risk truncating the string in the middle of a multibyte character. That'd lead either to delivering an illegible error message to the client, or to encoding-conversion failures that obscure the actual data problem. Get rid of that in favor of truncating at whitespace if any (a suggestion due to Alvaro Herrera). In addition to fixing these things, I const-ified the input string pointers of DCH_from_char and its subroutines, to make sure there aren't any other scribbling-on-input problems. The risk of generating a badly-encoded error message seems like enough of a bug to justify back-patching, so patch all supported branches. Discussion: https://postgr.es/m/29432.1579731087@sss.pgh.pa.us
2020-01-23 19:42:09 +01:00
n, have_error);
CHECK_ERROR;
from_char_set_int(&out->bc, value % 2, n, have_error);
CHECK_ERROR;
break;
case DCH_AD:
case DCH_BC:
case DCH_ad:
case DCH_bc:
from_char_seq_search(&value, &s, adbc_strings,
NULL, InvalidOid,
Clean up formatting.c's logic for matching constant strings. seq_search(), which is used to match input substrings to constants such as month and day names, had a lot of bizarre and unnecessary behaviors. It was mostly possible to avert our eyes from that before, but we don't want to duplicate those behaviors in the upcoming patch to allow recognition of non-English month and day names. So it's time to clean this up. In particular: * seq_search scribbled on the input string, which is a pretty dangerous thing to do, especially in the badly underdocumented way it was done here. Fortunately the input string is a temporary copy, but that was being made three subroutine levels away, making it something easy to break accidentally. The behavior is externally visible nonetheless, in the form of odd case-folding in error reports about unrecognized month/day names. The scribbling is evidently being done to save a few calls to pg_tolower, but that's such a cheap function (at least for ASCII data) that it's pretty pointless to worry about. In HEAD I switched it to be pg_ascii_tolower to ensure it is cheap in all cases; but there are corner cases in Turkish where this'd change behavior, so leave it as pg_tolower in the back branches. * seq_search insisted on knowing the case form (all-upper, all-lower, or initcap) of the constant strings, so that it didn't have to case-fold them to perform case-insensitive comparisons. This likewise seems like excessive micro-optimization, given that pg_tolower is certainly very cheap for ASCII data. It seems unsafe to assume that we know the case form that will come out of pg_locale.c for localized month/day names, so it's better just to define the comparison rule as "downcase all strings before comparing". (The choice between downcasing and upcasing is arbitrary so far as English is concerned, but it might not be in other locales, so follow citext's lead here.) * seq_search also had a parameter that'd cause it to report a match after a maximum number of characters, even if the constant string were longer than that. This was not actually used because no caller passed a value small enough to cut off a comparison. Replicating that behavior for localized month/day names seems expensive as well as useless, so let's get rid of that too. * from_char_seq_search used the maximum-length parameter to truncate the input string in error reports about not finding a matching name. This leads to rather confusing reports in many cases. Worse, it is outright dangerous if the input string isn't all-ASCII, because we risk truncating the string in the middle of a multibyte character. That'd lead either to delivering an illegible error message to the client, or to encoding-conversion failures that obscure the actual data problem. Get rid of that in favor of truncating at whitespace if any (a suggestion due to Alvaro Herrera). In addition to fixing these things, I const-ified the input string pointers of DCH_from_char and its subroutines, to make sure there aren't any other scribbling-on-input problems. The risk of generating a badly-encoded error message seems like enough of a bug to justify back-patching, so patch all supported branches. Discussion: https://postgr.es/m/29432.1579731087@sss.pgh.pa.us
2020-01-23 19:42:09 +01:00
n, have_error);
CHECK_ERROR;
from_char_set_int(&out->bc, value % 2, n, have_error);
CHECK_ERROR;
break;
case DCH_MONTH:
case DCH_Month:
case DCH_month:
Clean up formatting.c's logic for matching constant strings. seq_search(), which is used to match input substrings to constants such as month and day names, had a lot of bizarre and unnecessary behaviors. It was mostly possible to avert our eyes from that before, but we don't want to duplicate those behaviors in the upcoming patch to allow recognition of non-English month and day names. So it's time to clean this up. In particular: * seq_search scribbled on the input string, which is a pretty dangerous thing to do, especially in the badly underdocumented way it was done here. Fortunately the input string is a temporary copy, but that was being made three subroutine levels away, making it something easy to break accidentally. The behavior is externally visible nonetheless, in the form of odd case-folding in error reports about unrecognized month/day names. The scribbling is evidently being done to save a few calls to pg_tolower, but that's such a cheap function (at least for ASCII data) that it's pretty pointless to worry about. In HEAD I switched it to be pg_ascii_tolower to ensure it is cheap in all cases; but there are corner cases in Turkish where this'd change behavior, so leave it as pg_tolower in the back branches. * seq_search insisted on knowing the case form (all-upper, all-lower, or initcap) of the constant strings, so that it didn't have to case-fold them to perform case-insensitive comparisons. This likewise seems like excessive micro-optimization, given that pg_tolower is certainly very cheap for ASCII data. It seems unsafe to assume that we know the case form that will come out of pg_locale.c for localized month/day names, so it's better just to define the comparison rule as "downcase all strings before comparing". (The choice between downcasing and upcasing is arbitrary so far as English is concerned, but it might not be in other locales, so follow citext's lead here.) * seq_search also had a parameter that'd cause it to report a match after a maximum number of characters, even if the constant string were longer than that. This was not actually used because no caller passed a value small enough to cut off a comparison. Replicating that behavior for localized month/day names seems expensive as well as useless, so let's get rid of that too. * from_char_seq_search used the maximum-length parameter to truncate the input string in error reports about not finding a matching name. This leads to rather confusing reports in many cases. Worse, it is outright dangerous if the input string isn't all-ASCII, because we risk truncating the string in the middle of a multibyte character. That'd lead either to delivering an illegible error message to the client, or to encoding-conversion failures that obscure the actual data problem. Get rid of that in favor of truncating at whitespace if any (a suggestion due to Alvaro Herrera). In addition to fixing these things, I const-ified the input string pointers of DCH_from_char and its subroutines, to make sure there aren't any other scribbling-on-input problems. The risk of generating a badly-encoded error message seems like enough of a bug to justify back-patching, so patch all supported branches. Discussion: https://postgr.es/m/29432.1579731087@sss.pgh.pa.us
2020-01-23 19:42:09 +01:00
from_char_seq_search(&value, &s, months_full,
S_TM(n->suffix) ? localized_full_months : NULL,
collid,
Clean up formatting.c's logic for matching constant strings. seq_search(), which is used to match input substrings to constants such as month and day names, had a lot of bizarre and unnecessary behaviors. It was mostly possible to avert our eyes from that before, but we don't want to duplicate those behaviors in the upcoming patch to allow recognition of non-English month and day names. So it's time to clean this up. In particular: * seq_search scribbled on the input string, which is a pretty dangerous thing to do, especially in the badly underdocumented way it was done here. Fortunately the input string is a temporary copy, but that was being made three subroutine levels away, making it something easy to break accidentally. The behavior is externally visible nonetheless, in the form of odd case-folding in error reports about unrecognized month/day names. The scribbling is evidently being done to save a few calls to pg_tolower, but that's such a cheap function (at least for ASCII data) that it's pretty pointless to worry about. In HEAD I switched it to be pg_ascii_tolower to ensure it is cheap in all cases; but there are corner cases in Turkish where this'd change behavior, so leave it as pg_tolower in the back branches. * seq_search insisted on knowing the case form (all-upper, all-lower, or initcap) of the constant strings, so that it didn't have to case-fold them to perform case-insensitive comparisons. This likewise seems like excessive micro-optimization, given that pg_tolower is certainly very cheap for ASCII data. It seems unsafe to assume that we know the case form that will come out of pg_locale.c for localized month/day names, so it's better just to define the comparison rule as "downcase all strings before comparing". (The choice between downcasing and upcasing is arbitrary so far as English is concerned, but it might not be in other locales, so follow citext's lead here.) * seq_search also had a parameter that'd cause it to report a match after a maximum number of characters, even if the constant string were longer than that. This was not actually used because no caller passed a value small enough to cut off a comparison. Replicating that behavior for localized month/day names seems expensive as well as useless, so let's get rid of that too. * from_char_seq_search used the maximum-length parameter to truncate the input string in error reports about not finding a matching name. This leads to rather confusing reports in many cases. Worse, it is outright dangerous if the input string isn't all-ASCII, because we risk truncating the string in the middle of a multibyte character. That'd lead either to delivering an illegible error message to the client, or to encoding-conversion failures that obscure the actual data problem. Get rid of that in favor of truncating at whitespace if any (a suggestion due to Alvaro Herrera). In addition to fixing these things, I const-ified the input string pointers of DCH_from_char and its subroutines, to make sure there aren't any other scribbling-on-input problems. The risk of generating a badly-encoded error message seems like enough of a bug to justify back-patching, so patch all supported branches. Discussion: https://postgr.es/m/29432.1579731087@sss.pgh.pa.us
2020-01-23 19:42:09 +01:00
n, have_error);
CHECK_ERROR;
from_char_set_int(&out->mm, value + 1, n, have_error);
CHECK_ERROR;
break;
case DCH_MON:
case DCH_Mon:
case DCH_mon:
Clean up formatting.c's logic for matching constant strings. seq_search(), which is used to match input substrings to constants such as month and day names, had a lot of bizarre and unnecessary behaviors. It was mostly possible to avert our eyes from that before, but we don't want to duplicate those behaviors in the upcoming patch to allow recognition of non-English month and day names. So it's time to clean this up. In particular: * seq_search scribbled on the input string, which is a pretty dangerous thing to do, especially in the badly underdocumented way it was done here. Fortunately the input string is a temporary copy, but that was being made three subroutine levels away, making it something easy to break accidentally. The behavior is externally visible nonetheless, in the form of odd case-folding in error reports about unrecognized month/day names. The scribbling is evidently being done to save a few calls to pg_tolower, but that's such a cheap function (at least for ASCII data) that it's pretty pointless to worry about. In HEAD I switched it to be pg_ascii_tolower to ensure it is cheap in all cases; but there are corner cases in Turkish where this'd change behavior, so leave it as pg_tolower in the back branches. * seq_search insisted on knowing the case form (all-upper, all-lower, or initcap) of the constant strings, so that it didn't have to case-fold them to perform case-insensitive comparisons. This likewise seems like excessive micro-optimization, given that pg_tolower is certainly very cheap for ASCII data. It seems unsafe to assume that we know the case form that will come out of pg_locale.c for localized month/day names, so it's better just to define the comparison rule as "downcase all strings before comparing". (The choice between downcasing and upcasing is arbitrary so far as English is concerned, but it might not be in other locales, so follow citext's lead here.) * seq_search also had a parameter that'd cause it to report a match after a maximum number of characters, even if the constant string were longer than that. This was not actually used because no caller passed a value small enough to cut off a comparison. Replicating that behavior for localized month/day names seems expensive as well as useless, so let's get rid of that too. * from_char_seq_search used the maximum-length parameter to truncate the input string in error reports about not finding a matching name. This leads to rather confusing reports in many cases. Worse, it is outright dangerous if the input string isn't all-ASCII, because we risk truncating the string in the middle of a multibyte character. That'd lead either to delivering an illegible error message to the client, or to encoding-conversion failures that obscure the actual data problem. Get rid of that in favor of truncating at whitespace if any (a suggestion due to Alvaro Herrera). In addition to fixing these things, I const-ified the input string pointers of DCH_from_char and its subroutines, to make sure there aren't any other scribbling-on-input problems. The risk of generating a badly-encoded error message seems like enough of a bug to justify back-patching, so patch all supported branches. Discussion: https://postgr.es/m/29432.1579731087@sss.pgh.pa.us
2020-01-23 19:42:09 +01:00
from_char_seq_search(&value, &s, months,
S_TM(n->suffix) ? localized_abbrev_months : NULL,
collid,
Clean up formatting.c's logic for matching constant strings. seq_search(), which is used to match input substrings to constants such as month and day names, had a lot of bizarre and unnecessary behaviors. It was mostly possible to avert our eyes from that before, but we don't want to duplicate those behaviors in the upcoming patch to allow recognition of non-English month and day names. So it's time to clean this up. In particular: * seq_search scribbled on the input string, which is a pretty dangerous thing to do, especially in the badly underdocumented way it was done here. Fortunately the input string is a temporary copy, but that was being made three subroutine levels away, making it something easy to break accidentally. The behavior is externally visible nonetheless, in the form of odd case-folding in error reports about unrecognized month/day names. The scribbling is evidently being done to save a few calls to pg_tolower, but that's such a cheap function (at least for ASCII data) that it's pretty pointless to worry about. In HEAD I switched it to be pg_ascii_tolower to ensure it is cheap in all cases; but there are corner cases in Turkish where this'd change behavior, so leave it as pg_tolower in the back branches. * seq_search insisted on knowing the case form (all-upper, all-lower, or initcap) of the constant strings, so that it didn't have to case-fold them to perform case-insensitive comparisons. This likewise seems like excessive micro-optimization, given that pg_tolower is certainly very cheap for ASCII data. It seems unsafe to assume that we know the case form that will come out of pg_locale.c for localized month/day names, so it's better just to define the comparison rule as "downcase all strings before comparing". (The choice between downcasing and upcasing is arbitrary so far as English is concerned, but it might not be in other locales, so follow citext's lead here.) * seq_search also had a parameter that'd cause it to report a match after a maximum number of characters, even if the constant string were longer than that. This was not actually used because no caller passed a value small enough to cut off a comparison. Replicating that behavior for localized month/day names seems expensive as well as useless, so let's get rid of that too. * from_char_seq_search used the maximum-length parameter to truncate the input string in error reports about not finding a matching name. This leads to rather confusing reports in many cases. Worse, it is outright dangerous if the input string isn't all-ASCII, because we risk truncating the string in the middle of a multibyte character. That'd lead either to delivering an illegible error message to the client, or to encoding-conversion failures that obscure the actual data problem. Get rid of that in favor of truncating at whitespace if any (a suggestion due to Alvaro Herrera). In addition to fixing these things, I const-ified the input string pointers of DCH_from_char and its subroutines, to make sure there aren't any other scribbling-on-input problems. The risk of generating a badly-encoded error message seems like enough of a bug to justify back-patching, so patch all supported branches. Discussion: https://postgr.es/m/29432.1579731087@sss.pgh.pa.us
2020-01-23 19:42:09 +01:00
n, have_error);
CHECK_ERROR;
from_char_set_int(&out->mm, value + 1, n, have_error);
CHECK_ERROR;
break;
case DCH_MM:
from_char_parse_int(&out->mm, &s, n, have_error);
CHECK_ERROR;
SKIP_THth(s, n->suffix);
break;
case DCH_DAY:
case DCH_Day:
case DCH_day:
Clean up formatting.c's logic for matching constant strings. seq_search(), which is used to match input substrings to constants such as month and day names, had a lot of bizarre and unnecessary behaviors. It was mostly possible to avert our eyes from that before, but we don't want to duplicate those behaviors in the upcoming patch to allow recognition of non-English month and day names. So it's time to clean this up. In particular: * seq_search scribbled on the input string, which is a pretty dangerous thing to do, especially in the badly underdocumented way it was done here. Fortunately the input string is a temporary copy, but that was being made three subroutine levels away, making it something easy to break accidentally. The behavior is externally visible nonetheless, in the form of odd case-folding in error reports about unrecognized month/day names. The scribbling is evidently being done to save a few calls to pg_tolower, but that's such a cheap function (at least for ASCII data) that it's pretty pointless to worry about. In HEAD I switched it to be pg_ascii_tolower to ensure it is cheap in all cases; but there are corner cases in Turkish where this'd change behavior, so leave it as pg_tolower in the back branches. * seq_search insisted on knowing the case form (all-upper, all-lower, or initcap) of the constant strings, so that it didn't have to case-fold them to perform case-insensitive comparisons. This likewise seems like excessive micro-optimization, given that pg_tolower is certainly very cheap for ASCII data. It seems unsafe to assume that we know the case form that will come out of pg_locale.c for localized month/day names, so it's better just to define the comparison rule as "downcase all strings before comparing". (The choice between downcasing and upcasing is arbitrary so far as English is concerned, but it might not be in other locales, so follow citext's lead here.) * seq_search also had a parameter that'd cause it to report a match after a maximum number of characters, even if the constant string were longer than that. This was not actually used because no caller passed a value small enough to cut off a comparison. Replicating that behavior for localized month/day names seems expensive as well as useless, so let's get rid of that too. * from_char_seq_search used the maximum-length parameter to truncate the input string in error reports about not finding a matching name. This leads to rather confusing reports in many cases. Worse, it is outright dangerous if the input string isn't all-ASCII, because we risk truncating the string in the middle of a multibyte character. That'd lead either to delivering an illegible error message to the client, or to encoding-conversion failures that obscure the actual data problem. Get rid of that in favor of truncating at whitespace if any (a suggestion due to Alvaro Herrera). In addition to fixing these things, I const-ified the input string pointers of DCH_from_char and its subroutines, to make sure there aren't any other scribbling-on-input problems. The risk of generating a badly-encoded error message seems like enough of a bug to justify back-patching, so patch all supported branches. Discussion: https://postgr.es/m/29432.1579731087@sss.pgh.pa.us
2020-01-23 19:42:09 +01:00
from_char_seq_search(&value, &s, days,
S_TM(n->suffix) ? localized_full_days : NULL,
collid,
Clean up formatting.c's logic for matching constant strings. seq_search(), which is used to match input substrings to constants such as month and day names, had a lot of bizarre and unnecessary behaviors. It was mostly possible to avert our eyes from that before, but we don't want to duplicate those behaviors in the upcoming patch to allow recognition of non-English month and day names. So it's time to clean this up. In particular: * seq_search scribbled on the input string, which is a pretty dangerous thing to do, especially in the badly underdocumented way it was done here. Fortunately the input string is a temporary copy, but that was being made three subroutine levels away, making it something easy to break accidentally. The behavior is externally visible nonetheless, in the form of odd case-folding in error reports about unrecognized month/day names. The scribbling is evidently being done to save a few calls to pg_tolower, but that's such a cheap function (at least for ASCII data) that it's pretty pointless to worry about. In HEAD I switched it to be pg_ascii_tolower to ensure it is cheap in all cases; but there are corner cases in Turkish where this'd change behavior, so leave it as pg_tolower in the back branches. * seq_search insisted on knowing the case form (all-upper, all-lower, or initcap) of the constant strings, so that it didn't have to case-fold them to perform case-insensitive comparisons. This likewise seems like excessive micro-optimization, given that pg_tolower is certainly very cheap for ASCII data. It seems unsafe to assume that we know the case form that will come out of pg_locale.c for localized month/day names, so it's better just to define the comparison rule as "downcase all strings before comparing". (The choice between downcasing and upcasing is arbitrary so far as English is concerned, but it might not be in other locales, so follow citext's lead here.) * seq_search also had a parameter that'd cause it to report a match after a maximum number of characters, even if the constant string were longer than that. This was not actually used because no caller passed a value small enough to cut off a comparison. Replicating that behavior for localized month/day names seems expensive as well as useless, so let's get rid of that too. * from_char_seq_search used the maximum-length parameter to truncate the input string in error reports about not finding a matching name. This leads to rather confusing reports in many cases. Worse, it is outright dangerous if the input string isn't all-ASCII, because we risk truncating the string in the middle of a multibyte character. That'd lead either to delivering an illegible error message to the client, or to encoding-conversion failures that obscure the actual data problem. Get rid of that in favor of truncating at whitespace if any (a suggestion due to Alvaro Herrera). In addition to fixing these things, I const-ified the input string pointers of DCH_from_char and its subroutines, to make sure there aren't any other scribbling-on-input problems. The risk of generating a badly-encoded error message seems like enough of a bug to justify back-patching, so patch all supported branches. Discussion: https://postgr.es/m/29432.1579731087@sss.pgh.pa.us
2020-01-23 19:42:09 +01:00
n, have_error);
CHECK_ERROR;
from_char_set_int(&out->d, value, n, have_error);
CHECK_ERROR;
out->d++;
break;
case DCH_DY:
case DCH_Dy:
case DCH_dy:
from_char_seq_search(&value, &s, days_short,
S_TM(n->suffix) ? localized_abbrev_days : NULL,
collid,
Clean up formatting.c's logic for matching constant strings. seq_search(), which is used to match input substrings to constants such as month and day names, had a lot of bizarre and unnecessary behaviors. It was mostly possible to avert our eyes from that before, but we don't want to duplicate those behaviors in the upcoming patch to allow recognition of non-English month and day names. So it's time to clean this up. In particular: * seq_search scribbled on the input string, which is a pretty dangerous thing to do, especially in the badly underdocumented way it was done here. Fortunately the input string is a temporary copy, but that was being made three subroutine levels away, making it something easy to break accidentally. The behavior is externally visible nonetheless, in the form of odd case-folding in error reports about unrecognized month/day names. The scribbling is evidently being done to save a few calls to pg_tolower, but that's such a cheap function (at least for ASCII data) that it's pretty pointless to worry about. In HEAD I switched it to be pg_ascii_tolower to ensure it is cheap in all cases; but there are corner cases in Turkish where this'd change behavior, so leave it as pg_tolower in the back branches. * seq_search insisted on knowing the case form (all-upper, all-lower, or initcap) of the constant strings, so that it didn't have to case-fold them to perform case-insensitive comparisons. This likewise seems like excessive micro-optimization, given that pg_tolower is certainly very cheap for ASCII data. It seems unsafe to assume that we know the case form that will come out of pg_locale.c for localized month/day names, so it's better just to define the comparison rule as "downcase all strings before comparing". (The choice between downcasing and upcasing is arbitrary so far as English is concerned, but it might not be in other locales, so follow citext's lead here.) * seq_search also had a parameter that'd cause it to report a match after a maximum number of characters, even if the constant string were longer than that. This was not actually used because no caller passed a value small enough to cut off a comparison. Replicating that behavior for localized month/day names seems expensive as well as useless, so let's get rid of that too. * from_char_seq_search used the maximum-length parameter to truncate the input string in error reports about not finding a matching name. This leads to rather confusing reports in many cases. Worse, it is outright dangerous if the input string isn't all-ASCII, because we risk truncating the string in the middle of a multibyte character. That'd lead either to delivering an illegible error message to the client, or to encoding-conversion failures that obscure the actual data problem. Get rid of that in favor of truncating at whitespace if any (a suggestion due to Alvaro Herrera). In addition to fixing these things, I const-ified the input string pointers of DCH_from_char and its subroutines, to make sure there aren't any other scribbling-on-input problems. The risk of generating a badly-encoded error message seems like enough of a bug to justify back-patching, so patch all supported branches. Discussion: https://postgr.es/m/29432.1579731087@sss.pgh.pa.us
2020-01-23 19:42:09 +01:00
n, have_error);
CHECK_ERROR;
from_char_set_int(&out->d, value, n, have_error);
CHECK_ERROR;
out->d++;
break;
case DCH_DDD:
from_char_parse_int(&out->ddd, &s, n, have_error);
CHECK_ERROR;
SKIP_THth(s, n->suffix);
break;
case DCH_IDDD:
from_char_parse_int_len(&out->ddd, &s, 3, n, have_error);
CHECK_ERROR;
SKIP_THth(s, n->suffix);
break;
case DCH_DD:
from_char_parse_int(&out->dd, &s, n, have_error);
CHECK_ERROR;
SKIP_THth(s, n->suffix);
break;
case DCH_D:
from_char_parse_int(&out->d, &s, n, have_error);
CHECK_ERROR;
SKIP_THth(s, n->suffix);
break;
case DCH_ID:
from_char_parse_int_len(&out->d, &s, 1, n, have_error);
CHECK_ERROR;
/* Shift numbering to match Gregorian where Sunday = 1 */
if (++out->d > 7)
out->d = 1;
SKIP_THth(s, n->suffix);
break;
case DCH_WW:
case DCH_IW:
from_char_parse_int(&out->ww, &s, n, have_error);
CHECK_ERROR;
SKIP_THth(s, n->suffix);
break;
case DCH_Q:
2010-07-06 21:19:02 +02:00
/*
2010-07-06 21:19:02 +02:00
* We ignore 'Q' when converting to date because it is unclear
* which date in the quarter to use, and some people specify
* both quarter and month, so if it was honored it might
* conflict with the supplied month. That is also why we don't
* throw an error.
*
* We still parse the source string for an integer, but it
* isn't stored anywhere in 'out'.
*/
from_char_parse_int((int *) NULL, &s, n, have_error);
CHECK_ERROR;
SKIP_THth(s, n->suffix);
break;
case DCH_CC:
from_char_parse_int(&out->cc, &s, n, have_error);
CHECK_ERROR;
SKIP_THth(s, n->suffix);
break;
case DCH_Y_YYY:
{
int matched,
years,
millennia,
nch;
matched = sscanf(s, "%d,%03d%n", &millennia, &years, &nch);
if (matched < 2)
RETURN_ERROR(ereport(ERROR,
(errcode(ERRCODE_INVALID_DATETIME_FORMAT),
errmsg("invalid input string for \"Y,YYY\""))));
years += (millennia * 1000);
from_char_set_int(&out->year, years, n, have_error);
CHECK_ERROR;
out->yysz = 4;
s += nch;
SKIP_THth(s, n->suffix);
}
break;
case DCH_YYYY:
case DCH_IYYY:
from_char_parse_int(&out->year, &s, n, have_error);
CHECK_ERROR;
out->yysz = 4;
SKIP_THth(s, n->suffix);
break;
case DCH_YYY:
case DCH_IYY:
len = from_char_parse_int(&out->year, &s, n, have_error);
CHECK_ERROR;
if (len < 4)
out->year = adjust_partial_year_to_2020(out->year);
out->yysz = 3;
SKIP_THth(s, n->suffix);
break;
case DCH_YY:
case DCH_IY:
len = from_char_parse_int(&out->year, &s, n, have_error);
CHECK_ERROR;
if (len < 4)
out->year = adjust_partial_year_to_2020(out->year);
out->yysz = 2;
SKIP_THth(s, n->suffix);
break;
case DCH_Y:
case DCH_I:
len = from_char_parse_int(&out->year, &s, n, have_error);
CHECK_ERROR;
if (len < 4)
out->year = adjust_partial_year_to_2020(out->year);
out->yysz = 1;
SKIP_THth(s, n->suffix);
break;
case DCH_RM:
case DCH_rm:
from_char_seq_search(&value, &s, rm_months_lower,
NULL, InvalidOid,
Clean up formatting.c's logic for matching constant strings. seq_search(), which is used to match input substrings to constants such as month and day names, had a lot of bizarre and unnecessary behaviors. It was mostly possible to avert our eyes from that before, but we don't want to duplicate those behaviors in the upcoming patch to allow recognition of non-English month and day names. So it's time to clean this up. In particular: * seq_search scribbled on the input string, which is a pretty dangerous thing to do, especially in the badly underdocumented way it was done here. Fortunately the input string is a temporary copy, but that was being made three subroutine levels away, making it something easy to break accidentally. The behavior is externally visible nonetheless, in the form of odd case-folding in error reports about unrecognized month/day names. The scribbling is evidently being done to save a few calls to pg_tolower, but that's such a cheap function (at least for ASCII data) that it's pretty pointless to worry about. In HEAD I switched it to be pg_ascii_tolower to ensure it is cheap in all cases; but there are corner cases in Turkish where this'd change behavior, so leave it as pg_tolower in the back branches. * seq_search insisted on knowing the case form (all-upper, all-lower, or initcap) of the constant strings, so that it didn't have to case-fold them to perform case-insensitive comparisons. This likewise seems like excessive micro-optimization, given that pg_tolower is certainly very cheap for ASCII data. It seems unsafe to assume that we know the case form that will come out of pg_locale.c for localized month/day names, so it's better just to define the comparison rule as "downcase all strings before comparing". (The choice between downcasing and upcasing is arbitrary so far as English is concerned, but it might not be in other locales, so follow citext's lead here.) * seq_search also had a parameter that'd cause it to report a match after a maximum number of characters, even if the constant string were longer than that. This was not actually used because no caller passed a value small enough to cut off a comparison. Replicating that behavior for localized month/day names seems expensive as well as useless, so let's get rid of that too. * from_char_seq_search used the maximum-length parameter to truncate the input string in error reports about not finding a matching name. This leads to rather confusing reports in many cases. Worse, it is outright dangerous if the input string isn't all-ASCII, because we risk truncating the string in the middle of a multibyte character. That'd lead either to delivering an illegible error message to the client, or to encoding-conversion failures that obscure the actual data problem. Get rid of that in favor of truncating at whitespace if any (a suggestion due to Alvaro Herrera). In addition to fixing these things, I const-ified the input string pointers of DCH_from_char and its subroutines, to make sure there aren't any other scribbling-on-input problems. The risk of generating a badly-encoded error message seems like enough of a bug to justify back-patching, so patch all supported branches. Discussion: https://postgr.es/m/29432.1579731087@sss.pgh.pa.us
2020-01-23 19:42:09 +01:00
n, have_error);
CHECK_ERROR;
from_char_set_int(&out->mm, MONTHS_PER_YEAR - value,
n, have_error);
CHECK_ERROR;
break;
case DCH_W:
from_char_parse_int(&out->w, &s, n, have_error);
CHECK_ERROR;
SKIP_THth(s, n->suffix);
break;
case DCH_J:
from_char_parse_int(&out->j, &s, n, have_error);
CHECK_ERROR;
SKIP_THth(s, n->suffix);
break;
}
/* Ignore all spaces after fields */
if (!fx_mode)
{
extra_skip = 0;
while (*s != '\0' && isspace((unsigned char) *s))
{
s++;
extra_skip++;
}
}
}
/*
* Standard parsing mode doesn't allow unmatched format patterns or
* trailing characters in the input string.
*/
if (std)
{
if (n->type != NODE_TYPE_END)
RETURN_ERROR(ereport(ERROR,
(errcode(ERRCODE_INVALID_DATETIME_FORMAT),
errmsg("input string is too short for datetime format"))));
while (*s != '\0' && isspace((unsigned char) *s))
s++;
if (*s != '\0')
RETURN_ERROR(ereport(ERROR,
(errcode(ERRCODE_INVALID_DATETIME_FORMAT),
errmsg("trailing characters remain in input string "
"after datetime format"))));
}
on_error:
return;
}
/*
* The invariant for DCH cache entry management is that DCHCounter is equal
* to the maximum age value among the existing entries, and we increment it
* whenever an access occurs. If we approach overflow, deal with that by
* halving all the age values, so that we retain a fairly accurate idea of
* which entries are oldest.
*/
static inline void
DCH_prevent_counter_overflow(void)
{
if (DCHCounter >= (INT_MAX - 1))
{
for (int i = 0; i < n_DCHCache; i++)
DCHCache[i]->age >>= 1;
DCHCounter >>= 1;
}
}
/*
* Get mask of date/time/zone components present in format nodes.
*
* If 'have_error' is NULL, then errors are thrown, else '*have_error' is set.
*/
static int
DCH_datetime_type(FormatNode *node, bool *have_error)
{
FormatNode *n;
int flags = 0;
for (n = node; n->type != NODE_TYPE_END; n++)
{
if (n->type != NODE_TYPE_ACTION)
continue;
switch (n->key->id)
{
case DCH_FX:
break;
case DCH_A_M:
case DCH_P_M:
case DCH_a_m:
case DCH_p_m:
case DCH_AM:
case DCH_PM:
case DCH_am:
case DCH_pm:
case DCH_HH:
case DCH_HH12:
case DCH_HH24:
case DCH_MI:
case DCH_SS:
case DCH_MS: /* millisecond */
case DCH_US: /* microsecond */
case DCH_FF1:
case DCH_FF2:
case DCH_FF3:
case DCH_FF4:
case DCH_FF5:
case DCH_FF6:
case DCH_SSSS:
flags |= DCH_TIMED;
break;
case DCH_tz:
case DCH_TZ:
case DCH_OF:
RETURN_ERROR(ereport(ERROR,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("formatting field \"%s\" is only supported in to_char",
n->key->name))));
flags |= DCH_ZONED;
break;
case DCH_TZH:
case DCH_TZM:
flags |= DCH_ZONED;
break;
case DCH_A_D:
case DCH_B_C:
case DCH_a_d:
case DCH_b_c:
case DCH_AD:
case DCH_BC:
case DCH_ad:
case DCH_bc:
case DCH_MONTH:
case DCH_Month:
case DCH_month:
case DCH_MON:
case DCH_Mon:
case DCH_mon:
case DCH_MM:
case DCH_DAY:
case DCH_Day:
case DCH_day:
case DCH_DY:
case DCH_Dy:
case DCH_dy:
case DCH_DDD:
case DCH_IDDD:
case DCH_DD:
case DCH_D:
case DCH_ID:
case DCH_WW:
case DCH_Q:
case DCH_CC:
case DCH_Y_YYY:
case DCH_YYYY:
case DCH_IYYY:
case DCH_YYY:
case DCH_IYY:
case DCH_YY:
case DCH_IY:
case DCH_Y:
case DCH_I:
case DCH_RM:
case DCH_rm:
case DCH_W:
case DCH_J:
flags |= DCH_DATED;
break;
}
}
on_error:
return flags;
}
Rationalize format-picture caching logic in formatting.c. Add a validity flag to DCHCacheEntry and NUMCacheEntry entries, and do not set it true until after we've parsed the supplied format string. This allows dealing with possible errors while parsing the format without the baroque hack that was there before (which only covered errors within NUMDesc_prepare, anyway). We can get rid of the PG_TRY in NUMDesc_prepare, as well as last_NUMCacheEntry and NUM_cache_remove. (Essentially, this reverts commit ff783fbae in favor of a less fragile solution; the problems with that approach are well illustrated by later hacking such as 55f927a46.) In passing, define the size of these caches as DCH_CACHE_ENTRIES not DCH_CACHE_FIELDS + 1 (whoever thought that was a good definition?) and likewise for the NUM cache. Also const-ify format string parameters where convenient, and merge duplicated cache lookup logic. This is primarily driven by a proposed patch from Artur Zakirov, which introduced some ereport's into format string parsing for the datetime case. He proposed preventing the creation of invalid cache entries by parsing the format string first into a local-variable array, and then copying that to a cache entry. That seemed a bit ugly to me, and anyway randomly different from the way the identical problem had been solved for the numeric case. Let's make the two sets of code more similar not less so. I'm not sure whether we'll adopt the new error conditions Artur proposes, but this patch seems like good code cleanup and future-proofing in any case. The existing code is critically (and undocumented-ly) dependent on no elog being thrown out of several nontrivial functions, which is trouble waiting to happen, though it doesn't seem to be actively broken today. Discussion: <b2a39359-3282-b402-f4a3-057aae500ee7@postgrespro.ru>
2016-09-28 23:08:40 +02:00
/* select a DCHCacheEntry to hold the given format picture */
static DCHCacheEntry *
DCH_cache_getnew(const char *str, bool std)
{
DCHCacheEntry *ent;
/* Ensure we can advance DCHCounter below */
DCH_prevent_counter_overflow();
/*
Rationalize format-picture caching logic in formatting.c. Add a validity flag to DCHCacheEntry and NUMCacheEntry entries, and do not set it true until after we've parsed the supplied format string. This allows dealing with possible errors while parsing the format without the baroque hack that was there before (which only covered errors within NUMDesc_prepare, anyway). We can get rid of the PG_TRY in NUMDesc_prepare, as well as last_NUMCacheEntry and NUM_cache_remove. (Essentially, this reverts commit ff783fbae in favor of a less fragile solution; the problems with that approach are well illustrated by later hacking such as 55f927a46.) In passing, define the size of these caches as DCH_CACHE_ENTRIES not DCH_CACHE_FIELDS + 1 (whoever thought that was a good definition?) and likewise for the NUM cache. Also const-ify format string parameters where convenient, and merge duplicated cache lookup logic. This is primarily driven by a proposed patch from Artur Zakirov, which introduced some ereport's into format string parsing for the datetime case. He proposed preventing the creation of invalid cache entries by parsing the format string first into a local-variable array, and then copying that to a cache entry. That seemed a bit ugly to me, and anyway randomly different from the way the identical problem had been solved for the numeric case. Let's make the two sets of code more similar not less so. I'm not sure whether we'll adopt the new error conditions Artur proposes, but this patch seems like good code cleanup and future-proofing in any case. The existing code is critically (and undocumented-ly) dependent on no elog being thrown out of several nontrivial functions, which is trouble waiting to happen, though it doesn't seem to be actively broken today. Discussion: <b2a39359-3282-b402-f4a3-057aae500ee7@postgrespro.ru>
2016-09-28 23:08:40 +02:00
* If cache is full, remove oldest entry (or recycle first not-valid one)
*/
Rationalize format-picture caching logic in formatting.c. Add a validity flag to DCHCacheEntry and NUMCacheEntry entries, and do not set it true until after we've parsed the supplied format string. This allows dealing with possible errors while parsing the format without the baroque hack that was there before (which only covered errors within NUMDesc_prepare, anyway). We can get rid of the PG_TRY in NUMDesc_prepare, as well as last_NUMCacheEntry and NUM_cache_remove. (Essentially, this reverts commit ff783fbae in favor of a less fragile solution; the problems with that approach are well illustrated by later hacking such as 55f927a46.) In passing, define the size of these caches as DCH_CACHE_ENTRIES not DCH_CACHE_FIELDS + 1 (whoever thought that was a good definition?) and likewise for the NUM cache. Also const-ify format string parameters where convenient, and merge duplicated cache lookup logic. This is primarily driven by a proposed patch from Artur Zakirov, which introduced some ereport's into format string parsing for the datetime case. He proposed preventing the creation of invalid cache entries by parsing the format string first into a local-variable array, and then copying that to a cache entry. That seemed a bit ugly to me, and anyway randomly different from the way the identical problem had been solved for the numeric case. Let's make the two sets of code more similar not less so. I'm not sure whether we'll adopt the new error conditions Artur proposes, but this patch seems like good code cleanup and future-proofing in any case. The existing code is critically (and undocumented-ly) dependent on no elog being thrown out of several nontrivial functions, which is trouble waiting to happen, though it doesn't seem to be actively broken today. Discussion: <b2a39359-3282-b402-f4a3-057aae500ee7@postgrespro.ru>
2016-09-28 23:08:40 +02:00
if (n_DCHCache >= DCH_CACHE_ENTRIES)
{
DCHCacheEntry *old = DCHCache[0];
#ifdef DEBUG_TO_FROM_CHAR
elog(DEBUG_elog_output, "cache is full (%d)", n_DCHCache);
#endif
Rationalize format-picture caching logic in formatting.c. Add a validity flag to DCHCacheEntry and NUMCacheEntry entries, and do not set it true until after we've parsed the supplied format string. This allows dealing with possible errors while parsing the format without the baroque hack that was there before (which only covered errors within NUMDesc_prepare, anyway). We can get rid of the PG_TRY in NUMDesc_prepare, as well as last_NUMCacheEntry and NUM_cache_remove. (Essentially, this reverts commit ff783fbae in favor of a less fragile solution; the problems with that approach are well illustrated by later hacking such as 55f927a46.) In passing, define the size of these caches as DCH_CACHE_ENTRIES not DCH_CACHE_FIELDS + 1 (whoever thought that was a good definition?) and likewise for the NUM cache. Also const-ify format string parameters where convenient, and merge duplicated cache lookup logic. This is primarily driven by a proposed patch from Artur Zakirov, which introduced some ereport's into format string parsing for the datetime case. He proposed preventing the creation of invalid cache entries by parsing the format string first into a local-variable array, and then copying that to a cache entry. That seemed a bit ugly to me, and anyway randomly different from the way the identical problem had been solved for the numeric case. Let's make the two sets of code more similar not less so. I'm not sure whether we'll adopt the new error conditions Artur proposes, but this patch seems like good code cleanup and future-proofing in any case. The existing code is critically (and undocumented-ly) dependent on no elog being thrown out of several nontrivial functions, which is trouble waiting to happen, though it doesn't seem to be actively broken today. Discussion: <b2a39359-3282-b402-f4a3-057aae500ee7@postgrespro.ru>
2016-09-28 23:08:40 +02:00
if (old->valid)
{
for (int i = 1; i < DCH_CACHE_ENTRIES; i++)
Rationalize format-picture caching logic in formatting.c. Add a validity flag to DCHCacheEntry and NUMCacheEntry entries, and do not set it true until after we've parsed the supplied format string. This allows dealing with possible errors while parsing the format without the baroque hack that was there before (which only covered errors within NUMDesc_prepare, anyway). We can get rid of the PG_TRY in NUMDesc_prepare, as well as last_NUMCacheEntry and NUM_cache_remove. (Essentially, this reverts commit ff783fbae in favor of a less fragile solution; the problems with that approach are well illustrated by later hacking such as 55f927a46.) In passing, define the size of these caches as DCH_CACHE_ENTRIES not DCH_CACHE_FIELDS + 1 (whoever thought that was a good definition?) and likewise for the NUM cache. Also const-ify format string parameters where convenient, and merge duplicated cache lookup logic. This is primarily driven by a proposed patch from Artur Zakirov, which introduced some ereport's into format string parsing for the datetime case. He proposed preventing the creation of invalid cache entries by parsing the format string first into a local-variable array, and then copying that to a cache entry. That seemed a bit ugly to me, and anyway randomly different from the way the identical problem had been solved for the numeric case. Let's make the two sets of code more similar not less so. I'm not sure whether we'll adopt the new error conditions Artur proposes, but this patch seems like good code cleanup and future-proofing in any case. The existing code is critically (and undocumented-ly) dependent on no elog being thrown out of several nontrivial functions, which is trouble waiting to happen, though it doesn't seem to be actively broken today. Discussion: <b2a39359-3282-b402-f4a3-057aae500ee7@postgrespro.ru>
2016-09-28 23:08:40 +02:00
{
ent = DCHCache[i];
Rationalize format-picture caching logic in formatting.c. Add a validity flag to DCHCacheEntry and NUMCacheEntry entries, and do not set it true until after we've parsed the supplied format string. This allows dealing with possible errors while parsing the format without the baroque hack that was there before (which only covered errors within NUMDesc_prepare, anyway). We can get rid of the PG_TRY in NUMDesc_prepare, as well as last_NUMCacheEntry and NUM_cache_remove. (Essentially, this reverts commit ff783fbae in favor of a less fragile solution; the problems with that approach are well illustrated by later hacking such as 55f927a46.) In passing, define the size of these caches as DCH_CACHE_ENTRIES not DCH_CACHE_FIELDS + 1 (whoever thought that was a good definition?) and likewise for the NUM cache. Also const-ify format string parameters where convenient, and merge duplicated cache lookup logic. This is primarily driven by a proposed patch from Artur Zakirov, which introduced some ereport's into format string parsing for the datetime case. He proposed preventing the creation of invalid cache entries by parsing the format string first into a local-variable array, and then copying that to a cache entry. That seemed a bit ugly to me, and anyway randomly different from the way the identical problem had been solved for the numeric case. Let's make the two sets of code more similar not less so. I'm not sure whether we'll adopt the new error conditions Artur proposes, but this patch seems like good code cleanup and future-proofing in any case. The existing code is critically (and undocumented-ly) dependent on no elog being thrown out of several nontrivial functions, which is trouble waiting to happen, though it doesn't seem to be actively broken today. Discussion: <b2a39359-3282-b402-f4a3-057aae500ee7@postgrespro.ru>
2016-09-28 23:08:40 +02:00
if (!ent->valid)
{
old = ent;
break;
}
if (ent->age < old->age)
old = ent;
}
}
#ifdef DEBUG_TO_FROM_CHAR
elog(DEBUG_elog_output, "OLD: '%s' AGE: %d", old->str, old->age);
#endif
Rationalize format-picture caching logic in formatting.c. Add a validity flag to DCHCacheEntry and NUMCacheEntry entries, and do not set it true until after we've parsed the supplied format string. This allows dealing with possible errors while parsing the format without the baroque hack that was there before (which only covered errors within NUMDesc_prepare, anyway). We can get rid of the PG_TRY in NUMDesc_prepare, as well as last_NUMCacheEntry and NUM_cache_remove. (Essentially, this reverts commit ff783fbae in favor of a less fragile solution; the problems with that approach are well illustrated by later hacking such as 55f927a46.) In passing, define the size of these caches as DCH_CACHE_ENTRIES not DCH_CACHE_FIELDS + 1 (whoever thought that was a good definition?) and likewise for the NUM cache. Also const-ify format string parameters where convenient, and merge duplicated cache lookup logic. This is primarily driven by a proposed patch from Artur Zakirov, which introduced some ereport's into format string parsing for the datetime case. He proposed preventing the creation of invalid cache entries by parsing the format string first into a local-variable array, and then copying that to a cache entry. That seemed a bit ugly to me, and anyway randomly different from the way the identical problem had been solved for the numeric case. Let's make the two sets of code more similar not less so. I'm not sure whether we'll adopt the new error conditions Artur proposes, but this patch seems like good code cleanup and future-proofing in any case. The existing code is critically (and undocumented-ly) dependent on no elog being thrown out of several nontrivial functions, which is trouble waiting to happen, though it doesn't seem to be actively broken today. Discussion: <b2a39359-3282-b402-f4a3-057aae500ee7@postgrespro.ru>
2016-09-28 23:08:40 +02:00
old->valid = false;
StrNCpy(old->str, str, DCH_CACHE_SIZE + 1);
old->age = (++DCHCounter);
Rationalize format-picture caching logic in formatting.c. Add a validity flag to DCHCacheEntry and NUMCacheEntry entries, and do not set it true until after we've parsed the supplied format string. This allows dealing with possible errors while parsing the format without the baroque hack that was there before (which only covered errors within NUMDesc_prepare, anyway). We can get rid of the PG_TRY in NUMDesc_prepare, as well as last_NUMCacheEntry and NUM_cache_remove. (Essentially, this reverts commit ff783fbae in favor of a less fragile solution; the problems with that approach are well illustrated by later hacking such as 55f927a46.) In passing, define the size of these caches as DCH_CACHE_ENTRIES not DCH_CACHE_FIELDS + 1 (whoever thought that was a good definition?) and likewise for the NUM cache. Also const-ify format string parameters where convenient, and merge duplicated cache lookup logic. This is primarily driven by a proposed patch from Artur Zakirov, which introduced some ereport's into format string parsing for the datetime case. He proposed preventing the creation of invalid cache entries by parsing the format string first into a local-variable array, and then copying that to a cache entry. That seemed a bit ugly to me, and anyway randomly different from the way the identical problem had been solved for the numeric case. Let's make the two sets of code more similar not less so. I'm not sure whether we'll adopt the new error conditions Artur proposes, but this patch seems like good code cleanup and future-proofing in any case. The existing code is critically (and undocumented-ly) dependent on no elog being thrown out of several nontrivial functions, which is trouble waiting to happen, though it doesn't seem to be actively broken today. Discussion: <b2a39359-3282-b402-f4a3-057aae500ee7@postgrespro.ru>
2016-09-28 23:08:40 +02:00
/* caller is expected to fill format, then set valid */
return old;
}
else
{
#ifdef DEBUG_TO_FROM_CHAR
elog(DEBUG_elog_output, "NEW (%d)", n_DCHCache);
#endif
Assert(DCHCache[n_DCHCache] == NULL);
DCHCache[n_DCHCache] = ent = (DCHCacheEntry *)
MemoryContextAllocZero(TopMemoryContext, sizeof(DCHCacheEntry));
Rationalize format-picture caching logic in formatting.c. Add a validity flag to DCHCacheEntry and NUMCacheEntry entries, and do not set it true until after we've parsed the supplied format string. This allows dealing with possible errors while parsing the format without the baroque hack that was there before (which only covered errors within NUMDesc_prepare, anyway). We can get rid of the PG_TRY in NUMDesc_prepare, as well as last_NUMCacheEntry and NUM_cache_remove. (Essentially, this reverts commit ff783fbae in favor of a less fragile solution; the problems with that approach are well illustrated by later hacking such as 55f927a46.) In passing, define the size of these caches as DCH_CACHE_ENTRIES not DCH_CACHE_FIELDS + 1 (whoever thought that was a good definition?) and likewise for the NUM cache. Also const-ify format string parameters where convenient, and merge duplicated cache lookup logic. This is primarily driven by a proposed patch from Artur Zakirov, which introduced some ereport's into format string parsing for the datetime case. He proposed preventing the creation of invalid cache entries by parsing the format string first into a local-variable array, and then copying that to a cache entry. That seemed a bit ugly to me, and anyway randomly different from the way the identical problem had been solved for the numeric case. Let's make the two sets of code more similar not less so. I'm not sure whether we'll adopt the new error conditions Artur proposes, but this patch seems like good code cleanup and future-proofing in any case. The existing code is critically (and undocumented-ly) dependent on no elog being thrown out of several nontrivial functions, which is trouble waiting to happen, though it doesn't seem to be actively broken today. Discussion: <b2a39359-3282-b402-f4a3-057aae500ee7@postgrespro.ru>
2016-09-28 23:08:40 +02:00
ent->valid = false;
StrNCpy(ent->str, str, DCH_CACHE_SIZE + 1);
ent->std = std;
ent->age = (++DCHCounter);
Rationalize format-picture caching logic in formatting.c. Add a validity flag to DCHCacheEntry and NUMCacheEntry entries, and do not set it true until after we've parsed the supplied format string. This allows dealing with possible errors while parsing the format without the baroque hack that was there before (which only covered errors within NUMDesc_prepare, anyway). We can get rid of the PG_TRY in NUMDesc_prepare, as well as last_NUMCacheEntry and NUM_cache_remove. (Essentially, this reverts commit ff783fbae in favor of a less fragile solution; the problems with that approach are well illustrated by later hacking such as 55f927a46.) In passing, define the size of these caches as DCH_CACHE_ENTRIES not DCH_CACHE_FIELDS + 1 (whoever thought that was a good definition?) and likewise for the NUM cache. Also const-ify format string parameters where convenient, and merge duplicated cache lookup logic. This is primarily driven by a proposed patch from Artur Zakirov, which introduced some ereport's into format string parsing for the datetime case. He proposed preventing the creation of invalid cache entries by parsing the format string first into a local-variable array, and then copying that to a cache entry. That seemed a bit ugly to me, and anyway randomly different from the way the identical problem had been solved for the numeric case. Let's make the two sets of code more similar not less so. I'm not sure whether we'll adopt the new error conditions Artur proposes, but this patch seems like good code cleanup and future-proofing in any case. The existing code is critically (and undocumented-ly) dependent on no elog being thrown out of several nontrivial functions, which is trouble waiting to happen, though it doesn't seem to be actively broken today. Discussion: <b2a39359-3282-b402-f4a3-057aae500ee7@postgrespro.ru>
2016-09-28 23:08:40 +02:00
/* caller is expected to fill format, then set valid */
++n_DCHCache;
return ent;
}
}
Rationalize format-picture caching logic in formatting.c. Add a validity flag to DCHCacheEntry and NUMCacheEntry entries, and do not set it true until after we've parsed the supplied format string. This allows dealing with possible errors while parsing the format without the baroque hack that was there before (which only covered errors within NUMDesc_prepare, anyway). We can get rid of the PG_TRY in NUMDesc_prepare, as well as last_NUMCacheEntry and NUM_cache_remove. (Essentially, this reverts commit ff783fbae in favor of a less fragile solution; the problems with that approach are well illustrated by later hacking such as 55f927a46.) In passing, define the size of these caches as DCH_CACHE_ENTRIES not DCH_CACHE_FIELDS + 1 (whoever thought that was a good definition?) and likewise for the NUM cache. Also const-ify format string parameters where convenient, and merge duplicated cache lookup logic. This is primarily driven by a proposed patch from Artur Zakirov, which introduced some ereport's into format string parsing for the datetime case. He proposed preventing the creation of invalid cache entries by parsing the format string first into a local-variable array, and then copying that to a cache entry. That seemed a bit ugly to me, and anyway randomly different from the way the identical problem had been solved for the numeric case. Let's make the two sets of code more similar not less so. I'm not sure whether we'll adopt the new error conditions Artur proposes, but this patch seems like good code cleanup and future-proofing in any case. The existing code is critically (and undocumented-ly) dependent on no elog being thrown out of several nontrivial functions, which is trouble waiting to happen, though it doesn't seem to be actively broken today. Discussion: <b2a39359-3282-b402-f4a3-057aae500ee7@postgrespro.ru>
2016-09-28 23:08:40 +02:00
/* look for an existing DCHCacheEntry matching the given format picture */
static DCHCacheEntry *
DCH_cache_search(const char *str, bool std)
{
/* Ensure we can advance DCHCounter below */
DCH_prevent_counter_overflow();
for (int i = 0; i < n_DCHCache; i++)
{
DCHCacheEntry *ent = DCHCache[i];
if (ent->valid && strcmp(ent->str, str) == 0 && ent->std == std)
{
ent->age = (++DCHCounter);
return ent;
}
}
return NULL;
}
Rationalize format-picture caching logic in formatting.c. Add a validity flag to DCHCacheEntry and NUMCacheEntry entries, and do not set it true until after we've parsed the supplied format string. This allows dealing with possible errors while parsing the format without the baroque hack that was there before (which only covered errors within NUMDesc_prepare, anyway). We can get rid of the PG_TRY in NUMDesc_prepare, as well as last_NUMCacheEntry and NUM_cache_remove. (Essentially, this reverts commit ff783fbae in favor of a less fragile solution; the problems with that approach are well illustrated by later hacking such as 55f927a46.) In passing, define the size of these caches as DCH_CACHE_ENTRIES not DCH_CACHE_FIELDS + 1 (whoever thought that was a good definition?) and likewise for the NUM cache. Also const-ify format string parameters where convenient, and merge duplicated cache lookup logic. This is primarily driven by a proposed patch from Artur Zakirov, which introduced some ereport's into format string parsing for the datetime case. He proposed preventing the creation of invalid cache entries by parsing the format string first into a local-variable array, and then copying that to a cache entry. That seemed a bit ugly to me, and anyway randomly different from the way the identical problem had been solved for the numeric case. Let's make the two sets of code more similar not less so. I'm not sure whether we'll adopt the new error conditions Artur proposes, but this patch seems like good code cleanup and future-proofing in any case. The existing code is critically (and undocumented-ly) dependent on no elog being thrown out of several nontrivial functions, which is trouble waiting to happen, though it doesn't seem to be actively broken today. Discussion: <b2a39359-3282-b402-f4a3-057aae500ee7@postgrespro.ru>
2016-09-28 23:08:40 +02:00
/* Find or create a DCHCacheEntry for the given format picture */
static DCHCacheEntry *
DCH_cache_fetch(const char *str, bool std)
Rationalize format-picture caching logic in formatting.c. Add a validity flag to DCHCacheEntry and NUMCacheEntry entries, and do not set it true until after we've parsed the supplied format string. This allows dealing with possible errors while parsing the format without the baroque hack that was there before (which only covered errors within NUMDesc_prepare, anyway). We can get rid of the PG_TRY in NUMDesc_prepare, as well as last_NUMCacheEntry and NUM_cache_remove. (Essentially, this reverts commit ff783fbae in favor of a less fragile solution; the problems with that approach are well illustrated by later hacking such as 55f927a46.) In passing, define the size of these caches as DCH_CACHE_ENTRIES not DCH_CACHE_FIELDS + 1 (whoever thought that was a good definition?) and likewise for the NUM cache. Also const-ify format string parameters where convenient, and merge duplicated cache lookup logic. This is primarily driven by a proposed patch from Artur Zakirov, which introduced some ereport's into format string parsing for the datetime case. He proposed preventing the creation of invalid cache entries by parsing the format string first into a local-variable array, and then copying that to a cache entry. That seemed a bit ugly to me, and anyway randomly different from the way the identical problem had been solved for the numeric case. Let's make the two sets of code more similar not less so. I'm not sure whether we'll adopt the new error conditions Artur proposes, but this patch seems like good code cleanup and future-proofing in any case. The existing code is critically (and undocumented-ly) dependent on no elog being thrown out of several nontrivial functions, which is trouble waiting to happen, though it doesn't seem to be actively broken today. Discussion: <b2a39359-3282-b402-f4a3-057aae500ee7@postgrespro.ru>
2016-09-28 23:08:40 +02:00
{
DCHCacheEntry *ent;
if ((ent = DCH_cache_search(str, std)) == NULL)
Rationalize format-picture caching logic in formatting.c. Add a validity flag to DCHCacheEntry and NUMCacheEntry entries, and do not set it true until after we've parsed the supplied format string. This allows dealing with possible errors while parsing the format without the baroque hack that was there before (which only covered errors within NUMDesc_prepare, anyway). We can get rid of the PG_TRY in NUMDesc_prepare, as well as last_NUMCacheEntry and NUM_cache_remove. (Essentially, this reverts commit ff783fbae in favor of a less fragile solution; the problems with that approach are well illustrated by later hacking such as 55f927a46.) In passing, define the size of these caches as DCH_CACHE_ENTRIES not DCH_CACHE_FIELDS + 1 (whoever thought that was a good definition?) and likewise for the NUM cache. Also const-ify format string parameters where convenient, and merge duplicated cache lookup logic. This is primarily driven by a proposed patch from Artur Zakirov, which introduced some ereport's into format string parsing for the datetime case. He proposed preventing the creation of invalid cache entries by parsing the format string first into a local-variable array, and then copying that to a cache entry. That seemed a bit ugly to me, and anyway randomly different from the way the identical problem had been solved for the numeric case. Let's make the two sets of code more similar not less so. I'm not sure whether we'll adopt the new error conditions Artur proposes, but this patch seems like good code cleanup and future-proofing in any case. The existing code is critically (and undocumented-ly) dependent on no elog being thrown out of several nontrivial functions, which is trouble waiting to happen, though it doesn't seem to be actively broken today. Discussion: <b2a39359-3282-b402-f4a3-057aae500ee7@postgrespro.ru>
2016-09-28 23:08:40 +02:00
{
/*
* Not in the cache, must run parser and save a new format-picture to
* the cache. Do not mark the cache entry valid until parsing
* succeeds.
*/
ent = DCH_cache_getnew(str, std);
Rationalize format-picture caching logic in formatting.c. Add a validity flag to DCHCacheEntry and NUMCacheEntry entries, and do not set it true until after we've parsed the supplied format string. This allows dealing with possible errors while parsing the format without the baroque hack that was there before (which only covered errors within NUMDesc_prepare, anyway). We can get rid of the PG_TRY in NUMDesc_prepare, as well as last_NUMCacheEntry and NUM_cache_remove. (Essentially, this reverts commit ff783fbae in favor of a less fragile solution; the problems with that approach are well illustrated by later hacking such as 55f927a46.) In passing, define the size of these caches as DCH_CACHE_ENTRIES not DCH_CACHE_FIELDS + 1 (whoever thought that was a good definition?) and likewise for the NUM cache. Also const-ify format string parameters where convenient, and merge duplicated cache lookup logic. This is primarily driven by a proposed patch from Artur Zakirov, which introduced some ereport's into format string parsing for the datetime case. He proposed preventing the creation of invalid cache entries by parsing the format string first into a local-variable array, and then copying that to a cache entry. That seemed a bit ugly to me, and anyway randomly different from the way the identical problem had been solved for the numeric case. Let's make the two sets of code more similar not less so. I'm not sure whether we'll adopt the new error conditions Artur proposes, but this patch seems like good code cleanup and future-proofing in any case. The existing code is critically (and undocumented-ly) dependent on no elog being thrown out of several nontrivial functions, which is trouble waiting to happen, though it doesn't seem to be actively broken today. Discussion: <b2a39359-3282-b402-f4a3-057aae500ee7@postgrespro.ru>
2016-09-28 23:08:40 +02:00
parse_format(ent->format, str, DCH_keywords, DCH_suff, DCH_index,
DCH_FLAG | (std ? STD_FLAG : 0), NULL);
Rationalize format-picture caching logic in formatting.c. Add a validity flag to DCHCacheEntry and NUMCacheEntry entries, and do not set it true until after we've parsed the supplied format string. This allows dealing with possible errors while parsing the format without the baroque hack that was there before (which only covered errors within NUMDesc_prepare, anyway). We can get rid of the PG_TRY in NUMDesc_prepare, as well as last_NUMCacheEntry and NUM_cache_remove. (Essentially, this reverts commit ff783fbae in favor of a less fragile solution; the problems with that approach are well illustrated by later hacking such as 55f927a46.) In passing, define the size of these caches as DCH_CACHE_ENTRIES not DCH_CACHE_FIELDS + 1 (whoever thought that was a good definition?) and likewise for the NUM cache. Also const-ify format string parameters where convenient, and merge duplicated cache lookup logic. This is primarily driven by a proposed patch from Artur Zakirov, which introduced some ereport's into format string parsing for the datetime case. He proposed preventing the creation of invalid cache entries by parsing the format string first into a local-variable array, and then copying that to a cache entry. That seemed a bit ugly to me, and anyway randomly different from the way the identical problem had been solved for the numeric case. Let's make the two sets of code more similar not less so. I'm not sure whether we'll adopt the new error conditions Artur proposes, but this patch seems like good code cleanup and future-proofing in any case. The existing code is critically (and undocumented-ly) dependent on no elog being thrown out of several nontrivial functions, which is trouble waiting to happen, though it doesn't seem to be actively broken today. Discussion: <b2a39359-3282-b402-f4a3-057aae500ee7@postgrespro.ru>
2016-09-28 23:08:40 +02:00
ent->valid = true;
}
return ent;
}
/*
* Format a date/time or interval into a string according to fmt.
* We parse fmt into a list of FormatNodes. This is then passed to DCH_to_char
* for formatting.
*/
static text *
datetime_to_char_body(TmToChar *tmtc, text *fmt, bool is_interval, Oid collid)
{
2001-03-22 05:01:46 +01:00
FormatNode *format;
char *fmt_str,
2004-08-29 07:07:03 +02:00
*result;
bool incache;
int fmt_len;
text *res;
/*
* Convert fmt to C string
*/
fmt_str = text_to_cstring(fmt);
fmt_len = strlen(fmt_str);
/*
* Allocate workspace for result as C string
*/
result = palloc((fmt_len * DCH_MAX_ITEM_SIZ) + 1);
*result = '\0';
if (fmt_len > DCH_CACHE_SIZE)
{
Rationalize format-picture caching logic in formatting.c. Add a validity flag to DCHCacheEntry and NUMCacheEntry entries, and do not set it true until after we've parsed the supplied format string. This allows dealing with possible errors while parsing the format without the baroque hack that was there before (which only covered errors within NUMDesc_prepare, anyway). We can get rid of the PG_TRY in NUMDesc_prepare, as well as last_NUMCacheEntry and NUM_cache_remove. (Essentially, this reverts commit ff783fbae in favor of a less fragile solution; the problems with that approach are well illustrated by later hacking such as 55f927a46.) In passing, define the size of these caches as DCH_CACHE_ENTRIES not DCH_CACHE_FIELDS + 1 (whoever thought that was a good definition?) and likewise for the NUM cache. Also const-ify format string parameters where convenient, and merge duplicated cache lookup logic. This is primarily driven by a proposed patch from Artur Zakirov, which introduced some ereport's into format string parsing for the datetime case. He proposed preventing the creation of invalid cache entries by parsing the format string first into a local-variable array, and then copying that to a cache entry. That seemed a bit ugly to me, and anyway randomly different from the way the identical problem had been solved for the numeric case. Let's make the two sets of code more similar not less so. I'm not sure whether we'll adopt the new error conditions Artur proposes, but this patch seems like good code cleanup and future-proofing in any case. The existing code is critically (and undocumented-ly) dependent on no elog being thrown out of several nontrivial functions, which is trouble waiting to happen, though it doesn't seem to be actively broken today. Discussion: <b2a39359-3282-b402-f4a3-057aae500ee7@postgrespro.ru>
2016-09-28 23:08:40 +02:00
/*
* Allocate new memory if format picture is bigger than static cache
* and do not use cache (call parser always)
*/
incache = false;
Rationalize format-picture caching logic in formatting.c. Add a validity flag to DCHCacheEntry and NUMCacheEntry entries, and do not set it true until after we've parsed the supplied format string. This allows dealing with possible errors while parsing the format without the baroque hack that was there before (which only covered errors within NUMDesc_prepare, anyway). We can get rid of the PG_TRY in NUMDesc_prepare, as well as last_NUMCacheEntry and NUM_cache_remove. (Essentially, this reverts commit ff783fbae in favor of a less fragile solution; the problems with that approach are well illustrated by later hacking such as 55f927a46.) In passing, define the size of these caches as DCH_CACHE_ENTRIES not DCH_CACHE_FIELDS + 1 (whoever thought that was a good definition?) and likewise for the NUM cache. Also const-ify format string parameters where convenient, and merge duplicated cache lookup logic. This is primarily driven by a proposed patch from Artur Zakirov, which introduced some ereport's into format string parsing for the datetime case. He proposed preventing the creation of invalid cache entries by parsing the format string first into a local-variable array, and then copying that to a cache entry. That seemed a bit ugly to me, and anyway randomly different from the way the identical problem had been solved for the numeric case. Let's make the two sets of code more similar not less so. I'm not sure whether we'll adopt the new error conditions Artur proposes, but this patch seems like good code cleanup and future-proofing in any case. The existing code is critically (and undocumented-ly) dependent on no elog being thrown out of several nontrivial functions, which is trouble waiting to happen, though it doesn't seem to be actively broken today. Discussion: <b2a39359-3282-b402-f4a3-057aae500ee7@postgrespro.ru>
2016-09-28 23:08:40 +02:00
format = (FormatNode *) palloc((fmt_len + 1) * sizeof(FormatNode));
parse_format(format, fmt_str, DCH_keywords,
DCH_suff, DCH_index, DCH_FLAG, NULL);
}
else
{
/*
* Use cache buffers
*/
DCHCacheEntry *ent = DCH_cache_fetch(fmt_str, false);
2004-08-29 07:07:03 +02:00
incache = true;
format = ent->format;
}
/* The real work is here */
DCH_to_char(format, is_interval, tmtc, result, collid);
if (!incache)
pfree(format);
pfree(fmt_str);
/* convert C-string result to TEXT format */
res = cstring_to_text(result);
pfree(result);
return res;
}
2001-03-22 05:01:46 +01:00
/****************************************************************************
* Public routines
***************************************************************************/
/* -------------------
* TIMESTAMP to_char()
* -------------------
*/
Datum
timestamp_to_char(PG_FUNCTION_ARGS)
{
Timestamp dt = PG_GETARG_TIMESTAMP(0);
text *fmt = PG_GETARG_TEXT_PP(1),
*res;
TmToChar tmtc;
struct pg_tm *tm;
int thisdate;
if (VARSIZE_ANY_EXHDR(fmt) <= 0 || TIMESTAMP_NOT_FINITE(dt))
PG_RETURN_NULL();
ZERO_tmtc(&tmtc);
tm = tmtcTm(&tmtc);
if (timestamp2tm(dt, NULL, tm, &tmtcFsec(&tmtc), NULL, NULL) != 0)
ereport(ERROR,
(errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
errmsg("timestamp out of range")));
thisdate = date2j(tm->tm_year, tm->tm_mon, tm->tm_mday);
tm->tm_wday = (thisdate + 1) % 7;
tm->tm_yday = thisdate - date2j(tm->tm_year, 1, 1) + 1;
if (!(res = datetime_to_char_body(&tmtc, fmt, false, PG_GET_COLLATION())))
PG_RETURN_NULL();
PG_RETURN_TEXT_P(res);
}
Datum
timestamptz_to_char(PG_FUNCTION_ARGS)
{
TimestampTz dt = PG_GETARG_TIMESTAMP(0);
text *fmt = PG_GETARG_TEXT_PP(1),
*res;
TmToChar tmtc;
int tz;
struct pg_tm *tm;
int thisdate;
if (VARSIZE_ANY_EXHDR(fmt) <= 0 || TIMESTAMP_NOT_FINITE(dt))
PG_RETURN_NULL();
ZERO_tmtc(&tmtc);
tm = tmtcTm(&tmtc);
if (timestamp2tm(dt, &tz, tm, &tmtcFsec(&tmtc), &tmtcTzn(&tmtc), NULL) != 0)
ereport(ERROR,
(errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
errmsg("timestamp out of range")));
thisdate = date2j(tm->tm_year, tm->tm_mon, tm->tm_mday);
tm->tm_wday = (thisdate + 1) % 7;
tm->tm_yday = thisdate - date2j(tm->tm_year, 1, 1) + 1;
if (!(res = datetime_to_char_body(&tmtc, fmt, false, PG_GET_COLLATION())))
PG_RETURN_NULL();
PG_RETURN_TEXT_P(res);
}
/* -------------------
* INTERVAL to_char()
* -------------------
*/
Datum
interval_to_char(PG_FUNCTION_ARGS)
{
Interval *it = PG_GETARG_INTERVAL_P(0);
text *fmt = PG_GETARG_TEXT_PP(1),
*res;
TmToChar tmtc;
struct pg_tm *tm;
if (VARSIZE_ANY_EXHDR(fmt) <= 0)
PG_RETURN_NULL();
ZERO_tmtc(&tmtc);
tm = tmtcTm(&tmtc);
if (interval2tm(*it, tm, &tmtcFsec(&tmtc)) != 0)
PG_RETURN_NULL();
/* wday is meaningless, yday approximates the total span in days */
tm->tm_yday = (tm->tm_year * MONTHS_PER_YEAR + tm->tm_mon) * DAYS_PER_MONTH + tm->tm_mday;
if (!(res = datetime_to_char_body(&tmtc, fmt, true, PG_GET_COLLATION())))
PG_RETURN_NULL();
PG_RETURN_TEXT_P(res);
}
/* ---------------------
* TO_TIMESTAMP()
*
* Make Timestamp from date_str which is formatted at argument 'fmt'
* ( to_timestamp is reverse to_char() )
* ---------------------
*/
2000-07-01 23:27:14 +02:00
Datum
to_timestamp(PG_FUNCTION_ARGS)
{
text *date_txt = PG_GETARG_TEXT_PP(0);
text *fmt = PG_GETARG_TEXT_PP(1);
Oid collid = PG_GET_COLLATION();
Timestamp result;
int tz;
2004-08-29 07:07:03 +02:00
struct pg_tm tm;
fsec_t fsec;
int fprec;
do_to_timestamp(date_txt, fmt, collid, false,
&tm, &fsec, &fprec, NULL, NULL);
/* Use the specified time zone, if any. */
if (tm.tm_zone)
{
int dterr = DecodeTimezone(unconstify(char *, tm.tm_zone), &tz);
if (dterr)
DateTimeParseError(dterr, text_to_cstring(date_txt), "timestamptz");
}
else
tz = DetermineTimeZoneOffset(&tm, session_timezone);
if (tm2timestamp(&tm, fsec, &tz, &result) != 0)
ereport(ERROR,
(errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
errmsg("timestamp out of range")));
/* Use the specified fractional precision, if any. */
if (fprec)
AdjustTimestampForTypmod(&result, fprec);
PG_RETURN_TIMESTAMP(result);
}
/* ----------
* TO_DATE
* Make Date from date_str which is formatted at argument 'fmt'
* ----------
*/
Datum
to_date(PG_FUNCTION_ARGS)
{
text *date_txt = PG_GETARG_TEXT_PP(0);
text *fmt = PG_GETARG_TEXT_PP(1);
Oid collid = PG_GET_COLLATION();
DateADT result;
2004-08-29 07:07:03 +02:00
struct pg_tm tm;
fsec_t fsec;
do_to_timestamp(date_txt, fmt, collid, false,
&tm, &fsec, NULL, NULL, NULL);
/* Prevent overflow in Julian-day routines */
if (!IS_VALID_JULIAN(tm.tm_year, tm.tm_mon, tm.tm_mday))
ereport(ERROR,
(errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
errmsg("date out of range: \"%s\"",
text_to_cstring(date_txt))));
result = date2j(tm.tm_year, tm.tm_mon, tm.tm_mday) - POSTGRES_EPOCH_JDATE;
/* Now check for just-out-of-range dates */
if (!IS_VALID_DATE(result))
ereport(ERROR,
(errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
errmsg("date out of range: \"%s\"",
text_to_cstring(date_txt))));
PG_RETURN_DATEADT(result);
}
/*
* Convert the 'date_txt' input to a datetime type using argument 'fmt'
* as a format string. The collation 'collid' may be used for case-folding
* rules in some cases. 'strict' specifies standard parsing mode.
*
* The actual data type (returned in 'typid', 'typmod') is determined by
* the presence of date/time/zone components in the format string.
*
* When timezone component is present, the corresponding offset is
* returned in '*tz'.
*
* If 'have_error' is NULL, then errors are thrown, else '*have_error' is set
* and zero value is returned.
*/
Datum
parse_datetime(text *date_txt, text *fmt, Oid collid, bool strict,
Oid *typid, int32 *typmod, int *tz,
bool *have_error)
{
struct pg_tm tm;
fsec_t fsec;
int fprec;
uint32 flags;
do_to_timestamp(date_txt, fmt, collid, strict,
&tm, &fsec, &fprec, &flags, have_error);
CHECK_ERROR;
*typmod = fprec ? fprec : -1; /* fractional part precision */
if (flags & DCH_DATED)
{
if (flags & DCH_TIMED)
{
if (flags & DCH_ZONED)
{
TimestampTz result;
if (tm.tm_zone)
{
int dterr = DecodeTimezone(unconstify(char *, tm.tm_zone), tz);
if (dterr)
DateTimeParseError(dterr, text_to_cstring(date_txt), "timestamptz");
}
else
{
/*
* Time zone is present in format string, but not in input
* string. Assuming do_to_timestamp() triggers no error
* this should be possible only in non-strict case.
*/
Assert(!strict);
RETURN_ERROR(ereport(ERROR,
(errcode(ERRCODE_INVALID_DATETIME_FORMAT),
errmsg("missing time zone in input string for type timestamptz"))));
}
if (tm2timestamp(&tm, fsec, tz, &result) != 0)
RETURN_ERROR(ereport(ERROR,
(errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
errmsg("timestamptz out of range"))));
AdjustTimestampForTypmod(&result, *typmod);
*typid = TIMESTAMPTZOID;
return TimestampTzGetDatum(result);
}
else
{
Timestamp result;
if (tm2timestamp(&tm, fsec, NULL, &result) != 0)
RETURN_ERROR(ereport(ERROR,
(errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
errmsg("timestamp out of range"))));
AdjustTimestampForTypmod(&result, *typmod);
*typid = TIMESTAMPOID;
return TimestampGetDatum(result);
}
}
else
{
if (flags & DCH_ZONED)
{
RETURN_ERROR(ereport(ERROR,
(errcode(ERRCODE_INVALID_DATETIME_FORMAT),
errmsg("datetime format is zoned but not timed"))));
}
else
{
DateADT result;
/* Prevent overflow in Julian-day routines */
if (!IS_VALID_JULIAN(tm.tm_year, tm.tm_mon, tm.tm_mday))
RETURN_ERROR(ereport(ERROR,
(errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
errmsg("date out of range: \"%s\"",
text_to_cstring(date_txt)))));
result = date2j(tm.tm_year, tm.tm_mon, tm.tm_mday) -
POSTGRES_EPOCH_JDATE;
/* Now check for just-out-of-range dates */
if (!IS_VALID_DATE(result))
RETURN_ERROR(ereport(ERROR,
(errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
errmsg("date out of range: \"%s\"",
text_to_cstring(date_txt)))));
*typid = DATEOID;
return DateADTGetDatum(result);
}
}
}
else if (flags & DCH_TIMED)
{
if (flags & DCH_ZONED)
{
TimeTzADT *result = palloc(sizeof(TimeTzADT));
if (tm.tm_zone)
{
int dterr = DecodeTimezone(unconstify(char *, tm.tm_zone), tz);
if (dterr)
RETURN_ERROR(DateTimeParseError(dterr, text_to_cstring(date_txt), "timetz"));
}
else
{
/*
* Time zone is present in format string, but not in input
* string. Assuming do_to_timestamp() triggers no error this
* should be possible only in non-strict case.
*/
Assert(!strict);
RETURN_ERROR(ereport(ERROR,
(errcode(ERRCODE_INVALID_DATETIME_FORMAT),
errmsg("missing time zone in input string for type timetz"))));
}
if (tm2timetz(&tm, fsec, *tz, result) != 0)
RETURN_ERROR(ereport(ERROR,
(errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
errmsg("timetz out of range"))));
AdjustTimeForTypmod(&result->time, *typmod);
*typid = TIMETZOID;
return TimeTzADTPGetDatum(result);
}
else
{
TimeADT result;
if (tm2time(&tm, fsec, &result) != 0)
RETURN_ERROR(ereport(ERROR,
(errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
errmsg("time out of range"))));
AdjustTimeForTypmod(&result, *typmod);
*typid = TIMEOID;
return TimeADTGetDatum(result);
}
}
else
{
RETURN_ERROR(ereport(ERROR,
(errcode(ERRCODE_INVALID_DATETIME_FORMAT),
errmsg("datetime format is not dated and not timed"))));
}
on_error:
return (Datum) 0;
}
/*
* do_to_timestamp: shared code for to_timestamp and to_date
*
* Parse the 'date_txt' according to 'fmt', return results as a struct pg_tm,
* fractional seconds, and fractional precision.
*
* 'collid' identifies the collation to use, if needed.
* 'std' specifies standard parsing mode.
* Bit mask of date/time/zone components found in 'fmt' is returned in 'flags',
* if that is not NULL.
* If 'have_error' is NULL, then errors are thrown, else '*have_error' is set.
*
* We parse 'fmt' into a list of FormatNodes, which is then passed to
* DCH_from_char to populate a TmFromChar with the parsed contents of
* 'date_txt'.
*
* The TmFromChar is then analysed and converted into the final results in
* struct 'tm', 'fsec', and 'fprec'.
*/
static void
do_to_timestamp(text *date_txt, text *fmt, Oid collid, bool std,
struct pg_tm *tm, fsec_t *fsec, int *fprec,
uint32 *flags, bool *have_error)
{
FormatNode *format = NULL;
TmFromChar tmfc;
int fmt_len;
char *date_str;
int fmask;
bool incache = false;
Assert(tm != NULL);
Assert(fsec != NULL);
date_str = text_to_cstring(date_txt);
ZERO_tmfc(&tmfc);
ZERO_tm(tm);
*fsec = 0;
if (fprec)
*fprec = 0;
if (flags)
*flags = 0;
fmask = 0; /* bit mask for ValidateDate() */
fmt_len = VARSIZE_ANY_EXHDR(fmt);
if (fmt_len)
{
2004-08-29 07:07:03 +02:00
char *fmt_str;
fmt_str = text_to_cstring(fmt);
if (fmt_len > DCH_CACHE_SIZE)
{
/*
* Allocate new memory if format picture is bigger than static
Rationalize format-picture caching logic in formatting.c. Add a validity flag to DCHCacheEntry and NUMCacheEntry entries, and do not set it true until after we've parsed the supplied format string. This allows dealing with possible errors while parsing the format without the baroque hack that was there before (which only covered errors within NUMDesc_prepare, anyway). We can get rid of the PG_TRY in NUMDesc_prepare, as well as last_NUMCacheEntry and NUM_cache_remove. (Essentially, this reverts commit ff783fbae in favor of a less fragile solution; the problems with that approach are well illustrated by later hacking such as 55f927a46.) In passing, define the size of these caches as DCH_CACHE_ENTRIES not DCH_CACHE_FIELDS + 1 (whoever thought that was a good definition?) and likewise for the NUM cache. Also const-ify format string parameters where convenient, and merge duplicated cache lookup logic. This is primarily driven by a proposed patch from Artur Zakirov, which introduced some ereport's into format string parsing for the datetime case. He proposed preventing the creation of invalid cache entries by parsing the format string first into a local-variable array, and then copying that to a cache entry. That seemed a bit ugly to me, and anyway randomly different from the way the identical problem had been solved for the numeric case. Let's make the two sets of code more similar not less so. I'm not sure whether we'll adopt the new error conditions Artur proposes, but this patch seems like good code cleanup and future-proofing in any case. The existing code is critically (and undocumented-ly) dependent on no elog being thrown out of several nontrivial functions, which is trouble waiting to happen, though it doesn't seem to be actively broken today. Discussion: <b2a39359-3282-b402-f4a3-057aae500ee7@postgrespro.ru>
2016-09-28 23:08:40 +02:00
* cache and do not use cache (call parser always)
*/
format = (FormatNode *) palloc((fmt_len + 1) * sizeof(FormatNode));
parse_format(format, fmt_str, DCH_keywords, DCH_suff, DCH_index,
DCH_FLAG | (std ? STD_FLAG : 0), NULL);
}
else
{
/*
* Use cache buffers
*/
DCHCacheEntry *ent = DCH_cache_fetch(fmt_str, std);
2004-08-29 07:07:03 +02:00
incache = true;
format = ent->format;
}
#ifdef DEBUG_TO_FROM_CHAR
/* dump_node(format, fmt_len); */
/* dump_index(DCH_keywords, DCH_index); */
#endif
DCH_from_char(format, date_str, &tmfc, collid, std, have_error);
CHECK_ERROR;
pfree(fmt_str);
if (flags)
*flags = DCH_datetime_type(format, have_error);
if (!incache)
{
pfree(format);
format = NULL;
}
CHECK_ERROR;
}
DEBUG_TMFC(&tmfc);
/*
* Convert to_date/to_timestamp input fields to standard 'tm'
2001-03-22 05:01:46 +01:00
*/
if (tmfc.ssss)
{
int x = tmfc.ssss;
tm->tm_hour = x / SECS_PER_HOUR;
x %= SECS_PER_HOUR;
tm->tm_min = x / SECS_PER_MINUTE;
x %= SECS_PER_MINUTE;
tm->tm_sec = x;
}
if (tmfc.ss)
tm->tm_sec = tmfc.ss;
if (tmfc.mi)
tm->tm_min = tmfc.mi;
if (tmfc.hh)
tm->tm_hour = tmfc.hh;
2001-03-22 05:01:46 +01:00
if (tmfc.clock == CLOCK_12_HOUR)
2001-03-22 05:01:46 +01:00
{
if (tm->tm_hour < 1 || tm->tm_hour > HOURS_PER_DAY / 2)
{
RETURN_ERROR(ereport(ERROR,
(errcode(ERRCODE_INVALID_DATETIME_FORMAT),
errmsg("hour \"%d\" is invalid for the 12-hour clock",
tm->tm_hour),
errhint("Use the 24-hour clock, or give an hour between 1 and 12."))));
}
2001-03-22 05:01:46 +01:00
if (tmfc.pm && tm->tm_hour < HOURS_PER_DAY / 2)
tm->tm_hour += HOURS_PER_DAY / 2;
else if (!tmfc.pm && tm->tm_hour == HOURS_PER_DAY / 2)
tm->tm_hour = 0;
2001-03-22 05:01:46 +01:00
}
if (tmfc.year)
{
/*
2007-11-15 22:14:46 +01:00
* If CC and YY (or Y) are provided, use YY as 2 low-order digits for
* the year in the given century. Keep in mind that the 21st century
* AD runs from 2001-2100, not 2000-2099; 6th century BC runs from
* 600BC to 501BC.
*/
if (tmfc.cc && tmfc.yysz <= 2)
{
if (tmfc.bc)
tmfc.cc = -tmfc.cc;
tm->tm_year = tmfc.year % 100;
if (tm->tm_year)
{
if (tmfc.cc >= 0)
tm->tm_year += (tmfc.cc - 1) * 100;
else
tm->tm_year = (tmfc.cc + 1) * 100 - tm->tm_year + 1;
}
else
{
/* find century year for dates ending in "00" */
tm->tm_year = tmfc.cc * 100 + ((tmfc.cc >= 0) ? 0 : 1);
}
}
else
{
/* If a 4-digit year is provided, we use that and ignore CC. */
tm->tm_year = tmfc.year;
if (tmfc.bc && tm->tm_year > 0)
tm->tm_year = -(tm->tm_year - 1);
}
fmask |= DTK_M(YEAR);
}
else if (tmfc.cc)
{
/* use first year of century */
if (tmfc.bc)
tmfc.cc = -tmfc.cc;
if (tmfc.cc >= 0)
2014-05-22 05:18:38 +02:00
/* +1 because 21st century started in 2001 */
tm->tm_year = (tmfc.cc - 1) * 100 + 1;
else
/* +1 because year == 599 is 600 BC */
tm->tm_year = tmfc.cc * 100 + 1;
fmask |= DTK_M(YEAR);
2001-03-22 05:01:46 +01:00
}
if (tmfc.j)
{
j2date(tmfc.j, &tm->tm_year, &tm->tm_mon, &tm->tm_mday);
fmask |= DTK_DATE_M;
}
2001-03-22 05:01:46 +01:00
if (tmfc.ww)
{
if (tmfc.mode == FROM_CHAR_DATE_ISOWEEK)
{
/*
* If tmfc.d is not set, then the date is left at the beginning of
* the ISO week (Monday).
*/
if (tmfc.d)
isoweekdate2date(tmfc.ww, tmfc.d, &tm->tm_year, &tm->tm_mon, &tm->tm_mday);
else
isoweek2date(tmfc.ww, &tm->tm_year, &tm->tm_mon, &tm->tm_mday);
fmask |= DTK_DATE_M;
}
else
tmfc.ddd = (tmfc.ww - 1) * 7 + 1;
}
if (tmfc.w)
tmfc.dd = (tmfc.w - 1) * 7 + 1;
if (tmfc.dd)
{
tm->tm_mday = tmfc.dd;
fmask |= DTK_M(DAY);
}
if (tmfc.mm)
{
tm->tm_mon = tmfc.mm;
fmask |= DTK_M(MONTH);
}
if (tmfc.ddd && (tm->tm_mon <= 1 || tm->tm_mday <= 1))
{
/*
* The month and day field have not been set, so we use the
* day-of-year field to populate them. Depending on the date mode,
* this field may be interpreted as a Gregorian day-of-year, or an ISO
* week date day-of-year.
*/
if (!tm->tm_year && !tmfc.bc)
{
RETURN_ERROR(ereport(ERROR,
(errcode(ERRCODE_INVALID_DATETIME_FORMAT),
errmsg("cannot calculate day of year without year information"))));
}
if (tmfc.mode == FROM_CHAR_DATE_ISOWEEK)
{
2007-11-15 22:14:46 +01:00
int j0; /* zeroth day of the ISO year, in Julian */
j0 = isoweek2j(tm->tm_year, 1) - 1;
2001-03-22 05:01:46 +01:00
j2date(j0 + tmfc.ddd, &tm->tm_year, &tm->tm_mon, &tm->tm_mday);
fmask |= DTK_DATE_M;
}
else
{
const int *y;
int i;
2001-03-22 05:01:46 +01:00
static const int ysum[2][13] = {
{0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334, 365},
{0, 31, 60, 91, 121, 152, 182, 213, 244, 274, 305, 335, 366}};
2001-03-22 05:01:46 +01:00
y = ysum[isleap(tm->tm_year)];
2001-03-22 05:01:46 +01:00
for (i = 1; i <= MONTHS_PER_YEAR; i++)
{
if (tmfc.ddd <= y[i])
break;
}
if (tm->tm_mon <= 1)
tm->tm_mon = i;
if (tm->tm_mday <= 1)
tm->tm_mday = tmfc.ddd - y[i - 1];
fmask |= DTK_M(MONTH) | DTK_M(DAY);
}
}
2001-03-22 05:01:46 +01:00
Support alternate storage scheme of 64-bit integer for date/time types. Use "--enable-integer-datetimes" in configuration to use this rather than the original float8 storage. I would recommend the integer-based storage for any platform on which it is available. We perhaps should make this the default for the production release. Change timezone(timestamptz) results to return timestamp rather than a character string. Formerly, we didn't have a way to represent timestamps with an explicit time zone other than freezing the info into a string. Now, we can reasonably omit the explicit time zone from the result and return a timestamp with values appropriate for the specified time zone. Much cleaner, and if you need the time zone in the result you can put it into a character string pretty easily anyway. Allow fractional seconds in date/time types even for dates prior to 1BC. Limit timestamp data types to 6 decimal places of precision. Just right for a micro-second storage of int8 date/time types, and reduces the number of places ad-hoc rounding was occuring for the float8-based types. Use lookup tables for precision/rounding calculations for timestamp and interval types. Formerly used pow() to calculate the desired value but with a more limited range there is no reason to not type in a lookup table. Should be *much* better performance, though formerly there were some optimizations to help minimize the number of times pow() was called. Define a HAVE_INT64_TIMESTAMP variable. Based on the configure option "--enable-integer-datetimes" and the existing internal INT64_IS_BUSTED. Add explicit date/interval operators and functions for addition and subtraction. Formerly relied on implicit type promotion from date to timestamp with time zone. Change timezone conversion functions for the timetz type from "timetz()" to "timezone()". This is consistant with other time zone coersion functions for other types. Bump the catalog version to 200204201. Fix up regression tests to reflect changes in fractional seconds representation for date/times in BC eras. All regression tests pass on my Linux box.
2002-04-21 21:52:18 +02:00
if (tmfc.ms)
*fsec += tmfc.ms * 1000;
Support alternate storage scheme of 64-bit integer for date/time types. Use "--enable-integer-datetimes" in configuration to use this rather than the original float8 storage. I would recommend the integer-based storage for any platform on which it is available. We perhaps should make this the default for the production release. Change timezone(timestamptz) results to return timestamp rather than a character string. Formerly, we didn't have a way to represent timestamps with an explicit time zone other than freezing the info into a string. Now, we can reasonably omit the explicit time zone from the result and return a timestamp with values appropriate for the specified time zone. Much cleaner, and if you need the time zone in the result you can put it into a character string pretty easily anyway. Allow fractional seconds in date/time types even for dates prior to 1BC. Limit timestamp data types to 6 decimal places of precision. Just right for a micro-second storage of int8 date/time types, and reduces the number of places ad-hoc rounding was occuring for the float8-based types. Use lookup tables for precision/rounding calculations for timestamp and interval types. Formerly used pow() to calculate the desired value but with a more limited range there is no reason to not type in a lookup table. Should be *much* better performance, though formerly there were some optimizations to help minimize the number of times pow() was called. Define a HAVE_INT64_TIMESTAMP variable. Based on the configure option "--enable-integer-datetimes" and the existing internal INT64_IS_BUSTED. Add explicit date/interval operators and functions for addition and subtraction. Formerly relied on implicit type promotion from date to timestamp with time zone. Change timezone conversion functions for the timetz type from "timetz()" to "timezone()". This is consistant with other time zone coersion functions for other types. Bump the catalog version to 200204201. Fix up regression tests to reflect changes in fractional seconds representation for date/times in BC eras. All regression tests pass on my Linux box.
2002-04-21 21:52:18 +02:00
if (tmfc.us)
*fsec += tmfc.us;
if (fprec)
*fprec = tmfc.ff; /* fractional precision, if specified */
/* Range-check date fields according to bit mask computed above */
if (fmask != 0)
{
/* We already dealt with AD/BC, so pass isjulian = true */
int dterr = ValidateDate(fmask, true, false, false, tm);
if (dterr != 0)
{
/*
* Force the error to be DTERR_FIELD_OVERFLOW even if ValidateDate
* said DTERR_MD_FIELD_OVERFLOW, because we don't want to print an
* irrelevant hint about datestyle.
*/
RETURN_ERROR(DateTimeParseError(DTERR_FIELD_OVERFLOW, date_str, "timestamp"));
}
}
/* Range-check time fields too */
if (tm->tm_hour < 0 || tm->tm_hour >= HOURS_PER_DAY ||
tm->tm_min < 0 || tm->tm_min >= MINS_PER_HOUR ||
tm->tm_sec < 0 || tm->tm_sec >= SECS_PER_MINUTE ||
*fsec < INT64CONST(0) || *fsec >= USECS_PER_SEC)
{
RETURN_ERROR(DateTimeParseError(DTERR_FIELD_OVERFLOW, date_str, "timestamp"));
}
/* Save parsed time-zone into tm->tm_zone if it was specified */
if (tmfc.tzsign)
{
char *tz;
if (tmfc.tzh < 0 || tmfc.tzh > MAX_TZDISP_HOUR ||
tmfc.tzm < 0 || tmfc.tzm >= MINS_PER_HOUR)
{
RETURN_ERROR(DateTimeParseError(DTERR_TZDISP_OVERFLOW, date_str, "timestamp"));
}
tz = psprintf("%c%02d:%02d",
tmfc.tzsign > 0 ? '+' : '-', tmfc.tzh, tmfc.tzm);
tm->tm_zone = tz;
}
DEBUG_TM(tm);
on_error:
if (format && !incache)
pfree(format);
pfree(date_str);
}
/**********************************************************************
* the NUMBER version part
*********************************************************************/
static char *
fill_str(char *str, int c, int max)
{
memset(str, c, max);
*(str + max) = '\0';
return str;
}
#define zeroize_NUM(_n) \
do { \
(_n)->flag = 0; \
(_n)->lsign = 0; \
(_n)->pre = 0; \
(_n)->post = 0; \
2001-03-22 05:01:46 +01:00
(_n)->pre_lsign_num = 0; \
(_n)->need_locale = 0; \
(_n)->multi = 0; \
(_n)->zero_start = 0; \
(_n)->zero_end = 0; \
} while(0)
/* This works the same as DCH_prevent_counter_overflow */
static inline void
NUM_prevent_counter_overflow(void)
{
if (NUMCounter >= (INT_MAX - 1))
{
for (int i = 0; i < n_NUMCache; i++)
NUMCache[i]->age >>= 1;
NUMCounter >>= 1;
}
}
Rationalize format-picture caching logic in formatting.c. Add a validity flag to DCHCacheEntry and NUMCacheEntry entries, and do not set it true until after we've parsed the supplied format string. This allows dealing with possible errors while parsing the format without the baroque hack that was there before (which only covered errors within NUMDesc_prepare, anyway). We can get rid of the PG_TRY in NUMDesc_prepare, as well as last_NUMCacheEntry and NUM_cache_remove. (Essentially, this reverts commit ff783fbae in favor of a less fragile solution; the problems with that approach are well illustrated by later hacking such as 55f927a46.) In passing, define the size of these caches as DCH_CACHE_ENTRIES not DCH_CACHE_FIELDS + 1 (whoever thought that was a good definition?) and likewise for the NUM cache. Also const-ify format string parameters where convenient, and merge duplicated cache lookup logic. This is primarily driven by a proposed patch from Artur Zakirov, which introduced some ereport's into format string parsing for the datetime case. He proposed preventing the creation of invalid cache entries by parsing the format string first into a local-variable array, and then copying that to a cache entry. That seemed a bit ugly to me, and anyway randomly different from the way the identical problem had been solved for the numeric case. Let's make the two sets of code more similar not less so. I'm not sure whether we'll adopt the new error conditions Artur proposes, but this patch seems like good code cleanup and future-proofing in any case. The existing code is critically (and undocumented-ly) dependent on no elog being thrown out of several nontrivial functions, which is trouble waiting to happen, though it doesn't seem to be actively broken today. Discussion: <b2a39359-3282-b402-f4a3-057aae500ee7@postgrespro.ru>
2016-09-28 23:08:40 +02:00
/* select a NUMCacheEntry to hold the given format picture */
static NUMCacheEntry *
Rationalize format-picture caching logic in formatting.c. Add a validity flag to DCHCacheEntry and NUMCacheEntry entries, and do not set it true until after we've parsed the supplied format string. This allows dealing with possible errors while parsing the format without the baroque hack that was there before (which only covered errors within NUMDesc_prepare, anyway). We can get rid of the PG_TRY in NUMDesc_prepare, as well as last_NUMCacheEntry and NUM_cache_remove. (Essentially, this reverts commit ff783fbae in favor of a less fragile solution; the problems with that approach are well illustrated by later hacking such as 55f927a46.) In passing, define the size of these caches as DCH_CACHE_ENTRIES not DCH_CACHE_FIELDS + 1 (whoever thought that was a good definition?) and likewise for the NUM cache. Also const-ify format string parameters where convenient, and merge duplicated cache lookup logic. This is primarily driven by a proposed patch from Artur Zakirov, which introduced some ereport's into format string parsing for the datetime case. He proposed preventing the creation of invalid cache entries by parsing the format string first into a local-variable array, and then copying that to a cache entry. That seemed a bit ugly to me, and anyway randomly different from the way the identical problem had been solved for the numeric case. Let's make the two sets of code more similar not less so. I'm not sure whether we'll adopt the new error conditions Artur proposes, but this patch seems like good code cleanup and future-proofing in any case. The existing code is critically (and undocumented-ly) dependent on no elog being thrown out of several nontrivial functions, which is trouble waiting to happen, though it doesn't seem to be actively broken today. Discussion: <b2a39359-3282-b402-f4a3-057aae500ee7@postgrespro.ru>
2016-09-28 23:08:40 +02:00
NUM_cache_getnew(const char *str)
{
NUMCacheEntry *ent;
/* Ensure we can advance NUMCounter below */
NUM_prevent_counter_overflow();
/*
Rationalize format-picture caching logic in formatting.c. Add a validity flag to DCHCacheEntry and NUMCacheEntry entries, and do not set it true until after we've parsed the supplied format string. This allows dealing with possible errors while parsing the format without the baroque hack that was there before (which only covered errors within NUMDesc_prepare, anyway). We can get rid of the PG_TRY in NUMDesc_prepare, as well as last_NUMCacheEntry and NUM_cache_remove. (Essentially, this reverts commit ff783fbae in favor of a less fragile solution; the problems with that approach are well illustrated by later hacking such as 55f927a46.) In passing, define the size of these caches as DCH_CACHE_ENTRIES not DCH_CACHE_FIELDS + 1 (whoever thought that was a good definition?) and likewise for the NUM cache. Also const-ify format string parameters where convenient, and merge duplicated cache lookup logic. This is primarily driven by a proposed patch from Artur Zakirov, which introduced some ereport's into format string parsing for the datetime case. He proposed preventing the creation of invalid cache entries by parsing the format string first into a local-variable array, and then copying that to a cache entry. That seemed a bit ugly to me, and anyway randomly different from the way the identical problem had been solved for the numeric case. Let's make the two sets of code more similar not less so. I'm not sure whether we'll adopt the new error conditions Artur proposes, but this patch seems like good code cleanup and future-proofing in any case. The existing code is critically (and undocumented-ly) dependent on no elog being thrown out of several nontrivial functions, which is trouble waiting to happen, though it doesn't seem to be actively broken today. Discussion: <b2a39359-3282-b402-f4a3-057aae500ee7@postgrespro.ru>
2016-09-28 23:08:40 +02:00
* If cache is full, remove oldest entry (or recycle first not-valid one)
*/
Rationalize format-picture caching logic in formatting.c. Add a validity flag to DCHCacheEntry and NUMCacheEntry entries, and do not set it true until after we've parsed the supplied format string. This allows dealing with possible errors while parsing the format without the baroque hack that was there before (which only covered errors within NUMDesc_prepare, anyway). We can get rid of the PG_TRY in NUMDesc_prepare, as well as last_NUMCacheEntry and NUM_cache_remove. (Essentially, this reverts commit ff783fbae in favor of a less fragile solution; the problems with that approach are well illustrated by later hacking such as 55f927a46.) In passing, define the size of these caches as DCH_CACHE_ENTRIES not DCH_CACHE_FIELDS + 1 (whoever thought that was a good definition?) and likewise for the NUM cache. Also const-ify format string parameters where convenient, and merge duplicated cache lookup logic. This is primarily driven by a proposed patch from Artur Zakirov, which introduced some ereport's into format string parsing for the datetime case. He proposed preventing the creation of invalid cache entries by parsing the format string first into a local-variable array, and then copying that to a cache entry. That seemed a bit ugly to me, and anyway randomly different from the way the identical problem had been solved for the numeric case. Let's make the two sets of code more similar not less so. I'm not sure whether we'll adopt the new error conditions Artur proposes, but this patch seems like good code cleanup and future-proofing in any case. The existing code is critically (and undocumented-ly) dependent on no elog being thrown out of several nontrivial functions, which is trouble waiting to happen, though it doesn't seem to be actively broken today. Discussion: <b2a39359-3282-b402-f4a3-057aae500ee7@postgrespro.ru>
2016-09-28 23:08:40 +02:00
if (n_NUMCache >= NUM_CACHE_ENTRIES)
{
NUMCacheEntry *old = NUMCache[0];
#ifdef DEBUG_TO_FROM_CHAR
elog(DEBUG_elog_output, "Cache is full (%d)", n_NUMCache);
#endif
Rationalize format-picture caching logic in formatting.c. Add a validity flag to DCHCacheEntry and NUMCacheEntry entries, and do not set it true until after we've parsed the supplied format string. This allows dealing with possible errors while parsing the format without the baroque hack that was there before (which only covered errors within NUMDesc_prepare, anyway). We can get rid of the PG_TRY in NUMDesc_prepare, as well as last_NUMCacheEntry and NUM_cache_remove. (Essentially, this reverts commit ff783fbae in favor of a less fragile solution; the problems with that approach are well illustrated by later hacking such as 55f927a46.) In passing, define the size of these caches as DCH_CACHE_ENTRIES not DCH_CACHE_FIELDS + 1 (whoever thought that was a good definition?) and likewise for the NUM cache. Also const-ify format string parameters where convenient, and merge duplicated cache lookup logic. This is primarily driven by a proposed patch from Artur Zakirov, which introduced some ereport's into format string parsing for the datetime case. He proposed preventing the creation of invalid cache entries by parsing the format string first into a local-variable array, and then copying that to a cache entry. That seemed a bit ugly to me, and anyway randomly different from the way the identical problem had been solved for the numeric case. Let's make the two sets of code more similar not less so. I'm not sure whether we'll adopt the new error conditions Artur proposes, but this patch seems like good code cleanup and future-proofing in any case. The existing code is critically (and undocumented-ly) dependent on no elog being thrown out of several nontrivial functions, which is trouble waiting to happen, though it doesn't seem to be actively broken today. Discussion: <b2a39359-3282-b402-f4a3-057aae500ee7@postgrespro.ru>
2016-09-28 23:08:40 +02:00
if (old->valid)
{
for (int i = 1; i < NUM_CACHE_ENTRIES; i++)
{
ent = NUMCache[i];
Rationalize format-picture caching logic in formatting.c. Add a validity flag to DCHCacheEntry and NUMCacheEntry entries, and do not set it true until after we've parsed the supplied format string. This allows dealing with possible errors while parsing the format without the baroque hack that was there before (which only covered errors within NUMDesc_prepare, anyway). We can get rid of the PG_TRY in NUMDesc_prepare, as well as last_NUMCacheEntry and NUM_cache_remove. (Essentially, this reverts commit ff783fbae in favor of a less fragile solution; the problems with that approach are well illustrated by later hacking such as 55f927a46.) In passing, define the size of these caches as DCH_CACHE_ENTRIES not DCH_CACHE_FIELDS + 1 (whoever thought that was a good definition?) and likewise for the NUM cache. Also const-ify format string parameters where convenient, and merge duplicated cache lookup logic. This is primarily driven by a proposed patch from Artur Zakirov, which introduced some ereport's into format string parsing for the datetime case. He proposed preventing the creation of invalid cache entries by parsing the format string first into a local-variable array, and then copying that to a cache entry. That seemed a bit ugly to me, and anyway randomly different from the way the identical problem had been solved for the numeric case. Let's make the two sets of code more similar not less so. I'm not sure whether we'll adopt the new error conditions Artur proposes, but this patch seems like good code cleanup and future-proofing in any case. The existing code is critically (and undocumented-ly) dependent on no elog being thrown out of several nontrivial functions, which is trouble waiting to happen, though it doesn't seem to be actively broken today. Discussion: <b2a39359-3282-b402-f4a3-057aae500ee7@postgrespro.ru>
2016-09-28 23:08:40 +02:00
if (!ent->valid)
{
old = ent;
break;
}
if (ent->age < old->age)
old = ent;
}
}
#ifdef DEBUG_TO_FROM_CHAR
elog(DEBUG_elog_output, "OLD: \"%s\" AGE: %d", old->str, old->age);
#endif
Rationalize format-picture caching logic in formatting.c. Add a validity flag to DCHCacheEntry and NUMCacheEntry entries, and do not set it true until after we've parsed the supplied format string. This allows dealing with possible errors while parsing the format without the baroque hack that was there before (which only covered errors within NUMDesc_prepare, anyway). We can get rid of the PG_TRY in NUMDesc_prepare, as well as last_NUMCacheEntry and NUM_cache_remove. (Essentially, this reverts commit ff783fbae in favor of a less fragile solution; the problems with that approach are well illustrated by later hacking such as 55f927a46.) In passing, define the size of these caches as DCH_CACHE_ENTRIES not DCH_CACHE_FIELDS + 1 (whoever thought that was a good definition?) and likewise for the NUM cache. Also const-ify format string parameters where convenient, and merge duplicated cache lookup logic. This is primarily driven by a proposed patch from Artur Zakirov, which introduced some ereport's into format string parsing for the datetime case. He proposed preventing the creation of invalid cache entries by parsing the format string first into a local-variable array, and then copying that to a cache entry. That seemed a bit ugly to me, and anyway randomly different from the way the identical problem had been solved for the numeric case. Let's make the two sets of code more similar not less so. I'm not sure whether we'll adopt the new error conditions Artur proposes, but this patch seems like good code cleanup and future-proofing in any case. The existing code is critically (and undocumented-ly) dependent on no elog being thrown out of several nontrivial functions, which is trouble waiting to happen, though it doesn't seem to be actively broken today. Discussion: <b2a39359-3282-b402-f4a3-057aae500ee7@postgrespro.ru>
2016-09-28 23:08:40 +02:00
old->valid = false;
StrNCpy(old->str, str, NUM_CACHE_SIZE + 1);
old->age = (++NUMCounter);
Rationalize format-picture caching logic in formatting.c. Add a validity flag to DCHCacheEntry and NUMCacheEntry entries, and do not set it true until after we've parsed the supplied format string. This allows dealing with possible errors while parsing the format without the baroque hack that was there before (which only covered errors within NUMDesc_prepare, anyway). We can get rid of the PG_TRY in NUMDesc_prepare, as well as last_NUMCacheEntry and NUM_cache_remove. (Essentially, this reverts commit ff783fbae in favor of a less fragile solution; the problems with that approach are well illustrated by later hacking such as 55f927a46.) In passing, define the size of these caches as DCH_CACHE_ENTRIES not DCH_CACHE_FIELDS + 1 (whoever thought that was a good definition?) and likewise for the NUM cache. Also const-ify format string parameters where convenient, and merge duplicated cache lookup logic. This is primarily driven by a proposed patch from Artur Zakirov, which introduced some ereport's into format string parsing for the datetime case. He proposed preventing the creation of invalid cache entries by parsing the format string first into a local-variable array, and then copying that to a cache entry. That seemed a bit ugly to me, and anyway randomly different from the way the identical problem had been solved for the numeric case. Let's make the two sets of code more similar not less so. I'm not sure whether we'll adopt the new error conditions Artur proposes, but this patch seems like good code cleanup and future-proofing in any case. The existing code is critically (and undocumented-ly) dependent on no elog being thrown out of several nontrivial functions, which is trouble waiting to happen, though it doesn't seem to be actively broken today. Discussion: <b2a39359-3282-b402-f4a3-057aae500ee7@postgrespro.ru>
2016-09-28 23:08:40 +02:00
/* caller is expected to fill format and Num, then set valid */
return old;
}
else
{
#ifdef DEBUG_TO_FROM_CHAR
elog(DEBUG_elog_output, "NEW (%d)", n_NUMCache);
#endif
Assert(NUMCache[n_NUMCache] == NULL);
NUMCache[n_NUMCache] = ent = (NUMCacheEntry *)
MemoryContextAllocZero(TopMemoryContext, sizeof(NUMCacheEntry));
Rationalize format-picture caching logic in formatting.c. Add a validity flag to DCHCacheEntry and NUMCacheEntry entries, and do not set it true until after we've parsed the supplied format string. This allows dealing with possible errors while parsing the format without the baroque hack that was there before (which only covered errors within NUMDesc_prepare, anyway). We can get rid of the PG_TRY in NUMDesc_prepare, as well as last_NUMCacheEntry and NUM_cache_remove. (Essentially, this reverts commit ff783fbae in favor of a less fragile solution; the problems with that approach are well illustrated by later hacking such as 55f927a46.) In passing, define the size of these caches as DCH_CACHE_ENTRIES not DCH_CACHE_FIELDS + 1 (whoever thought that was a good definition?) and likewise for the NUM cache. Also const-ify format string parameters where convenient, and merge duplicated cache lookup logic. This is primarily driven by a proposed patch from Artur Zakirov, which introduced some ereport's into format string parsing for the datetime case. He proposed preventing the creation of invalid cache entries by parsing the format string first into a local-variable array, and then copying that to a cache entry. That seemed a bit ugly to me, and anyway randomly different from the way the identical problem had been solved for the numeric case. Let's make the two sets of code more similar not less so. I'm not sure whether we'll adopt the new error conditions Artur proposes, but this patch seems like good code cleanup and future-proofing in any case. The existing code is critically (and undocumented-ly) dependent on no elog being thrown out of several nontrivial functions, which is trouble waiting to happen, though it doesn't seem to be actively broken today. Discussion: <b2a39359-3282-b402-f4a3-057aae500ee7@postgrespro.ru>
2016-09-28 23:08:40 +02:00
ent->valid = false;
StrNCpy(ent->str, str, NUM_CACHE_SIZE + 1);
ent->age = (++NUMCounter);
Rationalize format-picture caching logic in formatting.c. Add a validity flag to DCHCacheEntry and NUMCacheEntry entries, and do not set it true until after we've parsed the supplied format string. This allows dealing with possible errors while parsing the format without the baroque hack that was there before (which only covered errors within NUMDesc_prepare, anyway). We can get rid of the PG_TRY in NUMDesc_prepare, as well as last_NUMCacheEntry and NUM_cache_remove. (Essentially, this reverts commit ff783fbae in favor of a less fragile solution; the problems with that approach are well illustrated by later hacking such as 55f927a46.) In passing, define the size of these caches as DCH_CACHE_ENTRIES not DCH_CACHE_FIELDS + 1 (whoever thought that was a good definition?) and likewise for the NUM cache. Also const-ify format string parameters where convenient, and merge duplicated cache lookup logic. This is primarily driven by a proposed patch from Artur Zakirov, which introduced some ereport's into format string parsing for the datetime case. He proposed preventing the creation of invalid cache entries by parsing the format string first into a local-variable array, and then copying that to a cache entry. That seemed a bit ugly to me, and anyway randomly different from the way the identical problem had been solved for the numeric case. Let's make the two sets of code more similar not less so. I'm not sure whether we'll adopt the new error conditions Artur proposes, but this patch seems like good code cleanup and future-proofing in any case. The existing code is critically (and undocumented-ly) dependent on no elog being thrown out of several nontrivial functions, which is trouble waiting to happen, though it doesn't seem to be actively broken today. Discussion: <b2a39359-3282-b402-f4a3-057aae500ee7@postgrespro.ru>
2016-09-28 23:08:40 +02:00
/* caller is expected to fill format and Num, then set valid */
++n_NUMCache;
Rationalize format-picture caching logic in formatting.c. Add a validity flag to DCHCacheEntry and NUMCacheEntry entries, and do not set it true until after we've parsed the supplied format string. This allows dealing with possible errors while parsing the format without the baroque hack that was there before (which only covered errors within NUMDesc_prepare, anyway). We can get rid of the PG_TRY in NUMDesc_prepare, as well as last_NUMCacheEntry and NUM_cache_remove. (Essentially, this reverts commit ff783fbae in favor of a less fragile solution; the problems with that approach are well illustrated by later hacking such as 55f927a46.) In passing, define the size of these caches as DCH_CACHE_ENTRIES not DCH_CACHE_FIELDS + 1 (whoever thought that was a good definition?) and likewise for the NUM cache. Also const-ify format string parameters where convenient, and merge duplicated cache lookup logic. This is primarily driven by a proposed patch from Artur Zakirov, which introduced some ereport's into format string parsing for the datetime case. He proposed preventing the creation of invalid cache entries by parsing the format string first into a local-variable array, and then copying that to a cache entry. That seemed a bit ugly to me, and anyway randomly different from the way the identical problem had been solved for the numeric case. Let's make the two sets of code more similar not less so. I'm not sure whether we'll adopt the new error conditions Artur proposes, but this patch seems like good code cleanup and future-proofing in any case. The existing code is critically (and undocumented-ly) dependent on no elog being thrown out of several nontrivial functions, which is trouble waiting to happen, though it doesn't seem to be actively broken today. Discussion: <b2a39359-3282-b402-f4a3-057aae500ee7@postgrespro.ru>
2016-09-28 23:08:40 +02:00
return ent;
}
}
Rationalize format-picture caching logic in formatting.c. Add a validity flag to DCHCacheEntry and NUMCacheEntry entries, and do not set it true until after we've parsed the supplied format string. This allows dealing with possible errors while parsing the format without the baroque hack that was there before (which only covered errors within NUMDesc_prepare, anyway). We can get rid of the PG_TRY in NUMDesc_prepare, as well as last_NUMCacheEntry and NUM_cache_remove. (Essentially, this reverts commit ff783fbae in favor of a less fragile solution; the problems with that approach are well illustrated by later hacking such as 55f927a46.) In passing, define the size of these caches as DCH_CACHE_ENTRIES not DCH_CACHE_FIELDS + 1 (whoever thought that was a good definition?) and likewise for the NUM cache. Also const-ify format string parameters where convenient, and merge duplicated cache lookup logic. This is primarily driven by a proposed patch from Artur Zakirov, which introduced some ereport's into format string parsing for the datetime case. He proposed preventing the creation of invalid cache entries by parsing the format string first into a local-variable array, and then copying that to a cache entry. That seemed a bit ugly to me, and anyway randomly different from the way the identical problem had been solved for the numeric case. Let's make the two sets of code more similar not less so. I'm not sure whether we'll adopt the new error conditions Artur proposes, but this patch seems like good code cleanup and future-proofing in any case. The existing code is critically (and undocumented-ly) dependent on no elog being thrown out of several nontrivial functions, which is trouble waiting to happen, though it doesn't seem to be actively broken today. Discussion: <b2a39359-3282-b402-f4a3-057aae500ee7@postgrespro.ru>
2016-09-28 23:08:40 +02:00
/* look for an existing NUMCacheEntry matching the given format picture */
static NUMCacheEntry *
Rationalize format-picture caching logic in formatting.c. Add a validity flag to DCHCacheEntry and NUMCacheEntry entries, and do not set it true until after we've parsed the supplied format string. This allows dealing with possible errors while parsing the format without the baroque hack that was there before (which only covered errors within NUMDesc_prepare, anyway). We can get rid of the PG_TRY in NUMDesc_prepare, as well as last_NUMCacheEntry and NUM_cache_remove. (Essentially, this reverts commit ff783fbae in favor of a less fragile solution; the problems with that approach are well illustrated by later hacking such as 55f927a46.) In passing, define the size of these caches as DCH_CACHE_ENTRIES not DCH_CACHE_FIELDS + 1 (whoever thought that was a good definition?) and likewise for the NUM cache. Also const-ify format string parameters where convenient, and merge duplicated cache lookup logic. This is primarily driven by a proposed patch from Artur Zakirov, which introduced some ereport's into format string parsing for the datetime case. He proposed preventing the creation of invalid cache entries by parsing the format string first into a local-variable array, and then copying that to a cache entry. That seemed a bit ugly to me, and anyway randomly different from the way the identical problem had been solved for the numeric case. Let's make the two sets of code more similar not less so. I'm not sure whether we'll adopt the new error conditions Artur proposes, but this patch seems like good code cleanup and future-proofing in any case. The existing code is critically (and undocumented-ly) dependent on no elog being thrown out of several nontrivial functions, which is trouble waiting to happen, though it doesn't seem to be actively broken today. Discussion: <b2a39359-3282-b402-f4a3-057aae500ee7@postgrespro.ru>
2016-09-28 23:08:40 +02:00
NUM_cache_search(const char *str)
{
/* Ensure we can advance NUMCounter below */
NUM_prevent_counter_overflow();
for (int i = 0; i < n_NUMCache; i++)
{
NUMCacheEntry *ent = NUMCache[i];
Rationalize format-picture caching logic in formatting.c. Add a validity flag to DCHCacheEntry and NUMCacheEntry entries, and do not set it true until after we've parsed the supplied format string. This allows dealing with possible errors while parsing the format without the baroque hack that was there before (which only covered errors within NUMDesc_prepare, anyway). We can get rid of the PG_TRY in NUMDesc_prepare, as well as last_NUMCacheEntry and NUM_cache_remove. (Essentially, this reverts commit ff783fbae in favor of a less fragile solution; the problems with that approach are well illustrated by later hacking such as 55f927a46.) In passing, define the size of these caches as DCH_CACHE_ENTRIES not DCH_CACHE_FIELDS + 1 (whoever thought that was a good definition?) and likewise for the NUM cache. Also const-ify format string parameters where convenient, and merge duplicated cache lookup logic. This is primarily driven by a proposed patch from Artur Zakirov, which introduced some ereport's into format string parsing for the datetime case. He proposed preventing the creation of invalid cache entries by parsing the format string first into a local-variable array, and then copying that to a cache entry. That seemed a bit ugly to me, and anyway randomly different from the way the identical problem had been solved for the numeric case. Let's make the two sets of code more similar not less so. I'm not sure whether we'll adopt the new error conditions Artur proposes, but this patch seems like good code cleanup and future-proofing in any case. The existing code is critically (and undocumented-ly) dependent on no elog being thrown out of several nontrivial functions, which is trouble waiting to happen, though it doesn't seem to be actively broken today. Discussion: <b2a39359-3282-b402-f4a3-057aae500ee7@postgrespro.ru>
2016-09-28 23:08:40 +02:00
if (ent->valid && strcmp(ent->str, str) == 0)
{
ent->age = (++NUMCounter);
return ent;
}
}
return NULL;
}
Rationalize format-picture caching logic in formatting.c. Add a validity flag to DCHCacheEntry and NUMCacheEntry entries, and do not set it true until after we've parsed the supplied format string. This allows dealing with possible errors while parsing the format without the baroque hack that was there before (which only covered errors within NUMDesc_prepare, anyway). We can get rid of the PG_TRY in NUMDesc_prepare, as well as last_NUMCacheEntry and NUM_cache_remove. (Essentially, this reverts commit ff783fbae in favor of a less fragile solution; the problems with that approach are well illustrated by later hacking such as 55f927a46.) In passing, define the size of these caches as DCH_CACHE_ENTRIES not DCH_CACHE_FIELDS + 1 (whoever thought that was a good definition?) and likewise for the NUM cache. Also const-ify format string parameters where convenient, and merge duplicated cache lookup logic. This is primarily driven by a proposed patch from Artur Zakirov, which introduced some ereport's into format string parsing for the datetime case. He proposed preventing the creation of invalid cache entries by parsing the format string first into a local-variable array, and then copying that to a cache entry. That seemed a bit ugly to me, and anyway randomly different from the way the identical problem had been solved for the numeric case. Let's make the two sets of code more similar not less so. I'm not sure whether we'll adopt the new error conditions Artur proposes, but this patch seems like good code cleanup and future-proofing in any case. The existing code is critically (and undocumented-ly) dependent on no elog being thrown out of several nontrivial functions, which is trouble waiting to happen, though it doesn't seem to be actively broken today. Discussion: <b2a39359-3282-b402-f4a3-057aae500ee7@postgrespro.ru>
2016-09-28 23:08:40 +02:00
/* Find or create a NUMCacheEntry for the given format picture */
static NUMCacheEntry *
NUM_cache_fetch(const char *str)
{
Rationalize format-picture caching logic in formatting.c. Add a validity flag to DCHCacheEntry and NUMCacheEntry entries, and do not set it true until after we've parsed the supplied format string. This allows dealing with possible errors while parsing the format without the baroque hack that was there before (which only covered errors within NUMDesc_prepare, anyway). We can get rid of the PG_TRY in NUMDesc_prepare, as well as last_NUMCacheEntry and NUM_cache_remove. (Essentially, this reverts commit ff783fbae in favor of a less fragile solution; the problems with that approach are well illustrated by later hacking such as 55f927a46.) In passing, define the size of these caches as DCH_CACHE_ENTRIES not DCH_CACHE_FIELDS + 1 (whoever thought that was a good definition?) and likewise for the NUM cache. Also const-ify format string parameters where convenient, and merge duplicated cache lookup logic. This is primarily driven by a proposed patch from Artur Zakirov, which introduced some ereport's into format string parsing for the datetime case. He proposed preventing the creation of invalid cache entries by parsing the format string first into a local-variable array, and then copying that to a cache entry. That seemed a bit ugly to me, and anyway randomly different from the way the identical problem had been solved for the numeric case. Let's make the two sets of code more similar not less so. I'm not sure whether we'll adopt the new error conditions Artur proposes, but this patch seems like good code cleanup and future-proofing in any case. The existing code is critically (and undocumented-ly) dependent on no elog being thrown out of several nontrivial functions, which is trouble waiting to happen, though it doesn't seem to be actively broken today. Discussion: <b2a39359-3282-b402-f4a3-057aae500ee7@postgrespro.ru>
2016-09-28 23:08:40 +02:00
NUMCacheEntry *ent;
if ((ent = NUM_cache_search(str)) == NULL)
{
/*
* Not in the cache, must run parser and save a new format-picture to
* the cache. Do not mark the cache entry valid until parsing
* succeeds.
*/
ent = NUM_cache_getnew(str);
zeroize_NUM(&ent->Num);
parse_format(ent->format, str, NUM_keywords,
NULL, NUM_index, NUM_FLAG, &ent->Num);
Rationalize format-picture caching logic in formatting.c. Add a validity flag to DCHCacheEntry and NUMCacheEntry entries, and do not set it true until after we've parsed the supplied format string. This allows dealing with possible errors while parsing the format without the baroque hack that was there before (which only covered errors within NUMDesc_prepare, anyway). We can get rid of the PG_TRY in NUMDesc_prepare, as well as last_NUMCacheEntry and NUM_cache_remove. (Essentially, this reverts commit ff783fbae in favor of a less fragile solution; the problems with that approach are well illustrated by later hacking such as 55f927a46.) In passing, define the size of these caches as DCH_CACHE_ENTRIES not DCH_CACHE_FIELDS + 1 (whoever thought that was a good definition?) and likewise for the NUM cache. Also const-ify format string parameters where convenient, and merge duplicated cache lookup logic. This is primarily driven by a proposed patch from Artur Zakirov, which introduced some ereport's into format string parsing for the datetime case. He proposed preventing the creation of invalid cache entries by parsing the format string first into a local-variable array, and then copying that to a cache entry. That seemed a bit ugly to me, and anyway randomly different from the way the identical problem had been solved for the numeric case. Let's make the two sets of code more similar not less so. I'm not sure whether we'll adopt the new error conditions Artur proposes, but this patch seems like good code cleanup and future-proofing in any case. The existing code is critically (and undocumented-ly) dependent on no elog being thrown out of several nontrivial functions, which is trouble waiting to happen, though it doesn't seem to be actively broken today. Discussion: <b2a39359-3282-b402-f4a3-057aae500ee7@postgrespro.ru>
2016-09-28 23:08:40 +02:00
ent->valid = true;
}
return ent;
}
/* ----------
* Cache routine for NUM to_char version
* ----------
*/
static FormatNode *
NUM_cache(int len, NUMDesc *Num, text *pars_str, bool *shouldFree)
{
FormatNode *format = NULL;
char *str;
str = text_to_cstring(pars_str);
if (len > NUM_CACHE_SIZE)
{
Rationalize format-picture caching logic in formatting.c. Add a validity flag to DCHCacheEntry and NUMCacheEntry entries, and do not set it true until after we've parsed the supplied format string. This allows dealing with possible errors while parsing the format without the baroque hack that was there before (which only covered errors within NUMDesc_prepare, anyway). We can get rid of the PG_TRY in NUMDesc_prepare, as well as last_NUMCacheEntry and NUM_cache_remove. (Essentially, this reverts commit ff783fbae in favor of a less fragile solution; the problems with that approach are well illustrated by later hacking such as 55f927a46.) In passing, define the size of these caches as DCH_CACHE_ENTRIES not DCH_CACHE_FIELDS + 1 (whoever thought that was a good definition?) and likewise for the NUM cache. Also const-ify format string parameters where convenient, and merge duplicated cache lookup logic. This is primarily driven by a proposed patch from Artur Zakirov, which introduced some ereport's into format string parsing for the datetime case. He proposed preventing the creation of invalid cache entries by parsing the format string first into a local-variable array, and then copying that to a cache entry. That seemed a bit ugly to me, and anyway randomly different from the way the identical problem had been solved for the numeric case. Let's make the two sets of code more similar not less so. I'm not sure whether we'll adopt the new error conditions Artur proposes, but this patch seems like good code cleanup and future-proofing in any case. The existing code is critically (and undocumented-ly) dependent on no elog being thrown out of several nontrivial functions, which is trouble waiting to happen, though it doesn't seem to be actively broken today. Discussion: <b2a39359-3282-b402-f4a3-057aae500ee7@postgrespro.ru>
2016-09-28 23:08:40 +02:00
/*
* Allocate new memory if format picture is bigger than static cache
* and do not use cache (call parser always)
*/
format = (FormatNode *) palloc((len + 1) * sizeof(FormatNode));
*shouldFree = true;
zeroize_NUM(Num);
parse_format(format, str, NUM_keywords,
NULL, NUM_index, NUM_FLAG, Num);
}
else
{
/*
* Use cache buffers
*/
Rationalize format-picture caching logic in formatting.c. Add a validity flag to DCHCacheEntry and NUMCacheEntry entries, and do not set it true until after we've parsed the supplied format string. This allows dealing with possible errors while parsing the format without the baroque hack that was there before (which only covered errors within NUMDesc_prepare, anyway). We can get rid of the PG_TRY in NUMDesc_prepare, as well as last_NUMCacheEntry and NUM_cache_remove. (Essentially, this reverts commit ff783fbae in favor of a less fragile solution; the problems with that approach are well illustrated by later hacking such as 55f927a46.) In passing, define the size of these caches as DCH_CACHE_ENTRIES not DCH_CACHE_FIELDS + 1 (whoever thought that was a good definition?) and likewise for the NUM cache. Also const-ify format string parameters where convenient, and merge duplicated cache lookup logic. This is primarily driven by a proposed patch from Artur Zakirov, which introduced some ereport's into format string parsing for the datetime case. He proposed preventing the creation of invalid cache entries by parsing the format string first into a local-variable array, and then copying that to a cache entry. That seemed a bit ugly to me, and anyway randomly different from the way the identical problem had been solved for the numeric case. Let's make the two sets of code more similar not less so. I'm not sure whether we'll adopt the new error conditions Artur proposes, but this patch seems like good code cleanup and future-proofing in any case. The existing code is critically (and undocumented-ly) dependent on no elog being thrown out of several nontrivial functions, which is trouble waiting to happen, though it doesn't seem to be actively broken today. Discussion: <b2a39359-3282-b402-f4a3-057aae500ee7@postgrespro.ru>
2016-09-28 23:08:40 +02:00
NUMCacheEntry *ent = NUM_cache_fetch(str);
*shouldFree = false;
format = ent->format;
/*
* Copy cache to used struct
*/
Num->flag = ent->Num.flag;
Num->lsign = ent->Num.lsign;
Num->pre = ent->Num.pre;
Num->post = ent->Num.post;
Num->pre_lsign_num = ent->Num.pre_lsign_num;
Num->need_locale = ent->Num.need_locale;
Num->multi = ent->Num.multi;
Num->zero_start = ent->Num.zero_start;
Num->zero_end = ent->Num.zero_end;
}
#ifdef DEBUG_TO_FROM_CHAR
2000-04-07 21:17:51 +02:00
/* dump_node(format, len); */
dump_index(NUM_keywords, NUM_index);
#endif
2000-04-07 21:17:51 +02:00
pfree(str);
return format;
}
static char *
int_to_roman(int number)
{
2001-03-22 05:01:46 +01:00
int len = 0,
num = 0;
2001-03-22 05:01:46 +01:00
char *p = NULL,
*result,
numstr[12];
result = (char *) palloc(16);
*result = '\0';
if (number > 3999 || number < 1)
{
fill_str(result, '#', 15);
return result;
}
2001-02-27 09:13:31 +01:00
len = snprintf(numstr, sizeof(numstr), "%d", number);
for (p = numstr; *p != '\0'; p++, --len)
{
num = *p - 49; /* 48 ascii + 1 */
if (num < 0)
continue;
if (len > 3)
{
while (num-- != -1)
strcat(result, "M");
}
else
{
if (len == 3)
strcat(result, rm100[num]);
else if (len == 2)
strcat(result, rm10[num]);
else if (len == 1)
strcat(result, rm1[num]);
}
}
return result;
}
/* ----------
* Locale
* ----------
*/
static void
NUM_prepare_locale(NUMProc *Np)
{
if (Np->Num->need_locale)
{
struct lconv *lconv;
/*
* Get locales
*/
lconv = PGLC_localeconv();
/*
* Positive / Negative number sign
*/
if (lconv->negative_sign && *lconv->negative_sign)
Np->L_negative_sign = lconv->negative_sign;
else
Np->L_negative_sign = "-";
if (lconv->positive_sign && *lconv->positive_sign)
Np->L_positive_sign = lconv->positive_sign;
else
Np->L_positive_sign = "+";
/*
* Number decimal point
*/
if (lconv->decimal_point && *lconv->decimal_point)
Np->decimal = lconv->decimal_point;
else
Np->decimal = ".";
if (!IS_LDECIMAL(Np->Num))
Np->decimal = ".";
/*
* Number thousands separator
2007-11-15 22:14:46 +01:00
*
* Some locales (e.g. broken glibc pt_BR), have a comma for decimal,
* but "" for thousands_sep, so we set the thousands_sep too.
* http://archives.postgresql.org/pgsql-hackers/2007-11/msg00772.php
*/
if (lconv->thousands_sep && *lconv->thousands_sep)
Np->L_thousands_sep = lconv->thousands_sep;
/* Make sure thousands separator doesn't match decimal point symbol. */
else if (strcmp(Np->decimal, ",") !=0)
Np->L_thousands_sep = ",";
else
Np->L_thousands_sep = ".";
/*
* Currency symbol
*/
if (lconv->currency_symbol && *lconv->currency_symbol)
Np->L_currency_symbol = lconv->currency_symbol;
else
Np->L_currency_symbol = " ";
}
else
{
/*
* Default values
*/
Np->L_negative_sign = "-";
Np->L_positive_sign = "+";
Np->decimal = ".";
2007-11-15 22:14:46 +01:00
Np->L_thousands_sep = ",";
Np->L_currency_symbol = " ";
}
}
/* ----------
* Return pointer of last relevant number after decimal point
* 12.0500 --> last relevant is '5'
* 12.0000 --> last relevant is '.'
* If there is no decimal point, return NULL (which will result in same
* behavior as if FM hadn't been specified).
* ----------
*/
static char *
get_last_relevant_decnum(char *num)
{
char *result,
2001-03-22 05:01:46 +01:00
*p = strchr(num, '.');
#ifdef DEBUG_TO_FROM_CHAR
elog(DEBUG_elog_output, "get_last_relevant_decnum()");
#endif
if (!p)
return NULL;
result = p;
while (*(++p))
{
if (*p != '0')
result = p;
}
return result;
}
/*
* These macros are used in NUM_processor() and its subsidiary routines.
* OVERLOAD_TEST: true if we've reached end of input string
* AMOUNT_TEST(s): true if at least s bytes remain in string
*/
#define OVERLOAD_TEST (Np->inout_p >= Np->inout + input_len)
#define AMOUNT_TEST(s) (Np->inout_p <= Np->inout + (input_len - (s)))
/* ----------
* Number extraction for TO_NUMBER()
* ----------
*/
static void
NUM_numpart_from_char(NUMProc *Np, int id, int input_len)
{
bool isread = false;
2005-10-15 04:49:52 +02:00
#ifdef DEBUG_TO_FROM_CHAR
elog(DEBUG_elog_output, " --- scan start --- id=%s",
2005-10-15 04:49:52 +02:00
(id == NUM_0 || id == NUM_9) ? "NUM_0/9" : id == NUM_DEC ? "NUM_DEC" : "???");
#endif
if (OVERLOAD_TEST)
return;
if (*Np->inout_p == ' ')
Np->inout_p++;
if (OVERLOAD_TEST)
return;
/*
* read sign before number
*/
if (*Np->number == ' ' && (id == NUM_0 || id == NUM_9) &&
2005-10-15 04:49:52 +02:00
(Np->read_pre + Np->read_post) == 0)
{
#ifdef DEBUG_TO_FROM_CHAR
2005-10-15 04:49:52 +02:00
elog(DEBUG_elog_output, "Try read sign (%c), locale positive: %s, negative: %s",
*Np->inout_p, Np->L_positive_sign, Np->L_negative_sign);
#endif
/*
* locale sign
*/
if (IS_LSIGN(Np->Num) && Np->Num->lsign == NUM_LSIGN_PRE)
{
2005-10-15 04:49:52 +02:00
int x = 0;
#ifdef DEBUG_TO_FROM_CHAR
elog(DEBUG_elog_output, "Try read locale pre-sign (%c)", *Np->inout_p);
#endif
2005-10-15 04:49:52 +02:00
if ((x = strlen(Np->L_negative_sign)) &&
AMOUNT_TEST(x) &&
2005-10-15 04:49:52 +02:00
strncmp(Np->inout_p, Np->L_negative_sign, x) == 0)
{
Np->inout_p += x;
*Np->number = '-';
}
2005-10-15 04:49:52 +02:00
else if ((x = strlen(Np->L_positive_sign)) &&
AMOUNT_TEST(x) &&
strncmp(Np->inout_p, Np->L_positive_sign, x) == 0)
{
Np->inout_p += x;
*Np->number = '+';
}
}
else
{
#ifdef DEBUG_TO_FROM_CHAR
elog(DEBUG_elog_output, "Try read simple sign (%c)", *Np->inout_p);
#endif
2005-10-15 04:49:52 +02:00
/*
* simple + - < >
*/
if (*Np->inout_p == '-' || (IS_BRACKET(Np->Num) &&
*Np->inout_p == '<'))
{
Phase 2 of pgindent updates. Change pg_bsd_indent to follow upstream rules for placement of comments to the right of code, and remove pgindent hack that caused comments following #endif to not obey the general rule. Commit e3860ffa4dd0dad0dd9eea4be9cc1412373a8c89 wasn't actually using the published version of pg_bsd_indent, but a hacked-up version that tried to minimize the amount of movement of comments to the right of code. The situation of interest is where such a comment has to be moved to the right of its default placement at column 33 because there's code there. BSD indent has always moved right in units of tab stops in such cases --- but in the previous incarnation, indent was working in 8-space tab stops, while now it knows we use 4-space tabs. So the net result is that in about half the cases, such comments are placed one tab stop left of before. This is better all around: it leaves more room on the line for comment text, and it means that in such cases the comment uniformly starts at the next 4-space tab stop after the code, rather than sometimes one and sometimes two tabs after. Also, ensure that comments following #endif are indented the same as comments following other preprocessor commands such as #else. That inconsistency turns out to have been self-inflicted damage from a poorly-thought-through post-indent "fixup" in pgindent. This patch is much less interesting than the first round of indent changes, but also bulkier, so I thought it best to separate the effects. Discussion: https://postgr.es/m/E1dAmxK-0006EE-1r@gemulon.postgresql.org Discussion: https://postgr.es/m/30527.1495162840@sss.pgh.pa.us
2017-06-21 21:18:54 +02:00
*Np->number = '-'; /* set - */
Np->inout_p++;
}
else if (*Np->inout_p == '+')
{
Phase 2 of pgindent updates. Change pg_bsd_indent to follow upstream rules for placement of comments to the right of code, and remove pgindent hack that caused comments following #endif to not obey the general rule. Commit e3860ffa4dd0dad0dd9eea4be9cc1412373a8c89 wasn't actually using the published version of pg_bsd_indent, but a hacked-up version that tried to minimize the amount of movement of comments to the right of code. The situation of interest is where such a comment has to be moved to the right of its default placement at column 33 because there's code there. BSD indent has always moved right in units of tab stops in such cases --- but in the previous incarnation, indent was working in 8-space tab stops, while now it knows we use 4-space tabs. So the net result is that in about half the cases, such comments are placed one tab stop left of before. This is better all around: it leaves more room on the line for comment text, and it means that in such cases the comment uniformly starts at the next 4-space tab stop after the code, rather than sometimes one and sometimes two tabs after. Also, ensure that comments following #endif are indented the same as comments following other preprocessor commands such as #else. That inconsistency turns out to have been self-inflicted damage from a poorly-thought-through post-indent "fixup" in pgindent. This patch is much less interesting than the first round of indent changes, but also bulkier, so I thought it best to separate the effects. Discussion: https://postgr.es/m/E1dAmxK-0006EE-1r@gemulon.postgresql.org Discussion: https://postgr.es/m/30527.1495162840@sss.pgh.pa.us
2017-06-21 21:18:54 +02:00
*Np->number = '+'; /* set + */
Np->inout_p++;
}
}
}
if (OVERLOAD_TEST)
return;
2005-10-15 04:49:52 +02:00
#ifdef DEBUG_TO_FROM_CHAR
elog(DEBUG_elog_output, "Scan for numbers (%c), current number: '%s'", *Np->inout_p, Np->number);
#endif
2005-10-15 04:49:52 +02:00
/*
* read digit or decimal point
*/
if (isdigit((unsigned char) *Np->inout_p))
{
if (Np->read_dec && Np->read_post == Np->Num->post)
return;
*Np->number_p = *Np->inout_p;
Np->number_p++;
if (Np->read_dec)
Np->read_post++;
else
Np->read_pre++;
isread = true;
2005-10-15 04:49:52 +02:00
#ifdef DEBUG_TO_FROM_CHAR
elog(DEBUG_elog_output, "Read digit (%c)", *Np->inout_p);
#endif
}
else if (IS_DECIMAL(Np->Num) && Np->read_dec == false)
{
/*
* We need not test IS_LDECIMAL(Np->Num) explicitly here, because
* Np->decimal is always just "." if we don't have a D format token.
* So we just unconditionally match to Np->decimal.
*/
int x = strlen(Np->decimal);
#ifdef DEBUG_TO_FROM_CHAR
elog(DEBUG_elog_output, "Try read decimal point (%c)",
*Np->inout_p);
#endif
if (x && AMOUNT_TEST(x) && strncmp(Np->inout_p, Np->decimal, x) == 0)
{
Np->inout_p += x - 1;
*Np->number_p = '.';
Np->number_p++;
Np->read_dec = true;
isread = true;
}
}
if (OVERLOAD_TEST)
return;
2005-10-15 04:49:52 +02:00
/*
* Read sign behind "last" number
*
2005-10-15 04:49:52 +02:00
* We need sign detection because determine exact position of post-sign is
* difficult:
*
* FM9999.9999999S -> 123.001- 9.9S -> .5- FM9.999999MI ->
* 5.01-
*/
if (*Np->number == ' ' && Np->read_pre + Np->read_post > 0)
{
/*
2005-10-15 04:49:52 +02:00
* locale sign (NUM_S) is always anchored behind a last number, if: -
* locale sign expected - last read char was NUM_0/9 or NUM_DEC - and
* next char is not digit
*/
if (IS_LSIGN(Np->Num) && isread &&
(Np->inout_p + 1) < Np->inout + input_len &&
2005-10-15 04:49:52 +02:00
!isdigit((unsigned char) *(Np->inout_p + 1)))
{
2005-10-15 04:49:52 +02:00
int x;
char *tmp = Np->inout_p++;
#ifdef DEBUG_TO_FROM_CHAR
elog(DEBUG_elog_output, "Try read locale post-sign (%c)", *Np->inout_p);
#endif
2005-10-15 04:49:52 +02:00
if ((x = strlen(Np->L_negative_sign)) &&
AMOUNT_TEST(x) &&
2005-10-15 04:49:52 +02:00
strncmp(Np->inout_p, Np->L_negative_sign, x) == 0)
{
2005-10-15 04:49:52 +02:00
Np->inout_p += x - 1; /* -1 .. NUM_processor() do inout_p++ */
*Np->number = '-';
}
2005-10-15 04:49:52 +02:00
else if ((x = strlen(Np->L_positive_sign)) &&
AMOUNT_TEST(x) &&
strncmp(Np->inout_p, Np->L_positive_sign, x) == 0)
{
2005-10-15 04:49:52 +02:00
Np->inout_p += x - 1; /* -1 .. NUM_processor() do inout_p++ */
*Np->number = '+';
}
if (*Np->number == ' ')
/* no sign read */
Np->inout_p = tmp;
}
2005-10-15 04:49:52 +02:00
/*
* try read non-locale sign, it's happen only if format is not exact
* and we cannot determine sign position of MI/PL/SG, an example:
*
2005-10-15 04:49:52 +02:00
* FM9.999999MI -> 5.01-
*
* if (.... && IS_LSIGN(Np->Num)==false) prevents read wrong formats
* like to_number('1 -', '9S') where sign is not anchored to last
* number.
*/
else if (isread == false && IS_LSIGN(Np->Num) == false &&
(IS_PLUS(Np->Num) || IS_MINUS(Np->Num)))
{
#ifdef DEBUG_TO_FROM_CHAR
elog(DEBUG_elog_output, "Try read simple post-sign (%c)", *Np->inout_p);
#endif
2005-10-15 04:49:52 +02:00
/*
* simple + -
*/
if (*Np->inout_p == '-' || *Np->inout_p == '+')
/* NUM_processor() do inout_p++ */
*Np->number = *Np->inout_p;
}
}
}
2003-03-27 17:35:31 +01:00
#define IS_PREDEC_SPACE(_n) \
(IS_ZERO((_n)->Num)==false && \
(_n)->number == (_n)->number_p && \
*(_n)->number == '0' && \
(_n)->Num->post != 0)
2003-03-27 17:35:31 +01:00
/* ----------
* Add digit or sign to number-string
* ----------
*/
static void
NUM_numpart_to_char(NUMProc *Np, int id)
{
2003-08-04 02:43:34 +02:00
int end;
if (IS_ROMAN(Np->Num))
return;
/* Note: in this elog() output not set '\0' in 'inout' */
#ifdef DEBUG_TO_FROM_CHAR
/*
* Np->num_curr is number of current item in format-picture, it is not
* current position in inout!
*/
elog(DEBUG_elog_output,
"SIGN_WROTE: %d, CURRENT: %d, NUMBER_P: \"%s\", INOUT: \"%s\"",
Np->sign_wrote,
Np->num_curr,
Np->number_p,
Np->inout);
#endif
Np->num_in = false;
/*
2005-10-15 04:49:52 +02:00
* Write sign if real number will write to output Note: IS_PREDEC_SPACE()
* handle "9.9" --> " .1"
*/
if (Np->sign_wrote == false &&
(Np->num_curr >= Np->out_pre_spaces || (IS_ZERO(Np->Num) && Np->Num->zero_start == Np->num_curr)) &&
(IS_PREDEC_SPACE(Np) == false || (Np->last_relevant && *Np->last_relevant == '.')))
2003-08-04 02:43:34 +02:00
{
if (IS_LSIGN(Np->Num))
{
if (Np->Num->lsign == NUM_LSIGN_PRE)
2003-03-27 17:35:31 +01:00
{
if (Np->sign == '-')
strcpy(Np->inout_p, Np->L_negative_sign);
else
strcpy(Np->inout_p, Np->L_positive_sign);
Np->inout_p += strlen(Np->inout_p);
Np->sign_wrote = true;
2003-03-27 17:35:31 +01:00
}
}
else if (IS_BRACKET(Np->Num))
{
2003-03-27 17:35:31 +01:00
*Np->inout_p = Np->sign == '+' ? ' ' : '<';
++Np->inout_p;
Np->sign_wrote = true;
}
else if (Np->sign == '+')
{
if (!IS_FILLMODE(Np->Num))
2003-03-27 17:35:31 +01:00
{
Phase 2 of pgindent updates. Change pg_bsd_indent to follow upstream rules for placement of comments to the right of code, and remove pgindent hack that caused comments following #endif to not obey the general rule. Commit e3860ffa4dd0dad0dd9eea4be9cc1412373a8c89 wasn't actually using the published version of pg_bsd_indent, but a hacked-up version that tried to minimize the amount of movement of comments to the right of code. The situation of interest is where such a comment has to be moved to the right of its default placement at column 33 because there's code there. BSD indent has always moved right in units of tab stops in such cases --- but in the previous incarnation, indent was working in 8-space tab stops, while now it knows we use 4-space tabs. So the net result is that in about half the cases, such comments are placed one tab stop left of before. This is better all around: it leaves more room on the line for comment text, and it means that in such cases the comment uniformly starts at the next 4-space tab stop after the code, rather than sometimes one and sometimes two tabs after. Also, ensure that comments following #endif are indented the same as comments following other preprocessor commands such as #else. That inconsistency turns out to have been self-inflicted damage from a poorly-thought-through post-indent "fixup" in pgindent. This patch is much less interesting than the first round of indent changes, but also bulkier, so I thought it best to separate the effects. Discussion: https://postgr.es/m/E1dAmxK-0006EE-1r@gemulon.postgresql.org Discussion: https://postgr.es/m/30527.1495162840@sss.pgh.pa.us
2017-06-21 21:18:54 +02:00
*Np->inout_p = ' '; /* Write + */
2003-03-27 17:35:31 +01:00
++Np->inout_p;
}
Np->sign_wrote = true;
}
else if (Np->sign == '-')
{ /* Write - */
*Np->inout_p = '-';
++Np->inout_p;
Np->sign_wrote = true;
}
}
2003-08-04 02:43:34 +02:00
/*
* digits / FM / Zero / Dec. point
*/
2003-03-27 17:35:31 +01:00
if (id == NUM_9 || id == NUM_0 || id == NUM_D || id == NUM_DEC)
{
if (Np->num_curr < Np->out_pre_spaces &&
(Np->Num->zero_start > Np->num_curr || !IS_ZERO(Np->Num)))
{
/*
* Write blank space
*/
if (!IS_FILLMODE(Np->Num))
{
Phase 2 of pgindent updates. Change pg_bsd_indent to follow upstream rules for placement of comments to the right of code, and remove pgindent hack that caused comments following #endif to not obey the general rule. Commit e3860ffa4dd0dad0dd9eea4be9cc1412373a8c89 wasn't actually using the published version of pg_bsd_indent, but a hacked-up version that tried to minimize the amount of movement of comments to the right of code. The situation of interest is where such a comment has to be moved to the right of its default placement at column 33 because there's code there. BSD indent has always moved right in units of tab stops in such cases --- but in the previous incarnation, indent was working in 8-space tab stops, while now it knows we use 4-space tabs. So the net result is that in about half the cases, such comments are placed one tab stop left of before. This is better all around: it leaves more room on the line for comment text, and it means that in such cases the comment uniformly starts at the next 4-space tab stop after the code, rather than sometimes one and sometimes two tabs after. Also, ensure that comments following #endif are indented the same as comments following other preprocessor commands such as #else. That inconsistency turns out to have been self-inflicted damage from a poorly-thought-through post-indent "fixup" in pgindent. This patch is much less interesting than the first round of indent changes, but also bulkier, so I thought it best to separate the effects. Discussion: https://postgr.es/m/E1dAmxK-0006EE-1r@gemulon.postgresql.org Discussion: https://postgr.es/m/30527.1495162840@sss.pgh.pa.us
2017-06-21 21:18:54 +02:00
*Np->inout_p = ' '; /* Write ' ' */
++Np->inout_p;
}
}
else if (IS_ZERO(Np->Num) &&
Np->num_curr < Np->out_pre_spaces &&
Np->Num->zero_start <= Np->num_curr)
{
/*
* Write ZERO
*/
*Np->inout_p = '0'; /* Write '0' */
++Np->inout_p;
Np->num_in = true;
}
else
{
/*
* Write Decimal point
*/
if (*Np->number_p == '.')
{
if (!Np->last_relevant || *Np->last_relevant != '.')
{
strcpy(Np->inout_p, Np->decimal); /* Write DEC/D */
Np->inout_p += strlen(Np->inout_p);
}
2003-08-04 02:43:34 +02:00
/*
* Ora 'n' -- FM9.9 --> 'n.'
*/
else if (IS_FILLMODE(Np->Num) &&
Np->last_relevant && *Np->last_relevant == '.')
{
strcpy(Np->inout_p, Np->decimal); /* Write DEC/D */
Np->inout_p += strlen(Np->inout_p);
}
}
else
{
/*
* Write Digits
*/
if (Np->last_relevant && Np->number_p > Np->last_relevant &&
id != NUM_0)
;
2003-08-04 02:43:34 +02:00
/*
2003-03-27 17:35:31 +01:00
* '0.1' -- 9.9 --> ' .1'
*/
2003-03-27 17:35:31 +01:00
else if (IS_PREDEC_SPACE(Np))
{
if (!IS_FILLMODE(Np->Num))
{
*Np->inout_p = ' ';
++Np->inout_p;
}
2003-08-04 02:43:34 +02:00
/*
2003-03-27 17:35:31 +01:00
* '0' -- FM9.9 --> '0.'
*/
else if (Np->last_relevant && *Np->last_relevant == '.')
{
*Np->inout_p = '0';
++Np->inout_p;
}
}
else
{
Phase 2 of pgindent updates. Change pg_bsd_indent to follow upstream rules for placement of comments to the right of code, and remove pgindent hack that caused comments following #endif to not obey the general rule. Commit e3860ffa4dd0dad0dd9eea4be9cc1412373a8c89 wasn't actually using the published version of pg_bsd_indent, but a hacked-up version that tried to minimize the amount of movement of comments to the right of code. The situation of interest is where such a comment has to be moved to the right of its default placement at column 33 because there's code there. BSD indent has always moved right in units of tab stops in such cases --- but in the previous incarnation, indent was working in 8-space tab stops, while now it knows we use 4-space tabs. So the net result is that in about half the cases, such comments are placed one tab stop left of before. This is better all around: it leaves more room on the line for comment text, and it means that in such cases the comment uniformly starts at the next 4-space tab stop after the code, rather than sometimes one and sometimes two tabs after. Also, ensure that comments following #endif are indented the same as comments following other preprocessor commands such as #else. That inconsistency turns out to have been self-inflicted damage from a poorly-thought-through post-indent "fixup" in pgindent. This patch is much less interesting than the first round of indent changes, but also bulkier, so I thought it best to separate the effects. Discussion: https://postgr.es/m/E1dAmxK-0006EE-1r@gemulon.postgresql.org Discussion: https://postgr.es/m/30527.1495162840@sss.pgh.pa.us
2017-06-21 21:18:54 +02:00
*Np->inout_p = *Np->number_p; /* Write DIGIT */
++Np->inout_p;
Np->num_in = true;
}
}
/* do no exceed string length */
if (*Np->number_p)
++Np->number_p;
}
2003-03-27 17:35:31 +01:00
end = Np->num_count + (Np->out_pre_spaces ? 1 : 0) + (IS_DECIMAL(Np->Num) ? 1 : 0);
2003-08-04 02:43:34 +02:00
if (Np->last_relevant && Np->last_relevant == Np->number_p)
2003-03-27 17:35:31 +01:00
end = Np->num_curr;
2003-08-04 02:43:34 +02:00
if (Np->num_curr + 1 == end)
2003-03-27 17:35:31 +01:00
{
if (Np->sign_wrote == true && IS_BRACKET(Np->Num))
2003-03-27 17:35:31 +01:00
{
*Np->inout_p = Np->sign == '+' ? ' ' : '>';
++Np->inout_p;
}
else if (IS_LSIGN(Np->Num) && Np->Num->lsign == NUM_LSIGN_POST)
2003-03-27 17:35:31 +01:00
{
if (Np->sign == '-')
strcpy(Np->inout_p, Np->L_negative_sign);
else
strcpy(Np->inout_p, Np->L_positive_sign);
Np->inout_p += strlen(Np->inout_p);
}
}
}
++Np->num_curr;
}
/*
* Skip over "n" input characters, but only if they aren't numeric data
*/
static void
NUM_eat_non_data_chars(NUMProc *Np, int n, int input_len)
{
while (n-- > 0)
{
if (OVERLOAD_TEST)
break; /* end of input */
if (strchr("0123456789.,+-", *Np->inout_p) != NULL)
break; /* it's a data character */
Np->inout_p += pg_mblen(Np->inout_p);
}
}
static char *
NUM_processor(FormatNode *node, NUMDesc *Num, char *inout,
char *number, int input_len, int to_char_out_pre_spaces,
int sign, bool is_to_char, Oid collid)
{
FormatNode *n;
2001-03-22 05:01:46 +01:00
NUMProc _Np,
*Np = &_Np;
const char *pattern;
int pattern_len;
MemSet(Np, 0, sizeof(NUMProc));
Np->Num = Num;
Np->is_to_char = is_to_char;
Np->number = number;
Np->inout = inout;
Np->last_relevant = NULL;
Np->read_post = 0;
2005-10-15 04:49:52 +02:00
Np->read_pre = 0;
Np->read_dec = false;
if (Np->Num->zero_start)
--Np->Num->zero_start;
2003-08-04 02:43:34 +02:00
if (IS_EEEE(Np->Num))
{
if (!Np->is_to_char)
ereport(ERROR,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("\"EEEE\" not supported for input")));
return strcpy(inout, number);
}
/*
* Roman correction
*/
if (IS_ROMAN(Np->Num))
{
if (!Np->is_to_char)
ereport(ERROR,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("\"RN\" not supported for input")));
Np->Num->lsign = Np->Num->pre_lsign_num = Np->Num->post =
Np->Num->pre = Np->out_pre_spaces = Np->sign = 0;
if (IS_FILLMODE(Np->Num))
{
Np->Num->flag = 0;
Np->Num->flag |= NUM_F_FILLMODE;
}
else
Np->Num->flag = 0;
Np->Num->flag |= NUM_F_ROMAN;
}
/*
* Sign
*/
if (is_to_char)
{
Np->sign = sign;
2003-08-04 02:43:34 +02:00
2003-03-27 17:35:31 +01:00
/* MI/PL/SG - write sign itself and not in number */
if (IS_PLUS(Np->Num) || IS_MINUS(Np->Num))
{
if (IS_PLUS(Np->Num) && IS_MINUS(Np->Num) == false)
Np->sign_wrote = false; /* need sign */
else
Np->sign_wrote = true; /* needn't sign */
}
else
2003-03-27 17:35:31 +01:00
{
if (Np->sign != '-')
{
if (IS_BRACKET(Np->Num) && IS_FILLMODE(Np->Num))
Np->Num->flag &= ~NUM_F_BRACKET;
if (IS_MINUS(Np->Num))
Np->Num->flag &= ~NUM_F_MINUS;
2003-03-27 17:35:31 +01:00
}
else if (Np->sign != '+' && IS_PLUS(Np->Num))
Np->Num->flag &= ~NUM_F_PLUS;
if (Np->sign == '+' && IS_FILLMODE(Np->Num) && IS_LSIGN(Np->Num) == false)
Np->sign_wrote = true; /* needn't sign */
2003-03-27 17:35:31 +01:00
else
Np->sign_wrote = false; /* need sign */
if (Np->Num->lsign == NUM_LSIGN_PRE && Np->Num->pre == Np->Num->pre_lsign_num)
Np->Num->lsign = NUM_LSIGN_POST;
2003-03-27 17:35:31 +01:00
}
}
else
Np->sign = false;
/*
* Count
*/
Np->num_count = Np->Num->post + Np->Num->pre - 1;
if (is_to_char)
{
Np->out_pre_spaces = to_char_out_pre_spaces;
if (IS_FILLMODE(Np->Num) && IS_DECIMAL(Np->Num))
{
Np->last_relevant = get_last_relevant_decnum(Np->number);
/*
* If any '0' specifiers are present, make sure we don't strip
* those digits.
*/
if (Np->last_relevant && Np->Num->zero_end > Np->out_pre_spaces)
{
char *last_zero;
last_zero = Np->number + (Np->Num->zero_end - Np->out_pre_spaces);
if (Np->last_relevant < last_zero)
Np->last_relevant = last_zero;
}
}
if (Np->sign_wrote == false && Np->out_pre_spaces == 0)
++Np->num_count;
}
else
{
Np->out_pre_spaces = 0;
*Np->number = ' '; /* sign space */
*(Np->number + 1) = '\0';
}
Np->num_in = 0;
Np->num_curr = 0;
#ifdef DEBUG_TO_FROM_CHAR
elog(DEBUG_elog_output,
"\n\tSIGN: '%c'\n\tNUM: '%s'\n\tPRE: %d\n\tPOST: %d\n\tNUM_COUNT: %d\n\tNUM_PRE: %d\n\tSIGN_WROTE: %s\n\tZERO: %s\n\tZERO_START: %d\n\tZERO_END: %d\n\tLAST_RELEVANT: %s\n\tBRACKET: %s\n\tPLUS: %s\n\tMINUS: %s\n\tFILLMODE: %s\n\tROMAN: %s\n\tEEEE: %s",
2003-03-27 17:35:31 +01:00
Np->sign,
Np->number,
Np->Num->pre,
Np->Num->post,
Np->num_count,
Np->out_pre_spaces,
Np->sign_wrote ? "Yes" : "No",
IS_ZERO(Np->Num) ? "Yes" : "No",
Np->Num->zero_start,
Np->Num->zero_end,
2003-03-27 17:35:31 +01:00
Np->last_relevant ? Np->last_relevant : "<not set>",
IS_BRACKET(Np->Num) ? "Yes" : "No",
IS_PLUS(Np->Num) ? "Yes" : "No",
IS_MINUS(Np->Num) ? "Yes" : "No",
IS_FILLMODE(Np->Num) ? "Yes" : "No",
IS_ROMAN(Np->Num) ? "Yes" : "No",
IS_EEEE(Np->Num) ? "Yes" : "No"
2003-08-04 02:43:34 +02:00
);
#endif
/*
* Locale
*/
NUM_prepare_locale(Np);
/*
* Processor direct cycle
*/
if (Np->is_to_char)
Np->number_p = Np->number;
else
Np->number_p = Np->number + 1; /* first char is space for sign */
for (n = node, Np->inout_p = Np->inout; n->type != NODE_TYPE_END; n++)
{
if (!Np->is_to_char)
{
/*
* Check at least one byte remains to be scanned. (In actions
* below, must use AMOUNT_TEST if we want to read more bytes than
* that.)
*/
if (OVERLOAD_TEST)
break;
}
/*
* Format pictures actions
*/
if (n->type == NODE_TYPE_ACTION)
{
/*
* Create/read digit/zero/blank/sign/special-case
*
2005-10-15 04:49:52 +02:00
* 'NUM_S' note: The locale sign is anchored to number and we
* read/write it when we work with first or last number
* (NUM_0/NUM_9). This is why NUM_S is missing in switch().
*
* Notice the "Np->inout_p++" at the bottom of the loop. This is
* why most of the actions advance inout_p one less than you might
* expect. In cases where we don't want that increment to happen,
* a switch case ends with "continue" not "break".
*/
switch (n->key->id)
{
case NUM_9:
case NUM_0:
case NUM_DEC:
case NUM_D:
if (Np->is_to_char)
{
NUM_numpart_to_char(Np, n->key->id);
Phase 2 of pgindent updates. Change pg_bsd_indent to follow upstream rules for placement of comments to the right of code, and remove pgindent hack that caused comments following #endif to not obey the general rule. Commit e3860ffa4dd0dad0dd9eea4be9cc1412373a8c89 wasn't actually using the published version of pg_bsd_indent, but a hacked-up version that tried to minimize the amount of movement of comments to the right of code. The situation of interest is where such a comment has to be moved to the right of its default placement at column 33 because there's code there. BSD indent has always moved right in units of tab stops in such cases --- but in the previous incarnation, indent was working in 8-space tab stops, while now it knows we use 4-space tabs. So the net result is that in about half the cases, such comments are placed one tab stop left of before. This is better all around: it leaves more room on the line for comment text, and it means that in such cases the comment uniformly starts at the next 4-space tab stop after the code, rather than sometimes one and sometimes two tabs after. Also, ensure that comments following #endif are indented the same as comments following other preprocessor commands such as #else. That inconsistency turns out to have been self-inflicted damage from a poorly-thought-through post-indent "fixup" in pgindent. This patch is much less interesting than the first round of indent changes, but also bulkier, so I thought it best to separate the effects. Discussion: https://postgr.es/m/E1dAmxK-0006EE-1r@gemulon.postgresql.org Discussion: https://postgr.es/m/30527.1495162840@sss.pgh.pa.us
2017-06-21 21:18:54 +02:00
continue; /* for() */
}
else
{
NUM_numpart_from_char(Np, n->key->id, input_len);
break; /* switch() case: */
}
case NUM_COMMA:
if (Np->is_to_char)
{
if (!Np->num_in)
{
if (IS_FILLMODE(Np->Num))
continue;
else
*Np->inout_p = ' ';
}
else
*Np->inout_p = ',';
}
else
{
if (!Np->num_in)
{
if (IS_FILLMODE(Np->Num))
continue;
}
if (*Np->inout_p != ',')
continue;
}
break;
case NUM_G:
pattern = Np->L_thousands_sep;
pattern_len = strlen(pattern);
if (Np->is_to_char)
{
if (!Np->num_in)
{
if (IS_FILLMODE(Np->Num))
continue;
else
{
/* just in case there are MB chars */
pattern_len = pg_mbstrlen(pattern);
memset(Np->inout_p, ' ', pattern_len);
Np->inout_p += pattern_len - 1;
}
}
else
{
strcpy(Np->inout_p, pattern);
Np->inout_p += pattern_len - 1;
}
}
else
{
if (!Np->num_in)
{
if (IS_FILLMODE(Np->Num))
continue;
}
/*
* Because L_thousands_sep typically contains data
* characters (either '.' or ','), we can't use
* NUM_eat_non_data_chars here. Instead skip only if
* the input matches L_thousands_sep.
*/
if (AMOUNT_TEST(pattern_len) &&
strncmp(Np->inout_p, pattern, pattern_len) == 0)
Np->inout_p += pattern_len - 1;
else
continue;
}
break;
case NUM_L:
pattern = Np->L_currency_symbol;
if (Np->is_to_char)
{
strcpy(Np->inout_p, pattern);
Np->inout_p += strlen(pattern) - 1;
}
else
{
NUM_eat_non_data_chars(Np, pg_mbstrlen(pattern), input_len);
continue;
}
break;
case NUM_RN:
if (IS_FILLMODE(Np->Num))
{
strcpy(Np->inout_p, Np->number_p);
Np->inout_p += strlen(Np->inout_p) - 1;
}
else
{
sprintf(Np->inout_p, "%15s", Np->number_p);
Np->inout_p += strlen(Np->inout_p) - 1;
}
break;
case NUM_rn:
if (IS_FILLMODE(Np->Num))
{
strcpy(Np->inout_p, asc_tolower_z(Np->number_p));
Np->inout_p += strlen(Np->inout_p) - 1;
}
else
{
sprintf(Np->inout_p, "%15s", asc_tolower_z(Np->number_p));
Np->inout_p += strlen(Np->inout_p) - 1;
}
break;
case NUM_th:
if (IS_ROMAN(Np->Num) || *Np->number == '#' ||
Np->sign == '-' || IS_DECIMAL(Np->Num))
continue;
if (Np->is_to_char)
{
strcpy(Np->inout_p, get_th(Np->number, TH_LOWER));
Np->inout_p += 1;
}
else
{
/* All variants of 'th' occupy 2 characters */
NUM_eat_non_data_chars(Np, 2, input_len);
continue;
}
break;
case NUM_TH:
if (IS_ROMAN(Np->Num) || *Np->number == '#' ||
Np->sign == '-' || IS_DECIMAL(Np->Num))
continue;
if (Np->is_to_char)
{
strcpy(Np->inout_p, get_th(Np->number, TH_UPPER));
Np->inout_p += 1;
}
else
{
/* All variants of 'TH' occupy 2 characters */
NUM_eat_non_data_chars(Np, 2, input_len);
continue;
}
break;
case NUM_MI:
if (Np->is_to_char)
{
if (Np->sign == '-')
*Np->inout_p = '-';
else if (IS_FILLMODE(Np->Num))
2003-03-27 17:35:31 +01:00
continue;
else
*Np->inout_p = ' ';
}
else
{
if (*Np->inout_p == '-')
*Np->number = '-';
else
{
NUM_eat_non_data_chars(Np, 1, input_len);
continue;
}
}
break;
case NUM_PL:
if (Np->is_to_char)
{
if (Np->sign == '+')
*Np->inout_p = '+';
else if (IS_FILLMODE(Np->Num))
2003-03-27 17:35:31 +01:00
continue;
else
*Np->inout_p = ' ';
}
else
{
if (*Np->inout_p == '+')
*Np->number = '+';
else
{
NUM_eat_non_data_chars(Np, 1, input_len);
continue;
}
}
break;
case NUM_SG:
if (Np->is_to_char)
*Np->inout_p = Np->sign;
else
{
if (*Np->inout_p == '-')
*Np->number = '-';
else if (*Np->inout_p == '+')
*Np->number = '+';
else
{
NUM_eat_non_data_chars(Np, 1, input_len);
continue;
}
}
break;
default:
continue;
break;
}
}
else
{
/*
* In TO_CHAR, non-pattern characters in the format are copied to
* the output. In TO_NUMBER, we skip one input character for each
* non-pattern format character, whether or not it matches the
* format character.
*/
if (Np->is_to_char)
{
strcpy(Np->inout_p, n->character);
Np->inout_p += strlen(Np->inout_p);
}
else
{
Np->inout_p += pg_mblen(Np->inout_p);
}
continue;
}
Np->inout_p++;
}
if (Np->is_to_char)
{
*Np->inout_p = '\0';
return Np->inout;
}
else
{
if (*(Np->number_p - 1) == '.')
*(Np->number_p - 1) = '\0';
else
*Np->number_p = '\0';
/*
* Correction - precision of dec. number
*/
Np->Num->post = Np->read_post;
#ifdef DEBUG_TO_FROM_CHAR
elog(DEBUG_elog_output, "TO_NUMBER (number): '%s'", Np->number);
#endif
return Np->number;
}
}
/* ----------
* MACRO: Start part of NUM - for all NUM's to_char variants
* (sorry, but I hate copy same code - macro is better..)
* ----------
*/
#define NUM_TOCHAR_prepare \
do { \
int len = VARSIZE_ANY_EXHDR(fmt); \
if (len <= 0 || len >= (INT_MAX-VARHDRSZ)/NUM_MAX_ITEM_SIZ) \
PG_RETURN_TEXT_P(cstring_to_text("")); \
result = (text *) palloc0((len * NUM_MAX_ITEM_SIZ) + 1 + VARHDRSZ); \
format = NUM_cache(len, &Num, fmt, &shouldFree); \
} while (0)
/* ----------
* MACRO: Finish part of NUM
* ----------
*/
#define NUM_TOCHAR_finish \
do { \
int len; \
\
NUM_processor(format, &Num, VARDATA(result), numstr, 0, out_pre_spaces, sign, true, PG_GET_COLLATION()); \
\
if (shouldFree) \
pfree(format); \
\
/* \
* Convert null-terminated representation of result to standard text. \
* The result is usually much bigger than it needs to be, but there \
* seems little point in realloc'ing it smaller. \
*/ \
len = strlen(VARDATA(result)); \
SET_VARSIZE(result, len + VARHDRSZ); \
} while (0)
/* -------------------
* NUMERIC to_number() (convert string to numeric)
* -------------------
*/
2000-07-01 23:27:14 +02:00
Datum
numeric_to_number(PG_FUNCTION_ARGS)
{
text *value = PG_GETARG_TEXT_PP(0);
text *fmt = PG_GETARG_TEXT_PP(1);
NUMDesc Num;
2001-03-22 05:01:46 +01:00
Datum result;
FormatNode *format;
2001-03-22 05:01:46 +01:00
char *numstr;
bool shouldFree;
2001-03-22 05:01:46 +01:00
int len = 0;
int scale,
precision;
len = VARSIZE_ANY_EXHDR(fmt);
2007-11-15 22:14:46 +01:00
if (len <= 0 || len >= INT_MAX / NUM_MAX_ITEM_SIZ)
2000-07-01 23:27:14 +02:00
PG_RETURN_NULL();
format = NUM_cache(len, &Num, fmt, &shouldFree);
numstr = (char *) palloc((len * NUM_MAX_ITEM_SIZ) + 1);
NUM_processor(format, &Num, VARDATA_ANY(value), numstr,
VARSIZE_ANY_EXHDR(value), 0, 0, false, PG_GET_COLLATION());
scale = Num.post;
precision = Num.pre + Num.multi + scale;
if (shouldFree)
pfree(format);
2000-07-01 23:27:14 +02:00
result = DirectFunctionCall3(numeric_in,
2001-03-22 05:01:46 +01:00
CStringGetDatum(numstr),
ObjectIdGetDatum(InvalidOid),
Int32GetDatum(((precision << 16) | scale) + VARHDRSZ));
if (IS_MULTI(&Num))
{
Numeric x;
Numeric a = DatumGetNumeric(DirectFunctionCall1(int4_numeric,
Int32GetDatum(10)));
Numeric b = DatumGetNumeric(DirectFunctionCall1(int4_numeric,
Int32GetDatum(-Num.multi)));
x = DatumGetNumeric(DirectFunctionCall2(numeric_power,
NumericGetDatum(a),
NumericGetDatum(b)));
result = DirectFunctionCall2(numeric_mul,
result,
NumericGetDatum(x));
}
pfree(numstr);
return result;
}
/* ------------------
* NUMERIC to_char()
* ------------------
*/
2000-07-01 23:27:14 +02:00
Datum
numeric_to_char(PG_FUNCTION_ARGS)
{
2001-03-22 05:01:46 +01:00
Numeric value = PG_GETARG_NUMERIC(0);
text *fmt = PG_GETARG_TEXT_PP(1);
NUMDesc Num;
FormatNode *format;
text *result;
bool shouldFree;
int out_pre_spaces = 0,
2001-03-22 05:01:46 +01:00
sign = 0;
char *numstr,
*orgnum,
*p;
Numeric x;
NUM_TOCHAR_prepare;
/*
* On DateType depend part (numeric)
*/
if (IS_ROMAN(&Num))
{
2000-07-01 23:27:14 +02:00
x = DatumGetNumeric(DirectFunctionCall2(numeric_round,
2001-03-22 05:01:46 +01:00
NumericGetDatum(value),
Int32GetDatum(0)));
numstr = orgnum =
int_to_roman(DatumGetInt32(DirectFunctionCall1(numeric_int4,
NumericGetDatum(x))));
}
else if (IS_EEEE(&Num))
{
orgnum = numeric_out_sci(value, Num.post);
/*
* numeric_out_sci() does not emit a sign for positive numbers. We
* need to add a space in this case so that positive and negative
* numbers are aligned. We also have to do the right thing for NaN.
*/
if (strcmp(orgnum, "NaN") == 0)
{
/*
2010-02-26 03:01:40 +01:00
* Allow 6 characters for the leading sign, the decimal point,
* "e", the exponent's sign and two exponent digits.
*/
numstr = (char *) palloc(Num.pre + Num.post + 7);
fill_str(numstr, '#', Num.pre + Num.post + 6);
*numstr = ' ';
*(numstr + Num.pre + 1) = '.';
}
else if (*orgnum != '-')
{
numstr = (char *) palloc(strlen(orgnum) + 2);
*numstr = ' ';
strcpy(numstr + 1, orgnum);
}
else
{
numstr = orgnum;
}
}
else
{
int numstr_pre_len;
Numeric val = value;
if (IS_MULTI(&Num))
{
2000-07-01 23:27:14 +02:00
Numeric a = DatumGetNumeric(DirectFunctionCall1(int4_numeric,
Int32GetDatum(10)));
2000-07-01 23:27:14 +02:00
Numeric b = DatumGetNumeric(DirectFunctionCall1(int4_numeric,
Int32GetDatum(Num.multi)));
x = DatumGetNumeric(DirectFunctionCall2(numeric_power,
NumericGetDatum(a),
NumericGetDatum(b)));
val = DatumGetNumeric(DirectFunctionCall2(numeric_mul,
2005-10-15 04:49:52 +02:00
NumericGetDatum(value),
NumericGetDatum(x)));
Num.pre += Num.multi;
}
2000-07-01 23:27:14 +02:00
x = DatumGetNumeric(DirectFunctionCall2(numeric_round,
2001-03-22 05:01:46 +01:00
NumericGetDatum(val),
Int32GetDatum(Num.post)));
2000-07-01 23:27:14 +02:00
orgnum = DatumGetCString(DirectFunctionCall1(numeric_out,
2001-03-22 05:01:46 +01:00
NumericGetDatum(x)));
if (*orgnum == '-')
{
sign = '-';
numstr = orgnum + 1;
}
else
{
sign = '+';
numstr = orgnum;
}
if ((p = strchr(numstr, '.')))
numstr_pre_len = p - numstr;
else
numstr_pre_len = strlen(numstr);
/* needs padding? */
if (numstr_pre_len < Num.pre)
out_pre_spaces = Num.pre - numstr_pre_len;
/* overflowed prefix digit format? */
else if (numstr_pre_len > Num.pre)
{
numstr = (char *) palloc(Num.pre + Num.post + 2);
fill_str(numstr, '#', Num.pre + Num.post + 1);
*(numstr + Num.pre) = '.';
}
}
NUM_TOCHAR_finish;
2000-07-01 23:27:14 +02:00
PG_RETURN_TEXT_P(result);
}
/* ---------------
* INT4 to_char()
* ---------------
*/
2000-07-01 23:27:14 +02:00
Datum
int4_to_char(PG_FUNCTION_ARGS)
{
2001-03-22 05:01:46 +01:00
int32 value = PG_GETARG_INT32(0);
text *fmt = PG_GETARG_TEXT_PP(1);
NUMDesc Num;
FormatNode *format;
text *result;
bool shouldFree;
int out_pre_spaces = 0,
2001-03-22 05:01:46 +01:00
sign = 0;
char *numstr,
*orgnum;
NUM_TOCHAR_prepare;
/*
* On DateType depend part (int32)
*/
if (IS_ROMAN(&Num))
numstr = orgnum = int_to_roman(value);
else if (IS_EEEE(&Num))
{
/* we can do it easily because float8 won't lose any precision */
2010-02-26 03:01:40 +01:00
float8 val = (float8) value;
orgnum = (char *) psprintf("%+.*e", Num.post, val);
/*
* Swap a leading positive sign for a space.
*/
if (*orgnum == '+')
*orgnum = ' ';
numstr = orgnum;
}
else
{
int numstr_pre_len;
if (IS_MULTI(&Num))
{
orgnum = DatumGetCString(DirectFunctionCall1(int4out,
Int32GetDatum(value * ((int32) pow((double) 10, (double) Num.multi)))));
Num.pre += Num.multi;
}
else
{
orgnum = DatumGetCString(DirectFunctionCall1(int4out,
Int32GetDatum(value)));
}
if (*orgnum == '-')
{
sign = '-';
orgnum++;
}
else
sign = '+';
numstr_pre_len = strlen(orgnum);
/* post-decimal digits? Pad out with zeros. */
if (Num.post)
{
numstr = (char *) palloc(numstr_pre_len + Num.post + 2);
strcpy(numstr, orgnum);
*(numstr + numstr_pre_len) = '.';
memset(numstr + numstr_pre_len + 1, '0', Num.post);
*(numstr + numstr_pre_len + Num.post + 1) = '\0';
}
else
numstr = orgnum;
/* needs padding? */
if (numstr_pre_len < Num.pre)
out_pre_spaces = Num.pre - numstr_pre_len;
/* overflowed prefix digit format? */
else if (numstr_pre_len > Num.pre)
{
numstr = (char *) palloc(Num.pre + Num.post + 2);
fill_str(numstr, '#', Num.pre + Num.post + 1);
*(numstr + Num.pre) = '.';
}
}
NUM_TOCHAR_finish;
2000-07-01 23:27:14 +02:00
PG_RETURN_TEXT_P(result);
}
/* ---------------
* INT8 to_char()
* ---------------
*/
2000-07-01 23:27:14 +02:00
Datum
int8_to_char(PG_FUNCTION_ARGS)
{
2001-03-22 05:01:46 +01:00
int64 value = PG_GETARG_INT64(0);
text *fmt = PG_GETARG_TEXT_PP(1);
NUMDesc Num;
FormatNode *format;
text *result;
bool shouldFree;
int out_pre_spaces = 0,
2001-03-22 05:01:46 +01:00
sign = 0;
char *numstr,
*orgnum;
NUM_TOCHAR_prepare;
/*
* On DateType depend part (int32)
*/
if (IS_ROMAN(&Num))
{
2000-07-01 23:27:14 +02:00
/* Currently don't support int8 conversion to roman... */
numstr = orgnum = int_to_roman(DatumGetInt32(DirectFunctionCall1(int84, Int64GetDatum(value))));
}
else if (IS_EEEE(&Num))
{
/* to avoid loss of precision, must go via numeric not float8 */
2010-02-26 03:01:40 +01:00
Numeric val;
val = DatumGetNumeric(DirectFunctionCall1(int8_numeric,
Int64GetDatum(value)));
orgnum = numeric_out_sci(val, Num.post);
/*
* numeric_out_sci() does not emit a sign for positive numbers. We
* need to add a space in this case so that positive and negative
* numbers are aligned. We don't have to worry about NaN here.
*/
if (*orgnum != '-')
{
numstr = (char *) palloc(strlen(orgnum) + 2);
*numstr = ' ';
strcpy(numstr + 1, orgnum);
}
else
{
numstr = orgnum;
}
}
else
{
int numstr_pre_len;
if (IS_MULTI(&Num))
{
double multi = pow((double) 10, (double) Num.multi);
2000-07-01 23:27:14 +02:00
value = DatumGetInt64(DirectFunctionCall2(int8mul,
2005-10-15 04:49:52 +02:00
Int64GetDatum(value),
DirectFunctionCall1(dtoi8,
Float8GetDatum(multi))));
Num.pre += Num.multi;
}
2000-07-01 23:27:14 +02:00
orgnum = DatumGetCString(DirectFunctionCall1(int8out,
2005-10-15 04:49:52 +02:00
Int64GetDatum(value)));
if (*orgnum == '-')
{
sign = '-';
orgnum++;
}
else
sign = '+';
numstr_pre_len = strlen(orgnum);
/* post-decimal digits? Pad out with zeros. */
if (Num.post)
{
numstr = (char *) palloc(numstr_pre_len + Num.post + 2);
strcpy(numstr, orgnum);
*(numstr + numstr_pre_len) = '.';
memset(numstr + numstr_pre_len + 1, '0', Num.post);
*(numstr + numstr_pre_len + Num.post + 1) = '\0';
}
else
numstr = orgnum;
/* needs padding? */
if (numstr_pre_len < Num.pre)
out_pre_spaces = Num.pre - numstr_pre_len;
/* overflowed prefix digit format? */
else if (numstr_pre_len > Num.pre)
{
numstr = (char *) palloc(Num.pre + Num.post + 2);
fill_str(numstr, '#', Num.pre + Num.post + 1);
*(numstr + Num.pre) = '.';
}
}
NUM_TOCHAR_finish;
2000-07-01 23:27:14 +02:00
PG_RETURN_TEXT_P(result);
}
/* -----------------
* FLOAT4 to_char()
* -----------------
*/
2000-07-01 23:27:14 +02:00
Datum
float4_to_char(PG_FUNCTION_ARGS)
{
2001-03-22 05:01:46 +01:00
float4 value = PG_GETARG_FLOAT4(0);
text *fmt = PG_GETARG_TEXT_PP(1);
NUMDesc Num;
FormatNode *format;
text *result;
bool shouldFree;
int out_pre_spaces = 0,
2001-03-22 05:01:46 +01:00
sign = 0;
char *numstr,
*orgnum,
*p;
NUM_TOCHAR_prepare;
if (IS_ROMAN(&Num))
2000-07-01 23:27:14 +02:00
numstr = orgnum = int_to_roman((int) rint(value));
else if (IS_EEEE(&Num))
{
if (isnan(value) || isinf(value))
{
/*
2010-02-26 03:01:40 +01:00
* Allow 6 characters for the leading sign, the decimal point,
* "e", the exponent's sign and two exponent digits.
*/
numstr = (char *) palloc(Num.pre + Num.post + 7);
fill_str(numstr, '#', Num.pre + Num.post + 6);
*numstr = ' ';
*(numstr + Num.pre + 1) = '.';
}
else
{
numstr = orgnum = psprintf("%+.*e", Num.post, value);
/*
* Swap a leading positive sign for a space.
*/
if (*orgnum == '+')
*orgnum = ' ';
numstr = orgnum;
}
}
else
{
2000-07-01 23:27:14 +02:00
float4 val = value;
int numstr_pre_len;
if (IS_MULTI(&Num))
{
float multi = pow((double) 10, (double) Num.multi);
2000-07-01 23:27:14 +02:00
val = value * multi;
Num.pre += Num.multi;
}
orgnum = (char *) psprintf("%.0f", fabs(val));
numstr_pre_len = strlen(orgnum);
/* adjust post digits to fit max float digits */
if (numstr_pre_len >= FLT_DIG)
Num.post = 0;
else if (numstr_pre_len + Num.post > FLT_DIG)
Num.post = FLT_DIG - numstr_pre_len;
orgnum = psprintf("%.*f", Num.post, val);
if (*orgnum == '-')
{ /* < 0 */
sign = '-';
numstr = orgnum + 1;
}
else
{
sign = '+';
numstr = orgnum;
}
if ((p = strchr(numstr, '.')))
numstr_pre_len = p - numstr;
else
numstr_pre_len = strlen(numstr);
/* needs padding? */
if (numstr_pre_len < Num.pre)
out_pre_spaces = Num.pre - numstr_pre_len;
/* overflowed prefix digit format? */
else if (numstr_pre_len > Num.pre)
{
numstr = (char *) palloc(Num.pre + Num.post + 2);
fill_str(numstr, '#', Num.pre + Num.post + 1);
*(numstr + Num.pre) = '.';
}
}
NUM_TOCHAR_finish;
2000-07-01 23:27:14 +02:00
PG_RETURN_TEXT_P(result);
}
/* -----------------
* FLOAT8 to_char()
* -----------------
*/
2000-07-01 23:27:14 +02:00
Datum
float8_to_char(PG_FUNCTION_ARGS)
{
2001-03-22 05:01:46 +01:00
float8 value = PG_GETARG_FLOAT8(0);
text *fmt = PG_GETARG_TEXT_PP(1);
NUMDesc Num;
FormatNode *format;
text *result;
bool shouldFree;
int out_pre_spaces = 0,
2001-03-22 05:01:46 +01:00
sign = 0;
char *numstr,
*orgnum,
*p;
NUM_TOCHAR_prepare;
if (IS_ROMAN(&Num))
2000-07-01 23:27:14 +02:00
numstr = orgnum = int_to_roman((int) rint(value));
else if (IS_EEEE(&Num))
{
if (isnan(value) || isinf(value))
{
/*
2010-02-26 03:01:40 +01:00
* Allow 6 characters for the leading sign, the decimal point,
* "e", the exponent's sign and two exponent digits.
*/
numstr = (char *) palloc(Num.pre + Num.post + 7);
fill_str(numstr, '#', Num.pre + Num.post + 6);
*numstr = ' ';
*(numstr + Num.pre + 1) = '.';
}
else
{
numstr = orgnum = (char *) psprintf("%+.*e", Num.post, value);
/*
* Swap a leading positive sign for a space.
*/
if (*orgnum == '+')
*orgnum = ' ';
numstr = orgnum;
}
}
else
{
2000-07-01 23:27:14 +02:00
float8 val = value;
int numstr_pre_len;
if (IS_MULTI(&Num))
{
double multi = pow((double) 10, (double) Num.multi);
2000-07-01 23:27:14 +02:00
val = value * multi;
Num.pre += Num.multi;
}
orgnum = psprintf("%.0f", fabs(val));
numstr_pre_len = strlen(orgnum);
/* adjust post digits to fit max double digits */
if (numstr_pre_len >= DBL_DIG)
Num.post = 0;
else if (numstr_pre_len + Num.post > DBL_DIG)
Num.post = DBL_DIG - numstr_pre_len;
orgnum = psprintf("%.*f", Num.post, val);
if (*orgnum == '-')
{ /* < 0 */
sign = '-';
numstr = orgnum + 1;
}
else
{
sign = '+';
numstr = orgnum;
}
if ((p = strchr(numstr, '.')))
numstr_pre_len = p - numstr;
else
numstr_pre_len = strlen(numstr);
/* needs padding? */
if (numstr_pre_len < Num.pre)
out_pre_spaces = Num.pre - numstr_pre_len;
/* overflowed prefix digit format? */
else if (numstr_pre_len > Num.pre)
{
numstr = (char *) palloc(Num.pre + Num.post + 2);
fill_str(numstr, '#', Num.pre + Num.post + 1);
*(numstr + Num.pre) = '.';
}
}
NUM_TOCHAR_finish;
2000-07-01 23:27:14 +02:00
PG_RETURN_TEXT_P(result);
}