diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml index 335900a86e..4331bebc96 100644 --- a/doc/src/sgml/func.sgml +++ b/doc/src/sgml/func.sgml @@ -6262,16 +6262,57 @@ SELECT regexp_match('abc01234xyz', '(?:(.*?)(\d+)(.*)){1,1}'); to_timestamp and to_date - skip multiple blank spaces in the input string unless the - FX option is used. For example, - to_timestamp('2000    JUN', 'YYYY MON') works, but + skip multiple blank spaces at the beginning of the input string and + around date and time values unless the FX option is used. For example, + to_timestamp(' 2000    JUN', 'YYYY MON') and + to_timestamp('2000 - JUN', 'YYYY-MON') work, but to_timestamp('2000    JUN', 'FXYYYY MON') returns an error - because to_timestamp expects one space only. + because to_timestamp expects a single space only. FX must be specified as the first item in the template. + + + A separator (a space or a non-letter/non-digit character) in the template string of + to_timestamp and to_date + matches any single separator in the input string or is skipped, + unless the FX option is used. + For example, to_timestamp('2000JUN', 'YYYY///MON') and + to_timestamp('2000/JUN', 'YYYY MON') work, but + to_timestamp('2000//JUN', 'YYYY/MON') + returns an error because the number of separators in the input string + exceeds the number of separators in the template. + + + If FX is specified, separator in template string + matches to exactly one character in input string. Notice we don't insist + input string character to be the same as template string separator. + For example, to_timestamp('2000/JUN', 'FXYYYY MON') + works, but to_timestamp('2000/JUN', 'FXYYYY  MON') + returns an error because a space second template string space consumed + letter J from the input string. + + + + + + TZH template pattern can match a signed number. + Without the FX option, it may lead to ambiguity in + interpretation of the minus sign, which can also be interpreted as a separator. + This ambiguity is resolved as follows. If the number of separators before + TZH in the template string is less than the number of + separators before the minus sign in the input string, the minus sign + is interpreted as part of TZH. + Otherwise, the minus sign is considered to be a separator between values. + For example, to_timestamp('2000 -10', 'YYYY TZH') matches + -10 to TZH, but + to_timestamp('2000 -10', 'YYYY  TZH') + matches 10 to TZH. + + + Ordinary text is allowed in to_char @@ -6287,6 +6328,19 @@ SELECT regexp_match('abc01234xyz', '(?:(.*?)(\d+)(.*)){1,1}'); string; for example "XX" skips two input characters (whether or not they are XX). + + + Prior to PostgreSQL 12, it was possible to + skip arbitrary text in the input string using non-letter or non-digit + characters. For example, + to_timestamp('2000y6m1d', 'yyyy-MM-DD') used to + work. Now you can only use letter characters for this purpose. For example, + to_timestamp('2000y6m1d', 'yyyytMMtDDt') and + to_timestamp('2000y6m1d', 'yyyy"y"MM"m"DD"d"') + skip y, m, and + d. + + diff --git a/src/backend/utils/adt/formatting.c b/src/backend/utils/adt/formatting.c index 30696e3575..2ed8ca675b 100644 --- a/src/backend/utils/adt/formatting.c +++ b/src/backend/utils/adt/formatting.c @@ -165,6 +165,8 @@ typedef struct #define NODE_TYPE_END 1 #define NODE_TYPE_ACTION 2 #define NODE_TYPE_CHAR 3 +#define NODE_TYPE_SEPARATOR 4 +#define NODE_TYPE_SPACE 5 #define SUFFTYPE_PREFIX 1 #define SUFFTYPE_POSTFIX 2 @@ -955,6 +957,7 @@ typedef struct NUMProc static const KeyWord *index_seq_search(const char *str, const KeyWord *kw, const int *index); static const KeySuffix *suff_search(const char *str, const KeySuffix *suf, int type); +static bool is_separator_char(const char *str); static void NUMDesc_prepare(NUMDesc *num, FormatNode *n); static void parse_format(FormatNode *node, const char *str, const KeyWord *kw, const KeySuffix *suf, const int *index, int ver, NUMDesc *Num); @@ -1044,6 +1047,16 @@ suff_search(const char *str, const KeySuffix *suf, int type) return NULL; } +static bool +is_separator_char(const char *str) +{ + /* ASCII printable character, but not letter or digit */ + return (*str > 0x20 && *str < 0x7F && + !(*str >= 'A' && *str <= 'Z') && + !(*str >= 'a' && *str <= 'z') && + !(*str >= '0' && *str <= '9')); +} + /* ---------- * Prepare NUMDesc (number description struct) via FormatNode struct * ---------- @@ -1319,7 +1332,14 @@ parse_format(FormatNode *node, const char *str, const KeyWord *kw, if (*str == '\\' && *(str + 1) == '"') str++; chlen = pg_mblen(str); - n->type = NODE_TYPE_CHAR; + + if (ver == DCH_TYPE && is_separator_char(str)) + n->type = NODE_TYPE_SEPARATOR; + else if (isspace((unsigned char) *str)) + n->type = NODE_TYPE_SPACE; + else + n->type = NODE_TYPE_CHAR; + memcpy(n->character, str, chlen); n->character[chlen] = '\0'; n->key = NULL; @@ -2987,27 +3007,66 @@ DCH_from_char(FormatNode *node, char *in, TmFromChar *out) int len, value; bool fx_mode = false; + /* number of extra skipped characters (more than given in format string) */ + int extra_skip = 0; for (n = node, s = in; n->type != NODE_TYPE_END && *s != '\0'; n++) { - if (n->type != NODE_TYPE_ACTION) + /* + * Ignore spaces at the beginning of the string and before fields when + * not in FX (fixed width) mode. + */ + if (!fx_mode && (n->type != NODE_TYPE_ACTION || n->key->id != DCH_FX) && + (n->type == NODE_TYPE_ACTION || n == node)) + { + while (*s != '\0' && isspace((unsigned char) *s)) + { + s++; + extra_skip++; + } + } + + if (n->type == NODE_TYPE_SPACE || n->type == NODE_TYPE_SEPARATOR) + { + if (!fx_mode) + { + /* + * In non FX (fixed format) mode one format string space or + * separator match to one space or separator in input string. + * Or match nothing if there is no space or separator in + * the current position of input string. + */ + extra_skip--; + if (isspace((unsigned char) *s) || is_separator_char(s)) + { + s++; + extra_skip++; + } + } + else + { + /* + * In FX mode, on format string space or separator we consume + * exactly one character from input string. Notice we don't + * insist that the consumed character match the format's + * character. + */ + s += pg_mblen(s); + } + continue; + } + else if (n->type != NODE_TYPE_ACTION) { /* - * Separator, so consume one character from input string. Notice - * we don't insist that the consumed character match the format's - * character. + * Text character, so consume one character from input string. + * Notice we don't insist that the consumed character match the + * format's character. + * Text field ignores FX mode. */ s += pg_mblen(s); continue; } - /* Ignore spaces before fields when not in FX (fixed width) mode */ - if (!fx_mode && n->key->id != DCH_FX) - { - while (*s != '\0' && isspace((unsigned char) *s)) - s++; - } - from_char_set_mode(out, n->key->date_mode); switch (n->key->id) @@ -3086,10 +3145,24 @@ DCH_from_char(FormatNode *node, char *in, TmFromChar *out) n->key->name))); break; case DCH_TZH: - out->tzsign = *s == '-' ? -1 : +1; - + /* + * Value of TZH might be negative. And the issue is that we + * might swallow minus sign as the separator. So, if we have + * skipped more characters than specified in the format string, + * then we consider prepending last skipped minus to TZH. + */ if (*s == '+' || *s == '-' || *s == ' ') + { + out->tzsign = *s == '-' ? -1 : +1; s++; + } + else + { + if (extra_skip > 0 && *(s - 1) == '-') + out->tzsign = -1; + else + out->tzsign = +1; + } from_char_parse_int_len(&out->tzh, &s, 2, n); break; @@ -3261,6 +3334,17 @@ DCH_from_char(FormatNode *node, char *in, TmFromChar *out) SKIP_THth(s, n->suffix); break; } + + /* Ignore all spaces after fields */ + if (!fx_mode) + { + extra_skip = 0; + while (*s != '\0' && isspace((unsigned char) *s)) + { + s++; + extra_skip++; + } + } } } diff --git a/src/test/regress/expected/horology.out b/src/test/regress/expected/horology.out index 63e39198e6..7d11f25158 100644 --- a/src/test/regress/expected/horology.out +++ b/src/test/regress/expected/horology.out @@ -2769,14 +2769,32 @@ SELECT to_timestamp('97/2/16 8:14:30', 'FMYYYY/FMMM/FMDD FMHH:FMMI:FMSS'); Sat Feb 16 08:14:30 0097 PST (1 row) +SELECT to_timestamp('2011$03!18 23_38_15', 'YYYY-MM-DD HH24:MI:SS'); + to_timestamp +------------------------------ + Fri Mar 18 23:38:15 2011 PDT +(1 row) + SELECT to_timestamp('1985 January 12', 'YYYY FMMonth DD'); to_timestamp ------------------------------ Sat Jan 12 00:00:00 1985 PST (1 row) +SELECT to_timestamp('1985 FMMonth 12', 'YYYY "FMMonth" DD'); + to_timestamp +------------------------------ + Sat Jan 12 00:00:00 1985 PST +(1 row) + +SELECT to_timestamp('1985 \ 12', 'YYYY \\ DD'); + to_timestamp +------------------------------ + Sat Jan 12 00:00:00 1985 PST +(1 row) + SELECT to_timestamp('My birthday-> Year: 1976, Month: May, Day: 16', - '"My birthday-> Year" YYYY, "Month:" FMMonth, "Day:" DD'); + '"My birthday-> Year:" YYYY, "Month:" FMMonth, "Day:" DD'); to_timestamp ------------------------------ Sun May 16 00:00:00 1976 PDT @@ -2789,7 +2807,7 @@ SELECT to_timestamp('1,582nd VIII 21', 'Y,YYYth FMRM DD'); (1 row) SELECT to_timestamp('15 "text between quote marks" 98 54 45', - E'HH24 "\\text between quote marks\\"" YY MI SS'); + E'HH24 "\\"text between quote marks\\"" YY MI SS'); to_timestamp ------------------------------ Thu Jan 01 15:54:45 1998 PST @@ -2810,6 +2828,24 @@ SELECT to_timestamp('2000January09Sunday', 'YYYYFMMonthDDFMDay'); SELECT to_timestamp('97/Feb/16', 'YYMonDD'); ERROR: invalid value "/Fe" for "Mon" DETAIL: The given value did not match any of the allowed values for this field. +SELECT to_timestamp('97/Feb/16', 'YY:Mon:DD'); + to_timestamp +------------------------------ + Sun Feb 16 00:00:00 1997 PST +(1 row) + +SELECT to_timestamp('97/Feb/16', 'FXYY:Mon:DD'); + to_timestamp +------------------------------ + Sun Feb 16 00:00:00 1997 PST +(1 row) + +SELECT to_timestamp('97/Feb/16', 'FXYY/Mon/DD'); + to_timestamp +------------------------------ + Sun Feb 16 00:00:00 1997 PST +(1 row) + SELECT to_timestamp('19971116', 'YYYYMMDD'); to_timestamp ------------------------------ @@ -2966,7 +3002,7 @@ SELECT to_timestamp('2011-12-18 11:38 20', 'YYYY-MM-DD HH12:MI TZM'); SELECT to_timestamp('2011-12-18 23:38:15', 'YYYY-MM-DD HH24:MI:SS'); to_timestamp ------------------------------ - Sun Dec 18 03:38:15 2011 PST + Sun Dec 18 23:38:15 2011 PST (1 row) SELECT to_timestamp('2011-12-18 23:38:15', 'YYYY-MM-DD HH24:MI:SS'); @@ -2996,7 +3032,64 @@ SELECT to_timestamp('2011-12-18 23:38:15', 'YYYY-MM-DD HH24:MI:SS'); SELECT to_timestamp('2011-12-18 23:38:15', 'YYYY-MM-DD HH24:MI:SS'); to_timestamp ------------------------------ - Sun Dec 18 03:38:15 2011 PST + Sun Dec 18 23:38:15 2011 PST +(1 row) + +SELECT to_timestamp('2000+ JUN', 'YYYY/MON'); + to_timestamp +------------------------------ + Thu Jun 01 00:00:00 2000 PDT +(1 row) + +SELECT to_timestamp(' 2000 +JUN', 'YYYY/MON'); + to_timestamp +------------------------------ + Thu Jun 01 00:00:00 2000 PDT +(1 row) + +SELECT to_timestamp(' 2000 +JUN', 'YYYY//MON'); + to_timestamp +------------------------------ + Thu Jun 01 00:00:00 2000 PDT +(1 row) + +SELECT to_timestamp('2000 +JUN', 'YYYY//MON'); + to_timestamp +------------------------------ + Thu Jun 01 00:00:00 2000 PDT +(1 row) + +SELECT to_timestamp('2000 + JUN', 'YYYY MON'); + to_timestamp +------------------------------ + Thu Jun 01 00:00:00 2000 PDT +(1 row) + +SELECT to_timestamp('2000 ++ JUN', 'YYYY MON'); + to_timestamp +------------------------------ + Thu Jun 01 00:00:00 2000 PDT +(1 row) + +SELECT to_timestamp('2000 + + JUN', 'YYYY MON'); +ERROR: invalid value "+ J" for "MON" +DETAIL: The given value did not match any of the allowed values for this field. +SELECT to_timestamp('2000 + + JUN', 'YYYY MON'); + to_timestamp +------------------------------ + Thu Jun 01 00:00:00 2000 PDT +(1 row) + +SELECT to_timestamp('2000 -10', 'YYYY TZH'); + to_timestamp +------------------------------ + Sat Jan 01 02:00:00 2000 PST +(1 row) + +SELECT to_timestamp('2000 -10', 'YYYY TZH'); + to_timestamp +------------------------------ + Fri Dec 31 06:00:00 1999 PST (1 row) SELECT to_date('2011 12 18', 'YYYY MM DD'); @@ -3014,13 +3107,13 @@ SELECT to_date('2011 12 18', 'YYYY MM DD'); SELECT to_date('2011 12 18', 'YYYY MM DD'); to_date ------------ - 12-08-2011 + 12-18-2011 (1 row) SELECT to_date('2011 12 18', 'YYYY MM DD'); to_date ------------ - 02-18-2011 + 12-18-2011 (1 row) SELECT to_date('2011 12 18', 'YYYY MM DD'); diff --git a/src/test/regress/sql/horology.sql b/src/test/regress/sql/horology.sql index ebb196a1cf..807037be76 100644 --- a/src/test/regress/sql/horology.sql +++ b/src/test/regress/sql/horology.sql @@ -392,15 +392,21 @@ SELECT to_timestamp('0097/Feb/16 --> 08:14:30', 'YYYY/Mon/DD --> HH:MI:SS'); SELECT to_timestamp('97/2/16 8:14:30', 'FMYYYY/FMMM/FMDD FMHH:FMMI:FMSS'); +SELECT to_timestamp('2011$03!18 23_38_15', 'YYYY-MM-DD HH24:MI:SS'); + SELECT to_timestamp('1985 January 12', 'YYYY FMMonth DD'); +SELECT to_timestamp('1985 FMMonth 12', 'YYYY "FMMonth" DD'); + +SELECT to_timestamp('1985 \ 12', 'YYYY \\ DD'); + SELECT to_timestamp('My birthday-> Year: 1976, Month: May, Day: 16', - '"My birthday-> Year" YYYY, "Month:" FMMonth, "Day:" DD'); + '"My birthday-> Year:" YYYY, "Month:" FMMonth, "Day:" DD'); SELECT to_timestamp('1,582nd VIII 21', 'Y,YYYth FMRM DD'); SELECT to_timestamp('15 "text between quote marks" 98 54 45', - E'HH24 "\\text between quote marks\\"" YY MI SS'); + E'HH24 "\\"text between quote marks\\"" YY MI SS'); SELECT to_timestamp('05121445482000', 'MMDDHH24MISSYYYY'); @@ -408,6 +414,12 @@ SELECT to_timestamp('2000January09Sunday', 'YYYYFMMonthDDFMDay'); SELECT to_timestamp('97/Feb/16', 'YYMonDD'); +SELECT to_timestamp('97/Feb/16', 'YY:Mon:DD'); + +SELECT to_timestamp('97/Feb/16', 'FXYY:Mon:DD'); + +SELECT to_timestamp('97/Feb/16', 'FXYY/Mon/DD'); + SELECT to_timestamp('19971116', 'YYYYMMDD'); SELECT to_timestamp('20000-1116', 'YYYY-MMDD'); @@ -464,6 +476,17 @@ SELECT to_timestamp('2011-12-18 23:38:15', 'YYYY-MM-DD HH24:MI:SS'); SELECT to_timestamp('2011-12-18 23:38:15', 'YYYY-MM-DD HH24:MI:SS'); SELECT to_timestamp('2011-12-18 23:38:15', 'YYYY-MM-DD HH24:MI:SS'); +SELECT to_timestamp('2000+ JUN', 'YYYY/MON'); +SELECT to_timestamp(' 2000 +JUN', 'YYYY/MON'); +SELECT to_timestamp(' 2000 +JUN', 'YYYY//MON'); +SELECT to_timestamp('2000 +JUN', 'YYYY//MON'); +SELECT to_timestamp('2000 + JUN', 'YYYY MON'); +SELECT to_timestamp('2000 ++ JUN', 'YYYY MON'); +SELECT to_timestamp('2000 + + JUN', 'YYYY MON'); +SELECT to_timestamp('2000 + + JUN', 'YYYY MON'); +SELECT to_timestamp('2000 -10', 'YYYY TZH'); +SELECT to_timestamp('2000 -10', 'YYYY TZH'); + SELECT to_date('2011 12 18', 'YYYY MM DD'); SELECT to_date('2011 12 18', 'YYYY MM DD'); SELECT to_date('2011 12 18', 'YYYY MM DD');