diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml index 67de029c6a..a58835082b 100644 --- a/doc/src/sgml/func.sgml +++ b/doc/src/sgml/func.sgml @@ -5832,6 +5832,17 @@ SELECT regexp_match('abc01234xyz', '(?:(.*?)(\d+)(.*)){1,1}'); + + + to_timestamp and to_date + exist to handle input formats that cannot be converted by + simple casting. For most standard date/time formats, simply casting the + source string to the required data type works, and is much easier. + Similarly, to_number is unnecessary for standard numeric + representations. + + + In a to_char output template string, there are certain patterns that are recognized and replaced with appropriately-formatted @@ -6038,7 +6049,7 @@ SELECT regexp_match('abc01234xyz', '(?:(.*?)(\d+)(.*)){1,1}'); Q - quarter (ignored by to_date and to_timestamp) + quarter RM @@ -6156,20 +6167,6 @@ SELECT regexp_match('abc01234xyz', '(?:(.*?)(\d+)(.*)){1,1}'); - - - to_timestamp and to_date - exist to handle input formats that cannot be converted by - simple casting. These functions interpret input liberally, - with minimal error checking. While they produce valid output, - the conversion can yield unexpected results. For example, - input to these functions is not restricted by normal ranges, - thus to_date('20096040','YYYYMMDD') returns - 2014-01-17 rather than causing an error. - Casting does not have this behavior. - - - Ordinary text is allowed in to_char @@ -6195,7 +6192,8 @@ SELECT regexp_match('abc01234xyz', '(?:(.*?)(\d+)(.*)){1,1}'); - If the year format specification is less than four digits, e.g. + In to_timestamp and to_date, + if the year format specification is less than four digits, e.g. YYY, and the supplied year is less than four digits, the year will be adjusted to be nearest to the year 2020, e.g. 95 becomes 1995. @@ -6204,8 +6202,9 @@ SELECT regexp_match('abc01234xyz', '(?:(.*?)(\d+)(.*)){1,1}'); - The YYYY conversion from string to timestamp or - date has a restriction when processing years with more than 4 digits. You must + In to_timestamp and to_date, + the YYYY conversion has a restriction when + processing years with more than 4 digits. You must use some non-digit character or template after YYYY, otherwise the year is always interpreted as 4 digits. For example (with the year 20000): @@ -6219,12 +6218,12 @@ SELECT regexp_match('abc01234xyz', '(?:(.*?)(\d+)(.*)){1,1}'); - In conversions from string to timestamp or - date, the CC (century) field is ignored + In to_timestamp and to_date, + the CC (century) field is accepted but ignored if there is a YYY, YYYY or Y,YYY field. If CC is used with - YY or Y then the year is computed - as the year in the specified century. If the century is + YY or Y then the result is + computed as that year in the specified century. If the century is specified but the year is not, the first year of the century is assumed. @@ -6232,9 +6231,19 @@ SELECT regexp_match('abc01234xyz', '(?:(.*?)(\d+)(.*)){1,1}'); - An ISO 8601 week-numbering date (as distinct from a Gregorian date) - can be specified to to_timestamp and - to_date in one of two ways: + In to_timestamp and to_date, + weekday names or numbers (DAY, D, + and related field types) are accepted but are ignored for purposes of + computing the result. The same is true for quarter + (Q) fields. + + + + + + In to_timestamp and to_date, + an ISO 8601 week-numbering date (as distinct from a Gregorian date) + can be specified in one of two ways: @@ -6276,23 +6285,24 @@ SELECT regexp_match('abc01234xyz', '(?:(.*?)(\d+)(.*)){1,1}'); - In a conversion from string to timestamp, millisecond + In to_timestamp, millisecond (MS) or microsecond (US) - values are used as the + fields are used as the seconds digits after the decimal point. For example - to_timestamp('12:3', 'SS:MS') is not 3 milliseconds, - but 300, because the conversion counts it as 12 + 0.3 seconds. - This means for the format SS:MS, the input values - 12:3, 12:30, and 12:300 specify the - same number of milliseconds. To get three milliseconds, one must use - 12:003, which the conversion counts as + to_timestamp('12.3', 'SS.MS') is not 3 milliseconds, + but 300, because the conversion treats it as 12 + 0.3 seconds. + So, for the format SS.MS, the input values + 12.3, 12.30, + and 12.300 specify the + same number of milliseconds. To get three milliseconds, one must write + 12.003, which the conversion treats as 12 + 0.003 = 12.003 seconds. Here is a more complex example: - to_timestamp('15:12:02.020.001230', 'HH:MI:SS.MS.US') + to_timestamp('15:12:02.020.001230', 'HH24:MI:SS.MS.US') is 15 hours, 12 minutes, and 2 seconds + 20 milliseconds + 1230 microseconds = 2.021230 seconds. @@ -6310,9 +6320,10 @@ SELECT regexp_match('abc01234xyz', '(?:(.*?)(\d+)(.*)){1,1}'); to_char(interval) formats HH and - HH12 as shown on a 12-hour clock, i.e. zero hours - and 36 hours output as 12, while HH24 - outputs the full hour value, which can exceed 23 for intervals. + HH12 as shown on a 12-hour clock, for example zero hours + and 36 hours both output as 12, while HH24 + outputs the full hour value, which can exceed 23 in + an interval value. diff --git a/src/backend/utils/adt/formatting.c b/src/backend/utils/adt/formatting.c index bbd97dc84b..d2d23d31ff 100644 --- a/src/backend/utils/adt/formatting.c +++ b/src/backend/utils/adt/formatting.c @@ -3553,9 +3553,6 @@ to_date(PG_FUNCTION_ARGS) * * The TmFromChar is then analysed and converted into the final results in * struct 'tm' and 'fsec'. - * - * This function does very little error checking, e.g. - * to_timestamp('20096040','YYYYMMDD') works */ static void do_to_timestamp(text *date_txt, text *fmt, @@ -3564,30 +3561,35 @@ do_to_timestamp(text *date_txt, text *fmt, FormatNode *format; TmFromChar tmfc; int fmt_len; + char *date_str; + int fmask; + + date_str = text_to_cstring(date_txt); ZERO_tmfc(&tmfc); ZERO_tm(tm); *fsec = 0; + fmask = 0; /* bit mask for ValidateDate() */ fmt_len = VARSIZE_ANY_EXHDR(fmt); if (fmt_len) { char *fmt_str; - char *date_str; bool incache; fmt_str = text_to_cstring(fmt); - /* - * Allocate new memory if format picture is bigger than static cache - * and not use cache (call parser always) - */ if (fmt_len > DCH_CACHE_SIZE) { - format = (FormatNode *) palloc((fmt_len + 1) * sizeof(FormatNode)); + /* + * Allocate new memory if format picture is bigger than static + * cache and not use cache (call parser always) + */ incache = FALSE; + format = (FormatNode *) palloc((fmt_len + 1) * sizeof(FormatNode)); + parse_format(format, fmt_str, DCH_keywords, DCH_suff, DCH_index, DCH_TYPE, NULL); @@ -3604,33 +3606,27 @@ do_to_timestamp(text *date_txt, text *fmt, if ((ent = DCH_cache_search(fmt_str)) == NULL) { - ent = DCH_cache_getnew(fmt_str); - /* * Not in the cache, must run parser and save a new * format-picture to the cache. */ + ent = DCH_cache_getnew(fmt_str); + parse_format(ent->format, fmt_str, DCH_keywords, DCH_suff, DCH_index, DCH_TYPE, NULL); (ent->format + fmt_len)->type = NODE_TYPE_END; /* Paranoia? */ -#ifdef DEBUG_TO_FROM_CHAR - /* dump_node(ent->format, fmt_len); */ - /* dump_index(DCH_keywords, DCH_index); */ -#endif } format = ent->format; } #ifdef DEBUG_TO_FROM_CHAR /* dump_node(format, fmt_len); */ + /* dump_index(DCH_keywords, DCH_index); */ #endif - date_str = text_to_cstring(date_txt); - DCH_from_char(format, date_str, &tmfc); - pfree(date_str); pfree(fmt_str); if (!incache) pfree(format); @@ -3639,8 +3635,7 @@ do_to_timestamp(text *date_txt, text *fmt, DEBUG_TMFC(&tmfc); /* - * Convert values that user define for FROM_CHAR (to_date/to_timestamp) to - * standard 'tm' + * Convert to_date/to_timestamp input fields to standard 'tm' */ if (tmfc.ssss) { @@ -3696,19 +3691,23 @@ do_to_timestamp(text *date_txt, text *fmt, tm->tm_year = (tmfc.cc + 1) * 100 - tm->tm_year + 1; } else + { /* find century year for dates ending in "00" */ tm->tm_year = tmfc.cc * 100 + ((tmfc.cc >= 0) ? 0 : 1); + } } else - /* If a 4-digit year is provided, we use that and ignore CC. */ { + /* If a 4-digit year is provided, we use that and ignore CC. */ tm->tm_year = tmfc.year; if (tmfc.bc && tm->tm_year > 0) tm->tm_year = -(tm->tm_year - 1); } + fmask |= DTK_M(YEAR); } - else if (tmfc.cc) /* use first year of century */ + else if (tmfc.cc) { + /* use first year of century */ if (tmfc.bc) tmfc.cc = -tmfc.cc; if (tmfc.cc >= 0) @@ -3717,10 +3716,14 @@ do_to_timestamp(text *date_txt, text *fmt, else /* +1 because year == 599 is 600 BC */ tm->tm_year = tmfc.cc * 100 + 1; + fmask |= DTK_M(YEAR); } if (tmfc.j) + { j2date(tmfc.j, &tm->tm_year, &tm->tm_mon, &tm->tm_mday); + fmask |= DTK_DATE_M; + } if (tmfc.ww) { @@ -3734,6 +3737,7 @@ do_to_timestamp(text *date_txt, text *fmt, isoweekdate2date(tmfc.ww, tmfc.d, &tm->tm_year, &tm->tm_mon, &tm->tm_mday); else isoweek2date(tmfc.ww, &tm->tm_year, &tm->tm_mon, &tm->tm_mday); + fmask |= DTK_DATE_M; } else tmfc.ddd = (tmfc.ww - 1) * 7 + 1; @@ -3741,14 +3745,16 @@ do_to_timestamp(text *date_txt, text *fmt, if (tmfc.w) tmfc.dd = (tmfc.w - 1) * 7 + 1; - if (tmfc.d) - tm->tm_wday = tmfc.d - 1; /* convert to native numbering */ if (tmfc.dd) + { tm->tm_mday = tmfc.dd; - if (tmfc.ddd) - tm->tm_yday = tmfc.ddd; + fmask |= DTK_M(DAY); + } if (tmfc.mm) + { tm->tm_mon = tmfc.mm; + fmask |= DTK_M(MONTH); + } if (tmfc.ddd && (tm->tm_mon <= 1 || tm->tm_mday <= 1)) { @@ -3771,6 +3777,7 @@ do_to_timestamp(text *date_txt, text *fmt, j0 = isoweek2j(tm->tm_year, 1) - 1; j2date(j0 + tmfc.ddd, &tm->tm_year, &tm->tm_mon, &tm->tm_mday); + fmask |= DTK_DATE_M; } else { @@ -3785,7 +3792,7 @@ do_to_timestamp(text *date_txt, text *fmt, for (i = 1; i <= MONTHS_PER_YEAR; i++) { - if (tmfc.ddd < y[i]) + if (tmfc.ddd <= y[i]) break; } if (tm->tm_mon <= 1) @@ -3793,6 +3800,8 @@ do_to_timestamp(text *date_txt, text *fmt, if (tm->tm_mday <= 1) tm->tm_mday = tmfc.ddd - y[i - 1]; + + fmask |= DTK_M(MONTH) | DTK_M(DAY); } } @@ -3808,7 +3817,38 @@ do_to_timestamp(text *date_txt, text *fmt, *fsec += (double) tmfc.us / 1000000; #endif + /* Range-check date fields according to bit mask computed above */ + if (fmask != 0) + { + /* We already dealt with AD/BC, so pass isjulian = true */ + int dterr = ValidateDate(fmask, true, false, false, tm); + + if (dterr != 0) + { + /* + * Force the error to be DTERR_FIELD_OVERFLOW even if ValidateDate + * said DTERR_MD_FIELD_OVERFLOW, because we don't want to print an + * irrelevant hint about datestyle. + */ + DateTimeParseError(DTERR_FIELD_OVERFLOW, date_str, "timestamp"); + } + } + + /* Range-check time fields too */ + if (tm->tm_hour < 0 || tm->tm_hour >= HOURS_PER_DAY || + tm->tm_min < 0 || tm->tm_min >= MINS_PER_HOUR || + tm->tm_sec < 0 || tm->tm_sec >= SECS_PER_MINUTE || +#ifdef HAVE_INT64_TIMESTAMP + *fsec < INT64CONST(0) || *fsec >= USECS_PER_SEC +#else + *fsec < 0 || *fsec >= 1 +#endif + ) + DateTimeParseError(DTERR_FIELD_OVERFLOW, date_str, "timestamp"); + DEBUG_TM(tm); + + pfree(date_str); } diff --git a/src/test/regress/expected/horology.out b/src/test/regress/expected/horology.out index 1fe02be093..f9d12e0f8a 100644 --- a/src/test/regress/expected/horology.out +++ b/src/test/regress/expected/horology.out @@ -2822,6 +2822,18 @@ SELECT to_timestamp('20000-1116', 'YYYY-MMDD'); Thu Nov 16 00:00:00 20000 PST (1 row) +SELECT to_timestamp('1997 AD 11 16', 'YYYY BC MM DD'); + to_timestamp +------------------------------ + Sun Nov 16 00:00:00 1997 PST +(1 row) + +SELECT to_timestamp('1997 BC 11 16', 'YYYY BC MM DD'); + to_timestamp +--------------------------------- + Tue Nov 16 00:00:00 1997 PST BC +(1 row) + SELECT to_timestamp('9-1116', 'Y-MMDD'); to_timestamp ------------------------------ @@ -2906,6 +2918,18 @@ SELECT to_timestamp(' 20050302', 'YYYYMMDD'); Wed Mar 02 00:00:00 2005 PST (1 row) +SELECT to_timestamp('2011-12-18 11:38 AM', 'YYYY-MM-DD HH12:MI PM'); + to_timestamp +------------------------------ + Sun Dec 18 11:38:00 2011 PST +(1 row) + +SELECT to_timestamp('2011-12-18 11:38 PM', 'YYYY-MM-DD HH12:MI PM'); + to_timestamp +------------------------------ + Sun Dec 18 23:38:00 2011 PST +(1 row) + -- -- Check handling of multiple spaces in format and/or input -- @@ -2982,7 +3006,7 @@ SELECT to_date('2011 12 18', 'YYYY MM DD'); (1 row) -- --- Check errors for some incorrect usages of to_timestamp() +-- Check errors for some incorrect usages of to_timestamp() and to_date() -- -- Mixture of date conventions (ISO week and Gregorian): SELECT to_timestamp('2005527', 'YYYYIWID'); @@ -3010,6 +3034,76 @@ DETAIL: Value must be an integer. SELECT to_timestamp('10000000000', 'FMYYYY'); ERROR: value for "YYYY" in source string is out of range DETAIL: Value must be in the range -2147483648 to 2147483647. +-- Out-of-range and not-quite-out-of-range fields: +SELECT to_timestamp('2016-06-13 25:00:00', 'YYYY-MM-DD HH24:MI:SS'); +ERROR: date/time field value out of range: "2016-06-13 25:00:00" +SELECT to_timestamp('2016-06-13 15:60:00', 'YYYY-MM-DD HH24:MI:SS'); +ERROR: date/time field value out of range: "2016-06-13 15:60:00" +SELECT to_timestamp('2016-06-13 15:50:60', 'YYYY-MM-DD HH24:MI:SS'); +ERROR: date/time field value out of range: "2016-06-13 15:50:60" +SELECT to_timestamp('2016-06-13 15:50:55', 'YYYY-MM-DD HH24:MI:SS'); -- ok + to_timestamp +------------------------------ + Mon Jun 13 15:50:55 2016 PDT +(1 row) + +SELECT to_timestamp('2016-06-13 15:50:55', 'YYYY-MM-DD HH:MI:SS'); +ERROR: hour "15" is invalid for the 12-hour clock +HINT: Use the 24-hour clock, or give an hour between 1 and 12. +SELECT to_timestamp('2016-13-01 15:50:55', 'YYYY-MM-DD HH24:MI:SS'); +ERROR: date/time field value out of range: "2016-13-01 15:50:55" +SELECT to_timestamp('2016-02-30 15:50:55', 'YYYY-MM-DD HH24:MI:SS'); +ERROR: date/time field value out of range: "2016-02-30 15:50:55" +SELECT to_timestamp('2016-02-29 15:50:55', 'YYYY-MM-DD HH24:MI:SS'); -- ok + to_timestamp +------------------------------ + Mon Feb 29 15:50:55 2016 PST +(1 row) + +SELECT to_timestamp('2015-02-29 15:50:55', 'YYYY-MM-DD HH24:MI:SS'); +ERROR: date/time field value out of range: "2015-02-29 15:50:55" +SELECT to_timestamp('2015-02-11 86000', 'YYYY-MM-DD SSSS'); -- ok + to_timestamp +------------------------------ + Wed Feb 11 23:53:20 2015 PST +(1 row) + +SELECT to_timestamp('2015-02-11 86400', 'YYYY-MM-DD SSSS'); +ERROR: date/time field value out of range: "2015-02-11 86400" +SELECT to_date('2016-13-10', 'YYYY-MM-DD'); +ERROR: date/time field value out of range: "2016-13-10" +SELECT to_date('2016-02-30', 'YYYY-MM-DD'); +ERROR: date/time field value out of range: "2016-02-30" +SELECT to_date('2016-02-29', 'YYYY-MM-DD'); -- ok + to_date +------------ + 02-29-2016 +(1 row) + +SELECT to_date('2015-02-29', 'YYYY-MM-DD'); +ERROR: date/time field value out of range: "2015-02-29" +SELECT to_date('2015 365', 'YYYY DDD'); -- ok + to_date +------------ + 12-31-2015 +(1 row) + +SELECT to_date('2015 366', 'YYYY DDD'); +ERROR: date/time field value out of range: "2015 366" +SELECT to_date('2016 365', 'YYYY DDD'); -- ok + to_date +------------ + 12-30-2016 +(1 row) + +SELECT to_date('2016 366', 'YYYY DDD'); -- ok + to_date +------------ + 12-31-2016 +(1 row) + +SELECT to_date('2016 367', 'YYYY DDD'); +ERROR: date/time field value out of range: "2016 367" -- -- Check behavior with SQL-style fixed-GMT-offset time zone (cf bug #8572) -- diff --git a/src/test/regress/sql/horology.sql b/src/test/regress/sql/horology.sql index c81437ba35..a7bc9dcfc4 100644 --- a/src/test/regress/sql/horology.sql +++ b/src/test/regress/sql/horology.sql @@ -412,6 +412,9 @@ SELECT to_timestamp('19971116', 'YYYYMMDD'); SELECT to_timestamp('20000-1116', 'YYYY-MMDD'); +SELECT to_timestamp('1997 AD 11 16', 'YYYY BC MM DD'); +SELECT to_timestamp('1997 BC 11 16', 'YYYY BC MM DD'); + SELECT to_timestamp('9-1116', 'Y-MMDD'); SELECT to_timestamp('95-1116', 'YY-MMDD'); @@ -440,6 +443,9 @@ SELECT to_timestamp(' 2005 03 02', 'YYYYMMDD'); SELECT to_timestamp(' 20050302', 'YYYYMMDD'); +SELECT to_timestamp('2011-12-18 11:38 AM', 'YYYY-MM-DD HH12:MI PM'); +SELECT to_timestamp('2011-12-18 11:38 PM', 'YYYY-MM-DD HH12:MI PM'); + -- -- Check handling of multiple spaces in format and/or input -- @@ -461,7 +467,7 @@ SELECT to_date('2011 12 18', 'YYYY MM DD'); SELECT to_date('2011 12 18', 'YYYY MM DD'); -- --- Check errors for some incorrect usages of to_timestamp() +-- Check errors for some incorrect usages of to_timestamp() and to_date() -- -- Mixture of date conventions (ISO week and Gregorian): @@ -482,6 +488,28 @@ SELECT to_timestamp('199711xy', 'YYYYMMDD'); -- Input that doesn't fit in an int: SELECT to_timestamp('10000000000', 'FMYYYY'); +-- Out-of-range and not-quite-out-of-range fields: +SELECT to_timestamp('2016-06-13 25:00:00', 'YYYY-MM-DD HH24:MI:SS'); +SELECT to_timestamp('2016-06-13 15:60:00', 'YYYY-MM-DD HH24:MI:SS'); +SELECT to_timestamp('2016-06-13 15:50:60', 'YYYY-MM-DD HH24:MI:SS'); +SELECT to_timestamp('2016-06-13 15:50:55', 'YYYY-MM-DD HH24:MI:SS'); -- ok +SELECT to_timestamp('2016-06-13 15:50:55', 'YYYY-MM-DD HH:MI:SS'); +SELECT to_timestamp('2016-13-01 15:50:55', 'YYYY-MM-DD HH24:MI:SS'); +SELECT to_timestamp('2016-02-30 15:50:55', 'YYYY-MM-DD HH24:MI:SS'); +SELECT to_timestamp('2016-02-29 15:50:55', 'YYYY-MM-DD HH24:MI:SS'); -- ok +SELECT to_timestamp('2015-02-29 15:50:55', 'YYYY-MM-DD HH24:MI:SS'); +SELECT to_timestamp('2015-02-11 86000', 'YYYY-MM-DD SSSS'); -- ok +SELECT to_timestamp('2015-02-11 86400', 'YYYY-MM-DD SSSS'); +SELECT to_date('2016-13-10', 'YYYY-MM-DD'); +SELECT to_date('2016-02-30', 'YYYY-MM-DD'); +SELECT to_date('2016-02-29', 'YYYY-MM-DD'); -- ok +SELECT to_date('2015-02-29', 'YYYY-MM-DD'); +SELECT to_date('2015 365', 'YYYY DDD'); -- ok +SELECT to_date('2015 366', 'YYYY DDD'); +SELECT to_date('2016 365', 'YYYY DDD'); -- ok +SELECT to_date('2016 366', 'YYYY DDD'); -- ok +SELECT to_date('2016 367', 'YYYY DDD'); + -- -- Check behavior with SQL-style fixed-GMT-offset time zone (cf bug #8572) --