From 66c74f8b6e347ba5830bf06468bef8081601c187 Mon Sep 17 00:00:00 2001
From: Alexander Korotkov
Date: Wed, 25 Sep 2019 21:50:55 +0300
Subject: [PATCH] Implement parse_datetime() function

This commit adds parse_datetime() function, which implements datetime
parsing with extended features demanded by upcoming jsonpath .datetime()
method:

 * Dynamic type identification based on template string,
 * Support for standard-conforming 'strict' mode,
 * Timezone offset is returned as separate value.

Extracted from original patch by Nikita Glukhov, Teodor Sigaev, Oleg Bartunov.
Revised by me.

Discussion: https://postgr.es/m/fcc6fc6a-b497-f39a-923d-aa34d0c588e8%402ndQuadrant.com
Discussion: https://postgr.es/m/CAPpHfdsZgYEra_PeCLGNoXOWYx6iU-S3wF8aX0ObQUcZU%2B4XTw%40mail.gmail.com
Author: Nikita Glukhov, Teodor Sigaev, Oleg Bartunov, Alexander Korotkov
Reviewed-by: Anastasia Lubennikova, Peter Eisentraut
---
 src/backend/utils/adt/date.c       |  11 +-
 src/backend/utils/adt/formatting.c | 291 ++++++++++++++++++++++++++++-
 src/include/utils/date.h           |   3 +
 src/include/utils/formatting.h     |   3 +
 4 files changed, 296 insertions(+), 12 deletions(-)

diff --git a/src/backend/utils/adt/date.c b/src/backend/utils/adt/date.c
index 4b1afb10f9..9e291b5c7b 100644
--- a/src/backend/utils/adt/date.c
+++ b/src/backend/utils/adt/date.c
@@ -41,11 +41,6 @@
 #endif
 
 
-static int	tm2time(struct pg_tm *tm, fsec_t fsec, TimeADT *result);
-static int	tm2timetz(struct pg_tm *tm, fsec_t fsec, int tz, TimeTzADT *result);
-static void AdjustTimeForTypmod(TimeADT *time, int32 typmod);
-
-
 /* common code for timetypmodin and timetztypmodin */
 static int32
 anytime_typmodin(bool istz, ArrayType *ta)
@@ -1203,7 +1198,7 @@ time_in(PG_FUNCTION_ARGS)
 /* tm2time()
  * Convert a tm structure to a time data type.
  */
-static int
+int
 tm2time(struct pg_tm *tm, fsec_t fsec, TimeADT *result)
 {
 	*result = ((((tm->tm_hour * MINS_PER_HOUR + tm->tm_min) * SECS_PER_MINUTE) + tm->tm_sec)
@@ -1379,7 +1374,7 @@ time_scale(PG_FUNCTION_ARGS)
  * have a fundamental tie together but rather a coincidence of
  * implementation. - thomas
  */
-static void
+void
 AdjustTimeForTypmod(TimeADT *time, int32 typmod)
 {
 	static const int64 TimeScales[MAX_TIME_PRECISION + 1] = {
@@ -1957,7 +1952,7 @@ time_part(PG_FUNCTION_ARGS)
 /* tm2timetz()
  * Convert a tm structure to a time data type.
  */
-static int
+int
 tm2timetz(struct pg_tm *tm, fsec_t fsec, int tz, TimeTzADT *result)
 {
 	result->time = ((((tm->tm_hour * MINS_PER_HOUR + tm->tm_min) * SECS_PER_MINUTE) + tm->tm_sec) *
diff --git a/src/backend/utils/adt/formatting.c b/src/backend/utils/adt/formatting.c
index d2f7666eed..462c333544 100644
--- a/src/backend/utils/adt/formatting.c
+++ b/src/backend/utils/adt/formatting.c
@@ -992,6 +992,11 @@ typedef struct NUMProc
 			   *L_currency_symbol;
 } NUMProc;
 
+/* Flags returned by DCH_datetime_type(): components present in a format */
+#define DCH_DATED	0x01
+#define DCH_TIMED	0x02
+#define DCH_ZONED	0x04
+
 /* ----------
  * Functions
  * ----------
@@ -1025,7 +1030,8 @@ static int from_char_parse_int(int *dest, char **src, FormatNode *node);
 static int	seq_search(char *name, const char *const *array, int type, int max, int *len);
 static int	from_char_seq_search(int *dest, char **src, const char *const *array, int type, int max, FormatNode *node);
 static void do_to_timestamp(text *date_txt, text *fmt, bool std,
-							struct pg_tm *tm, fsec_t *fsec, int *fprec);
+							struct pg_tm *tm, fsec_t *fsec, int *fprec,
+							uint32 *flags);
 static char *fill_str(char *str, int c, int max);
 static FormatNode *NUM_cache(int len, NUMDesc *Num, text *pars_str, bool *shouldFree);
 static char *int_to_roman(int number);
@@ -3517,6 +3523,109 @@ DCH_prevent_counter_overflow(void)
 	}
 }
 
+/* Return DCH_DATED/DCH_TIMED/DCH_ZONED mask of fields used in format nodes. */
+static int
+DCH_datetime_type(FormatNode *node)
+{
+	FormatNode *n;
+	int			flags = 0;
+
+	for (n = node; n->type != NODE_TYPE_END; n++)
+	{
+		if (n->type != NODE_TYPE_ACTION)
+			continue;
+
+		switch (n->key->id)
+		{
+			case DCH_FX:
+				break;
+			case DCH_A_M:
+			case DCH_P_M:
+			case DCH_a_m:
+			case DCH_p_m:
+			case DCH_AM:
+			case DCH_PM:
+			case DCH_am:
+			case DCH_pm:
+			case DCH_HH:
+			case DCH_HH12:
+			case DCH_HH24:
+			case DCH_MI:
+			case DCH_SS:
+			case DCH_MS:		/* millisecond */
+			case DCH_US:		/* microsecond */
+			case DCH_FF1:
+			case DCH_FF2:
+			case DCH_FF3:
+			case DCH_FF4:
+			case DCH_FF5:
+			case DCH_FF6:
+			case DCH_SSSS:
+				flags |= DCH_TIMED;
+				break;
+			case DCH_tz:
+			case DCH_TZ:
+			case DCH_OF:
+				ereport(ERROR,
+						(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+						 errmsg("formatting field \"%s\" is only supported in to_char",
+								n->key->name)));
+				flags |= DCH_ZONED;
+				break;
+			case DCH_TZH:
+			case DCH_TZM:
+				flags |= DCH_ZONED;
+				break;
+			case DCH_A_D:
+			case DCH_B_C:
+			case DCH_a_d:
+			case DCH_b_c:
+			case DCH_AD:
+			case DCH_BC:
+			case DCH_ad:
+			case DCH_bc:
+			case DCH_MONTH:
+			case DCH_Month:
+			case DCH_month:
+			case DCH_MON:
+			case DCH_Mon:
+			case DCH_mon:
+			case DCH_MM:
+			case DCH_DAY:
+			case DCH_Day:
+			case DCH_day:
+			case DCH_DY:
+			case DCH_Dy:
+			case DCH_dy:
+			case DCH_DDD:
+			case DCH_IDDD:
+			case DCH_DD:
+			case DCH_D:
+			case DCH_ID:
+			case DCH_WW:
+			case DCH_Q:
+			case DCH_CC:
+			case DCH_Y_YYY:
+			case DCH_YYYY:
+			case DCH_IYYY:
+			case DCH_YYY:
+			case DCH_IYY:
+			case DCH_YY:
+			case DCH_IY:
+			case DCH_Y:
+			case DCH_I:
+			case DCH_RM:
+			case DCH_rm:
+			case DCH_W:
+			case DCH_J:
+				flags |= DCH_DATED;
+				break;
+		}
+	}
+
+	return flags;
+}
+
 /* select a DCHCacheEntry to hold the given format picture */
 static DCHCacheEntry *
 DCH_cache_getnew(const char *str, bool std)
@@ -3808,7 +3917,7 @@ to_timestamp(PG_FUNCTION_ARGS)
 	fsec_t		fsec;
 	int			fprec;
 
-	do_to_timestamp(date_txt, fmt, false, &tm, &fsec, &fprec);
+	do_to_timestamp(date_txt, fmt, false, &tm, &fsec, &fprec, NULL);
 
 	/* Use the specified time zone, if any. */
 	if (tm.tm_zone)
@@ -3847,7 +3956,7 @@ to_date(PG_FUNCTION_ARGS)
 	struct pg_tm tm;
 	fsec_t		fsec;
 
-	do_to_timestamp(date_txt, fmt, false, &tm, &fsec, NULL);
+	do_to_timestamp(date_txt, fmt, false, &tm, &fsec, NULL, NULL);
 
 	/* Prevent overflow in Julian-day routines */
 	if (!IS_VALID_JULIAN(tm.tm_year, tm.tm_mon, tm.tm_mday))
@@ -3868,6 +3977,176 @@ to_date(PG_FUNCTION_ARGS)
 	PG_RETURN_DATEADT(result);
 }
 
+/*
+ * Convert 'date_txt' to a datetime value using 'fmt' as the format string.
+ * The result type (returned in '*typid', '*typmod') is determined by the
+ * date/time/zone components of 'fmt': date, time, timetz, timestamp or
+ * timestamptz.  Formats that are zoned but not timed, or neither dated nor
+ * timed, raise an error.  For zoned formats the offset is stored in '*tz'.
+ */
+Datum
+parse_datetime(text *date_txt, text *fmt, bool strict, Oid *typid,
+			   int32 *typmod, int *tz)
+{
+	struct pg_tm tm;
+	fsec_t		fsec;
+	int			fprec = 0;
+	uint32		flags;
+
+	do_to_timestamp(date_txt, fmt, strict, &tm, &fsec, &fprec, &flags);
+
+	*typmod = fprec ? fprec : -1;	/* fractional precision; -1 when fprec is 0 */
+
+	if (flags & DCH_DATED)
+	{
+		if (flags & DCH_TIMED)
+		{
+			if (flags & DCH_ZONED)
+			{
+				TimestampTz result;
+
+				if (tm.tm_zone)
+				{
+					int			dterr = DecodeTimezone(unconstify(char *, tm.tm_zone), tz);
+
+					if (dterr)
+						DateTimeParseError(dterr, text_to_cstring(date_txt), "timestamptz");
+				}
+				else
+				{
+					/*
+					 * Time zone is present in format string, but not in input
+					 * string.  Assuming do_to_timestamp() triggers no error
+					 * this should be possible only in non-strict case.
+					 */
+					Assert(!strict);
+
+					ereport(ERROR,
+							(errcode(ERRCODE_INVALID_DATETIME_FORMAT),
+							 errmsg("missing time zone in input string for type timestamptz")));
+				}
+
+				if (tm2timestamp(&tm, fsec, tz, &result) != 0)
+					ereport(ERROR,
+							(errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
+							 errmsg("timestamptz out of range")));
+
+				AdjustTimestampForTypmod(&result, *typmod);
+
+				*typid = TIMESTAMPTZOID;
+				return TimestampTzGetDatum(result);
+			}
+			else
+			{
+				Timestamp	result;
+
+				if (tm2timestamp(&tm, fsec, NULL, &result) != 0)
+					ereport(ERROR,
+							(errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
+							 errmsg("timestamp out of range")));
+
+				AdjustTimestampForTypmod(&result, *typmod);
+
+				*typid = TIMESTAMPOID;
+				return TimestampGetDatum(result);
+			}
+		}
+		else
+		{
+			if (flags & DCH_ZONED)
+			{
+				ereport(ERROR,
+						(errcode(ERRCODE_INVALID_DATETIME_FORMAT),
+						 errmsg("datetime format is zoned but not timed")));
+			}
+			else
+			{
+				DateADT		result;
+
+				/* Prevent overflow in Julian-day routines */
+				if (!IS_VALID_JULIAN(tm.tm_year, tm.tm_mon, tm.tm_mday))
+					ereport(ERROR,
+							(errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
+							 errmsg("date out of range: \"%s\"",
+									text_to_cstring(date_txt))));
+
+				result = date2j(tm.tm_year, tm.tm_mon, tm.tm_mday) -
+					POSTGRES_EPOCH_JDATE;
+
+				/* Now check for just-out-of-range dates */
+				if (!IS_VALID_DATE(result))
+					ereport(ERROR,
+							(errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
+							 errmsg("date out of range: \"%s\"",
+									text_to_cstring(date_txt))));
+
+				*typid = DATEOID;
+				return DateADTGetDatum(result);
+			}
+		}
+	}
+	else if (flags & DCH_TIMED)
+	{
+		if (flags & DCH_ZONED)
+		{
+			TimeTzADT  *result = palloc(sizeof(TimeTzADT));
+
+			if (tm.tm_zone)
+			{
+				int			dterr = DecodeTimezone(unconstify(char *, tm.tm_zone), tz);
+
+				if (dterr)
+					DateTimeParseError(dterr, text_to_cstring(date_txt), "timetz");
+			}
+			else
+			{
+				/*
+				 * Time zone is present in format string, but not in input
+				 * string.  Assuming do_to_timestamp() triggers no error this
+				 * should be possible only in non-strict case.
+				 */
+				Assert(!strict);
+
+				ereport(ERROR,
+						(errcode(ERRCODE_INVALID_DATETIME_FORMAT),
+						 errmsg("missing time zone in input string for type timetz")));
+			}
+
+			if (tm2timetz(&tm, fsec, *tz, result) != 0)
+				ereport(ERROR,
+						(errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
+						 errmsg("timetz out of range")));
+
+			AdjustTimeForTypmod(&result->time, *typmod);
+
+			*typid = TIMETZOID;
+			return TimeTzADTPGetDatum(result);
+		}
+		else
+		{
+			TimeADT		result;
+
+			if (tm2time(&tm, fsec, &result) != 0)
+				ereport(ERROR,
+						(errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
+						 errmsg("time out of range")));
+
+			AdjustTimeForTypmod(&result, *typmod);
+
+			*typid = TIMEOID;
+			return TimeADTGetDatum(result);
+		}
+	}
+	else
+	{
+		ereport(ERROR,
+				(errcode(ERRCODE_INVALID_DATETIME_FORMAT),
+				 errmsg("datetime format is not dated and not timed")));
+	}
+
+	return (Datum) 0;
+}
+
 /*
  * do_to_timestamp: shared code for to_timestamp and to_date
  *
@@ -3883,7 +4162,8 @@ to_date(PG_FUNCTION_ARGS)
  */
 static void
 do_to_timestamp(text *date_txt, text *fmt, bool std,
-				struct pg_tm *tm, fsec_t *fsec, int *fprec)
+				struct pg_tm *tm, fsec_t *fsec, int *fprec,
+				uint32 *flags)
 {
 	FormatNode *format;
 	TmFromChar	tmfc;
@@ -3940,6 +4220,9 @@ do_to_timestamp(text *date_txt, text *fmt, bool std,
 
 		pfree(fmt_str);
 
+		if (flags)
+			*flags = DCH_datetime_type(format);
+
 		if (!incache)
 			pfree(format);
 	}
diff --git a/src/include/utils/date.h b/src/include/utils/date.h
index bec129aff1..bd15bfa5bb 100644
--- a/src/include/utils/date.h
+++ b/src/include/utils/date.h
@@ -76,5 +76,8 @@ extern TimeTzADT *GetSQLCurrentTime(int32 typmod);
 extern TimeADT GetSQLLocalTime(int32 typmod);
 extern int	time2tm(TimeADT time, struct pg_tm *tm, fsec_t *fsec);
 extern int	timetz2tm(TimeTzADT *time, struct pg_tm *tm, fsec_t *fsec, int *tzp);
+extern int	tm2time(struct pg_tm *tm, fsec_t fsec, TimeADT *result);
+extern int	tm2timetz(struct pg_tm *tm, fsec_t fsec, int tz, TimeTzADT *result);
+extern void AdjustTimeForTypmod(TimeADT *time, int32 typmod);
 
 #endif							/* DATE_H */
diff --git a/src/include/utils/formatting.h b/src/include/utils/formatting.h
index 0117144779..beeaf10c33 100644
--- a/src/include/utils/formatting.h
+++ b/src/include/utils/formatting.h
@@ -26,4 +26,7 @@ extern char *asc_tolower(const char *buff, size_t nbytes);
 extern char *asc_toupper(const char *buff, size_t nbytes);
 extern char *asc_initcap(const char *buff, size_t nbytes);
 
+extern Datum parse_datetime(text *date_txt, text *fmt, bool std,
+							Oid *typid, int32 *typmod, int *tz);
+
 #endif