Implement parse_datetime() function

This commit adds parse_datetime() function, which implements datetime
parsing with extended features demanded by upcoming jsonpath .datetime()
method:

 * Dynamic type identification based on template string,
 * Support for standard-conforming 'strict' mode,
 * Timezone offset is returned as separate value.

Extracted from original patch by Nikita Glukhov, Teodor Sigaev, Oleg Bartunov.
Revised by me.

Discussion: https://postgr.es/m/fcc6fc6a-b497-f39a-923d-aa34d0c588e8%402ndQuadrant.com
Discussion: https://postgr.es/m/CAPpHfdsZgYEra_PeCLGNoXOWYx6iU-S3wF8aX0ObQUcZU%2B4XTw%40mail.gmail.com
Author: Nikita Glukhov, Teodor Sigaev, Oleg Bartunov, Alexander Korotkov
Reviewed-by: Anastasia Lubennikova, Peter Eisentraut
This commit is contained in:
Alexander Korotkov 2019-09-25 21:50:55 +03:00
parent 1a950f37d0
commit 66c74f8b6e
4 changed files with 296 additions and 12 deletions

View File

@ -41,11 +41,6 @@
#endif
static int tm2time(struct pg_tm *tm, fsec_t fsec, TimeADT *result);
static int tm2timetz(struct pg_tm *tm, fsec_t fsec, int tz, TimeTzADT *result);
static void AdjustTimeForTypmod(TimeADT *time, int32 typmod);
/* common code for timetypmodin and timetztypmodin */
static int32
anytime_typmodin(bool istz, ArrayType *ta)
@ -1203,7 +1198,7 @@ time_in(PG_FUNCTION_ARGS)
/* tm2time()
* Convert a tm structure to a time data type.
*/
static int
int
tm2time(struct pg_tm *tm, fsec_t fsec, TimeADT *result)
{
*result = ((((tm->tm_hour * MINS_PER_HOUR + tm->tm_min) * SECS_PER_MINUTE) + tm->tm_sec)
@ -1379,7 +1374,7 @@ time_scale(PG_FUNCTION_ARGS)
* have a fundamental tie together but rather a coincidence of
* implementation. - thomas
*/
static void
void
AdjustTimeForTypmod(TimeADT *time, int32 typmod)
{
static const int64 TimeScales[MAX_TIME_PRECISION + 1] = {
@ -1957,7 +1952,7 @@ time_part(PG_FUNCTION_ARGS)
/* tm2timetz()
* Convert a tm structure to a time data type.
*/
static int
int
tm2timetz(struct pg_tm *tm, fsec_t fsec, int tz, TimeTzADT *result)
{
result->time = ((((tm->tm_hour * MINS_PER_HOUR + tm->tm_min) * SECS_PER_MINUTE) + tm->tm_sec) *

View File

@ -992,6 +992,11 @@ typedef struct NUMProc
*L_currency_symbol;
} NUMProc;
/* Return flags for DCH_from_char() */
#define DCH_DATED 0x01
#define DCH_TIMED 0x02
#define DCH_ZONED 0x04
/* ----------
* Functions
* ----------
@ -1025,7 +1030,8 @@ static int from_char_parse_int(int *dest, char **src, FormatNode *node);
static int seq_search(char *name, const char *const *array, int type, int max, int *len);
static int from_char_seq_search(int *dest, char **src, const char *const *array, int type, int max, FormatNode *node);
static void do_to_timestamp(text *date_txt, text *fmt, bool std,
struct pg_tm *tm, fsec_t *fsec, int *fprec);
struct pg_tm *tm, fsec_t *fsec, int *fprec,
uint32 *flags);
static char *fill_str(char *str, int c, int max);
static FormatNode *NUM_cache(int len, NUMDesc *Num, text *pars_str, bool *shouldFree);
static char *int_to_roman(int number);
@ -3517,6 +3523,109 @@ DCH_prevent_counter_overflow(void)
}
}
/* Get mask of date/time/zone components present in format nodes. */
static int
DCH_datetime_type(FormatNode *node)
{
FormatNode *n;
int flags = 0;
for (n = node; n->type != NODE_TYPE_END; n++)
{
if (n->type != NODE_TYPE_ACTION)
continue;
switch (n->key->id)
{
case DCH_FX:
break;
case DCH_A_M:
case DCH_P_M:
case DCH_a_m:
case DCH_p_m:
case DCH_AM:
case DCH_PM:
case DCH_am:
case DCH_pm:
case DCH_HH:
case DCH_HH12:
case DCH_HH24:
case DCH_MI:
case DCH_SS:
case DCH_MS: /* millisecond */
case DCH_US: /* microsecond */
case DCH_FF1:
case DCH_FF2:
case DCH_FF3:
case DCH_FF4:
case DCH_FF5:
case DCH_FF6:
case DCH_SSSS:
flags |= DCH_TIMED;
break;
case DCH_tz:
case DCH_TZ:
case DCH_OF:
ereport(ERROR,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("formatting field \"%s\" is only supported in to_char",
n->key->name)));
flags |= DCH_ZONED;
break;
case DCH_TZH:
case DCH_TZM:
flags |= DCH_ZONED;
break;
case DCH_A_D:
case DCH_B_C:
case DCH_a_d:
case DCH_b_c:
case DCH_AD:
case DCH_BC:
case DCH_ad:
case DCH_bc:
case DCH_MONTH:
case DCH_Month:
case DCH_month:
case DCH_MON:
case DCH_Mon:
case DCH_mon:
case DCH_MM:
case DCH_DAY:
case DCH_Day:
case DCH_day:
case DCH_DY:
case DCH_Dy:
case DCH_dy:
case DCH_DDD:
case DCH_IDDD:
case DCH_DD:
case DCH_D:
case DCH_ID:
case DCH_WW:
case DCH_Q:
case DCH_CC:
case DCH_Y_YYY:
case DCH_YYYY:
case DCH_IYYY:
case DCH_YYY:
case DCH_IYY:
case DCH_YY:
case DCH_IY:
case DCH_Y:
case DCH_I:
case DCH_RM:
case DCH_rm:
case DCH_W:
case DCH_J:
flags |= DCH_DATED;
break;
}
}
return flags;
}
/* select a DCHCacheEntry to hold the given format picture */
static DCHCacheEntry *
DCH_cache_getnew(const char *str, bool std)
@ -3808,7 +3917,7 @@ to_timestamp(PG_FUNCTION_ARGS)
fsec_t fsec;
int fprec;
do_to_timestamp(date_txt, fmt, false, &tm, &fsec, &fprec);
do_to_timestamp(date_txt, fmt, false, &tm, &fsec, &fprec, NULL);
/* Use the specified time zone, if any. */
if (tm.tm_zone)
@ -3847,7 +3956,7 @@ to_date(PG_FUNCTION_ARGS)
struct pg_tm tm;
fsec_t fsec;
do_to_timestamp(date_txt, fmt, false, &tm, &fsec, NULL);
do_to_timestamp(date_txt, fmt, false, &tm, &fsec, NULL, NULL);
/* Prevent overflow in Julian-day routines */
if (!IS_VALID_JULIAN(tm.tm_year, tm.tm_mon, tm.tm_mday))
@ -3868,6 +3977,176 @@ to_date(PG_FUNCTION_ARGS)
PG_RETURN_DATEADT(result);
}
/*
* Convert the 'date_txt' input to a datetime type using argument 'fmt' as a format string.
* The actual data type (returned in 'typid', 'typmod') is determined by
* the presence of date/time/zone components in the format string.
*
* When timezone component is present, the corresponding offset is set to '*tz'.
*/
Datum
parse_datetime(text *date_txt, text *fmt, bool strict, Oid *typid,
int32 *typmod, int *tz)
{
struct pg_tm tm;
fsec_t fsec;
int fprec = 0;
uint32 flags;
do_to_timestamp(date_txt, fmt, strict, &tm, &fsec, &fprec, &flags);
*typmod = fprec ? fprec : -1; /* fractional part precision */
if (flags & DCH_DATED)
{
if (flags & DCH_TIMED)
{
if (flags & DCH_ZONED)
{
TimestampTz result;
if (tm.tm_zone)
{
int dterr = DecodeTimezone(unconstify(char *, tm.tm_zone), tz);
if (dterr)
DateTimeParseError(dterr, text_to_cstring(date_txt), "timestamptz");
}
else
{
/*
* Time zone is present in format string, but not in input
* string. Assuming do_to_timestamp() triggers no error
* this should be possible only in non-strict case.
*/
Assert(!strict);
ereport(ERROR,
(errcode(ERRCODE_INVALID_DATETIME_FORMAT),
errmsg("missing time zone in input string for type timestamptz")));
}
if (tm2timestamp(&tm, fsec, tz, &result) != 0)
ereport(ERROR,
(errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
errmsg("timestamptz out of range")));
AdjustTimestampForTypmod(&result, *typmod);
*typid = TIMESTAMPTZOID;
return TimestampTzGetDatum(result);
}
else
{
Timestamp result;
if (tm2timestamp(&tm, fsec, NULL, &result) != 0)
ereport(ERROR,
(errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
errmsg("timestamp out of range")));
AdjustTimestampForTypmod(&result, *typmod);
*typid = TIMESTAMPOID;
return TimestampGetDatum(result);
}
}
else
{
if (flags & DCH_ZONED)
{
ereport(ERROR,
(errcode(ERRCODE_INVALID_DATETIME_FORMAT),
errmsg("datetime format is zoned but not timed")));
}
else
{
DateADT result;
/* Prevent overflow in Julian-day routines */
if (!IS_VALID_JULIAN(tm.tm_year, tm.tm_mon, tm.tm_mday))
ereport(ERROR,
(errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
errmsg("date out of range: \"%s\"",
text_to_cstring(date_txt))));
result = date2j(tm.tm_year, tm.tm_mon, tm.tm_mday) -
POSTGRES_EPOCH_JDATE;
/* Now check for just-out-of-range dates */
if (!IS_VALID_DATE(result))
ereport(ERROR,
(errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
errmsg("date out of range: \"%s\"",
text_to_cstring(date_txt))));
*typid = DATEOID;
return DateADTGetDatum(result);
}
}
}
else if (flags & DCH_TIMED)
{
if (flags & DCH_ZONED)
{
TimeTzADT *result = palloc(sizeof(TimeTzADT));
if (tm.tm_zone)
{
int dterr = DecodeTimezone(unconstify(char *, tm.tm_zone), tz);
if (dterr)
DateTimeParseError(dterr, text_to_cstring(date_txt), "timetz");
}
else
{
/*
* Time zone is present in format string, but not in input
* string. Assuming do_to_timestamp() triggers no error this
* should be possible only in non-strict case.
*/
Assert(!strict);
ereport(ERROR,
(errcode(ERRCODE_INVALID_DATETIME_FORMAT),
errmsg("missing time zone in input string for type timetz")));
}
if (tm2timetz(&tm, fsec, *tz, result) != 0)
ereport(ERROR,
(errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
errmsg("timetz out of range")));
AdjustTimeForTypmod(&result->time, *typmod);
*typid = TIMETZOID;
return TimeTzADTPGetDatum(result);
}
else
{
TimeADT result;
if (tm2time(&tm, fsec, &result) != 0)
ereport(ERROR,
(errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
errmsg("time out of range")));
AdjustTimeForTypmod(&result, *typmod);
*typid = TIMEOID;
return TimeADTGetDatum(result);
}
}
else
{
ereport(ERROR,
(errcode(ERRCODE_INVALID_DATETIME_FORMAT),
errmsg("datetime format is not dated and not timed")));
}
return (Datum) 0;
}
/*
* do_to_timestamp: shared code for to_timestamp and to_date
*
@ -3883,7 +4162,8 @@ to_date(PG_FUNCTION_ARGS)
*/
static void
do_to_timestamp(text *date_txt, text *fmt, bool std,
struct pg_tm *tm, fsec_t *fsec, int *fprec)
struct pg_tm *tm, fsec_t *fsec, int *fprec,
uint32 *flags)
{
FormatNode *format;
TmFromChar tmfc;
@ -3940,6 +4220,9 @@ do_to_timestamp(text *date_txt, text *fmt, bool std,
pfree(fmt_str);
if (flags)
*flags = DCH_datetime_type(format);
if (!incache)
pfree(format);
}

View File

@ -76,5 +76,8 @@ extern TimeTzADT *GetSQLCurrentTime(int32 typmod);
extern TimeADT GetSQLLocalTime(int32 typmod);
extern int time2tm(TimeADT time, struct pg_tm *tm, fsec_t *fsec);
extern int timetz2tm(TimeTzADT *time, struct pg_tm *tm, fsec_t *fsec, int *tzp);
extern int tm2time(struct pg_tm *tm, fsec_t fsec, TimeADT *result);
extern int tm2timetz(struct pg_tm *tm, fsec_t fsec, int tz, TimeTzADT *result);
extern void AdjustTimeForTypmod(TimeADT *time, int32 typmod);
#endif /* DATE_H */

View File

@ -26,4 +26,7 @@ extern char *asc_tolower(const char *buff, size_t nbytes);
extern char *asc_toupper(const char *buff, size_t nbytes);
extern char *asc_initcap(const char *buff, size_t nbytes);
extern Datum parse_datetime(text *date_txt, text *fmt, bool std,
Oid *typid, int32 *typmod, int *tz);
#endif