1996-07-09 08:22:35 +02:00
|
|
|
/*-------------------------------------------------------------------------
|
|
|
|
*
|
1999-02-14 00:22:53 +01:00
|
|
|
* numutils.c
|
1996-07-09 08:22:35 +02:00
|
|
|
* utility functions for I/O of built-in numeric types.
|
|
|
|
*
|
2024-01-04 02:49:05 +01:00
|
|
|
* Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
|
2000-01-26 06:58:53 +01:00
|
|
|
* Portions Copyright (c) 1994, Regents of the University of California
|
1996-07-09 08:22:35 +02:00
|
|
|
*
|
|
|
|
*
|
|
|
|
* IDENTIFICATION
|
2010-09-20 22:08:53 +02:00
|
|
|
* src/backend/utils/adt/numutils.c
|
1996-07-09 08:22:35 +02:00
|
|
|
*
|
|
|
|
*-------------------------------------------------------------------------
|
|
|
|
*/
|
2000-07-13 00:59:15 +02:00
|
|
|
#include "postgres.h"
|
|
|
|
|
1996-07-09 08:22:35 +02:00
|
|
|
#include <math.h>
|
1997-11-17 17:26:27 +01:00
|
|
|
#include <limits.h>
|
2004-03-11 03:11:14 +01:00
|
|
|
#include <ctype.h>
|
2000-07-13 00:59:15 +02:00
|
|
|
|
2020-02-01 22:57:14 +01:00
|
|
|
#include "port/pg_bitutils.h"
|
2024-03-13 15:07:00 +01:00
|
|
|
#include "utils/builtins.h"
|
2020-02-01 22:57:14 +01:00
|
|
|
|
|
|
|
/*
 * Two-character decimal spellings of 00 through 99, stored back to back.
 * The pair for value i lives at DIGIT_TABLE[2 * i] and DIGIT_TABLE[2 * i + 1].
 * The array holds exactly 200 characters, so there is no terminating NUL.
 *
 * This is used to speed up decimal digit generation by copying pairs of
 * digits into the final output.
 */
static const char DIGIT_TABLE[200] =
"00010203040506070809"
"10111213141516171819"
"20212223242526272829"
"30313233343536373839"
"40414243444546474849"
"50515253545556575859"
"60616263646566676869"
"70717273747576777879"
"80818283848586878889"
"90919293949596979899";
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Adapted from http://graphics.stanford.edu/~seander/bithacks.html#IntegerLog10
|
|
|
|
*/
|
|
|
|
static inline int
|
|
|
|
decimalLength32(const uint32 v)
|
|
|
|
{
|
|
|
|
int t;
|
|
|
|
static const uint32 PowersOfTen[] = {
|
|
|
|
1, 10, 100,
|
|
|
|
1000, 10000, 100000,
|
|
|
|
1000000, 10000000, 100000000,
|
|
|
|
1000000000
|
|
|
|
};
|
2020-05-14 19:06:38 +02:00
|
|
|
|
2020-02-01 22:57:14 +01:00
|
|
|
/*
|
|
|
|
* Compute base-10 logarithm by dividing the base-2 logarithm by a
|
|
|
|
* good-enough approximation of the base-2 logarithm of 10
|
|
|
|
*/
|
|
|
|
t = (pg_leftmost_one_pos32(v) + 1) * 1233 / 4096;
|
|
|
|
return t + (v >= PowersOfTen[t]);
|
|
|
|
}
|
|
|
|
|
|
|
|
static inline int
|
|
|
|
decimalLength64(const uint64 v)
|
|
|
|
{
|
|
|
|
int t;
|
|
|
|
static const uint64 PowersOfTen[] = {
|
|
|
|
UINT64CONST(1), UINT64CONST(10),
|
|
|
|
UINT64CONST(100), UINT64CONST(1000),
|
|
|
|
UINT64CONST(10000), UINT64CONST(100000),
|
|
|
|
UINT64CONST(1000000), UINT64CONST(10000000),
|
|
|
|
UINT64CONST(100000000), UINT64CONST(1000000000),
|
|
|
|
UINT64CONST(10000000000), UINT64CONST(100000000000),
|
|
|
|
UINT64CONST(1000000000000), UINT64CONST(10000000000000),
|
|
|
|
UINT64CONST(100000000000000), UINT64CONST(1000000000000000),
|
|
|
|
UINT64CONST(10000000000000000), UINT64CONST(100000000000000000),
|
|
|
|
UINT64CONST(1000000000000000000), UINT64CONST(10000000000000000000)
|
|
|
|
};
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Compute base-10 logarithm by dividing the base-2 logarithm by a
|
|
|
|
* good-enough approximation of the base-2 logarithm of 10
|
|
|
|
*/
|
|
|
|
t = (pg_leftmost_one_pos64(v) + 1) * 1233 / 4096;
|
|
|
|
return t + (v >= PowersOfTen[t]);
|
|
|
|
}
|
1997-12-19 03:09:10 +01:00
|
|
|
|
2022-12-14 05:40:38 +01:00
|
|
|
/*
 * Map a character code in the range 0..127 to the value of the hexadecimal
 * digit it denotes, or -1 when it is not a hexadecimal digit.  Entries
 * 0x30..0x39 hold 0..9, and entries 0x41..0x46 and 0x61..0x66 both hold
 * 10..15 (the ASCII positions of '0'-'9', 'A'-'F' and 'a'-'f').
 */
static const int8 hexlookup[128] = {
	/* 0x00 */ -1, -1, -1, -1, -1, -1, -1, -1,
	/* 0x08 */ -1, -1, -1, -1, -1, -1, -1, -1,
	/* 0x10 */ -1, -1, -1, -1, -1, -1, -1, -1,
	/* 0x18 */ -1, -1, -1, -1, -1, -1, -1, -1,
	/* 0x20 */ -1, -1, -1, -1, -1, -1, -1, -1,
	/* 0x28 */ -1, -1, -1, -1, -1, -1, -1, -1,
	/* 0x30 */ 0, 1, 2, 3, 4, 5, 6, 7,
	/* 0x38 */ 8, 9, -1, -1, -1, -1, -1, -1,
	/* 0x40 */ -1, 10, 11, 12, 13, 14, 15, -1,
	/* 0x48 */ -1, -1, -1, -1, -1, -1, -1, -1,
	/* 0x50 */ -1, -1, -1, -1, -1, -1, -1, -1,
	/* 0x58 */ -1, -1, -1, -1, -1, -1, -1, -1,
	/* 0x60 */ -1, 10, 11, 12, 13, 14, 15, -1,
	/* 0x68 */ -1, -1, -1, -1, -1, -1, -1, -1,
	/* 0x70 */ -1, -1, -1, -1, -1, -1, -1, -1,
	/* 0x78 */ -1, -1, -1, -1, -1, -1, -1, -1,
};
|
|
|
|
|
2018-07-22 23:58:01 +02:00
|
|
|
/*
|
Fix performance regression in pg_strtointNN_safe functions
Between 6fcda9aba and 1b6f632a3, the pg_strtoint functions became quite
a bit slower in v16, despite efforts in 6b423ec67 to speed these up.
Since the majority of cases for these functions will only contain
base-10 digits, perhaps prefixed by a '-', it makes sense to have a
special case for this and just fall back on the more complex version
which processes hex, octal, binary and underscores if the fast path
version fails to parse the string.
While we're here, update the header comments for these functions to
mention that hex, octal and binary formats along with underscore
separators are now supported.
Author: Andres Freund, David Rowley
Reported-by: Masahiko Sawada
Reviewed-by: Dean Rasheed, John Naylor
Discussion: https://postgr.es/m/CAD21AoDvDmUQeJtZrau1ovnT_smN940%3DKp6mszNGK3bq9yRN6g%40mail.gmail.com
Backpatch-through: 16, where 6fcda9aba and 1b6f632a3 were added
2023-08-02 02:05:41 +02:00
|
|
|
* Convert input string to a signed 16 bit integer. Input strings may be
|
|
|
|
* expressed in base-10, hexadecimal, octal, or binary format, all of which
|
|
|
|
* can be prefixed by an optional sign character, either '+' (the default) or
|
|
|
|
* '-' for negative numbers. Hex strings are recognized by the digits being
|
|
|
|
* prefixed by 0x or 0X while octal strings are recognized by the 0o or 0O
|
|
|
|
* prefix. The binary representation is recognized by the 0b or 0B prefix.
|
2018-07-22 23:58:01 +02:00
|
|
|
*
|
Fix performance regression in pg_strtointNN_safe functions
Between 6fcda9aba and 1b6f632a3, the pg_strtoint functions became quite
a bit slower in v16, despite efforts in 6b423ec67 to speed these up.
Since the majority of cases for these functions will only contain
base-10 digits, perhaps prefixed by a '-', it makes sense to have a
special case for this and just fall back on the more complex version
which processes hex, octal, binary and underscores if the fast path
version fails to parse the string.
While we're here, update the header comments for these functions to
mention that hex, octal and binary formats along with underscore
separators are now supported.
Author: Andres Freund, David Rowley
Reported-by: Masahiko Sawada
Reviewed-by: Dean Rasheed, John Naylor
Discussion: https://postgr.es/m/CAD21AoDvDmUQeJtZrau1ovnT_smN940%3DKp6mszNGK3bq9yRN6g%40mail.gmail.com
Backpatch-through: 16, where 6fcda9aba and 1b6f632a3 were added
2023-08-02 02:05:41 +02:00
|
|
|
* Allows any number of leading or trailing whitespace characters. Digits may
|
|
|
|
* optionally be separated by a single underscore character. These can only
|
|
|
|
* come between digits and not before or after the digits. Underscores have
|
|
|
|
* no effect on the return value and are supported only to assist in improving
|
|
|
|
* the human readability of the input strings.
|
2018-07-22 23:58:01 +02:00
|
|
|
*
|
Convert a few datatype input functions to use "soft" error reporting.
This patch converts the input functions for bool, int2, int4, int8,
float4, float8, numeric, and contrib/cube to the new soft-error style.
array_in and record_in are also converted. There's lots more to do,
but this is enough to provide proof-of-concept that the soft-error
API is usable, as well as reference examples for how to convert
input functions.
This patch is mostly by me, but it owes very substantial debt to
earlier work by Nikita Glukhov, Andrew Dunstan, and Amul Sul.
Thanks to Andres Freund for review.
Discussion: https://postgr.es/m/3bbbb0df-7382-bf87-9737-340ba096e034@postgrespro.ru
2022-12-09 16:14:53 +01:00
|
|
|
* pg_strtoint16() will throw ereport() upon bad input format or overflow;
|
|
|
|
* while pg_strtoint16_safe() instead returns such complaints in *escontext,
|
|
|
|
* if it's an ErrorSaveContext.
|
|
|
|
*
|
2022-12-04 04:18:18 +01:00
|
|
|
* NB: Accumulate input as an unsigned number, to deal with two's complement
|
2018-07-22 23:58:01 +02:00
|
|
|
* representation of the most negative number, which can't be represented as a
|
2022-12-04 04:18:18 +01:00
|
|
|
* signed positive number.
|
2018-07-22 23:58:01 +02:00
|
|
|
*/
|
|
|
|
int16
|
|
|
|
pg_strtoint16(const char *s)
|
Convert a few datatype input functions to use "soft" error reporting.
This patch converts the input functions for bool, int2, int4, int8,
float4, float8, numeric, and contrib/cube to the new soft-error style.
array_in and record_in are also converted. There's lots more to do,
but this is enough to provide proof-of-concept that the soft-error
API is usable, as well as reference examples for how to convert
input functions.
This patch is mostly by me, but it owes very substantial debt to
earlier work by Nikita Glukhov, Andrew Dunstan, and Amul Sul.
Thanks to Andres Freund for review.
Discussion: https://postgr.es/m/3bbbb0df-7382-bf87-9737-340ba096e034@postgrespro.ru
2022-12-09 16:14:53 +01:00
|
|
|
{
|
|
|
|
return pg_strtoint16_safe(s, NULL);
|
|
|
|
}
|
|
|
|
|
|
|
|
int16
|
|
|
|
pg_strtoint16_safe(const char *s, Node *escontext)
|
2018-07-22 23:58:01 +02:00
|
|
|
{
|
|
|
|
const char *ptr = s;
|
2022-12-14 05:40:38 +01:00
|
|
|
const char *firstdigit;
|
2022-12-04 04:18:18 +01:00
|
|
|
uint16 tmp = 0;
|
2018-07-22 23:58:01 +02:00
|
|
|
bool neg = false;
|
Fix performance regression in pg_strtointNN_safe functions
Between 6fcda9aba and 1b6f632a3, the pg_strtoint functions became quite
a bit slower in v16, despite efforts in 6b423ec67 to speed these up.
Since the majority of cases for these functions will only contain
base-10 digits, perhaps prefixed by a '-', it makes sense to have a
special case for this and just fall back on the more complex version
which processes hex, octal, binary and underscores if the fast path
version fails to parse the string.
While we're here, update the header comments for these functions to
mention that hex, octal and binary formats along with underscore
separators are now supported.
Author: Andres Freund, David Rowley
Reported-by: Masahiko Sawada
Reviewed-by: Dean Rasheed, John Naylor
Discussion: https://postgr.es/m/CAD21AoDvDmUQeJtZrau1ovnT_smN940%3DKp6mszNGK3bq9yRN6g%40mail.gmail.com
Backpatch-through: 16, where 6fcda9aba and 1b6f632a3 were added
2023-08-02 02:05:41 +02:00
|
|
|
unsigned char digit;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* The majority of cases are likely to be base-10 digits without any
|
|
|
|
* underscore separator characters. We'll first try to parse the string
|
|
|
|
* with the assumption that's the case and only fallback on a slower
|
|
|
|
* implementation which handles hex, octal and binary strings and
|
|
|
|
* underscores if the fastpath version cannot parse the string.
|
|
|
|
*/
|
|
|
|
|
|
|
|
/* leave it up to the slow path to look for leading spaces */
|
|
|
|
|
|
|
|
if (*ptr == '-')
|
|
|
|
{
|
|
|
|
ptr++;
|
|
|
|
neg = true;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* a leading '+' is uncommon so leave that for the slow path */
|
|
|
|
|
|
|
|
/* process the first digit */
|
|
|
|
digit = (*ptr - '0');
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Exploit unsigned arithmetic to save having to check both the upper and
|
|
|
|
* lower bounds of the digit.
|
|
|
|
*/
|
|
|
|
if (likely(digit < 10))
|
|
|
|
{
|
|
|
|
ptr++;
|
|
|
|
tmp = digit;
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
/* we need at least one digit */
|
|
|
|
goto slow;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* process remaining digits */
|
|
|
|
for (;;)
|
|
|
|
{
|
|
|
|
digit = (*ptr - '0');
|
|
|
|
|
|
|
|
if (digit >= 10)
|
|
|
|
break;
|
|
|
|
|
|
|
|
ptr++;
|
|
|
|
|
|
|
|
if (unlikely(tmp > -(PG_INT16_MIN / 10)))
|
|
|
|
goto out_of_range;
|
|
|
|
|
|
|
|
tmp = tmp * 10 + digit;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* when the string does not end in a digit, let the slow path handle it */
|
|
|
|
if (unlikely(*ptr != '\0'))
|
|
|
|
goto slow;
|
|
|
|
|
|
|
|
if (neg)
|
|
|
|
{
|
|
|
|
/* check the negative equivalent will fit without overflowing */
|
|
|
|
if (unlikely(tmp > (uint16) (-(PG_INT16_MIN + 1)) + 1))
|
|
|
|
goto out_of_range;
|
|
|
|
return -((int16) tmp);
|
|
|
|
}
|
|
|
|
|
|
|
|
if (unlikely(tmp > PG_INT16_MAX))
|
|
|
|
goto out_of_range;
|
|
|
|
|
|
|
|
return (int16) tmp;
|
|
|
|
|
|
|
|
slow:
|
|
|
|
tmp = 0;
|
|
|
|
ptr = s;
|
|
|
|
/* no need to reset neg */
|
2018-07-22 23:58:01 +02:00
|
|
|
|
|
|
|
/* skip leading spaces */
|
Fix performance regression in pg_strtointNN_safe functions
Between 6fcda9aba and 1b6f632a3, the pg_strtoint functions became quite
a bit slower in v16, despite efforts in 6b423ec67 to speed these up.
Since the majority of cases for these functions will only contain
base-10 digits, perhaps prefixed by a '-', it makes sense to have a
special case for this and just fall back on the more complex version
which processes hex, octal, binary and underscores if the fast path
version fails to parse the string.
While we're here, update the header comments for these functions to
mention that hex, octal and binary formats along with underscore
separators are now supported.
Author: Andres Freund, David Rowley
Reported-by: Masahiko Sawada
Reviewed-by: Dean Rasheed, John Naylor
Discussion: https://postgr.es/m/CAD21AoDvDmUQeJtZrau1ovnT_smN940%3DKp6mszNGK3bq9yRN6g%40mail.gmail.com
Backpatch-through: 16, where 6fcda9aba and 1b6f632a3 were added
2023-08-02 02:05:41 +02:00
|
|
|
while (isspace((unsigned char) *ptr))
|
2018-07-22 23:58:01 +02:00
|
|
|
ptr++;
|
|
|
|
|
|
|
|
/* handle sign */
|
|
|
|
if (*ptr == '-')
|
|
|
|
{
|
|
|
|
ptr++;
|
|
|
|
neg = true;
|
|
|
|
}
|
|
|
|
else if (*ptr == '+')
|
|
|
|
ptr++;
|
|
|
|
|
|
|
|
/* process digits */
|
2022-12-14 05:40:38 +01:00
|
|
|
if (ptr[0] == '0' && (ptr[1] == 'x' || ptr[1] == 'X'))
|
2018-07-22 23:58:01 +02:00
|
|
|
{
|
2022-12-14 05:40:38 +01:00
|
|
|
firstdigit = ptr += 2;
|
|
|
|
|
Fix performance regression in pg_strtointNN_safe functions
Between 6fcda9aba and 1b6f632a3, the pg_strtoint functions became quite
a bit slower in v16, despite efforts in 6b423ec67 to speed these up.
Since the majority of cases for these functions will only contain
base-10 digits, perhaps prefixed by a '-', it makes sense to have a
special case for this and just fall back on the more complex version
which processes hex, octal, binary and underscores if the fast path
version fails to parse the string.
While we're here, update the header comments for these functions to
mention that hex, octal and binary formats along with underscore
separators are now supported.
Author: Andres Freund, David Rowley
Reported-by: Masahiko Sawada
Reviewed-by: Dean Rasheed, John Naylor
Discussion: https://postgr.es/m/CAD21AoDvDmUQeJtZrau1ovnT_smN940%3DKp6mszNGK3bq9yRN6g%40mail.gmail.com
Backpatch-through: 16, where 6fcda9aba and 1b6f632a3 were added
2023-08-02 02:05:41 +02:00
|
|
|
for (;;)
|
2022-12-14 05:40:38 +01:00
|
|
|
{
|
2023-02-04 10:48:51 +01:00
|
|
|
if (isxdigit((unsigned char) *ptr))
|
|
|
|
{
|
|
|
|
if (unlikely(tmp > -(PG_INT16_MIN / 16)))
|
|
|
|
goto out_of_range;
|
|
|
|
|
|
|
|
tmp = tmp * 16 + hexlookup[(unsigned char) *ptr++];
|
|
|
|
}
|
|
|
|
else if (*ptr == '_')
|
|
|
|
{
|
|
|
|
/* underscore must be followed by more digits */
|
|
|
|
ptr++;
|
|
|
|
if (*ptr == '\0' || !isxdigit((unsigned char) *ptr))
|
|
|
|
goto invalid_syntax;
|
|
|
|
}
|
|
|
|
else
|
|
|
|
break;
|
2022-12-14 05:40:38 +01:00
|
|
|
}
|
|
|
|
}
|
|
|
|
else if (ptr[0] == '0' && (ptr[1] == 'o' || ptr[1] == 'O'))
|
|
|
|
{
|
|
|
|
firstdigit = ptr += 2;
|
|
|
|
|
Fix performance regression in pg_strtointNN_safe functions
Between 6fcda9aba and 1b6f632a3, the pg_strtoint functions became quite
a bit slower in v16, despite efforts in 6b423ec67 to speed these up.
Since the majority of cases for these functions will only contain
base-10 digits, perhaps prefixed by a '-', it makes sense to have a
special case for this and just fall back on the more complex version
which processes hex, octal, binary and underscores if the fast path
version fails to parse the string.
While we're here, update the header comments for these functions to
mention that hex, octal and binary formats along with underscore
separators are now supported.
Author: Andres Freund, David Rowley
Reported-by: Masahiko Sawada
Reviewed-by: Dean Rasheed, John Naylor
Discussion: https://postgr.es/m/CAD21AoDvDmUQeJtZrau1ovnT_smN940%3DKp6mszNGK3bq9yRN6g%40mail.gmail.com
Backpatch-through: 16, where 6fcda9aba and 1b6f632a3 were added
2023-08-02 02:05:41 +02:00
|
|
|
for (;;)
|
2022-12-14 05:40:38 +01:00
|
|
|
{
|
2023-02-04 10:48:51 +01:00
|
|
|
if (*ptr >= '0' && *ptr <= '7')
|
|
|
|
{
|
|
|
|
if (unlikely(tmp > -(PG_INT16_MIN / 8)))
|
|
|
|
goto out_of_range;
|
|
|
|
|
|
|
|
tmp = tmp * 8 + (*ptr++ - '0');
|
|
|
|
}
|
|
|
|
else if (*ptr == '_')
|
|
|
|
{
|
|
|
|
/* underscore must be followed by more digits */
|
|
|
|
ptr++;
|
|
|
|
if (*ptr == '\0' || *ptr < '0' || *ptr > '7')
|
|
|
|
goto invalid_syntax;
|
|
|
|
}
|
|
|
|
else
|
|
|
|
break;
|
2022-12-14 05:40:38 +01:00
|
|
|
}
|
|
|
|
}
|
|
|
|
else if (ptr[0] == '0' && (ptr[1] == 'b' || ptr[1] == 'B'))
|
|
|
|
{
|
|
|
|
firstdigit = ptr += 2;
|
|
|
|
|
Fix performance regression in pg_strtointNN_safe functions
Between 6fcda9aba and 1b6f632a3, the pg_strtoint functions became quite
a bit slower in v16, despite efforts in 6b423ec67 to speed these up.
Since the majority of cases for these functions will only contain
base-10 digits, perhaps prefixed by a '-', it makes sense to have a
special case for this and just fall back on the more complex version
which processes hex, octal, binary and underscores if the fast path
version fails to parse the string.
While we're here, update the header comments for these functions to
mention that hex, octal and binary formats along with underscore
separators are now supported.
Author: Andres Freund, David Rowley
Reported-by: Masahiko Sawada
Reviewed-by: Dean Rasheed, John Naylor
Discussion: https://postgr.es/m/CAD21AoDvDmUQeJtZrau1ovnT_smN940%3DKp6mszNGK3bq9yRN6g%40mail.gmail.com
Backpatch-through: 16, where 6fcda9aba and 1b6f632a3 were added
2023-08-02 02:05:41 +02:00
|
|
|
for (;;)
|
2022-12-14 05:40:38 +01:00
|
|
|
{
|
2023-02-04 10:48:51 +01:00
|
|
|
if (*ptr >= '0' && *ptr <= '1')
|
|
|
|
{
|
|
|
|
if (unlikely(tmp > -(PG_INT16_MIN / 2)))
|
|
|
|
goto out_of_range;
|
|
|
|
|
|
|
|
tmp = tmp * 2 + (*ptr++ - '0');
|
|
|
|
}
|
|
|
|
else if (*ptr == '_')
|
|
|
|
{
|
|
|
|
/* underscore must be followed by more digits */
|
|
|
|
ptr++;
|
|
|
|
if (*ptr == '\0' || *ptr < '0' || *ptr > '1')
|
|
|
|
goto invalid_syntax;
|
|
|
|
}
|
|
|
|
else
|
|
|
|
break;
|
2022-12-14 05:40:38 +01:00
|
|
|
}
|
2018-07-22 23:58:01 +02:00
|
|
|
}
|
2022-12-14 05:40:38 +01:00
|
|
|
else
|
|
|
|
{
|
|
|
|
firstdigit = ptr;
|
|
|
|
|
Fix performance regression in pg_strtointNN_safe functions
Between 6fcda9aba and 1b6f632a3, the pg_strtoint functions became quite
a bit slower in v16, despite efforts in 6b423ec67 to speed these up.
Since the majority of cases for these functions will only contain
base-10 digits, perhaps prefixed by a '-', it makes sense to have a
special case for this and just fall back on the more complex version
which processes hex, octal, binary and underscores if the fast path
version fails to parse the string.
While we're here, update the header comments for these functions to
mention that hex, octal and binary formats along with underscore
separators are now supported.
Author: Andres Freund, David Rowley
Reported-by: Masahiko Sawada
Reviewed-by: Dean Rasheed, John Naylor
Discussion: https://postgr.es/m/CAD21AoDvDmUQeJtZrau1ovnT_smN940%3DKp6mszNGK3bq9yRN6g%40mail.gmail.com
Backpatch-through: 16, where 6fcda9aba and 1b6f632a3 were added
2023-08-02 02:05:41 +02:00
|
|
|
for (;;)
|
2022-12-14 05:40:38 +01:00
|
|
|
{
|
Fix performance regression in pg_strtointNN_safe functions
Between 6fcda9aba and 1b6f632a3, the pg_strtoint functions became quite
a bit slower in v16, despite efforts in 6b423ec67 to speed these up.
Since the majority of cases for these functions will only contain
base-10 digits, perhaps prefixed by a '-', it makes sense to have a
special case for this and just fall back on the more complex version
which processes hex, octal, binary and underscores if the fast path
version fails to parse the string.
While we're here, update the header comments for these functions to
mention that hex, octal and binary formats along with underscore
separators are now supported.
Author: Andres Freund, David Rowley
Reported-by: Masahiko Sawada
Reviewed-by: Dean Rasheed, John Naylor
Discussion: https://postgr.es/m/CAD21AoDvDmUQeJtZrau1ovnT_smN940%3DKp6mszNGK3bq9yRN6g%40mail.gmail.com
Backpatch-through: 16, where 6fcda9aba and 1b6f632a3 were added
2023-08-02 02:05:41 +02:00
|
|
|
if (*ptr >= '0' && *ptr <= '9')
|
2023-02-04 10:48:51 +01:00
|
|
|
{
|
|
|
|
if (unlikely(tmp > -(PG_INT16_MIN / 10)))
|
|
|
|
goto out_of_range;
|
|
|
|
|
|
|
|
tmp = tmp * 10 + (*ptr++ - '0');
|
|
|
|
}
|
|
|
|
else if (*ptr == '_')
|
|
|
|
{
|
|
|
|
/* underscore may not be first */
|
|
|
|
if (unlikely(ptr == firstdigit))
|
|
|
|
goto invalid_syntax;
|
|
|
|
/* and it must be followed by more digits */
|
|
|
|
ptr++;
|
|
|
|
if (*ptr == '\0' || !isdigit((unsigned char) *ptr))
|
|
|
|
goto invalid_syntax;
|
|
|
|
}
|
|
|
|
else
|
|
|
|
break;
|
2022-12-14 05:40:38 +01:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/* require at least one digit */
|
|
|
|
if (unlikely(ptr == firstdigit))
|
|
|
|
goto invalid_syntax;
|
2018-07-22 23:58:01 +02:00
|
|
|
|
|
|
|
/* allow trailing whitespace, but not other trailing chars */
|
Fix performance regression in pg_strtointNN_safe functions
Between 6fcda9aba and 1b6f632a3, the pg_strtoint functions became quite
a bit slower in v16, despite efforts in 6b423ec67 to speed these up.
Since the majority of cases for these functions will only contain
base-10 digits, perhaps prefixed by a '-', it makes sense to have a
special case for this and just fall back on the more complex version
which processes hex, octal, binary and underscores if the fast path
version fails to parse the string.
While we're here, update the header comments for these functions to
mention that hex, octal and binary formats along with underscore
separators are now supported.
Author: Andres Freund, David Rowley
Reported-by: Masahiko Sawada
Reviewed-by: Dean Rasheed, John Naylor
Discussion: https://postgr.es/m/CAD21AoDvDmUQeJtZrau1ovnT_smN940%3DKp6mszNGK3bq9yRN6g%40mail.gmail.com
Backpatch-through: 16, where 6fcda9aba and 1b6f632a3 were added
2023-08-02 02:05:41 +02:00
|
|
|
while (isspace((unsigned char) *ptr))
|
2018-07-22 23:58:01 +02:00
|
|
|
ptr++;
|
|
|
|
|
|
|
|
if (unlikely(*ptr != '\0'))
|
|
|
|
goto invalid_syntax;
|
|
|
|
|
2022-12-04 04:18:18 +01:00
|
|
|
if (neg)
|
2018-07-22 23:58:01 +02:00
|
|
|
{
|
2022-12-04 04:18:18 +01:00
|
|
|
/* check the negative equivalent will fit without overflowing */
|
|
|
|
if (tmp > (uint16) (-(PG_INT16_MIN + 1)) + 1)
|
2018-07-22 23:58:01 +02:00
|
|
|
goto out_of_range;
|
2022-12-04 04:18:18 +01:00
|
|
|
return -((int16) tmp);
|
2018-07-22 23:58:01 +02:00
|
|
|
}
|
|
|
|
|
2022-12-04 04:18:18 +01:00
|
|
|
if (tmp > PG_INT16_MAX)
|
|
|
|
goto out_of_range;
|
|
|
|
|
|
|
|
return (int16) tmp;
|
2018-07-22 23:58:01 +02:00
|
|
|
|
|
|
|
out_of_range:
|
Convert a few datatype input functions to use "soft" error reporting.
This patch converts the input functions for bool, int2, int4, int8,
float4, float8, numeric, and contrib/cube to the new soft-error style.
array_in and record_in are also converted. There's lots more to do,
but this is enough to provide proof-of-concept that the soft-error
API is usable, as well as reference examples for how to convert
input functions.
This patch is mostly by me, but it owes very substantial debt to
earlier work by Nikita Glukhov, Andrew Dunstan, and Amul Sul.
Thanks to Andres Freund for review.
Discussion: https://postgr.es/m/3bbbb0df-7382-bf87-9737-340ba096e034@postgrespro.ru
2022-12-09 16:14:53 +01:00
|
|
|
ereturn(escontext, 0,
|
2018-07-22 23:58:01 +02:00
|
|
|
(errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
|
|
|
|
errmsg("value \"%s\" is out of range for type %s",
|
|
|
|
s, "smallint")));
|
|
|
|
|
|
|
|
invalid_syntax:
|
Convert a few datatype input functions to use "soft" error reporting.
This patch converts the input functions for bool, int2, int4, int8,
float4, float8, numeric, and contrib/cube to the new soft-error style.
array_in and record_in are also converted. There's lots more to do,
but this is enough to provide proof-of-concept that the soft-error
API is usable, as well as reference examples for how to convert
input functions.
This patch is mostly by me, but it owes very substantial debt to
earlier work by Nikita Glukhov, Andrew Dunstan, and Amul Sul.
Thanks to Andres Freund for review.
Discussion: https://postgr.es/m/3bbbb0df-7382-bf87-9737-340ba096e034@postgrespro.ru
2022-12-09 16:14:53 +01:00
|
|
|
ereturn(escontext, 0,
|
2018-07-22 23:58:01 +02:00
|
|
|
(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
|
|
|
|
errmsg("invalid input syntax for type %s: \"%s\"",
|
|
|
|
"smallint", s)));
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
Fix performance regression in pg_strtointNN_safe functions
Between 6fcda9aba and 1b6f632a3, the pg_strtoint functions became quite
a bit slower in v16, despite efforts in 6b423ec67 to speed these up.
Since the majority of cases for these functions will only contain
base-10 digits, perhaps prefixed by a '-', it makes sense to have a
special case for this and just fall back on the more complex version
which processes hex, octal, binary and underscores if the fast path
version fails to parse the string.
While we're here, update the header comments for these functions to
mention that hex, octal and binary formats along with underscore
separators are now supported.
Author: Andres Freund, David Rowley
Reported-by: Masahiko Sawada
Reviewed-by: Dean Rasheed, John Naylor
Discussion: https://postgr.es/m/CAD21AoDvDmUQeJtZrau1ovnT_smN940%3DKp6mszNGK3bq9yRN6g%40mail.gmail.com
Backpatch-through: 16, where 6fcda9aba and 1b6f632a3 were added
2023-08-02 02:05:41 +02:00
|
|
|
* Convert input string to a signed 32 bit integer. Input strings may be
|
|
|
|
* expressed in base-10, hexadecimal, octal, or binary format, all of which
|
|
|
|
* can be prefixed by an optional sign character, either '+' (the default) or
|
|
|
|
* '-' for negative numbers. Hex strings are recognized by the digits being
|
|
|
|
* prefixed by 0x or 0X while octal strings are recognized by the 0o or 0O
|
|
|
|
* prefix. The binary representation is recognized by the 0b or 0B prefix.
|
2018-07-22 23:58:01 +02:00
|
|
|
*
|
Fix performance regression in pg_strtointNN_safe functions
Between 6fcda9aba and 1b6f632a3, the pg_strtoint functions became quite
a bit slower in v16, despite efforts in 6b423ec67 to speed these up.
Since the majority of cases for these functions will only contain
base-10 digits, perhaps prefixed by a '-', it makes sense to have a
special case for this and just fall back on the more complex version
which processes hex, octal, binary and underscores if the fast path
version fails to parse the string.
While we're here, update the header comments for these functions to
mention that hex, octal and binary formats along with underscore
separators are now supported.
Author: Andres Freund, David Rowley
Reported-by: Masahiko Sawada
Reviewed-by: Dean Rasheed, John Naylor
Discussion: https://postgr.es/m/CAD21AoDvDmUQeJtZrau1ovnT_smN940%3DKp6mszNGK3bq9yRN6g%40mail.gmail.com
Backpatch-through: 16, where 6fcda9aba and 1b6f632a3 were added
2023-08-02 02:05:41 +02:00
|
|
|
* Allows any number of leading or trailing whitespace characters. Digits may
|
|
|
|
* optionally be separated by a single underscore character. These can only
|
|
|
|
* come between digits and not before or after the digits. Underscores have
|
|
|
|
* no effect on the return value and are supported only to assist in improving
|
|
|
|
* the human readability of the input strings.
|
Convert a few datatype input functions to use "soft" error reporting.
This patch converts the input functions for bool, int2, int4, int8,
float4, float8, numeric, and contrib/cube to the new soft-error style.
array_in and record_in are also converted. There's lots more to do,
but this is enough to provide proof-of-concept that the soft-error
API is usable, as well as reference examples for how to convert
input functions.
This patch is mostly by me, but it owes very substantial debt to
earlier work by Nikita Glukhov, Andrew Dunstan, and Amul Sul.
Thanks to Andres Freund for review.
Discussion: https://postgr.es/m/3bbbb0df-7382-bf87-9737-340ba096e034@postgrespro.ru
2022-12-09 16:14:53 +01:00
|
|
|
*
|
|
|
|
* pg_strtoint32() will throw ereport() upon bad input format or overflow;
|
|
|
|
* while pg_strtoint32_safe() instead returns such complaints in *escontext,
|
|
|
|
* if it's an ErrorSaveContext.
|
2018-07-22 23:58:01 +02:00
|
|
|
*
|
2022-12-04 04:18:18 +01:00
|
|
|
* NB: Accumulate input as an unsigned number, to deal with two's complement
|
2018-07-22 23:58:01 +02:00
|
|
|
* representation of the most negative number, which can't be represented as a
|
2022-12-04 04:18:18 +01:00
|
|
|
* signed positive number.
|
2018-07-22 23:58:01 +02:00
|
|
|
*/
|
|
|
|
int32
|
|
|
|
pg_strtoint32(const char *s)
|
Convert a few datatype input functions to use "soft" error reporting.
This patch converts the input functions for bool, int2, int4, int8,
float4, float8, numeric, and contrib/cube to the new soft-error style.
array_in and record_in are also converted. There's lots more to do,
but this is enough to provide proof-of-concept that the soft-error
API is usable, as well as reference examples for how to convert
input functions.
This patch is mostly by me, but it owes very substantial debt to
earlier work by Nikita Glukhov, Andrew Dunstan, and Amul Sul.
Thanks to Andres Freund for review.
Discussion: https://postgr.es/m/3bbbb0df-7382-bf87-9737-340ba096e034@postgrespro.ru
2022-12-09 16:14:53 +01:00
|
|
|
{
|
|
|
|
return pg_strtoint32_safe(s, NULL);
|
|
|
|
}
|
|
|
|
|
|
|
|
int32
|
|
|
|
pg_strtoint32_safe(const char *s, Node *escontext)
|
2018-07-22 23:58:01 +02:00
|
|
|
{
|
|
|
|
const char *ptr = s;
|
2022-12-14 05:40:38 +01:00
|
|
|
const char *firstdigit;
|
2022-12-04 04:18:18 +01:00
|
|
|
uint32 tmp = 0;
|
2018-07-22 23:58:01 +02:00
|
|
|
bool neg = false;
|
Fix performance regression in pg_strtointNN_safe functions
Between 6fcda9aba and 1b6f632a3, the pg_strtoint functions became quite
a bit slower in v16, despite efforts in 6b423ec67 to speed these up.
Since the majority of cases for these functions will only contain
base-10 digits, perhaps prefixed by a '-', it makes sense to have a
special case for this and just fall back on the more complex version
which processes hex, octal, binary and underscores if the fast path
version fails to parse the string.
While we're here, update the header comments for these functions to
mention that hex, octal and binary formats along with underscore
separators are now supported.
Author: Andres Freund, David Rowley
Reported-by: Masahiko Sawada
Reviewed-by: Dean Rasheed, John Naylor
Discussion: https://postgr.es/m/CAD21AoDvDmUQeJtZrau1ovnT_smN940%3DKp6mszNGK3bq9yRN6g%40mail.gmail.com
Backpatch-through: 16, where 6fcda9aba and 1b6f632a3 were added
2023-08-02 02:05:41 +02:00
|
|
|
unsigned char digit;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* The majority of cases are likely to be base-10 digits without any
|
|
|
|
* underscore separator characters. We'll first try to parse the string
|
|
|
|
* with the assumption that's the case and only fallback on a slower
|
|
|
|
* implementation which handles hex, octal and binary strings and
|
|
|
|
* underscores if the fastpath version cannot parse the string.
|
|
|
|
*/
|
|
|
|
|
|
|
|
/* leave it up to the slow path to look for leading spaces */
|
|
|
|
|
|
|
|
if (*ptr == '-')
|
|
|
|
{
|
|
|
|
ptr++;
|
|
|
|
neg = true;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* a leading '+' is uncommon so leave that for the slow path */
|
|
|
|
|
|
|
|
/* process the first digit */
|
|
|
|
digit = (*ptr - '0');
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Exploit unsigned arithmetic to save having to check both the upper and
|
|
|
|
* lower bounds of the digit.
|
|
|
|
*/
|
|
|
|
if (likely(digit < 10))
|
|
|
|
{
|
|
|
|
ptr++;
|
|
|
|
tmp = digit;
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
/* we need at least one digit */
|
|
|
|
goto slow;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* process remaining digits */
|
|
|
|
for (;;)
|
|
|
|
{
|
|
|
|
digit = (*ptr - '0');
|
|
|
|
|
|
|
|
if (digit >= 10)
|
|
|
|
break;
|
|
|
|
|
|
|
|
ptr++;
|
|
|
|
|
|
|
|
if (unlikely(tmp > -(PG_INT32_MIN / 10)))
|
|
|
|
goto out_of_range;
|
|
|
|
|
|
|
|
tmp = tmp * 10 + digit;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* when the string does not end in a digit, let the slow path handle it */
|
|
|
|
if (unlikely(*ptr != '\0'))
|
|
|
|
goto slow;
|
|
|
|
|
|
|
|
if (neg)
|
|
|
|
{
|
|
|
|
/* check the negative equivalent will fit without overflowing */
|
|
|
|
if (unlikely(tmp > (uint32) (-(PG_INT32_MIN + 1)) + 1))
|
|
|
|
goto out_of_range;
|
|
|
|
return -((int32) tmp);
|
|
|
|
}
|
|
|
|
|
|
|
|
if (unlikely(tmp > PG_INT32_MAX))
|
|
|
|
goto out_of_range;
|
|
|
|
|
|
|
|
return (int32) tmp;
|
|
|
|
|
|
|
|
slow:
|
|
|
|
tmp = 0;
|
|
|
|
ptr = s;
|
|
|
|
/* no need to reset neg */
|
2018-07-22 23:58:01 +02:00
|
|
|
|
|
|
|
/* skip leading spaces */
|
Fix performance regression in pg_strtointNN_safe functions
Between 6fcda9aba and 1b6f632a3, the pg_strtoint functions became quite
a bit slower in v16, despite efforts in 6b423ec67 to speed these up.
Since the majority of cases for these functions will only contain
base-10 digits, perhaps prefixed by a '-', it makes sense to have a
special case for this and just fall back on the more complex version
which processes hex, octal, binary and underscores if the fast path
version fails to parse the string.
While we're here, update the header comments for these functions to
mention that hex, octal and binary formats along with underscore
separators are now supported.
Author: Andres Freund, David Rowley
Reported-by: Masahiko Sawada
Reviewed-by: Dean Rasheed, John Naylor
Discussion: https://postgr.es/m/CAD21AoDvDmUQeJtZrau1ovnT_smN940%3DKp6mszNGK3bq9yRN6g%40mail.gmail.com
Backpatch-through: 16, where 6fcda9aba and 1b6f632a3 were added
2023-08-02 02:05:41 +02:00
|
|
|
while (isspace((unsigned char) *ptr))
|
2018-07-22 23:58:01 +02:00
|
|
|
ptr++;
|
|
|
|
|
|
|
|
/* handle sign */
|
|
|
|
if (*ptr == '-')
|
|
|
|
{
|
|
|
|
ptr++;
|
|
|
|
neg = true;
|
|
|
|
}
|
|
|
|
else if (*ptr == '+')
|
|
|
|
ptr++;
|
|
|
|
|
|
|
|
/* process digits */
|
2022-12-14 05:40:38 +01:00
|
|
|
if (ptr[0] == '0' && (ptr[1] == 'x' || ptr[1] == 'X'))
|
2018-07-22 23:58:01 +02:00
|
|
|
{
|
2022-12-14 05:40:38 +01:00
|
|
|
firstdigit = ptr += 2;
|
|
|
|
|
Fix performance regression in pg_strtointNN_safe functions
Between 6fcda9aba and 1b6f632a3, the pg_strtoint functions became quite
a bit slower in v16, despite efforts in 6b423ec67 to speed these up.
Since the majority of cases for these functions will only contain
base-10 digits, perhaps prefixed by a '-', it makes sense to have a
special case for this and just fall back on the more complex version
which processes hex, octal, binary and underscores if the fast path
version fails to parse the string.
While we're here, update the header comments for these functions to
mention that hex, octal and binary formats along with underscore
separators are now supported.
Author: Andres Freund, David Rowley
Reported-by: Masahiko Sawada
Reviewed-by: Dean Rasheed, John Naylor
Discussion: https://postgr.es/m/CAD21AoDvDmUQeJtZrau1ovnT_smN940%3DKp6mszNGK3bq9yRN6g%40mail.gmail.com
Backpatch-through: 16, where 6fcda9aba and 1b6f632a3 were added
2023-08-02 02:05:41 +02:00
|
|
|
for (;;)
|
2022-12-14 05:40:38 +01:00
|
|
|
{
|
2023-02-04 10:48:51 +01:00
|
|
|
if (isxdigit((unsigned char) *ptr))
|
|
|
|
{
|
|
|
|
if (unlikely(tmp > -(PG_INT32_MIN / 16)))
|
|
|
|
goto out_of_range;
|
|
|
|
|
|
|
|
tmp = tmp * 16 + hexlookup[(unsigned char) *ptr++];
|
|
|
|
}
|
|
|
|
else if (*ptr == '_')
|
|
|
|
{
|
|
|
|
/* underscore must be followed by more digits */
|
|
|
|
ptr++;
|
|
|
|
if (*ptr == '\0' || !isxdigit((unsigned char) *ptr))
|
|
|
|
goto invalid_syntax;
|
|
|
|
}
|
|
|
|
else
|
|
|
|
break;
|
2022-12-14 05:40:38 +01:00
|
|
|
}
|
2018-07-22 23:58:01 +02:00
|
|
|
}
|
2022-12-14 05:40:38 +01:00
|
|
|
else if (ptr[0] == '0' && (ptr[1] == 'o' || ptr[1] == 'O'))
|
|
|
|
{
|
|
|
|
firstdigit = ptr += 2;
|
|
|
|
|
Fix performance regression in pg_strtointNN_safe functions
Between 6fcda9aba and 1b6f632a3, the pg_strtoint functions became quite
a bit slower in v16, despite efforts in 6b423ec67 to speed these up.
Since the majority of cases for these functions will only contain
base-10 digits, perhaps prefixed by a '-', it makes sense to have a
special case for this and just fall back on the more complex version
which processes hex, octal, binary and underscores if the fast path
version fails to parse the string.
While we're here, update the header comments for these functions to
mention that hex, octal and binary formats along with underscore
separators are now supported.
Author: Andres Freund, David Rowley
Reported-by: Masahiko Sawada
Reviewed-by: Dean Rasheed, John Naylor
Discussion: https://postgr.es/m/CAD21AoDvDmUQeJtZrau1ovnT_smN940%3DKp6mszNGK3bq9yRN6g%40mail.gmail.com
Backpatch-through: 16, where 6fcda9aba and 1b6f632a3 were added
2023-08-02 02:05:41 +02:00
|
|
|
for (;;)
|
2022-12-14 05:40:38 +01:00
|
|
|
{
|
2023-02-04 10:48:51 +01:00
|
|
|
if (*ptr >= '0' && *ptr <= '7')
|
|
|
|
{
|
|
|
|
if (unlikely(tmp > -(PG_INT32_MIN / 8)))
|
|
|
|
goto out_of_range;
|
|
|
|
|
|
|
|
tmp = tmp * 8 + (*ptr++ - '0');
|
|
|
|
}
|
|
|
|
else if (*ptr == '_')
|
|
|
|
{
|
|
|
|
/* underscore must be followed by more digits */
|
|
|
|
ptr++;
|
|
|
|
if (*ptr == '\0' || *ptr < '0' || *ptr > '7')
|
|
|
|
goto invalid_syntax;
|
|
|
|
}
|
|
|
|
else
|
|
|
|
break;
|
2022-12-14 05:40:38 +01:00
|
|
|
}
|
|
|
|
}
|
|
|
|
else if (ptr[0] == '0' && (ptr[1] == 'b' || ptr[1] == 'B'))
|
|
|
|
{
|
|
|
|
firstdigit = ptr += 2;
|
|
|
|
|
Fix performance regression in pg_strtointNN_safe functions
Between 6fcda9aba and 1b6f632a3, the pg_strtoint functions became quite
a bit slower in v16, despite efforts in 6b423ec67 to speed these up.
Since the majority of cases for these functions will only contain
base-10 digits, perhaps prefixed by a '-', it makes sense to have a
special case for this and just fall back on the more complex version
which processes hex, octal, binary and underscores if the fast path
version fails to parse the string.
While we're here, update the header comments for these functions to
mention that hex, octal and binary formats along with underscore
separators are now supported.
Author: Andres Freund, David Rowley
Reported-by: Masahiko Sawada
Reviewed-by: Dean Rasheed, John Naylor
Discussion: https://postgr.es/m/CAD21AoDvDmUQeJtZrau1ovnT_smN940%3DKp6mszNGK3bq9yRN6g%40mail.gmail.com
Backpatch-through: 16, where 6fcda9aba and 1b6f632a3 were added
2023-08-02 02:05:41 +02:00
|
|
|
for (;;)
|
2022-12-14 05:40:38 +01:00
|
|
|
{
|
2023-02-04 10:48:51 +01:00
|
|
|
if (*ptr >= '0' && *ptr <= '1')
|
|
|
|
{
|
|
|
|
if (unlikely(tmp > -(PG_INT32_MIN / 2)))
|
|
|
|
goto out_of_range;
|
|
|
|
|
|
|
|
tmp = tmp * 2 + (*ptr++ - '0');
|
|
|
|
}
|
|
|
|
else if (*ptr == '_')
|
|
|
|
{
|
|
|
|
/* underscore must be followed by more digits */
|
|
|
|
ptr++;
|
|
|
|
if (*ptr == '\0' || *ptr < '0' || *ptr > '1')
|
|
|
|
goto invalid_syntax;
|
|
|
|
}
|
|
|
|
else
|
|
|
|
break;
|
2022-12-14 05:40:38 +01:00
|
|
|
}
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
firstdigit = ptr;
|
|
|
|
|
Fix performance regression in pg_strtointNN_safe functions
Between 6fcda9aba and 1b6f632a3, the pg_strtoint functions became quite
a bit slower in v16, despite efforts in 6b423ec67 to speed these up.
Since the majority of cases for these functions will only contain
base-10 digits, perhaps prefixed by a '-', it makes sense to have a
special case for this and just fall back on the more complex version
which processes hex, octal, binary and underscores if the fast path
version fails to parse the string.
While we're here, update the header comments for these functions to
mention that hex, octal and binary formats along with underscore
separators are now supported.
Author: Andres Freund, David Rowley
Reported-by: Masahiko Sawada
Reviewed-by: Dean Rasheed, John Naylor
Discussion: https://postgr.es/m/CAD21AoDvDmUQeJtZrau1ovnT_smN940%3DKp6mszNGK3bq9yRN6g%40mail.gmail.com
Backpatch-through: 16, where 6fcda9aba and 1b6f632a3 were added
2023-08-02 02:05:41 +02:00
|
|
|
for (;;)
|
2022-12-14 05:40:38 +01:00
|
|
|
{
|
Fix performance regression in pg_strtointNN_safe functions
Between 6fcda9aba and 1b6f632a3, the pg_strtoint functions became quite
a bit slower in v16, despite efforts in 6b423ec67 to speed these up.
Since the majority of cases for these functions will only contain
base-10 digits, perhaps prefixed by a '-', it makes sense to have a
special case for this and just fall back on the more complex version
which processes hex, octal, binary and underscores if the fast path
version fails to parse the string.
While we're here, update the header comments for these functions to
mention that hex, octal and binary formats along with underscore
separators are now supported.
Author: Andres Freund, David Rowley
Reported-by: Masahiko Sawada
Reviewed-by: Dean Rasheed, John Naylor
Discussion: https://postgr.es/m/CAD21AoDvDmUQeJtZrau1ovnT_smN940%3DKp6mszNGK3bq9yRN6g%40mail.gmail.com
Backpatch-through: 16, where 6fcda9aba and 1b6f632a3 were added
2023-08-02 02:05:41 +02:00
|
|
|
if (*ptr >= '0' && *ptr <= '9')
|
2023-02-04 10:48:51 +01:00
|
|
|
{
|
|
|
|
if (unlikely(tmp > -(PG_INT32_MIN / 10)))
|
|
|
|
goto out_of_range;
|
|
|
|
|
|
|
|
tmp = tmp * 10 + (*ptr++ - '0');
|
|
|
|
}
|
|
|
|
else if (*ptr == '_')
|
|
|
|
{
|
|
|
|
/* underscore may not be first */
|
|
|
|
if (unlikely(ptr == firstdigit))
|
|
|
|
goto invalid_syntax;
|
|
|
|
/* and it must be followed by more digits */
|
|
|
|
ptr++;
|
|
|
|
if (*ptr == '\0' || !isdigit((unsigned char) *ptr))
|
|
|
|
goto invalid_syntax;
|
|
|
|
}
|
|
|
|
else
|
|
|
|
break;
|
2022-12-14 05:40:38 +01:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/* require at least one digit */
|
|
|
|
if (unlikely(ptr == firstdigit))
|
|
|
|
goto invalid_syntax;
|
2018-07-22 23:58:01 +02:00
|
|
|
|
|
|
|
/* allow trailing whitespace, but not other trailing chars */
|
Fix performance regression in pg_strtointNN_safe functions
Between 6fcda9aba and 1b6f632a3, the pg_strtoint functions became quite
a bit slower in v16, despite efforts in 6b423ec67 to speed these up.
Since the majority of cases for these functions will only contain
base-10 digits, perhaps prefixed by a '-', it makes sense to have a
special case for this and just fall back on the more complex version
which processes hex, octal, binary and underscores if the fast path
version fails to parse the string.
While we're here, update the header comments for these functions to
mention that hex, octal and binary formats along with underscore
separators are now supported.
Author: Andres Freund, David Rowley
Reported-by: Masahiko Sawada
Reviewed-by: Dean Rasheed, John Naylor
Discussion: https://postgr.es/m/CAD21AoDvDmUQeJtZrau1ovnT_smN940%3DKp6mszNGK3bq9yRN6g%40mail.gmail.com
Backpatch-through: 16, where 6fcda9aba and 1b6f632a3 were added
2023-08-02 02:05:41 +02:00
|
|
|
while (isspace((unsigned char) *ptr))
|
2018-07-22 23:58:01 +02:00
|
|
|
ptr++;
|
|
|
|
|
|
|
|
if (unlikely(*ptr != '\0'))
|
|
|
|
goto invalid_syntax;
|
|
|
|
|
2022-12-04 04:18:18 +01:00
|
|
|
if (neg)
|
2018-07-22 23:58:01 +02:00
|
|
|
{
|
2022-12-04 04:18:18 +01:00
|
|
|
/* check the negative equivalent will fit without overflowing */
|
|
|
|
if (tmp > (uint32) (-(PG_INT32_MIN + 1)) + 1)
|
2018-07-22 23:58:01 +02:00
|
|
|
goto out_of_range;
|
2022-12-04 04:18:18 +01:00
|
|
|
return -((int32) tmp);
|
2018-07-22 23:58:01 +02:00
|
|
|
}
|
|
|
|
|
2022-12-04 04:18:18 +01:00
|
|
|
if (tmp > PG_INT32_MAX)
|
|
|
|
goto out_of_range;
|
|
|
|
|
|
|
|
return (int32) tmp;
|
2018-07-22 23:58:01 +02:00
|
|
|
|
|
|
|
out_of_range:
|
Convert a few datatype input functions to use "soft" error reporting.
This patch converts the input functions for bool, int2, int4, int8,
float4, float8, numeric, and contrib/cube to the new soft-error style.
array_in and record_in are also converted. There's lots more to do,
but this is enough to provide proof-of-concept that the soft-error
API is usable, as well as reference examples for how to convert
input functions.
This patch is mostly by me, but it owes very substantial debt to
earlier work by Nikita Glukhov, Andrew Dunstan, and Amul Sul.
Thanks to Andres Freund for review.
Discussion: https://postgr.es/m/3bbbb0df-7382-bf87-9737-340ba096e034@postgrespro.ru
2022-12-09 16:14:53 +01:00
|
|
|
ereturn(escontext, 0,
|
2018-07-22 23:58:01 +02:00
|
|
|
(errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
|
|
|
|
errmsg("value \"%s\" is out of range for type %s",
|
|
|
|
s, "integer")));
|
|
|
|
|
|
|
|
invalid_syntax:
|
Convert a few datatype input functions to use "soft" error reporting.
This patch converts the input functions for bool, int2, int4, int8,
float4, float8, numeric, and contrib/cube to the new soft-error style.
array_in and record_in are also converted. There's lots more to do,
but this is enough to provide proof-of-concept that the soft-error
API is usable, as well as reference examples for how to convert
input functions.
This patch is mostly by me, but it owes very substantial debt to
earlier work by Nikita Glukhov, Andrew Dunstan, and Amul Sul.
Thanks to Andres Freund for review.
Discussion: https://postgr.es/m/3bbbb0df-7382-bf87-9737-340ba096e034@postgrespro.ru
2022-12-09 16:14:53 +01:00
|
|
|
ereturn(escontext, 0,
|
2018-07-22 23:58:01 +02:00
|
|
|
(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
|
|
|
|
errmsg("invalid input syntax for type %s: \"%s\"",
|
|
|
|
"integer", s)));
|
|
|
|
}
|
|
|
|
|
2022-02-14 21:29:45 +01:00
|
|
|
/*
|
Fix performance regression in pg_strtointNN_safe functions
Between 6fcda9aba and 1b6f632a3, the pg_strtoint functions became quite
a bit slower in v16, despite efforts in 6b423ec67 to speed these up.
Since the majority of cases for these functions will only contain
base-10 digits, perhaps prefixed by a '-', it makes sense to have a
special case for this and just fall back on the more complex version
which processes hex, octal, binary and underscores if the fast path
version fails to parse the string.
While we're here, update the header comments for these functions to
mention that hex, octal and binary formats along with underscore
separators are now supported.
Author: Andres Freund, David Rowley
Reported-by: Masahiko Sawada
Reviewed-by: Dean Rasheed, John Naylor
Discussion: https://postgr.es/m/CAD21AoDvDmUQeJtZrau1ovnT_smN940%3DKp6mszNGK3bq9yRN6g%40mail.gmail.com
Backpatch-through: 16, where 6fcda9aba and 1b6f632a3 were added
2023-08-02 02:05:41 +02:00
|
|
|
* Convert input string to a signed 64 bit integer. Input strings may be
|
|
|
|
* expressed in base-10, hexadecimal, octal, or binary format, all of which
|
|
|
|
* can be prefixed by an optional sign character, either '+' (the default) or
|
|
|
|
* '-' for negative numbers. Hex strings are recognized by the digits being
|
|
|
|
* prefixed by 0x or 0X while octal strings are recognized by the 0o or 0O
|
|
|
|
* prefix. The binary representation is recognized by the 0b or 0B prefix.
|
2022-02-14 21:29:45 +01:00
|
|
|
*
|
Fix performance regression in pg_strtointNN_safe functions
Between 6fcda9aba and 1b6f632a3, the pg_strtoint functions became quite
a bit slower in v16, despite efforts in 6b423ec67 to speed these up.
Since the majority of cases for these functions will only contain
base-10 digits, perhaps prefixed by a '-', it makes sense to have a
special case for this and just fall back on the more complex version
which processes hex, octal, binary and underscores if the fast path
version fails to parse the string.
While we're here, update the header comments for these functions to
mention that hex, octal and binary formats along with underscore
separators are now supported.
Author: Andres Freund, David Rowley
Reported-by: Masahiko Sawada
Reviewed-by: Dean Rasheed, John Naylor
Discussion: https://postgr.es/m/CAD21AoDvDmUQeJtZrau1ovnT_smN940%3DKp6mszNGK3bq9yRN6g%40mail.gmail.com
Backpatch-through: 16, where 6fcda9aba and 1b6f632a3 were added
2023-08-02 02:05:41 +02:00
|
|
|
* Allows any number of leading or trailing whitespace characters. Digits may
|
|
|
|
* optionally be separated by a single underscore character. These can only
|
|
|
|
* come between digits and not before or after the digits. Underscores have
|
|
|
|
* no effect on the return value and are supported only to assist in improving
|
|
|
|
* the human readability of the input strings.
|
Convert a few datatype input functions to use "soft" error reporting.
This patch converts the input functions for bool, int2, int4, int8,
float4, float8, numeric, and contrib/cube to the new soft-error style.
array_in and record_in are also converted. There's lots more to do,
but this is enough to provide proof-of-concept that the soft-error
API is usable, as well as reference examples for how to convert
input functions.
This patch is mostly by me, but it owes very substantial debt to
earlier work by Nikita Glukhov, Andrew Dunstan, and Amul Sul.
Thanks to Andres Freund for review.
Discussion: https://postgr.es/m/3bbbb0df-7382-bf87-9737-340ba096e034@postgrespro.ru
2022-12-09 16:14:53 +01:00
|
|
|
*
|
|
|
|
* pg_strtoint64() will throw ereport() upon bad input format or overflow;
|
|
|
|
* while pg_strtoint64_safe() instead returns such complaints in *escontext,
|
|
|
|
* if it's an ErrorSaveContext.
|
2022-02-14 21:29:45 +01:00
|
|
|
*
|
2022-12-04 04:18:18 +01:00
|
|
|
* NB: Accumulate input as an unsigned number, to deal with two's complement
|
2022-02-14 21:29:45 +01:00
|
|
|
* representation of the most negative number, which can't be represented as a
|
2022-12-04 04:18:18 +01:00
|
|
|
* signed positive number.
|
2022-02-14 21:29:45 +01:00
|
|
|
*/
|
|
|
|
int64
|
|
|
|
pg_strtoint64(const char *s)
|
Convert a few datatype input functions to use "soft" error reporting.
This patch converts the input functions for bool, int2, int4, int8,
float4, float8, numeric, and contrib/cube to the new soft-error style.
array_in and record_in are also converted. There's lots more to do,
but this is enough to provide proof-of-concept that the soft-error
API is usable, as well as reference examples for how to convert
input functions.
This patch is mostly by me, but it owes very substantial debt to
earlier work by Nikita Glukhov, Andrew Dunstan, and Amul Sul.
Thanks to Andres Freund for review.
Discussion: https://postgr.es/m/3bbbb0df-7382-bf87-9737-340ba096e034@postgrespro.ru
2022-12-09 16:14:53 +01:00
|
|
|
{
|
|
|
|
return pg_strtoint64_safe(s, NULL);
|
|
|
|
}
|
|
|
|
|
|
|
|
int64
|
|
|
|
pg_strtoint64_safe(const char *s, Node *escontext)
|
2022-02-14 21:29:45 +01:00
|
|
|
{
|
|
|
|
const char *ptr = s;
|
2022-12-14 05:40:38 +01:00
|
|
|
const char *firstdigit;
|
2022-12-04 04:18:18 +01:00
|
|
|
uint64 tmp = 0;
|
2022-02-14 21:29:45 +01:00
|
|
|
bool neg = false;
|
Fix performance regression in pg_strtointNN_safe functions
Between 6fcda9aba and 1b6f632a3, the pg_strtoint functions became quite
a bit slower in v16, despite efforts in 6b423ec67 to speed these up.
Since the majority of cases for these functions will only contain
base-10 digits, perhaps prefixed by a '-', it makes sense to have a
special case for this and just fall back on the more complex version
which processes hex, octal, binary and underscores if the fast path
version fails to parse the string.
While we're here, update the header comments for these functions to
mention that hex, octal and binary formats along with underscore
separators are now supported.
Author: Andres Freund, David Rowley
Reported-by: Masahiko Sawada
Reviewed-by: Dean Rasheed, John Naylor
Discussion: https://postgr.es/m/CAD21AoDvDmUQeJtZrau1ovnT_smN940%3DKp6mszNGK3bq9yRN6g%40mail.gmail.com
Backpatch-through: 16, where 6fcda9aba and 1b6f632a3 were added
2023-08-02 02:05:41 +02:00
|
|
|
unsigned char digit;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* The majority of cases are likely to be base-10 digits without any
|
|
|
|
* underscore separator characters. We'll first try to parse the string
|
|
|
|
* with the assumption that's the case and only fallback on a slower
|
|
|
|
* implementation which handles hex, octal and binary strings and
|
|
|
|
* underscores if the fastpath version cannot parse the string.
|
|
|
|
*/
|
|
|
|
|
|
|
|
/* leave it up to the slow path to look for leading spaces */
|
|
|
|
|
|
|
|
if (*ptr == '-')
|
|
|
|
{
|
|
|
|
ptr++;
|
|
|
|
neg = true;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* a leading '+' is uncommon so leave that for the slow path */
|
|
|
|
|
|
|
|
/* process the first digit */
|
|
|
|
digit = (*ptr - '0');
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Exploit unsigned arithmetic to save having to check both the upper and
|
|
|
|
* lower bounds of the digit.
|
|
|
|
*/
|
|
|
|
if (likely(digit < 10))
|
|
|
|
{
|
|
|
|
ptr++;
|
|
|
|
tmp = digit;
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
/* we need at least one digit */
|
|
|
|
goto slow;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* process remaining digits */
|
|
|
|
for (;;)
|
|
|
|
{
|
|
|
|
digit = (*ptr - '0');
|
|
|
|
|
|
|
|
if (digit >= 10)
|
|
|
|
break;
|
|
|
|
|
|
|
|
ptr++;
|
|
|
|
|
|
|
|
if (unlikely(tmp > -(PG_INT64_MIN / 10)))
|
|
|
|
goto out_of_range;
|
|
|
|
|
|
|
|
tmp = tmp * 10 + digit;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* when the string does not end in a digit, let the slow path handle it */
|
|
|
|
if (unlikely(*ptr != '\0'))
|
|
|
|
goto slow;
|
|
|
|
|
|
|
|
if (neg)
|
|
|
|
{
|
|
|
|
/* check the negative equivalent will fit without overflowing */
|
|
|
|
if (unlikely(tmp > (uint64) (-(PG_INT64_MIN + 1)) + 1))
|
|
|
|
goto out_of_range;
|
|
|
|
return -((int64) tmp);
|
|
|
|
}
|
|
|
|
|
|
|
|
if (unlikely(tmp > PG_INT64_MAX))
|
|
|
|
goto out_of_range;
|
|
|
|
|
|
|
|
return (int64) tmp;
|
|
|
|
|
|
|
|
slow:
|
|
|
|
tmp = 0;
|
|
|
|
ptr = s;
|
|
|
|
/* no need to reset neg */
|
2022-02-14 21:29:45 +01:00
|
|
|
|
|
|
|
/* skip leading spaces */
|
Fix performance regression in pg_strtointNN_safe functions
Between 6fcda9aba and 1b6f632a3, the pg_strtoint functions became quite
a bit slower in v16, despite efforts in 6b423ec67 to speed these up.
Since the majority of cases for these functions will only contain
base-10 digits, perhaps prefixed by a '-', it makes sense to have a
special case for this and just fall back on the more complex version
which processes hex, octal, binary and underscores if the fast path
version fails to parse the string.
While we're here, update the header comments for these functions to
mention that hex, octal and binary formats along with underscore
separators are now supported.
Author: Andres Freund, David Rowley
Reported-by: Masahiko Sawada
Reviewed-by: Dean Rasheed, John Naylor
Discussion: https://postgr.es/m/CAD21AoDvDmUQeJtZrau1ovnT_smN940%3DKp6mszNGK3bq9yRN6g%40mail.gmail.com
Backpatch-through: 16, where 6fcda9aba and 1b6f632a3 were added
2023-08-02 02:05:41 +02:00
|
|
|
while (isspace((unsigned char) *ptr))
|
2022-02-14 21:29:45 +01:00
|
|
|
ptr++;
|
|
|
|
|
|
|
|
/* handle sign */
|
|
|
|
if (*ptr == '-')
|
|
|
|
{
|
|
|
|
ptr++;
|
|
|
|
neg = true;
|
|
|
|
}
|
|
|
|
else if (*ptr == '+')
|
|
|
|
ptr++;
|
|
|
|
|
|
|
|
/* process digits */
|
2022-12-14 05:40:38 +01:00
|
|
|
if (ptr[0] == '0' && (ptr[1] == 'x' || ptr[1] == 'X'))
|
2022-02-14 21:29:45 +01:00
|
|
|
{
|
2022-12-14 05:40:38 +01:00
|
|
|
firstdigit = ptr += 2;
|
|
|
|
|
Fix performance regression in pg_strtointNN_safe functions
Between 6fcda9aba and 1b6f632a3, the pg_strtoint functions became quite
a bit slower in v16, despite efforts in 6b423ec67 to speed these up.
Since the majority of cases for these functions will only contain
base-10 digits, perhaps prefixed by a '-', it makes sense to have a
special case for this and just fall back on the more complex version
which processes hex, octal, binary and underscores if the fast path
version fails to parse the string.
While we're here, update the header comments for these functions to
mention that hex, octal and binary formats along with underscore
separators are now supported.
Author: Andres Freund, David Rowley
Reported-by: Masahiko Sawada
Reviewed-by: Dean Rasheed, John Naylor
Discussion: https://postgr.es/m/CAD21AoDvDmUQeJtZrau1ovnT_smN940%3DKp6mszNGK3bq9yRN6g%40mail.gmail.com
Backpatch-through: 16, where 6fcda9aba and 1b6f632a3 were added
2023-08-02 02:05:41 +02:00
|
|
|
for (;;)
|
2022-12-14 05:40:38 +01:00
|
|
|
{
|
2023-02-04 10:48:51 +01:00
|
|
|
if (isxdigit((unsigned char) *ptr))
|
|
|
|
{
|
|
|
|
if (unlikely(tmp > -(PG_INT64_MIN / 16)))
|
|
|
|
goto out_of_range;
|
|
|
|
|
|
|
|
tmp = tmp * 16 + hexlookup[(unsigned char) *ptr++];
|
|
|
|
}
|
|
|
|
else if (*ptr == '_')
|
|
|
|
{
|
|
|
|
/* underscore must be followed by more digits */
|
|
|
|
ptr++;
|
|
|
|
if (*ptr == '\0' || !isxdigit((unsigned char) *ptr))
|
|
|
|
goto invalid_syntax;
|
|
|
|
}
|
|
|
|
else
|
|
|
|
break;
|
2022-12-14 05:40:38 +01:00
|
|
|
}
|
2022-02-14 21:29:45 +01:00
|
|
|
}
|
2022-12-14 05:40:38 +01:00
|
|
|
else if (ptr[0] == '0' && (ptr[1] == 'o' || ptr[1] == 'O'))
|
|
|
|
{
|
|
|
|
firstdigit = ptr += 2;
|
|
|
|
|
Fix performance regression in pg_strtointNN_safe functions
Between 6fcda9aba and 1b6f632a3, the pg_strtoint functions became quite
a bit slower in v16, despite efforts in 6b423ec67 to speed these up.
Since the majority of cases for these functions will only contain
base-10 digits, perhaps prefixed by a '-', it makes sense to have a
special case for this and just fall back on the more complex version
which processes hex, octal, binary and underscores if the fast path
version fails to parse the string.
While we're here, update the header comments for these functions to
mention that hex, octal and binary formats along with underscore
separators are now supported.
Author: Andres Freund, David Rowley
Reported-by: Masahiko Sawada
Reviewed-by: Dean Rasheed, John Naylor
Discussion: https://postgr.es/m/CAD21AoDvDmUQeJtZrau1ovnT_smN940%3DKp6mszNGK3bq9yRN6g%40mail.gmail.com
Backpatch-through: 16, where 6fcda9aba and 1b6f632a3 were added
2023-08-02 02:05:41 +02:00
|
|
|
for (;;)
|
2022-12-14 05:40:38 +01:00
|
|
|
{
|
2023-02-04 10:48:51 +01:00
|
|
|
if (*ptr >= '0' && *ptr <= '7')
|
|
|
|
{
|
|
|
|
if (unlikely(tmp > -(PG_INT64_MIN / 8)))
|
|
|
|
goto out_of_range;
|
|
|
|
|
|
|
|
tmp = tmp * 8 + (*ptr++ - '0');
|
|
|
|
}
|
|
|
|
else if (*ptr == '_')
|
|
|
|
{
|
|
|
|
/* underscore must be followed by more digits */
|
|
|
|
ptr++;
|
|
|
|
if (*ptr == '\0' || *ptr < '0' || *ptr > '7')
|
|
|
|
goto invalid_syntax;
|
|
|
|
}
|
|
|
|
else
|
|
|
|
break;
|
2022-12-14 05:40:38 +01:00
|
|
|
}
|
|
|
|
}
|
|
|
|
else if (ptr[0] == '0' && (ptr[1] == 'b' || ptr[1] == 'B'))
|
|
|
|
{
|
|
|
|
firstdigit = ptr += 2;
|
|
|
|
|
Fix performance regression in pg_strtointNN_safe functions
Between 6fcda9aba and 1b6f632a3, the pg_strtoint functions became quite
a bit slower in v16, despite efforts in 6b423ec67 to speed these up.
Since the majority of cases for these functions will only contain
base-10 digits, perhaps prefixed by a '-', it makes sense to have a
special case for this and just fall back on the more complex version
which processes hex, octal, binary and underscores if the fast path
version fails to parse the string.
While we're here, update the header comments for these functions to
mention that hex, octal and binary formats along with underscore
separators are now supported.
Author: Andres Freund, David Rowley
Reported-by: Masahiko Sawada
Reviewed-by: Dean Rasheed, John Naylor
Discussion: https://postgr.es/m/CAD21AoDvDmUQeJtZrau1ovnT_smN940%3DKp6mszNGK3bq9yRN6g%40mail.gmail.com
Backpatch-through: 16, where 6fcda9aba and 1b6f632a3 were added
2023-08-02 02:05:41 +02:00
|
|
|
for (;;)
|
2022-12-14 05:40:38 +01:00
|
|
|
{
|
2023-02-04 10:48:51 +01:00
|
|
|
if (*ptr >= '0' && *ptr <= '1')
|
|
|
|
{
|
|
|
|
if (unlikely(tmp > -(PG_INT64_MIN / 2)))
|
|
|
|
goto out_of_range;
|
|
|
|
|
|
|
|
tmp = tmp * 2 + (*ptr++ - '0');
|
|
|
|
}
|
|
|
|
else if (*ptr == '_')
|
|
|
|
{
|
|
|
|
/* underscore must be followed by more digits */
|
|
|
|
ptr++;
|
|
|
|
if (*ptr == '\0' || *ptr < '0' || *ptr > '1')
|
|
|
|
goto invalid_syntax;
|
|
|
|
}
|
|
|
|
else
|
|
|
|
break;
|
2022-12-14 05:40:38 +01:00
|
|
|
}
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
firstdigit = ptr;
|
|
|
|
|
Fix performance regression in pg_strtointNN_safe functions
Between 6fcda9aba and 1b6f632a3, the pg_strtoint functions became quite
a bit slower in v16, despite efforts in 6b423ec67 to speed these up.
Since the majority of cases for these functions will only contain
base-10 digits, perhaps prefixed by a '-', it makes sense to have a
special case for this and just fall back on the more complex version
which processes hex, octal, binary and underscores if the fast path
version fails to parse the string.
While we're here, update the header comments for these functions to
mention that hex, octal and binary formats along with underscore
separators are now supported.
Author: Andres Freund, David Rowley
Reported-by: Masahiko Sawada
Reviewed-by: Dean Rasheed, John Naylor
Discussion: https://postgr.es/m/CAD21AoDvDmUQeJtZrau1ovnT_smN940%3DKp6mszNGK3bq9yRN6g%40mail.gmail.com
Backpatch-through: 16, where 6fcda9aba and 1b6f632a3 were added
2023-08-02 02:05:41 +02:00
|
|
|
for (;;)
|
2022-12-14 05:40:38 +01:00
|
|
|
{
|
Fix performance regression in pg_strtointNN_safe functions
Between 6fcda9aba and 1b6f632a3, the pg_strtoint functions became quite
a bit slower in v16, despite efforts in 6b423ec67 to speed these up.
Since the majority of cases for these functions will only contain
base-10 digits, perhaps prefixed by a '-', it makes sense to have a
special case for this and just fall back on the more complex version
which processes hex, octal, binary and underscores if the fast path
version fails to parse the string.
While we're here, update the header comments for these functions to
mention that hex, octal and binary formats along with underscore
separators are now supported.
Author: Andres Freund, David Rowley
Reported-by: Masahiko Sawada
Reviewed-by: Dean Rasheed, John Naylor
Discussion: https://postgr.es/m/CAD21AoDvDmUQeJtZrau1ovnT_smN940%3DKp6mszNGK3bq9yRN6g%40mail.gmail.com
Backpatch-through: 16, where 6fcda9aba and 1b6f632a3 were added
2023-08-02 02:05:41 +02:00
|
|
|
if (*ptr >= '0' && *ptr <= '9')
|
2023-02-04 10:48:51 +01:00
|
|
|
{
|
|
|
|
if (unlikely(tmp > -(PG_INT64_MIN / 10)))
|
|
|
|
goto out_of_range;
|
|
|
|
|
|
|
|
tmp = tmp * 10 + (*ptr++ - '0');
|
|
|
|
}
|
|
|
|
else if (*ptr == '_')
|
|
|
|
{
|
|
|
|
/* underscore may not be first */
|
|
|
|
if (unlikely(ptr == firstdigit))
|
|
|
|
goto invalid_syntax;
|
|
|
|
/* and it must be followed by more digits */
|
|
|
|
ptr++;
|
|
|
|
if (*ptr == '\0' || !isdigit((unsigned char) *ptr))
|
|
|
|
goto invalid_syntax;
|
|
|
|
}
|
|
|
|
else
|
|
|
|
break;
|
2022-12-14 05:40:38 +01:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/* require at least one digit */
|
|
|
|
if (unlikely(ptr == firstdigit))
|
|
|
|
goto invalid_syntax;
|
2022-02-14 21:29:45 +01:00
|
|
|
|
|
|
|
/* allow trailing whitespace, but not other trailing chars */
|
Fix performance regression in pg_strtointNN_safe functions
Between 6fcda9aba and 1b6f632a3, the pg_strtoint functions became quite
a bit slower in v16, despite efforts in 6b423ec67 to speed these up.
Since the majority of cases for these functions will only contain
base-10 digits, perhaps prefixed by a '-', it makes sense to have a
special case for this and just fall back on the more complex version
which processes hex, octal, binary and underscores if the fast path
version fails to parse the string.
While we're here, update the header comments for these functions to
mention that hex, octal and binary formats along with underscore
separators are now supported.
Author: Andres Freund, David Rowley
Reported-by: Masahiko Sawada
Reviewed-by: Dean Rasheed, John Naylor
Discussion: https://postgr.es/m/CAD21AoDvDmUQeJtZrau1ovnT_smN940%3DKp6mszNGK3bq9yRN6g%40mail.gmail.com
Backpatch-through: 16, where 6fcda9aba and 1b6f632a3 were added
2023-08-02 02:05:41 +02:00
|
|
|
while (isspace((unsigned char) *ptr))
|
2022-02-14 21:29:45 +01:00
|
|
|
ptr++;
|
|
|
|
|
|
|
|
if (unlikely(*ptr != '\0'))
|
|
|
|
goto invalid_syntax;
|
|
|
|
|
2022-12-04 04:18:18 +01:00
|
|
|
if (neg)
|
2022-02-14 21:29:45 +01:00
|
|
|
{
|
2022-12-04 04:18:18 +01:00
|
|
|
/* check the negative equivalent will fit without overflowing */
|
|
|
|
if (tmp > (uint64) (-(PG_INT64_MIN + 1)) + 1)
|
2022-02-14 21:29:45 +01:00
|
|
|
goto out_of_range;
|
2022-12-04 04:18:18 +01:00
|
|
|
return -((int64) tmp);
|
2022-02-14 21:29:45 +01:00
|
|
|
}
|
|
|
|
|
2022-12-04 04:18:18 +01:00
|
|
|
if (tmp > PG_INT64_MAX)
|
|
|
|
goto out_of_range;
|
|
|
|
|
|
|
|
return (int64) tmp;
|
2022-02-14 21:29:45 +01:00
|
|
|
|
|
|
|
out_of_range:
|
Convert a few datatype input functions to use "soft" error reporting.
This patch converts the input functions for bool, int2, int4, int8,
float4, float8, numeric, and contrib/cube to the new soft-error style.
array_in and record_in are also converted. There's lots more to do,
but this is enough to provide proof-of-concept that the soft-error
API is usable, as well as reference examples for how to convert
input functions.
This patch is mostly by me, but it owes very substantial debt to
earlier work by Nikita Glukhov, Andrew Dunstan, and Amul Sul.
Thanks to Andres Freund for review.
Discussion: https://postgr.es/m/3bbbb0df-7382-bf87-9737-340ba096e034@postgrespro.ru
2022-12-09 16:14:53 +01:00
|
|
|
ereturn(escontext, 0,
|
2022-02-14 21:29:45 +01:00
|
|
|
(errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
|
|
|
|
errmsg("value \"%s\" is out of range for type %s",
|
|
|
|
s, "bigint")));
|
|
|
|
|
|
|
|
invalid_syntax:
|
Convert a few datatype input functions to use "soft" error reporting.
This patch converts the input functions for bool, int2, int4, int8,
float4, float8, numeric, and contrib/cube to the new soft-error style.
array_in and record_in are also converted. There's lots more to do,
but this is enough to provide proof-of-concept that the soft-error
API is usable, as well as reference examples for how to convert
input functions.
This patch is mostly by me, but it owes very substantial debt to
earlier work by Nikita Glukhov, Andrew Dunstan, and Amul Sul.
Thanks to Andres Freund for review.
Discussion: https://postgr.es/m/3bbbb0df-7382-bf87-9737-340ba096e034@postgrespro.ru
2022-12-09 16:14:53 +01:00
|
|
|
ereturn(escontext, 0,
|
2022-02-14 21:29:45 +01:00
|
|
|
(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
|
|
|
|
errmsg("invalid input syntax for type %s: \"%s\"",
|
|
|
|
"bigint", s)));
|
|
|
|
}
|
|
|
|
|
Detect bad input for types xid, xid8, and cid.
Historically these input functions just called strtoul or strtoull
and returned the result, with no error detection whatever. Upgrade
them to reject garbage input and out-of-range values, similarly to
our other numeric input routines.
To share the code for this with type oid, adjust the existing
"oidin_subr" to be agnostic about the SQL name of the type it is
handling, and move it to numutils.c; then clone it for 64-bit types.
Because the xid types previously accepted hex and octal input by
reason of calling strtoul[l] with third argument zero, I made the
common subroutine do that too, with the consequence that type oid
now also accepts hex and octal input. In view of 6fcda9aba, that
seems like a good thing.
While at it, simplify the existing over-complicated handling of
syntax errors from strtoul: we only need one ereturn not three.
Discussion: https://postgr.es/m/3526121.1672000729@sss.pgh.pa.us
2022-12-27 17:40:01 +01:00
|
|
|
/*
|
|
|
|
* Convert input string to an unsigned 32 bit integer.
|
|
|
|
*
|
|
|
|
* Allows any number of leading or trailing whitespace characters.
|
|
|
|
*
|
|
|
|
* If endloc isn't NULL, store a pointer to the rest of the string there,
|
|
|
|
* so that caller can parse the rest. Otherwise, it's an error if anything
|
|
|
|
* but whitespace follows.
|
|
|
|
*
|
2023-09-25 13:29:34 +02:00
|
|
|
* typname is what is reported in error messages.
|
Detect bad input for types xid, xid8, and cid.
Historically these input functions just called strtoul or strtoull
and returned the result, with no error detection whatever. Upgrade
them to reject garbage input and out-of-range values, similarly to
our other numeric input routines.
To share the code for this with type oid, adjust the existing
"oidin_subr" to be agnostic about the SQL name of the type it is
handling, and move it to numutils.c; then clone it for 64-bit types.
Because the xid types previously accepted hex and octal input by
reason of calling strtoul[l] with third argument zero, I made the
common subroutine do that too, with the consequence that type oid
now also accepts hex and octal input. In view of 6fcda9aba, that
seems like a good thing.
While at it, simplify the existing over-complicated handling of
syntax errors from strtoul: we only need one ereturn not three.
Discussion: https://postgr.es/m/3526121.1672000729@sss.pgh.pa.us
2022-12-27 17:40:01 +01:00
|
|
|
*
|
|
|
|
* If escontext points to an ErrorSaveContext node, that is filled instead
|
|
|
|
* of throwing an error; the caller must check SOFT_ERROR_OCCURRED()
|
|
|
|
* to detect errors.
|
|
|
|
*/
|
|
|
|
uint32
|
|
|
|
uint32in_subr(const char *s, char **endloc,
|
|
|
|
const char *typname, Node *escontext)
|
|
|
|
{
|
|
|
|
uint32 result;
|
|
|
|
unsigned long cvt;
|
|
|
|
char *endptr;
|
|
|
|
|
|
|
|
errno = 0;
|
|
|
|
cvt = strtoul(s, &endptr, 0);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* strtoul() normally only sets ERANGE. On some systems it may also set
|
|
|
|
* EINVAL, which simply means it couldn't parse the input string. Be sure
|
|
|
|
* to report that the same way as the standard error indication (that
|
|
|
|
* endptr == s).
|
|
|
|
*/
|
|
|
|
if ((errno && errno != ERANGE) || endptr == s)
|
|
|
|
ereturn(escontext, 0,
|
|
|
|
(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
|
|
|
|
errmsg("invalid input syntax for type %s: \"%s\"",
|
|
|
|
typname, s)));
|
|
|
|
|
|
|
|
if (errno == ERANGE)
|
|
|
|
ereturn(escontext, 0,
|
|
|
|
(errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
|
|
|
|
errmsg("value \"%s\" is out of range for type %s",
|
|
|
|
s, typname)));
|
|
|
|
|
|
|
|
if (endloc)
|
|
|
|
{
|
|
|
|
/* caller wants to deal with rest of string */
|
|
|
|
*endloc = endptr;
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
/* allow only whitespace after number */
|
|
|
|
while (*endptr && isspace((unsigned char) *endptr))
|
|
|
|
endptr++;
|
|
|
|
if (*endptr)
|
|
|
|
ereturn(escontext, 0,
|
|
|
|
(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
|
|
|
|
errmsg("invalid input syntax for type %s: \"%s\"",
|
|
|
|
typname, s)));
|
|
|
|
}
|
|
|
|
|
|
|
|
result = (uint32) cvt;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Cope with possibility that unsigned long is wider than uint32, in which
|
|
|
|
* case strtoul will not raise an error for some values that are out of
|
|
|
|
* the range of uint32.
|
|
|
|
*
|
|
|
|
* For backwards compatibility, we want to accept inputs that are given
|
|
|
|
* with a minus sign, so allow the input value if it matches after either
|
|
|
|
* signed or unsigned extension to long.
|
|
|
|
*
|
|
|
|
* To ensure consistent results on 32-bit and 64-bit platforms, make sure
|
|
|
|
* the error message is the same as if strtoul() had returned ERANGE.
|
|
|
|
*/
|
|
|
|
#if PG_UINT32_MAX != ULONG_MAX
|
|
|
|
if (cvt != (unsigned long) result &&
|
|
|
|
cvt != (unsigned long) ((int) result))
|
|
|
|
ereturn(escontext, 0,
|
|
|
|
(errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
|
|
|
|
errmsg("value \"%s\" is out of range for type %s",
|
|
|
|
s, typname)));
|
|
|
|
#endif
|
|
|
|
|
|
|
|
return result;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Convert input string to an unsigned 64 bit integer.
|
|
|
|
*
|
|
|
|
* Allows any number of leading or trailing whitespace characters.
|
|
|
|
*
|
|
|
|
* If endloc isn't NULL, store a pointer to the rest of the string there,
|
|
|
|
* so that caller can parse the rest. Otherwise, it's an error if anything
|
|
|
|
* but whitespace follows.
|
|
|
|
*
|
2023-09-25 13:29:34 +02:00
|
|
|
* typname is what is reported in error messages.
|
Detect bad input for types xid, xid8, and cid.
Historically these input functions just called strtoul or strtoull
and returned the result, with no error detection whatever. Upgrade
them to reject garbage input and out-of-range values, similarly to
our other numeric input routines.
To share the code for this with type oid, adjust the existing
"oidin_subr" to be agnostic about the SQL name of the type it is
handling, and move it to numutils.c; then clone it for 64-bit types.
Because the xid types previously accepted hex and octal input by
reason of calling strtoul[l] with third argument zero, I made the
common subroutine do that too, with the consequence that type oid
now also accepts hex and octal input. In view of 6fcda9aba, that
seems like a good thing.
While at it, simplify the existing over-complicated handling of
syntax errors from strtoul: we only need one ereturn not three.
Discussion: https://postgr.es/m/3526121.1672000729@sss.pgh.pa.us
2022-12-27 17:40:01 +01:00
|
|
|
*
|
|
|
|
* If escontext points to an ErrorSaveContext node, that is filled instead
|
|
|
|
* of throwing an error; the caller must check SOFT_ERROR_OCCURRED()
|
|
|
|
* to detect errors.
|
|
|
|
*/
|
|
|
|
uint64
|
|
|
|
uint64in_subr(const char *s, char **endloc,
|
|
|
|
const char *typname, Node *escontext)
|
|
|
|
{
|
|
|
|
uint64 result;
|
|
|
|
char *endptr;
|
|
|
|
|
|
|
|
errno = 0;
|
|
|
|
result = strtou64(s, &endptr, 0);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* strtoul[l] normally only sets ERANGE. On some systems it may also set
|
|
|
|
* EINVAL, which simply means it couldn't parse the input string. Be sure
|
|
|
|
* to report that the same way as the standard error indication (that
|
|
|
|
* endptr == s).
|
|
|
|
*/
|
|
|
|
if ((errno && errno != ERANGE) || endptr == s)
|
|
|
|
ereturn(escontext, 0,
|
|
|
|
(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
|
|
|
|
errmsg("invalid input syntax for type %s: \"%s\"",
|
|
|
|
typname, s)));
|
|
|
|
|
|
|
|
if (errno == ERANGE)
|
|
|
|
ereturn(escontext, 0,
|
|
|
|
(errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
|
|
|
|
errmsg("value \"%s\" is out of range for type %s",
|
|
|
|
s, typname)));
|
|
|
|
|
|
|
|
if (endloc)
|
|
|
|
{
|
|
|
|
/* caller wants to deal with rest of string */
|
|
|
|
*endloc = endptr;
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
/* allow only whitespace after number */
|
|
|
|
while (*endptr && isspace((unsigned char) *endptr))
|
|
|
|
endptr++;
|
|
|
|
if (*endptr)
|
|
|
|
ereturn(escontext, 0,
|
|
|
|
(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
|
|
|
|
errmsg("invalid input syntax for type %s: \"%s\"",
|
|
|
|
typname, s)));
|
|
|
|
}
|
|
|
|
|
|
|
|
return result;
|
|
|
|
}
|
|
|
|
|
1996-07-09 08:22:35 +02:00
|
|
|
/*
|
2010-11-20 04:13:11 +01:00
|
|
|
* pg_itoa: converts a signed 16-bit integer to its string representation
|
2020-06-13 02:32:00 +02:00
|
|
|
* and returns strlen(a).
|
2010-11-20 04:13:11 +01:00
|
|
|
*
|
|
|
|
* Caller must ensure that 'a' points to enough memory to hold the result
|
|
|
|
* (at least 7 bytes, counting a leading sign and trailing NUL).
|
1996-07-09 08:22:35 +02:00
|
|
|
*
|
2010-11-20 04:13:11 +01:00
|
|
|
* It doesn't seem worth implementing this separately.
|
1996-07-09 08:22:35 +02:00
|
|
|
*/
|
2020-06-13 02:32:00 +02:00
|
|
|
int
|
2000-08-01 20:29:35 +02:00
|
|
|
pg_itoa(int16 i, char *a)
|
1996-07-09 08:22:35 +02:00
|
|
|
{
|
2020-06-13 02:32:00 +02:00
|
|
|
return pg_ltoa((int32) i, a);
|
2010-11-20 04:13:11 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
2020-02-01 22:57:14 +01:00
|
|
|
* pg_ultoa_n: converts an unsigned 32-bit integer to its string representation,
|
|
|
|
* not NUL-terminated, and returns the length of that string representation
|
2010-11-20 04:13:11 +01:00
|
|
|
*
|
2020-02-01 22:57:14 +01:00
|
|
|
* Caller must ensure that 'a' points to enough memory to hold the result (at
|
|
|
|
* least 10 bytes)
|
2010-11-20 04:13:11 +01:00
|
|
|
*/
|
2020-02-01 22:57:14 +01:00
|
|
|
int
|
|
|
|
pg_ultoa_n(uint32 value, char *a)
|
2010-11-20 04:13:11 +01:00
|
|
|
{
|
2020-02-01 22:57:14 +01:00
|
|
|
int olength,
|
|
|
|
i = 0;
|
2010-11-20 04:13:11 +01:00
|
|
|
|
2020-02-01 22:57:14 +01:00
|
|
|
/* Degenerate case */
|
|
|
|
if (value == 0)
|
2010-11-20 04:13:11 +01:00
|
|
|
{
|
2020-02-01 22:57:14 +01:00
|
|
|
*a = '0';
|
|
|
|
return 1;
|
2010-11-20 04:13:11 +01:00
|
|
|
}
|
|
|
|
|
2020-02-01 22:57:14 +01:00
|
|
|
olength = decimalLength32(value);
|
|
|
|
|
|
|
|
/* Compute the result string. */
|
|
|
|
while (value >= 10000)
|
2010-11-20 04:13:11 +01:00
|
|
|
{
|
2020-02-01 22:57:14 +01:00
|
|
|
const uint32 c = value - 10000 * (value / 10000);
|
|
|
|
const uint32 c0 = (c % 100) << 1;
|
|
|
|
const uint32 c1 = (c / 100) << 1;
|
2010-11-20 18:09:36 +01:00
|
|
|
|
2020-02-01 22:57:14 +01:00
|
|
|
char *pos = a + olength - i;
|
2010-11-20 18:09:36 +01:00
|
|
|
|
2020-02-01 22:57:14 +01:00
|
|
|
value /= 10000;
|
2010-11-20 04:13:11 +01:00
|
|
|
|
2020-02-01 22:57:14 +01:00
|
|
|
memcpy(pos - 2, DIGIT_TABLE + c0, 2);
|
|
|
|
memcpy(pos - 4, DIGIT_TABLE + c1, 2);
|
|
|
|
i += 4;
|
|
|
|
}
|
|
|
|
if (value >= 100)
|
|
|
|
{
|
|
|
|
const uint32 c = (value % 100) << 1;
|
|
|
|
|
|
|
|
char *pos = a + olength - i;
|
|
|
|
|
|
|
|
value /= 100;
|
2010-11-20 04:13:11 +01:00
|
|
|
|
2020-02-01 22:57:14 +01:00
|
|
|
memcpy(pos - 2, DIGIT_TABLE + c, 2);
|
|
|
|
i += 2;
|
|
|
|
}
|
|
|
|
if (value >= 10)
|
2010-11-20 04:13:11 +01:00
|
|
|
{
|
2020-02-01 22:57:14 +01:00
|
|
|
const uint32 c = value << 1;
|
|
|
|
|
|
|
|
char *pos = a + olength - i;
|
2010-11-20 18:09:36 +01:00
|
|
|
|
2020-02-01 22:57:14 +01:00
|
|
|
memcpy(pos - 2, DIGIT_TABLE + c, 2);
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
*a = (char) ('0' + value);
|
2010-11-20 04:13:11 +01:00
|
|
|
}
|
2020-02-01 22:57:14 +01:00
|
|
|
|
|
|
|
return olength;
|
1996-07-09 08:22:35 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
2020-06-13 02:32:00 +02:00
|
|
|
* pg_ltoa: converts a signed 32-bit integer to its string representation and
|
|
|
|
* returns strlen(a).
|
1996-07-09 08:22:35 +02:00
|
|
|
*
|
2020-02-01 22:57:14 +01:00
|
|
|
* It is the caller's responsibility to ensure that a is at least 12 bytes long,
|
|
|
|
* which is enough room to hold a minus sign, a maximally long int32, and the
|
|
|
|
* above terminating NUL.
|
1996-07-09 08:22:35 +02:00
|
|
|
*/
|
2020-06-13 02:32:00 +02:00
|
|
|
int
|
2020-02-01 22:57:14 +01:00
|
|
|
pg_ltoa(int32 value, char *a)
|
1996-07-09 08:22:35 +02:00
|
|
|
{
|
2020-02-01 22:57:14 +01:00
|
|
|
uint32 uvalue = (uint32) value;
|
2020-06-13 02:32:00 +02:00
|
|
|
int len = 0;
|
2020-02-01 22:57:14 +01:00
|
|
|
|
|
|
|
if (value < 0)
|
2010-11-20 04:13:11 +01:00
|
|
|
{
|
2020-02-01 22:57:14 +01:00
|
|
|
uvalue = (uint32) 0 - uvalue;
|
2020-06-13 02:32:00 +02:00
|
|
|
a[len++] = '-';
|
2010-11-20 04:13:11 +01:00
|
|
|
}
|
2020-06-13 02:32:00 +02:00
|
|
|
len += pg_ultoa_n(uvalue, a + len);
|
2020-02-01 22:57:14 +01:00
|
|
|
a[len] = '\0';
|
2020-06-13 02:32:00 +02:00
|
|
|
return len;
|
2020-02-01 22:57:14 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Get the decimal representation, not NUL-terminated, and return the length of
|
|
|
|
* same. Caller must ensure that a points to at least MAXINT8LEN bytes.
|
|
|
|
*/
|
|
|
|
int
|
|
|
|
pg_ulltoa_n(uint64 value, char *a)
|
|
|
|
{
|
|
|
|
int olength,
|
|
|
|
i = 0;
|
|
|
|
uint32 value2;
|
|
|
|
|
|
|
|
/* Degenerate case */
|
|
|
|
if (value == 0)
|
2010-11-20 04:13:11 +01:00
|
|
|
{
|
2020-02-01 22:57:14 +01:00
|
|
|
*a = '0';
|
|
|
|
return 1;
|
2010-11-20 04:13:11 +01:00
|
|
|
}
|
|
|
|
|
2020-02-01 22:57:14 +01:00
|
|
|
olength = decimalLength64(value);
|
|
|
|
|
|
|
|
/* Compute the result string. */
|
|
|
|
while (value >= 100000000)
|
2010-11-20 04:13:11 +01:00
|
|
|
{
|
2020-02-01 22:57:14 +01:00
|
|
|
const uint64 q = value / 100000000;
|
2022-10-05 10:01:41 +02:00
|
|
|
uint32 value3 = (uint32) (value - 100000000 * q);
|
2010-11-20 18:09:36 +01:00
|
|
|
|
2022-10-05 10:01:41 +02:00
|
|
|
const uint32 c = value3 % 10000;
|
|
|
|
const uint32 d = value3 / 10000;
|
2020-02-01 22:57:14 +01:00
|
|
|
const uint32 c0 = (c % 100) << 1;
|
|
|
|
const uint32 c1 = (c / 100) << 1;
|
|
|
|
const uint32 d0 = (d % 100) << 1;
|
|
|
|
const uint32 d1 = (d / 100) << 1;
|
2010-11-20 04:13:11 +01:00
|
|
|
|
2020-02-01 22:57:14 +01:00
|
|
|
char *pos = a + olength - i;
|
|
|
|
|
|
|
|
value = q;
|
|
|
|
|
|
|
|
memcpy(pos - 2, DIGIT_TABLE + c0, 2);
|
|
|
|
memcpy(pos - 4, DIGIT_TABLE + c1, 2);
|
|
|
|
memcpy(pos - 6, DIGIT_TABLE + d0, 2);
|
|
|
|
memcpy(pos - 8, DIGIT_TABLE + d1, 2);
|
|
|
|
i += 8;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Switch to 32-bit for speed */
|
|
|
|
value2 = (uint32) value;
|
2010-11-20 04:13:11 +01:00
|
|
|
|
2020-02-01 22:57:14 +01:00
|
|
|
if (value2 >= 10000)
|
|
|
|
{
|
|
|
|
const uint32 c = value2 - 10000 * (value2 / 10000);
|
|
|
|
const uint32 c0 = (c % 100) << 1;
|
|
|
|
const uint32 c1 = (c / 100) << 1;
|
|
|
|
|
|
|
|
char *pos = a + olength - i;
|
2010-11-20 04:13:11 +01:00
|
|
|
|
2020-02-01 22:57:14 +01:00
|
|
|
value2 /= 10000;
|
|
|
|
|
|
|
|
memcpy(pos - 2, DIGIT_TABLE + c0, 2);
|
|
|
|
memcpy(pos - 4, DIGIT_TABLE + c1, 2);
|
|
|
|
i += 4;
|
|
|
|
}
|
|
|
|
if (value2 >= 100)
|
2010-11-20 04:13:11 +01:00
|
|
|
{
|
2020-02-01 22:57:14 +01:00
|
|
|
const uint32 c = (value2 % 100) << 1;
|
|
|
|
char *pos = a + olength - i;
|
2010-11-20 18:09:36 +01:00
|
|
|
|
2020-02-01 22:57:14 +01:00
|
|
|
value2 /= 100;
|
|
|
|
|
|
|
|
memcpy(pos - 2, DIGIT_TABLE + c, 2);
|
|
|
|
i += 2;
|
2010-11-20 04:13:11 +01:00
|
|
|
}
|
2020-02-01 22:57:14 +01:00
|
|
|
if (value2 >= 10)
|
|
|
|
{
|
|
|
|
const uint32 c = value2 << 1;
|
|
|
|
char *pos = a + olength - i;
|
|
|
|
|
|
|
|
memcpy(pos - 2, DIGIT_TABLE + c, 2);
|
|
|
|
}
|
|
|
|
else
|
|
|
|
*a = (char) ('0' + value2);
|
|
|
|
|
|
|
|
return olength;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
2020-06-13 02:32:00 +02:00
|
|
|
* pg_lltoa: converts a signed 64-bit integer to its string representation and
|
|
|
|
* returns strlen(a).
|
2020-02-01 22:57:14 +01:00
|
|
|
*
|
|
|
|
* Caller must ensure that 'a' points to enough memory to hold the result
|
|
|
|
* (at least MAXINT8LEN + 1 bytes, counting a leading sign and trailing NUL).
|
|
|
|
*/
|
2020-06-13 02:32:00 +02:00
|
|
|
int
|
2020-02-01 22:57:14 +01:00
|
|
|
pg_lltoa(int64 value, char *a)
|
|
|
|
{
|
|
|
|
uint64 uvalue = value;
|
2020-06-13 02:32:00 +02:00
|
|
|
int len = 0;
|
2020-02-01 22:57:14 +01:00
|
|
|
|
|
|
|
if (value < 0)
|
|
|
|
{
|
|
|
|
uvalue = (uint64) 0 - uvalue;
|
2020-06-13 02:32:00 +02:00
|
|
|
a[len++] = '-';
|
2020-02-01 22:57:14 +01:00
|
|
|
}
|
2020-06-13 02:32:00 +02:00
|
|
|
|
|
|
|
len += pg_ulltoa_n(uvalue, a + len);
|
|
|
|
a[len] = '\0';
|
|
|
|
return len;
|
1996-07-09 08:22:35 +02:00
|
|
|
}
|
2016-02-07 05:11:28 +01:00
|
|
|
|
|
|
|
|
|
|
|
/*
|
2020-02-01 22:57:14 +01:00
|
|
|
* pg_ultostr_zeropad
|
2016-02-07 05:11:28 +01:00
|
|
|
* Converts 'value' into a decimal string representation stored at 'str'.
|
|
|
|
* 'minwidth' specifies the minimum width of the result; any extra space
|
|
|
|
* is filled up by prefixing the number with zeros.
|
|
|
|
*
|
|
|
|
* Returns the ending address of the string result (the last character written
|
|
|
|
* plus 1). Note that no NUL terminator is written.
|
|
|
|
*
|
|
|
|
* The intended use-case for this function is to build strings that contain
|
|
|
|
* multiple individual numbers, for example:
|
|
|
|
*
|
2020-06-09 08:43:15 +02:00
|
|
|
* str = pg_ultostr_zeropad(str, hours, 2);
|
2016-02-07 05:11:28 +01:00
|
|
|
* *str++ = ':';
|
2020-06-09 08:43:15 +02:00
|
|
|
* str = pg_ultostr_zeropad(str, mins, 2);
|
2016-02-07 05:11:28 +01:00
|
|
|
* *str++ = ':';
|
2020-06-09 08:43:15 +02:00
|
|
|
* str = pg_ultostr_zeropad(str, secs, 2);
|
2016-02-07 05:11:28 +01:00
|
|
|
* *str = '\0';
|
|
|
|
*
|
|
|
|
* Note: Caller must ensure that 'str' points to enough memory to hold the
|
|
|
|
* result.
|
|
|
|
*/
|
|
|
|
char *
|
2020-02-01 22:57:14 +01:00
|
|
|
pg_ultostr_zeropad(char *str, uint32 value, int32 minwidth)
|
2016-02-07 05:11:28 +01:00
|
|
|
{
|
2020-02-01 22:57:14 +01:00
|
|
|
int len;
|
2016-02-07 05:11:28 +01:00
|
|
|
|
|
|
|
Assert(minwidth > 0);
|
|
|
|
|
2020-02-01 22:57:14 +01:00
|
|
|
if (value < 100 && minwidth == 2) /* Short cut for common case */
|
2016-02-07 05:11:28 +01:00
|
|
|
{
|
2020-02-01 22:57:14 +01:00
|
|
|
memcpy(str, DIGIT_TABLE + value * 2, 2);
|
|
|
|
return str + 2;
|
2016-02-07 05:11:28 +01:00
|
|
|
}
|
|
|
|
|
2020-02-01 22:57:14 +01:00
|
|
|
len = pg_ultoa_n(value, str);
|
|
|
|
if (len >= minwidth)
|
|
|
|
return str + len;
|
2016-02-07 05:11:28 +01:00
|
|
|
|
2020-02-01 22:57:14 +01:00
|
|
|
memmove(str + minwidth - len, str, len);
|
|
|
|
memset(str, '0', minwidth - len);
|
|
|
|
return str + minwidth;
|
2016-02-07 05:11:28 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
2020-06-09 08:43:15 +02:00
|
|
|
* pg_ultostr
|
2016-02-07 05:11:28 +01:00
|
|
|
* Converts 'value' into a decimal string representation stored at 'str'.
|
|
|
|
*
|
|
|
|
* Returns the ending address of the string result (the last character written
|
|
|
|
* plus 1). Note that no NUL terminator is written.
|
|
|
|
*
|
|
|
|
* The intended use-case for this function is to build strings that contain
|
|
|
|
* multiple individual numbers, for example:
|
|
|
|
*
|
2020-06-09 08:43:15 +02:00
|
|
|
* str = pg_ultostr(str, a);
|
2016-02-07 05:11:28 +01:00
|
|
|
* *str++ = ' ';
|
2020-06-09 08:43:15 +02:00
|
|
|
* str = pg_ultostr(str, b);
|
2016-02-07 05:11:28 +01:00
|
|
|
* *str = '\0';
|
|
|
|
*
|
|
|
|
* Note: Caller must ensure that 'str' points to enough memory to hold the
|
|
|
|
* result.
|
|
|
|
*/
|
|
|
|
char *
|
2020-02-01 22:57:14 +01:00
|
|
|
pg_ultostr(char *str, uint32 value)
|
2016-02-07 05:11:28 +01:00
|
|
|
{
|
2020-02-01 22:57:14 +01:00
|
|
|
int len = pg_ultoa_n(value, str);
|
2016-02-07 05:11:28 +01:00
|
|
|
|
2020-02-01 22:57:14 +01:00
|
|
|
return str + len;
|
2016-02-07 05:11:28 +01:00
|
|
|
}
|