1315 lines
30 KiB
C
1315 lines
30 KiB
C
/*-------------------------------------------------------------------------
|
|
*
|
|
* numutils.c
|
|
* utility functions for I/O of built-in numeric types.
|
|
*
|
|
* Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
|
|
* Portions Copyright (c) 1994, Regents of the University of California
|
|
*
|
|
*
|
|
* IDENTIFICATION
|
|
* src/backend/utils/adt/numutils.c
|
|
*
|
|
*-------------------------------------------------------------------------
|
|
*/
|
|
#include "postgres.h"
|
|
|
|
#include <math.h>
|
|
#include <limits.h>
|
|
#include <ctype.h>
|
|
|
|
#include "port/pg_bitutils.h"
|
|
#include "utils/builtins.h"
|
|
|
|
/*
|
|
* A table of all two-digit numbers. This is used to speed up decimal digit
|
|
* generation by copying pairs of digits into the final output.
|
|
*/
|
|
static const char DIGIT_TABLE[200] =
|
|
"00" "01" "02" "03" "04" "05" "06" "07" "08" "09"
|
|
"10" "11" "12" "13" "14" "15" "16" "17" "18" "19"
|
|
"20" "21" "22" "23" "24" "25" "26" "27" "28" "29"
|
|
"30" "31" "32" "33" "34" "35" "36" "37" "38" "39"
|
|
"40" "41" "42" "43" "44" "45" "46" "47" "48" "49"
|
|
"50" "51" "52" "53" "54" "55" "56" "57" "58" "59"
|
|
"60" "61" "62" "63" "64" "65" "66" "67" "68" "69"
|
|
"70" "71" "72" "73" "74" "75" "76" "77" "78" "79"
|
|
"80" "81" "82" "83" "84" "85" "86" "87" "88" "89"
|
|
"90" "91" "92" "93" "94" "95" "96" "97" "98" "99";
|
|
|
|
/*
|
|
* Adapted from http://graphics.stanford.edu/~seander/bithacks.html#IntegerLog10
|
|
*/
|
|
static inline int
|
|
decimalLength32(const uint32 v)
|
|
{
|
|
int t;
|
|
static const uint32 PowersOfTen[] = {
|
|
1, 10, 100,
|
|
1000, 10000, 100000,
|
|
1000000, 10000000, 100000000,
|
|
1000000000
|
|
};
|
|
|
|
/*
|
|
* Compute base-10 logarithm by dividing the base-2 logarithm by a
|
|
* good-enough approximation of the base-2 logarithm of 10
|
|
*/
|
|
t = (pg_leftmost_one_pos32(v) + 1) * 1233 / 4096;
|
|
return t + (v >= PowersOfTen[t]);
|
|
}
|
|
|
|
static inline int
|
|
decimalLength64(const uint64 v)
|
|
{
|
|
int t;
|
|
static const uint64 PowersOfTen[] = {
|
|
UINT64CONST(1), UINT64CONST(10),
|
|
UINT64CONST(100), UINT64CONST(1000),
|
|
UINT64CONST(10000), UINT64CONST(100000),
|
|
UINT64CONST(1000000), UINT64CONST(10000000),
|
|
UINT64CONST(100000000), UINT64CONST(1000000000),
|
|
UINT64CONST(10000000000), UINT64CONST(100000000000),
|
|
UINT64CONST(1000000000000), UINT64CONST(10000000000000),
|
|
UINT64CONST(100000000000000), UINT64CONST(1000000000000000),
|
|
UINT64CONST(10000000000000000), UINT64CONST(100000000000000000),
|
|
UINT64CONST(1000000000000000000), UINT64CONST(10000000000000000000)
|
|
};
|
|
|
|
/*
|
|
* Compute base-10 logarithm by dividing the base-2 logarithm by a
|
|
* good-enough approximation of the base-2 logarithm of 10
|
|
*/
|
|
t = (pg_leftmost_one_pos64(v) + 1) * 1233 / 4096;
|
|
return t + (v >= PowersOfTen[t]);
|
|
}
|
|
|
|
static const int8 hexlookup[128] = {
|
|
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
|
|
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
|
|
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
|
|
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, -1, -1, -1, -1, -1,
|
|
-1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1,
|
|
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
|
|
-1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1,
|
|
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
|
|
};
|
|
|
|
/*
|
|
* Convert input string to a signed 16 bit integer. Input strings may be
|
|
* expressed in base-10, hexadecimal, octal, or binary format, all of which
|
|
* can be prefixed by an optional sign character, either '+' (the default) or
|
|
* '-' for negative numbers. Hex strings are recognized by the digits being
|
|
* prefixed by 0x or 0X while octal strings are recognized by the 0o or 0O
|
|
* prefix. The binary representation is recognized by the 0b or 0B prefix.
|
|
*
|
|
* Allows any number of leading or trailing whitespace characters. Digits may
|
|
* optionally be separated by a single underscore character. These can only
|
|
* come between digits and not before or after the digits. Underscores have
|
|
* no effect on the return value and are supported only to assist in improving
|
|
* the human readability of the input strings.
|
|
*
|
|
* pg_strtoint16() will throw ereport() upon bad input format or overflow;
|
|
* while pg_strtoint16_safe() instead returns such complaints in *escontext,
|
|
* if it's an ErrorSaveContext.
|
|
*
|
|
* NB: Accumulate input as an unsigned number, to deal with two's complement
|
|
* representation of the most negative number, which can't be represented as a
|
|
* signed positive number.
|
|
*/
|
|
int16
|
|
pg_strtoint16(const char *s)
|
|
{
|
|
return pg_strtoint16_safe(s, NULL);
|
|
}
|
|
|
|
int16
|
|
pg_strtoint16_safe(const char *s, Node *escontext)
|
|
{
|
|
const char *ptr = s;
|
|
const char *firstdigit;
|
|
uint16 tmp = 0;
|
|
bool neg = false;
|
|
unsigned char digit;
|
|
|
|
/*
|
|
* The majority of cases are likely to be base-10 digits without any
|
|
* underscore separator characters. We'll first try to parse the string
|
|
* with the assumption that's the case and only fallback on a slower
|
|
* implementation which handles hex, octal and binary strings and
|
|
* underscores if the fastpath version cannot parse the string.
|
|
*/
|
|
|
|
/* leave it up to the slow path to look for leading spaces */
|
|
|
|
if (*ptr == '-')
|
|
{
|
|
ptr++;
|
|
neg = true;
|
|
}
|
|
|
|
/* a leading '+' is uncommon so leave that for the slow path */
|
|
|
|
/* process the first digit */
|
|
digit = (*ptr - '0');
|
|
|
|
/*
|
|
* Exploit unsigned arithmetic to save having to check both the upper and
|
|
* lower bounds of the digit.
|
|
*/
|
|
if (likely(digit < 10))
|
|
{
|
|
ptr++;
|
|
tmp = digit;
|
|
}
|
|
else
|
|
{
|
|
/* we need at least one digit */
|
|
goto slow;
|
|
}
|
|
|
|
/* process remaining digits */
|
|
for (;;)
|
|
{
|
|
digit = (*ptr - '0');
|
|
|
|
if (digit >= 10)
|
|
break;
|
|
|
|
ptr++;
|
|
|
|
if (unlikely(tmp > -(PG_INT16_MIN / 10)))
|
|
goto out_of_range;
|
|
|
|
tmp = tmp * 10 + digit;
|
|
}
|
|
|
|
/* when the string does not end in a digit, let the slow path handle it */
|
|
if (unlikely(*ptr != '\0'))
|
|
goto slow;
|
|
|
|
if (neg)
|
|
{
|
|
/* check the negative equivalent will fit without overflowing */
|
|
if (unlikely(tmp > (uint16) (-(PG_INT16_MIN + 1)) + 1))
|
|
goto out_of_range;
|
|
return -((int16) tmp);
|
|
}
|
|
|
|
if (unlikely(tmp > PG_INT16_MAX))
|
|
goto out_of_range;
|
|
|
|
return (int16) tmp;
|
|
|
|
slow:
|
|
tmp = 0;
|
|
ptr = s;
|
|
/* no need to reset neg */
|
|
|
|
/* skip leading spaces */
|
|
while (isspace((unsigned char) *ptr))
|
|
ptr++;
|
|
|
|
/* handle sign */
|
|
if (*ptr == '-')
|
|
{
|
|
ptr++;
|
|
neg = true;
|
|
}
|
|
else if (*ptr == '+')
|
|
ptr++;
|
|
|
|
/* process digits */
|
|
if (ptr[0] == '0' && (ptr[1] == 'x' || ptr[1] == 'X'))
|
|
{
|
|
firstdigit = ptr += 2;
|
|
|
|
for (;;)
|
|
{
|
|
if (isxdigit((unsigned char) *ptr))
|
|
{
|
|
if (unlikely(tmp > -(PG_INT16_MIN / 16)))
|
|
goto out_of_range;
|
|
|
|
tmp = tmp * 16 + hexlookup[(unsigned char) *ptr++];
|
|
}
|
|
else if (*ptr == '_')
|
|
{
|
|
/* underscore must be followed by more digits */
|
|
ptr++;
|
|
if (*ptr == '\0' || !isxdigit((unsigned char) *ptr))
|
|
goto invalid_syntax;
|
|
}
|
|
else
|
|
break;
|
|
}
|
|
}
|
|
else if (ptr[0] == '0' && (ptr[1] == 'o' || ptr[1] == 'O'))
|
|
{
|
|
firstdigit = ptr += 2;
|
|
|
|
for (;;)
|
|
{
|
|
if (*ptr >= '0' && *ptr <= '7')
|
|
{
|
|
if (unlikely(tmp > -(PG_INT16_MIN / 8)))
|
|
goto out_of_range;
|
|
|
|
tmp = tmp * 8 + (*ptr++ - '0');
|
|
}
|
|
else if (*ptr == '_')
|
|
{
|
|
/* underscore must be followed by more digits */
|
|
ptr++;
|
|
if (*ptr == '\0' || *ptr < '0' || *ptr > '7')
|
|
goto invalid_syntax;
|
|
}
|
|
else
|
|
break;
|
|
}
|
|
}
|
|
else if (ptr[0] == '0' && (ptr[1] == 'b' || ptr[1] == 'B'))
|
|
{
|
|
firstdigit = ptr += 2;
|
|
|
|
for (;;)
|
|
{
|
|
if (*ptr >= '0' && *ptr <= '1')
|
|
{
|
|
if (unlikely(tmp > -(PG_INT16_MIN / 2)))
|
|
goto out_of_range;
|
|
|
|
tmp = tmp * 2 + (*ptr++ - '0');
|
|
}
|
|
else if (*ptr == '_')
|
|
{
|
|
/* underscore must be followed by more digits */
|
|
ptr++;
|
|
if (*ptr == '\0' || *ptr < '0' || *ptr > '1')
|
|
goto invalid_syntax;
|
|
}
|
|
else
|
|
break;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
firstdigit = ptr;
|
|
|
|
for (;;)
|
|
{
|
|
if (*ptr >= '0' && *ptr <= '9')
|
|
{
|
|
if (unlikely(tmp > -(PG_INT16_MIN / 10)))
|
|
goto out_of_range;
|
|
|
|
tmp = tmp * 10 + (*ptr++ - '0');
|
|
}
|
|
else if (*ptr == '_')
|
|
{
|
|
/* underscore may not be first */
|
|
if (unlikely(ptr == firstdigit))
|
|
goto invalid_syntax;
|
|
/* and it must be followed by more digits */
|
|
ptr++;
|
|
if (*ptr == '\0' || !isdigit((unsigned char) *ptr))
|
|
goto invalid_syntax;
|
|
}
|
|
else
|
|
break;
|
|
}
|
|
}
|
|
|
|
/* require at least one digit */
|
|
if (unlikely(ptr == firstdigit))
|
|
goto invalid_syntax;
|
|
|
|
/* allow trailing whitespace, but not other trailing chars */
|
|
while (isspace((unsigned char) *ptr))
|
|
ptr++;
|
|
|
|
if (unlikely(*ptr != '\0'))
|
|
goto invalid_syntax;
|
|
|
|
if (neg)
|
|
{
|
|
/* check the negative equivalent will fit without overflowing */
|
|
if (tmp > (uint16) (-(PG_INT16_MIN + 1)) + 1)
|
|
goto out_of_range;
|
|
return -((int16) tmp);
|
|
}
|
|
|
|
if (tmp > PG_INT16_MAX)
|
|
goto out_of_range;
|
|
|
|
return (int16) tmp;
|
|
|
|
out_of_range:
|
|
ereturn(escontext, 0,
|
|
(errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
|
|
errmsg("value \"%s\" is out of range for type %s",
|
|
s, "smallint")));
|
|
|
|
invalid_syntax:
|
|
ereturn(escontext, 0,
|
|
(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
|
|
errmsg("invalid input syntax for type %s: \"%s\"",
|
|
"smallint", s)));
|
|
}
|
|
|
|
/*
|
|
* Convert input string to a signed 32 bit integer. Input strings may be
|
|
* expressed in base-10, hexadecimal, octal, or binary format, all of which
|
|
* can be prefixed by an optional sign character, either '+' (the default) or
|
|
* '-' for negative numbers. Hex strings are recognized by the digits being
|
|
* prefixed by 0x or 0X while octal strings are recognized by the 0o or 0O
|
|
* prefix. The binary representation is recognized by the 0b or 0B prefix.
|
|
*
|
|
* Allows any number of leading or trailing whitespace characters. Digits may
|
|
* optionally be separated by a single underscore character. These can only
|
|
* come between digits and not before or after the digits. Underscores have
|
|
* no effect on the return value and are supported only to assist in improving
|
|
* the human readability of the input strings.
|
|
*
|
|
* pg_strtoint32() will throw ereport() upon bad input format or overflow;
|
|
* while pg_strtoint32_safe() instead returns such complaints in *escontext,
|
|
* if it's an ErrorSaveContext.
|
|
*
|
|
* NB: Accumulate input as an unsigned number, to deal with two's complement
|
|
* representation of the most negative number, which can't be represented as a
|
|
* signed positive number.
|
|
*/
|
|
int32
|
|
pg_strtoint32(const char *s)
|
|
{
|
|
return pg_strtoint32_safe(s, NULL);
|
|
}
|
|
|
|
int32
|
|
pg_strtoint32_safe(const char *s, Node *escontext)
|
|
{
|
|
const char *ptr = s;
|
|
const char *firstdigit;
|
|
uint32 tmp = 0;
|
|
bool neg = false;
|
|
unsigned char digit;
|
|
|
|
/*
|
|
* The majority of cases are likely to be base-10 digits without any
|
|
* underscore separator characters. We'll first try to parse the string
|
|
* with the assumption that's the case and only fallback on a slower
|
|
* implementation which handles hex, octal and binary strings and
|
|
* underscores if the fastpath version cannot parse the string.
|
|
*/
|
|
|
|
/* leave it up to the slow path to look for leading spaces */
|
|
|
|
if (*ptr == '-')
|
|
{
|
|
ptr++;
|
|
neg = true;
|
|
}
|
|
|
|
/* a leading '+' is uncommon so leave that for the slow path */
|
|
|
|
/* process the first digit */
|
|
digit = (*ptr - '0');
|
|
|
|
/*
|
|
* Exploit unsigned arithmetic to save having to check both the upper and
|
|
* lower bounds of the digit.
|
|
*/
|
|
if (likely(digit < 10))
|
|
{
|
|
ptr++;
|
|
tmp = digit;
|
|
}
|
|
else
|
|
{
|
|
/* we need at least one digit */
|
|
goto slow;
|
|
}
|
|
|
|
/* process remaining digits */
|
|
for (;;)
|
|
{
|
|
digit = (*ptr - '0');
|
|
|
|
if (digit >= 10)
|
|
break;
|
|
|
|
ptr++;
|
|
|
|
if (unlikely(tmp > -(PG_INT32_MIN / 10)))
|
|
goto out_of_range;
|
|
|
|
tmp = tmp * 10 + digit;
|
|
}
|
|
|
|
/* when the string does not end in a digit, let the slow path handle it */
|
|
if (unlikely(*ptr != '\0'))
|
|
goto slow;
|
|
|
|
if (neg)
|
|
{
|
|
/* check the negative equivalent will fit without overflowing */
|
|
if (unlikely(tmp > (uint32) (-(PG_INT32_MIN + 1)) + 1))
|
|
goto out_of_range;
|
|
return -((int32) tmp);
|
|
}
|
|
|
|
if (unlikely(tmp > PG_INT32_MAX))
|
|
goto out_of_range;
|
|
|
|
return (int32) tmp;
|
|
|
|
slow:
|
|
tmp = 0;
|
|
ptr = s;
|
|
/* no need to reset neg */
|
|
|
|
/* skip leading spaces */
|
|
while (isspace((unsigned char) *ptr))
|
|
ptr++;
|
|
|
|
/* handle sign */
|
|
if (*ptr == '-')
|
|
{
|
|
ptr++;
|
|
neg = true;
|
|
}
|
|
else if (*ptr == '+')
|
|
ptr++;
|
|
|
|
/* process digits */
|
|
if (ptr[0] == '0' && (ptr[1] == 'x' || ptr[1] == 'X'))
|
|
{
|
|
firstdigit = ptr += 2;
|
|
|
|
for (;;)
|
|
{
|
|
if (isxdigit((unsigned char) *ptr))
|
|
{
|
|
if (unlikely(tmp > -(PG_INT32_MIN / 16)))
|
|
goto out_of_range;
|
|
|
|
tmp = tmp * 16 + hexlookup[(unsigned char) *ptr++];
|
|
}
|
|
else if (*ptr == '_')
|
|
{
|
|
/* underscore must be followed by more digits */
|
|
ptr++;
|
|
if (*ptr == '\0' || !isxdigit((unsigned char) *ptr))
|
|
goto invalid_syntax;
|
|
}
|
|
else
|
|
break;
|
|
}
|
|
}
|
|
else if (ptr[0] == '0' && (ptr[1] == 'o' || ptr[1] == 'O'))
|
|
{
|
|
firstdigit = ptr += 2;
|
|
|
|
for (;;)
|
|
{
|
|
if (*ptr >= '0' && *ptr <= '7')
|
|
{
|
|
if (unlikely(tmp > -(PG_INT32_MIN / 8)))
|
|
goto out_of_range;
|
|
|
|
tmp = tmp * 8 + (*ptr++ - '0');
|
|
}
|
|
else if (*ptr == '_')
|
|
{
|
|
/* underscore must be followed by more digits */
|
|
ptr++;
|
|
if (*ptr == '\0' || *ptr < '0' || *ptr > '7')
|
|
goto invalid_syntax;
|
|
}
|
|
else
|
|
break;
|
|
}
|
|
}
|
|
else if (ptr[0] == '0' && (ptr[1] == 'b' || ptr[1] == 'B'))
|
|
{
|
|
firstdigit = ptr += 2;
|
|
|
|
for (;;)
|
|
{
|
|
if (*ptr >= '0' && *ptr <= '1')
|
|
{
|
|
if (unlikely(tmp > -(PG_INT32_MIN / 2)))
|
|
goto out_of_range;
|
|
|
|
tmp = tmp * 2 + (*ptr++ - '0');
|
|
}
|
|
else if (*ptr == '_')
|
|
{
|
|
/* underscore must be followed by more digits */
|
|
ptr++;
|
|
if (*ptr == '\0' || *ptr < '0' || *ptr > '1')
|
|
goto invalid_syntax;
|
|
}
|
|
else
|
|
break;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
firstdigit = ptr;
|
|
|
|
for (;;)
|
|
{
|
|
if (*ptr >= '0' && *ptr <= '9')
|
|
{
|
|
if (unlikely(tmp > -(PG_INT32_MIN / 10)))
|
|
goto out_of_range;
|
|
|
|
tmp = tmp * 10 + (*ptr++ - '0');
|
|
}
|
|
else if (*ptr == '_')
|
|
{
|
|
/* underscore may not be first */
|
|
if (unlikely(ptr == firstdigit))
|
|
goto invalid_syntax;
|
|
/* and it must be followed by more digits */
|
|
ptr++;
|
|
if (*ptr == '\0' || !isdigit((unsigned char) *ptr))
|
|
goto invalid_syntax;
|
|
}
|
|
else
|
|
break;
|
|
}
|
|
}
|
|
|
|
/* require at least one digit */
|
|
if (unlikely(ptr == firstdigit))
|
|
goto invalid_syntax;
|
|
|
|
/* allow trailing whitespace, but not other trailing chars */
|
|
while (isspace((unsigned char) *ptr))
|
|
ptr++;
|
|
|
|
if (unlikely(*ptr != '\0'))
|
|
goto invalid_syntax;
|
|
|
|
if (neg)
|
|
{
|
|
/* check the negative equivalent will fit without overflowing */
|
|
if (tmp > (uint32) (-(PG_INT32_MIN + 1)) + 1)
|
|
goto out_of_range;
|
|
return -((int32) tmp);
|
|
}
|
|
|
|
if (tmp > PG_INT32_MAX)
|
|
goto out_of_range;
|
|
|
|
return (int32) tmp;
|
|
|
|
out_of_range:
|
|
ereturn(escontext, 0,
|
|
(errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
|
|
errmsg("value \"%s\" is out of range for type %s",
|
|
s, "integer")));
|
|
|
|
invalid_syntax:
|
|
ereturn(escontext, 0,
|
|
(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
|
|
errmsg("invalid input syntax for type %s: \"%s\"",
|
|
"integer", s)));
|
|
}
|
|
|
|
/*
|
|
* Convert input string to a signed 64 bit integer. Input strings may be
|
|
* expressed in base-10, hexadecimal, octal, or binary format, all of which
|
|
* can be prefixed by an optional sign character, either '+' (the default) or
|
|
* '-' for negative numbers. Hex strings are recognized by the digits being
|
|
* prefixed by 0x or 0X while octal strings are recognized by the 0o or 0O
|
|
* prefix. The binary representation is recognized by the 0b or 0B prefix.
|
|
*
|
|
* Allows any number of leading or trailing whitespace characters. Digits may
|
|
* optionally be separated by a single underscore character. These can only
|
|
* come between digits and not before or after the digits. Underscores have
|
|
* no effect on the return value and are supported only to assist in improving
|
|
* the human readability of the input strings.
|
|
*
|
|
* pg_strtoint64() will throw ereport() upon bad input format or overflow;
|
|
* while pg_strtoint64_safe() instead returns such complaints in *escontext,
|
|
* if it's an ErrorSaveContext.
|
|
*
|
|
* NB: Accumulate input as an unsigned number, to deal with two's complement
|
|
* representation of the most negative number, which can't be represented as a
|
|
* signed positive number.
|
|
*/
|
|
int64
|
|
pg_strtoint64(const char *s)
|
|
{
|
|
return pg_strtoint64_safe(s, NULL);
|
|
}
|
|
|
|
int64
|
|
pg_strtoint64_safe(const char *s, Node *escontext)
|
|
{
|
|
const char *ptr = s;
|
|
const char *firstdigit;
|
|
uint64 tmp = 0;
|
|
bool neg = false;
|
|
unsigned char digit;
|
|
|
|
/*
|
|
* The majority of cases are likely to be base-10 digits without any
|
|
* underscore separator characters. We'll first try to parse the string
|
|
* with the assumption that's the case and only fallback on a slower
|
|
* implementation which handles hex, octal and binary strings and
|
|
* underscores if the fastpath version cannot parse the string.
|
|
*/
|
|
|
|
/* leave it up to the slow path to look for leading spaces */
|
|
|
|
if (*ptr == '-')
|
|
{
|
|
ptr++;
|
|
neg = true;
|
|
}
|
|
|
|
/* a leading '+' is uncommon so leave that for the slow path */
|
|
|
|
/* process the first digit */
|
|
digit = (*ptr - '0');
|
|
|
|
/*
|
|
* Exploit unsigned arithmetic to save having to check both the upper and
|
|
* lower bounds of the digit.
|
|
*/
|
|
if (likely(digit < 10))
|
|
{
|
|
ptr++;
|
|
tmp = digit;
|
|
}
|
|
else
|
|
{
|
|
/* we need at least one digit */
|
|
goto slow;
|
|
}
|
|
|
|
/* process remaining digits */
|
|
for (;;)
|
|
{
|
|
digit = (*ptr - '0');
|
|
|
|
if (digit >= 10)
|
|
break;
|
|
|
|
ptr++;
|
|
|
|
if (unlikely(tmp > -(PG_INT64_MIN / 10)))
|
|
goto out_of_range;
|
|
|
|
tmp = tmp * 10 + digit;
|
|
}
|
|
|
|
/* when the string does not end in a digit, let the slow path handle it */
|
|
if (unlikely(*ptr != '\0'))
|
|
goto slow;
|
|
|
|
if (neg)
|
|
{
|
|
/* check the negative equivalent will fit without overflowing */
|
|
if (unlikely(tmp > (uint64) (-(PG_INT64_MIN + 1)) + 1))
|
|
goto out_of_range;
|
|
return -((int64) tmp);
|
|
}
|
|
|
|
if (unlikely(tmp > PG_INT64_MAX))
|
|
goto out_of_range;
|
|
|
|
return (int64) tmp;
|
|
|
|
slow:
|
|
tmp = 0;
|
|
ptr = s;
|
|
/* no need to reset neg */
|
|
|
|
/* skip leading spaces */
|
|
while (isspace((unsigned char) *ptr))
|
|
ptr++;
|
|
|
|
/* handle sign */
|
|
if (*ptr == '-')
|
|
{
|
|
ptr++;
|
|
neg = true;
|
|
}
|
|
else if (*ptr == '+')
|
|
ptr++;
|
|
|
|
/* process digits */
|
|
if (ptr[0] == '0' && (ptr[1] == 'x' || ptr[1] == 'X'))
|
|
{
|
|
firstdigit = ptr += 2;
|
|
|
|
for (;;)
|
|
{
|
|
if (isxdigit((unsigned char) *ptr))
|
|
{
|
|
if (unlikely(tmp > -(PG_INT64_MIN / 16)))
|
|
goto out_of_range;
|
|
|
|
tmp = tmp * 16 + hexlookup[(unsigned char) *ptr++];
|
|
}
|
|
else if (*ptr == '_')
|
|
{
|
|
/* underscore must be followed by more digits */
|
|
ptr++;
|
|
if (*ptr == '\0' || !isxdigit((unsigned char) *ptr))
|
|
goto invalid_syntax;
|
|
}
|
|
else
|
|
break;
|
|
}
|
|
}
|
|
else if (ptr[0] == '0' && (ptr[1] == 'o' || ptr[1] == 'O'))
|
|
{
|
|
firstdigit = ptr += 2;
|
|
|
|
for (;;)
|
|
{
|
|
if (*ptr >= '0' && *ptr <= '7')
|
|
{
|
|
if (unlikely(tmp > -(PG_INT64_MIN / 8)))
|
|
goto out_of_range;
|
|
|
|
tmp = tmp * 8 + (*ptr++ - '0');
|
|
}
|
|
else if (*ptr == '_')
|
|
{
|
|
/* underscore must be followed by more digits */
|
|
ptr++;
|
|
if (*ptr == '\0' || *ptr < '0' || *ptr > '7')
|
|
goto invalid_syntax;
|
|
}
|
|
else
|
|
break;
|
|
}
|
|
}
|
|
else if (ptr[0] == '0' && (ptr[1] == 'b' || ptr[1] == 'B'))
|
|
{
|
|
firstdigit = ptr += 2;
|
|
|
|
for (;;)
|
|
{
|
|
if (*ptr >= '0' && *ptr <= '1')
|
|
{
|
|
if (unlikely(tmp > -(PG_INT64_MIN / 2)))
|
|
goto out_of_range;
|
|
|
|
tmp = tmp * 2 + (*ptr++ - '0');
|
|
}
|
|
else if (*ptr == '_')
|
|
{
|
|
/* underscore must be followed by more digits */
|
|
ptr++;
|
|
if (*ptr == '\0' || *ptr < '0' || *ptr > '1')
|
|
goto invalid_syntax;
|
|
}
|
|
else
|
|
break;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
firstdigit = ptr;
|
|
|
|
for (;;)
|
|
{
|
|
if (*ptr >= '0' && *ptr <= '9')
|
|
{
|
|
if (unlikely(tmp > -(PG_INT64_MIN / 10)))
|
|
goto out_of_range;
|
|
|
|
tmp = tmp * 10 + (*ptr++ - '0');
|
|
}
|
|
else if (*ptr == '_')
|
|
{
|
|
/* underscore may not be first */
|
|
if (unlikely(ptr == firstdigit))
|
|
goto invalid_syntax;
|
|
/* and it must be followed by more digits */
|
|
ptr++;
|
|
if (*ptr == '\0' || !isdigit((unsigned char) *ptr))
|
|
goto invalid_syntax;
|
|
}
|
|
else
|
|
break;
|
|
}
|
|
}
|
|
|
|
/* require at least one digit */
|
|
if (unlikely(ptr == firstdigit))
|
|
goto invalid_syntax;
|
|
|
|
/* allow trailing whitespace, but not other trailing chars */
|
|
while (isspace((unsigned char) *ptr))
|
|
ptr++;
|
|
|
|
if (unlikely(*ptr != '\0'))
|
|
goto invalid_syntax;
|
|
|
|
if (neg)
|
|
{
|
|
/* check the negative equivalent will fit without overflowing */
|
|
if (tmp > (uint64) (-(PG_INT64_MIN + 1)) + 1)
|
|
goto out_of_range;
|
|
return -((int64) tmp);
|
|
}
|
|
|
|
if (tmp > PG_INT64_MAX)
|
|
goto out_of_range;
|
|
|
|
return (int64) tmp;
|
|
|
|
out_of_range:
|
|
ereturn(escontext, 0,
|
|
(errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
|
|
errmsg("value \"%s\" is out of range for type %s",
|
|
s, "bigint")));
|
|
|
|
invalid_syntax:
|
|
ereturn(escontext, 0,
|
|
(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
|
|
errmsg("invalid input syntax for type %s: \"%s\"",
|
|
"bigint", s)));
|
|
}
|
|
|
|
/*
|
|
* Convert input string to an unsigned 32 bit integer.
|
|
*
|
|
* Allows any number of leading or trailing whitespace characters.
|
|
*
|
|
* If endloc isn't NULL, store a pointer to the rest of the string there,
|
|
* so that caller can parse the rest. Otherwise, it's an error if anything
|
|
* but whitespace follows.
|
|
*
|
|
* typname is what is reported in error messages.
|
|
*
|
|
* If escontext points to an ErrorSaveContext node, that is filled instead
|
|
* of throwing an error; the caller must check SOFT_ERROR_OCCURRED()
|
|
* to detect errors.
|
|
*/
|
|
uint32
|
|
uint32in_subr(const char *s, char **endloc,
|
|
const char *typname, Node *escontext)
|
|
{
|
|
uint32 result;
|
|
unsigned long cvt;
|
|
char *endptr;
|
|
|
|
errno = 0;
|
|
cvt = strtoul(s, &endptr, 0);
|
|
|
|
/*
|
|
* strtoul() normally only sets ERANGE. On some systems it may also set
|
|
* EINVAL, which simply means it couldn't parse the input string. Be sure
|
|
* to report that the same way as the standard error indication (that
|
|
* endptr == s).
|
|
*/
|
|
if ((errno && errno != ERANGE) || endptr == s)
|
|
ereturn(escontext, 0,
|
|
(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
|
|
errmsg("invalid input syntax for type %s: \"%s\"",
|
|
typname, s)));
|
|
|
|
if (errno == ERANGE)
|
|
ereturn(escontext, 0,
|
|
(errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
|
|
errmsg("value \"%s\" is out of range for type %s",
|
|
s, typname)));
|
|
|
|
if (endloc)
|
|
{
|
|
/* caller wants to deal with rest of string */
|
|
*endloc = endptr;
|
|
}
|
|
else
|
|
{
|
|
/* allow only whitespace after number */
|
|
while (*endptr && isspace((unsigned char) *endptr))
|
|
endptr++;
|
|
if (*endptr)
|
|
ereturn(escontext, 0,
|
|
(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
|
|
errmsg("invalid input syntax for type %s: \"%s\"",
|
|
typname, s)));
|
|
}
|
|
|
|
result = (uint32) cvt;
|
|
|
|
/*
|
|
* Cope with possibility that unsigned long is wider than uint32, in which
|
|
* case strtoul will not raise an error for some values that are out of
|
|
* the range of uint32.
|
|
*
|
|
* For backwards compatibility, we want to accept inputs that are given
|
|
* with a minus sign, so allow the input value if it matches after either
|
|
* signed or unsigned extension to long.
|
|
*
|
|
* To ensure consistent results on 32-bit and 64-bit platforms, make sure
|
|
* the error message is the same as if strtoul() had returned ERANGE.
|
|
*/
|
|
#if PG_UINT32_MAX != ULONG_MAX
|
|
if (cvt != (unsigned long) result &&
|
|
cvt != (unsigned long) ((int) result))
|
|
ereturn(escontext, 0,
|
|
(errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
|
|
errmsg("value \"%s\" is out of range for type %s",
|
|
s, typname)));
|
|
#endif
|
|
|
|
return result;
|
|
}
|
|
|
|
/*
|
|
* Convert input string to an unsigned 64 bit integer.
|
|
*
|
|
* Allows any number of leading or trailing whitespace characters.
|
|
*
|
|
* If endloc isn't NULL, store a pointer to the rest of the string there,
|
|
* so that caller can parse the rest. Otherwise, it's an error if anything
|
|
* but whitespace follows.
|
|
*
|
|
* typname is what is reported in error messages.
|
|
*
|
|
* If escontext points to an ErrorSaveContext node, that is filled instead
|
|
* of throwing an error; the caller must check SOFT_ERROR_OCCURRED()
|
|
* to detect errors.
|
|
*/
|
|
uint64
|
|
uint64in_subr(const char *s, char **endloc,
|
|
const char *typname, Node *escontext)
|
|
{
|
|
uint64 result;
|
|
char *endptr;
|
|
|
|
errno = 0;
|
|
result = strtou64(s, &endptr, 0);
|
|
|
|
/*
|
|
* strtoul[l] normally only sets ERANGE. On some systems it may also set
|
|
* EINVAL, which simply means it couldn't parse the input string. Be sure
|
|
* to report that the same way as the standard error indication (that
|
|
* endptr == s).
|
|
*/
|
|
if ((errno && errno != ERANGE) || endptr == s)
|
|
ereturn(escontext, 0,
|
|
(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
|
|
errmsg("invalid input syntax for type %s: \"%s\"",
|
|
typname, s)));
|
|
|
|
if (errno == ERANGE)
|
|
ereturn(escontext, 0,
|
|
(errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
|
|
errmsg("value \"%s\" is out of range for type %s",
|
|
s, typname)));
|
|
|
|
if (endloc)
|
|
{
|
|
/* caller wants to deal with rest of string */
|
|
*endloc = endptr;
|
|
}
|
|
else
|
|
{
|
|
/* allow only whitespace after number */
|
|
while (*endptr && isspace((unsigned char) *endptr))
|
|
endptr++;
|
|
if (*endptr)
|
|
ereturn(escontext, 0,
|
|
(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
|
|
errmsg("invalid input syntax for type %s: \"%s\"",
|
|
typname, s)));
|
|
}
|
|
|
|
return result;
|
|
}
|
|
|
|
/*
|
|
* pg_itoa: converts a signed 16-bit integer to its string representation
|
|
* and returns strlen(a).
|
|
*
|
|
* Caller must ensure that 'a' points to enough memory to hold the result
|
|
* (at least 7 bytes, counting a leading sign and trailing NUL).
|
|
*
|
|
* It doesn't seem worth implementing this separately.
|
|
*/
|
|
int
|
|
pg_itoa(int16 i, char *a)
|
|
{
|
|
return pg_ltoa((int32) i, a);
|
|
}
|
|
|
|
/*
|
|
* pg_ultoa_n: converts an unsigned 32-bit integer to its string representation,
|
|
* not NUL-terminated, and returns the length of that string representation
|
|
*
|
|
* Caller must ensure that 'a' points to enough memory to hold the result (at
|
|
* least 10 bytes)
|
|
*/
|
|
int
|
|
pg_ultoa_n(uint32 value, char *a)
|
|
{
|
|
int olength,
|
|
i = 0;
|
|
|
|
/* Degenerate case */
|
|
if (value == 0)
|
|
{
|
|
*a = '0';
|
|
return 1;
|
|
}
|
|
|
|
olength = decimalLength32(value);
|
|
|
|
/* Compute the result string. */
|
|
while (value >= 10000)
|
|
{
|
|
const uint32 c = value - 10000 * (value / 10000);
|
|
const uint32 c0 = (c % 100) << 1;
|
|
const uint32 c1 = (c / 100) << 1;
|
|
|
|
char *pos = a + olength - i;
|
|
|
|
value /= 10000;
|
|
|
|
memcpy(pos - 2, DIGIT_TABLE + c0, 2);
|
|
memcpy(pos - 4, DIGIT_TABLE + c1, 2);
|
|
i += 4;
|
|
}
|
|
if (value >= 100)
|
|
{
|
|
const uint32 c = (value % 100) << 1;
|
|
|
|
char *pos = a + olength - i;
|
|
|
|
value /= 100;
|
|
|
|
memcpy(pos - 2, DIGIT_TABLE + c, 2);
|
|
i += 2;
|
|
}
|
|
if (value >= 10)
|
|
{
|
|
const uint32 c = value << 1;
|
|
|
|
char *pos = a + olength - i;
|
|
|
|
memcpy(pos - 2, DIGIT_TABLE + c, 2);
|
|
}
|
|
else
|
|
{
|
|
*a = (char) ('0' + value);
|
|
}
|
|
|
|
return olength;
|
|
}
|
|
|
|
/*
|
|
* pg_ltoa: converts a signed 32-bit integer to its string representation and
|
|
* returns strlen(a).
|
|
*
|
|
* It is the caller's responsibility to ensure that a is at least 12 bytes long,
|
|
* which is enough room to hold a minus sign, a maximally long int32, and the
|
|
* above terminating NUL.
|
|
*/
|
|
int
|
|
pg_ltoa(int32 value, char *a)
|
|
{
|
|
uint32 uvalue = (uint32) value;
|
|
int len = 0;
|
|
|
|
if (value < 0)
|
|
{
|
|
uvalue = (uint32) 0 - uvalue;
|
|
a[len++] = '-';
|
|
}
|
|
len += pg_ultoa_n(uvalue, a + len);
|
|
a[len] = '\0';
|
|
return len;
|
|
}
|
|
|
|
/*
|
|
* Get the decimal representation, not NUL-terminated, and return the length of
|
|
* same. Caller must ensure that a points to at least MAXINT8LEN bytes.
|
|
*/
|
|
int
|
|
pg_ulltoa_n(uint64 value, char *a)
|
|
{
|
|
int olength,
|
|
i = 0;
|
|
uint32 value2;
|
|
|
|
/* Degenerate case */
|
|
if (value == 0)
|
|
{
|
|
*a = '0';
|
|
return 1;
|
|
}
|
|
|
|
olength = decimalLength64(value);
|
|
|
|
/* Compute the result string. */
|
|
while (value >= 100000000)
|
|
{
|
|
const uint64 q = value / 100000000;
|
|
uint32 value3 = (uint32) (value - 100000000 * q);
|
|
|
|
const uint32 c = value3 % 10000;
|
|
const uint32 d = value3 / 10000;
|
|
const uint32 c0 = (c % 100) << 1;
|
|
const uint32 c1 = (c / 100) << 1;
|
|
const uint32 d0 = (d % 100) << 1;
|
|
const uint32 d1 = (d / 100) << 1;
|
|
|
|
char *pos = a + olength - i;
|
|
|
|
value = q;
|
|
|
|
memcpy(pos - 2, DIGIT_TABLE + c0, 2);
|
|
memcpy(pos - 4, DIGIT_TABLE + c1, 2);
|
|
memcpy(pos - 6, DIGIT_TABLE + d0, 2);
|
|
memcpy(pos - 8, DIGIT_TABLE + d1, 2);
|
|
i += 8;
|
|
}
|
|
|
|
/* Switch to 32-bit for speed */
|
|
value2 = (uint32) value;
|
|
|
|
if (value2 >= 10000)
|
|
{
|
|
const uint32 c = value2 - 10000 * (value2 / 10000);
|
|
const uint32 c0 = (c % 100) << 1;
|
|
const uint32 c1 = (c / 100) << 1;
|
|
|
|
char *pos = a + olength - i;
|
|
|
|
value2 /= 10000;
|
|
|
|
memcpy(pos - 2, DIGIT_TABLE + c0, 2);
|
|
memcpy(pos - 4, DIGIT_TABLE + c1, 2);
|
|
i += 4;
|
|
}
|
|
if (value2 >= 100)
|
|
{
|
|
const uint32 c = (value2 % 100) << 1;
|
|
char *pos = a + olength - i;
|
|
|
|
value2 /= 100;
|
|
|
|
memcpy(pos - 2, DIGIT_TABLE + c, 2);
|
|
i += 2;
|
|
}
|
|
if (value2 >= 10)
|
|
{
|
|
const uint32 c = value2 << 1;
|
|
char *pos = a + olength - i;
|
|
|
|
memcpy(pos - 2, DIGIT_TABLE + c, 2);
|
|
}
|
|
else
|
|
*a = (char) ('0' + value2);
|
|
|
|
return olength;
|
|
}
|
|
|
|
/*
|
|
* pg_lltoa: converts a signed 64-bit integer to its string representation and
|
|
* returns strlen(a).
|
|
*
|
|
* Caller must ensure that 'a' points to enough memory to hold the result
|
|
* (at least MAXINT8LEN + 1 bytes, counting a leading sign and trailing NUL).
|
|
*/
|
|
int
|
|
pg_lltoa(int64 value, char *a)
|
|
{
|
|
uint64 uvalue = value;
|
|
int len = 0;
|
|
|
|
if (value < 0)
|
|
{
|
|
uvalue = (uint64) 0 - uvalue;
|
|
a[len++] = '-';
|
|
}
|
|
|
|
len += pg_ulltoa_n(uvalue, a + len);
|
|
a[len] = '\0';
|
|
return len;
|
|
}
|
|
|
|
|
|
/*
|
|
* pg_ultostr_zeropad
|
|
* Converts 'value' into a decimal string representation stored at 'str'.
|
|
* 'minwidth' specifies the minimum width of the result; any extra space
|
|
* is filled up by prefixing the number with zeros.
|
|
*
|
|
* Returns the ending address of the string result (the last character written
|
|
* plus 1). Note that no NUL terminator is written.
|
|
*
|
|
* The intended use-case for this function is to build strings that contain
|
|
* multiple individual numbers, for example:
|
|
*
|
|
* str = pg_ultostr_zeropad(str, hours, 2);
|
|
* *str++ = ':';
|
|
* str = pg_ultostr_zeropad(str, mins, 2);
|
|
* *str++ = ':';
|
|
* str = pg_ultostr_zeropad(str, secs, 2);
|
|
* *str = '\0';
|
|
*
|
|
* Note: Caller must ensure that 'str' points to enough memory to hold the
|
|
* result.
|
|
*/
|
|
char *
|
|
pg_ultostr_zeropad(char *str, uint32 value, int32 minwidth)
|
|
{
|
|
int len;
|
|
|
|
Assert(minwidth > 0);
|
|
|
|
if (value < 100 && minwidth == 2) /* Short cut for common case */
|
|
{
|
|
memcpy(str, DIGIT_TABLE + value * 2, 2);
|
|
return str + 2;
|
|
}
|
|
|
|
len = pg_ultoa_n(value, str);
|
|
if (len >= minwidth)
|
|
return str + len;
|
|
|
|
memmove(str + minwidth - len, str, len);
|
|
memset(str, '0', minwidth - len);
|
|
return str + minwidth;
|
|
}
|
|
|
|
/*
|
|
* pg_ultostr
|
|
* Converts 'value' into a decimal string representation stored at 'str'.
|
|
*
|
|
* Returns the ending address of the string result (the last character written
|
|
* plus 1). Note that no NUL terminator is written.
|
|
*
|
|
* The intended use-case for this function is to build strings that contain
|
|
* multiple individual numbers, for example:
|
|
*
|
|
* str = pg_ultostr(str, a);
|
|
* *str++ = ' ';
|
|
* str = pg_ultostr(str, b);
|
|
* *str = '\0';
|
|
*
|
|
* Note: Caller must ensure that 'str' points to enough memory to hold the
|
|
* result.
|
|
*/
|
|
char *
|
|
pg_ultostr(char *str, uint32 value)
|
|
{
|
|
int len = pg_ultoa_n(value, str);
|
|
|
|
return str + len;
|
|
}
|