postgresql/src/backend/utils/adt/numutils.c
Bruce Momjian ca3b37487b Update copyright for 2021
Backpatch-through: 9.5
2021-01-02 13:06:25 -05:00

631 lines
15 KiB
C

/*-------------------------------------------------------------------------
*
* numutils.c
* utility functions for I/O of built-in numeric types.
*
* Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
*
* IDENTIFICATION
* src/backend/utils/adt/numutils.c
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#include <math.h>
#include <limits.h>
#include <ctype.h>
#include "common/int.h"
#include "utils/builtins.h"
#include "port/pg_bitutils.h"
/*
 * Lookup table holding the two-character decimal spelling of every value
 * from 0 through 99, stored back to back: the digits of n live at offsets
 * 2*n and 2*n + 1.  The integer-to-string routines below copy digits out of
 * it a pair at a time to speed up decimal digit generation.
 *
 * Each line below covers ten consecutive values.  The array is sized to
 * exactly 200 bytes, so no terminating NUL is stored.
 */
static const char DIGIT_TABLE[200] =
    "00010203040506070809"
    "10111213141516171819"
    "20212223242526272829"
    "30313233343536373839"
    "40414243444546474849"
    "50515253545556575859"
    "60616263646566676869"
    "70717273747576777879"
    "80818283848586878889"
    "90919293949596979899";
/*
 * decimalLength32
 *		Return the number of decimal digits needed to print 'v'.
 *
 * Adapted from http://graphics.stanford.edu/~seander/bithacks.html#IntegerLog10
 *
 * The callers in this file handle v == 0 themselves before calling here.
 * NOTE(review): pg_leftmost_one_pos32() presumably requires a nonzero
 * argument -- confirm before ever passing zero.
 */
static inline int
decimalLength32(const uint32 v)
{
    static const uint32 pow10_table[] = {
        1,
        10,
        100,
        1000,
        10000,
        100000,
        1000000,
        10000000,
        100000000,
        1000000000
    };

    /* one more than the position reported for the highest set bit */
    const int   nbits = pg_leftmost_one_pos32(v) + 1;

    /*
     * 1233/4096 is a good-enough approximation of log10(2), so this scales
     * the base-2 logarithm down to an estimate of the base-10 logarithm.
     */
    int         ndigits = nbits * 1233 / 4096;

    /* The estimate can be one too small; the power-of-ten table settles it. */
    if (v >= pow10_table[ndigits])
        ndigits++;

    return ndigits;
}
/*
 * decimalLength64
 *		Return the number of decimal digits needed to print 'v'.
 *
 * 64-bit counterpart of decimalLength32; the same estimate-then-correct
 * scheme is used, with a power-of-ten table reaching up to 10^19.
 *
 * The caller in this file handles v == 0 itself before calling here.
 * NOTE(review): pg_leftmost_one_pos64() presumably requires a nonzero
 * argument -- confirm before ever passing zero.
 */
static inline int
decimalLength64(const uint64 v)
{
    static const uint64 pow10_table[] = {
        UINT64CONST(1),
        UINT64CONST(10),
        UINT64CONST(100),
        UINT64CONST(1000),
        UINT64CONST(10000),
        UINT64CONST(100000),
        UINT64CONST(1000000),
        UINT64CONST(10000000),
        UINT64CONST(100000000),
        UINT64CONST(1000000000),
        UINT64CONST(10000000000),
        UINT64CONST(100000000000),
        UINT64CONST(1000000000000),
        UINT64CONST(10000000000000),
        UINT64CONST(100000000000000),
        UINT64CONST(1000000000000000),
        UINT64CONST(10000000000000000),
        UINT64CONST(100000000000000000),
        UINT64CONST(1000000000000000000),
        UINT64CONST(10000000000000000000)
    };

    /* one more than the position reported for the highest set bit */
    const int   nbits = pg_leftmost_one_pos64(v) + 1;

    /*
     * 1233/4096 is a good-enough approximation of log10(2), so this scales
     * the base-2 logarithm down to an estimate of the base-10 logarithm.
     */
    int         ndigits = nbits * 1233 / 4096;

    /* The estimate can be one too small; the power-of-ten table settles it. */
    if (v >= pow10_table[ndigits])
        ndigits++;

    return ndigits;
}
/*
* pg_atoi: convert string to integer
*
* allows any number of leading or trailing whitespace characters.
*
* 'size' is the sizeof() the desired integral result (1, 2, or 4 bytes).
*
* c, if not 0, is a terminator character that may appear after the
* integer (plus whitespace). If 0, the string must end after the integer.
*
* Unlike plain atoi(), this will throw ereport() upon bad input format or
* overflow.
*/
int32
pg_atoi(const char *s, int size, int c)
{
long l;
char *badp;
/*
* Some versions of strtol treat the empty string as an error, but some
* seem not to. Make an explicit test to be sure we catch it.
*/
if (s == NULL)
elog(ERROR, "NULL pointer");
if (*s == 0)
ereport(ERROR,
(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
errmsg("invalid input syntax for type %s: \"%s\"",
"integer", s)));
errno = 0;
l = strtol(s, &badp, 10);
/* We made no progress parsing the string, so bail out */
if (s == badp)
ereport(ERROR,
(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
errmsg("invalid input syntax for type %s: \"%s\"",
"integer", s)));
switch (size)
{
case sizeof(int32):
if (errno == ERANGE
#if defined(HAVE_LONG_INT_64)
/* won't get ERANGE on these with 64-bit longs... */
|| l < INT_MIN || l > INT_MAX
#endif
)
ereport(ERROR,
(errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
errmsg("value \"%s\" is out of range for type %s", s,
"integer")));
break;
case sizeof(int16):
if (errno == ERANGE || l < SHRT_MIN || l > SHRT_MAX)
ereport(ERROR,
(errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
errmsg("value \"%s\" is out of range for type %s", s,
"smallint")));
break;
case sizeof(int8):
if (errno == ERANGE || l < SCHAR_MIN || l > SCHAR_MAX)
ereport(ERROR,
(errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
errmsg("value \"%s\" is out of range for 8-bit integer", s)));
break;
default:
elog(ERROR, "unsupported result size: %d", size);
}
/*
* Skip any trailing whitespace; if anything but whitespace remains before
* the terminating character, bail out
*/
while (*badp && *badp != c && isspace((unsigned char) *badp))
badp++;
if (*badp && *badp != c)
ereport(ERROR,
(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
errmsg("invalid input syntax for type %s: \"%s\"",
"integer", s)));
return (int32) l;
}
/*
* Convert input string to a signed 16 bit integer.
*
* Allows any number of leading or trailing whitespace characters. Will throw
* ereport() upon bad input format or overflow.
*
* NB: Accumulate input as a negative number, to deal with two's complement
* representation of the most negative number, which can't be represented as a
* positive number.
*/
int16
pg_strtoint16(const char *s)
{
const char *ptr = s;
int16 tmp = 0;
bool neg = false;
/* skip leading spaces */
while (likely(*ptr) && isspace((unsigned char) *ptr))
ptr++;
/* handle sign */
if (*ptr == '-')
{
ptr++;
neg = true;
}
else if (*ptr == '+')
ptr++;
/* require at least one digit */
if (unlikely(!isdigit((unsigned char) *ptr)))
goto invalid_syntax;
/* process digits */
while (*ptr && isdigit((unsigned char) *ptr))
{
int8 digit = (*ptr++ - '0');
if (unlikely(pg_mul_s16_overflow(tmp, 10, &tmp)) ||
unlikely(pg_sub_s16_overflow(tmp, digit, &tmp)))
goto out_of_range;
}
/* allow trailing whitespace, but not other trailing chars */
while (*ptr != '\0' && isspace((unsigned char) *ptr))
ptr++;
if (unlikely(*ptr != '\0'))
goto invalid_syntax;
if (!neg)
{
/* could fail if input is most negative number */
if (unlikely(tmp == PG_INT16_MIN))
goto out_of_range;
tmp = -tmp;
}
return tmp;
out_of_range:
ereport(ERROR,
(errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
errmsg("value \"%s\" is out of range for type %s",
s, "smallint")));
invalid_syntax:
ereport(ERROR,
(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
errmsg("invalid input syntax for type %s: \"%s\"",
"smallint", s)));
return 0; /* keep compiler quiet */
}
/*
* Convert input string to a signed 32 bit integer.
*
* Allows any number of leading or trailing whitespace characters. Will throw
* ereport() upon bad input format or overflow.
*
* NB: Accumulate input as a negative number, to deal with two's complement
* representation of the most negative number, which can't be represented as a
* positive number.
*/
int32
pg_strtoint32(const char *s)
{
const char *ptr = s;
int32 tmp = 0;
bool neg = false;
/* skip leading spaces */
while (likely(*ptr) && isspace((unsigned char) *ptr))
ptr++;
/* handle sign */
if (*ptr == '-')
{
ptr++;
neg = true;
}
else if (*ptr == '+')
ptr++;
/* require at least one digit */
if (unlikely(!isdigit((unsigned char) *ptr)))
goto invalid_syntax;
/* process digits */
while (*ptr && isdigit((unsigned char) *ptr))
{
int8 digit = (*ptr++ - '0');
if (unlikely(pg_mul_s32_overflow(tmp, 10, &tmp)) ||
unlikely(pg_sub_s32_overflow(tmp, digit, &tmp)))
goto out_of_range;
}
/* allow trailing whitespace, but not other trailing chars */
while (*ptr != '\0' && isspace((unsigned char) *ptr))
ptr++;
if (unlikely(*ptr != '\0'))
goto invalid_syntax;
if (!neg)
{
/* could fail if input is most negative number */
if (unlikely(tmp == PG_INT32_MIN))
goto out_of_range;
tmp = -tmp;
}
return tmp;
out_of_range:
ereport(ERROR,
(errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
errmsg("value \"%s\" is out of range for type %s",
s, "integer")));
invalid_syntax:
ereport(ERROR,
(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
errmsg("invalid input syntax for type %s: \"%s\"",
"integer", s)));
return 0; /* keep compiler quiet */
}
/*
 * pg_itoa: write the decimal text of a signed 16-bit integer into 'a' and
 * return the length of that text, i.e. strlen(a).
 *
 * The caller has to supply a buffer of at least 7 bytes: up to five digits,
 * a possible leading minus sign, and the trailing NUL.
 *
 * A dedicated 16-bit implementation isn't worth having, so the value is
 * simply widened and handed to pg_ltoa().
 */
int
pg_itoa(int16 i, char *a)
{
    const int32 widened = (int32) i;

    return pg_ltoa(widened, a);
}
/*
 * pg_ultoa_n: write the decimal text of an unsigned 32-bit integer into 'a',
 * without a terminating NUL, and return the number of characters written.
 *
 * The caller has to supply a buffer of at least 10 bytes, the length of the
 * largest uint32 in decimal.
 */
int
pg_ultoa_n(uint32 value, char *a)
{
    int         ndigits;
    char       *cursor;

    /* Zero is dealt with up front as a special case */
    if (value == 0)
    {
        a[0] = '0';
        return 1;
    }

    ndigits = decimalLength32(value);

    /*
     * The output is produced right to left: 'cursor' starts one past the
     * final character and always marks the leftmost position filled so far.
     */
    cursor = a + ndigits;

    /* Peel off the four low-order digits per iteration, as two pairs */
    while (value >= 10000)
    {
        const uint32 low4 = value % 10000;
        const uint32 lo_pair = (low4 % 100) << 1;
        const uint32 hi_pair = (low4 / 100) << 1;

        value /= 10000;
        cursor -= 4;
        memcpy(cursor + 2, DIGIT_TABLE + lo_pair, 2);
        memcpy(cursor, DIGIT_TABLE + hi_pair, 2);
    }

    /* At most four digits remain; emit the low pair if there are 3 or 4 */
    if (value >= 100)
    {
        const uint32 pair = (value % 100) << 1;

        value /= 100;
        cursor -= 2;
        memcpy(cursor, DIGIT_TABLE + pair, 2);
    }

    /* Finally the leading one or two digits */
    if (value >= 10)
        memcpy(cursor - 2, DIGIT_TABLE + (value << 1), 2);
    else
        *a = (char) ('0' + value);

    return ndigits;
}
/*
 * pg_ltoa: write the decimal text of a signed 32-bit integer into 'a',
 * NUL-terminated, and return the length of that text, i.e. strlen(a).
 *
 * The caller is responsible for 'a' being at least 12 bytes long: room for a
 * minus sign, the longest possible int32, and the terminating NUL.
 */
int
pg_ltoa(int32 value, char *a)
{
    char       *out = a;
    uint32      magnitude = (uint32) value;

    if (value < 0)
    {
        *out++ = '-';
        /* negate in unsigned arithmetic so the most negative value is safe */
        magnitude = (uint32) 0 - magnitude;
    }

    out += pg_ultoa_n(magnitude, out);
    *out = '\0';

    return (int) (out - a);
}
/*
 * pg_ulltoa_n: write the decimal text of an unsigned 64-bit integer into 'a',
 * without a terminating NUL, and return the number of characters written.
 *
 * Caller must ensure that 'a' points to at least MAXINT8LEN bytes.
 */
int
pg_ulltoa_n(uint64 value, char *a)
{
    int         olength,
                i = 0;
    uint32      value2;

    /* Degenerate case */
    if (value == 0)
    {
        *a = '0';
        return 1;
    }

    olength = decimalLength64(value);

    /*
     * Compute the result string, working from the least significant digits
     * backwards.  'i' counts the characters already placed at the tail of the
     * output, so a + olength - i is one past the next position to fill.
     *
     * Each iteration strips the low eight digits off the 64-bit value and
     * emits them as four table-driven digit pairs.
     */
    while (value >= 100000000)
    {
        const uint64 q = value / 100000000;
        /* low eight digits; named so as not to shadow the outer 'value2' */
        const uint32 value3 = (uint32) (value - 100000000 * q);

        const uint32 c = value3 % 10000;
        const uint32 d = value3 / 10000;
        const uint32 c0 = (c % 100) << 1;
        const uint32 c1 = (c / 100) << 1;
        const uint32 d0 = (d % 100) << 1;
        const uint32 d1 = (d / 100) << 1;

        char       *pos = a + olength - i;

        value = q;

        memcpy(pos - 2, DIGIT_TABLE + c0, 2);
        memcpy(pos - 4, DIGIT_TABLE + c1, 2);
        memcpy(pos - 6, DIGIT_TABLE + d0, 2);
        memcpy(pos - 8, DIGIT_TABLE + d1, 2);
        i += 8;
    }

    /*
     * Switch to 32-bit for speed.  What is left is below 100000000, so it
     * fits in 32 bits and holds at most eight digits; a single four-digit
     * step (not a loop) is therefore enough below.
     */
    value2 = (uint32) value;

    if (value2 >= 10000)
    {
        const uint32 c = value2 - 10000 * (value2 / 10000);
        const uint32 c0 = (c % 100) << 1;
        const uint32 c1 = (c / 100) << 1;

        char       *pos = a + olength - i;

        value2 /= 10000;

        memcpy(pos - 2, DIGIT_TABLE + c0, 2);
        memcpy(pos - 4, DIGIT_TABLE + c1, 2);
        i += 4;
    }

    /* at most four digits remain; emit the low pair if there are 3 or 4 */
    if (value2 >= 100)
    {
        const uint32 c = (value2 % 100) << 1;
        char       *pos = a + olength - i;

        value2 /= 100;

        memcpy(pos - 2, DIGIT_TABLE + c, 2);
        i += 2;
    }

    /* finally the leading one or two digits */
    if (value2 >= 10)
    {
        const uint32 c = value2 << 1;
        char       *pos = a + olength - i;

        memcpy(pos - 2, DIGIT_TABLE + c, 2);
    }
    else
        *a = (char) ('0' + value2);

    return olength;
}
/*
 * pg_lltoa: write the decimal text of a signed 64-bit integer into 'a',
 * NUL-terminated, and return the length of that text, i.e. strlen(a).
 *
 * The caller has to supply a buffer of at least MAXINT8LEN + 1 bytes, which
 * accounts for a leading sign and the trailing NUL.
 */
int
pg_lltoa(int64 value, char *a)
{
    char       *out = a;
    uint64      magnitude = (uint64) value;

    if (value < 0)
    {
        *out++ = '-';
        /* negate in unsigned arithmetic so the most negative value is safe */
        magnitude = (uint64) 0 - magnitude;
    }

    out += pg_ulltoa_n(magnitude, out);
    *out = '\0';

    return (int) (out - a);
}
/*
 * pg_ultostr_zeropad
 *		Store the decimal text of 'value' at 'str', left-padded with zeros
 *		so that it occupies at least 'minwidth' characters.
 *
 * The return value points just past the last character written.  No NUL
 * terminator is written.
 *
 * This is meant for assembling a string out of several numbers, e.g.:
 *
 *	str = pg_ultostr_zeropad(str, hours, 2);
 *	*str++ = ':';
 *	str = pg_ultostr_zeropad(str, mins, 2);
 *	*str++ = ':';
 *	str = pg_ultostr_zeropad(str, secs, 2);
 *	*str = '\0';
 *
 * Note: it is up to the caller to make sure 'str' has room for the result.
 */
char *
pg_ultostr_zeropad(char *str, uint32 value, int32 minwidth)
{
    int         ndigits;
    int         npad;

    Assert(minwidth > 0);

    /* Fast path for the common two-digit field: one table lookup does it */
    if (minwidth == 2 && value < 100)
    {
        memcpy(str, DIGIT_TABLE + value * 2, 2);
        return str + 2;
    }

    ndigits = pg_ultoa_n(value, str);

    /* Already wide enough: nothing to pad */
    if (ndigits >= minwidth)
        return str + ndigits;

    /* Slide the digits right (regions may overlap), then zero-fill in front */
    npad = minwidth - ndigits;
    memmove(str + npad, str, ndigits);
    memset(str, '0', npad);

    return str + minwidth;
}
/*
 * pg_ultostr
 *		Store the decimal text of 'value' at 'str'.
 *
 * The return value points just past the last character written.  No NUL
 * terminator is written.
 *
 * This is meant for assembling a string out of several numbers, e.g.:
 *
 *	str = pg_ultostr(str, a);
 *	*str++ = ' ';
 *	str = pg_ultostr(str, b);
 *	*str = '\0';
 *
 * Note: it is up to the caller to make sure 'str' has room for the result.
 */
char *
pg_ultostr(char *str, uint32 value)
{
    /* pg_ultoa_n reports how many characters it wrote; advance past them */
    return str + pg_ultoa_n(value, str);
}
/*
 * pg_strtouint64
 *		Parse 'str' as an unsigned 64-bit integer.
 *
 * The API is identical to strtoul(3), except that 64-bit values are handled
 * even on platforms whose "long" is narrower than that.
 *
 * For now it is assumed that the platform offers a suitable function under
 * some name, so one of those is picked at compile time instead of rolling
 * our own.
 */
uint64
pg_strtouint64(const char *str, char **endptr, int base)
{
    uint64      result;

#ifdef _MSC_VER                 /* MSVC only */
    result = _strtoui64(str, endptr, base);
#elif defined(HAVE_STRTOULL) && SIZEOF_LONG < 8
    result = strtoull(str, endptr, base);
#else
    result = strtoul(str, endptr, base);
#endif

    return result;
}