Improve performance of pg_strtointNN functions

Experiments have shown that modern versions of both gcc and clang are unable to fully optimize the multiplication by 10 that we're doing in the pg_strtointNN functions. Both compilers seem to be making use of "imul", which is not the most efficient way to multiply by 10. This seems to be due to the overflow checking that we're doing. Without the overflow checks, both of those compilers switch to a more efficient method of multiplying by 10. In the absence of overflow concerns, integer multiplication by 10 can be done by bit-shifting left 3 places to multiply by 8 and then adding the original value twice.

To allow compilers this flexibility, here we adjust the code so that we accumulate the number as an unsigned version of the type and remove the use of pg_mul_sNN_overflow() and pg_sub_sNN_overflow(). The overflow checking can be done simply by checking if the accumulated value has gone beyond a 10th of the maximum *signed* value for the given type. If it has, then the accumulation of the next digit will cause an overflow. After this is done, we do a final overflow check before converting the unsigned version of the number back to its signed counterpart.

Testing has shown about an 8% speedup of a COPY into a table containing 2 INT columns.

Author: David Rowley, Dean Rasheed
Discussion: https://postgr.es/m/CAApHDvrL6_+wKgPqRHr7gH_6xy3hXM6a3QCsZ5ForurjDFfenA@mail.gmail.com
Discussion: https://postgr.es/m/CAApHDvrdYByjfj-=WbmVNFgmVZg88-dE7heukw8p55aJ+W=qxQ@mail.gmail.com
2022-12-04 16:18:18 +13:00 · 2022-12-04 16:18:18 +13:00 · 6b423ec677
parent 29452de734
commit 6b423ec677
1 changed files with 42 additions and 44 deletions
--- a/src/backend/utils/adt/numutils.c
+++ b/src/backend/utils/adt/numutils.c
@ -91,15 +91,15 @@ decimalLength64(const uint64 v)
 * Allows any number of leading or trailing whitespace characters. Will throw
 * ereport() upon bad input format or overflow.
 *
- * NB: Accumulate input as a negative number, to deal with two's complement
+ * NB: Accumulate input as an unsigned number, to deal with two's complement
 * representation of the most negative number, which can't be represented as a
- * positive number.
+ * signed positive number.
 */
 int16
 pg_strtoint16(const char *s)
 {
 	const char *ptr = s;
-	int16		tmp = 0;
+	uint16		tmp = 0;
 	bool		neg = false;

 	/* skip leading spaces */
@ -122,11 +122,10 @@ pg_strtoint16(const char *s)
 	/* process digits */
 	while (*ptr && isdigit((unsigned char) *ptr))
 	{
-		int8		digit = (*ptr++ - '0');
-
-		if (unlikely(pg_mul_s16_overflow(tmp, 10, &tmp)) ||
-			unlikely(pg_sub_s16_overflow(tmp, digit, &tmp)))
+		if (unlikely(tmp > (PG_INT16_MAX / 10)))
 			goto out_of_range;
+
+		tmp = tmp * 10 + (*ptr++ - '0');
 	}

 	/* allow trailing whitespace, but not other trailing chars */
@ -136,15 +135,18 @@ pg_strtoint16(const char *s)
 	if (unlikely(*ptr != '\0'))
 		goto invalid_syntax;

-	if (!neg)
+	if (neg)
 	{
-		/* could fail if input is most negative number */
-		if (unlikely(tmp == PG_INT16_MIN))
+		/* check the negative equivalent will fit without overflowing */
+		if (tmp > (uint16) (-(PG_INT16_MIN + 1)) + 1)
 			goto out_of_range;
-		tmp = -tmp;
+		return -((int16) tmp);
 	}

-	return tmp;
+	if (tmp > PG_INT16_MAX)
+		goto out_of_range;
+
+	return (int16) tmp;

 out_of_range:
 	ereport(ERROR,
@ -167,15 +169,15 @@ invalid_syntax:
 * Allows any number of leading or trailing whitespace characters. Will throw
 * ereport() upon bad input format or overflow.
 *
- * NB: Accumulate input as a negative number, to deal with two's complement
+ * NB: Accumulate input as an unsigned number, to deal with two's complement
 * representation of the most negative number, which can't be represented as a
- * positive number.
+ * signed positive number.
 */
 int32
 pg_strtoint32(const char *s)
 {
 	const char *ptr = s;
-	int32		tmp = 0;
+	uint32		tmp = 0;
 	bool		neg = false;

 	/* skip leading spaces */
@ -198,11 +200,10 @@ pg_strtoint32(const char *s)
 	/* process digits */
 	while (*ptr && isdigit((unsigned char) *ptr))
 	{
-		int8		digit = (*ptr++ - '0');
-
-		if (unlikely(pg_mul_s32_overflow(tmp, 10, &tmp)) ||
-			unlikely(pg_sub_s32_overflow(tmp, digit, &tmp)))
+		if (unlikely(tmp > (PG_INT32_MAX / 10)))
 			goto out_of_range;
+
+		tmp = tmp * 10 + (*ptr++ - '0');
 	}

 	/* allow trailing whitespace, but not other trailing chars */
@ -212,15 +213,18 @@ pg_strtoint32(const char *s)
 	if (unlikely(*ptr != '\0'))
 		goto invalid_syntax;

-	if (!neg)
+	if (neg)
 	{
-		/* could fail if input is most negative number */
-		if (unlikely(tmp == PG_INT32_MIN))
+		/* check the negative equivalent will fit without overflowing */
+		if (tmp > (uint32) (-(PG_INT32_MIN + 1)) + 1)
 			goto out_of_range;
-		tmp = -tmp;
+		return -((int32) tmp);
 	}

-	return tmp;
+	if (tmp > PG_INT32_MAX)
+		goto out_of_range;
+
+	return (int32) tmp;

 out_of_range:
 	ereport(ERROR,
@ -243,25 +247,17 @@ invalid_syntax:
 * Allows any number of leading or trailing whitespace characters. Will throw
 * ereport() upon bad input format or overflow.
 *
- * NB: Accumulate input as a negative number, to deal with two's complement
+ * NB: Accumulate input as an unsigned number, to deal with two's complement
 * representation of the most negative number, which can't be represented as a
- * positive number.
+ * signed positive number.
 */
 int64
 pg_strtoint64(const char *s)
 {
 	const char *ptr = s;
-	int64		tmp = 0;
+	uint64		tmp = 0;
 	bool		neg = false;

-	/*
-	 * Do our own scan, rather than relying on sscanf which might be broken
-	 * for long long.
-	 *
-	 * As INT64_MIN can't be stored as a positive 64 bit integer, accumulate
-	 * value as a negative number.
-	 */
-
 	/* skip leading spaces */
 	while (*ptr && isspace((unsigned char) *ptr))
 		ptr++;
@ -282,11 +278,10 @@ pg_strtoint64(const char *s)
 	/* process digits */
 	while (*ptr && isdigit((unsigned char) *ptr))
 	{
-		int8		digit = (*ptr++ - '0');
-
-		if (unlikely(pg_mul_s64_overflow(tmp, 10, &tmp)) ||
-			unlikely(pg_sub_s64_overflow(tmp, digit, &tmp)))
+		if (unlikely(tmp > (PG_INT64_MAX / 10)))
 			goto out_of_range;
+
+		tmp = tmp * 10 + (*ptr++ - '0');
 	}

 	/* allow trailing whitespace, but not other trailing chars */
@ -296,15 +291,18 @@ pg_strtoint64(const char *s)
 	if (unlikely(*ptr != '\0'))
 		goto invalid_syntax;

-	if (!neg)
+	if (neg)
 	{
-		/* could fail if input is most negative number */
-		if (unlikely(tmp == PG_INT64_MIN))
+		/* check the negative equivalent will fit without overflowing */
+		if (tmp > (uint64) (-(PG_INT64_MIN + 1)) + 1)
 			goto out_of_range;
-		tmp = -tmp;
+		return -((int64) tmp);
 	}

-	return tmp;
+	if (tmp > PG_INT64_MAX)
+		goto out_of_range;
+
+	return (int64) tmp;

 out_of_range:
 	ereport(ERROR,