From 6eb3eb577d76b3f58a98f78232af9e86624ce5eb Mon Sep 17 00:00:00 2001
From: Tom Lane <tgl@sss.pgh.pa.us>
Date: Mon, 8 Oct 2018 12:19:20 -0400
Subject: [PATCH] Improve snprintf.c's handling of NaN, Infinity, and minus
 zero.

Up to now, float4out/float8out handled NaN and Infinity cases explicitly,
and invoked psprintf only for ordinary float values.  This was done because
platform implementations of snprintf produce varying representations of
these special cases.  But now that we use snprintf.c always, it's better
to give it the responsibility to produce a uniform representation of
these cases, so that we have uniformity across the board not only in
float4out/float8out.  Hence, move that work into fmtfloat().

Also, teach fmtfloat() to recognize IEEE minus zero and handle it
correctly.  The previous coding worked only accidentally, and would
fail for e.g. "%+f" format (it'd print "+-0.00000").  Now that we're
using snprintf.c everywhere, it's not acceptable for it to do weird
things in corner cases.  (This incidentally avoids a portability
problem we've seen on some really ancient platforms, that native
sprintf does the wrong thing with minus zero.)

Also, introduce a new entry point in snprintf.c to allow float[48]out
to bypass the work of interpreting a well-known format spec, as well
as bypassing the overhead of the psprintf layer.  I modeled this API
loosely on strfromd().  In my testing, this brings float[48]out back
to approximately the same speed they had when using native snprintf,
fixing one of the main performance issues caused by using snprintf.c.

(There is some talk of more aggressive work to improve the speed of
floating-point output conversion, but these changes seem to provide
a better starting point for such work anyway.)

Getting rid of the previous ad-hoc hack for Infinity/NaN in fmtfloat()
allows removing <ctype.h> from snprintf.c's #includes.  I also removed
a few other #includes that I think are historical, though the buildfarm
may expose that as wrong.

Discussion: https://postgr.es/m/13178.1538794717@sss.pgh.pa.us
---
 src/backend/utils/adt/float.c |  52 ++---------
 src/include/port.h            |   3 +
 src/port/snprintf.c           | 164 ++++++++++++++++++++++++++++------
 3 files changed, 145 insertions(+), 74 deletions(-)

diff --git a/src/backend/utils/adt/float.c b/src/backend/utils/adt/float.c
index d1e12c14be..c91bb1a305 100644
--- a/src/backend/utils/adt/float.c
+++ b/src/backend/utils/adt/float.c
@@ -243,30 +243,10 @@ Datum
 float4out(PG_FUNCTION_ARGS)
 {
 	float4		num = PG_GETARG_FLOAT4(0);
-	char	   *ascii;
-
-	if (isnan(num))
-		PG_RETURN_CSTRING(pstrdup("NaN"));
-
-	switch (is_infinite(num))
-	{
-		case 1:
-			ascii = pstrdup("Infinity");
-			break;
-		case -1:
-			ascii = pstrdup("-Infinity");
-			break;
-		default:
-			{
-				int			ndig = FLT_DIG + extra_float_digits;
-
-				if (ndig < 1)
-					ndig = 1;
-
-				ascii = psprintf("%.*g", ndig, num);
-			}
-	}
+	char	   *ascii = (char *) palloc(32);
+	int			ndig = FLT_DIG + extra_float_digits;
 
+	(void) pg_strfromd(ascii, 32, ndig, num);
 	PG_RETURN_CSTRING(ascii);
 }
 
@@ -479,30 +459,10 @@ float8out(PG_FUNCTION_ARGS)
 char *
 float8out_internal(double num)
 {
-	char	   *ascii;
-
-	if (isnan(num))
-		return pstrdup("NaN");
-
-	switch (is_infinite(num))
-	{
-		case 1:
-			ascii = pstrdup("Infinity");
-			break;
-		case -1:
-			ascii = pstrdup("-Infinity");
-			break;
-		default:
-			{
-				int			ndig = DBL_DIG + extra_float_digits;
-
-				if (ndig < 1)
-					ndig = 1;
-
-				ascii = psprintf("%.*g", ndig, num);
-			}
-	}
+	char	   *ascii = (char *) palloc(32);
+	int			ndig = DBL_DIG + extra_float_digits;
 
+	(void) pg_strfromd(ascii, 32, ndig, num);
 	return ascii;
 }
 
diff --git a/src/include/port.h b/src/include/port.h
index e654d5cbdf..0729c3fc2e 100644
--- a/src/include/port.h
+++ b/src/include/port.h
@@ -187,6 +187,9 @@ extern int	pg_printf(const char *fmt,...) pg_attribute_printf(1, 2);
 #define fprintf			pg_fprintf
 #define printf(...)		pg_printf(__VA_ARGS__)
 
+/* This is also provided by snprintf.c */
+extern int	pg_strfromd(char *str, size_t count, int precision, double value);
+
 /* Replace strerror() with our own, somewhat more robust wrapper */
 extern char *pg_strerror(int errnum);
 #define strerror pg_strerror
diff --git a/src/port/snprintf.c b/src/port/snprintf.c
index ef496fa4a4..58300eab95 100644
--- a/src/port/snprintf.c
+++ b/src/port/snprintf.c
@@ -33,13 +33,7 @@
 
 #include "c.h"
 
-#include <ctype.h>
-#include <limits.h>
 #include <math.h>
-#ifndef WIN32
-#include <sys/ioctl.h>
-#endif
-#include <sys/param.h>
 
 /*
  * We used to use the platform's NL_ARGMAX here, but that's a bad idea,
@@ -1111,10 +1105,6 @@ fmtfloat(double value, char type, int forcesign, int leftjust,
 	int			zeropadlen = 0; /* amount to pad with zeroes */
 	int			padlen;			/* amount to pad with spaces */
 
-	/* Handle sign (NaNs have no sign) */
-	if (!isnan(value) && adjust_sign((value < 0), forcesign, &signvalue))
-		value = -value;
-
 	/*
 	 * We rely on the regular C library's sprintf to do the basic conversion,
 	 * then handle padding considerations here.
@@ -1128,34 +1118,62 @@ fmtfloat(double value, char type, int forcesign, int leftjust,
 	 * bytes and limit requested precision to 350 digits; this should prevent
 	 * buffer overrun even with non-IEEE math.  If the original precision
 	 * request was more than 350, separately pad with zeroes.
+	 *
+	 * We handle infinities and NaNs specially to ensure platform-independent
+	 * output.
 	 */
 	if (precision < 0)			/* cover possible overflow of "accum" */
 		precision = 0;
 	prec = Min(precision, 350);
 
-	if (pointflag)
+	if (isnan(value))
 	{
-		zeropadlen = precision - prec;
-		fmt[0] = '%';
-		fmt[1] = '.';
-		fmt[2] = '*';
-		fmt[3] = type;
-		fmt[4] = '\0';
-		vallen = sprintf(convert, fmt, prec, value);
+		strcpy(convert, "NaN");
+		vallen = 3;
+		/* no zero padding, regardless of precision spec */
 	}
 	else
 	{
-		fmt[0] = '%';
-		fmt[1] = type;
-		fmt[2] = '\0';
-		vallen = sprintf(convert, fmt, value);
-	}
-	if (vallen < 0)
-		goto fail;
+		/*
+		 * Handle sign (NaNs have no sign, so we don't do this in the case
+		 * above).  "value < 0.0" will not be true for IEEE minus zero, so we
+		 * detect that by looking for the case where value equals 0.0
+		 * according to == but not according to memcmp.
+		 */
+		static const double dzero = 0.0;
 
-	/* If it's infinity or NaN, forget about doing any zero-padding */
-	if (zeropadlen > 0 && !isdigit((unsigned char) convert[vallen - 1]))
-		zeropadlen = 0;
+		if (adjust_sign((value < 0.0 ||
+						 (value == 0.0 &&
+						  memcmp(&value, &dzero, sizeof(double)) != 0)),
+						forcesign, &signvalue))
+			value = -value;
+
+		if (isinf(value))
+		{
+			strcpy(convert, "Infinity");
+			vallen = 8;
+			/* no zero padding, regardless of precision spec */
+		}
+		else if (pointflag)
+		{
+			zeropadlen = precision - prec;
+			fmt[0] = '%';
+			fmt[1] = '.';
+			fmt[2] = '*';
+			fmt[3] = type;
+			fmt[4] = '\0';
+			vallen = sprintf(convert, fmt, prec, value);
+		}
+		else
+		{
+			fmt[0] = '%';
+			fmt[1] = type;
+			fmt[2] = '\0';
+			vallen = sprintf(convert, fmt, value);
+		}
+		if (vallen < 0)
+			goto fail;
+	}
 
 	padlen = compute_padlen(minlen, vallen + zeropadlen, leftjust);
 
@@ -1197,6 +1215,96 @@ fail:
 	target->failed = true;
 }
 
+/*
+ * Nonstandard entry point to print a double value efficiently.
+ *
+ * This is approximately equivalent to strfromd(), but has an API more
+ * adapted to what float8out() wants.  The behavior is like snprintf()
+ * with a format of "%.ng", where n is the specified precision.
+ * However, the target buffer must be nonempty (i.e. count > 0), and
+ * the precision is silently bounded to a sane range.
+ */
+int
+pg_strfromd(char *str, size_t count, int precision, double value)
+{
+	PrintfTarget target;
+	int			signvalue = 0;
+	int			vallen;
+	char		fmt[8];
+	char		convert[64];
+
+	/* Set up the target like pg_snprintf, but require nonempty buffer */
+	Assert(count > 0);
+	target.bufstart = target.bufptr = str;
+	target.bufend = str + count - 1;
+	target.stream = NULL;
+	target.nchars = 0;
+	target.failed = false;
+
+	/*
+	 * We bound precision to a reasonable range; the combination of this and
+	 * the knowledge that we're using "g" format without padding allows the
+	 * convert[] buffer to be reasonably small.
+	 */
+	if (precision < 1)
+		precision = 1;
+	else if (precision > 32)
+		precision = 32;
+
+	/*
+	 * The rest is just an inlined version of the fmtfloat() logic above,
+	 * simplified using the knowledge that no padding is wanted.
+	 */
+	if (isnan(value))
+	{
+		strcpy(convert, "NaN");
+		vallen = 3;
+	}
+	else
+	{
+		static const double dzero = 0.0;
+
+		if (value < 0.0 ||
+			(value == 0.0 &&
+			 memcmp(&value, &dzero, sizeof(double)) != 0))
+		{
+			signvalue = '-';
+			value = -value;
+		}
+
+		if (isinf(value))
+		{
+			strcpy(convert, "Infinity");
+			vallen = 8;
+		}
+		else
+		{
+			fmt[0] = '%';
+			fmt[1] = '.';
+			fmt[2] = '*';
+			fmt[3] = 'g';
+			fmt[4] = '\0';
+			vallen = sprintf(convert, fmt, precision, value);
+			if (vallen < 0)
+			{
+				target.failed = true;
+				goto fail;
+			}
+		}
+	}
+
+	if (signvalue)
+		dopr_outch(signvalue, &target);
+
+	dostr(convert, vallen, &target);
+
+fail:
+	*(target.bufptr) = '\0';
+	return target.failed ? -1 : (target.bufptr - target.bufstart
+								 + target.nchars);
+}
+
+
 static void
 dostr(const char *str, int slen, PrintfTarget *target)
 {