diff --git a/src/backend/utils/adt/Makefile b/src/backend/utils/adt/Makefile index 54d5c37947..b4d55e849b 100644 --- a/src/backend/utils/adt/Makefile +++ b/src/backend/utils/adt/Makefile @@ -125,6 +125,9 @@ clean distclean maintainer-clean: like.o: like.c like_match.c +# Some code in numeric.c benefits from auto-vectorization +numeric.o: CFLAGS += ${CFLAGS_VECTORIZE} + varlena.o: varlena.c levenshtein.c include $(top_srcdir)/src/backend/common.mk diff --git a/src/backend/utils/adt/numeric.c b/src/backend/utils/adt/numeric.c index ed825a1fdd..d2a42b811d 100644 --- a/src/backend/utils/adt/numeric.c +++ b/src/backend/utils/adt/numeric.c @@ -8191,6 +8191,7 @@ mul_var(const NumericVar *var1, const NumericVar *var2, NumericVar *result, int res_weight; int maxdigits; int *dig; + int *dig_i1_2; int carry; int maxdig; int newdig; @@ -8327,10 +8328,18 @@ mul_var(const NumericVar *var1, const NumericVar *var2, NumericVar *result, * * As above, digits of var2 can be ignored if they don't contribute, * so we only include digits for which i1+i2+2 <= res_ndigits - 1. + * + * This inner loop is the performance bottleneck for multiplication, + * so we want to keep it simple enough so that it can be + * auto-vectorized. Accordingly, process the digits left-to-right + * even though schoolbook multiplication would suggest right-to-left. + * Since we aren't propagating carries in this loop, the order does + * not matter. */ - for (i2 = Min(var2ndigits - 1, res_ndigits - i1 - 3), i = i1 + i2 + 2; - i2 >= 0; i2--) - dig[i--] += var1digit * var2digits[i2]; + i = Min(var2ndigits - 1, res_ndigits - i1 - 3); + dig_i1_2 = &dig[i1 + 2]; + for (i2 = 0; i2 <= i; i2++) + dig_i1_2[i2] += var1digit * var2digits[i2]; } /*