diff --git a/src/backend/utils/adt/varlena.c b/src/backend/utils/adt/varlena.c index 49a4898987..d545c34b6b 100644 --- a/src/backend/utils/adt/varlena.c +++ b/src/backend/utils/adt/varlena.c @@ -62,6 +62,9 @@ typedef struct char *buf2; /* 2nd string, or abbreviation strxfrm() buf */ int buflen1; int buflen2; + int last_len1; /* Length of last buf1 string/strxfrm() blob */ + int last_len2; /* Length of last buf2 string/strxfrm() blob */ + int last_returned; /* Last comparison result (cache) */ bool collate_c; hyperLogLogState abbr_card; /* Abbreviated key cardinality state */ hyperLogLogState full_card; /* Full key cardinality state */ @@ -1832,9 +1835,20 @@ btsortsupport_worker(SortSupport ssup, Oid collid) tss->buflen1 = TEXTBUFLEN; tss->buf2 = palloc(TEXTBUFLEN); tss->buflen2 = TEXTBUFLEN; + /* Start with invalid values */ + tss->last_len1 = -1; + tss->last_len2 = -1; #ifdef HAVE_LOCALE_T tss->locale = locale; #endif + /* + * To avoid somehow confusing a strxfrm() blob and an original string + * within bttextfastcmp_locale(), initialize last returned cache to a + * sentinel value. A platform-specific actual strcoll() return value + * of INT_MIN seems unlikely, but if that occurs it will not cause + * wrong answers. + */ + tss->last_returned = INT_MIN; tss->collate_c = collate_c; ssup->ssup_extra = tss; @@ -1897,6 +1911,7 @@ bttextfastcmp_locale(Datum x, Datum y, SortSupport ssup) { text *arg1 = DatumGetTextPP(x); text *arg2 = DatumGetTextPP(y); + bool arg1_match; TextSortSupport *tss = (TextSortSupport *) ssup->ssup_extra; /* working state */ @@ -1915,6 +1930,11 @@ bttextfastcmp_locale(Datum x, Datum y, SortSupport ssup) /* Fast pre-check for equality, as discussed in varstr_cmp() */ if (len1 == len2 && memcmp(a1p, a2p, len1) == 0) { + /* + * No change in buf1 or buf2 contents, so avoid changing last_len1 or + * last_len2. Existing contents of buffers might still be used by next + * call. + */ result = 0; goto done; } @@ -1932,10 +1952,43 @@ bttextfastcmp_locale(Datum x, Datum y, SortSupport ssup) tss->buf2 = MemoryContextAlloc(ssup->ssup_cxt, tss->buflen2); } - memcpy(tss->buf1, a1p, len1); - tss->buf1[len1] = '\0'; - memcpy(tss->buf2, a2p, len2); - tss->buf2[len2] = '\0'; + /* + * We're likely to be asked to compare the same strings repeatedly, and + * memcmp() is so much cheaper than strcoll() that it pays to try to cache + * comparisons, even though in general there is no reason to think that + * that will work out (every text datum may be unique). Caching does not + * slow things down measurably when it doesn't work out, and can speed + * things up by rather a lot when it does. In part, this is because the + * memcmp() compares data from cachelines that are needed in L1 cache even + * when the last comparison's result cannot be reused. + */ + arg1_match = true; + if (len1 != tss->last_len1 || memcmp(tss->buf1, a1p, len1) != 0) + { + arg1_match = false; + memcpy(tss->buf1, a1p, len1); + tss->buf1[len1] = '\0'; + tss->last_len1 = len1; + } + + /* + * If we're comparing the same two strings as last time, we can return the + * same answer without calling strcoll() again. This is more likely than + * it seems (at least with moderate to low cardinality sets), because + * quicksort compares the same pivot against many values. + */ + if (len2 != tss->last_len2 || memcmp(tss->buf2, a2p, len2) != 0) + { + memcpy(tss->buf2, a2p, len2); + tss->buf2[len2] = '\0'; + tss->last_len2 = len2; + } + else if (arg1_match && tss->last_returned != INT_MIN) + { + /* Use result cached following last actual strcoll() call */ + result = tss->last_returned; + goto done; + } #ifdef HAVE_LOCALE_T if (tss->locale) @@ -1952,6 +2005,8 @@ bttextfastcmp_locale(Datum x, Datum y, SortSupport ssup) if (result == 0) result = strcmp(tss->buf1, tss->buf2); + /* Cache result, perhaps saving an expensive strcoll() call next time */ + tss->last_returned = result; done: /* We can't afford to leak memory here. */ if (PointerGetDatum(arg1) != x) @@ -2034,9 +2089,19 @@ bttext_abbrev_convert(Datum original, SortSupport ssup) tss->buf1 = palloc(tss->buflen1); } + /* Might be able to reuse strxfrm() blob from last call */ + if (tss->last_len1 == len && + memcmp(tss->buf1, authoritative_data, len) == 0) + { + memcpy(pres, tss->buf2, Min(sizeof(Datum), tss->last_len2)); + /* No change affecting cardinality, so no hashing required */ + goto done; + } + /* Just like strcoll(), strxfrm() expects a NUL-terminated string */ memcpy(tss->buf1, authoritative_data, len); tss->buf1[len] = '\0'; + tss->last_len1 = len; for (;;) { @@ -2048,6 +2113,7 @@ bttext_abbrev_convert(Datum original, SortSupport ssup) #endif bsize = strxfrm(tss->buf2, tss->buf1, tss->buflen2); + tss->last_len2 = bsize; if (bsize < tss->buflen2) break; @@ -2105,6 +2171,7 @@ bttext_abbrev_convert(Datum original, SortSupport ssup) addHyperLogLog(&tss->abbr_card, hash); +done: /* * Byteswap on little-endian machines. *