diff --git a/src/backend/access/nbtree/nbtsort.c b/src/backend/access/nbtree/nbtsort.c
index ce8b674e41..625f490af8 100644
--- a/src/backend/access/nbtree/nbtsort.c
+++ b/src/backend/access/nbtree/nbtsort.c
@@ -717,6 +717,8 @@ _bt_load(BTWriteState *wstate, BTSpool *btspool, BTSpool *btspool2)
 			sortKey->ssup_nulls_first =
 				(scanKey->sk_flags & SK_BT_NULLS_FIRST) != 0;
 			sortKey->ssup_attno = scanKey->sk_attno;
+			/* Abbreviation is not supported here */
+			sortKey->abbreviate = false;
 
 			AssertState(sortKey->ssup_attno != 0);
 
diff --git a/src/backend/commands/analyze.c b/src/backend/commands/analyze.c
index 5de2b39d10..d2856a379e 100644
--- a/src/backend/commands/analyze.c
+++ b/src/backend/commands/analyze.c
@@ -2301,6 +2301,12 @@ compute_scalar_stats(VacAttrStatsP stats,
 	/* We always use the default collation for statistics */
 	ssup.ssup_collation = DEFAULT_COLLATION_OID;
 	ssup.ssup_nulls_first = false;
+	/*
+	 * For now, don't perform abbreviated key conversion, because full values
+	 * are required for MCV slot generation.  Supporting that optimization
+	 * would necessitate teaching compare_scalars() to call a tie-breaker.
+	 */
+	ssup.abbreviate = false;
 
 	PrepareSortSupportFromOrderingOp(mystats->ltopr, &ssup);
 
diff --git a/src/backend/executor/nodeAgg.c b/src/backend/executor/nodeAgg.c
index 08088ea3c0..8079d97764 100644
--- a/src/backend/executor/nodeAgg.c
+++ b/src/backend/executor/nodeAgg.c
@@ -363,6 +363,10 @@ initialize_aggregates(AggState *aggstate,
 			 * We use a plain Datum sorter when there's a single input column;
 			 * otherwise sort the full tuple.  (See comments for
 			 * process_ordered_aggregate_single.)
+			 *
+			 * In the future, we should consider forcing the
+			 * tuplesort_begin_heap() case when the abbreviated key
+			 * optimization can thereby be used, even when numInputs is 1.
 			 */
 			peraggstate->sortstate =
 				(peraggstate->numInputs == 1) ?
diff --git a/src/backend/executor/nodeMergeAppend.c b/src/backend/executor/nodeMergeAppend.c
index 4e200a8e34..0c814f0e72 100644
--- a/src/backend/executor/nodeMergeAppend.c
+++ b/src/backend/executor/nodeMergeAppend.c
@@ -137,6 +137,15 @@ ExecInitMergeAppend(MergeAppend *node, EState *estate, int eflags)
 		sortKey->ssup_nulls_first = node->nullsFirst[i];
 		sortKey->ssup_attno = node->sortColIdx[i];
 
+		/*
+		 * It isn't feasible to perform abbreviated key conversion, since
+		 * tuples are pulled into mergestate's binary heap as needed.  It would
+		 * likely be counter-productive to convert tuples into an abbreviated
+		 * representation as they're pulled up, so opt out of that additional
+		 * optimization entirely.
+		 */
+		sortKey->abbreviate = false;
+
 		PrepareSortSupportFromOrderingOp(node->sortOperators[i], sortKey);
 	}
 
diff --git a/src/backend/executor/nodeMergejoin.c b/src/backend/executor/nodeMergejoin.c
index 2a5a7acf94..15742c574a 100644
--- a/src/backend/executor/nodeMergejoin.c
+++ b/src/backend/executor/nodeMergejoin.c
@@ -229,6 +229,14 @@ MJExamineQuals(List *mergeclauses,
 			elog(ERROR, "cannot merge using non-equality operator %u",
 				 qual->opno);
 
+		/*
+		 * sortsupport routine must know if abbreviation optimization is
+		 * applicable in principle.  It is never applicable for merge joins
+		 * because there is no convenient opportunity to convert to alternative
+		 * representation.
+		 */
+		clause->ssup.abbreviate = false;
+
 		/* And get the matching support or comparison function */
 		Assert(clause->ssup.comparator == NULL);
 		sortfunc = get_opfamily_proc(opfamily,
diff --git a/src/backend/lib/Makefile b/src/backend/lib/Makefile
index 949ee5e41d..fe4781a8e8 100644
--- a/src/backend/lib/Makefile
+++ b/src/backend/lib/Makefile
@@ -12,6 +12,6 @@ subdir = src/backend/lib
 top_builddir = ../../..
 include $(top_builddir)/src/Makefile.global
 
-OBJS = ilist.o binaryheap.o pairingheap.o rbtree.o stringinfo.o
+OBJS = ilist.o binaryheap.o hyperloglog.o pairingheap.o rbtree.o stringinfo.o
 
 include $(top_srcdir)/src/backend/common.mk
diff --git a/src/backend/lib/hyperloglog.c b/src/backend/lib/hyperloglog.c
new file mode 100644
index 0000000000..1157e9ad76
--- /dev/null
+++ b/src/backend/lib/hyperloglog.c
@@ -0,0 +1,228 @@
+/*-------------------------------------------------------------------------
+ *
+ * hyperloglog.c
+ *	  HyperLogLog cardinality estimator
+ *
+ * Portions Copyright (c) 2014, PostgreSQL Global Development Group
+ *
+ * Based on Hideaki Ohno's C++ implementation.  This is probably not ideally
+ * suited to estimating the cardinality of very large sets;  in particular, we
+ * have not attempted to further optimize the implementation as described in
+ * the Heule, Nunkesser and Hall paper "HyperLogLog in Practice: Algorithmic
+ * Engineering of a State of The Art Cardinality Estimation Algorithm".
+ *
+ * A sparse representation of HyperLogLog state is used, with fixed space
+ * overhead.
+ *
+ * The copyright terms of Ohno's original version (the MIT license) follow.
+ *
+ * IDENTIFICATION
+ *	  src/backend/lib/hyperloglog.c
+ *
+ *-------------------------------------------------------------------------
+ */
+
+/*
+ * Copyright (c) 2013 Hideaki Ohno <hide.o.j55{at}gmail.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the 'Software'), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "postgres.h"
+
+#include <math.h>
+
+#include "lib/hyperloglog.h"
+
+#define POW_2_32			(4294967296.0)
+#define NEG_POW_2_32		(-4294967296.0)
+
+static inline uint8 rho(uint32 x, uint8 b);
+
+/*
+ * Initialize HyperLogLog track state
+ *
+ * bwidth is bit width (so register size will be 2 to the power of bwidth).
+ * Must be between 4 and 16 inclusive.
+ */
+void
+initHyperLogLog(hyperLogLogState *cState, uint8 bwidth)
+{
+	double		alpha;
+
+	if (bwidth < 4 || bwidth > 16)
+		elog(ERROR, "bit width must be between 4 and 16 inclusive");
+
+	cState->registerWidth = bwidth;
+	cState->nRegisters = 1 << bwidth;
+	cState->arrSize = sizeof(uint8) * cState->nRegisters + 1;
+
+	/*
+	 * Initialize hashes array to zero, not negative infinity, per discussion
+	 * of the coupon collector problem in the HyperLogLog paper
+	 */
+	cState->hashesArr = palloc0(cState->arrSize);
+
+	/*
+	 * "alpha" is a value that for each possible number of registers (m) is
+	 * used to correct a systematic multiplicative bias present in m ^ 2 Z (Z
+	 * is "the indicator function" through which we finally compute E,
+	 * estimated cardinality).
+	 */
+	switch (cState->nRegisters)
+	{
+		case 16:
+			alpha = 0.673;
+			break;
+		case 32:
+			alpha = 0.697;
+			break;
+		case 64:
+			alpha = 0.709;
+			break;
+		default:
+			alpha = 0.7213 / (1.0 + 1.079 / cState->nRegisters);
+	}
+
+	/*
+	 * Precalculate alpha m ^ 2, later used to generate "raw" HyperLogLog
+	 * estimate E
+	 */
+	cState->alphaMM = alpha * cState->nRegisters * cState->nRegisters;
+}
+
+/*
+ * Adds element to the estimator, from caller-supplied hash.
+ *
+ * It is critical that the hash value passed be an actual hash value, typically
+ * generated using hash_any().  The algorithm relies on a specific bit-pattern
+ * observable in conjunction with stochastic averaging.  There must be a
+ * uniform distribution of bits in hash values for each distinct original value
+ * observed.
+ */
+void
+addHyperLogLog(hyperLogLogState *cState, uint32 hash)
+{
+	uint8		count;
+	uint32		index;
+
+	/* Use the first "k" (registerWidth) bits as a zero based index */
+	index = hash >> (BITS_PER_BYTE * sizeof(uint32) - cState->registerWidth);
+
+	/* Compute the rank of the remaining 32 - "k" (registerWidth) bits */
+	count = rho(hash << cState->registerWidth,
+				BITS_PER_BYTE * sizeof(uint32) - cState->registerWidth);
+
+	cState->hashesArr[index] = Max(count, cState->hashesArr[index]);
+}
+
+/*
+ * Estimates cardinality, based on elements added so far
+ */
+double
+estimateHyperLogLog(hyperLogLogState *cState)
+{
+	double		result;
+	double		sum = 0.0;
+	int			i;
+
+	for (i = 0; i < cState->nRegisters; i++)
+	{
+		sum += 1.0 / pow(2.0, cState->hashesArr[i]);
+	}
+
+	/* result set to "raw" HyperLogLog estimate (E in the HyperLogLog paper) */
+	result = cState->alphaMM / sum;
+
+	if (result <= (5.0 / 2.0) * cState->nRegisters)
+	{
+		/* Small range correction */
+		int 	zero_count = 0;
+
+		for (i = 0; i < cState->nRegisters; i++)
+		{
+			if (cState->hashesArr[i] == 0)
+				zero_count++;
+		}
+
+		if (zero_count != 0)
+			result = cState->nRegisters * log((double) cState->nRegisters /
+											  zero_count);
+	}
+	else if (result > (1.0 / 30.0) * POW_2_32)
+	{
+		/* Large range correction */
+		result = NEG_POW_2_32 * log(1.0 - (result / POW_2_32));
+	}
+
+	return result;
+}
+
+/*
+ * Merges the estimate from one HyperLogLog state to another, returning the
+ * estimate of their union.
+ *
+ * The number of registers in each must match.
+ */
+void
+mergeHyperLogLog(hyperLogLogState *cState, const hyperLogLogState *oState)
+{
+	int		r;
+
+	if (cState->nRegisters != oState->nRegisters)
+		elog(ERROR, "number of registers mismatch: %zu != %zu",
+			 cState->nRegisters, oState->nRegisters);
+
+	for (r = 0; r < cState->nRegisters; ++r)
+	{
+		cState->hashesArr[r] = Max(cState->hashesArr[r], oState->hashesArr[r]);
+	}
+}
+
+
+/*
+ * Worker for addHyperLogLog().
+ *
+ * Calculates the position of the first set bit in first b bits of x argument
+ * starting from the first, reading from most significant to least significant
+ * bits.
+ *
+ * Example (when considering fist 10 bits of x):
+ *
+ * rho(x = 0b1000000000)   returns 1
+ * rho(x = 0b0010000000)   returns 3
+ * rho(x = 0b0000000000)   returns b + 1
+ *
+ * "The binary address determined by the first b bits of x"
+ *
+ * Return value "j" used to index bit pattern to watch.
+ */
+static inline uint8
+rho(uint32 x, uint8 b)
+{
+	uint8	j = 1;
+
+	while (j <= b && !(x & 0x80000000))
+	{
+		j++;
+		x <<= 1;
+	}
+
+	return j;
+}
diff --git a/src/backend/utils/adt/orderedsetaggs.c b/src/backend/utils/adt/orderedsetaggs.c
index 869a83b185..f9a5f7f93f 100644
--- a/src/backend/utils/adt/orderedsetaggs.c
+++ b/src/backend/utils/adt/orderedsetaggs.c
@@ -266,7 +266,13 @@ ordered_set_startup(FunctionCallInfo fcinfo, bool use_tuples)
 	osastate->qstate = qstate;
 	osastate->gcontext = gcontext;
 
-	/* Initialize tuplesort object */
+	/*
+	 * Initialize tuplesort object.
+	 *
+	 * In the future, we should consider forcing the tuplesort_begin_heap()
+	 * case when the abbreviated key optimization can thereby be used, even
+	 * when !use_tuples.
+	 */
 	if (use_tuples)
 		osastate->sortstate = tuplesort_begin_heap(qstate->tupdesc,
 												   qstate->numSortCols,
diff --git a/src/backend/utils/adt/varlena.c b/src/backend/utils/adt/varlena.c
index e95ed88366..71d47380ac 100644
--- a/src/backend/utils/adt/varlena.c
+++ b/src/backend/utils/adt/varlena.c
@@ -17,9 +17,11 @@
 #include <ctype.h>
 #include <limits.h>
 
+#include "access/hash.h"
 #include "access/tuptoaster.h"
 #include "catalog/pg_collation.h"
 #include "catalog/pg_type.h"
+#include "lib/hyperloglog.h"
 #include "libpq/md5.h"
 #include "libpq/pqformat.h"
 #include "miscadmin.h"
@@ -32,6 +34,9 @@
 #include "utils/pg_locale.h"
 #include "utils/sortsupport.h"
 
+#ifdef DEBUG_ABBREV_KEYS
+#define DEBUG_elog_output	DEBUG1
+#endif
 
 /* GUC variable */
 int			bytea_output = BYTEA_OUTPUT_HEX;
@@ -54,10 +59,12 @@ typedef struct
 
 typedef struct
 {
-	char			   *buf1;		/* 1st string */
-	char			   *buf2;		/* 2nd string */
+	char			   *buf1;		/* 1st string, or abbreviation original string buf */
+	char			   *buf2;		/* 2nd string, or abbreviation strxfrm() buf */
 	int					buflen1;
 	int					buflen2;
+	hyperLogLogState	abbr_card;	/* Abbreviated key cardinality state */
+	hyperLogLogState	full_card;	/* Full key cardinality state */
 #ifdef HAVE_LOCALE_T
 	pg_locale_t locale;
 #endif
@@ -78,6 +85,9 @@ typedef struct
 static void btsortsupport_worker(SortSupport ssup, Oid collid);
 static int bttextfastcmp_c(Datum x, Datum y, SortSupport ssup);
 static int bttextfastcmp_locale(Datum x, Datum y, SortSupport ssup);
+static int bttextcmp_abbrev(Datum x, Datum y, SortSupport ssup);
+static Datum bttext_abbrev_convert(Datum original, SortSupport ssup);
+static bool bttext_abbrev_abort(int memtupcount, SortSupport ssup);
 static int32 text_length(Datum str);
 static text *text_catenate(text *t1, text *t2);
 static text *text_substring(Datum str,
@@ -1735,27 +1745,54 @@ btsortsupport_worker(SortSupport ssup, Oid collid)
 {
 	TextSortSupport	   *tss;
 
-	/*
-	 * If LC_COLLATE = C, we can make things quite a bit faster by using
-	 * memcmp() rather than strcoll().  To minimize the per-comparison
-	 * overhead, we make this decision just once for the whole sort.
-	 */
-	if (lc_collate_is_c(collid))
-	{
-		ssup->comparator = bttextfastcmp_c;
-		return;
-	}
-
 	/*
 	 * WIN32 requires complex hacks when the database encoding is UTF-8 (except
 	 * when using the "C" collation).  For now, we don't optimize that case.
 	 */
 #ifdef WIN32
-	if (GetDatabaseEncoding() == PG_UTF8)
+	if (GetDatabaseEncoding() == PG_UTF8 && !lc_collate_is_c(collid))
 		return;
 #endif
 
 	/*
+	 * On platforms where the abbreviated key for text optimization might have
+	 * bad worst case performance, it may be useful to avoid it entirely by
+	 * disabling it at compile time.  Having only 4 byte datums could make
+	 * worst-case performance drastically more likely, for example.  Moreover,
+	 * Darwin's strxfrm() implementations is known to not effectively
+	 * concentrate a significant amount of entropy from the original string in
+	 * earlier transformed blobs.  It's possible that other supported platforms
+	 * are similarly encumbered.
+	 *
+	 * Any reasonable implementation will pack primary weights into the start
+	 * of returned blobs.  The canonical algorithm's implementation is
+	 * discussed by Unicode Technical Standard #10 ("UNICODE COLLATION
+	 * ALGORITHM"), section 4, "Main algorithm".  Section 4.3, "Form Sort Key"
+	 * is of particular interest:
+	 *
+	 * http://www.unicode.org/reports/tr10/#Step_3
+	 *
+	 * The collation algorithm standard goes on to state:
+	 *
+	 * "By default, the algorithm makes use of three fully-customizable levels.
+	 * For the Latin script, these levels correspond roughly to:
+	 *
+	 * alphabetic ordering
+	 *
+	 * diacritic ordering
+	 *
+	 * case ordering.
+	 *
+	 * A final level may be used for tie-breaking between strings not otherwise
+	 * distinguished."
+	 *
+	 * It is generally expected that most non-equal keys will have their
+	 * comparisons resolved at the primary level.  If enough comparisons can be
+	 * resolved with just 4 or 8 byte abbreviated keys, this optimization is
+	 * very effective (although if there are many tie-breakers that largely
+	 * only perform cheap memcmp() calls, that is also much faster than the
+	 * unoptimized case - see bttext_abbrev_abort()).
+	 *
 	 * We may need a collation-sensitive comparison.  To make things faster,
 	 * we'll figure out the collation based on the locale id and cache the
 	 * result.  Also, since strxfrm()/strcoll() require NUL-terminated inputs,
@@ -1788,13 +1825,47 @@ btsortsupport_worker(SortSupport ssup, Oid collid)
 #endif
 	}
 
-	tss->buf1 = palloc(TEXTBUFLEN);
-	tss->buflen1 = TEXTBUFLEN;
-	tss->buf2 = palloc(TEXTBUFLEN);
-	tss->buflen2 = TEXTBUFLEN;
+	/*
+	 * If LC_COLLATE = C, we can make things quite a bit faster by using
+	 * memcmp() rather than strcoll().  To minimize the per-comparison
+	 * overhead, we make this decision just once for the whole sort.
+	 *
+	 * There is no reason to not at least perform fmgr elision on builds where
+	 * abbreviation is disabled.
+	 */
+	if (lc_collate_is_c(collid))
+		ssup->abbrev_full_comparator = ssup->comparator = bttextfastcmp_c;
+	else
+		ssup->abbrev_full_comparator = ssup->comparator = bttextfastcmp_locale;
 
-	ssup->ssup_extra = tss;
-	ssup->comparator = bttextfastcmp_locale;
+	if (!lc_collate_is_c(collid) || ssup->abbreviate)
+	{
+		/*
+		 * Abbreviated case requires temp buffers for strxfrm() copying.
+		 * bttextfastcmp_locale() also uses these buffers (even if abbreviation
+		 * isn't used), while bttextfast_c() does not.
+		 */
+		tss->buf1 = palloc(TEXTBUFLEN);
+		tss->buflen1 = TEXTBUFLEN;
+		tss->buf2 = palloc(TEXTBUFLEN);
+		tss->buflen2 = TEXTBUFLEN;
+		ssup->ssup_extra = tss;
+	}
+
+	if (!ssup->abbreviate)
+		return;
+
+	initHyperLogLog(&tss->abbr_card, 10);
+	initHyperLogLog(&tss->full_card, 10);
+
+	/*
+	 * Change comparator to be abbreviation-based -- abbreviated version will
+	 * probably ultimately be used during sorting proper, but core code may
+	 * switch back to authoritative comparator should abbreviation be aborted
+	 */
+	ssup->comparator = bttextcmp_abbrev;
+	ssup->abbrev_converter = bttext_abbrev_convert;
+	ssup->abbrev_abort = bttext_abbrev_abort;
 }
 
 /*
@@ -1903,6 +1974,225 @@ done:
 	return result;
 }
 
+/*
+ * Abbreviated key comparison func
+ */
+static int
+bttextcmp_abbrev(Datum x, Datum y, SortSupport ssup)
+{
+	char   *a = (char *) &x;
+	char   *b = (char *) &y;
+	int 	result;
+
+	result = memcmp(a, b, sizeof(Datum));
+
+	/*
+	 * When result = 0, the core system will call bttextfastcmp_c() or
+	 * bttextfastcmp_locale().  Even a strcmp() on two non-truncated strxfrm()
+	 * blobs cannot indicate *equality* authoritatively, for the same reason
+	 * that there is a strcoll() tie-breaker call to strcmp() in varstr_cmp().
+	 */
+	return result;
+}
+
+/*
+ * Conversion routine for sortsupport.  Converts original text to abbreviated
+ * key representation.  Our encoding strategy is simple -- pack the first 8
+ * bytes of a strxfrm() blob into a Datum.
+ */
+static Datum
+bttext_abbrev_convert(Datum original, SortSupport ssup)
+{
+	TextSortSupport	   *tss = (TextSortSupport *) ssup->ssup_extra;
+	text			   *authoritative = DatumGetTextPP(original);
+
+	/* working state */
+	Datum				res;
+	char			   *pres;
+	int					len;
+	Size				bsize;
+	uint32				hash;
+
+	/*
+	 * Abbreviated key representation is a pass-by-value Datum that is treated
+	 * as a char array by the specialized comparator bttextcmp_abbrev().
+	 */
+	pres = (char *) &res;
+	/* memset(), so any non-overwritten bytes are NUL */
+	memset(pres, 0, sizeof(Datum));
+	len = VARSIZE_ANY_EXHDR(authoritative);
+
+	/* By convention, we use buffer 1 to store and NUL-terminate text */
+	if (len >= tss->buflen1)
+	{
+		pfree(tss->buf1);
+		tss->buflen1 = Max(len + 1, Min(tss->buflen1 * 2, MaxAllocSize));
+		tss->buf1 = palloc(tss->buflen1);
+	}
+
+	/* Just like strcoll(), strxfrm() expects a NUL-terminated string */
+	memcpy(tss->buf1, VARDATA_ANY(authoritative), len);
+	tss->buf1[len] = '\0';
+
+	/* Don't leak memory here */
+	if (PointerGetDatum(authoritative) != original)
+		pfree(authoritative);
+
+retry:
+
+	/*
+	 * There is no special handling of the C locale here, unlike with
+	 * varstr_cmp().  strxfrm() is used indifferently.
+	 */
+#ifdef HAVE_LOCALE_T
+	if (tss->locale)
+		bsize = strxfrm_l(tss->buf2, tss->buf1, tss->buflen2, tss->locale);
+	else
+#endif
+		bsize = strxfrm(tss->buf2, tss->buf1, tss->buflen2);
+
+	if (bsize >= tss->buflen2)
+	{
+		/*
+		 * The C standard states that the contents of the buffer is now
+		 * unspecified.  Grow buffer, and retry.
+		 */
+		pfree(tss->buf2);
+		tss->buflen2 = Max(bsize + 1, Min(tss->buflen2 * 2, MaxAllocSize));
+		tss->buf2 = palloc(tss->buflen2);
+		goto retry;
+	}
+
+	/*
+	 * Maintain approximate cardinality of both abbreviated keys and original,
+	 * authoritative keys using HyperLogLog.  Used as cheap insurance against
+	 * the worst case, where we do many string transformations for no saving in
+	 * full strcoll()-based comparisons.  These statistics are used by
+	 * bttext_abbrev_abort().
+	 *
+	 * First, Hash key proper, or a significant fraction of it.  Mix in length
+	 * in order to compensate for cases where differences are past
+	 * CACHE_LINE_SIZE bytes, so as to limit the overhead of hashing.
+	 */
+	hash = hash_any((unsigned char *) tss->buf1, Min(len, PG_CACHE_LINE_SIZE));
+
+	if (len > PG_CACHE_LINE_SIZE)
+		hash ^= DatumGetUInt32(hash_uint32((uint32) len));
+
+	addHyperLogLog(&tss->full_card, hash);
+
+	memcpy(pres, tss->buf2, Min(sizeof(Datum), bsize));
+
+	/* Hash abbreviated key */
+#if SIZEOF_DATUM == 8
+	{
+		uint32				lohalf,
+							hihalf;
+
+		lohalf = (uint32) res;
+		hihalf = (uint32) (res >> 32);
+		hash = hash_uint32(lohalf ^ hihalf);
+	}
+#else							/* SIZEOF_DATUM != 8 */
+	hash = hash_uint32((uint32) res);
+#endif
+
+	addHyperLogLog(&tss->abbr_card, hash);
+
+	/*
+	 * Every Datum byte is always compared.  This is safe because the strxfrm()
+	 * blob is itself NUL terminated, leaving no danger of misinterpreting any
+	 * NUL bytes not intended to be interpreted as logically representing
+	 * termination.
+	 */
+	return res;
+}
+
+/*
+ * Callback for estimating effectiveness of abbreviated key optimization, using
+ * heuristic rules.  Returns value indicating if the abbreviation optimization
+ * should be aborted, based on its projected effectiveness.
+ */
+static bool
+bttext_abbrev_abort(int memtupcount, SortSupport ssup)
+{
+	TextSortSupport	   *tss = (TextSortSupport *) ssup->ssup_extra;
+	double				abbrev_distinct, key_distinct;
+
+	Assert(ssup->abbreviate);
+
+	/* Have a little patience */
+	if (memtupcount < 20)
+		return false;
+
+	abbrev_distinct = estimateHyperLogLog(&tss->abbr_card);
+	key_distinct = estimateHyperLogLog(&tss->full_card);
+
+	/*
+	 * Clamp cardinality estimates to at least one distinct value.  While NULLs
+	 * are generally disregarded, if only NULL values were seen so far, that
+	 * might misrepresent costs if we failed to clamp.
+	 */
+	if (abbrev_distinct <= 1.0)
+		abbrev_distinct = 1.0;
+
+	if (key_distinct <= 1.0)
+		key_distinct = 1.0;
+
+	/*
+	 * In the worst case all abbreviated keys are identical, while at the same
+	 * time there are differences within full key strings not captured in
+	 * abbreviations.
+	 */
+#ifdef DEBUG_ABBREV_KEYS
+	{
+		double norm_abbrev_card = abbrev_distinct / (double) memtupcount;
+
+		elog(DEBUG_elog_output, "abbrev_distinct after %d: %f (key_distinct: %f, norm_abbrev_card: %f)",
+			 memtupcount, abbrev_distinct, key_distinct, norm_abbrev_card);
+	}
+#endif
+
+	/*
+	 * If the number of distinct abbreviated keys approximately matches the
+	 * number of distinct authoritative original keys, that's reason enough to
+	 * proceed.  We can win even with a very low cardinality set if most
+	 * tie-breakers only memcmp().  This is by far the most important
+	 * consideration.
+	 *
+	 * While comparisons that are resolved at the abbreviated key level are
+	 * considerably cheaper than tie-breakers resolved with memcmp(), both of
+	 * those two outcomes are so much cheaper than a full strcoll() once
+	 * sorting is underway that it doesn't seem worth it to weigh abbreviated
+	 * cardinality against the overall size of the set in order to more
+	 * accurately model costs.  Assume that an abbreviated comparison, and an
+	 * abbreviated comparison with a cheap memcmp()-based authoritative
+	 * resolution are equivalent.
+	 */
+	if (abbrev_distinct > key_distinct * 0.05)
+		return false;
+
+	/*
+	 * Abort abbreviation strategy.
+	 *
+	 * The worst case, where all abbreviated keys are identical while all
+	 * original strings differ will typically only see a regression of about
+	 * 10% in execution time for small to medium sized lists of strings.
+	 * Whereas on modern CPUs where cache stalls are the dominant cost, we can
+	 * often expect very large improvements, particularly with sets of strings
+	 * of moderately high to high abbreviated cardinality.  There is little to
+	 * lose but much to gain, which our strategy reflects.
+	 */
+#ifdef DEBUG_ABBREV_KEYS
+	elog(DEBUG_elog_output, "would have aborted abbreviation due to worst-case at %d. abbrev_distinct: %f, key_distinct: %f",
+		 memtupcount, abbrev_distinct, key_distinct);
+	/* Actually abort only when debugging is disabled */
+	return false;
+#endif
+
+	return true;
+}
+
 Datum
 text_larger(PG_FUNCTION_ARGS)
 {
diff --git a/src/backend/utils/sort/tuplesort.c b/src/backend/utils/sort/tuplesort.c
index 4aa3972841..6d3aa889bc 100644
--- a/src/backend/utils/sort/tuplesort.c
+++ b/src/backend/utils/sort/tuplesort.c
@@ -150,7 +150,10 @@ bool		optimize_bounded_sort = true;
  * When sorting single Datums, the data value is represented directly by
  * datum1/isnull1.  If the datatype is pass-by-reference and isnull1 is false,
  * then datum1 points to a separately palloc'd data value that is also pointed
- * to by the "tuple" pointer; otherwise "tuple" is NULL.
+ * to by the "tuple" pointer; otherwise "tuple" is NULL.  There is one special
+ * case:  when the sort support infrastructure provides an "abbreviated key"
+ * representation, where the key is (typically) a pass by value proxy for a
+ * pass by reference type.
  *
  * While building initial runs, tupindex holds the tuple's run number.  During
  * merge passes, we re-use it to hold the input tape number that each tuple in
@@ -346,6 +349,14 @@ struct Tuplesortstate
 	 */
 	SortSupport onlyKey;
 
+	/*
+	 * Additional state for managing "abbreviated key" sortsupport routines
+	 * (which currently may be used by all cases except the Datum sort case and
+	 * hash index case).  Tracks the intervals at which the optimization's
+	 * effectiveness is tested.
+	 */
+	int64		abbrevNext;		/* Tuple # at which to next check applicability */
+
 	/*
 	 * These variables are specific to the CLUSTER case; they are set by
 	 * tuplesort_begin_cluster.
@@ -442,6 +453,7 @@ struct Tuplesortstate
 
 static Tuplesortstate *tuplesort_begin_common(int workMem, bool randomAccess);
 static void puttuple_common(Tuplesortstate *state, SortTuple *tuple);
+static bool consider_abort_common(Tuplesortstate *state);
 static void inittapes(Tuplesortstate *state);
 static void selectnewtape(Tuplesortstate *state);
 static void mergeruns(Tuplesortstate *state);
@@ -619,6 +631,7 @@ tuplesort_begin_heap(TupleDesc tupDesc,
 	state->readtup = readtup_heap;
 
 	state->tupDesc = tupDesc;	/* assume we need not copy tupDesc */
+	state->abbrevNext = 10;
 
 	/* Prepare SortSupport data for each column */
 	state->sortKeys = (SortSupport) palloc0(nkeys * sizeof(SortSupportData));
@@ -634,11 +647,19 @@ tuplesort_begin_heap(TupleDesc tupDesc,
 		sortKey->ssup_collation = sortCollations[i];
 		sortKey->ssup_nulls_first = nullsFirstFlags[i];
 		sortKey->ssup_attno = attNums[i];
+		/* Convey if abbreviation optimization is applicable in principle */
+		sortKey->abbreviate = (i == 0);
 
 		PrepareSortSupportFromOrderingOp(sortOperators[i], sortKey);
 	}
 
-	if (nkeys == 1)
+	/*
+	 * The "onlyKey" optimization cannot be used with abbreviated keys, since
+	 * tie-breaker comparisons may be required.  Typically, the optimization is
+	 * only of value to pass-by-value types anyway, whereas abbreviated keys
+	 * are typically only of value to pass-by-reference types.
+	 */
+	if (nkeys == 1 && !state->sortKeys->abbrev_converter)
 		state->onlyKey = state->sortKeys;
 
 	MemoryContextSwitchTo(oldcontext);
@@ -680,6 +701,7 @@ tuplesort_begin_cluster(TupleDesc tupDesc,
 	state->copytup = copytup_cluster;
 	state->writetup = writetup_cluster;
 	state->readtup = readtup_cluster;
+	state->abbrevNext = 10;
 
 	state->indexInfo = BuildIndexInfo(indexRel);
 
@@ -719,6 +741,8 @@ tuplesort_begin_cluster(TupleDesc tupDesc,
 		sortKey->ssup_nulls_first =
 			(scanKey->sk_flags & SK_BT_NULLS_FIRST) != 0;
 		sortKey->ssup_attno = scanKey->sk_attno;
+		/* Convey if abbreviation optimization is applicable in principle */
+		sortKey->abbreviate = (i == 0);
 
 		AssertState(sortKey->ssup_attno != 0);
 
@@ -768,6 +792,7 @@ tuplesort_begin_index_btree(Relation heapRel,
 	state->copytup = copytup_index;
 	state->writetup = writetup_index;
 	state->readtup = readtup_index;
+	state->abbrevNext = 10;
 
 	state->heapRel = heapRel;
 	state->indexRel = indexRel;
@@ -791,6 +816,8 @@ tuplesort_begin_index_btree(Relation heapRel,
 		sortKey->ssup_nulls_first =
 			(scanKey->sk_flags & SK_BT_NULLS_FIRST) != 0;
 		sortKey->ssup_attno = scanKey->sk_attno;
+		/* Convey if abbreviation optimization is applicable in principle */
+		sortKey->abbreviate = (i == 0);
 
 		AssertState(sortKey->ssup_attno != 0);
 
@@ -883,6 +910,13 @@ tuplesort_begin_datum(Oid datumType, Oid sortOperator, Oid sortCollation,
 	state->onlyKey->ssup_cxt = CurrentMemoryContext;
 	state->onlyKey->ssup_collation = sortCollation;
 	state->onlyKey->ssup_nulls_first = nullsFirstFlag;
+	/*
+	 * Conversion to abbreviated representation infeasible in the Datum case.
+	 * It must be possible to subsequently fetch original datum values within
+	 * tuplesort_getdatum(), which would require special-case preservation of
+	 * original values.
+	 */
+	state->onlyKey->abbreviate = false;
 
 	PrepareSortSupportFromOrderingOp(sortOperator, state->onlyKey);
 
@@ -928,6 +962,19 @@ tuplesort_set_bound(Tuplesortstate *state, int64 bound)
 
 	state->bounded = true;
 	state->bound = (int) bound;
+
+	/*
+	 * Bounded sorts are not an effective target for abbreviated key
+	 * optimization.  Disable by setting state to be consistent with no
+	 * abbreviation support.
+	 */
+	state->sortKeys->abbrev_converter = NULL;
+	if (state->sortKeys->abbrev_full_comparator)
+		state->sortKeys->comparator = state->sortKeys->abbrev_full_comparator;
+
+	/* Not strictly necessary, but be tidy */
+	state->sortKeys->abbrev_abort = NULL;
+	state->sortKeys->abbrev_full_comparator = NULL;
 }
 
 /*
@@ -1186,15 +1233,63 @@ tuplesort_putindextuplevalues(Tuplesortstate *state, Relation rel,
 {
 	MemoryContext oldcontext = MemoryContextSwitchTo(state->sortcontext);
 	SortTuple	stup;
+	Datum		original;
+	IndexTuple	tuple;
 
 	stup.tuple = index_form_tuple(RelationGetDescr(rel), values, isnull);
-	((IndexTuple) stup.tuple)->t_tid = *self;
+	tuple = ((IndexTuple) stup.tuple);
+	tuple->t_tid = *self;
 	USEMEM(state, GetMemoryChunkSpace(stup.tuple));
 	/* set up first-column key value */
-	stup.datum1 = index_getattr((IndexTuple) stup.tuple,
-								1,
-								RelationGetDescr(state->indexRel),
-								&stup.isnull1);
+	original = index_getattr(tuple,
+							 1,
+							 RelationGetDescr(state->indexRel),
+							 &stup.isnull1);
+
+	if (!state->sortKeys->abbrev_converter || stup.isnull1)
+	{
+		/*
+		 * Store ordinary Datum representation, or NULL value.  If there is a
+		 * converter it won't expect NULL values, and cost model is not
+		 * required to account for NULL, so in that case we avoid calling
+		 * converter and just set datum1 to "void" representation (to be
+		 * consistent).
+		 */
+		stup.datum1 = original;
+	}
+	else if (!consider_abort_common(state))
+	{
+		/* Store abbreviated key representation */
+		stup.datum1 = state->sortKeys->abbrev_converter(original,
+														state->sortKeys);
+	}
+	else
+	{
+		/* Abort abbreviation */
+		int		i;
+
+		stup.datum1 = original;
+
+		/*
+		 * Set state to be consistent with never trying abbreviation.
+		 *
+		 * Alter datum1 representation in already-copied tuples, so as to
+		 * ensure a consistent representation (current tuple was just handled).
+		 * Note that we rely on all tuples copied so far actually being
+		 * contained within memtuples array.
+		 */
+		for (i = 0; i < state->memtupcount; i++)
+		{
+			SortTuple *mtup = &state->memtuples[i];
+
+			tuple = mtup->tuple;
+			mtup->datum1 = index_getattr(tuple,
+										 1,
+										 RelationGetDescr(state->indexRel),
+										 &stup.isnull1);
+		}
+	}
+
 	puttuple_common(state, &stup);
 
 	MemoryContextSwitchTo(oldcontext);
@@ -1359,6 +1454,47 @@ puttuple_common(Tuplesortstate *state, SortTuple *tuple)
 	}
 }
 
+static bool
+consider_abort_common(Tuplesortstate *state)
+{
+	Assert(state->sortKeys[0].abbrev_converter != NULL);
+	Assert(state->sortKeys[0].abbrev_abort != NULL);
+	Assert(state->sortKeys[0].abbrev_full_comparator != NULL);
+
+	/*
+	 * Check effectiveness of abbreviation optimization.  Consider aborting
+	 * when still within memory limit.
+	 */
+	if (state->status == TSS_INITIAL &&
+		state->memtupcount >= state->abbrevNext)
+	{
+		state->abbrevNext *= 2;
+
+		/*
+		 * Check opclass-supplied abbreviation abort routine.  It may
+		 * indicate that abbreviation should not proceed.
+		 */
+		if (!state->sortKeys->abbrev_abort(state->memtupcount,
+										   state->sortKeys))
+			return false;
+
+		/*
+		 * Finally, restore authoritative comparator, and indicate that
+		 * abbreviation is not in play by setting abbrev_converter to NULL
+		 */
+		state->sortKeys[0].comparator = state->sortKeys[0].abbrev_full_comparator;
+		state->sortKeys[0].abbrev_converter = NULL;
+		/* Not strictly necessary, but be tidy */
+		state->sortKeys[0].abbrev_abort = NULL;
+		state->sortKeys[0].abbrev_full_comparator = NULL;
+
+		/* Give up - expect original pass-by-value representation */
+		return true;
+	}
+
+	return false;
+}
+
 /*
  * All tuples have been provided; finish the sort.
  */
@@ -2853,6 +2989,12 @@ comparetup_heap(const SortTuple *a, const SortTuple *b, Tuplesortstate *state)
 	TupleDesc	tupDesc;
 	int			nkey;
 	int32		compare;
+	AttrNumber	attno;
+	Datum		datum1,
+				datum2;
+	bool		isnull1,
+				isnull2;
+
 
 	/* Compare the leading sort key */
 	compare = ApplySortComparator(a->datum1, a->isnull1,
@@ -2867,14 +3009,25 @@ comparetup_heap(const SortTuple *a, const SortTuple *b, Tuplesortstate *state)
 	rtup.t_len = ((MinimalTuple) b->tuple)->t_len + MINIMAL_TUPLE_OFFSET;
 	rtup.t_data = (HeapTupleHeader) ((char *) b->tuple - MINIMAL_TUPLE_OFFSET);
 	tupDesc = state->tupDesc;
+
+	if (sortKey->abbrev_converter)
+	{
+		attno = sortKey->ssup_attno;
+
+		datum1 = heap_getattr(&ltup, attno, tupDesc, &isnull1);
+		datum2 = heap_getattr(&rtup, attno, tupDesc, &isnull2);
+
+		compare = ApplySortAbbrevFullComparator(datum1, isnull1,
+												datum2, isnull2,
+												sortKey);
+		if (compare != 0)
+			return compare;
+	}
+
 	sortKey++;
 	for (nkey = 1; nkey < state->nKeys; nkey++, sortKey++)
 	{
-		AttrNumber	attno = sortKey->ssup_attno;
-		Datum		datum1,
-					datum2;
-		bool		isnull1,
-					isnull2;
+		attno = sortKey->ssup_attno;
 
 		datum1 = heap_getattr(&ltup, attno, tupDesc, &isnull1);
 		datum2 = heap_getattr(&rtup, attno, tupDesc, &isnull2);
@@ -2897,6 +3050,7 @@ copytup_heap(Tuplesortstate *state, SortTuple *stup, void *tup)
 	 * MinimalTuple using the exported interface for that.
 	 */
 	TupleTableSlot *slot = (TupleTableSlot *) tup;
+	Datum			original;
 	MinimalTuple tuple;
 	HeapTupleData htup;
 
@@ -2907,10 +3061,58 @@ copytup_heap(Tuplesortstate *state, SortTuple *stup, void *tup)
 	/* set up first-column key value */
 	htup.t_len = tuple->t_len + MINIMAL_TUPLE_OFFSET;
 	htup.t_data = (HeapTupleHeader) ((char *) tuple - MINIMAL_TUPLE_OFFSET);
-	stup->datum1 = heap_getattr(&htup,
-								state->sortKeys[0].ssup_attno,
-								state->tupDesc,
-								&stup->isnull1);
+	original = heap_getattr(&htup,
+							state->sortKeys[0].ssup_attno,
+							state->tupDesc,
+							&stup->isnull1);
+
+	if (!state->sortKeys->abbrev_converter || stup->isnull1)
+	{
+		/*
+		 * Store ordinary Datum representation, or NULL value.  If there is a
+		 * converter it won't expect NULL values, and cost model is not
+		 * required to account for NULL, so in that case we avoid calling
+		 * converter and just set datum1 to "void" representation (to be
+		 * consistent).
+		 */
+		stup->datum1 = original;
+	}
+	else if (!consider_abort_common(state))
+	{
+		/* Store abbreviated key representation */
+		stup->datum1 = state->sortKeys->abbrev_converter(original,
+														 state->sortKeys);
+	}
+	else
+	{
+		/* Abort abbreviation */
+		int		i;
+
+		stup->datum1 = original;
+
+		/*
+		 * Set state to be consistent with never trying abbreviation.
+		 *
+		 * Alter datum1 representation in already-copied tuples, so as to
+		 * ensure a consistent representation (current tuple was just handled).
+		 * Note that we rely on all tuples copied so far actually being
+		 * contained within memtuples array.
+		 */
+		for (i = 0; i < state->memtupcount; i++)
+		{
+			SortTuple *mtup = &state->memtuples[i];
+
+			htup.t_len = ((MinimalTuple) mtup->tuple)->t_len +
+							MINIMAL_TUPLE_OFFSET;
+			htup.t_data = (HeapTupleHeader) ((char *) mtup->tuple -
+							MINIMAL_TUPLE_OFFSET);
+
+			mtup->datum1 = heap_getattr(&htup,
+										state->sortKeys[0].ssup_attno,
+										state->tupDesc,
+										&mtup->isnull1);
+		}
+	}
 }
 
 static void
@@ -2980,13 +3182,35 @@ comparetup_cluster(const SortTuple *a, const SortTuple *b,
 	TupleDesc	tupDesc;
 	int			nkey;
 	int32		compare;
+	Datum		datum1,
+				datum2;
+	bool		isnull1,
+				isnull2;
+	AttrNumber	leading = state->indexInfo->ii_KeyAttrNumbers[0];
+
+	/* Be prepared to compare additional sort keys */
+	ltup = (HeapTuple) a->tuple;
+	rtup = (HeapTuple) b->tuple;
+	tupDesc = state->tupDesc;
 
 	/* Compare the leading sort key, if it's simple */
-	if (state->indexInfo->ii_KeyAttrNumbers[0] != 0)
+	if (leading != 0)
 	{
 		compare = ApplySortComparator(a->datum1, a->isnull1,
 									  b->datum1, b->isnull1,
 									  sortKey);
+		if (compare != 0)
+			return compare;
+
+		if (sortKey->abbrev_converter)
+		{
+			datum1 = heap_getattr(ltup, leading, tupDesc, &isnull1);
+			datum2 = heap_getattr(rtup, leading, tupDesc, &isnull2);
+
+			compare = ApplySortAbbrevFullComparator(datum1, isnull1,
+													datum2, isnull2,
+													sortKey);
+		}
 		if (compare != 0 || state->nKeys == 1)
 			return compare;
 		/* Compare additional columns the hard way */
@@ -2999,22 +3223,13 @@ comparetup_cluster(const SortTuple *a, const SortTuple *b,
 		nkey = 0;
 	}
 
-	/* Compare additional sort keys */
-	ltup = (HeapTuple) a->tuple;
-	rtup = (HeapTuple) b->tuple;
-
 	if (state->indexInfo->ii_Expressions == NULL)
 	{
 		/* If not expression index, just compare the proper heap attrs */
-		tupDesc = state->tupDesc;
 
 		for (; nkey < state->nKeys; nkey++, sortKey++)
 		{
 			AttrNumber	attno = state->indexInfo->ii_KeyAttrNumbers[nkey];
-			Datum		datum1,
-						datum2;
-			bool		isnull1,
-						isnull2;
 
 			datum1 = heap_getattr(ltup, attno, tupDesc, &isnull1);
 			datum2 = heap_getattr(rtup, attno, tupDesc, &isnull2);
@@ -3072,17 +3287,67 @@ static void
 copytup_cluster(Tuplesortstate *state, SortTuple *stup, void *tup)
 {
 	HeapTuple	tuple = (HeapTuple) tup;
+	Datum		original;
 
 	/* copy the tuple into sort storage */
 	tuple = heap_copytuple(tuple);
 	stup->tuple = (void *) tuple;
 	USEMEM(state, GetMemoryChunkSpace(tuple));
-	/* set up first-column key value, if it's a simple column */
-	if (state->indexInfo->ii_KeyAttrNumbers[0] != 0)
-		stup->datum1 = heap_getattr(tuple,
-									state->indexInfo->ii_KeyAttrNumbers[0],
-									state->tupDesc,
-									&stup->isnull1);
+	/*
+	 * set up first-column key value, and potentially abbreviate, if it's a
+	 * simple column
+	 */
+	if (state->indexInfo->ii_KeyAttrNumbers[0] == 0)
+		return;
+
+	original = heap_getattr(tuple,
+							state->indexInfo->ii_KeyAttrNumbers[0],
+							state->tupDesc,
+							&stup->isnull1);
+
+	if (!state->sortKeys->abbrev_converter || stup->isnull1)
+	{
+		/*
+		 * Store ordinary Datum representation, or NULL value.  If there is a
+		 * converter it won't expect NULL values, and cost model is not
+		 * required to account for NULL, so in that case we avoid calling
+		 * converter and just set datum1 to "void" representation (to be
+		 * consistent).
+		 */
+		stup->datum1 = original;
+	}
+	else if (!consider_abort_common(state))
+	{
+		/* Store abbreviated key representation */
+		stup->datum1 = state->sortKeys->abbrev_converter(original,
+														 state->sortKeys);
+	}
+	else
+	{
+		/* Abort abbreviation */
+		int		i;
+
+		stup->datum1 = original;
+
+		/*
+		 * Set state to be consistent with never trying abbreviation.
+		 *
+		 * Alter datum1 representation in already-copied tuples, so as to
+		 * ensure a consistent representation (current tuple was just handled).
+		 * Note that we rely on all tuples copied so far actually being
+		 * contained within memtuples array.
+		 */
+		for (i = 0; i < state->memtupcount; i++)
+		{
+			SortTuple *mtup = &state->memtuples[i];
+
+			tuple = (HeapTuple) mtup->tuple;
+			mtup->datum1 = heap_getattr(tuple,
+										state->indexInfo->ii_KeyAttrNumbers[0],
+										state->tupDesc,
+										&stup->isnull1);
+		}
+	}
 }
 
 static void
@@ -3162,6 +3427,11 @@ comparetup_index_btree(const SortTuple *a, const SortTuple *b,
 	bool		equal_hasnull = false;
 	int			nkey;
 	int32		compare;
+	Datum		datum1,
+				datum2;
+	bool		isnull1,
+				isnull2;
+
 
 	/* Compare the leading sort key */
 	compare = ApplySortComparator(a->datum1, a->isnull1,
@@ -3170,23 +3440,31 @@ comparetup_index_btree(const SortTuple *a, const SortTuple *b,
 	if (compare != 0)
 		return compare;
 
-	/* they are equal, so we only need to examine one null flag */
-	if (a->isnull1)
-		equal_hasnull = true;
-
 	/* Compare additional sort keys */
 	tuple1 = (IndexTuple) a->tuple;
 	tuple2 = (IndexTuple) b->tuple;
 	keysz = state->nKeys;
 	tupDes = RelationGetDescr(state->indexRel);
+
+	if (sortKey->abbrev_converter)
+	{
+		datum1 = index_getattr(tuple1, 1, tupDes, &isnull1);
+		datum2 = index_getattr(tuple2, 1, tupDes, &isnull2);
+
+		compare = ApplySortAbbrevFullComparator(datum1, isnull1,
+												datum2, isnull2,
+												sortKey);
+		if (compare != 0)
+			return compare;
+	}
+
+	/* they are equal, so we only need to examine one null flag */
+	if (a->isnull1)
+		equal_hasnull = true;
+
 	sortKey++;
 	for (nkey = 2; nkey <= keysz; nkey++, sortKey++)
 	{
-		Datum		datum1,
-					datum2;
-		bool		isnull1,
-					isnull2;
-
 		datum1 = index_getattr(tuple1, nkey, tupDes, &isnull1);
 		datum2 = index_getattr(tuple2, nkey, tupDes, &isnull2);
 
@@ -3313,6 +3591,7 @@ copytup_index(Tuplesortstate *state, SortTuple *stup, void *tup)
 	IndexTuple	tuple = (IndexTuple) tup;
 	unsigned int tuplen = IndexTupleSize(tuple);
 	IndexTuple	newtuple;
+	Datum		original;
 
 	/* copy the tuple into sort storage */
 	newtuple = (IndexTuple) palloc(tuplen);
@@ -3320,10 +3599,54 @@ copytup_index(Tuplesortstate *state, SortTuple *stup, void *tup)
 	USEMEM(state, GetMemoryChunkSpace(newtuple));
 	stup->tuple = (void *) newtuple;
 	/* set up first-column key value */
-	stup->datum1 = index_getattr(newtuple,
-								 1,
-								 RelationGetDescr(state->indexRel),
-								 &stup->isnull1);
+	original = index_getattr(newtuple,
+							 1,
+							 RelationGetDescr(state->indexRel),
+							 &stup->isnull1);
+
+	if (!state->sortKeys->abbrev_converter || stup->isnull1)
+	{
+		/*
+		 * Store ordinary Datum representation, or NULL value.  If there is a
+		 * converter it won't expect NULL values, and cost model is not
+		 * required to account for NULL, so in that case we avoid calling
+		 * converter and just set datum1 to "void" representation (to be
+		 * consistent).
+		 */
+		stup->datum1 = original;
+	}
+	else if (!consider_abort_common(state))
+	{
+		/* Store abbreviated key representation */
+		stup->datum1 = state->sortKeys->abbrev_converter(original,
+														 state->sortKeys);
+	}
+	else
+	{
+		/* Abort abbreviation */
+		int		i;
+
+		stup->datum1 = original;
+
+		/*
+		 * Set state to be consistent with never trying abbreviation.
+		 *
+		 * Alter datum1 representation in already-copied tuples, so as to
+		 * ensure a consistent representation (current tuple was just handled).
+		 * Note that we rely on all tuples copied so far actually being
+		 * contained within memtuples array.
+		 */
+		for (i = 0; i < state->memtupcount; i++)
+		{
+			SortTuple *mtup = &state->memtuples[i];
+
+			tuple = (IndexTuple) mtup->tuple;
+			mtup->datum1 = index_getattr(tuple,
+										 1,
+										 RelationGetDescr(state->indexRel),
+										 &stup->isnull1);
+		}
+	}
 }
 
 static void
diff --git a/src/include/lib/hyperloglog.h b/src/include/lib/hyperloglog.h
new file mode 100644
index 0000000000..a6cbffc4c3
--- /dev/null
+++ b/src/include/lib/hyperloglog.h
@@ -0,0 +1,67 @@
+/*
+ * hyperloglog.h
+ *
+ * A simple HyperLogLog cardinality estimator implementation
+ *
+ * Portions Copyright (c) 2014, PostgreSQL Global Development Group
+ *
+ * Based on Hideaki Ohno's C++ implementation.  The copyright terms of Ohno's
+ * original version (the MIT license) follow.
+ *
+ * src/include/lib/hyperloglog.h
+ */
+
+/*
+ * Copyright (c) 2013 Hideaki Ohno <hide.o.j55{at}gmail.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the 'Software'), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#ifndef HYPERLOGLOG_H
+#define HYPERLOGLOG_H
+
+/*
+ * HyperLogLog is an approximate technique for computing the number of distinct
+ * entries in a set.  Importantly, it does this by using a fixed amount of
+ * memory.  See the 2007 paper "HyperLogLog: the analysis of a near-optimal
+ * cardinality estimation algorithm" for more.
+ *
+ * hyperLogLogState
+ *
+ *		registerWidth		register width, in bits ("k")
+ *		nRegisters			number of registers
+ *		alphaMM				alpha * m ^ 2 (see initHyperLogLog())
+ *		hashesArr			array of hashes
+ *		arrSize				size of hashesArr
+ */
+typedef struct hyperLogLogState
+{
+	uint8		registerWidth;
+	Size		nRegisters;
+	double		alphaMM;
+	uint8	   *hashesArr;
+	Size		arrSize;
+} hyperLogLogState;
+
+extern void initHyperLogLog(hyperLogLogState *cState, uint8 bwidth);
+extern void	addHyperLogLog(hyperLogLogState *cState, uint32 hash);
+extern double estimateHyperLogLog(hyperLogLogState *cState);
+extern void mergeHyperLogLog(hyperLogLogState *cState, const hyperLogLogState *oState);
+
+#endif   /* HYPERLOGLOG_H */
diff --git a/src/include/pg_config_manual.h b/src/include/pg_config_manual.h
index cb35697ad0..5cfc0ae1e8 100644
--- a/src/include/pg_config_manual.h
+++ b/src/include/pg_config_manual.h
@@ -214,13 +214,13 @@
 #endif
 
 /*
- * Assumed cache line size. This doesn't affect correctness, but can be
- * used for low-level optimizations. Currently, this is only used to pad
- * some data structures in xlog.c, to ensure that highly-contended fields
- * are on different cache lines. Too small a value can hurt performance due
- * to false sharing, while the only downside of too large a value is a few
- * bytes of wasted memory. The default is 128, which should be large enough
- * for all supported platforms.
+ * Assumed cache line size. This doesn't affect correctness, but can be used
+ * for low-level optimizations. Currently, this is used to pad some data
+ * structures in xlog.c, to ensure that highly-contended fields are on
+ * different cache lines. Too small a value can hurt performance due to false
+ * sharing, while the only downside of too large a value is a few bytes of
+ * wasted memory. The default is 128, which should be large enough for all
+ * supported platforms.
  */
 #define PG_CACHE_LINE_SIZE		128
 
diff --git a/src/include/utils/sortsupport.h b/src/include/utils/sortsupport.h
index f379bb4702..62fedfaaad 100644
--- a/src/include/utils/sortsupport.h
+++ b/src/include/utils/sortsupport.h
@@ -21,7 +21,12 @@
  * required to provide all of them.  The BTSORTSUPPORT function should
  * simply not set any function pointers for mechanisms it doesn't support.
  * Opclasses that provide BTSORTSUPPORT and don't provide a comparator
- * function will have a shim set up by sort support automatically.
+ * function will have a shim set up by sort support automatically.  However,
+ * opclasses that support the optional additional abbreviated key capability
+ * must always provide an authoritative comparator used to tie-break
+ * inconclusive abbreviated comparisons and also used  when aborting
+ * abbreviation.  Furthermore, a converter and abort/costing function must be
+ * provided.
  *
  * All sort support functions will be passed the address of the
  * SortSupportData struct when called, so they can use it to store
@@ -93,12 +98,96 @@ typedef struct SortSupportData
 	 * than, equal to, or greater than y.  Note that x and y are guaranteed
 	 * not null, and there is no way to return null either.  Do not return
 	 * INT_MIN, as callers are allowed to negate the result before using it.
+	 *
+	 * This may be either the authoritative comparator, or the abbreviated
+	 * comparator.  Core code may switch this over the initial preference of an
+	 * opclass support function despite originally indicating abbreviation was
+	 * applicable, by assigning the authoritative comparator back.
 	 */
 	int			(*comparator) (Datum x, Datum y, SortSupport ssup);
 
 	/*
-	 * Additional sort-acceleration functions might be added here later.
+	 * "Abbreviated key" infrastructure follows.
+	 *
+	 * All callbacks must be set by sortsupport opclasses that make use of this
+	 * optional additional infrastructure (unless for whatever reasons the
+	 * opclass doesn't proceed with abbreviation, in which case
+	 * abbrev_converter must not be set).
+	 *
+	 * This allows opclass authors to supply a conversion routine, used to
+	 * create an alternative representation of the underlying type (an
+	 * "abbreviated key").  Typically, this representation is an ad-hoc,
+	 * pass-by-value Datum format that only the opclass has knowledge of.  An
+	 * alternative comparator, used only with this alternative representation
+	 * must also be provided (which is assigned to "comparator").  This
+	 * representation is a simple approximation of the original Datum.  It must
+	 * be possible to compare datums of this representation with each other
+	 * using the supplied alternative comparator, and have any non-zero return
+	 * value be a reliable proxy for what a proper comparison would indicate.
+	 * Returning zero from the alternative comparator does not indicate
+	 * equality, as with a conventional support routine 1, though -- it
+	 * indicates that it wasn't possible to determine how the two abbreviated
+	 * values compared.  A proper comparison, using "abbrev_full_comparator"/
+	 * ApplySortAbbrevFullComparator() is therefore required.  In many cases
+	 * this results in most or all comparisons only using the cheap alternative
+	 * comparison func, which is typically implemented as code that compiles to
+	 * just a few CPU instructions.  CPU cache miss penalties are expensive; to
+	 * get good overall performance, sort infrastructure must heavily weigh
+	 * cache performance.
+	 *
+	 * Opclass authors must consider the final cardinality of abbreviated keys
+	 * when devising an encoding scheme.  It's possible for a strategy to work
+	 * better than an alternative strategy with one usage pattern, while the
+	 * reverse might be true for another usage pattern.  All of these factors
+	 * must be considered.
 	 */
+
+	/*
+	 * "abbreviate" concerns whether or not the abbreviated key optimization is
+	 * applicable in principle (that is, the sortsupport routine needs to know
+	 * if its dealing with a key where an abbreviated representation can
+	 * usefully be packed together.  Conventionally, this is the leading
+	 * attribute key).  Note, however, that in order to determine that
+	 * abbreviation is not in play, the core code always checks whether or not
+	 * the opclass has set abbrev_converter.  This is a one way, one time
+	 * message to the opclass.
+	 */
+	bool			abbreviate;
+
+	/*
+	 * Converter to abbreviated format, from original representation.  Core
+	 * code uses this callback to convert from a pass-by-reference "original"
+	 * Datum to a pass-by-value abbreviated key Datum.  Note that original is
+	 * guaranteed NOT NULL, because it doesn't make sense to factor NULLness
+	 * into ad-hoc cost model.
+	 *
+	 * abbrev_converter is tested to see if abbreviation is in play.  Core code
+	 * may set it to NULL to indicate abbreviation should not be used (which is
+	 * something sortsupport routines need not concern themselves with).
+	 * However, sortsupport routines must not set it when it is immediately
+	 * established that abbreviation should not proceed (for abbreviation
+	 * calls, or platform-specific impediments to using abbreviation).
+	 */
+	Datum			(*abbrev_converter) (Datum original, SortSupport ssup);
+
+	/*
+	 * abbrev_abort callback allows clients to verify that the current strategy
+	 * is working out, using a sortsupport routine defined ad-hoc cost model.
+	 * If there is a lot of duplicate abbreviated keys in practice, it's useful
+	 * to be able to abandon the strategy before paying too high a cost in
+	 * conversion (perhaps certain opclass-specific adaptations are useful
+	 * too).
+	 */
+	bool			(*abbrev_abort) (int memtupcount, SortSupport ssup);
+
+	/*
+	 * Full, authoritative comparator for key that an abbreviated
+	 * representation was generated for, used when an abbreviated comparison
+	 * was inconclusive (by calling ApplySortComparatorFull()), or used to
+	 * replace "comparator" when core system ultimately decides against
+	 * abbreviation.
+	 */
+	int			(*abbrev_full_comparator) (Datum x, Datum y, SortSupport ssup);
 } SortSupportData;
 
 
@@ -110,6 +199,9 @@ typedef struct SortSupportData
 extern int ApplySortComparator(Datum datum1, bool isNull1,
 					Datum datum2, bool isNull2,
 					SortSupport ssup);
+extern int ApplySortAbbrevFullComparator(Datum datum1, bool isNull1,
+							Datum datum2, bool isNull2,
+							SortSupport ssup);
 #endif   /* !PG_USE_INLINE */
 #if defined(PG_USE_INLINE) || defined(SORTSUPPORT_INCLUDE_DEFINITIONS)
 /*
@@ -148,6 +240,44 @@ ApplySortComparator(Datum datum1, bool isNull1,
 
 	return compare;
 }
+
+/*
+ * Apply a sort comparator function and return a 3-way comparison using full,
+ * authoritative comparator.  This takes care of handling reverse-sort and
+ * NULLs-ordering properly.
+ */
+STATIC_IF_INLINE int
+ApplySortAbbrevFullComparator(Datum datum1, bool isNull1,
+							  Datum datum2, bool isNull2,
+							  SortSupport ssup)
+{
+	int			compare;
+
+	if (isNull1)
+	{
+		if (isNull2)
+			compare = 0;		/* NULL "=" NULL */
+		else if (ssup->ssup_nulls_first)
+			compare = -1;		/* NULL "<" NOT_NULL */
+		else
+			compare = 1;		/* NULL ">" NOT_NULL */
+	}
+	else if (isNull2)
+	{
+		if (ssup->ssup_nulls_first)
+			compare = 1;		/* NOT_NULL ">" NULL */
+		else
+			compare = -1;		/* NOT_NULL "<" NULL */
+	}
+	else
+	{
+		compare = (*ssup->abbrev_full_comparator) (datum1, datum2, ssup);
+		if (ssup->ssup_reverse)
+			compare = -compare;
+	}
+
+	return compare;
+}
 #endif   /*-- PG_USE_INLINE || SORTSUPPORT_INCLUDE_DEFINITIONS */
 
 /* Other functions in utils/sort/sortsupport.c */