diff --git a/contrib/amcheck/verify_nbtree.c b/contrib/amcheck/verify_nbtree.c index 6a058ccdac..31717321b0 100644 --- a/contrib/amcheck/verify_nbtree.c +++ b/contrib/amcheck/verify_nbtree.c @@ -145,6 +145,7 @@ static void bt_tuple_present_callback(Relation index, ItemPointer tid, bool tupleIsAlive, void *checkstate); static IndexTuple bt_normalize_tuple(BtreeCheckState *state, IndexTuple itup); +static inline IndexTuple bt_posting_plain_tuple(IndexTuple itup, int n); static bool bt_rootdescend(BtreeCheckState *state, IndexTuple itup); static inline bool offset_is_negative_infinity(BTPageOpaque opaque, OffsetNumber offset); @@ -167,6 +168,7 @@ static ItemId PageGetItemIdCareful(BtreeCheckState *state, BlockNumber block, Page page, OffsetNumber offset); static inline ItemPointer BTreeTupleGetHeapTIDCareful(BtreeCheckState *state, IndexTuple itup, bool nonpivot); +static inline ItemPointer BTreeTupleGetPointsToTID(IndexTuple itup); /* * bt_index_check(index regclass, heapallindexed boolean) @@ -278,7 +280,8 @@ bt_index_check_internal(Oid indrelid, bool parentcheck, bool heapallindexed, if (btree_index_mainfork_expected(indrel)) { - bool heapkeyspace; + bool heapkeyspace, + allequalimage; RelationOpenSmgr(indrel); if (!smgrexists(indrel->rd_smgr, MAIN_FORKNUM)) @@ -288,7 +291,7 @@ bt_index_check_internal(Oid indrelid, bool parentcheck, bool heapallindexed, RelationGetRelationName(indrel)))); /* Check index, possibly against table it is an index on */ - heapkeyspace = _bt_heapkeyspace(indrel); + _bt_metaversion(indrel, &heapkeyspace, &allequalimage); bt_check_every_level(indrel, heaprel, heapkeyspace, parentcheck, heapallindexed, rootdescend); } @@ -419,12 +422,12 @@ bt_check_every_level(Relation rel, Relation heaprel, bool heapkeyspace, /* * Size Bloom filter based on estimated number of tuples in index, * while conservatively assuming that each block must contain at least - * MaxIndexTuplesPerPage / 5 non-pivot tuples. (Non-leaf pages cannot - * contain non-pivot tuples. That's okay because they generally make - * up no more than about 1% of all pages in the index.) + * MaxTIDsPerBTreePage / 3 "plain" tuples -- see + * bt_posting_plain_tuple() for definition, and details of how posting + * list tuples are handled. */ total_pages = RelationGetNumberOfBlocks(rel); - total_elems = Max(total_pages * (MaxIndexTuplesPerPage / 5), + total_elems = Max(total_pages * (MaxTIDsPerBTreePage / 3), (int64) state->rel->rd_rel->reltuples); /* Random seed relies on backend srandom() call to avoid repetition */ seed = random(); @@ -924,6 +927,7 @@ bt_target_page_check(BtreeCheckState *state) size_t tupsize; BTScanInsert skey; bool lowersizelimit; + ItemPointer scantid; CHECK_FOR_INTERRUPTS(); @@ -954,13 +958,15 @@ bt_target_page_check(BtreeCheckState *state) if (!_bt_check_natts(state->rel, state->heapkeyspace, state->target, offset)) { + ItemPointer tid; char *itid, *htid; itid = psprintf("(%u,%u)", state->targetblock, offset); + tid = BTreeTupleGetPointsToTID(itup); htid = psprintf("(%u,%u)", - ItemPointerGetBlockNumberNoCheck(&(itup->t_tid)), - ItemPointerGetOffsetNumberNoCheck(&(itup->t_tid))); + ItemPointerGetBlockNumberNoCheck(tid), + ItemPointerGetOffsetNumberNoCheck(tid)); ereport(ERROR, (errcode(ERRCODE_INDEX_CORRUPTED), @@ -994,18 +1000,20 @@ bt_target_page_check(BtreeCheckState *state) /* * Readonly callers may optionally verify that non-pivot tuples can - * each be found by an independent search that starts from the root + * each be found by an independent search that starts from the root. 
+ * Note that we deliberately don't do individual searches for each + * TID, since the posting list itself is validated by other checks. */ if (state->rootdescend && P_ISLEAF(topaque) && !bt_rootdescend(state, itup)) { + ItemPointer tid = BTreeTupleGetPointsToTID(itup); char *itid, *htid; itid = psprintf("(%u,%u)", state->targetblock, offset); - htid = psprintf("(%u,%u)", - ItemPointerGetBlockNumber(&(itup->t_tid)), - ItemPointerGetOffsetNumber(&(itup->t_tid))); + htid = psprintf("(%u,%u)", ItemPointerGetBlockNumber(tid), + ItemPointerGetOffsetNumber(tid)); ereport(ERROR, (errcode(ERRCODE_INDEX_CORRUPTED), @@ -1017,6 +1025,40 @@ bt_target_page_check(BtreeCheckState *state) (uint32) state->targetlsn))); } + /* + * If tuple is a posting list tuple, make sure posting list TIDs are + * in order + */ + if (BTreeTupleIsPosting(itup)) + { + ItemPointerData last; + ItemPointer current; + + ItemPointerCopy(BTreeTupleGetHeapTID(itup), &last); + + for (int i = 1; i < BTreeTupleGetNPosting(itup); i++) + { + + current = BTreeTupleGetPostingN(itup, i); + + if (ItemPointerCompare(current, &last) <= 0) + { + char *itid = psprintf("(%u,%u)", state->targetblock, offset); + + ereport(ERROR, + (errcode(ERRCODE_INDEX_CORRUPTED), + errmsg_internal("posting list contains misplaced TID in index \"%s\"", + RelationGetRelationName(state->rel)), + errdetail_internal("Index tid=%s posting list offset=%d page lsn=%X/%X.", + itid, i, + (uint32) (state->targetlsn >> 32), + (uint32) state->targetlsn))); + } + + ItemPointerCopy(current, &last); + } + } + /* Build insertion scankey for current page offset */ skey = bt_mkscankey_pivotsearch(state->rel, itup); @@ -1049,13 +1091,14 @@ bt_target_page_check(BtreeCheckState *state) if (tupsize > (lowersizelimit ? BTMaxItemSize(state->target) : BTMaxItemSizeNoHeapTid(state->target))) { + ItemPointer tid = BTreeTupleGetPointsToTID(itup); char *itid, *htid; itid = psprintf("(%u,%u)", state->targetblock, offset); htid = psprintf("(%u,%u)", - ItemPointerGetBlockNumberNoCheck(&(itup->t_tid)), - ItemPointerGetOffsetNumberNoCheck(&(itup->t_tid))); + ItemPointerGetBlockNumberNoCheck(tid), + ItemPointerGetOffsetNumberNoCheck(tid)); ereport(ERROR, (errcode(ERRCODE_INDEX_CORRUPTED), @@ -1074,12 +1117,32 @@ bt_target_page_check(BtreeCheckState *state) { IndexTuple norm; - norm = bt_normalize_tuple(state, itup); - bloom_add_element(state->filter, (unsigned char *) norm, - IndexTupleSize(norm)); - /* Be tidy */ - if (norm != itup) - pfree(norm); + if (BTreeTupleIsPosting(itup)) + { + /* Fingerprint all elements as distinct "plain" tuples */ + for (int i = 0; i < BTreeTupleGetNPosting(itup); i++) + { + IndexTuple logtuple; + + logtuple = bt_posting_plain_tuple(itup, i); + norm = bt_normalize_tuple(state, logtuple); + bloom_add_element(state->filter, (unsigned char *) norm, + IndexTupleSize(norm)); + /* Be tidy */ + if (norm != logtuple) + pfree(norm); + pfree(logtuple); + } + } + else + { + norm = bt_normalize_tuple(state, itup); + bloom_add_element(state->filter, (unsigned char *) norm, + IndexTupleSize(norm)); + /* Be tidy */ + if (norm != itup) + pfree(norm); + } } /* @@ -1087,7 +1150,8 @@ bt_target_page_check(BtreeCheckState *state) * * If there is a high key (if this is not the rightmost page on its * entire level), check that high key actually is upper bound on all - * page items. + * page items. If this is a posting list tuple, we'll need to set + * scantid to be highest TID in posting list. 
* * We prefer to check all items against high key rather than checking * just the last and trusting that the operator class obeys the @@ -1127,17 +1191,22 @@ bt_target_page_check(BtreeCheckState *state) * tuple. (See also: "Notes About Data Representation" in the nbtree * README.) */ + scantid = skey->scantid; + if (state->heapkeyspace && BTreeTupleIsPosting(itup)) + skey->scantid = BTreeTupleGetMaxHeapTID(itup); + if (!P_RIGHTMOST(topaque) && !(P_ISLEAF(topaque) ? invariant_leq_offset(state, skey, P_HIKEY) : invariant_l_offset(state, skey, P_HIKEY))) { + ItemPointer tid = BTreeTupleGetPointsToTID(itup); char *itid, *htid; itid = psprintf("(%u,%u)", state->targetblock, offset); htid = psprintf("(%u,%u)", - ItemPointerGetBlockNumberNoCheck(&(itup->t_tid)), - ItemPointerGetOffsetNumberNoCheck(&(itup->t_tid))); + ItemPointerGetBlockNumberNoCheck(tid), + ItemPointerGetOffsetNumberNoCheck(tid)); ereport(ERROR, (errcode(ERRCODE_INDEX_CORRUPTED), @@ -1150,6 +1219,8 @@ bt_target_page_check(BtreeCheckState *state) (uint32) (state->targetlsn >> 32), (uint32) state->targetlsn))); } + /* Reset, in case scantid was set to (itup) posting tuple's max TID */ + skey->scantid = scantid; /* * * Item order check * @@ -1160,15 +1231,17 @@ bt_target_page_check(BtreeCheckState *state) if (OffsetNumberNext(offset) <= max && !invariant_l_offset(state, skey, OffsetNumberNext(offset))) { + ItemPointer tid; char *itid, *htid, *nitid, *nhtid; itid = psprintf("(%u,%u)", state->targetblock, offset); + tid = BTreeTupleGetPointsToTID(itup); htid = psprintf("(%u,%u)", - ItemPointerGetBlockNumberNoCheck(&(itup->t_tid)), - ItemPointerGetOffsetNumberNoCheck(&(itup->t_tid))); + ItemPointerGetBlockNumberNoCheck(tid), + ItemPointerGetOffsetNumberNoCheck(tid)); nitid = psprintf("(%u,%u)", state->targetblock, OffsetNumberNext(offset)); @@ -1177,9 +1250,10 @@ bt_target_page_check(BtreeCheckState *state) state->target, OffsetNumberNext(offset)); itup = (IndexTuple) PageGetItem(state->target, itemid); + tid = BTreeTupleGetPointsToTID(itup); nhtid = psprintf("(%u,%u)", - ItemPointerGetBlockNumberNoCheck(&(itup->t_tid)), - ItemPointerGetOffsetNumberNoCheck(&(itup->t_tid))); + ItemPointerGetBlockNumberNoCheck(tid), + ItemPointerGetOffsetNumberNoCheck(tid)); ereport(ERROR, (errcode(ERRCODE_INDEX_CORRUPTED), @@ -1953,10 +2027,9 @@ bt_tuple_present_callback(Relation index, ItemPointer tid, Datum *values, * verification. In particular, it won't try to normalize opclass-equal * datums with potentially distinct representations (e.g., btree/numeric_ops * index datums will not get their display scale normalized-away here). - * Normalization may need to be expanded to handle more cases in the future, - * though. For example, it's possible that non-pivot tuples could in the - * future have alternative logically equivalent representations due to using - * the INDEX_ALT_TID_MASK bit to implement intelligent deduplication. + * Caller does normalization for non-pivot tuples that have a posting list, + * since dummy CREATE INDEX callback code generates new tuples with the same + * normalized representation. 
*/ static IndexTuple bt_normalize_tuple(BtreeCheckState *state, IndexTuple itup) @@ -1969,6 +2042,9 @@ bt_normalize_tuple(BtreeCheckState *state, IndexTuple itup) IndexTuple reformed; int i; + /* Caller should only pass "logical" non-pivot tuples here */ + Assert(!BTreeTupleIsPosting(itup) && !BTreeTupleIsPivot(itup)); + /* Easy case: It's immediately clear that tuple has no varlena datums */ if (!IndexTupleHasVarwidths(itup)) return itup; @@ -2031,6 +2107,29 @@ bt_normalize_tuple(BtreeCheckState *state, IndexTuple itup) return reformed; } +/* + * Produce palloc()'d "plain" tuple for nth posting list entry/TID. + * + * In general, deduplication is not supposed to change the logical contents of + * an index. Multiple index tuples are merged together into one equivalent + * posting list index tuple when convenient. + * + * heapallindexed verification must normalize-away this variation in + * representation by converting posting list tuples into two or more "plain" + * tuples. Each tuple must be fingerprinted separately -- there must be one + * tuple for each corresponding Bloom filter probe during the heap scan. + * + * Note: Caller still needs to call bt_normalize_tuple() with returned tuple. + */ +static inline IndexTuple +bt_posting_plain_tuple(IndexTuple itup, int n) +{ + Assert(BTreeTupleIsPosting(itup)); + + /* Returns non-posting-list tuple */ + return _bt_form_posting(itup, BTreeTupleGetPostingN(itup, n), 1); +} + /* * Search for itup in index, starting from fast root page. itup must be a * non-pivot tuple. This is only supported with heapkeyspace indexes, since @@ -2087,6 +2186,7 @@ bt_rootdescend(BtreeCheckState *state, IndexTuple itup) insertstate.itup = itup; insertstate.itemsz = MAXALIGN(IndexTupleSize(itup)); insertstate.itup_key = key; + insertstate.postingoff = 0; insertstate.bounds_valid = false; insertstate.buf = lbuf; @@ -2094,7 +2194,9 @@ bt_rootdescend(BtreeCheckState *state, IndexTuple itup) offnum = _bt_binsrch_insert(state->rel, &insertstate); /* Compare first >= matching item on leaf page, if any */ page = BufferGetPage(lbuf); + /* Should match on first heap TID when tuple has a posting list */ if (offnum <= PageGetMaxOffsetNumber(page) && + insertstate.postingoff <= 0 && _bt_compare(state->rel, key, page, offnum) == 0) exists = true; _bt_relbuf(state->rel, lbuf); @@ -2548,26 +2650,69 @@ PageGetItemIdCareful(BtreeCheckState *state, BlockNumber block, Page page, } /* - * BTreeTupleGetHeapTID() wrapper that lets caller enforce that a heap TID must - * be present in cases where that is mandatory. - * - * This doesn't add much as of BTREE_VERSION 4, since the INDEX_ALT_TID_MASK - * bit is effectively a proxy for whether or not the tuple is a pivot tuple. - * It may become more useful in the future, when non-pivot tuples support their - * own alternative INDEX_ALT_TID_MASK representation. + * BTreeTupleGetHeapTID() wrapper that enforces that a heap TID is present in + * cases where that is mandatory (i.e. for non-pivot tuples) */ static inline ItemPointer BTreeTupleGetHeapTIDCareful(BtreeCheckState *state, IndexTuple itup, bool nonpivot) { - ItemPointer result = BTreeTupleGetHeapTID(itup); - BlockNumber targetblock = state->targetblock; + ItemPointer htid; - if (result == NULL && nonpivot) + /* + * Caller determines whether this is supposed to be a pivot or non-pivot + * tuple using page type and item offset number. Verify that tuple + * metadata agrees with this. 
+ */ + Assert(state->heapkeyspace); + if (BTreeTupleIsPivot(itup) && nonpivot) + ereport(ERROR, + (errcode(ERRCODE_INDEX_CORRUPTED), + errmsg_internal("block %u or its right sibling block or child block in index \"%s\" has unexpected pivot tuple", + state->targetblock, + RelationGetRelationName(state->rel)))); + + if (!BTreeTupleIsPivot(itup) && !nonpivot) + ereport(ERROR, + (errcode(ERRCODE_INDEX_CORRUPTED), + errmsg_internal("block %u or its right sibling block or child block in index \"%s\" has unexpected non-pivot tuple", + state->targetblock, + RelationGetRelationName(state->rel)))); + + htid = BTreeTupleGetHeapTID(itup); + if (!ItemPointerIsValid(htid) && nonpivot) ereport(ERROR, (errcode(ERRCODE_INDEX_CORRUPTED), errmsg("block %u or its right sibling block or child block in index \"%s\" contains non-pivot tuple that lacks a heap TID", - targetblock, RelationGetRelationName(state->rel)))); + state->targetblock, + RelationGetRelationName(state->rel)))); - return result; + return htid; +} + +/* + * Return the "pointed to" TID for itup, which is used to generate a + * descriptive error message. itup must be a "data item" tuple (it wouldn't + * make much sense to call here with a high key tuple, since there won't be a + * valid downlink/block number to display). + * + * Returns either a heap TID (which will be the first heap TID in posting list + * if itup is posting list tuple), or a TID that contains downlink block + * number, plus some encoded metadata (e.g., the number of attributes present + * in itup). + */ +static inline ItemPointer +BTreeTupleGetPointsToTID(IndexTuple itup) +{ + /* + * Rely on the assumption that !heapkeyspace internal page data items will + * correctly return TID with downlink here -- BTreeTupleGetHeapTID() won't + * recognize it as a pivot tuple, but everything still works out because + * the t_tid field is still returned + */ + if (!BTreeTupleIsPivot(itup)) + return BTreeTupleGetHeapTID(itup); + + /* Pivot tuple returns TID with downlink block (heapkeyspace variant) */ + return &itup->t_tid; } diff --git a/doc/src/sgml/btree.sgml b/doc/src/sgml/btree.sgml index fcf771c857..f02e02b0ac 100644 --- a/doc/src/sgml/btree.sgml +++ b/doc/src/sgml/btree.sgml @@ -557,11 +557,208 @@ equalimage(opcintype oid) returns bool Implementation + + This section covers B-Tree index implementation details that may be + of use to advanced users. See + src/backend/access/nbtree/README in the source + distribution for a much more detailed, internals-focused description + of the B-Tree implementation. + + + B-Tree Structure - An introduction to the btree index implementation can be found in - src/backend/access/nbtree/README. + PostgreSQL B-Tree indexes are + multi-level tree structures, where each level of the tree can be + used as a doubly-linked list of pages. A single metapage is stored + in a fixed position at the start of the first segment file of the + index. All other pages are either leaf pages or internal pages. + Leaf pages are the pages on the lowest level of the tree. All + other levels consist of internal pages. Each leaf page contains + tuples that point to table rows. Each internal page contains + tuples that point to the next level down in the tree. Typically, + over 99% of all pages are leaf pages. Both internal pages and leaf + pages use the standard page format described in . + + + New leaf pages are added to a B-Tree index when an existing leaf + page cannot fit an incoming tuple. 
A page + split operation makes room for items that originally + belonged on the overflowing page by moving a portion of the items + to a new page. Page splits must also insert a new + downlink to the new page in the parent page, + which may cause the parent to split in turn. Page splits + cascade upwards in a recursive fashion. When the + root page finally cannot fit a new downlink, a root page + split operation takes place. This adds a new level to + the tree structure by creating a new root page that is one level + above the original root page. + + + + + Deduplication + + A duplicate is a leaf page tuple (a tuple that points to a table + row) where all indexed key columns have values + that match corresponding column values from at least one other leaf + page tuple that's close by in the same index. Duplicate tuples are + quite common in practice. B-Tree indexes can use a special, + space-efficient representation for duplicates when an optional + technique is enabled: deduplication. + + + Deduplication works by periodically merging groups of duplicate + tuples together, forming a single posting list tuple for each + group. The column key value(s) only appear once in this + representation. This is followed by a sorted array of + TIDs that point to rows in the table. This + significantly reduces the storage size of indexes where each value + (or each distinct combination of column values) appears several + times on average. The latency of queries can be reduced + significantly. Overall query throughput may increase + significantly. The overhead of routine index vacuuming may also be + reduced significantly. + + + + While NULL is generally not considered to be equal to any other + value, including NULL, NULL is nevertheless treated as just + another value from the domain of indexed values by the B-Tree + implementation (except when enforcing uniqueness in a unique + index). B-Tree deduplication is therefore just as effective with + duplicates that contain a NULL value. + + + + The deduplication process occurs lazily, when a new item is + inserted that cannot fit on an existing leaf page. This prevents + (or at least delays) leaf page splits. Unlike GIN posting list + tuples, B-Tree posting list tuples do not need to expand every time + a new duplicate is inserted; they are merely an alternative + physical representation of the original logical contents of the + leaf page. This design prioritizes consistent performance with + mixed read-write workloads. Most client applications will at least + see a moderate performance benefit from using deduplication. + Deduplication is enabled by default. + + + Write-heavy workloads that don't benefit from deduplication due to + having few or no duplicate values in indexes will incur a small, + fixed performance penalty (unless deduplication is explicitly + disabled). The deduplicate_items storage + parameter can be used to disable deduplication within individual + indexes. There is never any performance penalty with read-only + workloads, since reading posting list tuples is at least as + efficient as reading the standard tuple representation. Disabling + deduplication isn't usually helpful. + + + B-Tree indexes are not directly aware that under MVCC, there might + be multiple extant versions of the same logical table row; to an + index, each tuple is an independent object that needs its own index + entry. Thus, an update of a row always creates all-new index + entries for the row, even if the key values did not change. 
Some + workloads suffer from index bloat caused by these + implementation-level version duplicates (this is typically a + problem for UPDATE-heavy workloads that cannot + apply the HOT optimization due to modifying at + least one indexed column). B-Tree deduplication does not + distinguish between these implementation-level version duplicates + and conventional duplicates. Deduplication can nevertheless help + with controlling index bloat caused by implementation-level version + churn. + + + + A special heuristic is applied to determine whether a + deduplication pass in a unique index should take place. It can + often skip straight to splitting a leaf page, avoiding a + performance penalty from wasting cycles on unhelpful deduplication + passes. If you're concerned about the overhead of deduplication, + consider setting deduplicate_items = off + selectively. Leaving deduplication enabled in unique indexes has + little downside. + + + + Deduplication cannot be used in all cases due to + implementation-level restrictions. Deduplication safety is + determined when CREATE INDEX or + REINDEX run. + + + Note that deduplication is deemed unsafe and cannot be used in the + following cases involving semantically significant differences + among equal datums: + + + + + + text, varchar, and char + cannot use deduplication when a + nondeterministic collation is used. Case + and accent differences must be preserved among equal datums. + + + + + + numeric cannot use deduplication. Numeric display + scale must be preserved among equal datums. + + + + + + jsonb cannot use deduplication, since the + jsonb B-Tree operator class uses + numeric internally. + + + + + + float4 and float8 cannot use + deduplication. These types have distinct representations for + -0 and 0, which are + nevertheless considered equal. This difference must be + preserved. + + + + + + There is one further implementation-level restriction that may be + lifted in a future version of + PostgreSQL: + + + + + + Container types (such as composite types, arrays, or range + types) cannot use deduplication. + + + + + + There is one further implementation-level restriction that applies + regardless of the operator class or collation used: + + + + + + INCLUDE indexes can never use deduplication. + + + + diff --git a/doc/src/sgml/charset.sgml b/doc/src/sgml/charset.sgml index 057a6bb81a..20cdfabd7b 100644 --- a/doc/src/sgml/charset.sgml +++ b/doc/src/sgml/charset.sgml @@ -928,10 +928,11 @@ CREATE COLLATION ignore_accents (provider = icu, locale = 'und-u-ks-level1-kc-tr nondeterministic collations give a more correct behavior, especially when considering the full power of Unicode and its many special cases, they also have some drawbacks. Foremost, their use leads - to a performance penalty. Also, certain operations are not possible with - nondeterministic collations, such as pattern matching operations. - Therefore, they should be used only in cases where they are specifically - wanted. + to a performance penalty. Note, in particular, that B-tree cannot use + deduplication with indexes that use a nondeterministic collation. Also, + certain operations are not possible with nondeterministic collations, + such as pattern matching operations. Therefore, they should be used + only in cases where they are specifically wanted. 
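The deduplication-safety cases listed above all reduce to one question: can the operator class promise that equal datums are interchangeable as binary images? That promise is made through the equalimage support function whose signature (equalimage(opcintype oid) returns bool) appears in this chapter's hunk header. Purely as an illustration, and not as part of this patch, a hypothetical opclass whose equal datums always have identical representations could register a support function along these lines (example_equalimage is an invented name):

#include "postgres.h"
#include "fmgr.h"

PG_FUNCTION_INFO_V1(example_equalimage);	/* needed if shipped in an extension */

/*
 * Hypothetical "equalimage" support function: report that deduplication is
 * always safe because equal datums always have identical representations.
 * The opcintype argument can be ignored when the answer never varies.
 */
Datum
example_equalimage(PG_FUNCTION_ARGS)
{
	PG_RETURN_BOOL(true);
}

Opclasses for the types listed above (numeric, text under a nondeterministic collation, and so on) cannot make this promise, which is why deduplication is disabled for them.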
diff --git a/doc/src/sgml/citext.sgml b/doc/src/sgml/citext.sgml index 667824fb0b..5986601327 100644 --- a/doc/src/sgml/citext.sgml +++ b/doc/src/sgml/citext.sgml @@ -233,9 +233,10 @@ SELECT * FROM users WHERE nick = 'Larry'; citext is not as efficient as text because the operator functions and the B-tree comparison functions must make copies - of the data and convert it to lower case for comparisons. It is, - however, slightly more efficient than using lower to get - case-insensitive matching. + of the data and convert it to lower case for comparisons. Also, only + text can support B-Tree deduplication. However, + citext is slightly more efficient than using + lower to get case-insensitive matching. diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml index ceda48e0fc..28035f1635 100644 --- a/doc/src/sgml/func.sgml +++ b/doc/src/sgml/func.sgml @@ -16561,10 +16561,11 @@ AND rows. Two rows might have a different binary representation even though comparisons of the two rows with the equality operator is true. The ordering of rows under these comparison operators is deterministic - but not otherwise meaningful. These operators are used internally for - materialized views and might be useful for other specialized purposes - such as replication but are not intended to be generally useful for - writing queries. + but not otherwise meaningful. These operators are used internally + for materialized views and might be useful for other specialized + purposes such as replication and B-Tree deduplication (see ). They are not intended to be + generally useful for writing queries, though. diff --git a/doc/src/sgml/ref/create_index.sgml b/doc/src/sgml/ref/create_index.sgml index ab362a0dc5..a05e2e6b9c 100644 --- a/doc/src/sgml/ref/create_index.sgml +++ b/doc/src/sgml/ref/create_index.sgml @@ -171,6 +171,8 @@ CREATE [ UNIQUE ] INDEX [ CONCURRENTLY ] [ [ IF NOT EXISTS ] @@ -393,10 +395,39 @@ CREATE [ UNIQUE ] INDEX [ CONCURRENTLY ] [ [ IF NOT EXISTS ] - B-tree indexes additionally accept this parameter: + B-tree indexes also accept these parameters: + + deduplicate_items + + deduplicate_items + storage parameter + + + + + Controls usage of the B-tree deduplication technique described + in . Set to + ON or OFF to enable or + disable the optimization. (Alternative spellings of + ON and OFF are allowed as + described in .) The default is + ON. + + + + + Turning deduplicate_items off via + ALTER INDEX prevents future insertions from + triggering deduplication, but does not in itself make existing + posting list tuples use the standard tuple representation. + + + + + vacuum_cleanup_index_scale_factor @@ -451,9 +482,7 @@ CREATE [ UNIQUE ] INDEX [ CONCURRENTLY ] [ [ IF NOT EXISTS ] . It is a Boolean parameter: ON enables fast update, OFF disables it. - (Alternative spellings of ON and OFF are - allowed as described in .) The - default is ON. + The default is ON. 
@@ -805,6 +834,13 @@ CREATE UNIQUE INDEX title_idx ON films (title) INCLUDE (director, rating); + + To create a B-Tree index with deduplication disabled: + +CREATE INDEX title_idx ON films (title) WITH (deduplicate_items = off); + + + To create an index on the expression lower(title), allowing efficient case-insensitive searches: diff --git a/src/backend/access/common/reloptions.c b/src/backend/access/common/reloptions.c index 79430d2b7b..5325dd3f61 100644 --- a/src/backend/access/common/reloptions.c +++ b/src/backend/access/common/reloptions.c @@ -158,6 +158,16 @@ static relopt_bool boolRelOpts[] = }, true }, + { + { + "deduplicate_items", + "Enables \"deduplicate items\" feature for this btree index", + RELOPT_KIND_BTREE, + ShareUpdateExclusiveLock /* since it applies only to later + * inserts */ + }, + true + }, /* list terminator */ {{NULL}} }; diff --git a/src/backend/access/index/genam.c b/src/backend/access/index/genam.c index c16eb05416..dfba5ae39a 100644 --- a/src/backend/access/index/genam.c +++ b/src/backend/access/index/genam.c @@ -276,6 +276,10 @@ BuildIndexValueDescription(Relation indexRelation, /* * Get the latestRemovedXid from the table entries pointed at by the index * tuples being deleted. + * + * Note: index access methods that don't consistently use the standard + * IndexTuple + heap TID item pointer representation will need to provide + * their own version of this function. */ TransactionId index_compute_xid_horizon_for_tuples(Relation irel, diff --git a/src/backend/access/nbtree/Makefile b/src/backend/access/nbtree/Makefile index bf245f5dab..d69808e78c 100644 --- a/src/backend/access/nbtree/Makefile +++ b/src/backend/access/nbtree/Makefile @@ -14,6 +14,7 @@ include $(top_builddir)/src/Makefile.global OBJS = \ nbtcompare.o \ + nbtdedup.o \ nbtinsert.o \ nbtpage.o \ nbtree.o \ diff --git a/src/backend/access/nbtree/README b/src/backend/access/nbtree/README index c60a4d0d9e..6499f5adb7 100644 --- a/src/backend/access/nbtree/README +++ b/src/backend/access/nbtree/README @@ -432,7 +432,10 @@ because we allow LP_DEAD to be set with only a share lock (it's exactly like a hint bit for a heap tuple), but physically removing tuples requires exclusive lock. In the current code we try to remove LP_DEAD tuples when we are otherwise faced with having to split a page to do an insertion (and -hence have exclusive lock on it already). +hence have exclusive lock on it already). Deduplication can also prevent +a page split, but removing LP_DEAD tuples is the preferred approach. +(Note that posting list tuples can only have their LP_DEAD bit set when +every table TID within the posting list is known dead.) This leaves the index in a state where it has no entry for a dead tuple that still exists in the heap. This is not a problem for the current @@ -726,6 +729,134 @@ if it must. When a page that's already full of duplicates must be split, the fallback strategy assumes that duplicates are mostly inserted in ascending heap TID order. The page is split in a way that leaves the left half of the page mostly full, and the right half of the page mostly empty. +The overall effect is that leaf page splits gracefully adapt to inserts of +large groups of duplicates, maximizing space utilization. Note also that +"trapping" large groups of duplicates on the same leaf page like this makes +deduplication more efficient. Deduplication can be performed infrequently, +without merging together existing posting list tuples too often. 
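Before the following sections discuss posting list tuples in detail, a minimal illustration may help. This sketch is not part of the patch (print_posting_tids is a hypothetical helper); it only shows that the heap TIDs in a posting list tuple form a plain sorted array, read through the same accessors the patch uses in amcheck and nbtdedup.c:

#include "postgres.h"
#include "access/nbtree.h"

/*
 * Walk the heap TIDs stored in a posting list tuple.  The TIDs are kept as
 * a simple array in ascending order, so reading them is just a matter of
 * indexing into it via BTreeTupleGetPostingN().
 */
static void
print_posting_tids(IndexTuple itup)
{
	Assert(BTreeTupleIsPosting(itup));

	for (int i = 0; i < BTreeTupleGetNPosting(itup); i++)
	{
		ItemPointer htid = BTreeTupleGetPostingN(itup, i);

		elog(DEBUG1, "posting entry %d: (%u,%u)", i,
			 ItemPointerGetBlockNumberNoCheck(htid),
			 ItemPointerGetOffsetNumberNoCheck(htid));
	}
}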
+ +Notes about deduplication +------------------------- + +We deduplicate non-pivot tuples in non-unique indexes to reduce storage +overhead, and to avoid (or at least delay) page splits. Note that the +goals for deduplication in unique indexes are rather different; see later +section for details. Deduplication alters the physical representation of +tuples without changing the logical contents of the index, and without +adding overhead to read queries. Non-pivot tuples are merged together +into a single physical tuple with a posting list (a simple array of heap +TIDs with the standard item pointer format). Deduplication is always +applied lazily, at the point where it would otherwise be necessary to +perform a page split. It occurs only when LP_DEAD items have been +removed, as our last line of defense against splitting a leaf page. We +can set the LP_DEAD bit with posting list tuples, though only when all +TIDs are known dead. + +Our lazy approach to deduplication allows the page space accounting used +during page splits to have absolutely minimal special case logic for +posting lists. Posting lists can be thought of as extra payload that +suffix truncation will reliably truncate away as needed during page +splits, just like non-key columns from an INCLUDE index tuple. +Incoming/new tuples can generally be treated as non-overlapping plain +items (though see section on posting list splits for information about how +overlapping new/incoming items are really handled). + +The representation of posting lists is almost identical to the posting +lists used by GIN, so it would be straightforward to apply GIN's varbyte +encoding compression scheme to individual posting lists. Posting list +compression would break the assumptions made by posting list splits about +page space accounting (see later section), so it's not clear how +compression could be integrated with nbtree. Besides, posting list +compression does not offer a compelling trade-off for nbtree, since in +general nbtree is optimized for consistent performance with many +concurrent readers and writers. + +A major goal of our lazy approach to deduplication is to limit the +performance impact of deduplication with random updates. Even concurrent +append-only inserts of the same key value will tend to have inserts of +individual index tuples in an order that doesn't quite match heap TID +order. Delaying deduplication minimizes page level fragmentation. + +Deduplication in unique indexes +------------------------------- + +Very often, the range of values that can be placed on a given leaf page in +a unique index is fixed and permanent. For example, a primary key on an +identity column will usually only have page splits caused by the insertion +of new logical rows within the rightmost leaf page. If there is a split +of a non-rightmost leaf page, then the split must have been triggered by +inserts associated with an UPDATE of an existing logical row. Splitting a +leaf page purely to store multiple versions should be considered +pathological, since it permanently degrades the index structure in order +to absorb a temporary burst of duplicates. Deduplication in unique +indexes helps to prevent these pathological page splits. Storing +duplicates in a space efficient manner is not the goal, since in the long +run there won't be any duplicates anyway. Rather, we're buying time for +standard garbage collection mechanisms to run before a page split is +needed. 
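To make the space accounting sketched in the notes above concrete, here is a rough, illustrative helper. It is not part of the patch (projected_space_saving is a hypothetical name), and it assumes that every merged tuple is a plain non-pivot tuple of the same MAXALIGN()'d size; it simply mirrors the arithmetic performed by _bt_dedup_save_htid() and _bt_dedup_finish_pending() in nbtdedup.c further down:

#include "postgres.h"
#include "storage/itemid.h"
#include "storage/itemptr.h"

/*
 * Estimate the space recovered by merging "nmerged" equal-keyed plain tuples
 * of size "plainsz" into one posting list tuple.  The posting list tuple
 * stores the key once, plus one heap TID per merged tuple, while the old
 * representation paid for a whole tuple and a line pointer per duplicate.
 */
static Size
projected_space_saving(Size plainsz, int nmerged)
{
	Size		postingsz;
	Size		before;

	Assert(nmerged > 1);

	/* Same shape as _bt_form_posting(): MAXALIGN(key portion + TID array) */
	postingsz = MAXALIGN(plainsz + nmerged * sizeof(ItemPointerData));

	/* What the duplicates cost as separate tuples, line pointers included */
	before = nmerged * (MAXALIGN(plainsz) + sizeof(ItemIdData));

	return before - (postingsz + sizeof(ItemIdData));
}

For instance, with 8-byte maximum alignment and 6-byte item pointers, 100 duplicates with a 16-byte key tuple occupy about 2,000 bytes of tuples and line pointers, whereas the equivalent posting list tuple needs 616 bytes plus a single 4-byte line pointer.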
+ +Unique index leaf pages only get a deduplication pass when an insertion +(that might have to split the page) observed an existing duplicate on the +page in passing. This is based on the assumption that deduplication will +only work out when _all_ new insertions are duplicates from UPDATEs. This +may mean that we miss an opportunity to delay a page split, but that's +okay because our ultimate goal is to delay leaf page splits _indefinitely_ +(i.e. to prevent them altogether). There is little point in trying to +delay a split that is probably inevitable anyway. This allows us to avoid +the overhead of attempting to deduplicate with unique indexes that always +have few or no duplicates. + +Posting list splits +------------------- + +When the incoming tuple happens to overlap with an existing posting list, +a posting list split is performed. Like a page split, a posting list +split resolves a situation where a new/incoming item "won't fit", while +inserting the incoming item in passing (i.e. as part of the same atomic +action). It's possible (though not particularly likely) that an insert of +a new item on to an almost-full page will overlap with a posting list, +resulting in both a posting list split and a page split. Even then, the +atomic action that splits the posting list also inserts the new item +(since page splits always insert the new item in passing). Including the +posting list split in the same atomic action as the insert avoids problems +caused by concurrent inserts into the same posting list -- the exact +details of how we change the posting list depend upon the new item, and +vice-versa. A single atomic action also minimizes the volume of extra +WAL required for a posting list split, since we don't have to explicitly +WAL-log the original posting list tuple. + +Despite piggy-backing on the same atomic action that inserts a new tuple, +posting list splits can be thought of as a separate, extra action to the +insert itself (or to the page split itself). Posting list splits +conceptually "rewrite" an insert that overlaps with an existing posting +list into an insert that adds its final new item just to the right of the +posting list instead. The size of the posting list won't change, and so +page space accounting code does not need to care about posting list splits +at all. This is an important upside of our design; the page split point +choice logic is very subtle even without it needing to deal with posting +list splits. + +Only a few isolated extra steps are required to preserve the illusion that +the new item never overlapped with an existing posting list in the first +place: the heap TID of the incoming tuple is swapped with the rightmost/max +heap TID from the existing/originally overlapping posting list. Also, the +posting-split-with-page-split case must generate a new high key based on +an imaginary version of the original page that has both the final new item +and the after-list-split posting tuple (page splits usually just operate +against an imaginary version that contains the new item/item that won't +fit). + +This approach avoids inventing an "eager" atomic posting split operation +that splits the posting list without simultaneously finishing the insert +of the incoming item. This alternative design might seem cleaner, but it +creates subtle problems for page space accounting. 
In general, there +might not be enough free space on the page to split a posting list such +that the incoming/new item no longer overlaps with either posting list +half --- the operation could fail before the actual retail insert of the +new item even begins. We'd end up having to handle posting list splits +that need a page split anyway. Besides, supporting variable "split points" +while splitting posting lists won't actually improve overall space +utilization. Notes About Data Representation ------------------------------- diff --git a/src/backend/access/nbtree/nbtdedup.c b/src/backend/access/nbtree/nbtdedup.c new file mode 100644 index 0000000000..e5481f2f93 --- /dev/null +++ b/src/backend/access/nbtree/nbtdedup.c @@ -0,0 +1,830 @@ +/*------------------------------------------------------------------------- + * + * nbtdedup.c + * Deduplicate items in Postgres btrees. + * + * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * + * IDENTIFICATION + * src/backend/access/nbtree/nbtdedup.c + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include "access/nbtree.h" +#include "access/nbtxlog.h" +#include "miscadmin.h" +#include "utils/rel.h" + +static bool _bt_do_singleval(Relation rel, Page page, BTDedupState state, + OffsetNumber minoff, IndexTuple newitem); +static void _bt_singleval_fillfactor(Page page, BTDedupState state, + Size newitemsz); +#ifdef USE_ASSERT_CHECKING +static bool _bt_posting_valid(IndexTuple posting); +#endif + +/* + * Deduplicate items on a leaf page. The page will have to be split by caller + * if we cannot successfully free at least newitemsz (we also need space for + * newitem's line pointer, which isn't included in caller's newitemsz). + * + * The general approach taken here is to perform as much deduplication as + * possible to free as much space as possible. Note, however, that "single + * value" strategy is sometimes used for !checkingunique callers, in which + * case deduplication will leave a few tuples untouched at the end of the + * page. The general idea is to prepare the page for an anticipated page + * split that uses nbtsplitloc.c's "single value" strategy to determine a + * split point. (There is no reason to deduplicate items that will end up on + * the right half of the page after the anticipated page split; better to + * handle those if and when the anticipated right half page gets its own + * deduplication pass, following further inserts of duplicates.) + * + * This function should be called during insertion, when the page doesn't have + * enough space to fit an incoming newitem. If the BTP_HAS_GARBAGE page flag + * was set, caller should have removed any LP_DEAD items by calling + * _bt_vacuum_one_page() before calling here. We may still have to kill + * LP_DEAD items here when the page's BTP_HAS_GARBAGE hint is falsely unset, + * but that should be rare. Also, _bt_vacuum_one_page() won't unset the + * BTP_HAS_GARBAGE flag when it finds no LP_DEAD items, so a successful + * deduplication pass will always clear it, just to keep things tidy. 
+ */ +void +_bt_dedup_one_page(Relation rel, Buffer buf, Relation heapRel, + IndexTuple newitem, Size newitemsz, bool checkingunique) +{ + OffsetNumber offnum, + minoff, + maxoff; + Page page = BufferGetPage(buf); + BTPageOpaque opaque; + Page newpage; + int newpagendataitems = 0; + OffsetNumber deletable[MaxIndexTuplesPerPage]; + BTDedupState state; + int ndeletable = 0; + Size pagesaving = 0; + bool singlevalstrat = false; + int natts = IndexRelationGetNumberOfAttributes(rel); + + /* + * We can't assume that there are no LP_DEAD items. For one thing, VACUUM + * will clear the BTP_HAS_GARBAGE hint without reliably removing items + * that are marked LP_DEAD. We don't want to unnecessarily unset LP_DEAD + * bits when deduplicating items. Allowing it would be correct, though + * wasteful. + */ + opaque = (BTPageOpaque) PageGetSpecialPointer(page); + minoff = P_FIRSTDATAKEY(opaque); + maxoff = PageGetMaxOffsetNumber(page); + for (offnum = minoff; + offnum <= maxoff; + offnum = OffsetNumberNext(offnum)) + { + ItemId itemid = PageGetItemId(page, offnum); + + if (ItemIdIsDead(itemid)) + deletable[ndeletable++] = offnum; + } + + if (ndeletable > 0) + { + _bt_delitems_delete(rel, buf, deletable, ndeletable, heapRel); + + /* + * Return when a split will be avoided. This is equivalent to + * avoiding a split using the usual _bt_vacuum_one_page() path. + */ + if (PageGetFreeSpace(page) >= newitemsz) + return; + + /* + * Reconsider number of items on page, in case _bt_delitems_delete() + * managed to delete an item or two + */ + minoff = P_FIRSTDATAKEY(opaque); + maxoff = PageGetMaxOffsetNumber(page); + } + + /* Passed-in newitemsz is MAXALIGNED but does not include line pointer */ + newitemsz += sizeof(ItemIdData); + + /* + * By here, it's clear that deduplication will definitely be attempted. + * Initialize deduplication state. + * + * It would be possible for maxpostingsize (limit on posting list tuple + * size) to be set to one third of the page. However, it seems like a + * good idea to limit the size of posting lists to one sixth of a page. + * That ought to leave us with a good split point when pages full of + * duplicates can be split several times. + */ + state = (BTDedupState) palloc(sizeof(BTDedupStateData)); + state->deduplicate = true; + state->maxpostingsize = Min(BTMaxItemSize(page) / 2, INDEX_SIZE_MASK); + /* Metadata about base tuple of current pending posting list */ + state->base = NULL; + state->baseoff = InvalidOffsetNumber; + state->basetupsize = 0; + /* Metadata about current pending posting list TIDs */ + state->htids = palloc(state->maxpostingsize); + state->nhtids = 0; + state->nitems = 0; + /* Size of all physical tuples to be replaced by pending posting list */ + state->phystupsize = 0; + /* nintervals should be initialized to zero */ + state->nintervals = 0; + + /* Determine if "single value" strategy should be used */ + if (!checkingunique) + singlevalstrat = _bt_do_singleval(rel, page, state, minoff, newitem); + + /* + * Deduplicate items from page, and write them to newpage. + * + * Copy the original page's LSN into newpage copy. This will become the + * updated version of the page. We need this because XLogInsert will + * examine the LSN and possibly dump it in a page image. 
+ */ + newpage = PageGetTempPageCopySpecial(page); + PageSetLSN(newpage, PageGetLSN(page)); + + /* Copy high key, if any */ + if (!P_RIGHTMOST(opaque)) + { + ItemId hitemid = PageGetItemId(page, P_HIKEY); + Size hitemsz = ItemIdGetLength(hitemid); + IndexTuple hitem = (IndexTuple) PageGetItem(page, hitemid); + + if (PageAddItem(newpage, (Item) hitem, hitemsz, P_HIKEY, + false, false) == InvalidOffsetNumber) + elog(ERROR, "deduplication failed to add highkey"); + } + + for (offnum = minoff; + offnum <= maxoff; + offnum = OffsetNumberNext(offnum)) + { + ItemId itemid = PageGetItemId(page, offnum); + IndexTuple itup = (IndexTuple) PageGetItem(page, itemid); + + Assert(!ItemIdIsDead(itemid)); + + if (offnum == minoff) + { + /* + * No previous/base tuple for the data item -- use the data item + * as base tuple of pending posting list + */ + _bt_dedup_start_pending(state, itup, offnum); + } + else if (state->deduplicate && + _bt_keep_natts_fast(rel, state->base, itup) > natts && + _bt_dedup_save_htid(state, itup)) + { + /* + * Tuple is equal to base tuple of pending posting list. Heap + * TID(s) for itup have been saved in state. + */ + } + else + { + /* + * Tuple is not equal to pending posting list tuple, or + * _bt_dedup_save_htid() opted to not merge current item into + * pending posting list for some other reason (e.g., adding more + * TIDs would have caused posting list to exceed current + * maxpostingsize). + * + * If state contains pending posting list with more than one item, + * form new posting tuple, and actually update the page. Else + * reset the state and move on without modifying the page. + */ + pagesaving += _bt_dedup_finish_pending(newpage, state); + newpagendataitems++; + + if (singlevalstrat) + { + /* + * Single value strategy's extra steps. + * + * Lower maxpostingsize for sixth and final item that might be + * deduplicated by current deduplication pass. When sixth + * item formed/observed, stop deduplicating items. + * + * Note: It's possible that this will be reached even when + * current deduplication pass has yet to merge together some + * existing items. It doesn't matter whether or not the + * current call generated the maxpostingsize-capped duplicate + * tuples at the start of the page. + */ + if (newpagendataitems == 5) + _bt_singleval_fillfactor(page, state, newitemsz); + else if (newpagendataitems == 6) + { + state->deduplicate = false; + singlevalstrat = false; /* won't be back here */ + } + } + + /* itup starts new pending posting list */ + _bt_dedup_start_pending(state, itup, offnum); + } + } + + /* Handle the last item */ + pagesaving += _bt_dedup_finish_pending(newpage, state); + newpagendataitems++; + + /* + * If no items suitable for deduplication were found, newpage must be + * exactly the same as the original page, so just return from function. + * + * We could determine whether or not to proceed on the basis the space + * savings being sufficient to avoid an immediate page split instead. We + * don't do that because there is some small value in nbtsplitloc.c always + * operating against a page that is fully deduplicated (apart from + * newitem). Besides, most of the cost has already been paid. + */ + if (state->nintervals == 0) + { + /* cannot leak memory here */ + pfree(newpage); + pfree(state->htids); + pfree(state); + return; + } + + /* + * By here, it's clear that deduplication will definitely go ahead. + * + * Clear the BTP_HAS_GARBAGE page flag in the unlikely event that it is + * still falsely set, just to keep things tidy. 
(We can't rely on + * _bt_vacuum_one_page() having done this already, and we can't rely on a + * page split or VACUUM getting to it in the near future.) + */ + if (P_HAS_GARBAGE(opaque)) + { + BTPageOpaque nopaque = (BTPageOpaque) PageGetSpecialPointer(newpage); + + nopaque->btpo_flags &= ~BTP_HAS_GARBAGE; + } + + START_CRIT_SECTION(); + + PageRestoreTempPage(newpage, page); + MarkBufferDirty(buf); + + /* XLOG stuff */ + if (RelationNeedsWAL(rel)) + { + XLogRecPtr recptr; + xl_btree_dedup xlrec_dedup; + + xlrec_dedup.nintervals = state->nintervals; + + XLogBeginInsert(); + XLogRegisterBuffer(0, buf, REGBUF_STANDARD); + XLogRegisterData((char *) &xlrec_dedup, SizeOfBtreeDedup); + + /* + * The intervals array is not in the buffer, but pretend that it is. + * When XLogInsert stores the whole buffer, the array need not be + * stored too. + */ + XLogRegisterBufData(0, (char *) state->intervals, + state->nintervals * sizeof(BTDedupInterval)); + + recptr = XLogInsert(RM_BTREE_ID, XLOG_BTREE_DEDUP); + + PageSetLSN(page, recptr); + } + + END_CRIT_SECTION(); + + /* Local space accounting should agree with page accounting */ + Assert(pagesaving < newitemsz || PageGetExactFreeSpace(page) >= newitemsz); + + /* cannot leak memory here */ + pfree(state->htids); + pfree(state); +} + +/* + * Create a new pending posting list tuple based on caller's base tuple. + * + * Every tuple processed by deduplication either becomes the base tuple for a + * posting list, or gets its heap TID(s) accepted into a pending posting list. + * A tuple that starts out as the base tuple for a posting list will only + * actually be rewritten within _bt_dedup_finish_pending() when it turns out + * that there are duplicates that can be merged into the base tuple. + */ +void +_bt_dedup_start_pending(BTDedupState state, IndexTuple base, + OffsetNumber baseoff) +{ + Assert(state->nhtids == 0); + Assert(state->nitems == 0); + Assert(!BTreeTupleIsPivot(base)); + + /* + * Copy heap TID(s) from new base tuple for new candidate posting list + * into working state's array + */ + if (!BTreeTupleIsPosting(base)) + { + memcpy(state->htids, &base->t_tid, sizeof(ItemPointerData)); + state->nhtids = 1; + state->basetupsize = IndexTupleSize(base); + } + else + { + int nposting; + + nposting = BTreeTupleGetNPosting(base); + memcpy(state->htids, BTreeTupleGetPosting(base), + sizeof(ItemPointerData) * nposting); + state->nhtids = nposting; + /* basetupsize should not include existing posting list */ + state->basetupsize = BTreeTupleGetPostingOffset(base); + } + + /* + * Save new base tuple itself -- it'll be needed if we actually create a + * new posting list from new pending posting list. + * + * Must maintain physical size of all existing tuples (including line + * pointer overhead) so that we can calculate space savings on page. + */ + state->nitems = 1; + state->base = base; + state->baseoff = baseoff; + state->phystupsize = MAXALIGN(IndexTupleSize(base)) + sizeof(ItemIdData); + /* Also save baseoff in pending state for interval */ + state->intervals[state->nintervals].baseoff = state->baseoff; +} + +/* + * Save itup heap TID(s) into pending posting list where possible. + * + * Returns bool indicating if the pending posting list managed by state now + * includes itup's heap TID(s). 
+ */ +bool +_bt_dedup_save_htid(BTDedupState state, IndexTuple itup) +{ + int nhtids; + ItemPointer htids; + Size mergedtupsz; + + Assert(!BTreeTupleIsPivot(itup)); + + if (!BTreeTupleIsPosting(itup)) + { + nhtids = 1; + htids = &itup->t_tid; + } + else + { + nhtids = BTreeTupleGetNPosting(itup); + htids = BTreeTupleGetPosting(itup); + } + + /* + * Don't append (have caller finish pending posting list as-is) if + * appending heap TID(s) from itup would put us over maxpostingsize limit. + * + * This calculation needs to match the code used within _bt_form_posting() + * for new posting list tuples. + */ + mergedtupsz = MAXALIGN(state->basetupsize + + (state->nhtids + nhtids) * sizeof(ItemPointerData)); + + if (mergedtupsz > state->maxpostingsize) + return false; + + /* + * Save heap TIDs to pending posting list tuple -- itup can be merged into + * pending posting list + */ + state->nitems++; + memcpy(state->htids + state->nhtids, htids, + sizeof(ItemPointerData) * nhtids); + state->nhtids += nhtids; + state->phystupsize += MAXALIGN(IndexTupleSize(itup)) + sizeof(ItemIdData); + + return true; +} + +/* + * Finalize pending posting list tuple, and add it to the page. Final tuple + * is based on saved base tuple, and saved list of heap TIDs. + * + * Returns space saving from deduplicating to make a new posting list tuple. + * Note that this includes line pointer overhead. This is zero in the case + * where no deduplication was possible. + */ +Size +_bt_dedup_finish_pending(Page newpage, BTDedupState state) +{ + OffsetNumber tupoff; + Size tuplesz; + Size spacesaving; + + Assert(state->nitems > 0); + Assert(state->nitems <= state->nhtids); + Assert(state->intervals[state->nintervals].baseoff == state->baseoff); + + tupoff = OffsetNumberNext(PageGetMaxOffsetNumber(newpage)); + if (state->nitems == 1) + { + /* Use original, unchanged base tuple */ + tuplesz = IndexTupleSize(state->base); + if (PageAddItem(newpage, (Item) state->base, tuplesz, tupoff, + false, false) == InvalidOffsetNumber) + elog(ERROR, "deduplication failed to add tuple to page"); + + spacesaving = 0; + } + else + { + IndexTuple final; + + /* Form a tuple with a posting list */ + final = _bt_form_posting(state->base, state->htids, state->nhtids); + tuplesz = IndexTupleSize(final); + Assert(tuplesz <= state->maxpostingsize); + + /* Save final number of items for posting list */ + state->intervals[state->nintervals].nitems = state->nitems; + + Assert(tuplesz == MAXALIGN(IndexTupleSize(final))); + if (PageAddItem(newpage, (Item) final, tuplesz, tupoff, false, + false) == InvalidOffsetNumber) + elog(ERROR, "deduplication failed to add tuple to page"); + + pfree(final); + spacesaving = state->phystupsize - (tuplesz + sizeof(ItemIdData)); + /* Increment nintervals, since we wrote a new posting list tuple */ + state->nintervals++; + Assert(spacesaving > 0 && spacesaving < BLCKSZ); + } + + /* Reset state for next pending posting list */ + state->nhtids = 0; + state->nitems = 0; + state->phystupsize = 0; + + return spacesaving; +} + +/* + * Determine if page non-pivot tuples (data items) are all duplicates of the + * same value -- if they are, deduplication's "single value" strategy should + * be applied. The general goal of this strategy is to ensure that + * nbtsplitloc.c (which uses its own single value strategy) will find a useful + * split point as further duplicates are inserted, and successive rightmost + * page splits occur among pages that store the same duplicate value. 
When + * the page finally splits, it should end up BTREE_SINGLEVAL_FILLFACTOR% full, + * just like it would if deduplication were disabled. + * + * We expect that affected workloads will require _several_ single value + * strategy deduplication passes (over a page that only stores duplicates) + * before the page is finally split. The first deduplication pass should only + * find regular non-pivot tuples. Later deduplication passes will find + * existing maxpostingsize-capped posting list tuples, which must be skipped + * over. The penultimate pass is generally the first pass that actually + * reaches _bt_singleval_fillfactor(), and so will deliberately leave behind a + * few untouched non-pivot tuples. The final deduplication pass won't free + * any space -- it will skip over everything without merging anything (it + * retraces the steps of the penultimate pass). + * + * Fortunately, having several passes isn't too expensive. Each pass (after + * the first pass) won't spend many cycles on the large posting list tuples + * left by previous passes. Each pass will find a large contiguous group of + * smaller duplicate tuples to merge together at the end of the page. + * + * Note: We deliberately don't bother checking if the high key is a distinct + * value (prior to the TID tiebreaker column) before proceeding, unlike + * nbtsplitloc.c. Its single value strategy only gets applied on the + * rightmost page of duplicates of the same value (other leaf pages full of + * duplicates will get a simple 50:50 page split instead of splitting towards + * the end of the page). There is little point in making the same distinction + * here. + */ +static bool +_bt_do_singleval(Relation rel, Page page, BTDedupState state, + OffsetNumber minoff, IndexTuple newitem) +{ + int natts = IndexRelationGetNumberOfAttributes(rel); + ItemId itemid; + IndexTuple itup; + + itemid = PageGetItemId(page, minoff); + itup = (IndexTuple) PageGetItem(page, itemid); + + if (_bt_keep_natts_fast(rel, newitem, itup) > natts) + { + itemid = PageGetItemId(page, PageGetMaxOffsetNumber(page)); + itup = (IndexTuple) PageGetItem(page, itemid); + + if (_bt_keep_natts_fast(rel, newitem, itup) > natts) + return true; + } + + return false; +} + +/* + * Lower maxpostingsize when using "single value" strategy, to avoid a sixth + * and final maxpostingsize-capped tuple. The sixth and final posting list + * tuple will end up somewhat smaller than the first five. (Note: The first + * five tuples could actually just be very large duplicate tuples that + * couldn't be merged together at all. Deduplication will simply not modify + * the page when that happens.) + * + * When there are six posting lists on the page (after current deduplication + * pass goes on to create/observe a sixth very large tuple), caller should end + * its deduplication pass. It isn't useful to try to deduplicate items that + * are supposed to end up on the new right sibling page following the + * anticipated page split. A future deduplication pass of future right + * sibling page might take care of it. (This is why the first single value + * strategy deduplication pass for a given leaf page will generally find only + * plain non-pivot tuples -- see _bt_do_singleval() comments.) 
+ */ +static void +_bt_singleval_fillfactor(Page page, BTDedupState state, Size newitemsz) +{ + Size leftfree; + int reduction; + + /* This calculation needs to match nbtsplitloc.c */ + leftfree = PageGetPageSize(page) - SizeOfPageHeaderData - + MAXALIGN(sizeof(BTPageOpaqueData)); + /* Subtract size of new high key (includes pivot heap TID space) */ + leftfree -= newitemsz + MAXALIGN(sizeof(ItemPointerData)); + + /* + * Reduce maxpostingsize by an amount equal to target free space on left + * half of page + */ + reduction = leftfree * ((100 - BTREE_SINGLEVAL_FILLFACTOR) / 100.0); + if (state->maxpostingsize > reduction) + state->maxpostingsize -= reduction; + else + state->maxpostingsize = 0; +} + +/* + * Build a posting list tuple based on caller's "base" index tuple and list of + * heap TIDs. When nhtids == 1, builds a standard non-pivot tuple without a + * posting list. (Posting list tuples can never have a single heap TID, partly + * because that ensures that deduplication always reduces final MAXALIGN()'d + * size of entire tuple.) + * + * Convention is that posting list starts at a MAXALIGN()'d offset (rather + * than a SHORTALIGN()'d offset), in line with the approach taken when + * appending a heap TID to new pivot tuple/high key during suffix truncation. + * This sometimes wastes a little space that was only needed as alignment + * padding in the original tuple. Following this convention simplifies the + * space accounting used when deduplicating a page (the same convention + * simplifies the accounting for choosing a point to split a page at). + * + * Note: Caller's "htids" array must be unique and already in ascending TID + * order. Any existing heap TIDs from "base" won't automatically appear in + * returned posting list tuple (they must be included in htids array.) + */ +IndexTuple +_bt_form_posting(IndexTuple base, ItemPointer htids, int nhtids) +{ + uint32 keysize, + newsize; + IndexTuple itup; + + if (BTreeTupleIsPosting(base)) + keysize = BTreeTupleGetPostingOffset(base); + else + keysize = IndexTupleSize(base); + + Assert(!BTreeTupleIsPivot(base)); + Assert(nhtids > 0 && nhtids <= PG_UINT16_MAX); + Assert(keysize == MAXALIGN(keysize)); + + /* Determine final size of new tuple */ + if (nhtids > 1) + newsize = MAXALIGN(keysize + + nhtids * sizeof(ItemPointerData)); + else + newsize = keysize; + + Assert(newsize <= INDEX_SIZE_MASK); + Assert(newsize == MAXALIGN(newsize)); + + /* Allocate memory using palloc0() (matches index_form_tuple()) */ + itup = palloc0(newsize); + memcpy(itup, base, keysize); + itup->t_info &= ~INDEX_SIZE_MASK; + itup->t_info |= newsize; + if (nhtids > 1) + { + /* Form posting list tuple */ + BTreeTupleSetPosting(itup, nhtids, keysize); + memcpy(BTreeTupleGetPosting(itup), htids, + sizeof(ItemPointerData) * nhtids); + Assert(_bt_posting_valid(itup)); + } + else + { + /* Form standard non-pivot tuple */ + itup->t_info &= ~INDEX_ALT_TID_MASK; + ItemPointerCopy(htids, &itup->t_tid); + Assert(ItemPointerIsValid(&itup->t_tid)); + } + + return itup; +} + +/* + * Generate a replacement tuple by "updating" a posting list tuple so that it + * no longer has TIDs that need to be deleted. + * + * Used by VACUUM. Caller's vacposting argument points to the existing + * posting list tuple to be updated. + * + * On return, caller's vacposting argument will point to final "updated" + * tuple, which will be palloc()'d in caller's memory context. 
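+ *
+ * For example, with an original posting list of four heap TIDs and a
+ * vacposting whose ndeletedtids is 2 and whose deletetids[] is {1, 3},
+ * the replacement tuple keeps only the TIDs at positions 0 and 2, ending
+ * up as a posting list tuple with two heap TIDs.  Had three of the four
+ * TIDs been listed in deletetids[], the replacement would instead be a
+ * standard non-pivot tuple, since posting list tuples never store just a
+ * single heap TID.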
+ */ +void +_bt_update_posting(BTVacuumPosting vacposting) +{ + IndexTuple origtuple = vacposting->itup; + uint32 keysize, + newsize; + IndexTuple itup; + int nhtids; + int ui, + d; + ItemPointer htids; + + nhtids = BTreeTupleGetNPosting(origtuple) - vacposting->ndeletedtids; + + Assert(_bt_posting_valid(origtuple)); + Assert(nhtids > 0 && nhtids < BTreeTupleGetNPosting(origtuple)); + + if (BTreeTupleIsPosting(origtuple)) + keysize = BTreeTupleGetPostingOffset(origtuple); + else + keysize = IndexTupleSize(origtuple); + + /* + * Determine final size of new tuple. + * + * This calculation needs to match the code used within _bt_form_posting() + * for new posting list tuples. We avoid calling _bt_form_posting() here + * to save ourselves a second memory allocation for a htids workspace. + */ + if (nhtids > 1) + newsize = MAXALIGN(keysize + + nhtids * sizeof(ItemPointerData)); + else + newsize = keysize; + + /* Allocate memory using palloc0() (matches index_form_tuple()) */ + itup = palloc0(newsize); + memcpy(itup, origtuple, keysize); + itup->t_info &= ~INDEX_SIZE_MASK; + itup->t_info |= newsize; + + if (nhtids > 1) + { + /* Form posting list tuple */ + BTreeTupleSetPosting(itup, nhtids, keysize); + htids = BTreeTupleGetPosting(itup); + } + else + { + /* Form standard non-pivot tuple */ + itup->t_info &= ~INDEX_ALT_TID_MASK; + htids = &itup->t_tid; + } + + ui = 0; + d = 0; + for (int i = 0; i < BTreeTupleGetNPosting(origtuple); i++) + { + if (d < vacposting->ndeletedtids && vacposting->deletetids[d] == i) + { + d++; + continue; + } + htids[ui++] = *BTreeTupleGetPostingN(origtuple, i); + } + Assert(ui == nhtids); + Assert(d == vacposting->ndeletedtids); + Assert(nhtids == 1 || _bt_posting_valid(itup)); + + /* vacposting arg's itup will now point to updated version */ + vacposting->itup = itup; +} + +/* + * Prepare for a posting list split by swapping heap TID in newitem with heap + * TID from original posting list (the 'oposting' heap TID located at offset + * 'postingoff'). Modifies newitem, so caller should pass their own private + * copy that can safely be modified. + * + * Returns new posting list tuple, which is palloc()'d in caller's context. + * This is guaranteed to be the same size as 'oposting'. Modified newitem is + * what caller actually inserts. (This happens inside the same critical + * section that performs an in-place update of old posting list using new + * posting list returned here.) + * + * While the keys from newitem and oposting must be opclass equal, and must + * generate identical output when run through the underlying type's output + * function, it doesn't follow that their representations match exactly. + * Caller must avoid assuming that there can't be representational differences + * that make datums from oposting bigger or smaller than the corresponding + * datums from newitem. For example, differences in TOAST input state might + * break a faulty assumption about tuple size (the executor is entitled to + * apply TOAST compression based on its own criteria). It also seems possible + * that further representational variation will be introduced in the future, + * in order to support nbtree features like page-level prefix compression. + * + * See nbtree/README for details on the design of posting list splits. 
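+ *
+ * To sketch the swap itself: if oposting's posting list holds heap TIDs
+ * {A, B, D} and newitem arrives with heap TID C (making postingoff 2),
+ * then the returned nposting holds {A, B, C}, while newitem is modified
+ * to carry D, oposting's original rightmost/max heap TID.  Caller then
+ * overwrites oposting with nposting in place, and inserts the modified
+ * newitem at the very next page offset.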
+ */ +IndexTuple +_bt_swap_posting(IndexTuple newitem, IndexTuple oposting, int postingoff) +{ + int nhtids; + char *replacepos; + char *replaceposright; + Size nmovebytes; + IndexTuple nposting; + + nhtids = BTreeTupleGetNPosting(oposting); + Assert(_bt_posting_valid(oposting)); + Assert(postingoff > 0 && postingoff < nhtids); + + /* + * Move item pointers in posting list to make a gap for the new item's + * heap TID. We shift TIDs one place to the right, losing original + * rightmost TID. (nmovebytes must not include TIDs to the left of + * postingoff, nor the existing rightmost/max TID that gets overwritten.) + */ + nposting = CopyIndexTuple(oposting); + replacepos = (char *) BTreeTupleGetPostingN(nposting, postingoff); + replaceposright = (char *) BTreeTupleGetPostingN(nposting, postingoff + 1); + nmovebytes = (nhtids - postingoff - 1) * sizeof(ItemPointerData); + memmove(replaceposright, replacepos, nmovebytes); + + /* Fill the gap at postingoff with TID of new item (original new TID) */ + Assert(!BTreeTupleIsPivot(newitem) && !BTreeTupleIsPosting(newitem)); + ItemPointerCopy(&newitem->t_tid, (ItemPointer) replacepos); + + /* Now copy oposting's rightmost/max TID into new item (final new TID) */ + ItemPointerCopy(BTreeTupleGetMaxHeapTID(oposting), &newitem->t_tid); + + Assert(ItemPointerCompare(BTreeTupleGetMaxHeapTID(nposting), + BTreeTupleGetHeapTID(newitem)) < 0); + Assert(_bt_posting_valid(nposting)); + + return nposting; +} + +/* + * Verify posting list invariants for "posting", which must be a posting list + * tuple. Used within assertions. + */ +#ifdef USE_ASSERT_CHECKING +static bool +_bt_posting_valid(IndexTuple posting) +{ + ItemPointerData last; + ItemPointer htid; + + if (!BTreeTupleIsPosting(posting) || BTreeTupleGetNPosting(posting) < 2) + return false; + + /* Remember first heap TID for loop */ + ItemPointerCopy(BTreeTupleGetHeapTID(posting), &last); + if (!ItemPointerIsValid(&last)) + return false; + + /* Iterate, starting from second TID */ + for (int i = 1; i < BTreeTupleGetNPosting(posting); i++) + { + htid = BTreeTupleGetPostingN(posting, i); + + if (!ItemPointerIsValid(htid)) + return false; + if (ItemPointerCompare(htid, &last) <= 0) + return false; + ItemPointerCopy(htid, &last); + } + + return true; +} +#endif diff --git a/src/backend/access/nbtree/nbtinsert.c b/src/backend/access/nbtree/nbtinsert.c index 4e5849ab8e..b913543221 100644 --- a/src/backend/access/nbtree/nbtinsert.c +++ b/src/backend/access/nbtree/nbtinsert.c @@ -47,10 +47,12 @@ static void _bt_insertonpg(Relation rel, BTScanInsert itup_key, BTStack stack, IndexTuple itup, OffsetNumber newitemoff, + int postingoff, bool split_only_page); static Buffer _bt_split(Relation rel, BTScanInsert itup_key, Buffer buf, Buffer cbuf, OffsetNumber newitemoff, Size newitemsz, - IndexTuple newitem); + IndexTuple newitem, IndexTuple orignewitem, + IndexTuple nposting, uint16 postingoff); static void _bt_insert_parent(Relation rel, Buffer buf, Buffer rbuf, BTStack stack, bool is_root, bool is_only); static bool _bt_pgaddtup(Page page, Size itemsize, IndexTuple itup, @@ -125,6 +127,7 @@ _bt_doinsert(Relation rel, IndexTuple itup, insertstate.itup_key = itup_key; insertstate.bounds_valid = false; insertstate.buf = InvalidBuffer; + insertstate.postingoff = 0; /* * It's very common to have an index on an auto-incremented or @@ -295,7 +298,7 @@ top: newitemoff = _bt_findinsertloc(rel, &insertstate, checkingunique, stack, heapRel); _bt_insertonpg(rel, itup_key, insertstate.buf, InvalidBuffer, stack, - itup, newitemoff, 
false); + itup, newitemoff, insertstate.postingoff, false); } else { @@ -340,6 +343,8 @@ _bt_check_unique(Relation rel, BTInsertState insertstate, Relation heapRel, uint32 *speculativeToken) { IndexTuple itup = insertstate->itup; + IndexTuple curitup; + ItemId curitemid; BTScanInsert itup_key = insertstate->itup_key; SnapshotData SnapshotDirty; OffsetNumber offset; @@ -348,6 +353,9 @@ _bt_check_unique(Relation rel, BTInsertState insertstate, Relation heapRel, BTPageOpaque opaque; Buffer nbuf = InvalidBuffer; bool found = false; + bool inposting = false; + bool prevalldead = true; + int curposti = 0; /* Assume unique until we find a duplicate */ *is_unique = true; @@ -375,13 +383,21 @@ _bt_check_unique(Relation rel, BTInsertState insertstate, Relation heapRel, Assert(itup_key->scantid == NULL); for (;;) { - ItemId curitemid; - IndexTuple curitup; - BlockNumber nblkno; - /* - * make sure the offset points to an actual item before trying to - * examine it... + * Each iteration of the loop processes one heap TID, not one index + * tuple. Current offset number for page isn't usually advanced on + * iterations that process heap TIDs from posting list tuples. + * + * "inposting" state is set when _inside_ a posting list --- not when + * we're at the start (or end) of a posting list. We advance curposti + * at the end of the iteration when inside a posting list tuple. In + * general, every loop iteration either advances the page offset or + * advances curposti --- an iteration that handles the rightmost/max + * heap TID in a posting list finally advances the page offset (and + * unsets "inposting"). + * + * Make sure the offset points to an actual index tuple before trying + * to examine it... */ if (offset <= maxoff) { @@ -406,31 +422,60 @@ _bt_check_unique(Relation rel, BTInsertState insertstate, Relation heapRel, break; } - curitemid = PageGetItemId(page, offset); - /* - * We can skip items that are marked killed. + * We can skip items that are already marked killed. * * In the presence of heavy update activity an index may contain * many killed items with the same key; running _bt_compare() on * each killed item gets expensive. Just advance over killed * items as quickly as we can. We only apply _bt_compare() when - * we get to a non-killed item. Even those comparisons could be - * avoided (in the common case where there is only one page to - * visit) by reusing bounds, but just skipping dead items is fast - * enough. + * we get to a non-killed item. We could reuse the bounds to + * avoid _bt_compare() calls for known equal tuples, but it + * doesn't seem worth it. Workloads with heavy update activity + * tend to have many deduplication passes, so we'll often avoid + * most of those comparisons, too (we call _bt_compare() when the + * posting list tuple is initially encountered, though not when + * processing later TIDs from the same tuple). */ - if (!ItemIdIsDead(curitemid)) + if (!inposting) + curitemid = PageGetItemId(page, offset); + if (inposting || !ItemIdIsDead(curitemid)) { ItemPointerData htid; bool all_dead; - if (_bt_compare(rel, itup_key, page, offset) != 0) - break; /* we're past all the equal tuples */ + if (!inposting) + { + /* Plain tuple, or first TID in posting list tuple */ + if (_bt_compare(rel, itup_key, page, offset) != 0) + break; /* we're past all the equal tuples */ - /* okay, we gotta fetch the heap tuple ... 
*/ - curitup = (IndexTuple) PageGetItem(page, curitemid); - htid = curitup->t_tid; + /* Advanced curitup */ + curitup = (IndexTuple) PageGetItem(page, curitemid); + Assert(!BTreeTupleIsPivot(curitup)); + } + + /* okay, we gotta fetch the heap tuple using htid ... */ + if (!BTreeTupleIsPosting(curitup)) + { + /* ... htid is from simple non-pivot tuple */ + Assert(!inposting); + htid = curitup->t_tid; + } + else if (!inposting) + { + /* ... htid is first TID in new posting list */ + inposting = true; + prevalldead = true; + curposti = 0; + htid = *BTreeTupleGetPostingN(curitup, 0); + } + else + { + /* ... htid is second or subsequent TID in posting list */ + Assert(curposti > 0); + htid = *BTreeTupleGetPostingN(curitup, curposti); + } /* * If we are doing a recheck, we expect to find the tuple we @@ -506,8 +551,7 @@ _bt_check_unique(Relation rel, BTInsertState insertstate, Relation heapRel, * not part of this chain because it had a different index * entry. */ - htid = itup->t_tid; - if (table_index_fetch_tuple_check(heapRel, &htid, + if (table_index_fetch_tuple_check(heapRel, &itup->t_tid, SnapshotSelf, NULL)) { /* Normal case --- it's still live */ @@ -565,12 +609,14 @@ _bt_check_unique(Relation rel, BTInsertState insertstate, Relation heapRel, RelationGetRelationName(rel)))); } } - else if (all_dead) + else if (all_dead && (!inposting || + (prevalldead && + curposti == BTreeTupleGetNPosting(curitup) - 1))) { /* - * The conflicting tuple (or whole HOT chain) is dead to - * everyone, so we may as well mark the index entry - * killed. + * The conflicting tuple (or all HOT chains pointed to by + * all posting list TIDs) is dead to everyone, so mark the + * index entry killed. */ ItemIdMarkDead(curitemid); opaque->btpo_flags |= BTP_HAS_GARBAGE; @@ -584,14 +630,29 @@ _bt_check_unique(Relation rel, BTInsertState insertstate, Relation heapRel, else MarkBufferDirtyHint(insertstate->buf, true); } + + /* + * Remember if posting list tuple has even a single HOT chain + * whose members are not all dead + */ + if (!all_dead && inposting) + prevalldead = false; } } - /* - * Advance to next tuple to continue checking. 
- */ - if (offset < maxoff) + if (inposting && curposti < BTreeTupleGetNPosting(curitup) - 1) + { + /* Advance to next TID in same posting list */ + curposti++; + continue; + } + else if (offset < maxoff) + { + /* Advance to next tuple */ + curposti = 0; + inposting = false; offset = OffsetNumberNext(offset); + } else { int highkeycmp; @@ -606,7 +667,8 @@ _bt_check_unique(Relation rel, BTInsertState insertstate, Relation heapRel, /* Advance to next non-dead page --- there must be one */ for (;;) { - nblkno = opaque->btpo_next; + BlockNumber nblkno = opaque->btpo_next; + nbuf = _bt_relandgetbuf(rel, nbuf, nblkno, BT_READ); page = BufferGetPage(nbuf); opaque = (BTPageOpaque) PageGetSpecialPointer(page); @@ -616,6 +678,9 @@ _bt_check_unique(Relation rel, BTInsertState insertstate, Relation heapRel, elog(ERROR, "fell off the end of index \"%s\"", RelationGetRelationName(rel)); } + /* Will also advance to next tuple */ + curposti = 0; + inposting = false; maxoff = PageGetMaxOffsetNumber(page); offset = P_FIRSTDATAKEY(opaque); /* Don't invalidate binary search bounds */ @@ -684,6 +749,7 @@ _bt_findinsertloc(Relation rel, BTScanInsert itup_key = insertstate->itup_key; Page page = BufferGetPage(insertstate->buf); BTPageOpaque lpageop; + OffsetNumber newitemoff; lpageop = (BTPageOpaque) PageGetSpecialPointer(page); @@ -696,9 +762,13 @@ _bt_findinsertloc(Relation rel, Assert(!insertstate->bounds_valid || checkingunique); Assert(!itup_key->heapkeyspace || itup_key->scantid != NULL); Assert(itup_key->heapkeyspace || itup_key->scantid == NULL); + Assert(!itup_key->allequalimage || itup_key->heapkeyspace); if (itup_key->heapkeyspace) { + /* Keep track of whether checkingunique duplicate seen */ + bool uniquedup = false; + /* * If we're inserting into a unique index, we may have to walk right * through leaf pages to find the one leaf page that we must insert on @@ -715,6 +785,13 @@ _bt_findinsertloc(Relation rel, */ if (checkingunique) { + if (insertstate->low < insertstate->stricthigh) + { + /* Encountered a duplicate in _bt_check_unique() */ + Assert(insertstate->bounds_valid); + uniquedup = true; + } + for (;;) { /* @@ -741,18 +818,43 @@ _bt_findinsertloc(Relation rel, /* Update local state after stepping right */ page = BufferGetPage(insertstate->buf); lpageop = (BTPageOpaque) PageGetSpecialPointer(page); + /* Assume duplicates (if checkingunique) */ + uniquedup = true; } } /* * If the target page is full, see if we can obtain enough space by - * erasing LP_DEAD items + * erasing LP_DEAD items. If that fails to free enough space, see if + * we can avoid a page split by performing a deduplication pass over + * the page. + * + * We only perform a deduplication pass for a checkingunique caller + * when the incoming item is a duplicate of an existing item on the + * leaf page. This heuristic avoids wasting cycles -- we only expect + * to benefit from deduplicating a unique index page when most or all + * recently added items are duplicates. See nbtree/README. 
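+		 *
+		 * For example, a non-unique index page that cannot fit newitemsz
+		 * always gets a deduplication pass at this point (provided
+		 * itup_key->allequalimage and BTGetDeduplicateItems() allow it),
+		 * whereas a unique index page only gets one when uniquedup was
+		 * set -- either because _bt_check_unique()'s binary search bounds
+		 * prove that a duplicate is present, or because we had to step
+		 * right or vacuum the page and so assume duplicates.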
*/ - if (PageGetFreeSpace(page) < insertstate->itemsz && - P_HAS_GARBAGE(lpageop)) + if (PageGetFreeSpace(page) < insertstate->itemsz) { - _bt_vacuum_one_page(rel, insertstate->buf, heapRel); - insertstate->bounds_valid = false; + if (P_HAS_GARBAGE(lpageop)) + { + _bt_vacuum_one_page(rel, insertstate->buf, heapRel); + insertstate->bounds_valid = false; + + /* Might as well assume duplicates (if checkingunique) */ + uniquedup = true; + } + + if (itup_key->allequalimage && BTGetDeduplicateItems(rel) && + (!checkingunique || uniquedup) && + PageGetFreeSpace(page) < insertstate->itemsz) + { + _bt_dedup_one_page(rel, insertstate->buf, heapRel, + insertstate->itup, insertstate->itemsz, + checkingunique); + insertstate->bounds_valid = false; + } } } else @@ -834,7 +936,30 @@ _bt_findinsertloc(Relation rel, Assert(P_RIGHTMOST(lpageop) || _bt_compare(rel, itup_key, page, P_HIKEY) <= 0); - return _bt_binsrch_insert(rel, insertstate); + newitemoff = _bt_binsrch_insert(rel, insertstate); + + if (insertstate->postingoff == -1) + { + /* + * There is an overlapping posting list tuple with its LP_DEAD bit + * set. We don't want to unnecessarily unset its LP_DEAD bit while + * performing a posting list split, so delete all LP_DEAD items early. + * This is the only case where LP_DEAD deletes happen even though + * there is space for newitem on the page. + */ + _bt_vacuum_one_page(rel, insertstate->buf, heapRel); + + /* + * Do new binary search. New insert location cannot overlap with any + * posting list now. + */ + insertstate->bounds_valid = false; + insertstate->postingoff = 0; + newitemoff = _bt_binsrch_insert(rel, insertstate); + Assert(insertstate->postingoff == 0); + } + + return newitemoff; } /* @@ -900,10 +1025,12 @@ _bt_stepright(Relation rel, BTInsertState insertstate, BTStack stack) * * This recursive procedure does the following things: * + * + if postingoff != 0, splits existing posting list tuple + * (since it overlaps with new 'itup' tuple). * + if necessary, splits the target page, using 'itup_key' for * suffix truncation on leaf pages (caller passes NULL for * non-leaf pages). - * + inserts the tuple. + * + inserts the new tuple (might be split from posting list). * + if the page was split, pops the parent stack, and finds the * right place to insert the new child pointer (by walking * right using information stored in the parent stack). @@ -931,11 +1058,15 @@ _bt_insertonpg(Relation rel, BTStack stack, IndexTuple itup, OffsetNumber newitemoff, + int postingoff, bool split_only_page) { Page page; BTPageOpaque lpageop; Size itemsz; + IndexTuple oposting; + IndexTuple origitup = NULL; + IndexTuple nposting = NULL; page = BufferGetPage(buf); lpageop = (BTPageOpaque) PageGetSpecialPointer(page); @@ -949,6 +1080,7 @@ _bt_insertonpg(Relation rel, Assert(P_ISLEAF(lpageop) || BTreeTupleGetNAtts(itup, rel) <= IndexRelationGetNumberOfKeyAttributes(rel)); + Assert(!BTreeTupleIsPosting(itup)); /* The caller should've finished any incomplete splits already. */ if (P_INCOMPLETE_SPLIT(lpageop)) @@ -959,6 +1091,34 @@ _bt_insertonpg(Relation rel, itemsz = MAXALIGN(itemsz); /* be safe, PageAddItem will do this but we * need to be consistent */ + /* + * Do we need to split an existing posting list item? + */ + if (postingoff != 0) + { + ItemId itemid = PageGetItemId(page, newitemoff); + + /* + * The new tuple is a duplicate with a heap TID that falls inside the + * range of an existing posting list tuple on a leaf page. Prepare to + * split an existing posting list. 
Overwriting the posting list with + * its post-split version is treated as an extra step in either the + * insert or page split critical section. + */ + Assert(P_ISLEAF(lpageop) && !ItemIdIsDead(itemid)); + Assert(itup_key->heapkeyspace && itup_key->allequalimage); + oposting = (IndexTuple) PageGetItem(page, itemid); + + /* use a mutable copy of itup as our itup from here on */ + origitup = itup; + itup = CopyIndexTuple(origitup); + nposting = _bt_swap_posting(itup, oposting, postingoff); + /* itup now contains rightmost/max TID from oposting */ + + /* Alter offset so that newitem goes after posting list */ + newitemoff = OffsetNumberNext(newitemoff); + } + /* * Do we need to split the page to fit the item on it? * @@ -991,7 +1151,8 @@ _bt_insertonpg(Relation rel, BlockNumberIsValid(RelationGetTargetBlock(rel)))); /* split the buffer into left and right halves */ - rbuf = _bt_split(rel, itup_key, buf, cbuf, newitemoff, itemsz, itup); + rbuf = _bt_split(rel, itup_key, buf, cbuf, newitemoff, itemsz, itup, + origitup, nposting, postingoff); PredicateLockPageSplit(rel, BufferGetBlockNumber(buf), BufferGetBlockNumber(rbuf)); @@ -1066,6 +1227,9 @@ _bt_insertonpg(Relation rel, /* Do the update. No ereport(ERROR) until changes are logged */ START_CRIT_SECTION(); + if (postingoff != 0) + memcpy(oposting, nposting, MAXALIGN(IndexTupleSize(nposting))); + if (!_bt_pgaddtup(page, itemsz, itup, newitemoff)) elog(PANIC, "failed to add new item to block %u in index \"%s\"", itup_blkno, RelationGetRelationName(rel)); @@ -1115,8 +1279,19 @@ _bt_insertonpg(Relation rel, XLogBeginInsert(); XLogRegisterData((char *) &xlrec, SizeOfBtreeInsert); - if (P_ISLEAF(lpageop)) + if (P_ISLEAF(lpageop) && postingoff == 0) + { + /* Simple leaf insert */ xlinfo = XLOG_BTREE_INSERT_LEAF; + } + else if (postingoff != 0) + { + /* + * Leaf insert with posting list split. Must include + * postingoff field before newitem/orignewitem. + */ + xlinfo = XLOG_BTREE_INSERT_POST; + } else { /* @@ -1139,6 +1314,7 @@ _bt_insertonpg(Relation rel, xlmeta.oldest_btpo_xact = metad->btm_oldest_btpo_xact; xlmeta.last_cleanup_num_heap_tuples = metad->btm_last_cleanup_num_heap_tuples; + xlmeta.allequalimage = metad->btm_allequalimage; XLogRegisterBuffer(2, metabuf, REGBUF_WILL_INIT | REGBUF_STANDARD); XLogRegisterBufData(2, (char *) &xlmeta, sizeof(xl_btree_metadata)); @@ -1147,7 +1323,27 @@ _bt_insertonpg(Relation rel, } XLogRegisterBuffer(0, buf, REGBUF_STANDARD); - XLogRegisterBufData(0, (char *) itup, IndexTupleSize(itup)); + if (postingoff == 0) + { + /* Simple, common case -- log itup from caller */ + XLogRegisterBufData(0, (char *) itup, IndexTupleSize(itup)); + } + else + { + /* + * Insert with posting list split (XLOG_BTREE_INSERT_POST + * record) case. + * + * Log postingoff. Also log origitup, not itup. REDO routine + * must reconstruct final itup (as well as nposting) using + * _bt_swap_posting(). 
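+			 *
+			 * (In effect, the REDO routine repeats the same
+			 * _bt_swap_posting() call that we performed above, which is
+			 * why logging origitup plus the two-byte postingoff is enough
+			 * to recreate both the final itup and nposting.)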
+ */ + uint16 upostingoff = postingoff; + + XLogRegisterBufData(0, (char *) &upostingoff, sizeof(uint16)); + XLogRegisterBufData(0, (char *) origitup, + IndexTupleSize(origitup)); + } recptr = XLogInsert(RM_BTREE_ID, xlinfo); @@ -1189,6 +1385,14 @@ _bt_insertonpg(Relation rel, _bt_getrootheight(rel) >= BTREE_FASTPATH_MIN_LEVEL) RelationSetTargetBlock(rel, cachedBlock); } + + /* be tidy */ + if (postingoff != 0) + { + /* itup is actually a modified copy of caller's original */ + pfree(nposting); + pfree(itup); + } } /* @@ -1204,12 +1408,24 @@ _bt_insertonpg(Relation rel, * This function will clear the INCOMPLETE_SPLIT flag on it, and * release the buffer. * + * orignewitem, nposting, and postingoff are needed when an insert of + * orignewitem results in both a posting list split and a page split. + * These extra posting list split details are used here in the same + * way as they are used in the more common case where a posting list + * split does not coincide with a page split. We need to deal with + * posting list splits directly in order to ensure that everything + * that follows from the insert of orignewitem is handled as a single + * atomic operation (though caller's insert of a new pivot/downlink + * into parent page will still be a separate operation). See + * nbtree/README for details on the design of posting list splits. + * * Returns the new right sibling of buf, pinned and write-locked. * The pin and lock on buf are maintained. */ static Buffer _bt_split(Relation rel, BTScanInsert itup_key, Buffer buf, Buffer cbuf, - OffsetNumber newitemoff, Size newitemsz, IndexTuple newitem) + OffsetNumber newitemoff, Size newitemsz, IndexTuple newitem, + IndexTuple orignewitem, IndexTuple nposting, uint16 postingoff) { Buffer rbuf; Page origpage; @@ -1229,6 +1445,7 @@ _bt_split(Relation rel, BTScanInsert itup_key, Buffer buf, Buffer cbuf, OffsetNumber leftoff, rightoff; OffsetNumber firstright; + OffsetNumber origpagepostingoff; OffsetNumber maxoff; OffsetNumber i; bool newitemonleft, @@ -1298,6 +1515,34 @@ _bt_split(Relation rel, BTScanInsert itup_key, Buffer buf, Buffer cbuf, PageSetLSN(leftpage, PageGetLSN(origpage)); isleaf = P_ISLEAF(oopaque); + /* + * Determine page offset number of existing overlapped-with-orignewitem + * posting list when it is necessary to perform a posting list split in + * passing. Note that newitem was already changed by caller (newitem no + * longer has the orignewitem TID). + * + * This page offset number (origpagepostingoff) will be used to pretend + * that the posting split has already taken place, even though the + * required modifications to origpage won't occur until we reach the + * critical section. The lastleft and firstright tuples of our page split + * point should, in effect, come from an imaginary version of origpage + * that has the nposting tuple instead of the original posting list tuple. + * + * Note: _bt_findsplitloc() should have compensated for coinciding posting + * list splits in just the same way, at least in theory. It doesn't + * bother with that, though. In practice it won't affect its choice of + * split point. 
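+	 *
+	 * For example, if the incoming heap TID fell within the posting list
+	 * stored at offset 5, _bt_insertonpg() will already have advanced
+	 * newitemoff to 6 before calling here, so origpagepostingoff works out
+	 * to 5 -- the offset whose tuple must be read as though it were
+	 * already replaced by nposting.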
+ */ + origpagepostingoff = InvalidOffsetNumber; + if (postingoff != 0) + { + Assert(isleaf); + Assert(ItemPointerCompare(&orignewitem->t_tid, + &newitem->t_tid) < 0); + Assert(BTreeTupleIsPosting(nposting)); + origpagepostingoff = OffsetNumberPrev(newitemoff); + } + /* * The "high key" for the new left page will be the first key that's going * to go into the new right page, or a truncated version if this is a leaf @@ -1335,6 +1580,8 @@ _bt_split(Relation rel, BTScanInsert itup_key, Buffer buf, Buffer cbuf, itemid = PageGetItemId(origpage, firstright); itemsz = ItemIdGetLength(itemid); item = (IndexTuple) PageGetItem(origpage, itemid); + if (firstright == origpagepostingoff) + item = nposting; } /* @@ -1368,6 +1615,8 @@ _bt_split(Relation rel, BTScanInsert itup_key, Buffer buf, Buffer cbuf, Assert(lastleftoff >= P_FIRSTDATAKEY(oopaque)); itemid = PageGetItemId(origpage, lastleftoff); lastleft = (IndexTuple) PageGetItem(origpage, itemid); + if (lastleftoff == origpagepostingoff) + lastleft = nposting; } Assert(lastleft != item); @@ -1383,6 +1632,7 @@ _bt_split(Relation rel, BTScanInsert itup_key, Buffer buf, Buffer cbuf, */ leftoff = P_HIKEY; + Assert(BTreeTupleIsPivot(lefthikey) || !itup_key->heapkeyspace); Assert(BTreeTupleGetNAtts(lefthikey, rel) > 0); Assert(BTreeTupleGetNAtts(lefthikey, rel) <= indnkeyatts); if (PageAddItem(leftpage, (Item) lefthikey, itemsz, leftoff, @@ -1447,6 +1697,7 @@ _bt_split(Relation rel, BTScanInsert itup_key, Buffer buf, Buffer cbuf, itemid = PageGetItemId(origpage, P_HIKEY); itemsz = ItemIdGetLength(itemid); item = (IndexTuple) PageGetItem(origpage, itemid); + Assert(BTreeTupleIsPivot(item) || !itup_key->heapkeyspace); Assert(BTreeTupleGetNAtts(item, rel) > 0); Assert(BTreeTupleGetNAtts(item, rel) <= indnkeyatts); if (PageAddItem(rightpage, (Item) item, itemsz, rightoff, @@ -1475,8 +1726,16 @@ _bt_split(Relation rel, BTScanInsert itup_key, Buffer buf, Buffer cbuf, itemsz = ItemIdGetLength(itemid); item = (IndexTuple) PageGetItem(origpage, itemid); + /* replace original item with nposting due to posting split? */ + if (i == origpagepostingoff) + { + Assert(BTreeTupleIsPosting(item)); + Assert(itemsz == MAXALIGN(IndexTupleSize(nposting))); + item = nposting; + } + /* does new item belong before this one? */ - if (i == newitemoff) + else if (i == newitemoff) { if (newitemonleft) { @@ -1645,8 +1904,12 @@ _bt_split(Relation rel, BTScanInsert itup_key, Buffer buf, Buffer cbuf, XLogRecPtr recptr; xlrec.level = ropaque->btpo.level; + /* See comments below on newitem, orignewitem, and posting lists */ xlrec.firstright = firstright; xlrec.newitemoff = newitemoff; + xlrec.postingoff = 0; + if (postingoff != 0 && origpagepostingoff < firstright) + xlrec.postingoff = postingoff; XLogBeginInsert(); XLogRegisterData((char *) &xlrec, SizeOfBtreeSplit); @@ -1665,11 +1928,35 @@ _bt_split(Relation rel, BTScanInsert itup_key, Buffer buf, Buffer cbuf, * because it's included with all the other items on the right page.) * Show the new item as belonging to the left page buffer, so that it * is not stored if XLogInsert decides it needs a full-page image of - * the left page. We store the offset anyway, though, to support - * archive compression of these records. + * the left page. We always store newitemoff in the record, though. + * + * The details are sometimes slightly different for page splits that + * coincide with a posting list split. 
If both the replacement + * posting list and newitem go on the right page, then we don't need + * to log anything extra, just like the simple !newitemonleft + * no-posting-split case (postingoff is set to zero in the WAL record, + * so recovery doesn't need to process a posting list split at all). + * Otherwise, we set postingoff and log orignewitem instead of + * newitem, despite having actually inserted newitem. REDO routine + * must reconstruct nposting and newitem using _bt_swap_posting(). + * + * Note: It's possible that our page split point is the point that + * makes the posting list lastleft and newitem firstright. This is + * the only case where we log orignewitem/newitem despite newitem + * going on the right page. If XLogInsert decides that it can omit + * orignewitem due to logging a full-page image of the left page, + * everything still works out, since recovery only needs to log + * orignewitem for items on the left page (just like the regular + * newitem-logged case). */ - if (newitemonleft) + if (newitemonleft && xlrec.postingoff == 0) XLogRegisterBufData(0, (char *) newitem, MAXALIGN(newitemsz)); + else if (xlrec.postingoff != 0) + { + Assert(newitemonleft || firstright == newitemoff); + Assert(MAXALIGN(newitemsz) == IndexTupleSize(orignewitem)); + XLogRegisterBufData(0, (char *) orignewitem, MAXALIGN(newitemsz)); + } /* Log the left page's new high key */ itemid = PageGetItemId(origpage, P_HIKEY); @@ -1829,7 +2116,7 @@ _bt_insert_parent(Relation rel, /* Recursively insert into the parent */ _bt_insertonpg(rel, NULL, pbuf, buf, stack->bts_parent, - new_item, stack->bts_offset + 1, + new_item, stack->bts_offset + 1, 0, is_only); /* be tidy */ @@ -2185,6 +2472,7 @@ _bt_newroot(Relation rel, Buffer lbuf, Buffer rbuf) md.fastlevel = metad->btm_level; md.oldest_btpo_xact = metad->btm_oldest_btpo_xact; md.last_cleanup_num_heap_tuples = metad->btm_last_cleanup_num_heap_tuples; + md.allequalimage = metad->btm_allequalimage; XLogRegisterBufData(2, (char *) &md, sizeof(xl_btree_metadata)); @@ -2265,7 +2553,7 @@ _bt_pgaddtup(Page page, static void _bt_vacuum_one_page(Relation rel, Buffer buffer, Relation heapRel) { - OffsetNumber deletable[MaxOffsetNumber]; + OffsetNumber deletable[MaxIndexTuplesPerPage]; int ndeletable = 0; OffsetNumber offnum, minoff, @@ -2298,6 +2586,6 @@ _bt_vacuum_one_page(Relation rel, Buffer buffer, Relation heapRel) * Note: if we didn't find any LP_DEAD items, then the page's * BTP_HAS_GARBAGE hint bit is falsely set. We do not bother expending a * separate write to clear it, however. We will clear it when we split - * the page. + * the page, or when deduplication runs. 
*/ } diff --git a/src/backend/access/nbtree/nbtpage.c b/src/backend/access/nbtree/nbtpage.c index f05cbe7467..39b8f17f4b 100644 --- a/src/backend/access/nbtree/nbtpage.c +++ b/src/backend/access/nbtree/nbtpage.c @@ -24,6 +24,7 @@ #include "access/nbtree.h" #include "access/nbtxlog.h" +#include "access/tableam.h" #include "access/transam.h" #include "access/xlog.h" #include "access/xloginsert.h" @@ -37,6 +38,8 @@ static BTMetaPageData *_bt_getmeta(Relation rel, Buffer metabuf); static bool _bt_mark_page_halfdead(Relation rel, Buffer buf, BTStack stack); static bool _bt_unlink_halfdead_page(Relation rel, Buffer leafbuf, bool *rightsib_empty); +static TransactionId _bt_xid_horizon(Relation rel, Relation heapRel, Page page, + OffsetNumber *deletable, int ndeletable); static bool _bt_lock_branch_parent(Relation rel, BlockNumber child, BTStack stack, Buffer *topparent, OffsetNumber *topoff, BlockNumber *target, BlockNumber *rightsib); @@ -47,7 +50,8 @@ static void _bt_log_reuse_page(Relation rel, BlockNumber blkno, * _bt_initmetapage() -- Fill a page buffer with a correct metapage image */ void -_bt_initmetapage(Page page, BlockNumber rootbknum, uint32 level) +_bt_initmetapage(Page page, BlockNumber rootbknum, uint32 level, + bool allequalimage) { BTMetaPageData *metad; BTPageOpaque metaopaque; @@ -63,6 +67,7 @@ _bt_initmetapage(Page page, BlockNumber rootbknum, uint32 level) metad->btm_fastlevel = level; metad->btm_oldest_btpo_xact = InvalidTransactionId; metad->btm_last_cleanup_num_heap_tuples = -1.0; + metad->btm_allequalimage = allequalimage; metaopaque = (BTPageOpaque) PageGetSpecialPointer(page); metaopaque->btpo_flags = BTP_META; @@ -102,6 +107,9 @@ _bt_upgrademetapage(Page page) metad->btm_version = BTREE_NOVAC_VERSION; metad->btm_oldest_btpo_xact = InvalidTransactionId; metad->btm_last_cleanup_num_heap_tuples = -1.0; + /* Only a REINDEX can set this field */ + Assert(!metad->btm_allequalimage); + metad->btm_allequalimage = false; /* Adjust pd_lower (see _bt_initmetapage() for details) */ ((PageHeader) page)->pd_lower = @@ -213,6 +221,7 @@ _bt_update_meta_cleanup_info(Relation rel, TransactionId oldestBtpoXact, md.fastlevel = metad->btm_fastlevel; md.oldest_btpo_xact = oldestBtpoXact; md.last_cleanup_num_heap_tuples = numHeapTuples; + md.allequalimage = metad->btm_allequalimage; XLogRegisterBufData(0, (char *) &md, sizeof(xl_btree_metadata)); @@ -274,6 +283,8 @@ _bt_getroot(Relation rel, int access) Assert(metad->btm_magic == BTREE_MAGIC); Assert(metad->btm_version >= BTREE_MIN_VERSION); Assert(metad->btm_version <= BTREE_VERSION); + Assert(!metad->btm_allequalimage || + metad->btm_version > BTREE_NOVAC_VERSION); Assert(metad->btm_root != P_NONE); rootblkno = metad->btm_fastroot; @@ -394,6 +405,7 @@ _bt_getroot(Relation rel, int access) md.fastlevel = 0; md.oldest_btpo_xact = InvalidTransactionId; md.last_cleanup_num_heap_tuples = -1.0; + md.allequalimage = metad->btm_allequalimage; XLogRegisterBufData(2, (char *) &md, sizeof(xl_btree_metadata)); @@ -618,22 +630,34 @@ _bt_getrootheight(Relation rel) Assert(metad->btm_magic == BTREE_MAGIC); Assert(metad->btm_version >= BTREE_MIN_VERSION); Assert(metad->btm_version <= BTREE_VERSION); + Assert(!metad->btm_allequalimage || + metad->btm_version > BTREE_NOVAC_VERSION); Assert(metad->btm_fastroot != P_NONE); return metad->btm_fastlevel; } /* - * _bt_heapkeyspace() -- is heap TID being treated as a key? + * _bt_metaversion() -- Get version/status info from metapage. 
+ * + * Sets caller's *heapkeyspace and *allequalimage arguments using data + * from the B-Tree metapage (could be locally-cached version). This + * information needs to be stashed in insertion scankey, so we provide a + * single function that fetches both at once. * * This is used to determine the rules that must be used to descend a * btree. Version 4 indexes treat heap TID as a tiebreaker attribute. * pg_upgrade'd version 3 indexes need extra steps to preserve reasonable * performance when inserting a new BTScanInsert-wise duplicate tuple * among many leaf pages already full of such duplicates. + * + * Also sets allequalimage field, which indicates whether or not it is + * safe to apply deduplication. We rely on the assumption that + * btm_allequalimage will be zero'ed on heapkeyspace indexes that were + * pg_upgrade'd from Postgres 12. */ -bool -_bt_heapkeyspace(Relation rel) +void +_bt_metaversion(Relation rel, bool *heapkeyspace, bool *allequalimage) { BTMetaPageData *metad; @@ -651,10 +675,11 @@ _bt_heapkeyspace(Relation rel) */ if (metad->btm_root == P_NONE) { - uint32 btm_version = metad->btm_version; + *heapkeyspace = metad->btm_version > BTREE_NOVAC_VERSION; + *allequalimage = metad->btm_allequalimage; _bt_relbuf(rel, metabuf); - return btm_version > BTREE_NOVAC_VERSION; + return; } /* @@ -678,9 +703,12 @@ _bt_heapkeyspace(Relation rel) Assert(metad->btm_magic == BTREE_MAGIC); Assert(metad->btm_version >= BTREE_MIN_VERSION); Assert(metad->btm_version <= BTREE_VERSION); + Assert(!metad->btm_allequalimage || + metad->btm_version > BTREE_NOVAC_VERSION); Assert(metad->btm_fastroot != P_NONE); - return metad->btm_version > BTREE_NOVAC_VERSION; + *heapkeyspace = metad->btm_version > BTREE_NOVAC_VERSION; + *allequalimage = metad->btm_allequalimage; } /* @@ -964,28 +992,106 @@ _bt_page_recyclable(Page page) * Delete item(s) from a btree leaf page during VACUUM. * * This routine assumes that the caller has a super-exclusive write lock on - * the buffer. Also, the given deletable array *must* be sorted in ascending - * order. + * the buffer. Also, the given deletable and updatable arrays *must* be + * sorted in ascending order. + * + * Routine deals with deleting TIDs when some (but not all) of the heap TIDs + * in an existing posting list item are to be removed by VACUUM. This works + * by updating/overwriting an existing item with caller's new version of the + * item (a version that lacks the TIDs that are to be deleted). * * We record VACUUMs and b-tree deletes differently in WAL. Deletes must * generate their own latestRemovedXid by accessing the heap directly, whereas - * VACUUMs rely on the initial heap scan taking care of it indirectly. + * VACUUMs rely on the initial heap scan taking care of it indirectly. Also, + * only VACUUM can perform granular deletes of individual TIDs in posting list + * tuples. 
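+ *
+ * To illustrate the division of labor: a posting list tuple whose heap
+ * TIDs are all dead reaches us through the deletable array, and the whole
+ * item is removed, whereas a posting list tuple with only some dead TIDs
+ * reaches us through the updatable array as a BTVacuumPosting entry, and
+ * is overwritten in place with the smaller replacement tuple built by
+ * _bt_update_posting().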
*/ void _bt_delitems_vacuum(Relation rel, Buffer buf, - OffsetNumber *deletable, int ndeletable) + OffsetNumber *deletable, int ndeletable, + BTVacuumPosting *updatable, int nupdatable) { Page page = BufferGetPage(buf); BTPageOpaque opaque; + Size itemsz; + char *updatedbuf = NULL; + Size updatedbuflen = 0; + OffsetNumber updatedoffsets[MaxIndexTuplesPerPage]; /* Shouldn't be called unless there's something to do */ - Assert(ndeletable > 0); + Assert(ndeletable > 0 || nupdatable > 0); + + for (int i = 0; i < nupdatable; i++) + { + /* Replace work area IndexTuple with updated version */ + _bt_update_posting(updatable[i]); + + /* Maintain array of updatable page offsets for WAL record */ + updatedoffsets[i] = updatable[i]->updatedoffset; + } + + /* XLOG stuff -- allocate and fill buffer before critical section */ + if (nupdatable > 0 && RelationNeedsWAL(rel)) + { + Size offset = 0; + + for (int i = 0; i < nupdatable; i++) + { + BTVacuumPosting vacposting = updatable[i]; + + itemsz = SizeOfBtreeUpdate + + vacposting->ndeletedtids * sizeof(uint16); + updatedbuflen += itemsz; + } + + updatedbuf = palloc(updatedbuflen); + for (int i = 0; i < nupdatable; i++) + { + BTVacuumPosting vacposting = updatable[i]; + xl_btree_update update; + + update.ndeletedtids = vacposting->ndeletedtids; + memcpy(updatedbuf + offset, &update.ndeletedtids, + SizeOfBtreeUpdate); + offset += SizeOfBtreeUpdate; + + itemsz = update.ndeletedtids * sizeof(uint16); + memcpy(updatedbuf + offset, vacposting->deletetids, itemsz); + offset += itemsz; + } + } /* No ereport(ERROR) until changes are logged */ START_CRIT_SECTION(); - /* Fix the page */ - PageIndexMultiDelete(page, deletable, ndeletable); + /* + * Handle posting tuple updates. + * + * Deliberately do this before handling simple deletes. If we did it the + * other way around (i.e. WAL record order -- simple deletes before + * updates) then we'd have to make compensating changes to the 'updatable' + * array of offset numbers. + * + * PageIndexTupleOverwrite() won't unset each item's LP_DEAD bit when it + * happens to already be set. Although we unset the BTP_HAS_GARBAGE page + * level flag, unsetting individual LP_DEAD bits should still be avoided. + */ + for (int i = 0; i < nupdatable; i++) + { + OffsetNumber updatedoffset = updatedoffsets[i]; + IndexTuple itup; + + itup = updatable[i]->itup; + itemsz = MAXALIGN(IndexTupleSize(itup)); + if (!PageIndexTupleOverwrite(page, updatedoffset, (Item) itup, + itemsz)) + elog(PANIC, "failed to update partially dead item in block %u of index \"%s\"", + BufferGetBlockNumber(buf), RelationGetRelationName(rel)); + } + + /* Now handle simple deletes of entire tuples */ + if (ndeletable > 0) + PageIndexMultiDelete(page, deletable, ndeletable); /* * We can clear the vacuum cycle ID since this page has certainly been @@ -1006,7 +1112,9 @@ _bt_delitems_vacuum(Relation rel, Buffer buf, * limited, since we never falsely unset an LP_DEAD bit. Workloads that * are particularly dependent on LP_DEAD bits being set quickly will * usually manage to set the BTP_HAS_GARBAGE flag before the page fills up - * again anyway. + * again anyway. Furthermore, attempting a deduplication pass will remove + * all LP_DEAD items, regardless of whether the BTP_HAS_GARBAGE hint bit + * is set or not. 
*/ opaque->btpo_flags &= ~BTP_HAS_GARBAGE; @@ -1019,18 +1127,22 @@ _bt_delitems_vacuum(Relation rel, Buffer buf, xl_btree_vacuum xlrec_vacuum; xlrec_vacuum.ndeleted = ndeletable; + xlrec_vacuum.nupdated = nupdatable; XLogBeginInsert(); XLogRegisterBuffer(0, buf, REGBUF_STANDARD); XLogRegisterData((char *) &xlrec_vacuum, SizeOfBtreeVacuum); - /* - * The deletable array is not in the buffer, but pretend that it is. - * When XLogInsert stores the whole buffer, the array need not be - * stored too. - */ - XLogRegisterBufData(0, (char *) deletable, - ndeletable * sizeof(OffsetNumber)); + if (ndeletable > 0) + XLogRegisterBufData(0, (char *) deletable, + ndeletable * sizeof(OffsetNumber)); + + if (nupdatable > 0) + { + XLogRegisterBufData(0, (char *) updatedoffsets, + nupdatable * sizeof(OffsetNumber)); + XLogRegisterBufData(0, updatedbuf, updatedbuflen); + } recptr = XLogInsert(RM_BTREE_ID, XLOG_BTREE_VACUUM); @@ -1038,6 +1150,13 @@ _bt_delitems_vacuum(Relation rel, Buffer buf, } END_CRIT_SECTION(); + + /* can't leak memory here */ + if (updatedbuf != NULL) + pfree(updatedbuf); + /* free tuples generated by calling _bt_update_posting() */ + for (int i = 0; i < nupdatable; i++) + pfree(updatable[i]->itup); } /* @@ -1050,6 +1169,8 @@ _bt_delitems_vacuum(Relation rel, Buffer buf, * This is nearly the same as _bt_delitems_vacuum as far as what it does to * the page, but it needs to generate its own latestRemovedXid by accessing * the heap. This is used by the REDO routine to generate recovery conflicts. + * Also, it doesn't handle posting list tuples unless the entire tuple can be + * deleted as a whole (since there is only one LP_DEAD bit per line pointer). */ void _bt_delitems_delete(Relation rel, Buffer buf, @@ -1065,8 +1186,7 @@ _bt_delitems_delete(Relation rel, Buffer buf, if (XLogStandbyInfoActive() && RelationNeedsWAL(rel)) latestRemovedXid = - index_compute_xid_horizon_for_tuples(rel, heapRel, buf, - deletable, ndeletable); + _bt_xid_horizon(rel, heapRel, page, deletable, ndeletable); /* No ereport(ERROR) until changes are logged */ START_CRIT_SECTION(); @@ -1113,6 +1233,83 @@ _bt_delitems_delete(Relation rel, Buffer buf, END_CRIT_SECTION(); } +/* + * Get the latestRemovedXid from the table entries pointed to by the non-pivot + * tuples being deleted. + * + * This is a specialized version of index_compute_xid_horizon_for_tuples(). + * It's needed because btree tuples don't always store table TID using the + * standard index tuple header field. 
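+ *
+ * Note that the htids[] work array can grow past ndeletable entries: a
+ * single LP_DEAD posting list tuple contributes every one of its heap
+ * TIDs.  For example, deleting two plain tuples plus one posting list
+ * tuple containing 3 heap TIDs means passing 5 heap TIDs to
+ * table_compute_xid_horizon_for_tuples().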
+ */ +static TransactionId +_bt_xid_horizon(Relation rel, Relation heapRel, Page page, + OffsetNumber *deletable, int ndeletable) +{ + TransactionId latestRemovedXid = InvalidTransactionId; + int spacenhtids; + int nhtids; + ItemPointer htids; + + /* Array will grow iff there are posting list tuples to consider */ + spacenhtids = ndeletable; + nhtids = 0; + htids = (ItemPointer) palloc(sizeof(ItemPointerData) * spacenhtids); + for (int i = 0; i < ndeletable; i++) + { + ItemId itemid; + IndexTuple itup; + + itemid = PageGetItemId(page, deletable[i]); + itup = (IndexTuple) PageGetItem(page, itemid); + + Assert(ItemIdIsDead(itemid)); + Assert(!BTreeTupleIsPivot(itup)); + + if (!BTreeTupleIsPosting(itup)) + { + if (nhtids + 1 > spacenhtids) + { + spacenhtids *= 2; + htids = (ItemPointer) + repalloc(htids, sizeof(ItemPointerData) * spacenhtids); + } + + Assert(ItemPointerIsValid(&itup->t_tid)); + ItemPointerCopy(&itup->t_tid, &htids[nhtids]); + nhtids++; + } + else + { + int nposting = BTreeTupleGetNPosting(itup); + + if (nhtids + nposting > spacenhtids) + { + spacenhtids = Max(spacenhtids * 2, nhtids + nposting); + htids = (ItemPointer) + repalloc(htids, sizeof(ItemPointerData) * spacenhtids); + } + + for (int j = 0; j < nposting; j++) + { + ItemPointer htid = BTreeTupleGetPostingN(itup, j); + + Assert(ItemPointerIsValid(htid)); + ItemPointerCopy(htid, &htids[nhtids]); + nhtids++; + } + } + } + + Assert(nhtids >= ndeletable); + + latestRemovedXid = + table_compute_xid_horizon_for_tuples(heapRel, htids, nhtids); + + pfree(htids); + + return latestRemovedXid; +} + /* * Returns true, if the given block has the half-dead flag set. */ @@ -2058,6 +2255,7 @@ _bt_unlink_halfdead_page(Relation rel, Buffer leafbuf, bool *rightsib_empty) xlmeta.fastlevel = metad->btm_fastlevel; xlmeta.oldest_btpo_xact = metad->btm_oldest_btpo_xact; xlmeta.last_cleanup_num_heap_tuples = metad->btm_last_cleanup_num_heap_tuples; + xlmeta.allequalimage = metad->btm_allequalimage; XLogRegisterBufData(4, (char *) &xlmeta, sizeof(xl_btree_metadata)); xlinfo = XLOG_BTREE_UNLINK_PAGE_META; diff --git a/src/backend/access/nbtree/nbtree.c b/src/backend/access/nbtree/nbtree.c index 5254bc7ef5..4bb16297c3 100644 --- a/src/backend/access/nbtree/nbtree.c +++ b/src/backend/access/nbtree/nbtree.c @@ -95,6 +95,10 @@ static void btvacuumscan(IndexVacuumInfo *info, IndexBulkDeleteResult *stats, BTCycleId cycleid, TransactionId *oldestBtpoXact); static void btvacuumpage(BTVacState *vstate, BlockNumber blkno, BlockNumber orig_blkno); +static BTVacuumPosting btreevacuumposting(BTVacState *vstate, + IndexTuple posting, + OffsetNumber updatedoffset, + int *nremaining); /* @@ -161,7 +165,7 @@ btbuildempty(Relation index) /* Construct metapage. */ metapage = (Page) palloc(BLCKSZ); - _bt_initmetapage(metapage, P_NONE, 0); + _bt_initmetapage(metapage, P_NONE, 0, _bt_allequalimage(index, false)); /* * Write the page and log it. 
It might seem that an immediate sync would @@ -264,8 +268,8 @@ btgettuple(IndexScanDesc scan, ScanDirection dir) */ if (so->killedItems == NULL) so->killedItems = (int *) - palloc(MaxIndexTuplesPerPage * sizeof(int)); - if (so->numKilled < MaxIndexTuplesPerPage) + palloc(MaxTIDsPerBTreePage * sizeof(int)); + if (so->numKilled < MaxTIDsPerBTreePage) so->killedItems[so->numKilled++] = so->currPos.itemIndex; } @@ -1154,11 +1158,15 @@ restart: } else if (P_ISLEAF(opaque)) { - OffsetNumber deletable[MaxOffsetNumber]; + OffsetNumber deletable[MaxIndexTuplesPerPage]; int ndeletable; + BTVacuumPosting updatable[MaxIndexTuplesPerPage]; + int nupdatable; OffsetNumber offnum, minoff, maxoff; + int nhtidsdead, + nhtidslive; /* * Trade in the initial read lock for a super-exclusive write lock on @@ -1190,8 +1198,11 @@ restart: * point using callback. */ ndeletable = 0; + nupdatable = 0; minoff = P_FIRSTDATAKEY(opaque); maxoff = PageGetMaxOffsetNumber(page); + nhtidsdead = 0; + nhtidslive = 0; if (callback) { for (offnum = minoff; @@ -1199,11 +1210,9 @@ restart: offnum = OffsetNumberNext(offnum)) { IndexTuple itup; - ItemPointer htup; itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, offnum)); - htup = &(itup->t_tid); /* * Hot Standby assumes that it's okay that XLOG_BTREE_VACUUM @@ -1226,22 +1235,82 @@ restart: * simple, and allows us to always avoid generating our own * conflicts. */ - if (callback(htup, callback_state)) - deletable[ndeletable++] = offnum; + Assert(!BTreeTupleIsPivot(itup)); + if (!BTreeTupleIsPosting(itup)) + { + /* Regular tuple, standard table TID representation */ + if (callback(&itup->t_tid, callback_state)) + { + deletable[ndeletable++] = offnum; + nhtidsdead++; + } + else + nhtidslive++; + } + else + { + BTVacuumPosting vacposting; + int nremaining; + + /* Posting list tuple */ + vacposting = btreevacuumposting(vstate, itup, offnum, + &nremaining); + if (vacposting == NULL) + { + /* + * All table TIDs from the posting tuple remain, so no + * delete or update required + */ + Assert(nremaining == BTreeTupleGetNPosting(itup)); + } + else if (nremaining > 0) + { + + /* + * Store metadata about posting list tuple in + * updatable array for entire page. Existing tuple + * will be updated during the later call to + * _bt_delitems_vacuum(). + */ + Assert(nremaining < BTreeTupleGetNPosting(itup)); + updatable[nupdatable++] = vacposting; + nhtidsdead += BTreeTupleGetNPosting(itup) - nremaining; + } + else + { + /* + * All table TIDs from the posting list must be + * deleted. We'll delete the index tuple completely + * (no update required). + */ + Assert(nremaining == 0); + deletable[ndeletable++] = offnum; + nhtidsdead += BTreeTupleGetNPosting(itup); + pfree(vacposting); + } + + nhtidslive += nremaining; + } } } /* - * Apply any needed deletes. We issue just one _bt_delitems_vacuum() - * call per page, so as to minimize WAL traffic. + * Apply any needed deletes or updates. We issue just one + * _bt_delitems_vacuum() call per page, so as to minimize WAL traffic. 
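+		 *
+		 * Note that stats->tuples_removed is advanced by nhtidsdead rather
+		 * than by ndeletable below, since deleting a single posting list
+		 * tuple can remove many dead table TIDs at once.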
*/ - if (ndeletable > 0) + if (ndeletable > 0 || nupdatable > 0) { - _bt_delitems_vacuum(rel, buf, deletable, ndeletable); + Assert(nhtidsdead >= Max(ndeletable, 1)); + _bt_delitems_vacuum(rel, buf, deletable, ndeletable, updatable, + nupdatable); - stats->tuples_removed += ndeletable; + stats->tuples_removed += nhtidsdead; /* must recompute maxoff */ maxoff = PageGetMaxOffsetNumber(page); + + /* can't leak memory here */ + for (int i = 0; i < nupdatable; i++) + pfree(updatable[i]); } else { @@ -1254,6 +1323,7 @@ restart: * We treat this like a hint-bit update because there's no need to * WAL-log it. */ + Assert(nhtidsdead == 0); if (vstate->cycleid != 0 && opaque->btpo_cycleid == vstate->cycleid) { @@ -1263,15 +1333,18 @@ restart: } /* - * If it's now empty, try to delete; else count the live tuples. We - * don't delete when recursing, though, to avoid putting entries into - * freePages out-of-order (doesn't seem worth any extra code to handle - * the case). + * If it's now empty, try to delete; else count the live tuples (live + * table TIDs in posting lists are counted as separate live tuples). + * We don't delete when recursing, though, to avoid putting entries + * into freePages out-of-order (doesn't seem worth any extra code to + * handle the case). */ if (minoff > maxoff) delete_now = (blkno == orig_blkno); else - stats->num_index_tuples += maxoff - minoff + 1; + stats->num_index_tuples += nhtidslive; + + Assert(!delete_now || nhtidslive == 0); } if (delete_now) @@ -1303,9 +1376,10 @@ restart: /* * This is really tail recursion, but if the compiler is too stupid to * optimize it as such, we'd eat an uncomfortably large amount of stack - * space per recursion level (due to the deletable[] array). A failure is - * improbable since the number of levels isn't likely to be large ... but - * just in case, let's hand-optimize into a loop. + * space per recursion level (due to the arrays used to track details of + * deletable/updatable items). A failure is improbable since the number + * of levels isn't likely to be large ... but just in case, let's + * hand-optimize into a loop. */ if (recurse_to != P_NONE) { @@ -1314,6 +1388,61 @@ restart: } } +/* + * btreevacuumposting --- determine TIDs still needed in posting list + * + * Returns metadata describing how to build replacement tuple without the TIDs + * that VACUUM needs to delete. Returned value is NULL in the common case + * where no changes are needed to caller's posting list tuple (we avoid + * allocating memory here as an optimization). + * + * The number of TIDs that should remain in the posting list tuple is set for + * caller in *nremaining. + */ +static BTVacuumPosting +btreevacuumposting(BTVacState *vstate, IndexTuple posting, + OffsetNumber updatedoffset, int *nremaining) +{ + int live = 0; + int nitem = BTreeTupleGetNPosting(posting); + ItemPointer items = BTreeTupleGetPosting(posting); + BTVacuumPosting vacposting = NULL; + + for (int i = 0; i < nitem; i++) + { + if (!vstate->callback(items + i, vstate->callback_state)) + { + /* Live table TID */ + live++; + } + else if (vacposting == NULL) + { + /* + * First dead table TID encountered. + * + * It's now clear that we need to delete one or more dead table + * TIDs, so start maintaining metadata describing how to update + * existing posting list tuple. 
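+			 *
+			 * deletetids[] is sized for the worst case where all nitem of
+			 * the tuple's TIDs turn out to be dead.  When that happens,
+			 * caller sees *nremaining == 0, frees the returned vacposting,
+			 * and deletes the whole index tuple instead of updating it.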
+ */ + vacposting = palloc(offsetof(BTVacuumPostingData, deletetids) + + nitem * sizeof(uint16)); + + vacposting->itup = posting; + vacposting->updatedoffset = updatedoffset; + vacposting->ndeletedtids = 0; + vacposting->deletetids[vacposting->ndeletedtids++] = i; + } + else + { + /* Second or subsequent dead table TID */ + vacposting->deletetids[vacposting->ndeletedtids++] = i; + } + } + + *nremaining = live; + return vacposting; +} + /* * btcanreturn() -- Check whether btree indexes support index-only scans. * diff --git a/src/backend/access/nbtree/nbtsearch.c b/src/backend/access/nbtree/nbtsearch.c index df065d72f8..7aaa8c17b0 100644 --- a/src/backend/access/nbtree/nbtsearch.c +++ b/src/backend/access/nbtree/nbtsearch.c @@ -26,10 +26,18 @@ static void _bt_drop_lock_and_maybe_pin(IndexScanDesc scan, BTScanPos sp); static OffsetNumber _bt_binsrch(Relation rel, BTScanInsert key, Buffer buf); +static int _bt_binsrch_posting(BTScanInsert key, Page page, + OffsetNumber offnum); static bool _bt_readpage(IndexScanDesc scan, ScanDirection dir, OffsetNumber offnum); static void _bt_saveitem(BTScanOpaque so, int itemIndex, OffsetNumber offnum, IndexTuple itup); +static int _bt_setuppostingitems(BTScanOpaque so, int itemIndex, + OffsetNumber offnum, ItemPointer heapTid, + IndexTuple itup); +static inline void _bt_savepostingitem(BTScanOpaque so, int itemIndex, + OffsetNumber offnum, + ItemPointer heapTid, int tupleOffset); static bool _bt_steppage(IndexScanDesc scan, ScanDirection dir); static bool _bt_readnextpage(IndexScanDesc scan, BlockNumber blkno, ScanDirection dir); static bool _bt_parallel_readpage(IndexScanDesc scan, BlockNumber blkno, @@ -142,6 +150,7 @@ _bt_search(Relation rel, BTScanInsert key, Buffer *bufP, int access, offnum = _bt_binsrch(rel, key, *bufP); itemid = PageGetItemId(page, offnum); itup = (IndexTuple) PageGetItem(page, itemid); + Assert(BTreeTupleIsPivot(itup) || !key->heapkeyspace); blkno = BTreeTupleGetDownLink(itup); par_blkno = BufferGetBlockNumber(*bufP); @@ -434,7 +443,10 @@ _bt_binsrch(Relation rel, * low) makes bounds invalid. * * Caller is responsible for invalidating bounds when it modifies the page - * before calling here a second time. + * before calling here a second time, and for dealing with posting list + * tuple matches (callers can use insertstate's postingoff field to + * determine which existing heap TID will need to be replaced by a posting + * list split). */ OffsetNumber _bt_binsrch_insert(Relation rel, BTInsertState insertstate) @@ -453,6 +465,7 @@ _bt_binsrch_insert(Relation rel, BTInsertState insertstate) Assert(P_ISLEAF(opaque)); Assert(!key->nextkey); + Assert(insertstate->postingoff == 0); if (!insertstate->bounds_valid) { @@ -509,6 +522,16 @@ _bt_binsrch_insert(Relation rel, BTInsertState insertstate) if (result != 0) stricthigh = high; } + + /* + * If tuple at offset located by binary search is a posting list whose + * TID range overlaps with caller's scantid, perform posting list + * binary search to set postingoff for caller. Caller must split the + * posting list when postingoff is set. This should happen + * infrequently. + */ + if (unlikely(result == 0 && key->scantid != NULL)) + insertstate->postingoff = _bt_binsrch_posting(key, page, mid); } /* @@ -528,6 +551,73 @@ _bt_binsrch_insert(Relation rel, BTInsertState insertstate) return low; } +/*---------- + * _bt_binsrch_posting() -- posting list binary search. + * + * Helper routine for _bt_binsrch_insert(). + * + * Returns offset into posting list where caller's scantid belongs. 
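+ *
+ * Three outcomes are possible: 0 when the item at offnum turns out not to
+ * be a posting list tuple after all (no posting list split is needed),
+ * -1 when the posting list tuple's line pointer is marked LP_DEAD (caller
+ * is expected to remove dead items and then repeat its page-level binary
+ * search), and otherwise the position within the posting list where
+ * scantid belongs, which _bt_binsrch_insert() saves as
+ * insertstate->postingoff.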
+ *---------- + */ +static int +_bt_binsrch_posting(BTScanInsert key, Page page, OffsetNumber offnum) +{ + IndexTuple itup; + ItemId itemid; + int low, + high, + mid, + res; + + /* + * If this isn't a posting tuple, then the index must be corrupt (if it is + * an ordinary non-pivot tuple then there must be an existing tuple with a + * heap TID that equals inserter's new heap TID/scantid). Defensively + * check that tuple is a posting list tuple whose posting list range + * includes caller's scantid. + * + * (This is also needed because contrib/amcheck's rootdescend option needs + * to be able to relocate a non-pivot tuple using _bt_binsrch_insert().) + */ + itemid = PageGetItemId(page, offnum); + itup = (IndexTuple) PageGetItem(page, itemid); + if (!BTreeTupleIsPosting(itup)) + return 0; + + Assert(key->heapkeyspace && key->allequalimage); + + /* + * In the event that posting list tuple has LP_DEAD bit set, indicate this + * to _bt_binsrch_insert() caller by returning -1, a sentinel value. A + * second call to _bt_binsrch_insert() can take place when its caller has + * removed the dead item. + */ + if (ItemIdIsDead(itemid)) + return -1; + + /* "high" is past end of posting list for loop invariant */ + low = 0; + high = BTreeTupleGetNPosting(itup); + Assert(high >= 2); + + while (high > low) + { + mid = low + ((high - low) / 2); + res = ItemPointerCompare(key->scantid, + BTreeTupleGetPostingN(itup, mid)); + + if (res > 0) + low = mid + 1; + else if (res < 0) + high = mid; + else + return mid; + } + + /* Exact match not found */ + return low; +} + /*---------- * _bt_compare() -- Compare insertion-type scankey to tuple on a page. * @@ -537,9 +627,14 @@ _bt_binsrch_insert(Relation rel, BTInsertState insertstate) * <0 if scankey < tuple at offnum; * 0 if scankey == tuple at offnum; * >0 if scankey > tuple at offnum. - * NULLs in the keys are treated as sortable values. Therefore - * "equality" does not necessarily mean that the item should be - * returned to the caller as a matching key! + * + * NULLs in the keys are treated as sortable values. Therefore + * "equality" does not necessarily mean that the item should be returned + * to the caller as a matching key. Similarly, an insertion scankey + * with its scantid set is treated as equal to a posting tuple whose TID + * range overlaps with their scantid. There generally won't be a + * matching TID in the posting tuple, which caller must handle + * themselves (e.g., by splitting the posting list tuple). * * CRUCIAL NOTE: on a non-leaf page, the first data key is assumed to be * "minus infinity": this routine will always claim it is less than the @@ -563,6 +658,7 @@ _bt_compare(Relation rel, ScanKey scankey; int ncmpkey; int ntupatts; + int32 result; Assert(_bt_check_natts(rel, key->heapkeyspace, page, offnum)); Assert(key->keysz <= IndexRelationGetNumberOfKeyAttributes(rel)); @@ -592,12 +688,12 @@ _bt_compare(Relation rel, ncmpkey = Min(ntupatts, key->keysz); Assert(key->heapkeyspace || ncmpkey == key->keysz); + Assert(!BTreeTupleIsPosting(itup) || key->allequalimage); scankey = key->scankeys; for (int i = 1; i <= ncmpkey; i++) { Datum datum; bool isNull; - int32 result; datum = index_getattr(itup, scankey->sk_attno, itupdesc, &isNull); @@ -712,8 +808,25 @@ _bt_compare(Relation rel, if (heapTid == NULL) return 1; + /* + * Scankey must be treated as equal to a posting list tuple if its scantid + * value falls within the range of the posting list. 
In all other cases + * there can only be a single heap TID value, which is compared directly + * with scantid. + */ Assert(ntupatts >= IndexRelationGetNumberOfKeyAttributes(rel)); - return ItemPointerCompare(key->scantid, heapTid); + result = ItemPointerCompare(key->scantid, heapTid); + if (result <= 0 || !BTreeTupleIsPosting(itup)) + return result; + else + { + result = ItemPointerCompare(key->scantid, + BTreeTupleGetMaxHeapTID(itup)); + if (result > 0) + return 1; + } + + return 0; } /* @@ -1228,7 +1341,7 @@ _bt_first(IndexScanDesc scan, ScanDirection dir) } /* Initialize remaining insertion scan key fields */ - inskey.heapkeyspace = _bt_heapkeyspace(rel); + _bt_metaversion(rel, &inskey.heapkeyspace, &inskey.allequalimage); inskey.anynullkeys = false; /* unused */ inskey.nextkey = nextkey; inskey.pivotsearch = false; @@ -1483,9 +1596,35 @@ _bt_readpage(IndexScanDesc scan, ScanDirection dir, OffsetNumber offnum) if (_bt_checkkeys(scan, itup, indnatts, dir, &continuescan)) { - /* tuple passes all scan key conditions, so remember it */ - _bt_saveitem(so, itemIndex, offnum, itup); - itemIndex++; + /* tuple passes all scan key conditions */ + if (!BTreeTupleIsPosting(itup)) + { + /* Remember it */ + _bt_saveitem(so, itemIndex, offnum, itup); + itemIndex++; + } + else + { + int tupleOffset; + + /* + * Set up state to return posting list, and remember first + * TID + */ + tupleOffset = + _bt_setuppostingitems(so, itemIndex, offnum, + BTreeTupleGetPostingN(itup, 0), + itup); + itemIndex++; + /* Remember additional TIDs */ + for (int i = 1; i < BTreeTupleGetNPosting(itup); i++) + { + _bt_savepostingitem(so, itemIndex, offnum, + BTreeTupleGetPostingN(itup, i), + tupleOffset); + itemIndex++; + } + } } /* When !continuescan, there can't be any more matches, so stop */ if (!continuescan) @@ -1518,7 +1657,7 @@ _bt_readpage(IndexScanDesc scan, ScanDirection dir, OffsetNumber offnum) if (!continuescan) so->currPos.moreRight = false; - Assert(itemIndex <= MaxIndexTuplesPerPage); + Assert(itemIndex <= MaxTIDsPerBTreePage); so->currPos.firstItem = 0; so->currPos.lastItem = itemIndex - 1; so->currPos.itemIndex = 0; @@ -1526,7 +1665,7 @@ _bt_readpage(IndexScanDesc scan, ScanDirection dir, OffsetNumber offnum) else { /* load items[] in descending order */ - itemIndex = MaxIndexTuplesPerPage; + itemIndex = MaxTIDsPerBTreePage; offnum = Min(offnum, maxoff); @@ -1567,9 +1706,41 @@ _bt_readpage(IndexScanDesc scan, ScanDirection dir, OffsetNumber offnum) &continuescan); if (passes_quals && tuple_alive) { - /* tuple passes all scan key conditions, so remember it */ - itemIndex--; - _bt_saveitem(so, itemIndex, offnum, itup); + /* tuple passes all scan key conditions */ + if (!BTreeTupleIsPosting(itup)) + { + /* Remember it */ + itemIndex--; + _bt_saveitem(so, itemIndex, offnum, itup); + } + else + { + int tupleOffset; + + /* + * Set up state to return posting list, and remember first + * TID. + * + * Note that we deliberately save/return items from + * posting lists in ascending heap TID order for backwards + * scans. This allows _bt_killitems() to make a + * consistent assumption about the order of items + * associated with the same posting list tuple. 
+ */ + itemIndex--; + tupleOffset = + _bt_setuppostingitems(so, itemIndex, offnum, + BTreeTupleGetPostingN(itup, 0), + itup); + /* Remember additional TIDs */ + for (int i = 1; i < BTreeTupleGetNPosting(itup); i++) + { + itemIndex--; + _bt_savepostingitem(so, itemIndex, offnum, + BTreeTupleGetPostingN(itup, i), + tupleOffset); + } + } } if (!continuescan) { @@ -1583,8 +1754,8 @@ _bt_readpage(IndexScanDesc scan, ScanDirection dir, OffsetNumber offnum) Assert(itemIndex >= 0); so->currPos.firstItem = itemIndex; - so->currPos.lastItem = MaxIndexTuplesPerPage - 1; - so->currPos.itemIndex = MaxIndexTuplesPerPage - 1; + so->currPos.lastItem = MaxTIDsPerBTreePage - 1; + so->currPos.itemIndex = MaxTIDsPerBTreePage - 1; } return (so->currPos.firstItem <= so->currPos.lastItem); @@ -1597,6 +1768,8 @@ _bt_saveitem(BTScanOpaque so, int itemIndex, { BTScanPosItem *currItem = &so->currPos.items[itemIndex]; + Assert(!BTreeTupleIsPivot(itup) && !BTreeTupleIsPosting(itup)); + currItem->heapTid = itup->t_tid; currItem->indexOffset = offnum; if (so->currTuples) @@ -1609,6 +1782,71 @@ _bt_saveitem(BTScanOpaque so, int itemIndex, } } +/* + * Setup state to save TIDs/items from a single posting list tuple. + * + * Saves an index item into so->currPos.items[itemIndex] for TID that is + * returned to scan first. Second or subsequent TIDs for posting list should + * be saved by calling _bt_savepostingitem(). + * + * Returns an offset into tuple storage space that main tuple is stored at if + * needed. + */ +static int +_bt_setuppostingitems(BTScanOpaque so, int itemIndex, OffsetNumber offnum, + ItemPointer heapTid, IndexTuple itup) +{ + BTScanPosItem *currItem = &so->currPos.items[itemIndex]; + + Assert(BTreeTupleIsPosting(itup)); + + currItem->heapTid = *heapTid; + currItem->indexOffset = offnum; + if (so->currTuples) + { + /* Save base IndexTuple (truncate posting list) */ + IndexTuple base; + Size itupsz = BTreeTupleGetPostingOffset(itup); + + itupsz = MAXALIGN(itupsz); + currItem->tupleOffset = so->currPos.nextTupleOffset; + base = (IndexTuple) (so->currTuples + so->currPos.nextTupleOffset); + memcpy(base, itup, itupsz); + /* Defensively reduce work area index tuple header size */ + base->t_info &= ~INDEX_SIZE_MASK; + base->t_info |= itupsz; + so->currPos.nextTupleOffset += itupsz; + + return currItem->tupleOffset; + } + + return 0; +} + +/* + * Save an index item into so->currPos.items[itemIndex] for current posting + * tuple. + * + * Assumes that _bt_setuppostingitems() has already been called for current + * posting list tuple. Caller passes its return value as tupleOffset. 
+ */ +static inline void +_bt_savepostingitem(BTScanOpaque so, int itemIndex, OffsetNumber offnum, + ItemPointer heapTid, int tupleOffset) +{ + BTScanPosItem *currItem = &so->currPos.items[itemIndex]; + + currItem->heapTid = *heapTid; + currItem->indexOffset = offnum; + + /* + * Have index-only scans return the same base IndexTuple for every TID + * that originates from the same posting list + */ + if (so->currTuples) + currItem->tupleOffset = tupleOffset; +} + /* * _bt_steppage() -- Step to next page containing valid data for scan * diff --git a/src/backend/access/nbtree/nbtsort.c b/src/backend/access/nbtree/nbtsort.c index baec5de999..e66cd36dfa 100644 --- a/src/backend/access/nbtree/nbtsort.c +++ b/src/backend/access/nbtree/nbtsort.c @@ -243,6 +243,7 @@ typedef struct BTPageState BlockNumber btps_blkno; /* block # to write this page at */ IndexTuple btps_lowkey; /* page's strict lower bound pivot tuple */ OffsetNumber btps_lastoff; /* last item offset loaded */ + Size btps_lastextra; /* last item's extra posting list space */ uint32 btps_level; /* tree level (0 = leaf) */ Size btps_full; /* "full" if less than this much free space */ struct BTPageState *btps_next; /* link to parent level, if any */ @@ -277,7 +278,10 @@ static void _bt_slideleft(Page page); static void _bt_sortaddtup(Page page, Size itemsize, IndexTuple itup, OffsetNumber itup_off); static void _bt_buildadd(BTWriteState *wstate, BTPageState *state, - IndexTuple itup); + IndexTuple itup, Size truncextra); +static void _bt_sort_dedup_finish_pending(BTWriteState *wstate, + BTPageState *state, + BTDedupState dstate); static void _bt_uppershutdown(BTWriteState *wstate, BTPageState *state); static void _bt_load(BTWriteState *wstate, BTSpool *btspool, BTSpool *btspool2); @@ -563,6 +567,8 @@ _bt_leafbuild(BTSpool *btspool, BTSpool *btspool2) wstate.heap = btspool->heap; wstate.index = btspool->index; wstate.inskey = _bt_mkscankey(wstate.index, NULL); + /* _bt_mkscankey() won't set allequalimage without metapage */ + wstate.inskey->allequalimage = _bt_allequalimage(wstate.index, true); /* * We need to log index creation in WAL iff WAL archiving/streaming is @@ -711,6 +717,7 @@ _bt_pagestate(BTWriteState *wstate, uint32 level) state->btps_lowkey = NULL; /* initialize lastoff so first item goes into P_FIRSTKEY */ state->btps_lastoff = P_HIKEY; + state->btps_lastextra = 0; state->btps_level = level; /* set "full" threshold based on level. See notes at head of file. */ if (level > 0) @@ -789,7 +796,8 @@ _bt_sortaddtup(Page page, } /*---------- - * Add an item to a disk page from the sort output. + * Add an item to a disk page from the sort output (or add a posting list + * item formed from the sort output). * * We must be careful to observe the page layout conventions of nbtsearch.c: * - rightmost pages start data items at P_HIKEY instead of at P_FIRSTKEY. @@ -821,14 +829,27 @@ _bt_sortaddtup(Page page, * the truncated high key at offset 1. * * 'last' pointer indicates the last offset added to the page. + * + * 'truncextra' is the size of the posting list in itup, if any. This + * information is stashed for the next call here, when we may benefit + * from considering the impact of truncating away the posting list on + * the page before deciding to finish the page off. 
Posting lists are + * often relatively large, so it is worth going to the trouble of + * accounting for the saving from truncating away the posting list of + * the tuple that becomes the high key (that may be the only way to + * get close to target free space on the page). Note that this is + * only used for the soft fillfactor-wise limit, not the critical hard + * limit. *---------- */ static void -_bt_buildadd(BTWriteState *wstate, BTPageState *state, IndexTuple itup) +_bt_buildadd(BTWriteState *wstate, BTPageState *state, IndexTuple itup, + Size truncextra) { Page npage; BlockNumber nblkno; OffsetNumber last_off; + Size last_truncextra; Size pgspc; Size itupsz; bool isleaf; @@ -842,6 +863,8 @@ _bt_buildadd(BTWriteState *wstate, BTPageState *state, IndexTuple itup) npage = state->btps_page; nblkno = state->btps_blkno; last_off = state->btps_lastoff; + last_truncextra = state->btps_lastextra; + state->btps_lastextra = truncextra; pgspc = PageGetFreeSpace(npage); itupsz = IndexTupleSize(itup); @@ -883,10 +906,10 @@ _bt_buildadd(BTWriteState *wstate, BTPageState *state, IndexTuple itup) * page. Disregard fillfactor and insert on "full" current page if we * don't have the minimum number of items yet. (Note that we deliberately * assume that suffix truncation neither enlarges nor shrinks new high key - * when applying soft limit.) + * when applying soft limit, except when last tuple has a posting list.) */ if (pgspc < itupsz + (isleaf ? MAXALIGN(sizeof(ItemPointerData)) : 0) || - (pgspc < state->btps_full && last_off > P_FIRSTKEY)) + (pgspc + last_truncextra < state->btps_full && last_off > P_FIRSTKEY)) { /* * Finish off the page and write it out. @@ -944,11 +967,14 @@ _bt_buildadd(BTWriteState *wstate, BTPageState *state, IndexTuple itup) * We don't try to bias our choice of split point to make it more * likely that _bt_truncate() can truncate away more attributes, * whereas the split point used within _bt_split() is chosen much - * more delicately. Suffix truncation is mostly useful because it - * improves space utilization for workloads with random - * insertions. It doesn't seem worthwhile to add logic for - * choosing a split point here for a benefit that is bound to be - * much smaller. + * more delicately. Even still, the lastleft and firstright + * tuples passed to _bt_truncate() here are at least not fully + * equal to each other when deduplication is used, unless there is + * a large group of duplicates (also, unique index builds usually + * have few or no spool2 duplicates). When the split point is + * between two unequal tuples, _bt_truncate() will avoid including + * a heap TID in the new high key, which is the most important + * benefit of suffix truncation. * * Overwrite the old item with new truncated high key directly. * oitup is already located at the physical beginning of tuple @@ -983,7 +1009,7 @@ _bt_buildadd(BTWriteState *wstate, BTPageState *state, IndexTuple itup) Assert(BTreeTupleGetNAtts(state->btps_lowkey, wstate->index) == 0 || !P_LEFTMOST((BTPageOpaque) PageGetSpecialPointer(opage))); BTreeTupleSetDownLink(state->btps_lowkey, oblkno); - _bt_buildadd(wstate, state->btps_next, state->btps_lowkey); + _bt_buildadd(wstate, state->btps_next, state->btps_lowkey, 0); pfree(state->btps_lowkey); /* @@ -1045,6 +1071,43 @@ _bt_buildadd(BTWriteState *wstate, BTPageState *state, IndexTuple itup) state->btps_lastoff = last_off; } +/* + * Finalize pending posting list tuple, and add it to the index. Final tuple + * is based on saved base tuple, and saved list of heap TIDs. 
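A minimal standalone sketch of the accumulate-then-flush pattern that the deduplication pass follows may help here. Plain integers stand in for IndexTuple keys and ItemPointerData TIDs, and the helper names are illustrative assumptions rather than server functions:

/* Sketch only: dedup a sorted (key, tid) stream into (key, TID array) groups */
#include <stdio.h>

#define MAXTIDS 1024

static int pending_key;              /* key of current pending group */
static int pending_tids[MAXTIDS];    /* TIDs saved for that key so far */
static int npending = 0;

/* "Write out" the pending group: one key plus its sorted TID array */
static void
finish_pending(void)
{
    printf("key %d ->", pending_key);
    for (int i = 0; i < npending; i++)
        printf(" %d", pending_tids[i]);
    printf("\n");
    npending = 0;
}

int
main(void)
{
    /* sorted input, as the tuplesort would hand it over */
    int keys[] = {1, 1, 1, 2, 3, 3};
    int tids[] = {10, 11, 15, 20, 30, 31};

    for (int i = 0; i < 6; i++)
    {
        if (npending > 0 && keys[i] == pending_key && npending < MAXTIDS)
        {
            /* equal to the pending group's key: just remember the TID */
            pending_tids[npending++] = tids[i];
            continue;
        }

        /* key changed (or group is full): flush, then start a new group */
        if (npending > 0)
            finish_pending();
        pending_key = keys[i];
        pending_tids[0] = tids[i];
        npending = 1;
    }

    /* there is always a final pending group when any input was seen */
    if (npending > 0)
        finish_pending();

    return 0;
}

The real code differs in that a single-TID group is written out as a plain tuple rather than a posting list, and the group is capped by maxpostingsize in bytes rather than by an element count.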
+ * + * This is almost like _bt_dedup_finish_pending(), but it adds a new tuple + * using _bt_buildadd(). + */ +static void +_bt_sort_dedup_finish_pending(BTWriteState *wstate, BTPageState *state, + BTDedupState dstate) +{ + Assert(dstate->nitems > 0); + + if (dstate->nitems == 1) + _bt_buildadd(wstate, state, dstate->base, 0); + else + { + IndexTuple postingtuple; + Size truncextra; + + /* form a tuple with a posting list */ + postingtuple = _bt_form_posting(dstate->base, + dstate->htids, + dstate->nhtids); + /* Calculate posting list overhead */ + truncextra = IndexTupleSize(postingtuple) - + BTreeTupleGetPostingOffset(postingtuple); + + _bt_buildadd(wstate, state, postingtuple, truncextra); + pfree(postingtuple); + } + + dstate->nhtids = 0; + dstate->nitems = 0; + dstate->phystupsize = 0; +} + /* * Finish writing out the completed btree. */ @@ -1090,7 +1153,7 @@ _bt_uppershutdown(BTWriteState *wstate, BTPageState *state) Assert(BTreeTupleGetNAtts(s->btps_lowkey, wstate->index) == 0 || !P_LEFTMOST(opaque)); BTreeTupleSetDownLink(s->btps_lowkey, blkno); - _bt_buildadd(wstate, s->btps_next, s->btps_lowkey); + _bt_buildadd(wstate, s->btps_next, s->btps_lowkey, 0); pfree(s->btps_lowkey); s->btps_lowkey = NULL; } @@ -1111,7 +1174,8 @@ _bt_uppershutdown(BTWriteState *wstate, BTPageState *state) * by filling in a valid magic number in the metapage. */ metapage = (Page) palloc(BLCKSZ); - _bt_initmetapage(metapage, rootblkno, rootlevel); + _bt_initmetapage(metapage, rootblkno, rootlevel, + wstate->inskey->allequalimage); _bt_blwritepage(wstate, metapage, BTREE_METAPAGE); } @@ -1132,6 +1196,10 @@ _bt_load(BTWriteState *wstate, BTSpool *btspool, BTSpool *btspool2) keysz = IndexRelationGetNumberOfKeyAttributes(wstate->index); SortSupport sortKeys; int64 tuples_done = 0; + bool deduplicate; + + deduplicate = wstate->inskey->allequalimage && + BTGetDeduplicateItems(wstate->index); if (merge) { @@ -1228,12 +1296,12 @@ _bt_load(BTWriteState *wstate, BTSpool *btspool, BTSpool *btspool2) if (load1) { - _bt_buildadd(wstate, state, itup); + _bt_buildadd(wstate, state, itup, 0); itup = tuplesort_getindextuple(btspool->sortstate, true); } else { - _bt_buildadd(wstate, state, itup2); + _bt_buildadd(wstate, state, itup2, 0); itup2 = tuplesort_getindextuple(btspool2->sortstate, true); } @@ -1243,9 +1311,100 @@ _bt_load(BTWriteState *wstate, BTSpool *btspool, BTSpool *btspool2) } pfree(sortKeys); } + else if (deduplicate) + { + /* merge is unnecessary, deduplicate into posting lists */ + BTDedupState dstate; + + dstate = (BTDedupState) palloc(sizeof(BTDedupStateData)); + dstate->deduplicate = true; /* unused */ + dstate->maxpostingsize = 0; /* set later */ + /* Metadata about base tuple of current pending posting list */ + dstate->base = NULL; + dstate->baseoff = InvalidOffsetNumber; /* unused */ + dstate->basetupsize = 0; + /* Metadata about current pending posting list TIDs */ + dstate->htids = NULL; + dstate->nhtids = 0; + dstate->nitems = 0; + dstate->phystupsize = 0; /* unused */ + dstate->nintervals = 0; /* unused */ + + while ((itup = tuplesort_getindextuple(btspool->sortstate, + true)) != NULL) + { + /* When we see first tuple, create first index page */ + if (state == NULL) + { + state = _bt_pagestate(wstate, 0); + + /* + * Limit size of posting list tuples to 1/10 space we want to + * leave behind on the page, plus space for final item's line + * pointer. 
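To make the limit concrete, assuming the stock 8192-byte BLCKSZ, 8-byte maximum alignment, and 4-byte line pointers (ItemIdData):

    maxpostingsize = MAXALIGN_DOWN(8192 * 10 / 100) - sizeof(ItemIdData)
                   = MAXALIGN_DOWN(819) - 4
                   = 816 - 4
                   = 812 bytes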
This is equal to the space that we'd like to + * leave behind on each leaf page when fillfactor is 90, + * allowing us to get close to fillfactor% space utilization + * when there happen to be a great many duplicates. (This + * makes higher leaf fillfactor settings ineffective when + * building indexes that have many duplicates, but packing + * leaf pages full with few very large tuples doesn't seem + * like a useful goal.) + */ + dstate->maxpostingsize = MAXALIGN_DOWN((BLCKSZ * 10 / 100)) - + sizeof(ItemIdData); + Assert(dstate->maxpostingsize <= BTMaxItemSize(state->btps_page) && + dstate->maxpostingsize <= INDEX_SIZE_MASK); + dstate->htids = palloc(dstate->maxpostingsize); + + /* start new pending posting list with itup copy */ + _bt_dedup_start_pending(dstate, CopyIndexTuple(itup), + InvalidOffsetNumber); + } + else if (_bt_keep_natts_fast(wstate->index, dstate->base, + itup) > keysz && + _bt_dedup_save_htid(dstate, itup)) + { + /* + * Tuple is equal to base tuple of pending posting list. Heap + * TID from itup has been saved in state. + */ + } + else + { + /* + * Tuple is not equal to pending posting list tuple, or + * _bt_dedup_save_htid() opted to not merge current item into + * pending posting list. + */ + _bt_sort_dedup_finish_pending(wstate, state, dstate); + pfree(dstate->base); + + /* start new pending posting list with itup copy */ + _bt_dedup_start_pending(dstate, CopyIndexTuple(itup), + InvalidOffsetNumber); + } + + /* Report progress */ + pgstat_progress_update_param(PROGRESS_CREATEIDX_TUPLES_DONE, + ++tuples_done); + } + + if (state) + { + /* + * Handle the last item (there must be a last item when the + * tuplesort returned one or more tuples) + */ + _bt_sort_dedup_finish_pending(wstate, state, dstate); + pfree(dstate->base); + pfree(dstate->htids); + } + + pfree(dstate); + } else { - /* merge is unnecessary */ + /* merging and deduplication are both unnecessary */ while ((itup = tuplesort_getindextuple(btspool->sortstate, true)) != NULL) { @@ -1253,7 +1412,7 @@ _bt_load(BTWriteState *wstate, BTSpool *btspool, BTSpool *btspool2) if (state == NULL) state = _bt_pagestate(wstate, 0); - _bt_buildadd(wstate, state, itup); + _bt_buildadd(wstate, state, itup, 0); /* Report progress */ pgstat_progress_update_param(PROGRESS_CREATEIDX_TUPLES_DONE, diff --git a/src/backend/access/nbtree/nbtsplitloc.c b/src/backend/access/nbtree/nbtsplitloc.c index 76c2d945c8..8ba055be9e 100644 --- a/src/backend/access/nbtree/nbtsplitloc.c +++ b/src/backend/access/nbtree/nbtsplitloc.c @@ -183,6 +183,9 @@ _bt_findsplitloc(Relation rel, state.minfirstrightsz = SIZE_MAX; state.newitemoff = newitemoff; + /* newitem cannot be a posting list item */ + Assert(!BTreeTupleIsPosting(newitem)); + /* * maxsplits should never exceed maxoff because there will be at most as * many candidate split points as there are points _between_ tuples, once @@ -459,6 +462,7 @@ _bt_recsplitloc(FindSplitData *state, int16 leftfree, rightfree; Size firstrightitemsz; + Size postingsz = 0; bool newitemisfirstonright; /* Is the new item going to be the first item on the right page? */ @@ -468,8 +472,30 @@ _bt_recsplitloc(FindSplitData *state, if (newitemisfirstonright) firstrightitemsz = state->newitemsz; else + { firstrightitemsz = firstoldonrightsz; + /* + * Calculate suffix truncation space saving when firstright is a + * posting list tuple, though only when the firstright is over 64 + * bytes including line pointer overhead (arbitrary). 
This avoids + * accessing the tuple in cases where its posting list must be very + * small (if firstright has one at all). + */ + if (state->is_leaf && firstrightitemsz > 64) + { + ItemId itemid; + IndexTuple newhighkey; + + itemid = PageGetItemId(state->page, firstoldonright); + newhighkey = (IndexTuple) PageGetItem(state->page, itemid); + + if (BTreeTupleIsPosting(newhighkey)) + postingsz = IndexTupleSize(newhighkey) - + BTreeTupleGetPostingOffset(newhighkey); + } + } + /* Account for all the old tuples */ leftfree = state->leftspace - olddataitemstoleft; rightfree = state->rightspace - @@ -491,11 +517,17 @@ _bt_recsplitloc(FindSplitData *state, * If we are on the leaf level, assume that suffix truncation cannot avoid * adding a heap TID to the left half's new high key when splitting at the * leaf level. In practice the new high key will often be smaller and - * will rarely be larger, but conservatively assume the worst case. + * will rarely be larger, but conservatively assume the worst case. We do + * go to the trouble of subtracting away posting list overhead, though + * only when it looks like it will make an appreciable difference. + * (Posting lists are the only case where truncation will typically make + * the final high key far smaller than firstright, so being a bit more + * precise there noticeably improves the balance of free space.) */ if (state->is_leaf) leftfree -= (int16) (firstrightitemsz + - MAXALIGN(sizeof(ItemPointerData))); + MAXALIGN(sizeof(ItemPointerData)) - + postingsz); else leftfree -= (int16) firstrightitemsz; @@ -691,7 +723,8 @@ _bt_afternewitemoff(FindSplitData *state, OffsetNumber maxoff, itemid = PageGetItemId(state->page, OffsetNumberPrev(state->newitemoff)); tup = (IndexTuple) PageGetItem(state->page, itemid); /* Do cheaper test first */ - if (!_bt_adjacenthtid(&tup->t_tid, &state->newitem->t_tid)) + if (BTreeTupleIsPosting(tup) || + !_bt_adjacenthtid(&tup->t_tid, &state->newitem->t_tid)) return false; /* Check same conditions as rightmost item case, too */ keepnatts = _bt_keep_natts_fast(state->rel, tup, state->newitem); diff --git a/src/backend/access/nbtree/nbtutils.c b/src/backend/access/nbtree/nbtutils.c index af07732eab..54afa6f417 100644 --- a/src/backend/access/nbtree/nbtutils.c +++ b/src/backend/access/nbtree/nbtutils.c @@ -81,7 +81,10 @@ static int _bt_keep_natts(Relation rel, IndexTuple lastleft, * determine whether or not the keys in the index are expected to be * unique (i.e. if this is a "heapkeyspace" index). We assume a * heapkeyspace index when caller passes a NULL tuple, allowing index - * build callers to avoid accessing the non-existent metapage. + * build callers to avoid accessing the non-existent metapage. We + * also assume that the index is _not_ allequalimage when a NULL tuple + * is passed; CREATE INDEX callers call _bt_allequalimage() to set the + * field themselves. 
*/ BTScanInsert _bt_mkscankey(Relation rel, IndexTuple itup) @@ -108,7 +111,14 @@ _bt_mkscankey(Relation rel, IndexTuple itup) */ key = palloc(offsetof(BTScanInsertData, scankeys) + sizeof(ScanKeyData) * indnkeyatts); - key->heapkeyspace = itup == NULL || _bt_heapkeyspace(rel); + if (itup) + _bt_metaversion(rel, &key->heapkeyspace, &key->allequalimage); + else + { + /* Utility statement callers can set these fields themselves */ + key->heapkeyspace = true; + key->allequalimage = false; + } key->anynullkeys = false; /* initial assumption */ key->nextkey = false; key->pivotsearch = false; @@ -1374,6 +1384,7 @@ _bt_checkkeys(IndexScanDesc scan, IndexTuple tuple, int tupnatts, * attribute passes the qual. */ Assert(ScanDirectionIsForward(dir)); + Assert(BTreeTupleIsPivot(tuple)); continue; } @@ -1535,6 +1546,7 @@ _bt_check_rowcompare(ScanKey skey, IndexTuple tuple, int tupnatts, * attribute passes the qual. */ Assert(ScanDirectionIsForward(dir)); + Assert(BTreeTupleIsPivot(tuple)); cmpresult = 0; if (subkey->sk_flags & SK_ROW_END) break; @@ -1774,10 +1786,65 @@ _bt_killitems(IndexScanDesc scan) { ItemId iid = PageGetItemId(page, offnum); IndexTuple ituple = (IndexTuple) PageGetItem(page, iid); + bool killtuple = false; - if (ItemPointerEquals(&ituple->t_tid, &kitem->heapTid)) + if (BTreeTupleIsPosting(ituple)) { - /* found the item */ + int pi = i + 1; + int nposting = BTreeTupleGetNPosting(ituple); + int j; + + /* + * Note that we rely on the assumption that heap TIDs in the + * scanpos items array are always in ascending heap TID order + * within a posting list + */ + for (j = 0; j < nposting; j++) + { + ItemPointer item = BTreeTupleGetPostingN(ituple, j); + + if (!ItemPointerEquals(item, &kitem->heapTid)) + break; /* out of posting list loop */ + + /* kitem must have matching offnum when heap TIDs match */ + Assert(kitem->indexOffset == offnum); + + /* + * Read-ahead to later kitems here. + * + * We rely on the assumption that not advancing kitem here + * will prevent us from considering the posting list tuple + * fully dead by not matching its next heap TID in next + * loop iteration. + * + * If, on the other hand, this is the final heap TID in + * the posting list tuple, then tuple gets killed + * regardless (i.e. we handle the case where the last + * kitem is also the last heap TID in the last index tuple + * correctly -- posting tuple still gets killed). + */ + if (pi < numKilled) + kitem = &so->currPos.items[so->killedItems[pi++]]; + } + + /* + * Don't bother advancing the outermost loop's int iterator to + * avoid processing killed items that relate to the same + * offnum/posting list tuple. This micro-optimization hardly + * seems worth it. (Further iterations of the outermost loop + * will fail to match on this same posting list's first heap + * TID instead, so we'll advance to the next offnum/index + * tuple pretty quickly.) 
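The invariant applied above can also be shown as a simplified standalone check: a posting list tuple is only a candidate for LP_DEAD when every TID it carries was killed by the scan. Plain ints stand in for heap TIDs, both arrays are in ascending order, and this is a sketch of the rule rather than the exact read-ahead control flow used here:

#include <stdbool.h>
#include <stdio.h>

/* true only if every posting[] entry appears in killed[] (both ascending) */
static bool
posting_all_dead(const int *posting, int nposting,
                 const int *killed, int nkilled)
{
    int k = 0;

    for (int j = 0; j < nposting; j++)
    {
        /* advance through killed[] looking for posting[j] */
        while (k < nkilled && killed[k] < posting[j])
            k++;
        if (k == nkilled || killed[k] != posting[j])
            return false;       /* at least one TID is still live */
    }

    return true;                /* whole tuple may be marked LP_DEAD */
}

int
main(void)
{
    int posting[] = {10, 11, 15};
    int killed_all[] = {10, 11, 15, 20};
    int killed_some[] = {10, 15};

    printf("%d %d\n",
           posting_all_dead(posting, 3, killed_all, 4),    /* prints 1 */
           posting_all_dead(posting, 3, killed_some, 2));  /* prints 0 */
    return 0;
}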
+ */ + if (j == nposting) + killtuple = true; + } + else if (ItemPointerEquals(&ituple->t_tid, &kitem->heapTid)) + killtuple = true; + + if (killtuple) + { + /* found the item/all posting list items */ ItemIdMarkDead(iid); killedsomething = true; break; /* out of inner search loop */ @@ -2018,7 +2085,9 @@ btoptions(Datum reloptions, bool validate) static const relopt_parse_elt tab[] = { {"fillfactor", RELOPT_TYPE_INT, offsetof(BTOptions, fillfactor)}, {"vacuum_cleanup_index_scale_factor", RELOPT_TYPE_REAL, - offsetof(BTOptions, vacuum_cleanup_index_scale_factor)} + offsetof(BTOptions, vacuum_cleanup_index_scale_factor)}, + {"deduplicate_items", RELOPT_TYPE_BOOL, + offsetof(BTOptions, deduplicate_items)} }; @@ -2119,11 +2188,10 @@ _bt_truncate(Relation rel, IndexTuple lastleft, IndexTuple firstright, Size newsize; /* - * We should only ever truncate leaf index tuples. It's never okay to - * truncate a second time. + * We should only ever truncate non-pivot tuples from leaf pages. It's + * never okay to truncate when splitting an internal page. */ - Assert(BTreeTupleGetNAtts(lastleft, rel) == natts); - Assert(BTreeTupleGetNAtts(firstright, rel) == natts); + Assert(!BTreeTupleIsPivot(lastleft) && !BTreeTupleIsPivot(firstright)); /* Determine how many attributes must be kept in truncated tuple */ keepnatts = _bt_keep_natts(rel, lastleft, firstright, itup_key); @@ -2139,6 +2207,19 @@ _bt_truncate(Relation rel, IndexTuple lastleft, IndexTuple firstright, pivot = index_truncate_tuple(itupdesc, firstright, keepnatts); + if (BTreeTupleIsPosting(pivot)) + { + /* + * index_truncate_tuple() just returns a straight copy of + * firstright when it has no key attributes to truncate. We need + * to truncate away the posting list ourselves. + */ + Assert(keepnatts == nkeyatts); + Assert(natts == nkeyatts); + pivot->t_info &= ~INDEX_SIZE_MASK; + pivot->t_info |= MAXALIGN(BTreeTupleGetPostingOffset(firstright)); + } + /* * If there is a distinguishing key attribute within new pivot tuple, * there is no need to add an explicit heap TID attribute @@ -2155,6 +2236,8 @@ _bt_truncate(Relation rel, IndexTuple lastleft, IndexTuple firstright, * attribute to the new pivot tuple. */ Assert(natts != nkeyatts); + Assert(!BTreeTupleIsPosting(lastleft) && + !BTreeTupleIsPosting(firstright)); newsize = IndexTupleSize(pivot) + MAXALIGN(sizeof(ItemPointerData)); tidpivot = palloc0(newsize); memcpy(tidpivot, pivot, IndexTupleSize(pivot)); @@ -2172,6 +2255,19 @@ _bt_truncate(Relation rel, IndexTuple lastleft, IndexTuple firstright, newsize = IndexTupleSize(firstright) + MAXALIGN(sizeof(ItemPointerData)); pivot = palloc0(newsize); memcpy(pivot, firstright, IndexTupleSize(firstright)); + + if (BTreeTupleIsPosting(firstright)) + { + /* + * New pivot tuple was copied from firstright, which happens to be + * a posting list tuple. We will have to include the max lastleft + * heap TID in the final pivot tuple, but we can remove the + * posting list now. (Pivot tuples should never contain a posting + * list.) 
+ */ + newsize = MAXALIGN(BTreeTupleGetPostingOffset(firstright)) + + MAXALIGN(sizeof(ItemPointerData)); + } } /* @@ -2199,7 +2295,7 @@ _bt_truncate(Relation rel, IndexTuple lastleft, IndexTuple firstright, */ pivotheaptid = (ItemPointer) ((char *) pivot + newsize - sizeof(ItemPointerData)); - ItemPointerCopy(&lastleft->t_tid, pivotheaptid); + ItemPointerCopy(BTreeTupleGetMaxHeapTID(lastleft), pivotheaptid); /* * Lehman and Yao require that the downlink to the right page, which is to @@ -2210,9 +2306,12 @@ _bt_truncate(Relation rel, IndexTuple lastleft, IndexTuple firstright, * tiebreaker. */ #ifndef DEBUG_NO_TRUNCATE - Assert(ItemPointerCompare(&lastleft->t_tid, &firstright->t_tid) < 0); - Assert(ItemPointerCompare(pivotheaptid, &lastleft->t_tid) >= 0); - Assert(ItemPointerCompare(pivotheaptid, &firstright->t_tid) < 0); + Assert(ItemPointerCompare(BTreeTupleGetMaxHeapTID(lastleft), + BTreeTupleGetHeapTID(firstright)) < 0); + Assert(ItemPointerCompare(pivotheaptid, + BTreeTupleGetHeapTID(lastleft)) >= 0); + Assert(ItemPointerCompare(pivotheaptid, + BTreeTupleGetHeapTID(firstright)) < 0); #else /* @@ -2225,7 +2324,7 @@ _bt_truncate(Relation rel, IndexTuple lastleft, IndexTuple firstright, * attribute values along with lastleft's heap TID value when lastleft's * TID happens to be greater than firstright's TID. */ - ItemPointerCopy(&firstright->t_tid, pivotheaptid); + ItemPointerCopy(BTreeTupleGetHeapTID(firstright), pivotheaptid); /* * Pivot heap TID should never be fully equal to firstright. Note that @@ -2234,7 +2333,8 @@ _bt_truncate(Relation rel, IndexTuple lastleft, IndexTuple firstright, */ ItemPointerSetOffsetNumber(pivotheaptid, OffsetNumberPrev(ItemPointerGetOffsetNumber(pivotheaptid))); - Assert(ItemPointerCompare(pivotheaptid, &firstright->t_tid) < 0); + Assert(ItemPointerCompare(pivotheaptid, + BTreeTupleGetHeapTID(firstright)) < 0); #endif BTreeTupleSetNAtts(pivot, nkeyatts); @@ -2301,6 +2401,13 @@ _bt_keep_natts(Relation rel, IndexTuple lastleft, IndexTuple firstright, keepnatts++; } + /* + * Assert that _bt_keep_natts_fast() agrees with us in passing. This is + * expected in an allequalimage index. + */ + Assert(!itup_key->allequalimage || + keepnatts == _bt_keep_natts_fast(rel, lastleft, firstright)); + return keepnatts; } @@ -2315,13 +2422,16 @@ _bt_keep_natts(Relation rel, IndexTuple lastleft, IndexTuple firstright, * The approach taken here usually provides the same answer as _bt_keep_natts * will (for the same pair of tuples from a heapkeyspace index), since the * majority of btree opclasses can never indicate that two datums are equal - * unless they're bitwise equal after detoasting. + * unless they're bitwise equal after detoasting. When an index only has + * "equal image" columns, routine is guaranteed to give the same result as + * _bt_keep_natts would. * - * These issues must be acceptable to callers, typically because they're only - * concerned about making suffix truncation as effective as possible without - * leaving excessive amounts of free space on either side of page split. * Callers can rely on the fact that attributes considered equal here are - * definitely also equal according to _bt_keep_natts. + * definitely also equal according to _bt_keep_natts, even when the index uses + * an opclass or collation that is not "allequalimage"/deduplication-safe. + * This weaker guarantee is good enough for nbtsplitloc.c caller, since false + * negatives generally only have the effect of making leaf page splits use a + * more balanced split point. 
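The function body is elided by this hunk, so a rough standalone sketch of the idea may be useful: walk the key attributes in order, stop at the first one whose values are not bitwise equal, and report how many attributes a pivot would have to keep. Attribute values are modelled here as byte strings (the datum_image_eq() notion of equality), and the names are illustrative assumptions only:

#include <stdio.h>
#include <string.h>

/* number of leading equal attributes, plus one (the "keep natts" result) */
static int
keep_natts_fast_sketch(const char *lastleft[], const char *firstright[],
                       int nkeyatts)
{
    int keepnatts = 1;

    for (int attnum = 0; attnum < nkeyatts; attnum++)
    {
        if (strcmp(lastleft[attnum], firstright[attnum]) != 0)
            break;              /* first attribute that tells them apart */
        keepnatts++;
    }

    return keepnatts;
}

int
main(void)
{
    const char *left[] = {"usa", "texas", "austin"};
    const char *right[] = {"usa", "texas", "houston"};

    /* attributes 1 and 2 match, so a pivot must keep 3 attributes */
    printf("%d\n", keep_natts_fast_sketch(left, right, 3));
    return 0;
}

A result greater than nkeyatts means every key attribute matched, which is the case where the heap TID tiebreaker has to be retained.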
*/ int _bt_keep_natts_fast(Relation rel, IndexTuple lastleft, IndexTuple firstright) @@ -2393,28 +2503,42 @@ _bt_check_natts(Relation rel, bool heapkeyspace, Page page, OffsetNumber offnum) * Mask allocated for number of keys in index tuple must be able to fit * maximum possible number of index attributes */ - StaticAssertStmt(BT_N_KEYS_OFFSET_MASK >= INDEX_MAX_KEYS, - "BT_N_KEYS_OFFSET_MASK can't fit INDEX_MAX_KEYS"); + StaticAssertStmt(BT_OFFSET_MASK >= INDEX_MAX_KEYS, + "BT_OFFSET_MASK can't fit INDEX_MAX_KEYS"); itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, offnum)); tupnatts = BTreeTupleGetNAtts(itup, rel); + /* !heapkeyspace indexes do not support deduplication */ + if (!heapkeyspace && BTreeTupleIsPosting(itup)) + return false; + + /* Posting list tuples should never have "pivot heap TID" bit set */ + if (BTreeTupleIsPosting(itup) && + (ItemPointerGetOffsetNumberNoCheck(&itup->t_tid) & + BT_PIVOT_HEAP_TID_ATTR) != 0) + return false; + + /* INCLUDE indexes do not support deduplication */ + if (natts != nkeyatts && BTreeTupleIsPosting(itup)) + return false; + if (P_ISLEAF(opaque)) { if (offnum >= P_FIRSTDATAKEY(opaque)) { /* - * Non-pivot tuples currently never use alternative heap TID - * representation -- even those within heapkeyspace indexes + * Non-pivot tuple should never be explicitly marked as a pivot + * tuple */ - if ((itup->t_info & INDEX_ALT_TID_MASK) != 0) + if (BTreeTupleIsPivot(itup)) return false; /* * Leaf tuples that are not the page high key (non-pivot tuples) * should never be truncated. (Note that tupnatts must have been - * inferred, rather than coming from an explicit on-disk - * representation.) + * inferred, even with a posting list tuple, because only pivot + * tuples store tupnatts directly.) */ return tupnatts == natts; } @@ -2458,12 +2582,12 @@ _bt_check_natts(Relation rel, bool heapkeyspace, Page page, OffsetNumber offnum) * non-zero, or when there is no explicit representation and the * tuple is evidently not a pre-pg_upgrade tuple. * - * Prior to v11, downlinks always had P_HIKEY as their offset. Use - * that to decide if the tuple is a pre-v11 tuple. + * Prior to v11, downlinks always had P_HIKEY as their offset. + * Accept that as an alternative indication of a valid + * !heapkeyspace negative infinity tuple. */ return tupnatts == 0 || - ((itup->t_info & INDEX_ALT_TID_MASK) == 0 && - ItemPointerGetOffsetNumber(&(itup->t_tid)) == P_HIKEY); + ItemPointerGetOffsetNumber(&(itup->t_tid)) == P_HIKEY; } else { @@ -2489,7 +2613,11 @@ _bt_check_natts(Relation rel, bool heapkeyspace, Page page, OffsetNumber offnum) * heapkeyspace index pivot tuples, regardless of whether or not there are * non-key attributes. 
*/ - if ((itup->t_info & INDEX_ALT_TID_MASK) == 0) + if (!BTreeTupleIsPivot(itup)) + return false; + + /* Pivot tuple should not use posting list representation (redundant) */ + if (BTreeTupleIsPosting(itup)) return false; /* @@ -2559,8 +2687,8 @@ _bt_check_third_page(Relation rel, Relation heap, bool needheaptidspace, BTMaxItemSizeNoHeapTid(page), RelationGetRelationName(rel)), errdetail("Index row references tuple (%u,%u) in relation \"%s\".", - ItemPointerGetBlockNumber(&newtup->t_tid), - ItemPointerGetOffsetNumber(&newtup->t_tid), + ItemPointerGetBlockNumber(BTreeTupleGetHeapTID(newtup)), + ItemPointerGetOffsetNumber(BTreeTupleGetHeapTID(newtup)), RelationGetRelationName(heap)), errhint("Values larger than 1/3 of a buffer page cannot be indexed.\n" "Consider a function index of an MD5 hash of the value, " diff --git a/src/backend/access/nbtree/nbtxlog.c b/src/backend/access/nbtree/nbtxlog.c index 2e5202c2d6..99d0914e72 100644 --- a/src/backend/access/nbtree/nbtxlog.c +++ b/src/backend/access/nbtree/nbtxlog.c @@ -22,6 +22,9 @@ #include "access/xlogutils.h" #include "miscadmin.h" #include "storage/procarray.h" +#include "utils/memutils.h" + +static MemoryContext opCtx; /* working memory for operations */ /* * _bt_restore_page -- re-enter all the index tuples on a page @@ -111,6 +114,7 @@ _bt_restore_meta(XLogReaderState *record, uint8 block_id) Assert(md->btm_version >= BTREE_NOVAC_VERSION); md->btm_oldest_btpo_xact = xlrec->oldest_btpo_xact; md->btm_last_cleanup_num_heap_tuples = xlrec->last_cleanup_num_heap_tuples; + md->btm_allequalimage = xlrec->allequalimage; pageop = (BTPageOpaque) PageGetSpecialPointer(metapg); pageop->btpo_flags = BTP_META; @@ -156,7 +160,8 @@ _bt_clear_incomplete_split(XLogReaderState *record, uint8 block_id) } static void -btree_xlog_insert(bool isleaf, bool ismeta, XLogReaderState *record) +btree_xlog_insert(bool isleaf, bool ismeta, bool posting, + XLogReaderState *record) { XLogRecPtr lsn = record->EndRecPtr; xl_btree_insert *xlrec = (xl_btree_insert *) XLogRecGetData(record); @@ -181,9 +186,52 @@ btree_xlog_insert(bool isleaf, bool ismeta, XLogReaderState *record) page = BufferGetPage(buffer); - if (PageAddItem(page, (Item) datapos, datalen, xlrec->offnum, - false, false) == InvalidOffsetNumber) - elog(PANIC, "btree_xlog_insert: failed to add item"); + if (!posting) + { + /* Simple retail insertion */ + if (PageAddItem(page, (Item) datapos, datalen, xlrec->offnum, + false, false) == InvalidOffsetNumber) + elog(PANIC, "failed to add new item"); + } + else + { + ItemId itemid; + IndexTuple oposting, + newitem, + nposting; + uint16 postingoff; + + /* + * A posting list split occurred during leaf page insertion. WAL + * record data will start with an offset number representing the + * point in an existing posting list that a split occurs at. + * + * Use _bt_swap_posting() to repeat posting list split steps from + * primary. Note that newitem from WAL record is 'orignewitem', + * not the final version of newitem that is actually inserted on + * page. 
+ */ + postingoff = *((uint16 *) datapos); + datapos += sizeof(uint16); + datalen -= sizeof(uint16); + + itemid = PageGetItemId(page, OffsetNumberPrev(xlrec->offnum)); + oposting = (IndexTuple) PageGetItem(page, itemid); + + /* Use mutable, aligned newitem copy in _bt_swap_posting() */ + Assert(isleaf && postingoff > 0); + newitem = CopyIndexTuple((IndexTuple) datapos); + nposting = _bt_swap_posting(newitem, oposting, postingoff); + + /* Replace existing posting list with post-split version */ + memcpy(oposting, nposting, MAXALIGN(IndexTupleSize(nposting))); + + /* Insert "final" new item (not orignewitem from WAL stream) */ + Assert(IndexTupleSize(newitem) == datalen); + if (PageAddItem(page, (Item) newitem, datalen, xlrec->offnum, + false, false) == InvalidOffsetNumber) + elog(PANIC, "failed to add posting split new item"); + } PageSetLSN(page, lsn); MarkBufferDirty(buffer); @@ -265,20 +313,38 @@ btree_xlog_split(bool onleft, XLogReaderState *record) BTPageOpaque lopaque = (BTPageOpaque) PageGetSpecialPointer(lpage); OffsetNumber off; IndexTuple newitem = NULL, - left_hikey = NULL; + left_hikey = NULL, + nposting = NULL; Size newitemsz = 0, left_hikeysz = 0; Page newlpage; - OffsetNumber leftoff; + OffsetNumber leftoff, + replacepostingoff = InvalidOffsetNumber; datapos = XLogRecGetBlockData(record, 0, &datalen); - if (onleft) + if (onleft || xlrec->postingoff != 0) { newitem = (IndexTuple) datapos; newitemsz = MAXALIGN(IndexTupleSize(newitem)); datapos += newitemsz; datalen -= newitemsz; + + if (xlrec->postingoff != 0) + { + ItemId itemid; + IndexTuple oposting; + + /* Posting list must be at offset number before new item's */ + replacepostingoff = OffsetNumberPrev(xlrec->newitemoff); + + /* Use mutable, aligned newitem copy in _bt_swap_posting() */ + newitem = CopyIndexTuple(newitem); + itemid = PageGetItemId(lpage, replacepostingoff); + oposting = (IndexTuple) PageGetItem(lpage, itemid); + nposting = _bt_swap_posting(newitem, oposting, + xlrec->postingoff); + } } /* @@ -308,8 +374,20 @@ btree_xlog_split(bool onleft, XLogReaderState *record) Size itemsz; IndexTuple item; + /* Add replacement posting list when required */ + if (off == replacepostingoff) + { + Assert(onleft || xlrec->firstright == xlrec->newitemoff); + if (PageAddItem(newlpage, (Item) nposting, + MAXALIGN(IndexTupleSize(nposting)), leftoff, + false, false) == InvalidOffsetNumber) + elog(ERROR, "failed to add new posting list item to left page after split"); + leftoff = OffsetNumberNext(leftoff); + continue; /* don't insert oposting */ + } + /* add the new item if it was inserted on left page */ - if (onleft && off == xlrec->newitemoff) + else if (onleft && off == xlrec->newitemoff) { if (PageAddItem(newlpage, (Item) newitem, newitemsz, leftoff, false, false) == InvalidOffsetNumber) @@ -383,6 +461,98 @@ btree_xlog_split(bool onleft, XLogReaderState *record) } } +static void +btree_xlog_dedup(XLogReaderState *record) +{ + XLogRecPtr lsn = record->EndRecPtr; + xl_btree_dedup *xlrec = (xl_btree_dedup *) XLogRecGetData(record); + Buffer buf; + + if (XLogReadBufferForRedo(record, 0, &buf) == BLK_NEEDS_REDO) + { + char *ptr = XLogRecGetBlockData(record, 0, NULL); + Page page = (Page) BufferGetPage(buf); + BTPageOpaque opaque = (BTPageOpaque) PageGetSpecialPointer(page); + OffsetNumber offnum, + minoff, + maxoff; + BTDedupState state; + BTDedupInterval *intervals; + Page newpage; + + state = (BTDedupState) palloc(sizeof(BTDedupStateData)); + state->deduplicate = true; /* unused */ + /* Conservatively use larger maxpostingsize 
than primary */ + state->maxpostingsize = BTMaxItemSize(page); + state->base = NULL; + state->baseoff = InvalidOffsetNumber; + state->basetupsize = 0; + state->htids = palloc(state->maxpostingsize); + state->nhtids = 0; + state->nitems = 0; + state->phystupsize = 0; + state->nintervals = 0; + + minoff = P_FIRSTDATAKEY(opaque); + maxoff = PageGetMaxOffsetNumber(page); + newpage = PageGetTempPageCopySpecial(page); + + if (!P_RIGHTMOST(opaque)) + { + ItemId itemid = PageGetItemId(page, P_HIKEY); + Size itemsz = ItemIdGetLength(itemid); + IndexTuple item = (IndexTuple) PageGetItem(page, itemid); + + if (PageAddItem(newpage, (Item) item, itemsz, P_HIKEY, + false, false) == InvalidOffsetNumber) + elog(ERROR, "deduplication failed to add highkey"); + } + + intervals = (BTDedupInterval *) ptr; + for (offnum = minoff; + offnum <= maxoff; + offnum = OffsetNumberNext(offnum)) + { + ItemId itemid = PageGetItemId(page, offnum); + IndexTuple itup = (IndexTuple) PageGetItem(page, itemid); + + if (offnum == minoff) + _bt_dedup_start_pending(state, itup, offnum); + else if (state->nintervals < xlrec->nintervals && + state->baseoff == intervals[state->nintervals].baseoff && + state->nitems < intervals[state->nintervals].nitems) + { + if (!_bt_dedup_save_htid(state, itup)) + elog(ERROR, "deduplication failed to add heap tid to pending posting list"); + } + else + { + _bt_dedup_finish_pending(newpage, state); + _bt_dedup_start_pending(state, itup, offnum); + } + } + + _bt_dedup_finish_pending(newpage, state); + Assert(state->nintervals == xlrec->nintervals); + Assert(memcmp(state->intervals, intervals, + state->nintervals * sizeof(BTDedupInterval)) == 0); + + if (P_HAS_GARBAGE(opaque)) + { + BTPageOpaque nopaque = (BTPageOpaque) PageGetSpecialPointer(newpage); + + nopaque->btpo_flags &= ~BTP_HAS_GARBAGE; + } + + PageRestoreTempPage(newpage, page); + PageSetLSN(page, lsn); + MarkBufferDirty(buf); + } + + if (BufferIsValid(buf)) + UnlockReleaseBuffer(buf); +} + static void btree_xlog_vacuum(XLogReaderState *record) { @@ -405,7 +575,56 @@ btree_xlog_vacuum(XLogReaderState *record) page = (Page) BufferGetPage(buffer); - PageIndexMultiDelete(page, (OffsetNumber *) ptr, xlrec->ndeleted); + if (xlrec->nupdated > 0) + { + OffsetNumber *updatedoffsets; + xl_btree_update *updates; + + updatedoffsets = (OffsetNumber *) + (ptr + xlrec->ndeleted * sizeof(OffsetNumber)); + updates = (xl_btree_update *) ((char *) updatedoffsets + + xlrec->nupdated * + sizeof(OffsetNumber)); + + for (int i = 0; i < xlrec->nupdated; i++) + { + BTVacuumPosting vacposting; + IndexTuple origtuple; + ItemId itemid; + Size itemsz; + + itemid = PageGetItemId(page, updatedoffsets[i]); + origtuple = (IndexTuple) PageGetItem(page, itemid); + + vacposting = palloc(offsetof(BTVacuumPostingData, deletetids) + + updates->ndeletedtids * sizeof(uint16)); + vacposting->updatedoffset = updatedoffsets[i]; + vacposting->itup = origtuple; + vacposting->ndeletedtids = updates->ndeletedtids; + memcpy(vacposting->deletetids, + (char *) updates + SizeOfBtreeUpdate, + updates->ndeletedtids * sizeof(uint16)); + + _bt_update_posting(vacposting); + + /* Overwrite updated version of tuple */ + itemsz = MAXALIGN(IndexTupleSize(vacposting->itup)); + if (!PageIndexTupleOverwrite(page, updatedoffsets[i], + (Item) vacposting->itup, itemsz)) + elog(PANIC, "failed to update partially dead item"); + + pfree(vacposting->itup); + pfree(vacposting); + + /* advance to next xl_btree_update from array */ + updates = (xl_btree_update *) + ((char *) updates + SizeOfBtreeUpdate + + 
updates->ndeletedtids * sizeof(uint16)); + } + } + + if (xlrec->ndeleted > 0) + PageIndexMultiDelete(page, (OffsetNumber *) ptr, xlrec->ndeleted); /* * Mark the page as not containing any LP_DEAD items --- see comments @@ -724,17 +943,19 @@ void btree_redo(XLogReaderState *record) { uint8 info = XLogRecGetInfo(record) & ~XLR_INFO_MASK; + MemoryContext oldCtx; + oldCtx = MemoryContextSwitchTo(opCtx); switch (info) { case XLOG_BTREE_INSERT_LEAF: - btree_xlog_insert(true, false, record); + btree_xlog_insert(true, false, false, record); break; case XLOG_BTREE_INSERT_UPPER: - btree_xlog_insert(false, false, record); + btree_xlog_insert(false, false, false, record); break; case XLOG_BTREE_INSERT_META: - btree_xlog_insert(false, true, record); + btree_xlog_insert(false, true, false, record); break; case XLOG_BTREE_SPLIT_L: btree_xlog_split(true, record); @@ -742,6 +963,12 @@ btree_redo(XLogReaderState *record) case XLOG_BTREE_SPLIT_R: btree_xlog_split(false, record); break; + case XLOG_BTREE_INSERT_POST: + btree_xlog_insert(true, false, true, record); + break; + case XLOG_BTREE_DEDUP: + btree_xlog_dedup(record); + break; case XLOG_BTREE_VACUUM: btree_xlog_vacuum(record); break; @@ -767,6 +994,23 @@ btree_redo(XLogReaderState *record) default: elog(PANIC, "btree_redo: unknown op code %u", info); } + MemoryContextSwitchTo(oldCtx); + MemoryContextReset(opCtx); +} + +void +btree_xlog_startup(void) +{ + opCtx = AllocSetContextCreate(CurrentMemoryContext, + "Btree recovery temporary context", + ALLOCSET_DEFAULT_SIZES); +} + +void +btree_xlog_cleanup(void) +{ + MemoryContextDelete(opCtx); + opCtx = NULL; } /* diff --git a/src/backend/access/rmgrdesc/nbtdesc.c b/src/backend/access/rmgrdesc/nbtdesc.c index 7d63a7124e..7a1616f371 100644 --- a/src/backend/access/rmgrdesc/nbtdesc.c +++ b/src/backend/access/rmgrdesc/nbtdesc.c @@ -27,6 +27,7 @@ btree_desc(StringInfo buf, XLogReaderState *record) case XLOG_BTREE_INSERT_LEAF: case XLOG_BTREE_INSERT_UPPER: case XLOG_BTREE_INSERT_META: + case XLOG_BTREE_INSERT_POST: { xl_btree_insert *xlrec = (xl_btree_insert *) rec; @@ -38,15 +39,24 @@ btree_desc(StringInfo buf, XLogReaderState *record) { xl_btree_split *xlrec = (xl_btree_split *) rec; - appendStringInfo(buf, "level %u, firstright %d, newitemoff %d", - xlrec->level, xlrec->firstright, xlrec->newitemoff); + appendStringInfo(buf, "level %u, firstright %d, newitemoff %d, postingoff %d", + xlrec->level, xlrec->firstright, + xlrec->newitemoff, xlrec->postingoff); + break; + } + case XLOG_BTREE_DEDUP: + { + xl_btree_dedup *xlrec = (xl_btree_dedup *) rec; + + appendStringInfo(buf, "nintervals %u", xlrec->nintervals); break; } case XLOG_BTREE_VACUUM: { xl_btree_vacuum *xlrec = (xl_btree_vacuum *) rec; - appendStringInfo(buf, "ndeleted %u", xlrec->ndeleted); + appendStringInfo(buf, "ndeleted %u; nupdated %u", + xlrec->ndeleted, xlrec->nupdated); break; } case XLOG_BTREE_DELETE: @@ -130,6 +140,12 @@ btree_identify(uint8 info) case XLOG_BTREE_SPLIT_R: id = "SPLIT_R"; break; + case XLOG_BTREE_INSERT_POST: + id = "INSERT_POST"; + break; + case XLOG_BTREE_DEDUP: + id = "DEDUP"; + break; case XLOG_BTREE_VACUUM: id = "VACUUM"; break; diff --git a/src/backend/storage/page/bufpage.c b/src/backend/storage/page/bufpage.c index 4ea6ea7a3d..cb7b8c8a63 100644 --- a/src/backend/storage/page/bufpage.c +++ b/src/backend/storage/page/bufpage.c @@ -1048,8 +1048,10 @@ PageIndexTupleDeleteNoCompact(Page page, OffsetNumber offnum) * This is better than deleting and reinserting the tuple, because it * avoids any data shifting when the tuple 
size doesn't change; and * even when it does, we avoid moving the line pointers around. - * Conceivably this could also be of use to an index AM that cares about - * the physical order of tuples as well as their ItemId order. + * This could be used by an index AM that doesn't want to unset the + * LP_DEAD bit when it happens to be set. It could conceivably also be + * used by an index AM that cares about the physical order of tuples as + * well as their logical/ItemId order. * * If there's insufficient space for the new tuple, return false. Other * errors represent data-corruption problems, so we just elog. @@ -1134,8 +1136,9 @@ PageIndexTupleOverwrite(Page page, OffsetNumber offnum, } } - /* Update the item's tuple length (other fields shouldn't change) */ - ItemIdSetNormal(tupid, offset + size_diff, newsize); + /* Update the item's tuple length without changing its lp_flags field */ + tupid->lp_off = offset + size_diff; + tupid->lp_len = newsize; /* Copy new tuple data onto page */ memcpy(PageGetItem(page, tupid), newtup, newsize); diff --git a/src/bin/psql/tab-complete.c b/src/bin/psql/tab-complete.c index dc03fbde13..b6b08d0ccb 100644 --- a/src/bin/psql/tab-complete.c +++ b/src/bin/psql/tab-complete.c @@ -1731,14 +1731,14 @@ psql_completion(const char *text, int start, int end) /* ALTER INDEX SET|RESET ( */ else if (Matches("ALTER", "INDEX", MatchAny, "RESET", "(")) COMPLETE_WITH("fillfactor", - "vacuum_cleanup_index_scale_factor", /* BTREE */ + "vacuum_cleanup_index_scale_factor", "deduplicate_items", /* BTREE */ "fastupdate", "gin_pending_list_limit", /* GIN */ "buffering", /* GiST */ "pages_per_range", "autosummarize" /* BRIN */ ); else if (Matches("ALTER", "INDEX", MatchAny, "SET", "(")) COMPLETE_WITH("fillfactor =", - "vacuum_cleanup_index_scale_factor =", /* BTREE */ + "vacuum_cleanup_index_scale_factor =", "deduplicate_items =", /* BTREE */ "fastupdate =", "gin_pending_list_limit =", /* GIN */ "buffering =", /* GiST */ "pages_per_range =", "autosummarize =" /* BRIN */ diff --git a/src/include/access/nbtree.h b/src/include/access/nbtree.h index e8d4d2b55b..bfe49f46b0 100644 --- a/src/include/access/nbtree.h +++ b/src/include/access/nbtree.h @@ -108,6 +108,7 @@ typedef struct BTMetaPageData * pages */ float8 btm_last_cleanup_num_heap_tuples; /* number of heap tuples * during last cleanup */ + bool btm_allequalimage; /* are all columns "equalimage"? */ } BTMetaPageData; #define BTPageGetMeta(p) \ @@ -124,6 +125,14 @@ typedef struct BTMetaPageData * need to be immediately re-indexed at pg_upgrade. In order to get the * new heapkeyspace semantics, however, a REINDEX is needed. * + * Deduplication is safe to use when the btm_allequalimage field is set to + * true. It's safe to read the btm_allequalimage field on version 3, but + * only version 4 indexes make use of deduplication. Even version 4 + * indexes created on PostgreSQL v12 will need a REINDEX to make use of + * deduplication, though, since there is no other way to set + * btm_allequalimage to true (pg_upgrade hasn't been taught to set the + * metapage field). + * * Btree version 2 is mostly the same as version 3. There are two new * fields in the metapage that were introduced in version 3. 
A version 2 * metapage will be automatically upgraded to version 3 on the first @@ -156,6 +165,21 @@ typedef struct BTMetaPageData MAXALIGN(SizeOfPageHeaderData + 3*sizeof(ItemIdData)) - \ MAXALIGN(sizeof(BTPageOpaqueData))) / 3) +/* + * MaxTIDsPerBTreePage is an upper bound on the number of heap TIDs tuples + * that may be stored on a btree leaf page. It is used to size the + * per-page temporary buffers used by index scans.) + * + * Note: we don't bother considering per-tuple overheads here to keep + * things simple (value is based on how many elements a single array of + * heap TIDs must have to fill the space between the page header and + * special area). The value is slightly higher (i.e. more conservative) + * than necessary as a result, which is considered acceptable. + */ +#define MaxTIDsPerBTreePage \ + (int) ((BLCKSZ - SizeOfPageHeaderData - sizeof(BTPageOpaqueData)) / \ + sizeof(ItemPointerData)) + /* * The leaf-page fillfactor defaults to 90% but is user-adjustable. * For pages above the leaf level, we use a fixed 70% fillfactor. @@ -230,16 +254,15 @@ typedef struct BTMetaPageData * tuples (non-pivot tuples). _bt_check_natts() enforces the rules * described here. * - * Non-pivot tuple format: + * Non-pivot tuple format (plain/non-posting variant): * * t_tid | t_info | key values | INCLUDE columns, if any * * t_tid points to the heap TID, which is a tiebreaker key column as of - * BTREE_VERSION 4. Currently, the INDEX_ALT_TID_MASK status bit is never - * set for non-pivot tuples. + * BTREE_VERSION 4. * - * All other types of index tuples ("pivot" tuples) only have key columns, - * since pivot tuples only exist to represent how the key space is + * Non-pivot tuples complement pivot tuples, which only have key columns. + * The sole purpose of pivot tuples is to represent how the key space is * separated. In general, any B-Tree index that has more than one level * (i.e. any index that does not just consist of a metapage and a single * leaf root page) must have some number of pivot tuples, since pivot @@ -264,7 +287,8 @@ typedef struct BTMetaPageData * INDEX_ALT_TID_MASK bit is set, which doesn't count the trailing heap * TID column sometimes stored in pivot tuples -- that's represented by * the presence of BT_PIVOT_HEAP_TID_ATTR. The INDEX_ALT_TID_MASK bit in - * t_info is always set on BTREE_VERSION 4 pivot tuples. + * t_info is always set on BTREE_VERSION 4 pivot tuples, since + * BTreeTupleIsPivot() must work reliably on heapkeyspace versions. * * In version 3 indexes, the INDEX_ALT_TID_MASK flag might not be set in * pivot tuples. In that case, the number of key columns is implicitly @@ -279,90 +303,256 @@ typedef struct BTMetaPageData * The 12 least significant offset bits from t_tid are used to represent * the number of columns in INDEX_ALT_TID_MASK tuples, leaving 4 status * bits (BT_RESERVED_OFFSET_MASK bits), 3 of which that are reserved for - * future use. BT_N_KEYS_OFFSET_MASK should be large enough to store any - * number of columns/attributes <= INDEX_MAX_KEYS. + * future use. BT_OFFSET_MASK should be large enough to store any number + * of columns/attributes <= INDEX_MAX_KEYS. + * + * Sometimes non-pivot tuples also use a representation that repurposes + * t_tid to store metadata rather than a TID. PostgreSQL v13 introduced a + * new non-pivot tuple format to support deduplication: posting list + * tuples. Deduplication merges together multiple equal non-pivot tuples + * into a logically equivalent, space efficient representation. 
A posting + * list is an array of ItemPointerData elements. Non-pivot tuples are + * merged together to form posting list tuples lazily, at the point where + * we'd otherwise have to split a leaf page. + * + * Posting tuple format (alternative non-pivot tuple representation): + * + * t_tid | t_info | key values | posting list (TID array) + * + * Posting list tuples are recognized as such by having the + * INDEX_ALT_TID_MASK status bit set in t_info and the BT_IS_POSTING status + * bit set in t_tid. These flags redefine the content of the posting + * tuple's t_tid to store an offset to the posting list, as well as the + * total number of posting list array elements. + * + * The 12 least significant offset bits from t_tid are used to represent + * the number of posting items present in the tuple, leaving 4 status + * bits (BT_RESERVED_OFFSET_MASK bits), 3 of which that are reserved for + * future use. Like any non-pivot tuple, the number of columns stored is + * always implicitly the total number in the index (in practice there can + * never be non-key columns stored, since deduplication is not supported + * with INCLUDE indexes). BT_OFFSET_MASK should be large enough to store + * any number of posting list TIDs that might be present in a tuple (since + * tuple size is subject to the INDEX_SIZE_MASK limit). * * Note well: The macros that deal with the number of attributes in tuples - * assume that a tuple with INDEX_ALT_TID_MASK set must be a pivot tuple, - * and that a tuple without INDEX_ALT_TID_MASK set must be a non-pivot - * tuple (or must have the same number of attributes as the index has - * generally in the case of !heapkeyspace indexes). They will need to be - * updated if non-pivot tuples ever get taught to use INDEX_ALT_TID_MASK - * for something else. + * assume that a tuple with INDEX_ALT_TID_MASK set must be a pivot tuple or + * non-pivot posting tuple, and that a tuple without INDEX_ALT_TID_MASK set + * must be a non-pivot tuple (or must have the same number of attributes as + * the index has generally in the case of !heapkeyspace indexes). */ #define INDEX_ALT_TID_MASK INDEX_AM_RESERVED_BIT /* Item pointer offset bits */ #define BT_RESERVED_OFFSET_MASK 0xF000 -#define BT_N_KEYS_OFFSET_MASK 0x0FFF +#define BT_OFFSET_MASK 0x0FFF #define BT_PIVOT_HEAP_TID_ATTR 0x1000 - -/* Get/set downlink block number in pivot tuple */ -#define BTreeTupleGetDownLink(itup) \ - ItemPointerGetBlockNumberNoCheck(&((itup)->t_tid)) -#define BTreeTupleSetDownLink(itup, blkno) \ - ItemPointerSetBlockNumber(&((itup)->t_tid), (blkno)) +#define BT_IS_POSTING 0x2000 /* - * Get/set leaf page highkey's link. During the second phase of deletion, the - * target leaf page's high key may point to an ancestor page (at all other - * times, the leaf level high key's link is not used). See the nbtree README - * for full details. 
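To make the t_tid offset-number encoding described above concrete, here is a small self-contained program that packs and unpacks a posting list's TID count the same way BTreeTupleSetPosting() and BTreeTupleIsPosting()/BTreeTupleGetNPosting() do. Only the mask values are taken from the patch; the IndexTuple plumbing around them is deliberately omitted.

/*
 * Minimal standalone illustration of how a posting list tuple's t_tid
 * offset number packs a TID count together with status bits.  The mask
 * values mirror the nbtree.h definitions; everything else is simplified.
 */
#include <assert.h>
#include <stdint.h>
#include <stdio.h>

#define BT_RESERVED_OFFSET_MASK	0xF000
#define BT_OFFSET_MASK			0x0FFF
#define BT_PIVOT_HEAP_TID_ATTR	0x1000
#define BT_IS_POSTING			0x2000

int
main(void)
{
	uint16_t	nhtids = 270;	/* number of heap TIDs in the posting list */
	uint16_t	offnum;

	assert((nhtids & BT_OFFSET_MASK) == nhtids);

	/* Encode, as BTreeTupleSetPosting() does for the offset number */
	offnum = nhtids | BT_IS_POSTING;

	/* Decode, as BTreeTupleIsPosting()/BTreeTupleGetNPosting() do */
	printf("is posting: %d\n", (offnum & BT_IS_POSTING) != 0);
	printf("nposting:   %u\n", offnum & BT_OFFSET_MASK);
	return 0;
}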
+ * Note: BTreeTupleIsPivot() can have false negatives (but not false + * positives) when used with !heapkeyspace indexes */ -#define BTreeTupleGetTopParent(itup) \ - ItemPointerGetBlockNumberNoCheck(&((itup)->t_tid)) -#define BTreeTupleSetTopParent(itup, blkno) \ - do { \ - ItemPointerSetBlockNumber(&((itup)->t_tid), (blkno)); \ - BTreeTupleSetNAtts((itup), 0); \ - } while(0) +static inline bool +BTreeTupleIsPivot(IndexTuple itup) +{ + if ((itup->t_info & INDEX_ALT_TID_MASK) == 0) + return false; + /* absence of BT_IS_POSTING in offset number indicates pivot tuple */ + if ((ItemPointerGetOffsetNumberNoCheck(&itup->t_tid) & BT_IS_POSTING) != 0) + return false; + + return true; +} + +static inline bool +BTreeTupleIsPosting(IndexTuple itup) +{ + if ((itup->t_info & INDEX_ALT_TID_MASK) == 0) + return false; + /* presence of BT_IS_POSTING in offset number indicates posting tuple */ + if ((ItemPointerGetOffsetNumberNoCheck(&itup->t_tid) & BT_IS_POSTING) == 0) + return false; + + return true; +} + +static inline void +BTreeTupleSetPosting(IndexTuple itup, int nhtids, int postingoffset) +{ + Assert(nhtids > 1 && (nhtids & BT_OFFSET_MASK) == nhtids); + Assert(postingoffset == MAXALIGN(postingoffset)); + Assert(postingoffset < INDEX_SIZE_MASK); + + itup->t_info |= INDEX_ALT_TID_MASK; + ItemPointerSetOffsetNumber(&itup->t_tid, (nhtids | BT_IS_POSTING)); + ItemPointerSetBlockNumber(&itup->t_tid, postingoffset); +} + +static inline uint16 +BTreeTupleGetNPosting(IndexTuple posting) +{ + OffsetNumber existing; + + Assert(BTreeTupleIsPosting(posting)); + + existing = ItemPointerGetOffsetNumberNoCheck(&posting->t_tid); + return (existing & BT_OFFSET_MASK); +} + +static inline uint32 +BTreeTupleGetPostingOffset(IndexTuple posting) +{ + Assert(BTreeTupleIsPosting(posting)); + + return ItemPointerGetBlockNumberNoCheck(&posting->t_tid); +} + +static inline ItemPointer +BTreeTupleGetPosting(IndexTuple posting) +{ + return (ItemPointer) ((char *) posting + + BTreeTupleGetPostingOffset(posting)); +} + +static inline ItemPointer +BTreeTupleGetPostingN(IndexTuple posting, int n) +{ + return BTreeTupleGetPosting(posting) + n; +} /* - * Get/set number of attributes within B-tree index tuple. + * Get/set downlink block number in pivot tuple. + * + * Note: Cannot assert that tuple is a pivot tuple. If we did so then + * !heapkeyspace indexes would exhibit false positive assertion failures. + */ +static inline BlockNumber +BTreeTupleGetDownLink(IndexTuple pivot) +{ + return ItemPointerGetBlockNumberNoCheck(&pivot->t_tid); +} + +static inline void +BTreeTupleSetDownLink(IndexTuple pivot, BlockNumber blkno) +{ + ItemPointerSetBlockNumber(&pivot->t_tid, blkno); +} + +/* + * Get number of attributes within tuple. * * Note that this does not include an implicit tiebreaker heap TID * attribute, if any. Note also that the number of key attributes must be * explicitly represented in all heapkeyspace pivot tuples. + * + * Note: This is defined as a macro rather than an inline function to + * avoid including rel.h. */ #define BTreeTupleGetNAtts(itup, rel) \ ( \ - (itup)->t_info & INDEX_ALT_TID_MASK ? \ + (BTreeTupleIsPivot(itup)) ? 
\ ( \ - ItemPointerGetOffsetNumberNoCheck(&(itup)->t_tid) & BT_N_KEYS_OFFSET_MASK \ + ItemPointerGetOffsetNumberNoCheck(&(itup)->t_tid) & BT_OFFSET_MASK \ ) \ : \ IndexRelationGetNumberOfAttributes(rel) \ ) -#define BTreeTupleSetNAtts(itup, n) \ - do { \ - (itup)->t_info |= INDEX_ALT_TID_MASK; \ - ItemPointerSetOffsetNumber(&(itup)->t_tid, (n) & BT_N_KEYS_OFFSET_MASK); \ - } while(0) /* - * Get tiebreaker heap TID attribute, if any. Macro works with both pivot - * and non-pivot tuples, despite differences in how heap TID is represented. + * Set number of attributes in tuple, making it into a pivot tuple */ -#define BTreeTupleGetHeapTID(itup) \ - ( \ - (itup)->t_info & INDEX_ALT_TID_MASK && \ - (ItemPointerGetOffsetNumberNoCheck(&(itup)->t_tid) & BT_PIVOT_HEAP_TID_ATTR) != 0 ? \ - ( \ - (ItemPointer) (((char *) (itup) + IndexTupleSize(itup)) - \ - sizeof(ItemPointerData)) \ - ) \ - : (itup)->t_info & INDEX_ALT_TID_MASK ? NULL : (ItemPointer) &((itup)->t_tid) \ - ) +static inline void +BTreeTupleSetNAtts(IndexTuple itup, int natts) +{ + Assert(natts <= INDEX_MAX_KEYS); + + itup->t_info |= INDEX_ALT_TID_MASK; + /* BT_IS_POSTING bit may be unset -- tuple always becomes a pivot tuple */ + ItemPointerSetOffsetNumber(&itup->t_tid, natts); + Assert(BTreeTupleIsPivot(itup)); +} + /* - * Set the heap TID attribute for a tuple that uses the INDEX_ALT_TID_MASK - * representation (currently limited to pivot tuples) + * Set the bit indicating heap TID attribute present in pivot tuple */ -#define BTreeTupleSetAltHeapTID(itup) \ - do { \ - Assert((itup)->t_info & INDEX_ALT_TID_MASK); \ - ItemPointerSetOffsetNumber(&(itup)->t_tid, \ - ItemPointerGetOffsetNumberNoCheck(&(itup)->t_tid) | BT_PIVOT_HEAP_TID_ATTR); \ - } while(0) +static inline void +BTreeTupleSetAltHeapTID(IndexTuple pivot) +{ + OffsetNumber existing; + + Assert(BTreeTupleIsPivot(pivot)); + + existing = ItemPointerGetOffsetNumberNoCheck(&pivot->t_tid); + ItemPointerSetOffsetNumber(&pivot->t_tid, + existing | BT_PIVOT_HEAP_TID_ATTR); +} + +/* + * Get/set leaf page's "top parent" link from its high key. Used during page + * deletion. + * + * Note: Cannot assert that tuple is a pivot tuple. If we did so then + * !heapkeyspace indexes would exhibit false positive assertion failures. + */ +static inline BlockNumber +BTreeTupleGetTopParent(IndexTuple leafhikey) +{ + return ItemPointerGetBlockNumberNoCheck(&leafhikey->t_tid); +} + +static inline void +BTreeTupleSetTopParent(IndexTuple leafhikey, BlockNumber blkno) +{ + ItemPointerSetBlockNumber(&leafhikey->t_tid, blkno); + BTreeTupleSetNAtts(leafhikey, 0); +} + +/* + * Get tiebreaker heap TID attribute, if any. + * + * This returns the first/lowest heap TID in the case of a posting list tuple. + */ +static inline ItemPointer +BTreeTupleGetHeapTID(IndexTuple itup) +{ + if (BTreeTupleIsPivot(itup)) + { + /* Pivot tuple heap TID representation? */ + if ((ItemPointerGetOffsetNumberNoCheck(&itup->t_tid) & + BT_PIVOT_HEAP_TID_ATTR) != 0) + return (ItemPointer) ((char *) itup + IndexTupleSize(itup) - + sizeof(ItemPointerData)); + + /* Heap TID attribute was truncated */ + return NULL; + } + else if (BTreeTupleIsPosting(itup)) + return BTreeTupleGetPosting(itup); + + return &itup->t_tid; +} + +/* + * Get maximum heap TID attribute, which could be the only TID in the case of + * a non-pivot tuple that does not have a posting list tuple. + * + * Works with non-pivot tuples only. 
+ */ +static inline ItemPointer +BTreeTupleGetMaxHeapTID(IndexTuple itup) +{ + Assert(!BTreeTupleIsPivot(itup)); + + if (BTreeTupleIsPosting(itup)) + { + uint16 nposting = BTreeTupleGetNPosting(itup); + + return BTreeTupleGetPostingN(itup, nposting - 1); + } + + return &itup->t_tid; +} /* * Operator strategy numbers for B-tree have been moved to access/stratnum.h, @@ -439,6 +629,9 @@ typedef BTStackData *BTStack; * indexes whose version is >= version 4. It's convenient to keep this close * by, rather than accessing the metapage repeatedly. * + * allequalimage is set to indicate that deduplication is safe for the index. + * This is also a property of the index relation rather than an indexscan. + * * anynullkeys indicates if any of the keys had NULL value when scankey was * built from index tuple (note that already-truncated tuple key attributes * set NULL as a placeholder key value, which also affects value of @@ -474,6 +667,7 @@ typedef BTStackData *BTStack; typedef struct BTScanInsertData { bool heapkeyspace; + bool allequalimage; bool anynullkeys; bool nextkey; bool pivotsearch; @@ -512,10 +706,94 @@ typedef struct BTInsertStateData bool bounds_valid; OffsetNumber low; OffsetNumber stricthigh; + + /* + * if _bt_binsrch_insert found the location inside existing posting list, + * save the position inside the list. -1 sentinel value indicates overlap + * with an existing posting list tuple that has its LP_DEAD bit set. + */ + int postingoff; } BTInsertStateData; typedef BTInsertStateData *BTInsertState; +/* + * State used to representing an individual pending tuple during + * deduplication. + */ +typedef struct BTDedupInterval +{ + OffsetNumber baseoff; + uint16 nitems; +} BTDedupInterval; + +/* + * BTDedupStateData is a working area used during deduplication. + * + * The status info fields track the state of a whole-page deduplication pass. + * State about the current pending posting list is also tracked. + * + * A pending posting list is comprised of a contiguous group of equal items + * from the page, starting from page offset number 'baseoff'. This is the + * offset number of the "base" tuple for new posting list. 'nitems' is the + * current total number of existing items from the page that will be merged to + * make a new posting list tuple, including the base tuple item. (Existing + * items may themselves be posting list tuples, or regular non-pivot tuples.) + * + * The total size of the existing tuples to be freed when pending posting list + * is processed gets tracked by 'phystupsize'. This information allows + * deduplication to calculate the space saving for each new posting list + * tuple, and for the entire pass over the page as a whole. + */ +typedef struct BTDedupStateData +{ + /* Deduplication status info for entire pass over page */ + bool deduplicate; /* Still deduplicating page? */ + Size maxpostingsize; /* Limit on size of final tuple */ + + /* Metadata about base tuple of current pending posting list */ + IndexTuple base; /* Use to form new posting list */ + OffsetNumber baseoff; /* page offset of base */ + Size basetupsize; /* base size without original posting list */ + + /* Other metadata about pending posting list */ + ItemPointer htids; /* Heap TIDs in pending posting list */ + int nhtids; /* Number of heap TIDs in htids array */ + int nitems; /* Number of existing tuples/line pointers */ + Size phystupsize; /* Includes line pointer overhead */ + + /* + * Array of tuples to go on new version of the page. Contains one entry + * for each group of consecutive items. 
Note that existing tuples that + * will not become posting list tuples do not appear in the array (they + * are implicitly unchanged by deduplication pass). + */ + int nintervals; /* current size of intervals array */ + BTDedupInterval intervals[MaxIndexTuplesPerPage]; +} BTDedupStateData; + +typedef BTDedupStateData *BTDedupState; + +/* + * BTVacuumPostingData is state that represents how to VACUUM a posting list + * tuple when some (though not all) of its TIDs are to be deleted. + * + * Convention is that itup field is the original posting list tuple on input, + * and palloc()'d final tuple used to overwrite existing tuple on output. + */ +typedef struct BTVacuumPostingData +{ + /* Tuple that will be/was updated */ + IndexTuple itup; + OffsetNumber updatedoffset; + + /* State needed to describe final itup in WAL */ + uint16 ndeletedtids; + uint16 deletetids[FLEXIBLE_ARRAY_MEMBER]; +} BTVacuumPostingData; + +typedef BTVacuumPostingData *BTVacuumPosting; + /* * BTScanOpaqueData is the btree-private state needed for an indexscan. * This consists of preprocessed scan keys (see _bt_preprocess_keys() for @@ -539,7 +817,9 @@ typedef BTInsertStateData *BTInsertState; * If we are doing an index-only scan, we save the entire IndexTuple for each * matched item, otherwise only its heap TID and offset. The IndexTuples go * into a separate workspace array; each BTScanPosItem stores its tuple's - * offset within that array. + * offset within that array. Posting list tuples store a "base" tuple once, + * allowing the same key to be returned for each TID in the posting list + * tuple. */ typedef struct BTScanPosItem /* what we remember about each match */ @@ -583,7 +863,7 @@ typedef struct BTScanPosData int lastItem; /* last valid index in items[] */ int itemIndex; /* current index in items[] */ - BTScanPosItem items[MaxIndexTuplesPerPage]; /* MUST BE LAST */ + BTScanPosItem items[MaxTIDsPerBTreePage]; /* MUST BE LAST */ } BTScanPosData; typedef BTScanPosData *BTScanPos; @@ -691,6 +971,7 @@ typedef struct BTOptions int fillfactor; /* page fill factor in percent (0..100) */ /* fraction of newly inserted tuples prior to trigger index cleanup */ float8 vacuum_cleanup_index_scale_factor; + bool deduplicate_items; /* Try to deduplicate items? */ } BTOptions; #define BTGetFillFactor(relation) \ @@ -701,6 +982,11 @@ typedef struct BTOptions BTREE_DEFAULT_FILLFACTOR) #define BTGetTargetPageFreeSpace(relation) \ (BLCKSZ * (100 - BTGetFillFactor(relation)) / 100) +#define BTGetDeduplicateItems(relation) \ + (AssertMacro(relation->rd_rel->relkind == RELKIND_INDEX && \ + relation->rd_rel->relam == BTREE_AM_OID), \ + ((relation)->rd_options ? \ + ((BTOptions *) (relation)->rd_options)->deduplicate_items : true)) /* * Constant definition for progress reporting. 
Phase numbers must match @@ -747,6 +1033,22 @@ extern void _bt_parallel_release(IndexScanDesc scan, BlockNumber scan_page); extern void _bt_parallel_done(IndexScanDesc scan); extern void _bt_parallel_advance_array_keys(IndexScanDesc scan); +/* + * prototypes for functions in nbtdedup.c + */ +extern void _bt_dedup_one_page(Relation rel, Buffer buf, Relation heapRel, + IndexTuple newitem, Size newitemsz, + bool checkingunique); +extern void _bt_dedup_start_pending(BTDedupState state, IndexTuple base, + OffsetNumber baseoff); +extern bool _bt_dedup_save_htid(BTDedupState state, IndexTuple itup); +extern Size _bt_dedup_finish_pending(Page newpage, BTDedupState state); +extern IndexTuple _bt_form_posting(IndexTuple base, ItemPointer htids, + int nhtids); +extern void _bt_update_posting(BTVacuumPosting vacposting); +extern IndexTuple _bt_swap_posting(IndexTuple newitem, IndexTuple oposting, + int postingoff); + /* * prototypes for functions in nbtinsert.c */ @@ -765,14 +1067,16 @@ extern OffsetNumber _bt_findsplitloc(Relation rel, Page page, /* * prototypes for functions in nbtpage.c */ -extern void _bt_initmetapage(Page page, BlockNumber rootbknum, uint32 level); +extern void _bt_initmetapage(Page page, BlockNumber rootbknum, uint32 level, + bool allequalimage); extern void _bt_update_meta_cleanup_info(Relation rel, TransactionId oldestBtpoXact, float8 numHeapTuples); extern void _bt_upgrademetapage(Page page); extern Buffer _bt_getroot(Relation rel, int access); extern Buffer _bt_gettrueroot(Relation rel); extern int _bt_getrootheight(Relation rel); -extern bool _bt_heapkeyspace(Relation rel); +extern void _bt_metaversion(Relation rel, bool *heapkeyspace, + bool *allequalimage); extern void _bt_checkpage(Relation rel, Buffer buf); extern Buffer _bt_getbuf(Relation rel, BlockNumber blkno, int access); extern Buffer _bt_relandgetbuf(Relation rel, Buffer obuf, @@ -781,7 +1085,8 @@ extern void _bt_relbuf(Relation rel, Buffer buf); extern void _bt_pageinit(Page page, Size size); extern bool _bt_page_recyclable(Page page); extern void _bt_delitems_vacuum(Relation rel, Buffer buf, - OffsetNumber *deletable, int ndeletable); + OffsetNumber *deletable, int ndeletable, + BTVacuumPosting *updatable, int nupdatable); extern void _bt_delitems_delete(Relation rel, Buffer buf, OffsetNumber *deletable, int ndeletable, Relation heapRel); diff --git a/src/include/access/nbtxlog.h b/src/include/access/nbtxlog.h index 776a9bd723..347976c532 100644 --- a/src/include/access/nbtxlog.h +++ b/src/include/access/nbtxlog.h @@ -28,7 +28,8 @@ #define XLOG_BTREE_INSERT_META 0x20 /* same, plus update metapage */ #define XLOG_BTREE_SPLIT_L 0x30 /* add index tuple with split */ #define XLOG_BTREE_SPLIT_R 0x40 /* as above, new item on right */ -/* 0x50 and 0x60 are unused */ +#define XLOG_BTREE_INSERT_POST 0x50 /* add index tuple with posting split */ +#define XLOG_BTREE_DEDUP 0x60 /* deduplicate tuples for a page */ #define XLOG_BTREE_DELETE 0x70 /* delete leaf index tuples for a page */ #define XLOG_BTREE_UNLINK_PAGE 0x80 /* delete a half-dead page */ #define XLOG_BTREE_UNLINK_PAGE_META 0x90 /* same, and update metapage */ @@ -53,21 +54,34 @@ typedef struct xl_btree_metadata uint32 fastlevel; TransactionId oldest_btpo_xact; float8 last_cleanup_num_heap_tuples; + bool allequalimage; } xl_btree_metadata; /* * This is what we need to know about simple (without split) insert. * - * This data record is used for INSERT_LEAF, INSERT_UPPER, INSERT_META. - * Note that INSERT_META implies it's not a leaf page. 
+ * This data record is used for INSERT_LEAF, INSERT_UPPER, INSERT_META, and + * INSERT_POST. Note that INSERT_META and INSERT_UPPER implies it's not a + * leaf page, while INSERT_POST and INSERT_LEAF imply that it must be a leaf + * page. * - * Backup Blk 0: original page (data contains the inserted tuple) + * Backup Blk 0: original page * Backup Blk 1: child's left sibling, if INSERT_UPPER or INSERT_META * Backup Blk 2: xl_btree_metadata, if INSERT_META + * + * Note: The new tuple is actually the "original" new item in the posting + * list split insert case (i.e. the INSERT_POST case). A split offset for + * the posting list is logged before the original new item. Recovery needs + * both, since it must do an in-place update of the existing posting list + * that was split as an extra step. Also, recovery generates a "final" + * newitem. See _bt_swap_posting() for details on posting list splits. */ typedef struct xl_btree_insert { OffsetNumber offnum; + + /* POSTING SPLIT OFFSET FOLLOWS (INSERT_POST case) */ + /* NEW TUPLE ALWAYS FOLLOWS AT THE END */ } xl_btree_insert; #define SizeOfBtreeInsert (offsetof(xl_btree_insert, offnum) + sizeof(OffsetNumber)) @@ -92,8 +106,37 @@ typedef struct xl_btree_insert * Backup Blk 0: original page / new left page * * The left page's data portion contains the new item, if it's the _L variant. - * An IndexTuple representing the high key of the left page must follow with - * either variant. + * _R variant split records generally do not have a newitem (_R variant leaf + * page split records that must deal with a posting list split will include an + * explicit newitem, though it is never used on the right page -- it is + * actually an orignewitem needed to update existing posting list). The new + * high key of the left/original page appears last of all (and must always be + * present). + * + * Page split records that need the REDO routine to deal with a posting list + * split directly will have an explicit newitem, which is actually an + * orignewitem (the newitem as it was before the posting list split, not + * after). A posting list split always has a newitem that comes immediately + * after the posting list being split (which would have overlapped with + * orignewitem prior to split). Usually REDO must deal with posting list + * splits with an _L variant page split record, and usually both the new + * posting list and the final newitem go on the left page (the existing + * posting list will be inserted instead of the old, and the final newitem + * will be inserted next to that). However, _R variant split records will + * include an orignewitem when the split point for the page happens to have a + * lastleft tuple that is also the posting list being split (leaving newitem + * as the page split's firstright tuple). The existence of this corner case + * does not change the basic fact about newitem/orignewitem for the REDO + * routine: it is always state used for the left page alone. (This is why the + * record's postingoff field isn't a reliable indicator of whether or not a + * posting list split occurred during the page split; a non-zero value merely + * indicates that the REDO routine must reconstruct a new posting list tuple + * that is needed for the left page.) + * + * This posting list split handling is equivalent to the xl_btree_insert REDO + * routine's INSERT_POST handling. While the details are more complicated + * here, the concept and goals are exactly the same. See _bt_swap_posting() + * for details on posting list splits. 
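As a plain-C illustration of the posting list split that this WAL commentary keeps referring to, the toy program below mimics what _bt_swap_posting() does with the TID array: the incoming TID is slotted into its ordered position, and the displaced rightmost TID becomes the heap TID of the final newitem. Integers stand in for heap TIDs; everything else about the real function (tuple copying, the orignewitem/postingoff WAL state) is omitted.

/*
 * Toy model of the TID shuffle performed during a posting list split.
 * Plain ints stand in for heap TIDs; the real _bt_swap_posting() works on
 * ItemPointerData arrays embedded in index tuples.
 */
#include <stdio.h>
#include <string.h>

int
main(void)
{
	int			posting[] = {10, 20, 30, 40, 50};	/* existing posting list TIDs */
	int			nhtids = 5;
	int			newtid = 25;	/* incoming TID, falls inside the list's range */
	int			postingoff = 2; /* position where newtid belongs (20 < 25 < 30) */
	int			finalnewtid;

	/* The rightmost TID is displaced and becomes the "final" new item */
	finalnewtid = posting[nhtids - 1];

	/* Shift TIDs at postingoff.. one place right, then drop newtid in the gap */
	memmove(&posting[postingoff + 1], &posting[postingoff],
			(nhtids - postingoff - 1) * sizeof(int));
	posting[postingoff] = newtid;

	for (int i = 0; i < nhtids; i++)
		printf("%d ", posting[i]);		/* 10 20 25 30 40 */
	printf("\nfinal newitem TID: %d\n", finalnewtid);	/* 50 */
	return 0;
}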
* * Backup Blk 1: new right page * @@ -111,15 +154,33 @@ typedef struct xl_btree_split { uint32 level; /* tree level of page being split */ OffsetNumber firstright; /* first item moved to right page */ - OffsetNumber newitemoff; /* new item's offset (useful for _L variant) */ + OffsetNumber newitemoff; /* new item's offset */ + uint16 postingoff; /* offset inside orig posting tuple */ } xl_btree_split; -#define SizeOfBtreeSplit (offsetof(xl_btree_split, newitemoff) + sizeof(OffsetNumber)) +#define SizeOfBtreeSplit (offsetof(xl_btree_split, postingoff) + sizeof(uint16)) + +/* + * When page is deduplicated, consecutive groups of tuples with equal keys are + * merged together into posting list tuples. + * + * The WAL record represents a deduplication pass for a leaf page. An array + * of BTDedupInterval structs follows. + */ +typedef struct xl_btree_dedup +{ + uint16 nintervals; + + /* DEDUPLICATION INTERVALS FOLLOW */ +} xl_btree_dedup; + +#define SizeOfBtreeDedup (offsetof(xl_btree_dedup, nintervals) + sizeof(uint16)) /* * This is what we need to know about delete of individual leaf index tuples. * The WAL record can represent deletion of any number of index tuples on a - * single index page when *not* executed by VACUUM. + * single index page when *not* executed by VACUUM. Deletion of a subset of + * the TIDs within a posting list tuple is not supported. * * Backup Blk 0: index page */ @@ -150,21 +211,43 @@ typedef struct xl_btree_reuse_page #define SizeOfBtreeReusePage (sizeof(xl_btree_reuse_page)) /* - * This is what we need to know about vacuum of individual leaf index tuples. - * The WAL record can represent deletion of any number of index tuples on a - * single index page when executed by VACUUM. + * This is what we need to know about which TIDs to remove from an individual + * posting list tuple during vacuuming. An array of these may appear at the + * end of xl_btree_vacuum records. + */ +typedef struct xl_btree_update +{ + uint16 ndeletedtids; + + /* POSTING LIST uint16 OFFSETS TO A DELETED TID FOLLOW */ +} xl_btree_update; + +#define SizeOfBtreeUpdate (offsetof(xl_btree_update, ndeletedtids) + sizeof(uint16)) + +/* + * This is what we need to know about a VACUUM of a leaf page. The WAL record + * can represent deletion of any number of index tuples on a single index page + * when executed by VACUUM. It can also support "updates" of index tuples, + * which is how deletes of a subset of TIDs contained in an existing posting + * list tuple are implemented. (Updates are only used when there will be some + * remaining TIDs once VACUUM finishes; otherwise the posting list tuple can + * just be deleted). * - * Note that the WAL record in any vacuum of an index must have at least one - * item to delete. + * Updated posting list tuples are represented using xl_btree_update metadata. + * The REDO routine uses each xl_btree_update (plus its corresponding original + * index tuple from the target leaf page) to generate the final updated tuple. */ typedef struct xl_btree_vacuum { - uint32 ndeleted; + uint16 ndeleted; + uint16 nupdated; /* DELETED TARGET OFFSET NUMBERS FOLLOW */ + /* UPDATED TARGET OFFSET NUMBERS FOLLOW */ + /* UPDATED TUPLES METADATA ARRAY FOLLOWS */ } xl_btree_vacuum; -#define SizeOfBtreeVacuum (offsetof(xl_btree_vacuum, ndeleted) + sizeof(uint32)) +#define SizeOfBtreeVacuum (offsetof(xl_btree_vacuum, nupdated) + sizeof(uint16)) /* * This is what we need to know about marking an empty branch for deletion. 
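Restating the xl_btree_vacuum layout described above in executable form: the sketch below adds up the variable-length portion of a hypothetical record with a few deleted items and two updated posting list tuples. OffsetNumber is modelled as a bare uint16 and alignment padding is ignored, so the numbers are illustrative rather than exact.

/*
 * Rough, standalone size calculation for the xl_btree_vacuum payload layout
 * described above: deleted offsets, then updated offsets, then one
 * xl_btree_update (plus its uint16 TID offsets) per updated posting list
 * tuple.
 */
#include <stdint.h>
#include <stdio.h>

typedef uint16_t OffsetNumber;

int
main(void)
{
	int			ndeleted = 7;	/* whole tuples removed from the leaf page */
	int			nupdated = 2;	/* posting list tuples losing only some TIDs */
	int			ndeletedtids[] = {3, 5};	/* TIDs removed per updated tuple */
	size_t		size;

	/* main record: ndeleted + nupdated counters */
	size = 2 * sizeof(uint16_t);
	/* deleted target offset numbers, then updated target offset numbers */
	size += (ndeleted + nupdated) * sizeof(OffsetNumber);
	/* per-update metadata: ndeletedtids counter plus that many uint16 offsets */
	for (int i = 0; i < nupdated; i++)
		size += sizeof(uint16_t) + ndeletedtids[i] * sizeof(uint16_t);

	printf("payload bytes: %zu\n", size);	/* 4 + 18 + (2+6) + (2+10) = 42 */
	return 0;
}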
@@ -245,6 +328,8 @@ typedef struct xl_btree_newroot extern void btree_redo(XLogReaderState *record); extern void btree_desc(StringInfo buf, XLogReaderState *record); extern const char *btree_identify(uint8 info); +extern void btree_xlog_startup(void); +extern void btree_xlog_cleanup(void); extern void btree_mask(char *pagedata, BlockNumber blkno); #endif /* NBTXLOG_H */ diff --git a/src/include/access/rmgrlist.h b/src/include/access/rmgrlist.h index c88dccfb8d..6c15df7e70 100644 --- a/src/include/access/rmgrlist.h +++ b/src/include/access/rmgrlist.h @@ -36,7 +36,7 @@ PG_RMGR(RM_RELMAP_ID, "RelMap", relmap_redo, relmap_desc, relmap_identify, NULL, PG_RMGR(RM_STANDBY_ID, "Standby", standby_redo, standby_desc, standby_identify, NULL, NULL, NULL) PG_RMGR(RM_HEAP2_ID, "Heap2", heap2_redo, heap2_desc, heap2_identify, NULL, NULL, heap_mask) PG_RMGR(RM_HEAP_ID, "Heap", heap_redo, heap_desc, heap_identify, NULL, NULL, heap_mask) -PG_RMGR(RM_BTREE_ID, "Btree", btree_redo, btree_desc, btree_identify, NULL, NULL, btree_mask) +PG_RMGR(RM_BTREE_ID, "Btree", btree_redo, btree_desc, btree_identify, btree_xlog_startup, btree_xlog_cleanup, btree_mask) PG_RMGR(RM_HASH_ID, "Hash", hash_redo, hash_desc, hash_identify, NULL, NULL, hash_mask) PG_RMGR(RM_GIN_ID, "Gin", gin_redo, gin_desc, gin_identify, gin_xlog_startup, gin_xlog_cleanup, gin_mask) PG_RMGR(RM_GIST_ID, "Gist", gist_redo, gist_desc, gist_identify, gist_xlog_startup, gist_xlog_cleanup, gist_mask) diff --git a/src/include/access/xlog_internal.h b/src/include/access/xlog_internal.h index 087918d41d..27ded593ab 100644 --- a/src/include/access/xlog_internal.h +++ b/src/include/access/xlog_internal.h @@ -31,7 +31,7 @@ /* * Each page of XLOG file has a header like this: */ -#define XLOG_PAGE_MAGIC 0xD104 /* can be used as WAL version indicator */ +#define XLOG_PAGE_MAGIC 0xD105 /* can be used as WAL version indicator */ typedef struct XLogPageHeaderData { diff --git a/src/test/regress/expected/btree_index.out b/src/test/regress/expected/btree_index.out index f567117a46..1646deb092 100644 --- a/src/test/regress/expected/btree_index.out +++ b/src/test/regress/expected/btree_index.out @@ -200,7 +200,7 @@ reset enable_indexscan; reset enable_bitmapscan; -- Also check LIKE optimization with binary-compatible cases create temp table btree_bpchar (f1 text collate "C"); -create index on btree_bpchar(f1 bpchar_ops); +create index on btree_bpchar(f1 bpchar_ops) WITH (deduplicate_items=on); insert into btree_bpchar values ('foo'), ('fool'), ('bar'), ('quux'); -- doesn't match index: explain (costs off) @@ -266,6 +266,24 @@ select * from btree_bpchar where f1::bpchar like 'foo%'; fool (2 rows) +-- get test coverage for "single value" deduplication strategy: +insert into btree_bpchar select 'foo' from generate_series(1,1500); +-- +-- Perform unique checking, with and without the use of deduplication +-- +CREATE TABLE dedup_unique_test_table (a int) WITH (autovacuum_enabled=false); +CREATE UNIQUE INDEX dedup_unique ON dedup_unique_test_table (a) WITH (deduplicate_items=on); +CREATE UNIQUE INDEX plain_unique ON dedup_unique_test_table (a) WITH (deduplicate_items=off); +-- Generate enough garbage tuples in index to ensure that even the unique index +-- with deduplication enabled has to check multiple leaf pages during unique +-- checking (at least with a BLCKSZ of 8192 or less) +DO $$ +BEGIN + FOR r IN 1..1350 LOOP + DELETE FROM dedup_unique_test_table; + INSERT INTO dedup_unique_test_table SELECT 1; + END LOOP; +END$$; -- -- Test B-tree fast path (cache rightmost leaf 
page) optimization. -- diff --git a/src/test/regress/sql/btree_index.sql b/src/test/regress/sql/btree_index.sql index 558dcae0ec..6e14b935ce 100644 --- a/src/test/regress/sql/btree_index.sql +++ b/src/test/regress/sql/btree_index.sql @@ -86,7 +86,7 @@ reset enable_bitmapscan; -- Also check LIKE optimization with binary-compatible cases create temp table btree_bpchar (f1 text collate "C"); -create index on btree_bpchar(f1 bpchar_ops); +create index on btree_bpchar(f1 bpchar_ops) WITH (deduplicate_items=on); insert into btree_bpchar values ('foo'), ('fool'), ('bar'), ('quux'); -- doesn't match index: explain (costs off) @@ -103,6 +103,26 @@ explain (costs off) select * from btree_bpchar where f1::bpchar like 'foo%'; select * from btree_bpchar where f1::bpchar like 'foo%'; +-- get test coverage for "single value" deduplication strategy: +insert into btree_bpchar select 'foo' from generate_series(1,1500); + +-- +-- Perform unique checking, with and without the use of deduplication +-- +CREATE TABLE dedup_unique_test_table (a int) WITH (autovacuum_enabled=false); +CREATE UNIQUE INDEX dedup_unique ON dedup_unique_test_table (a) WITH (deduplicate_items=on); +CREATE UNIQUE INDEX plain_unique ON dedup_unique_test_table (a) WITH (deduplicate_items=off); +-- Generate enough garbage tuples in index to ensure that even the unique index +-- with deduplication enabled has to check multiple leaf pages during unique +-- checking (at least with a BLCKSZ of 8192 or less) +DO $$ +BEGIN + FOR r IN 1..1350 LOOP + DELETE FROM dedup_unique_test_table; + INSERT INTO dedup_unique_test_table SELECT 1; + END LOOP; +END$$; + -- -- Test B-tree fast path (cache rightmost leaf page) optimization. --
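Finally, stepping back from the regression tests to the deduplication data structures declared earlier (BTDedupInterval and friends): the standalone sketch below models the interval-building pass over a leaf page using plain integer keys. Runs of equal keys longer than one item become intervals; single items are left alone, matching the rule that unchanged tuples never appear in the intervals array. The real pass in nbtdedup.c additionally enforces maxpostingsize and actually rewrites the page, none of which is modelled here.

/*
 * Toy model of the interval-building pass over a leaf page, loosely based on
 * the BTDedupInterval/BTDedupState structs above.  Keys are plain ints and
 * "offsets" are array indexes.
 */
#include <stdio.h>

typedef struct
{
	int			baseoff;		/* index of "base" item of pending group */
	int			nitems;			/* items merged into the group, incl. base */
} ToyDedupInterval;

int
main(void)
{
	int			keys[] = {1, 1, 1, 2, 3, 3, 4, 4, 4, 4};	/* page items, in key order */
	int			nkeys = sizeof(keys) / sizeof(keys[0]);
	ToyDedupInterval intervals[10];
	int			nintervals = 0;
	int			baseoff = 0;

	for (int off = 1; off <= nkeys; off++)
	{
		if (off == nkeys || keys[off] != keys[baseoff])
		{
			/* pending group ends; only multi-item groups become intervals */
			if (off - baseoff > 1)
			{
				intervals[nintervals].baseoff = baseoff;
				intervals[nintervals].nitems = off - baseoff;
				nintervals++;
			}
			baseoff = off;
		}
	}

	for (int i = 0; i < nintervals; i++)
		printf("interval %d: baseoff=%d nitems=%d\n",
			   i, intervals[i].baseoff, intervals[i].nitems);
	/* Prints groups for keys 1 (3 items), 3 (2 items) and 4 (4 items); the
	 * lone key 2 is left as-is, matching the "not in the array" rule above. */
	return 0;
}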