/*-------------------------------------------------------------------------
*
* tuplesortvariants.c
* Implementation of tuple sorting variants.
*
* This module handles the sorting of heap tuples, index tuples, or single
* Datums. The implementation is based on the generalized tuple sorting
* facility given in tuplesort.c. Support for other kinds of sortable
* objects could easily be added here, in another module, or even in an
* extension.
*
*
* Copyright (c) 2022-2023, PostgreSQL Global Development Group
*
* IDENTIFICATION
* src/backend/utils/sort/tuplesortvariants.c
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#include "access/brin_tuple.h"
#include "access/hash.h"
#include "access/htup_details.h"
#include "access/nbtree.h"
#include "catalog/index.h"
#include "executor/executor.h"
#include "pg_trace.h"
#include "utils/datum.h"
#include "utils/lsyscache.h"
#include "utils/guc.h"
#include "utils/tuplesort.h"
/* sort-type codes for sort__start probes */
#define HEAP_SORT 0
#define INDEX_SORT 1
#define DATUM_SORT 2
#define CLUSTER_SORT 3
static void removeabbrev_heap(Tuplesortstate *state, SortTuple *stups,
int count);
static void removeabbrev_cluster(Tuplesortstate *state, SortTuple *stups,
int count);
static void removeabbrev_index(Tuplesortstate *state, SortTuple *stups,
int count);
static void removeabbrev_index_brin(Tuplesortstate *state, SortTuple *stups,
int count);
static void removeabbrev_datum(Tuplesortstate *state, SortTuple *stups,
int count);
static int comparetup_heap(const SortTuple *a, const SortTuple *b,
Tuplesortstate *state);
static int comparetup_heap_tiebreak(const SortTuple *a, const SortTuple *b,
Tuplesortstate *state);
static void writetup_heap(Tuplesortstate *state, LogicalTape *tape,
SortTuple *stup);
static void readtup_heap(Tuplesortstate *state, SortTuple *stup,
LogicalTape *tape, unsigned int len);
static int comparetup_cluster(const SortTuple *a, const SortTuple *b,
Tuplesortstate *state);
static int comparetup_cluster_tiebreak(const SortTuple *a, const SortTuple *b,
Tuplesortstate *state);
static void writetup_cluster(Tuplesortstate *state, LogicalTape *tape,
SortTuple *stup);
static void readtup_cluster(Tuplesortstate *state, SortTuple *stup,
LogicalTape *tape, unsigned int tuplen);
static int comparetup_index_btree(const SortTuple *a, const SortTuple *b,
Tuplesortstate *state);
static int comparetup_index_btree_tiebreak(const SortTuple *a, const SortTuple *b,
Tuplesortstate *state);
static int comparetup_index_hash(const SortTuple *a, const SortTuple *b,
Tuplesortstate *state);
static int comparetup_index_hash_tiebreak(const SortTuple *a, const SortTuple *b,
Tuplesortstate *state);
static int comparetup_index_brin(const SortTuple *a, const SortTuple *b,
Tuplesortstate *state);
static void writetup_index(Tuplesortstate *state, LogicalTape *tape,
SortTuple *stup);
static void readtup_index(Tuplesortstate *state, SortTuple *stup,
LogicalTape *tape, unsigned int len);
static void writetup_index_brin(Tuplesortstate *state, LogicalTape *tape,
SortTuple *stup);
static void readtup_index_brin(Tuplesortstate *state, SortTuple *stup,
LogicalTape *tape, unsigned int len);
static int comparetup_datum(const SortTuple *a, const SortTuple *b,
Tuplesortstate *state);
static int comparetup_datum_tiebreak(const SortTuple *a, const SortTuple *b,
Tuplesortstate *state);
static void writetup_datum(Tuplesortstate *state, LogicalTape *tape,
SortTuple *stup);
static void readtup_datum(Tuplesortstate *state, SortTuple *stup,
LogicalTape *tape, unsigned int len);
static void freestate_cluster(Tuplesortstate *state);
/*
* Data structure pointed to by "TuplesortPublic.arg" for the CLUSTER case.
* Set by tuplesort_begin_cluster.
*/
typedef struct
{
TupleDesc tupDesc;
IndexInfo *indexInfo; /* info about index being used for reference */
EState *estate; /* for evaluating index expressions */
} TuplesortClusterArg;
/*
* Data structure pointed to by "TuplesortPublic.arg" for the IndexTuple case.
* Set by tuplesort_begin_index_xxx and used only by the IndexTuple routines.
*/
typedef struct
{
Relation heapRel; /* table the index is being built on */
Relation indexRel; /* index being built */
} TuplesortIndexArg;
/*
* Data structure pointed to by "TuplesortPublic.arg" for the index_btree subcase.
*/
typedef struct
{
TuplesortIndexArg index;
bool enforceUnique; /* complain if we find duplicate tuples */
bool uniqueNullsNotDistinct; /* unique constraint null treatment */
} TuplesortIndexBTreeArg;
/*
* Data structure pointed to by "TuplesortPublic.arg" for the index_hash subcase.
*/
typedef struct
{
TuplesortIndexArg index;
uint32 high_mask; /* masks for sortable part of hash code */
uint32 low_mask;
uint32 max_buckets;
} TuplesortIndexHashArg;
/*
* Data structure pointed to by "TuplesortPublic.arg" for the index_brin subcase.
*/
typedef struct
{
TuplesortIndexArg index;
/* XXX do we need something here? */
} TuplesortIndexBrinArg;
/*
* Data structure pointed to by "TuplesortPublic.arg" for the Datum case.
* Set by tuplesort_begin_datum and used only by the DatumTuple routines.
*/
typedef struct
{
/* the datatype OID of the Datums to be sorted */
Oid datumType;
/* we need typelen in order to know how to copy the Datums. */
int datumTypeLen;
} TuplesortDatumArg;
/*
* Computing the size of a BrinTuple from the tuple alone is difficult, so we
* track the length alongside the tuple referenced by the SortTuple. That's
* what BrinSortTuple is meant to do - it's essentially a BrinTuple prefixed
* by its length.
*/
typedef struct BrinSortTuple
{
Size tuplen;
BrinTuple tuple;
} BrinSortTuple;
/* Size of the BrinSortTuple, given length of the BrinTuple. */
#define BRINSORTTUPLE_SIZE(len) (offsetof(BrinSortTuple, tuple) + (len))
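/*
* For illustration, a caller allocates a BrinSortTuple via this macro, as
* tuplesort_putbrintuple() does below:
*
*	bstup = palloc(BRINSORTTUPLE_SIZE(size));
*/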
Tuplesortstate *
tuplesort_begin_heap(TupleDesc tupDesc,
int nkeys, AttrNumber *attNums,
Oid *sortOperators, Oid *sortCollations,
bool *nullsFirstFlags,
int workMem, SortCoordinate coordinate, int sortopt)
{
Tuplesortstate *state = tuplesort_begin_common(workMem, coordinate,
sortopt);
TuplesortPublic *base = TuplesortstateGetPublic(state);
MemoryContext oldcontext;
int i;
oldcontext = MemoryContextSwitchTo(base->maincontext);
Assert(nkeys > 0);
#ifdef TRACE_SORT
if (trace_sort)
elog(LOG,
"begin tuple sort: nkeys = %d, workMem = %d, randomAccess = %c",
nkeys, workMem, sortopt & TUPLESORT_RANDOMACCESS ? 't' : 'f');
#endif
base->nKeys = nkeys;
TRACE_POSTGRESQL_SORT_START(HEAP_SORT,
false, /* no unique check */
nkeys,
workMem,
sortopt & TUPLESORT_RANDOMACCESS,
PARALLEL_SORT(coordinate));
base->removeabbrev = removeabbrev_heap;
base->comparetup = comparetup_heap;
base->comparetup_tiebreak = comparetup_heap_tiebreak;
base->writetup = writetup_heap;
base->readtup = readtup_heap;
base->haveDatum1 = true;
base->arg = tupDesc; /* assume we need not copy tupDesc */
/* Prepare SortSupport data for each column */
base->sortKeys = (SortSupport) palloc0(nkeys * sizeof(SortSupportData));
for (i = 0; i < nkeys; i++)
{
SortSupport sortKey = base->sortKeys + i;
Assert(attNums[i] != 0);
Assert(sortOperators[i] != 0);
sortKey->ssup_cxt = CurrentMemoryContext;
sortKey->ssup_collation = sortCollations[i];
sortKey->ssup_nulls_first = nullsFirstFlags[i];
sortKey->ssup_attno = attNums[i];
/* Convey if abbreviation optimization is applicable in principle */
sortKey->abbreviate = (i == 0 && base->haveDatum1);
PrepareSortSupportFromOrderingOp(sortOperators[i], sortKey);
}
/*
* The "onlyKey" optimization cannot be used with abbreviated keys, since
* tie-breaker comparisons may be required. Typically, the optimization
* is only of value to pass-by-value types anyway, whereas abbreviated
* keys are typically only of value to pass-by-reference types.
*/
if (nkeys == 1 && !base->sortKeys->abbrev_converter)
base->onlyKey = base->sortKeys;
MemoryContextSwitchTo(oldcontext);
return state;
}
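/*
* Illustrative sketch of the expected call sequence (the setup of tupDesc,
* the key arrays, and the slot is assumed to be done by the caller; this is
* not a prescription of any particular call site):
*
*	state = tuplesort_begin_heap(tupDesc, nkeys, attNums,
*								 sortOperators, sortCollations,
*								 nullsFirstFlags, work_mem,
*								 NULL, TUPLESORT_NONE);
*	while (... more input tuples ...)
*		tuplesort_puttupleslot(state, slot);
*	tuplesort_performsort(state);
*	while (tuplesort_gettupleslot(state, true, false, slot, NULL))
*		... consume tuples in sorted order ...
*	tuplesort_end(state);
*/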
Tuplesortstate *
tuplesort_begin_cluster(TupleDesc tupDesc,
Relation indexRel,
int workMem,
SortCoordinate coordinate, int sortopt)
{
Tuplesortstate *state = tuplesort_begin_common(workMem, coordinate,
sortopt);
TuplesortPublic *base = TuplesortstateGetPublic(state);
BTScanInsert indexScanKey;
MemoryContext oldcontext;
TuplesortClusterArg *arg;
int i;
Assert(indexRel->rd_rel->relam == BTREE_AM_OID);
oldcontext = MemoryContextSwitchTo(base->maincontext);
arg = (TuplesortClusterArg *) palloc0(sizeof(TuplesortClusterArg));
#ifdef TRACE_SORT
if (trace_sort)
elog(LOG,
"begin tuple sort: nkeys = %d, workMem = %d, randomAccess = %c",
RelationGetNumberOfAttributes(indexRel),
workMem, sortopt & TUPLESORT_RANDOMACCESS ? 't' : 'f');
#endif
base->nKeys = IndexRelationGetNumberOfKeyAttributes(indexRel);
TRACE_POSTGRESQL_SORT_START(CLUSTER_SORT,
false, /* no unique check */
base->nKeys,
workMem,
sortopt & TUPLESORT_RANDOMACCESS,
PARALLEL_SORT(coordinate));
base->removeabbrev = removeabbrev_cluster;
base->comparetup = comparetup_cluster;
base->comparetup_tiebreak = comparetup_cluster_tiebreak;
base->writetup = writetup_cluster;
base->readtup = readtup_cluster;
base->freestate = freestate_cluster;
base->arg = arg;
arg->indexInfo = BuildIndexInfo(indexRel);
/*
* If we don't have a simple leading attribute, we don't currently
* initialize datum1, so disable optimizations that require it.
*/
if (arg->indexInfo->ii_IndexAttrNumbers[0] == 0)
base->haveDatum1 = false;
else
base->haveDatum1 = true;
arg->tupDesc = tupDesc; /* assume we need not copy tupDesc */
indexScanKey = _bt_mkscankey(indexRel, NULL);
if (arg->indexInfo->ii_Expressions != NULL)
{
TupleTableSlot *slot;
ExprContext *econtext;
/*
* We will need to use FormIndexDatum to evaluate the index
* expressions. To do that, we need an EState, as well as a
* TupleTableSlot to put the table tuples into. The econtext's
* scantuple has to point to that slot, too.
*/
arg->estate = CreateExecutorState();
slot = MakeSingleTupleTableSlot(tupDesc, &TTSOpsHeapTuple);
econtext = GetPerTupleExprContext(arg->estate);
econtext->ecxt_scantuple = slot;
}
/* Prepare SortSupport data for each column */
base->sortKeys = (SortSupport) palloc0(base->nKeys *
sizeof(SortSupportData));
for (i = 0; i < base->nKeys; i++)
{
SortSupport sortKey = base->sortKeys + i;
ScanKey scanKey = indexScanKey->scankeys + i;
int16 strategy;
sortKey->ssup_cxt = CurrentMemoryContext;
sortKey->ssup_collation = scanKey->sk_collation;
sortKey->ssup_nulls_first =
(scanKey->sk_flags & SK_BT_NULLS_FIRST) != 0;
sortKey->ssup_attno = scanKey->sk_attno;
/* Convey if abbreviation optimization is applicable in principle */
sortKey->abbreviate = (i == 0 && base->haveDatum1);
Assert(sortKey->ssup_attno != 0);
strategy = (scanKey->sk_flags & SK_BT_DESC) != 0 ?
BTGreaterStrategyNumber : BTLessStrategyNumber;
PrepareSortSupportFromIndexRel(indexRel, strategy, sortKey);
}
pfree(indexScanKey);
MemoryContextSwitchTo(oldcontext);
return state;
}
Tuplesortstate *
tuplesort_begin_index_btree(Relation heapRel,
Relation indexRel,
bool enforceUnique,
bool uniqueNullsNotDistinct,
int workMem,
SortCoordinate coordinate,
int sortopt)
{
Tuplesortstate *state = tuplesort_begin_common(workMem, coordinate,
sortopt);
TuplesortPublic *base = TuplesortstateGetPublic(state);
BTScanInsert indexScanKey;
TuplesortIndexBTreeArg *arg;
MemoryContext oldcontext;
int i;
oldcontext = MemoryContextSwitchTo(base->maincontext);
arg = (TuplesortIndexBTreeArg *) palloc(sizeof(TuplesortIndexBTreeArg));
#ifdef TRACE_SORT
if (trace_sort)
elog(LOG,
"begin index sort: unique = %c, workMem = %d, randomAccess = %c",
enforceUnique ? 't' : 'f',
workMem, sortopt & TUPLESORT_RANDOMACCESS ? 't' : 'f');
#endif
base->nKeys = IndexRelationGetNumberOfKeyAttributes(indexRel);
TRACE_POSTGRESQL_SORT_START(INDEX_SORT,
enforceUnique,
base->nKeys,
workMem,
sortopt & TUPLESORT_RANDOMACCESS,
PARALLEL_SORT(coordinate));
base->removeabbrev = removeabbrev_index;
base->comparetup = comparetup_index_btree;
base->comparetup_tiebreak = comparetup_index_btree_tiebreak;
base->writetup = writetup_index;
base->readtup = readtup_index;
base->haveDatum1 = true;
base->arg = arg;
arg->index.heapRel = heapRel;
arg->index.indexRel = indexRel;
arg->enforceUnique = enforceUnique;
arg->uniqueNullsNotDistinct = uniqueNullsNotDistinct;
indexScanKey = _bt_mkscankey(indexRel, NULL);
/* Prepare SortSupport data for each column */
base->sortKeys = (SortSupport) palloc0(base->nKeys *
sizeof(SortSupportData));
for (i = 0; i < base->nKeys; i++)
{
SortSupport sortKey = base->sortKeys + i;
ScanKey scanKey = indexScanKey->scankeys + i;
int16 strategy;
sortKey->ssup_cxt = CurrentMemoryContext;
sortKey->ssup_collation = scanKey->sk_collation;
sortKey->ssup_nulls_first =
(scanKey->sk_flags & SK_BT_NULLS_FIRST) != 0;
sortKey->ssup_attno = scanKey->sk_attno;
/* Convey if abbreviation optimization is applicable in principle */
sortKey->abbreviate = (i == 0 && base->haveDatum1);
Assert(sortKey->ssup_attno != 0);
strategy = (scanKey->sk_flags & SK_BT_DESC) != 0 ?
BTGreaterStrategyNumber : BTLessStrategyNumber;
PrepareSortSupportFromIndexRel(indexRel, strategy, sortKey);
}
pfree(indexScanKey);
MemoryContextSwitchTo(oldcontext);
return state;
}
Tuplesortstate *
tuplesort_begin_index_hash(Relation heapRel,
Relation indexRel,
uint32 high_mask,
uint32 low_mask,
uint32 max_buckets,
int workMem,
SortCoordinate coordinate,
int sortopt)
{
Tuplesortstate *state = tuplesort_begin_common(workMem, coordinate,
sortopt);
TuplesortPublic *base = TuplesortstateGetPublic(state);
MemoryContext oldcontext;
TuplesortIndexHashArg *arg;
oldcontext = MemoryContextSwitchTo(base->maincontext);
arg = (TuplesortIndexHashArg *) palloc(sizeof(TuplesortIndexHashArg));
#ifdef TRACE_SORT
if (trace_sort)
elog(LOG,
"begin index sort: high_mask = 0x%x, low_mask = 0x%x, "
"max_buckets = 0x%x, workMem = %d, randomAccess = %c",
high_mask,
low_mask,
max_buckets,
workMem,
sortopt & TUPLESORT_RANDOMACCESS ? 't' : 'f');
#endif
base->nKeys = 1; /* Only one sort column, the hash code */
base->removeabbrev = removeabbrev_index;
base->comparetup = comparetup_index_hash;
base->comparetup_tiebreak = comparetup_index_hash_tiebreak;
base->writetup = writetup_index;
base->readtup = readtup_index;
base->haveDatum1 = true;
base->arg = arg;
arg->index.heapRel = heapRel;
arg->index.indexRel = indexRel;
arg->high_mask = high_mask;
arg->low_mask = low_mask;
arg->max_buckets = max_buckets;
MemoryContextSwitchTo(oldcontext);
return state;
}
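/*
* The masks and max_buckets saved here are consumed by the hash comparison
* routines below, which reduce each tuple's hash key to its bucket number
* (via _hash_hashkey2bucket()), so the sort effectively orders index tuples
* by bucket.
*/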
Tuplesortstate *
tuplesort_begin_index_gist(Relation heapRel,
Relation indexRel,
int workMem,
SortCoordinate coordinate,
int sortopt)
{
Tuplesortstate *state = tuplesort_begin_common(workMem, coordinate,
sortopt);
TuplesortPublic *base = TuplesortstateGetPublic(state);
MemoryContext oldcontext;
TuplesortIndexBTreeArg *arg;
int i;
oldcontext = MemoryContextSwitchTo(base->maincontext);
arg = (TuplesortIndexBTreeArg *) palloc(sizeof(TuplesortIndexBTreeArg));
#ifdef TRACE_SORT
if (trace_sort)
elog(LOG,
"begin index sort: workMem = %d, randomAccess = %c",
workMem, sortopt & TUPLESORT_RANDOMACCESS ? 't' : 'f');
#endif
base->nKeys = IndexRelationGetNumberOfKeyAttributes(indexRel);
base->removeabbrev = removeabbrev_index;
base->comparetup = comparetup_index_btree;
base->comparetup_tiebreak = comparetup_index_btree_tiebreak;
base->writetup = writetup_index;
base->readtup = readtup_index;
base->haveDatum1 = true;
base->arg = arg;
arg->index.heapRel = heapRel;
arg->index.indexRel = indexRel;
arg->enforceUnique = false;
arg->uniqueNullsNotDistinct = false;
/* Prepare SortSupport data for each column */
base->sortKeys = (SortSupport) palloc0(base->nKeys *
sizeof(SortSupportData));
for (i = 0; i < base->nKeys; i++)
{
SortSupport sortKey = base->sortKeys + i;
sortKey->ssup_cxt = CurrentMemoryContext;
sortKey->ssup_collation = indexRel->rd_indcollation[i];
sortKey->ssup_nulls_first = false;
sortKey->ssup_attno = i + 1;
/* Convey if abbreviation optimization is applicable in principle */
sortKey->abbreviate = (i == 0 && base->haveDatum1);
Assert(sortKey->ssup_attno != 0);
/* Look for a sort support function */
PrepareSortSupportFromGistIndexRel(indexRel, sortKey);
}
MemoryContextSwitchTo(oldcontext);
return state;
}
Tuplesortstate *
tuplesort_begin_index_brin(Relation heapRel,
Relation indexRel,
int workMem,
SortCoordinate coordinate,
int sortopt)
{
Tuplesortstate *state = tuplesort_begin_common(workMem, coordinate,
sortopt);
TuplesortPublic *base = TuplesortstateGetPublic(state);
MemoryContext oldcontext;
TuplesortIndexBrinArg *arg;
oldcontext = MemoryContextSwitchTo(base->maincontext);
arg = (TuplesortIndexBrinArg *) palloc(sizeof(TuplesortIndexBrinArg));
#ifdef TRACE_SORT
if (trace_sort)
elog(LOG,
"begin index sort: workMem = %d, randomAccess = %c",
workMem,
sortopt & TUPLESORT_RANDOMACCESS ? 't' : 'f');
#endif
base->nKeys = 1; /* Only one sort column, the block number */
base->removeabbrev = removeabbrev_index_brin;
base->comparetup = comparetup_index_brin;
base->writetup = writetup_index_brin;
base->readtup = readtup_index_brin;
base->haveDatum1 = true;
base->arg = arg;
arg->index.heapRel = heapRel;
arg->index.indexRel = indexRel;
MemoryContextSwitchTo(oldcontext);
return state;
}
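/*
* In a parallel BRIN index build, each worker feeds the range summaries it
* produces into this sort with tuplesort_putbrintuple(), and the leader
* reads them back in block-number order with tuplesort_getbrintuple(),
* merging summaries that describe the same page range before inserting
* them into the index.
*/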
Tuplesortstate *
tuplesort_begin_datum(Oid datumType, Oid sortOperator, Oid sortCollation,
bool nullsFirstFlag, int workMem,
SortCoordinate coordinate, int sortopt)
{
Tuplesortstate *state = tuplesort_begin_common(workMem, coordinate,
sortopt);
TuplesortPublic *base = TuplesortstateGetPublic(state);
TuplesortDatumArg *arg;
MemoryContext oldcontext;
int16 typlen;
bool typbyval;
oldcontext = MemoryContextSwitchTo(base->maincontext);
arg = (TuplesortDatumArg *) palloc(sizeof(TuplesortDatumArg));
#ifdef TRACE_SORT
if (trace_sort)
elog(LOG,
"begin datum sort: workMem = %d, randomAccess = %c",
workMem, sortopt & TUPLESORT_RANDOMACCESS ? 't' : 'f');
#endif
base->nKeys = 1; /* always a one-column sort */
TRACE_POSTGRESQL_SORT_START(DATUM_SORT,
false, /* no unique check */
1,
workMem,
sortopt & TUPLESORT_RANDOMACCESS,
PARALLEL_SORT(coordinate));
base->removeabbrev = removeabbrev_datum;
base->comparetup = comparetup_datum;
base->comparetup_tiebreak = comparetup_datum_tiebreak;
base->writetup = writetup_datum;
base->readtup = readtup_datum;
base->haveDatum1 = true;
base->arg = arg;
arg->datumType = datumType;
/* lookup necessary attributes of the datum type */
get_typlenbyval(datumType, &typlen, &typbyval);
arg->datumTypeLen = typlen;
base->tuples = !typbyval;
/* Prepare SortSupport data */
base->sortKeys = (SortSupport) palloc0(sizeof(SortSupportData));
base->sortKeys->ssup_cxt = CurrentMemoryContext;
base->sortKeys->ssup_collation = sortCollation;
base->sortKeys->ssup_nulls_first = nullsFirstFlag;
/*
* Abbreviation is possible here only for by-reference types. In theory,
* a pass-by-value datatype could have an abbreviated form that is cheaper
* to compare. In a tuple sort, we could support that, because we can
* always extract the original datum from the tuple as needed. Here, we
* can't, because a datum sort only stores a single copy of the datum; the
* "tuple" field of each SortTuple is NULL.
*/
base->sortKeys->abbreviate = !typbyval;
PrepareSortSupportFromOrderingOp(sortOperator, base->sortKeys);
/*
* The "onlyKey" optimization cannot be used with abbreviated keys, since
* tie-breaker comparisons may be required. Typically, the optimization
* is only of value to pass-by-value types anyway, whereas abbreviated
* keys are typically only of value to pass-by-reference types.
*/
if (!base->sortKeys->abbrev_converter)
base->onlyKey = base->sortKeys;
MemoryContextSwitchTo(oldcontext);
return state;
}
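/*
* Illustrative sketch of a single-Datum sort (the "<" operator OID variable
* int4_lt_opr and the work_mem value are assumptions of the example, not
* requirements of this API):
*
*	state = tuplesort_begin_datum(INT4OID, int4_lt_opr, InvalidOid,
*								  false, work_mem, NULL, TUPLESORT_NONE);
*	tuplesort_putdatum(state, Int32GetDatum(42), false);
*	...
*	tuplesort_performsort(state);
*	while (tuplesort_getdatum(state, true, true, &val, &isnull, NULL))
*		... consume values in sorted order ...
*	tuplesort_end(state);
*/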
/*
* Accept one tuple while collecting input data for sort.
*
* Note that the input data is always copied; the caller need not save it.
*/
void
tuplesort_puttupleslot(Tuplesortstate *state, TupleTableSlot *slot)
{
TuplesortPublic *base = TuplesortstateGetPublic(state);
MemoryContext oldcontext = MemoryContextSwitchTo(base->tuplecontext);
TupleDesc tupDesc = (TupleDesc) base->arg;
SortTuple stup;
MinimalTuple tuple;
HeapTupleData htup;
/* copy the tuple into sort storage */
tuple = ExecCopySlotMinimalTuple(slot);
stup.tuple = (void *) tuple;
/* set up first-column key value */
htup.t_len = tuple->t_len + MINIMAL_TUPLE_OFFSET;
htup.t_data = (HeapTupleHeader) ((char *) tuple - MINIMAL_TUPLE_OFFSET);
stup.datum1 = heap_getattr(&htup,
base->sortKeys[0].ssup_attno,
tupDesc,
&stup.isnull1);
tuplesort_puttuple_common(state, &stup,
base->sortKeys->abbrev_converter &&
!stup.isnull1);
MemoryContextSwitchTo(oldcontext);
}
/*
* Accept one tuple while collecting input data for sort.
*
* Note that the input data is always copied; the caller need not save it.
*/
void
tuplesort_putheaptuple(Tuplesortstate *state, HeapTuple tup)
{
SortTuple stup;
TuplesortPublic *base = TuplesortstateGetPublic(state);
MemoryContext oldcontext = MemoryContextSwitchTo(base->tuplecontext);
TuplesortClusterArg *arg = (TuplesortClusterArg *) base->arg;
/* copy the tuple into sort storage */
tup = heap_copytuple(tup);
stup.tuple = (void *) tup;
/*
* set up first-column key value, and potentially abbreviate, if it's a
* simple column
*/
if (base->haveDatum1)
{
stup.datum1 = heap_getattr(tup,
arg->indexInfo->ii_IndexAttrNumbers[0],
arg->tupDesc,
&stup.isnull1);
}
tuplesort_puttuple_common(state, &stup,
base->haveDatum1 &&
base->sortKeys->abbrev_converter &&
!stup.isnull1);
MemoryContextSwitchTo(oldcontext);
}
/*
* Collect one index tuple while collecting input data for sort, building
* it from caller-supplied values.
*/
void
tuplesort_putindextuplevalues(Tuplesortstate *state, Relation rel,
ItemPointer self, const Datum *values,
const bool *isnull)
{
SortTuple stup;
IndexTuple tuple;
TuplesortPublic *base = TuplesortstateGetPublic(state);
TuplesortIndexArg *arg = (TuplesortIndexArg *) base->arg;
stup.tuple = index_form_tuple_context(RelationGetDescr(rel), values,
isnull, base->tuplecontext);
tuple = ((IndexTuple) stup.tuple);
tuple->t_tid = *self;
/* set up first-column key value */
stup.datum1 = index_getattr(tuple,
1,
RelationGetDescr(arg->indexRel),
&stup.isnull1);
tuplesort_puttuple_common(state, &stup,
base->sortKeys &&
base->sortKeys->abbrev_converter &&
!stup.isnull1);
}
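/*
* Note on tuplesort_putindextuplevalues(): unlike the other puttuple
* routines above, it needs no explicit MemoryContextSwitchTo, because
* index_form_tuple_context() builds the IndexTuple directly in
* base->tuplecontext.
*/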
/*
* Collect one BRIN tuple while collecting input data for sort.
*/
void
tuplesort_putbrintuple(Tuplesortstate *state, BrinTuple *tuple, Size size)
{
SortTuple stup;
BrinSortTuple *bstup;
TuplesortPublic *base = TuplesortstateGetPublic(state);
MemoryContext oldcontext = MemoryContextSwitchTo(base->tuplecontext);
/* allocate space for the whole BRIN sort tuple */
bstup = palloc(BRINSORTTUPLE_SIZE(size));
bstup->tuplen = size;
memcpy(&bstup->tuple, tuple, size);
stup.tuple = bstup;
stup.datum1 = tuple->bt_blkno;
stup.isnull1 = false;
tuplesort_puttuple_common(state, &stup,
base->sortKeys &&
base->sortKeys->abbrev_converter &&
!stup.isnull1);
MemoryContextSwitchTo(oldcontext);
}
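/*
* Note on tuplesort_putbrintuple(): datum1 is set to the range's start
* block number (bt_blkno), which is the single sort key compared by
* comparetup_index_brin().
*/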
/*
* Accept one Datum while collecting input data for sort.
*
* If the Datum is of a pass-by-ref type, the value will be copied.
*/
void
tuplesort_putdatum(Tuplesortstate *state, Datum val, bool isNull)
{
TuplesortPublic *base = TuplesortstateGetPublic(state);
MemoryContext oldcontext = MemoryContextSwitchTo(base->tuplecontext);
TuplesortDatumArg *arg = (TuplesortDatumArg *) base->arg;
SortTuple stup;
/*
* Pass-by-value types or null values are just stored directly in
* stup.datum1 (and stup.tuple is not used and set to NULL).
*
* Non-null pass-by-reference values need to be copied into memory we
* control, and possibly abbreviated. The copied value is pointed to by
* stup.tuple and is treated as the canonical copy (e.g. to return via
* tuplesort_getdatum or when writing to tape); stup.datum1 gets the
* abbreviated value if abbreviation is happening, otherwise it's
* identical to stup.tuple.
*/
if (isNull || !base->tuples)
{
/*
* Set datum1 to zeroed representation for NULLs (to be consistent,
* and to support cheap inequality tests for NULL abbreviated keys).
*/
stup.datum1 = !isNull ? val : (Datum) 0;
stup.isnull1 = isNull;
stup.tuple = NULL; /* no separate storage */
}
else
{
stup.isnull1 = false;
stup.datum1 = datumCopy(val, false, arg->datumTypeLen);
stup.tuple = DatumGetPointer(stup.datum1);
}
tuplesort_puttuple_common(state, &stup,
base->tuples &&
base->sortKeys->abbrev_converter && !isNull);
MemoryContextSwitchTo(oldcontext);
}
/*
* Fetch the next tuple in either forward or back direction.
* If successful, put tuple in slot and return true; else, clear the slot
* and return false.
*
* Caller may optionally be passed back abbreviated value (on true return
* value) when abbreviation was used, which can be used to cheaply avoid
* equality checks that might otherwise be required. Caller can safely make a
* determination of "non-equal tuple" based on simple binary inequality. A
* NULL value in leading attribute will set abbreviated value to zeroed
* representation, which caller may rely on in abbreviated inequality check.
*
* If copy is true, the slot receives a tuple that's been copied into the
* caller's memory context, so that it will stay valid regardless of future
* manipulations of the tuplesort's state (up to and including deleting the
* tuplesort). If copy is false, the slot will just receive a pointer to a
* tuple held within the tuplesort, which is more efficient, but only safe for
* callers that are prepared to have any subsequent manipulation of the
* tuplesort's state invalidate slot contents.
*/
bool
tuplesort_gettupleslot(Tuplesortstate *state, bool forward, bool copy,
TupleTableSlot *slot, Datum *abbrev)
{
TuplesortPublic *base = TuplesortstateGetPublic(state);
MemoryContext oldcontext = MemoryContextSwitchTo(base->sortcontext);
SortTuple stup;
if (!tuplesort_gettuple_common(state, forward, &stup))
stup.tuple = NULL;
MemoryContextSwitchTo(oldcontext);
if (stup.tuple)
{
/* Record abbreviated key for caller */
if (base->sortKeys->abbrev_converter && abbrev)
*abbrev = stup.datum1;
if (copy)
stup.tuple = heap_copy_minimal_tuple((MinimalTuple) stup.tuple);
ExecStoreMinimalTuple((MinimalTuple) stup.tuple, slot, copy);
return true;
}
else
{
ExecClearTuple(slot);
return false;
}
}
/*
* Fetch the next tuple in either forward or back direction.
* Returns NULL if no more tuples. Returned tuple belongs to tuplesort memory
* context, and must not be freed by caller. Caller may not rely on tuple
* remaining valid after any further manipulation of tuplesort.
*/
HeapTuple
tuplesort_getheaptuple(Tuplesortstate *state, bool forward)
{
TuplesortPublic *base = TuplesortstateGetPublic(state);
MemoryContext oldcontext = MemoryContextSwitchTo(base->sortcontext);
SortTuple stup;
if (!tuplesort_gettuple_common(state, forward, &stup))
stup.tuple = NULL;
MemoryContextSwitchTo(oldcontext);
return stup.tuple;
}
/*
* Fetch the next index tuple in either forward or back direction.
* Returns NULL if no more tuples. Returned tuple belongs to tuplesort memory
* context, and must not be freed by caller. Caller may not rely on tuple
* remaining valid after any further manipulation of tuplesort.
*/
IndexTuple
tuplesort_getindextuple(Tuplesortstate *state, bool forward)
{
TuplesortPublic *base = TuplesortstateGetPublic(state);
MemoryContext oldcontext = MemoryContextSwitchTo(base->sortcontext);
SortTuple stup;
if (!tuplesort_gettuple_common(state, forward, &stup))
stup.tuple = NULL;
MemoryContextSwitchTo(oldcontext);
return (IndexTuple) stup.tuple;
}
/*
* Fetch the next BRIN tuple in either forward or back direction.
* Returns NULL if no more tuples. Returned tuple belongs to tuplesort memory
* context, and must not be freed by caller. Caller may not rely on tuple
* remaining valid after any further manipulation of tuplesort.
*/
BrinTuple *
tuplesort_getbrintuple(Tuplesortstate *state, Size *len, bool forward)
{
TuplesortPublic *base = TuplesortstateGetPublic(state);
MemoryContext oldcontext = MemoryContextSwitchTo(base->sortcontext);
SortTuple stup;
BrinSortTuple *btup;
if (!tuplesort_gettuple_common(state, forward, &stup))
stup.tuple = NULL;
MemoryContextSwitchTo(oldcontext);
if (!stup.tuple)
return NULL;
btup = (BrinSortTuple *) stup.tuple;
*len = btup->tuplen;
return &btup->tuple;
}
/*
* Fetch the next Datum in either forward or back direction.
* Returns false if no more datums.
*
* If the Datum is of a pass-by-ref type, the returned value is freshly
* palloc'd in the caller's context, and is now owned by the caller (this
* differs from
* similar routines for other types of tuplesorts).
*
* Caller may optionally be passed back abbreviated value (on true return
* value) when abbreviation was used, which can be used to cheaply avoid
* equality checks that might otherwise be required. Caller can safely make a
* determination of "non-equal tuple" based on simple binary inequality. A
* NULL value will have a zeroed abbreviated value representation, which caller
* may rely on in abbreviated inequality check.
*
* For byref Datums, if copy is true, *val is set to a copy of the Datum
* copied into the caller's memory context, so that it will stay valid
* regardless of future manipulations of the tuplesort's state (up to and
* including deleting the tuplesort). If copy is false, *val will just be
* set to a pointer to the Datum held within the tuplesort, which is more
* efficient, but only safe for callers that are prepared to have any
* subsequent manipulation of the tuplesort's state invalidate slot contents.
* For byval Datums, the value of the 'copy' parameter has no effect.
*/
bool
tuplesort_getdatum(Tuplesortstate *state, bool forward, bool copy,
Datum *val, bool *isNull, Datum *abbrev)
{
TuplesortPublic *base = TuplesortstateGetPublic(state);
MemoryContext oldcontext = MemoryContextSwitchTo(base->sortcontext);
TuplesortDatumArg *arg = (TuplesortDatumArg *) base->arg;
SortTuple stup;
if (!tuplesort_gettuple_common(state, forward, &stup))
{
MemoryContextSwitchTo(oldcontext);
return false;
}
/* Ensure we copy into caller's memory context */
MemoryContextSwitchTo(oldcontext);
/* Record abbreviated key for caller */
if (base->sortKeys->abbrev_converter && abbrev)
*abbrev = stup.datum1;
if (stup.isnull1 || !base->tuples)
{
*val = stup.datum1;
*isNull = stup.isnull1;
}
else
{
/* use stup.tuple because stup.datum1 may be an abbreviation */
if (copy)
*val = datumCopy(PointerGetDatum(stup.tuple), false,
arg->datumTypeLen);
else
*val = PointerGetDatum(stup.tuple);
*isNull = false;
}
return true;
}
/*
* Routines specialized for HeapTuple (actually MinimalTuple) case
*/
static void
removeabbrev_heap(Tuplesortstate *state, SortTuple *stups, int count)
{
int i;
TuplesortPublic *base = TuplesortstateGetPublic(state);
for (i = 0; i < count; i++)
{
HeapTupleData htup;
htup.t_len = ((MinimalTuple) stups[i].tuple)->t_len +
MINIMAL_TUPLE_OFFSET;
htup.t_data = (HeapTupleHeader) ((char *) stups[i].tuple -
MINIMAL_TUPLE_OFFSET);
stups[i].datum1 = heap_getattr(&htup,
base->sortKeys[0].ssup_attno,
(TupleDesc) base->arg,
&stups[i].isnull1);
}
}
static int
comparetup_heap(const SortTuple *a, const SortTuple *b, Tuplesortstate *state)
{
TuplesortPublic *base = TuplesortstateGetPublic(state);
SortSupport sortKey = base->sortKeys;
int32 compare;
/* Compare the leading sort key */
compare = ApplySortComparator(a->datum1, a->isnull1,
b->datum1, b->isnull1,
sortKey);
if (compare != 0)
return compare;
/* Compare additional sort keys */
return comparetup_heap_tiebreak(a, b, state);
}
static int
comparetup_heap_tiebreak(const SortTuple *a, const SortTuple *b, Tuplesortstate *state)
{
TuplesortPublic *base = TuplesortstateGetPublic(state);
SortSupport sortKey = base->sortKeys;
HeapTupleData ltup;
HeapTupleData rtup;
TupleDesc tupDesc;
int nkey;
int32 compare;
AttrNumber attno;
Datum datum1,
datum2;
bool isnull1,
isnull2;
ltup.t_len = ((MinimalTuple) a->tuple)->t_len + MINIMAL_TUPLE_OFFSET;
ltup.t_data = (HeapTupleHeader) ((char *) a->tuple - MINIMAL_TUPLE_OFFSET);
rtup.t_len = ((MinimalTuple) b->tuple)->t_len + MINIMAL_TUPLE_OFFSET;
rtup.t_data = (HeapTupleHeader) ((char *) b->tuple - MINIMAL_TUPLE_OFFSET);
tupDesc = (TupleDesc) base->arg;
if (sortKey->abbrev_converter)
{
attno = sortKey->ssup_attno;
datum1 = heap_getattr(&ltup, attno, tupDesc, &isnull1);
datum2 = heap_getattr(&rtup, attno, tupDesc, &isnull2);
compare = ApplySortAbbrevFullComparator(datum1, isnull1,
datum2, isnull2,
sortKey);
if (compare != 0)
return compare;
}
sortKey++;
for (nkey = 1; nkey < base->nKeys; nkey++, sortKey++)
{
attno = sortKey->ssup_attno;
datum1 = heap_getattr(&ltup, attno, tupDesc, &isnull1);
datum2 = heap_getattr(&rtup, attno, tupDesc, &isnull2);
compare = ApplySortComparator(datum1, isnull1,
datum2, isnull2,
sortKey);
if (compare != 0)
return compare;
}
return 0;
}
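/*
* Note on the abbreviated-key handling above: datum1 may hold only an
* abbreviation of the leading key, so when the abbreviated comparison in
* comparetup_heap() ties, the tiebreak re-compares the first key with the
* authoritative (full) comparator before moving on to the remaining keys.
*/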
static void
writetup_heap(Tuplesortstate *state, LogicalTape *tape, SortTuple *stup)
{
TuplesortPublic *base = TuplesortstateGetPublic(state);
MinimalTuple tuple = (MinimalTuple) stup->tuple;
/* the part of the MinimalTuple we'll write: */
char *tupbody = (char *) tuple + MINIMAL_TUPLE_DATA_OFFSET;
unsigned int tupbodylen = tuple->t_len - MINIMAL_TUPLE_DATA_OFFSET;
/* total on-disk footprint: */
unsigned int tuplen = tupbodylen + sizeof(int);
LogicalTapeWrite(tape, &tuplen, sizeof(tuplen));
LogicalTapeWrite(tape, tupbody, tupbodylen);
if (base->sortopt & TUPLESORT_RANDOMACCESS) /* need trailing length word? */
LogicalTapeWrite(tape, &tuplen, sizeof(tuplen));
}
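/*
* Tape record layout used by writetup_heap/readtup_heap: an unsigned int
* length word that counts itself plus the tuple body, then the MinimalTuple
* body with its first MINIMAL_TUPLE_DATA_OFFSET bytes omitted, then (for
* TUPLESORT_RANDOMACCESS sorts) a trailing copy of the length word so the
* tape can also be read backwards.
*/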
static void
readtup_heap(Tuplesortstate *state, SortTuple *stup,
LogicalTape *tape, unsigned int len)
{
unsigned int tupbodylen = len - sizeof(int);
unsigned int tuplen = tupbodylen + MINIMAL_TUPLE_DATA_OFFSET;
MinimalTuple tuple = (MinimalTuple) tuplesort_readtup_alloc(state, tuplen);
char *tupbody = (char *) tuple + MINIMAL_TUPLE_DATA_OFFSET;
TuplesortPublic *base = TuplesortstateGetPublic(state);
HeapTupleData htup;
/* read in the tuple proper */
tuple->t_len = tuplen;
LogicalTapeReadExact(tape, tupbody, tupbodylen);
if (base->sortopt & TUPLESORT_RANDOMACCESS) /* need trailing length word? */
LogicalTapeReadExact(tape, &tuplen, sizeof(tuplen));
stup->tuple = (void *) tuple;
/* set up first-column key value */
htup.t_len = tuple->t_len + MINIMAL_TUPLE_OFFSET;
htup.t_data = (HeapTupleHeader) ((char *) tuple - MINIMAL_TUPLE_OFFSET);
stup->datum1 = heap_getattr(&htup,
base->sortKeys[0].ssup_attno,
(TupleDesc) base->arg,
&stup->isnull1);
}
/*
* Routines specialized for the CLUSTER case (HeapTuple data, with
* comparisons per a btree index definition)
*/
static void
removeabbrev_cluster(Tuplesortstate *state, SortTuple *stups, int count)
{
int i;
TuplesortPublic *base = TuplesortstateGetPublic(state);
TuplesortClusterArg *arg = (TuplesortClusterArg *) base->arg;
for (i = 0; i < count; i++)
{
HeapTuple tup;
tup = (HeapTuple) stups[i].tuple;
stups[i].datum1 = heap_getattr(tup,
arg->indexInfo->ii_IndexAttrNumbers[0],
arg->tupDesc,
&stups[i].isnull1);
}
}
static int
comparetup_cluster(const SortTuple *a, const SortTuple *b,
Tuplesortstate *state)
{
TuplesortPublic *base = TuplesortstateGetPublic(state);
SortSupport sortKey = base->sortKeys;
int32 compare;
/* Compare the leading sort key, if it's simple */
if (base->haveDatum1)
{
compare = ApplySortComparator(a->datum1, a->isnull1,
b->datum1, b->isnull1,
sortKey);
if (compare != 0)
return compare;
}
return comparetup_cluster_tiebreak(a, b, state);
}
static int
comparetup_cluster_tiebreak(const SortTuple *a, const SortTuple *b,
Tuplesortstate *state)
{
TuplesortPublic *base = TuplesortstateGetPublic(state);
TuplesortClusterArg *arg = (TuplesortClusterArg *) base->arg;
SortSupport sortKey = base->sortKeys;
HeapTuple ltup;
HeapTuple rtup;
TupleDesc tupDesc;
int nkey;
int32 compare = 0;
Datum datum1,
datum2;
bool isnull1,
isnull2;
ltup = (HeapTuple) a->tuple;
rtup = (HeapTuple) b->tuple;
tupDesc = arg->tupDesc;
/* Compare the leading sort key, if it's simple */
if (base->haveDatum1)
{
if (sortKey->abbrev_converter)
{
AttrNumber leading = arg->indexInfo->ii_IndexAttrNumbers[0];
datum1 = heap_getattr(ltup, leading, tupDesc, &isnull1);
datum2 = heap_getattr(rtup, leading, tupDesc, &isnull2);
compare = ApplySortAbbrevFullComparator(datum1, isnull1,
datum2, isnull2,
sortKey);
}
if (compare != 0 || base->nKeys == 1)
return compare;
/* Compare additional columns the hard way */
sortKey++;
nkey = 1;
}
else
{
/* Must compare all keys the hard way */
nkey = 0;
}
if (arg->indexInfo->ii_Expressions == NULL)
{
/* If not expression index, just compare the proper heap attrs */
for (; nkey < base->nKeys; nkey++, sortKey++)
{
AttrNumber attno = arg->indexInfo->ii_IndexAttrNumbers[nkey];
datum1 = heap_getattr(ltup, attno, tupDesc, &isnull1);
datum2 = heap_getattr(rtup, attno, tupDesc, &isnull2);
compare = ApplySortComparator(datum1, isnull1,
datum2, isnull2,
sortKey);
if (compare != 0)
return compare;
}
}
else
{
/*
* In the expression index case, compute the whole index tuple and
* then compare values. It would perhaps be faster to compute only as
* many columns as we need to compare, but that would require
* duplicating all the logic in FormIndexDatum.
*/
Datum l_index_values[INDEX_MAX_KEYS];
bool l_index_isnull[INDEX_MAX_KEYS];
Datum r_index_values[INDEX_MAX_KEYS];
bool r_index_isnull[INDEX_MAX_KEYS];
TupleTableSlot *ecxt_scantuple;
/* Reset context each time to prevent memory leakage */
ResetPerTupleExprContext(arg->estate);
ecxt_scantuple = GetPerTupleExprContext(arg->estate)->ecxt_scantuple;
ExecStoreHeapTuple(ltup, ecxt_scantuple, false);
FormIndexDatum(arg->indexInfo, ecxt_scantuple, arg->estate,
l_index_values, l_index_isnull);
ExecStoreHeapTuple(rtup, ecxt_scantuple, false);
FormIndexDatum(arg->indexInfo, ecxt_scantuple, arg->estate,
r_index_values, r_index_isnull);
for (; nkey < base->nKeys; nkey++, sortKey++)
{
compare = ApplySortComparator(l_index_values[nkey],
l_index_isnull[nkey],
r_index_values[nkey],
r_index_isnull[nkey],
sortKey);
if (compare != 0)
return compare;
}
}
return 0;
}
static void
writetup_cluster(Tuplesortstate *state, LogicalTape *tape, SortTuple *stup)
{
TuplesortPublic *base = TuplesortstateGetPublic(state);
HeapTuple tuple = (HeapTuple) stup->tuple;
unsigned int tuplen = tuple->t_len + sizeof(ItemPointerData) + sizeof(int);
/* We need to store t_self, but not other fields of HeapTupleData */
LogicalTapeWrite(tape, &tuplen, sizeof(tuplen));
LogicalTapeWrite(tape, &tuple->t_self, sizeof(ItemPointerData));
LogicalTapeWrite(tape, tuple->t_data, tuple->t_len);
if (base->sortopt & TUPLESORT_RANDOMACCESS) /* need trailing length word? */
LogicalTapeWrite(tape, &tuplen, sizeof(tuplen));
}
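/*
* Tape record layout used by writetup_cluster/readtup_cluster: a length
* word counting itself, the ItemPointerData and the tuple body; then
* t_self; then the HeapTuple data; then (for TUPLESORT_RANDOMACCESS) a
* trailing copy of the length word.
*/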
static void
readtup_cluster(Tuplesortstate *state, SortTuple *stup,
LogicalTape *tape, unsigned int tuplen)
{
TuplesortPublic *base = TuplesortstateGetPublic(state);
TuplesortClusterArg *arg = (TuplesortClusterArg *) base->arg;
unsigned int t_len = tuplen - sizeof(ItemPointerData) - sizeof(int);
HeapTuple tuple = (HeapTuple) tuplesort_readtup_alloc(state,
t_len + HEAPTUPLESIZE);
/* Reconstruct the HeapTupleData header */
tuple->t_data = (HeapTupleHeader) ((char *) tuple + HEAPTUPLESIZE);
tuple->t_len = t_len;
LogicalTapeReadExact(tape, &tuple->t_self, sizeof(ItemPointerData));
/* We don't currently bother to reconstruct t_tableOid */
tuple->t_tableOid = InvalidOid;
/* Read in the tuple body */
LogicalTapeReadExact(tape, tuple->t_data, tuple->t_len);
if (base->sortopt & TUPLESORT_RANDOMACCESS) /* need trailing length word? */
LogicalTapeReadExact(tape, &tuplen, sizeof(tuplen));
stup->tuple = (void *) tuple;
/* set up first-column key value, if it's a simple column */
if (base->haveDatum1)
stup->datum1 = heap_getattr(tuple,
arg->indexInfo->ii_IndexAttrNumbers[0],
arg->tupDesc,
&stup->isnull1);
}
static void
freestate_cluster(Tuplesortstate *state)
{
TuplesortPublic *base = TuplesortstateGetPublic(state);
TuplesortClusterArg *arg = (TuplesortClusterArg *) base->arg;
/* Free any execution state created for CLUSTER case */
if (arg->estate != NULL)
{
ExprContext *econtext = GetPerTupleExprContext(arg->estate);
ExecDropSingleTupleTableSlot(econtext->ecxt_scantuple);
FreeExecutorState(arg->estate);
}
}
/*
* Routines specialized for IndexTuple case
*
* The btree and hash cases require separate comparison functions, but the
* IndexTuple representation is the same so the copy/write/read support
* functions can be shared.
*/
static void
removeabbrev_index(Tuplesortstate *state, SortTuple *stups, int count)
{
TuplesortPublic *base = TuplesortstateGetPublic(state);
TuplesortIndexArg *arg = (TuplesortIndexArg *) base->arg;
int i;
for (i = 0; i < count; i++)
{
IndexTuple tuple;
tuple = stups[i].tuple;
stups[i].datum1 = index_getattr(tuple,
1,
RelationGetDescr(arg->indexRel),
&stups[i].isnull1);
}
}
static int
comparetup_index_btree(const SortTuple *a, const SortTuple *b,
Tuplesortstate *state)
{
/*
* This is similar to comparetup_heap(), but expects index tuples. There
* is also special handling for enforcing uniqueness, and special
* treatment for equal keys at the end.
*/
TuplesortPublic *base = TuplesortstateGetPublic(state);
SortSupport sortKey = base->sortKeys;
int32 compare;
/* Compare the leading sort key */
compare = ApplySortComparator(a->datum1, a->isnull1,
b->datum1, b->isnull1,
sortKey);
if (compare != 0)
return compare;
/* Compare additional sort keys */
return comparetup_index_btree_tiebreak(a, b, state);
}
static int
comparetup_index_btree_tiebreak(const SortTuple *a, const SortTuple *b,
Tuplesortstate *state)
{
TuplesortPublic *base = TuplesortstateGetPublic(state);
TuplesortIndexBTreeArg *arg = (TuplesortIndexBTreeArg *) base->arg;
SortSupport sortKey = base->sortKeys;
IndexTuple tuple1;
IndexTuple tuple2;
int keysz;
TupleDesc tupDes;
bool equal_hasnull = false;
int nkey;
int32 compare;
Datum datum1,
datum2;
bool isnull1,
isnull2;
tuple1 = (IndexTuple) a->tuple;
tuple2 = (IndexTuple) b->tuple;
keysz = base->nKeys;
tupDes = RelationGetDescr(arg->index.indexRel);
if (sortKey->abbrev_converter)
{
datum1 = index_getattr(tuple1, 1, tupDes, &isnull1);
datum2 = index_getattr(tuple2, 1, tupDes, &isnull2);
compare = ApplySortAbbrevFullComparator(datum1, isnull1,
datum2, isnull2,
sortKey);
if (compare != 0)
return compare;
}
/* they are equal, so we only need to examine one null flag */
if (a->isnull1)
equal_hasnull = true;
sortKey++;
for (nkey = 2; nkey <= keysz; nkey++, sortKey++)
{
datum1 = index_getattr(tuple1, nkey, tupDes, &isnull1);
datum2 = index_getattr(tuple2, nkey, tupDes, &isnull2);
compare = ApplySortComparator(datum1, isnull1,
datum2, isnull2,
sortKey);
if (compare != 0)
return compare; /* done when we find unequal attributes */
/* they are equal, so we only need to examine one null flag */
if (isnull1)
equal_hasnull = true;
}
/*
* If btree has asked us to enforce uniqueness, complain if two equal
* tuples are detected (unless there was at least one NULL field and NULLS
* NOT DISTINCT was not set).
*
* It is sufficient to make the test here, because if two tuples are equal
* they *must* get compared at some stage of the sort --- otherwise the
* sort algorithm wouldn't have checked whether one must appear before the
* other.
*/
if (arg->enforceUnique && !(!arg->uniqueNullsNotDistinct && equal_hasnull))
{
Datum values[INDEX_MAX_KEYS];
bool isnull[INDEX_MAX_KEYS];
char *key_desc;
/*
* Some rather brain-dead implementations of qsort (such as the one in
* QNX 4) will sometimes call the comparison routine to compare a
* value to itself, but we always use our own implementation, which
* does not.
*/
Assert(tuple1 != tuple2);
index_deform_tuple(tuple1, tupDes, values, isnull);
key_desc = BuildIndexValueDescription(arg->index.indexRel, values, isnull);
ereport(ERROR,
(errcode(ERRCODE_UNIQUE_VIOLATION),
errmsg("could not create unique index \"%s\"",
RelationGetRelationName(arg->index.indexRel)),
key_desc ? errdetail("Key %s is duplicated.", key_desc) :
errdetail("Duplicate keys exist."),
errtableconstraint(arg->index.heapRel,
RelationGetRelationName(arg->index.indexRel))));
}
/*
* If key values are equal, we sort on ItemPointer. This is required for
* btree indexes, since heap TID is treated as an implicit last key
* attribute in order to ensure that all keys in the index are physically
* unique.
*/
{
BlockNumber blk1 = ItemPointerGetBlockNumber(&tuple1->t_tid);
BlockNumber blk2 = ItemPointerGetBlockNumber(&tuple2->t_tid);
if (blk1 != blk2)
return (blk1 < blk2) ? -1 : 1;
}
{
OffsetNumber pos1 = ItemPointerGetOffsetNumber(&tuple1->t_tid);
OffsetNumber pos2 = ItemPointerGetOffsetNumber(&tuple2->t_tid);
if (pos1 != pos2)
return (pos1 < pos2) ? -1 : 1;
}
/* ItemPointer values should never be equal */
Assert(false);
return 0;
}
static int
comparetup_index_hash(const SortTuple *a, const SortTuple *b,
Tuplesortstate *state)
{
Bucket bucket1;
Bucket bucket2;
uint32 hash1;
uint32 hash2;
IndexTuple tuple1;
IndexTuple tuple2;
TuplesortPublic *base = TuplesortstateGetPublic(state);
TuplesortIndexHashArg *arg = (TuplesortIndexHashArg *) base->arg;
/*
* Fetch hash keys and mask off bits we don't want to sort by, so that the
* initial sort is just on the bucket number. We know that the first
* column of the index tuple is the hash key.
*/
Assert(!a->isnull1);
bucket1 = _hash_hashkey2bucket(DatumGetUInt32(a->datum1),
arg->max_buckets, arg->high_mask,
arg->low_mask);
Assert(!b->isnull1);
bucket2 = _hash_hashkey2bucket(DatumGetUInt32(b->datum1),
arg->max_buckets, arg->high_mask,
arg->low_mask);
if (bucket1 > bucket2)
return 1;
else if (bucket1 < bucket2)
return -1;
/*
* If bucket values are equal, sort by hash values. This allows us to
* insert directly onto bucket/overflow pages, where the index tuples are
* stored in hash order to allow fast binary search within each page.
*/
hash1 = DatumGetUInt32(a->datum1);
hash2 = DatumGetUInt32(b->datum1);
if (hash1 > hash2)
return 1;
else if (hash1 < hash2)
return -1;
/*
* If hash values are equal, we sort on ItemPointer. This does not affect
* validity of the finished index, but it may be useful to have index
* scans in physical order.
*/
tuple1 = (IndexTuple) a->tuple;
tuple2 = (IndexTuple) b->tuple;
{
BlockNumber blk1 = ItemPointerGetBlockNumber(&tuple1->t_tid);
BlockNumber blk2 = ItemPointerGetBlockNumber(&tuple2->t_tid);
if (blk1 != blk2)
return (blk1 < blk2) ? -1 : 1;
}
{
OffsetNumber pos1 = ItemPointerGetOffsetNumber(&tuple1->t_tid);
OffsetNumber pos2 = ItemPointerGetOffsetNumber(&tuple2->t_tid);
if (pos1 != pos2)
return (pos1 < pos2) ? -1 : 1;
}
/* ItemPointer values should never be equal */
Assert(false);
return 0;
}
/*
* Sorting for hash indexes only uses one sort key, so this shouldn't ever be
* called. It's only here for consistency.
*/
static int
comparetup_index_hash_tiebreak(const SortTuple *a, const SortTuple *b,
Tuplesortstate *state)
{
Assert(false);
return 0;
}
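/*
 * On-tape format shared by the btree and hash index cases: an unsigned int
 * length word (counting itself plus the index tuple), the IndexTuple
 * verbatim, and, for TUPLESORT_RANDOMACCESS sorts, a trailing copy of the
 * length word so the run can also be read backwards.
 */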
static void
writetup_index(Tuplesortstate *state, LogicalTape *tape, SortTuple *stup)
{
TuplesortPublic *base = TuplesortstateGetPublic(state);
IndexTuple tuple = (IndexTuple) stup->tuple;
unsigned int tuplen;
tuplen = IndexTupleSize(tuple) + sizeof(tuplen);
LogicalTapeWrite(tape, &tuplen, sizeof(tuplen));
LogicalTapeWrite(tape, tuple, IndexTupleSize(tuple));
if (base->sortopt & TUPLESORT_RANDOMACCESS) /* need trailing length word? */
LogicalTapeWrite(tape, &tuplen, sizeof(tuplen));
}
static void
readtup_index(Tuplesortstate *state, SortTuple *stup,
LogicalTape *tape, unsigned int len)
{
TuplesortPublic *base = TuplesortstateGetPublic(state);
TuplesortIndexArg *arg = (TuplesortIndexArg *) base->arg;
unsigned int tuplen = len - sizeof(unsigned int);
IndexTuple tuple = (IndexTuple) tuplesort_readtup_alloc(state, tuplen);
LogicalTapeReadExact(tape, tuple, tuplen);
if (base->sortopt & TUPLESORT_RANDOMACCESS) /* need trailing length word? */
LogicalTapeReadExact(tape, &tuplen, sizeof(tuplen));
stup->tuple = (void *) tuple;
/* set up first-column key value */
stup->datum1 = index_getattr(tuple,
1,
RelationGetDescr(arg->indexRel),
&stup->isnull1);
}
}

/*
* Routines specialized for BrinTuple case
*/
static void
removeabbrev_index_brin(Tuplesortstate *state, SortTuple *stups, int count)
{
int i;
for (i = 0; i < count; i++)
{
BrinSortTuple *tuple;
tuple = stups[i].tuple;
stups[i].datum1 = tuple->tuple.bt_blkno;
}
}
static int
comparetup_index_brin(const SortTuple *a, const SortTuple *b,
Tuplesortstate *state)
{
Assert(TuplesortstateGetPublic(state)->haveDatum1);
if (DatumGetUInt32(a->datum1) > DatumGetUInt32(b->datum1))
return 1;
if (DatumGetUInt32(a->datum1) < DatumGetUInt32(b->datum1))
return -1;
/* silence compilers */
return 0;
}
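/*
 * Only the BrinTuple itself goes to tape, preceded by a length word that
 * counts itself plus the tuple; the in-memory BrinSortTuple wrapper (its
 * tuplen field) is recreated by readtup_index_brin() from that length word.
 */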
static void
writetup_index_brin(Tuplesortstate *state, LogicalTape *tape, SortTuple *stup)
{
TuplesortPublic *base = TuplesortstateGetPublic(state);
BrinSortTuple *tuple = (BrinSortTuple *) stup->tuple;
unsigned int tuplen = tuple->tuplen;
tuplen = tuplen + sizeof(tuplen);
LogicalTapeWrite(tape, &tuplen, sizeof(tuplen));
LogicalTapeWrite(tape, &tuple->tuple, tuple->tuplen);
if (base->sortopt & TUPLESORT_RANDOMACCESS) /* need trailing length word? */
LogicalTapeWrite(tape, &tuplen, sizeof(tuplen));
}
static void
readtup_index_brin(Tuplesortstate *state, SortTuple *stup,
LogicalTape *tape, unsigned int len)
{
BrinSortTuple *tuple;
TuplesortPublic *base = TuplesortstateGetPublic(state);
unsigned int tuplen = len - sizeof(unsigned int);
/*
* Allocate space for the BRIN sort tuple, which is BrinTuple with an
* extra length field.
*/
tuple = (BrinSortTuple *) tuplesort_readtup_alloc(state,
BRINSORTTUPLE_SIZE(tuplen));
tuple->tuplen = tuplen;
LogicalTapeReadExact(tape, &tuple->tuple, tuplen);
if (base->sortopt & TUPLESORT_RANDOMACCESS) /* need trailing length word? */
LogicalTapeReadExact(tape, &tuplen, sizeof(tuplen));
stup->tuple = (void *) tuple;
/* set up first-column key value, which is block number */
stup->datum1 = tuple->tuple.bt_blkno;
}

/*
* Routines specialized for DatumTuple case
*/
static void
removeabbrev_datum(Tuplesortstate *state, SortTuple *stups, int count)
{
int i;
for (i = 0; i < count; i++)
stups[i].datum1 = PointerGetDatum(stups[i].tuple);
}
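/*
 * Compare on the single sort key; if the datum1 values compare equal,
 * comparetup_datum_tiebreak() re-checks with the full comparator when
 * abbreviated keys are in use.
 */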
static int
comparetup_datum(const SortTuple *a, const SortTuple *b, Tuplesortstate *state)
{
TuplesortPublic *base = TuplesortstateGetPublic(state);
int compare;
compare = ApplySortComparator(a->datum1, a->isnull1,
b->datum1, b->isnull1,
base->sortKeys);
if (compare != 0)
return compare;
return comparetup_datum_tiebreak(a, b, state);
}
static int
comparetup_datum_tiebreak(const SortTuple *a, const SortTuple *b, Tuplesortstate *state)
{
TuplesortPublic *base = TuplesortstateGetPublic(state);
int32 compare = 0;
/* if we have abbreviations, then "tuple" has the original value */
if (base->sortKeys->abbrev_converter)
compare = ApplySortAbbrevFullComparator(PointerGetDatum(a->tuple), a->isnull1,
PointerGetDatum(b->tuple), b->isnull1,
base->sortKeys);
return compare;
}
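/*
 * On-tape format for the Datum case: a length word followed by the payload,
 * which is empty for a NULL, the Datum itself for pass-by-value types, or
 * the full datum contents for pass-by-reference types.  readtup_datum()
 * relies on the zero-length payload to recognize NULLs.
 */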
static void
writetup_datum(Tuplesortstate *state, LogicalTape *tape, SortTuple *stup)
{
TuplesortPublic *base = TuplesortstateGetPublic(state);
TuplesortDatumArg *arg = (TuplesortDatumArg *) base->arg;
void *waddr;
unsigned int tuplen;
unsigned int writtenlen;
if (stup->isnull1)
{
waddr = NULL;
tuplen = 0;
}
else if (!base->tuples)
{
waddr = &stup->datum1;
tuplen = sizeof(Datum);
}
else
{
waddr = stup->tuple;
tuplen = datumGetSize(PointerGetDatum(stup->tuple), false, arg->datumTypeLen);
Assert(tuplen != 0);
}
writtenlen = tuplen + sizeof(unsigned int);
LogicalTapeWrite(tape, &writtenlen, sizeof(writtenlen));
LogicalTapeWrite(tape, waddr, tuplen);
if (base->sortopt & TUPLESORT_RANDOMACCESS) /* need trailing length word? */
LogicalTapeWrite(tape, &writtenlen, sizeof(writtenlen));
}
static void
readtup_datum(Tuplesortstate *state, SortTuple *stup,
LogicalTape *tape, unsigned int len)
{
TuplesortPublic *base = TuplesortstateGetPublic(state);
unsigned int tuplen = len - sizeof(unsigned int);
if (tuplen == 0)
{
/* it's NULL */
stup->datum1 = (Datum) 0;
stup->isnull1 = true;
stup->tuple = NULL;
}
else if (!base->tuples)
{
Assert(tuplen == sizeof(Datum));
LogicalTapeReadExact(tape, &stup->datum1, tuplen);
stup->isnull1 = false;
stup->tuple = NULL;
}
else
{
void *raddr = tuplesort_readtup_alloc(state, tuplen);
LogicalTapeReadExact(tape, raddr, tuplen);
stup->datum1 = PointerGetDatum(raddr);
stup->isnull1 = false;
stup->tuple = raddr;
}
if (base->sortopt & TUPLESORT_RANDOMACCESS) /* need trailing length word? */
LogicalTapeReadExact(tape, &tuplen, sizeof(tuplen));
}