postgresql/contrib/pageinspect/heapfuncs.c

/*-------------------------------------------------------------------------
 *
 * heapfuncs.c
 *	  Functions to investigate heap pages
 *
 * We check the input to these functions for corrupt pointers etc. that
 * might cause crashes, but at the same time we try to print out as much
 * information as possible, even if it's nonsense. That's because if a
 * page is corrupt, we don't know why and how exactly it is corrupt, so we
 * let the user judge it.
 *
 * These functions are restricted to superusers for the fear of introducing
 * security holes if the input checking isn't as water-tight as it should be.
 * You'd need to be superuser to obtain a raw page image anyway, so
 * there's hardly any use case for using these without superuser-rights
 * anyway.
 *
 * Copyright (c) 2007-2024, PostgreSQL Global Development Group
 *
 * IDENTIFICATION
 *	  contrib/pageinspect/heapfuncs.c
 *
 *-------------------------------------------------------------------------
 */

#include "postgres.h"

#include "access/htup_details.h"
#include "access/relation.h"
#include "catalog/pg_am_d.h"
#include "catalog/pg_type.h"
#include "funcapi.h"
#include "mb/pg_wchar.h"
#include "miscadmin.h"
#include "pageinspect.h"
#include "port/pg_bitutils.h"
#include "utils/array.h"
#include "utils/builtins.h"
#include "utils/rel.h"

/*
 * It's not supported to create tuples with oids anymore, but when pg_upgrade
 * was used to upgrade from an older version, tuples might still have an
 * oid. Seems worthwhile to display that.
 */
#define HeapTupleHeaderGetOidOld(tup) \
( \
	((tup)->t_infomask & HEAP_HASOID_OLD) ? \
	   *((Oid *) ((char *)(tup) + (tup)->t_hoff - sizeof(Oid))) \
	: \
		InvalidOid \
)


/*
 * bits_to_text
 *
 * Converts a bits8-array of 'len' bits to a human-readable
 * c-string representation.
 */
static char *
bits_to_text(bits8 *bits, int len)
{
	int			i;
	char	   *str;

	str = palloc(len + 1);

	for (i = 0; i < len; i++)
		str[i] = (bits[(i / 8)] & (1 << (i % 8))) ? '1' : '0';

	str[i] = '\0';

	return str;
}


/*
 * text_to_bits
 *
 * Converts a c-string representation of bits into a bits8-array. This is
 * the reverse operation of previous routine.
 */
static bits8 *
text_to_bits(char *str, int len)
{
	bits8	   *bits;
	int			off = 0;
	char		byte = 0;

	bits = palloc(len + 1);

	while (off < len)
	{
		if (off % 8 == 0)
			byte = 0;

		if ((str[off] == '0') || (str[off] == '1'))
			byte = byte | ((str[off] - '0') << off % 8);
		else
			ereport(ERROR,
					(errcode(ERRCODE_DATA_CORRUPTED),
					 errmsg("invalid character \"%.*s\" in t_bits string",
							pg_mblen(str + off), str + off)));

		if (off % 8 == 7)
			bits[off / 8] = byte;

		off++;
	}

	return bits;
}

/*
 * heap_page_items
 *
 * Allows inspection of line pointers and tuple headers of a heap page.
 */
PG_FUNCTION_INFO_V1(heap_page_items);

typedef struct heap_page_items_state
{
	TupleDesc	tupd;
	Page		page;
	uint16		offset;
} heap_page_items_state;

Datum
heap_page_items(PG_FUNCTION_ARGS)
{
	bytea	   *raw_page = PG_GETARG_BYTEA_P(0);
	heap_page_items_state *inter_call_data = NULL;
	FuncCallContext *fctx;
	int			raw_page_size;

	if (!superuser())
		ereport(ERROR,
				(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
				 errmsg("must be superuser to use raw page functions")));

	raw_page_size = VARSIZE(raw_page) - VARHDRSZ;

	if (SRF_IS_FIRSTCALL())
	{
		TupleDesc	tupdesc;
		MemoryContext mctx;

		if (raw_page_size < SizeOfPageHeaderData)
			ereport(ERROR,
					(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
					 errmsg("input page too small (%d bytes)", raw_page_size)));

		fctx = SRF_FIRSTCALL_INIT();
		mctx = MemoryContextSwitchTo(fctx->multi_call_memory_ctx);

		inter_call_data = palloc(sizeof(heap_page_items_state));

		/* Build a tuple descriptor for our result type */
		if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE)
			elog(ERROR, "return type must be a row type");

		inter_call_data->tupd = tupdesc;

		inter_call_data->offset = FirstOffsetNumber;
		inter_call_data->page = VARDATA(raw_page);

		fctx->max_calls = PageGetMaxOffsetNumber(inter_call_data->page);
		fctx->user_fctx = inter_call_data;

		MemoryContextSwitchTo(mctx);
	}

	fctx = SRF_PERCALL_SETUP();
	inter_call_data = fctx->user_fctx;

	if (fctx->call_cntr < fctx->max_calls)
	{
		Page		page = inter_call_data->page;
		HeapTuple	resultTuple;
		Datum		result;
		ItemId		id;
		Datum		values[14];
		bool		nulls[14];
		uint16		lp_offset;
		uint16		lp_flags;
		uint16		lp_len;

		memset(nulls, 0, sizeof(nulls));

		/* Extract information from the line pointer */

		id = PageGetItemId(page, inter_call_data->offset);

		lp_offset = ItemIdGetOffset(id);
		lp_flags = ItemIdGetFlags(id);
		lp_len = ItemIdGetLength(id);

		values[0] = UInt16GetDatum(inter_call_data->offset);
		values[1] = UInt16GetDatum(lp_offset);
		values[2] = UInt16GetDatum(lp_flags);
		values[3] = UInt16GetDatum(lp_len);

		/*
		 * We do just enough validity checking to make sure we don't reference
		 * data outside the page passed to us. The page could be corrupt in
		 * many other ways, but at least we won't crash.
		 */
		if (ItemIdHasStorage(id) &&
			lp_len >= MinHeapTupleSize &&
			lp_offset == MAXALIGN(lp_offset) &&
			lp_offset + lp_len <= raw_page_size)
		{
			HeapTupleHeader tuphdr;
			bytea	   *tuple_data_bytea;
			int			tuple_data_len;

			/* Extract information from the tuple header */

			tuphdr = (HeapTupleHeader) PageGetItem(page, id);

			values[4] = UInt32GetDatum(HeapTupleHeaderGetRawXmin(tuphdr));
			values[5] = UInt32GetDatum(HeapTupleHeaderGetRawXmax(tuphdr));
			/* shared with xvac */
			values[6] = UInt32GetDatum(HeapTupleHeaderGetRawCommandId(tuphdr));
			values[7] = PointerGetDatum(&tuphdr->t_ctid);
			values[8] = UInt32GetDatum(tuphdr->t_infomask2);
			values[9] = UInt32GetDatum(tuphdr->t_infomask);
			values[10] = UInt8GetDatum(tuphdr->t_hoff);

			/* Copy raw tuple data into bytea attribute */
			tuple_data_len = lp_len - tuphdr->t_hoff;
			tuple_data_bytea = (bytea *) palloc(tuple_data_len + VARHDRSZ);
			SET_VARSIZE(tuple_data_bytea, tuple_data_len + VARHDRSZ);
			memcpy(VARDATA(tuple_data_bytea), (char *) tuphdr + tuphdr->t_hoff,
				   tuple_data_len);
			values[13] = PointerGetDatum(tuple_data_bytea);

			/*
			 * We already checked that the item is completely within the raw
			 * page passed to us, with the length given in the line pointer.
			 * Let's check that t_hoff doesn't point over lp_len, before using
			 * it to access t_bits and oid.
			 */
			if (tuphdr->t_hoff >= SizeofHeapTupleHeader &&
				tuphdr->t_hoff <= lp_len &&
				tuphdr->t_hoff == MAXALIGN(tuphdr->t_hoff))
			{
				if (tuphdr->t_infomask & HEAP_HASNULL)
				{
					int			bits_len;

					bits_len =
						BITMAPLEN(HeapTupleHeaderGetNatts(tuphdr)) * BITS_PER_BYTE;
					values[11] = CStringGetTextDatum(bits_to_text(tuphdr->t_bits, bits_len));
				}
				else
					nulls[11] = true;

				if (tuphdr->t_infomask & HEAP_HASOID_OLD)
					values[12] = HeapTupleHeaderGetOidOld(tuphdr);
				else
					nulls[12] = true;
			}
			else
			{
				nulls[11] = true;
				nulls[12] = true;
			}
		}
		else
		{
			/*
			 * The line pointer is not used, or it's invalid. Set the rest of
			 * the fields to NULL
			 */
			int			i;

			for (i = 4; i <= 13; i++)
				nulls[i] = true;
		}

		/* Build and return the result tuple. */
		resultTuple = heap_form_tuple(inter_call_data->tupd, values, nulls);
		result = HeapTupleGetDatum(resultTuple);

		inter_call_data->offset++;

		SRF_RETURN_NEXT(fctx, result);
	}
	else
		SRF_RETURN_DONE(fctx);
}

/*
 * tuple_data_split_internal
 *
 * Split raw tuple data taken directly from a page into an array of bytea
 * elements. This routine does a lookup on NULL values and creates array
 * elements accordingly. This is a reimplementation of nocachegetattr()
 * in heaptuple.c simplified for educational purposes.
 */
static Datum
tuple_data_split_internal(Oid relid, char *tupdata,
						  uint16 tupdata_len, uint16 t_infomask,
						  uint16 t_infomask2, bits8 *t_bits,
						  bool do_detoast)
{
	ArrayBuildState *raw_attrs;
	int			nattrs;
	int			i;
	int			off = 0;
	Relation	rel;
	TupleDesc	tupdesc;

	/* Get tuple descriptor from relation OID */
	rel = relation_open(relid, AccessShareLock);
	tupdesc = RelationGetDescr(rel);

	raw_attrs = initArrayResult(BYTEAOID, CurrentMemoryContext, false);
	nattrs = tupdesc->natts;

	if (rel->rd_rel->relam != HEAP_TABLE_AM_OID)
		ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
						errmsg("only heap AM is supported")));

	if (nattrs < (t_infomask2 & HEAP_NATTS_MASK))
		ereport(ERROR,
				(errcode(ERRCODE_DATA_CORRUPTED),
				 errmsg("number of attributes in tuple header is greater than number of attributes in tuple descriptor")));

	for (i = 0; i < nattrs; i++)
	{
		Form_pg_attribute attr;
		bool		is_null;
		bytea	   *attr_data = NULL;

		attr = TupleDescAttr(tupdesc, i);

		/*
		 * Tuple header can specify fewer attributes than tuple descriptor as
		 * ALTER TABLE ADD COLUMN without DEFAULT keyword does not actually
		 * change tuples in pages, so attributes with numbers greater than
		 * (t_infomask2 & HEAP_NATTS_MASK) should be treated as NULL.
		 */
		if (i >= (t_infomask2 & HEAP_NATTS_MASK))
			is_null = true;
		else
			is_null = (t_infomask & HEAP_HASNULL) && att_isnull(i, t_bits);

		if (!is_null)
		{
			int			len;

			if (attr->attlen == -1)
			{
				off = att_align_pointer(off, attr->attalign, -1,
										tupdata + off);

				/*
				 * As VARSIZE_ANY throws an exception if it can't properly
				 * detect the type of external storage in macros VARTAG_SIZE,
				 * this check is repeated to have a nicer error handling.
				 */
				if (VARATT_IS_EXTERNAL(tupdata + off) &&
					!VARATT_IS_EXTERNAL_ONDISK(tupdata + off) &&
					!VARATT_IS_EXTERNAL_INDIRECT(tupdata + off))
					ereport(ERROR,
							(errcode(ERRCODE_DATA_CORRUPTED),
							 errmsg("first byte of varlena attribute is incorrect for attribute %d", i)));

				len = VARSIZE_ANY(tupdata + off);
			}
			else
			{
				off = att_align_nominal(off, attr->attalign);
				len = attr->attlen;
			}

			if (tupdata_len < off + len)
				ereport(ERROR,
						(errcode(ERRCODE_DATA_CORRUPTED),
						 errmsg("unexpected end of tuple data")));

			if (attr->attlen == -1 && do_detoast)
				attr_data = pg_detoast_datum_copy((struct varlena *) (tupdata + off));
			else
			{
				attr_data = (bytea *) palloc(len + VARHDRSZ);
				SET_VARSIZE(attr_data, len + VARHDRSZ);
				memcpy(VARDATA(attr_data), tupdata + off, len);
			}

			off = att_addlength_pointer(off, attr->attlen,
										tupdata + off);
		}

		raw_attrs = accumArrayResult(raw_attrs, PointerGetDatum(attr_data),
									 is_null, BYTEAOID, CurrentMemoryContext);
		if (attr_data)
			pfree(attr_data);
	}

	if (tupdata_len != off)
		ereport(ERROR,
				(errcode(ERRCODE_DATA_CORRUPTED),
				 errmsg("end of tuple reached without looking at all its data")));

	relation_close(rel, AccessShareLock);

	return makeArrayResult(raw_attrs, CurrentMemoryContext);
}

/*
 * tuple_data_split
 *
 * Split raw tuple data taken directly from page into distinct elements
 * taking into account null values.
 */
PG_FUNCTION_INFO_V1(tuple_data_split);

Datum
tuple_data_split(PG_FUNCTION_ARGS)
{
	Oid			relid;
	bytea	   *raw_data;
	uint16		t_infomask;
	uint16		t_infomask2;
	char	   *t_bits_str;
	bool		do_detoast = false;
	bits8	   *t_bits = NULL;
	Datum		res;

	relid = PG_GETARG_OID(0);
	raw_data = PG_ARGISNULL(1) ? NULL : PG_GETARG_BYTEA_P(1);
	t_infomask = PG_GETARG_INT16(2);
	t_infomask2 = PG_GETARG_INT16(3);
	t_bits_str = PG_ARGISNULL(4) ? NULL :
		text_to_cstring(PG_GETARG_TEXT_PP(4));

	if (PG_NARGS() >= 6)
		do_detoast = PG_GETARG_BOOL(5);

	if (!superuser())
		ereport(ERROR,
				(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
				 errmsg("must be superuser to use raw page functions")));

	if (!raw_data)
		PG_RETURN_NULL();

	/*
	 * Convert t_bits string back to the bits8 array as represented in the
	 * tuple header.
	 */
	if (t_infomask & HEAP_HASNULL)
	{
		size_t		bits_str_len;
		size_t		bits_len;

		bits_len = BITMAPLEN(t_infomask2 & HEAP_NATTS_MASK) * BITS_PER_BYTE;
		if (!t_bits_str)
			ereport(ERROR,
					(errcode(ERRCODE_DATA_CORRUPTED),
					 errmsg("t_bits string must not be NULL")));

		bits_str_len = strlen(t_bits_str);
		if (bits_len != bits_str_len)
			ereport(ERROR,
					(errcode(ERRCODE_DATA_CORRUPTED),
					 errmsg("unexpected length of t_bits string: %zu, expected %zu",
							bits_str_len, bits_len)));

		/* do the conversion */
		t_bits = text_to_bits(t_bits_str, bits_str_len);
	}
	else
	{
		if (t_bits_str)
			ereport(ERROR,
					(errcode(ERRCODE_DATA_CORRUPTED),
					 errmsg("t_bits string is expected to be NULL, but instead it is %zu bytes long",
							strlen(t_bits_str))));
	}

	/* Split tuple data */
	res = tuple_data_split_internal(relid, (char *) raw_data + VARHDRSZ,
									VARSIZE(raw_data) - VARHDRSZ,
									t_infomask, t_infomask2, t_bits,
									do_detoast);

	if (t_bits)
		pfree(t_bits);

	PG_RETURN_DATUM(res);
}

/*
 * heap_tuple_infomask_flags
 *
 * Decode into a human-readable format t_infomask and t_infomask2 associated
 * to a tuple.  All the flags are described in access/htup_details.h.
 */
PG_FUNCTION_INFO_V1(heap_tuple_infomask_flags);

Datum
heap_tuple_infomask_flags(PG_FUNCTION_ARGS)
{
#define HEAP_TUPLE_INFOMASK_COLS 2
	Datum		values[HEAP_TUPLE_INFOMASK_COLS] = {0};
	bool		nulls[HEAP_TUPLE_INFOMASK_COLS] = {0};
	uint16		t_infomask = PG_GETARG_INT16(0);
	uint16		t_infomask2 = PG_GETARG_INT16(1);
	int			cnt = 0;
	ArrayType  *a;
	int			bitcnt;
	Datum	   *flags;
	TupleDesc	tupdesc;
	HeapTuple	tuple;

	if (!superuser())
		ereport(ERROR,
				(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
				 errmsg("must be superuser to use raw page functions")));

	/* Build a tuple descriptor for our result type */
	if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE)
		elog(ERROR, "return type must be a row type");

	bitcnt = pg_popcount((const char *) &t_infomask, sizeof(uint16)) +
		pg_popcount((const char *) &t_infomask2, sizeof(uint16));

	/* If no flags, return a set of empty arrays */
	if (bitcnt <= 0)
	{
		values[0] = PointerGetDatum(construct_empty_array(TEXTOID));
		values[1] = PointerGetDatum(construct_empty_array(TEXTOID));
		tuple = heap_form_tuple(tupdesc, values, nulls);
		PG_RETURN_DATUM(HeapTupleGetDatum(tuple));
	}

	/* build set of raw flags */
	flags = (Datum *) palloc0(sizeof(Datum) * bitcnt);

	/* decode t_infomask */
	if ((t_infomask & HEAP_HASNULL) != 0)
		flags[cnt++] = CStringGetTextDatum("HEAP_HASNULL");
	if ((t_infomask & HEAP_HASVARWIDTH) != 0)
		flags[cnt++] = CStringGetTextDatum("HEAP_HASVARWIDTH");
	if ((t_infomask & HEAP_HASEXTERNAL) != 0)
		flags[cnt++] = CStringGetTextDatum("HEAP_HASEXTERNAL");
	if ((t_infomask & HEAP_HASOID_OLD) != 0)
		flags[cnt++] = CStringGetTextDatum("HEAP_HASOID_OLD");
	if ((t_infomask & HEAP_XMAX_KEYSHR_LOCK) != 0)
		flags[cnt++] = CStringGetTextDatum("HEAP_XMAX_KEYSHR_LOCK");
	if ((t_infomask & HEAP_COMBOCID) != 0)
		flags[cnt++] = CStringGetTextDatum("HEAP_COMBOCID");
	if ((t_infomask & HEAP_XMAX_EXCL_LOCK) != 0)
		flags[cnt++] = CStringGetTextDatum("HEAP_XMAX_EXCL_LOCK");
	if ((t_infomask & HEAP_XMAX_LOCK_ONLY) != 0)
		flags[cnt++] = CStringGetTextDatum("HEAP_XMAX_LOCK_ONLY");
	if ((t_infomask & HEAP_XMIN_COMMITTED) != 0)
		flags[cnt++] = CStringGetTextDatum("HEAP_XMIN_COMMITTED");
	if ((t_infomask & HEAP_XMIN_INVALID) != 0)
		flags[cnt++] = CStringGetTextDatum("HEAP_XMIN_INVALID");
	if ((t_infomask & HEAP_XMAX_COMMITTED) != 0)
		flags[cnt++] = CStringGetTextDatum("HEAP_XMAX_COMMITTED");
	if ((t_infomask & HEAP_XMAX_INVALID) != 0)
		flags[cnt++] = CStringGetTextDatum("HEAP_XMAX_INVALID");
	if ((t_infomask & HEAP_XMAX_IS_MULTI) != 0)
		flags[cnt++] = CStringGetTextDatum("HEAP_XMAX_IS_MULTI");
	if ((t_infomask & HEAP_UPDATED) != 0)
		flags[cnt++] = CStringGetTextDatum("HEAP_UPDATED");
	if ((t_infomask & HEAP_MOVED_OFF) != 0)
		flags[cnt++] = CStringGetTextDatum("HEAP_MOVED_OFF");
	if ((t_infomask & HEAP_MOVED_IN) != 0)
		flags[cnt++] = CStringGetTextDatum("HEAP_MOVED_IN");

	/* decode t_infomask2 */
	if ((t_infomask2 & HEAP_KEYS_UPDATED) != 0)
		flags[cnt++] = CStringGetTextDatum("HEAP_KEYS_UPDATED");
	if ((t_infomask2 & HEAP_HOT_UPDATED) != 0)
		flags[cnt++] = CStringGetTextDatum("HEAP_HOT_UPDATED");
	if ((t_infomask2 & HEAP_ONLY_TUPLE) != 0)
		flags[cnt++] = CStringGetTextDatum("HEAP_ONLY_TUPLE");

	/* build value */
	Assert(cnt <= bitcnt);
	a = construct_array_builtin(flags, cnt, TEXTOID);
	values[0] = PointerGetDatum(a);

	/*
	 * Build set of combined flags.  Use the same array as previously, this
	 * keeps the code simple.
	 */
	cnt = 0;
	MemSet(flags, 0, sizeof(Datum) * bitcnt);

	/* decode combined masks of t_infomask */
	if ((t_infomask & HEAP_XMAX_SHR_LOCK) == HEAP_XMAX_SHR_LOCK)
		flags[cnt++] = CStringGetTextDatum("HEAP_XMAX_SHR_LOCK");
	if ((t_infomask & HEAP_XMIN_FROZEN) == HEAP_XMIN_FROZEN)
		flags[cnt++] = CStringGetTextDatum("HEAP_XMIN_FROZEN");
	if ((t_infomask & HEAP_MOVED) == HEAP_MOVED)
		flags[cnt++] = CStringGetTextDatum("HEAP_MOVED");

	/* Build an empty array if there are no combined flags */
	if (cnt == 0)
		a = construct_empty_array(TEXTOID);
	else
		a = construct_array_builtin(flags, cnt, TEXTOID);
	pfree(flags);
	values[1] = PointerGetDatum(a);

	/* Returns the record as Datum */
	tuple = heap_form_tuple(tupdesc, values, nulls);
	PG_RETURN_DATUM(HeapTupleGetDatum(tuple));
}