postgresql/contrib/pageinspect/heapfuncs.c
Andres Freund 4c850ecec6 Don't include heapam.h from other headers.
heapam.h previously was included in a number of widely used
headers (e.g. execnodes.h, indirectly in executor.h, ...). That's
problematic on its own, as heapam.h contains a lot of low-level
details that don't need to be exposed that widely, but becomes more
problematic with the upcoming introduction of pluggable table storage
- it seems inappropriate for heapam.h to be included that widely
afterwards.

heapam.h was largely only included in other headers to get the
HeapScanDesc typedef (which was defined in heapam.h, even though
HeapScanDescData is defined in relscan.h). The better solution here
seems to be to just use the underlying struct (forward declared where
necessary). Similar for BulkInsertState.

Another problem was that LockTupleMode was used in executor.h - parts
of the file tried to cope without heapam.h, but due to the fact that
it indirectly included it, several subsequent violations of that goal
were not noticed. We could just reuse the approach of declaring
parameters as int, but it seems nicer to move LockTupleMode to
lockoptions.h - that's not a perfect location, but also doesn't seem
bad.

As a number of files relied on implicitly included heapam.h, a
significant number of files grew an explicit include. It's quite
probable that a few external projects will need to do the same.

Author: Andres Freund
Reviewed-By: Alvaro Herrera
Discussion: https://postgr.es/m/20190114000701.y4ttcb74jpskkcfb@alap3.anarazel.de
2019-01-14 16:24:41 -08:00

492 lines
12 KiB
C

/*-------------------------------------------------------------------------
*
* heapfuncs.c
* Functions to investigate heap pages
*
* We check the input to these functions for corrupt pointers etc. that
* might cause crashes, but at the same time we try to print out as much
* information as possible, even if it's nonsense. That's because if a
* page is corrupt, we don't know why and how exactly it is corrupt, so we
* let the user judge it.
*
* These functions are restricted to superusers for the fear of introducing
* security holes if the input checking isn't as water-tight as it should be.
* You'd need to be superuser to obtain a raw page image anyway, so
* there's hardly any use case for using these without superuser-rights
* anyway.
*
* Copyright (c) 2007-2019, PostgreSQL Global Development Group
*
* IDENTIFICATION
* contrib/pageinspect/heapfuncs.c
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#include "pageinspect.h"
#include "access/heapam.h"
#include "access/htup_details.h"
#include "funcapi.h"
#include "catalog/pg_type.h"
#include "miscadmin.h"
#include "utils/array.h"
#include "utils/builtins.h"
#include "utils/rel.h"
/*
 * Tuples can no longer be created with OIDs, but a cluster that was brought
 * forward with pg_upgrade from an older release may still contain tuples
 * that carry one, and it is worth showing.
 *
 * When HEAP_HASOID_OLD is set in t_infomask, the OID is read from the
 * sizeof(Oid) bytes that end at t_hoff; otherwise the macro evaluates to
 * InvalidOid.  Note that 'tup' is evaluated more than once.
 */
#define HeapTupleHeaderGetOidOld(tup) \
	(((tup)->t_infomask & HEAP_HASOID_OLD) \
	 ? *((Oid *) ((char *) (tup) + (tup)->t_hoff - sizeof(Oid))) \
	 : InvalidOid)
/*
 * bits_to_text
 *
 * Render the first 'len' bits of a bits8 array as a palloc'd, NUL-terminated
 * string made of '0' and '1' characters.  Output character i reflects bit
 * (i % 8) of bits[i / 8], i.e. the least significant bit of each byte comes
 * first.  The caller owns the returned string.
 */
static char *
bits_to_text(bits8 *bits, int len)
{
	char	   *result = palloc(len + 1);
	char	   *dst = result;
	int			bitno;

	for (bitno = 0; bitno < len; bitno++)
	{
		int			mask = 1 << (bitno % 8);

		if (bits[bitno / 8] & mask)
			*dst++ = '1';
		else
			*dst++ = '0';
	}

	/* terminate right after the last bit written */
	*dst = '\0';

	return result;
}
/*
* text_to_bits
*
* Converts a c-string representation of bits into a bits8-array. This is
* the reverse operation of previous routine.
*/
static bits8 *
text_to_bits(char *str, int len)
{
bits8 *bits;
int off = 0;
char byte = 0;
bits = palloc(len + 1);
while (off < len)
{
if (off % 8 == 0)
byte = 0;
if ((str[off] == '0') || (str[off] == '1'))
byte = byte | ((str[off] - '0') << off % 8);
else
ereport(ERROR,
(errcode(ERRCODE_DATA_CORRUPTED),
errmsg("illegal character '%c' in t_bits string", str[off])));
if (off % 8 == 7)
bits[off / 8] = byte;
off++;
}
return bits;
}
/*
 * heap_page_items
 *
 * Allows inspection of line pointers and tuple headers of a heap page.
 *
 * heap_page_items_state is the state heap_page_items() allocates on its
 * first call and reads back on every later call through the function call
 * context's user_fctx pointer.
 */
PG_FUNCTION_INFO_V1(heap_page_items);
typedef struct heap_page_items_state
{
/* tuple descriptor of the result row type, used to form each output tuple */
TupleDesc tupd;
/* start of the raw page data (the payload of the bytea argument) */
Page page;
/* offset number of the line pointer the next call reports; starts at FirstOffsetNumber */
uint16 offset;
} heap_page_items_state;
/*
 * heap_page_items
 *
 * Set-returning function.  Argument 0 is a raw heap page image (bytea); one
 * row is returned per line pointer on the page.  The 14 output columns are,
 * in order: the line pointer's offset number, lp_offset, lp_flags, lp_len,
 * raw xmin, raw xmax, raw command id, t_ctid, t_infomask2, t_infomask,
 * t_hoff, the null bitmap as text, the old-style OID, and the raw tuple
 * data as bytea.
 *
 * Columns 4..13 are NULL when the line pointer has no storage or fails the
 * sanity checks.  The null bitmap, OID and tuple data (columns 11..13) are
 * additionally NULL when t_hoff itself is out of range, because all three
 * are located through t_hoff.
 *
 * Raises ERRCODE_INSUFFICIENT_PRIVILEGE for non-superusers and
 * ERRCODE_INVALID_PARAMETER_VALUE if the input is smaller than a page
 * header.
 */
Datum
heap_page_items(PG_FUNCTION_ARGS)
{
	bytea	   *raw_page = PG_GETARG_BYTEA_P(0);
	heap_page_items_state *inter_call_data = NULL;
	FuncCallContext *fctx;
	int			raw_page_size;

	if (!superuser())
		ereport(ERROR,
				(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
				 (errmsg("must be superuser to use raw page functions"))));

	raw_page_size = VARSIZE(raw_page) - VARHDRSZ;

	if (SRF_IS_FIRSTCALL())
	{
		TupleDesc	tupdesc;
		MemoryContext mctx;

		if (raw_page_size < SizeOfPageHeaderData)
			ereport(ERROR,
					(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
					 errmsg("input page too small (%d bytes)", raw_page_size)));

		fctx = SRF_FIRSTCALL_INIT();

		/* State that must survive across calls lives in the multi-call context */
		mctx = MemoryContextSwitchTo(fctx->multi_call_memory_ctx);

		inter_call_data = palloc(sizeof(heap_page_items_state));

		/* Build a tuple descriptor for our result type */
		if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE)
			elog(ERROR, "return type must be a row type");

		inter_call_data->tupd = tupdesc;
		inter_call_data->offset = FirstOffsetNumber;
		inter_call_data->page = VARDATA(raw_page);

		/*
		 * NOTE(review): the number of rows comes from the page itself, and
		 * the only size check above is against the page header size.  Confirm
		 * whether callers guarantee that the whole line pointer array lies
		 * within raw_page_size.
		 */
		fctx->max_calls = PageGetMaxOffsetNumber(inter_call_data->page);
		fctx->user_fctx = inter_call_data;

		MemoryContextSwitchTo(mctx);
	}

	fctx = SRF_PERCALL_SETUP();
	inter_call_data = fctx->user_fctx;

	if (fctx->call_cntr < fctx->max_calls)
	{
		Page		page = inter_call_data->page;
		HeapTuple	resultTuple;
		Datum		result;
		ItemId		id;
		Datum		values[14];
		bool		nulls[14];
		uint16		lp_offset;
		uint16		lp_flags;
		uint16		lp_len;

		memset(nulls, 0, sizeof(nulls));

		/* Extract information from the line pointer */
		id = PageGetItemId(page, inter_call_data->offset);

		lp_offset = ItemIdGetOffset(id);
		lp_flags = ItemIdGetFlags(id);
		lp_len = ItemIdGetLength(id);

		values[0] = UInt16GetDatum(inter_call_data->offset);
		values[1] = UInt16GetDatum(lp_offset);
		values[2] = UInt16GetDatum(lp_flags);
		values[3] = UInt16GetDatum(lp_len);

		/*
		 * We do just enough validity checking to make sure we don't reference
		 * data outside the page passed to us. The page could be corrupt in
		 * many other ways, but at least we won't crash.
		 */
		if (ItemIdHasStorage(id) &&
			lp_len >= MinHeapTupleSize &&
			lp_offset == MAXALIGN(lp_offset) &&
			lp_offset + lp_len <= raw_page_size)
		{
			HeapTupleHeader tuphdr;

			/* Extract information from the tuple header */
			tuphdr = (HeapTupleHeader) PageGetItem(page, id);

			values[4] = UInt32GetDatum(HeapTupleHeaderGetRawXmin(tuphdr));
			values[5] = UInt32GetDatum(HeapTupleHeaderGetRawXmax(tuphdr));
			/* shared with xvac */
			values[6] = UInt32GetDatum(HeapTupleHeaderGetRawCommandId(tuphdr));
			values[7] = PointerGetDatum(&tuphdr->t_ctid);
			values[8] = UInt32GetDatum(tuphdr->t_infomask2);
			values[9] = UInt32GetDatum(tuphdr->t_infomask);
			values[10] = UInt8GetDatum(tuphdr->t_hoff);

			/*
			 * We already checked that the item is completely within the raw
			 * page passed to us, with the length given in the line pointer.
			 * But t_hoff could be out of range, so check it before relying
			 * on it to locate t_bits, the oid, or the tuple data.  In
			 * particular, t_hoff > lp_len would make the data length
			 * negative and the copy below run off the page.
			 */
			if (tuphdr->t_hoff >= SizeofHeapTupleHeader &&
				tuphdr->t_hoff <= lp_len &&
				tuphdr->t_hoff == MAXALIGN(tuphdr->t_hoff))
			{
				int			tuple_data_len;
				bytea	   *tuple_data_bytea;

				/* Null bitmap, rendered as a string of '0'/'1' */
				if (tuphdr->t_infomask & HEAP_HASNULL)
				{
					int			bits_len;

					bits_len =
						BITMAPLEN(HeapTupleHeaderGetNatts(tuphdr)) * BITS_PER_BYTE;
					values[11] = CStringGetTextDatum(
													 bits_to_text(tuphdr->t_bits, bits_len));
				}
				else
					nulls[11] = true;

				/* OID left over from a pre-upgrade cluster, if any */
				if (tuphdr->t_infomask & HEAP_HASOID_OLD)
					values[12] = ObjectIdGetDatum(HeapTupleHeaderGetOidOld(tuphdr));
				else
					nulls[12] = true;

				/* Copy raw tuple data into bytea attribute */
				tuple_data_len = lp_len - tuphdr->t_hoff;
				tuple_data_bytea = (bytea *) palloc(tuple_data_len + VARHDRSZ);
				SET_VARSIZE(tuple_data_bytea, tuple_data_len + VARHDRSZ);
				memcpy(VARDATA(tuple_data_bytea), (char *) tuphdr + tuphdr->t_hoff,
					   tuple_data_len);
				values[13] = PointerGetDatum(tuple_data_bytea);
			}
			else
			{
				/* t_hoff is bogus: nothing located through it can be trusted */
				nulls[11] = true;
				nulls[12] = true;
				nulls[13] = true;
			}
		}
		else
		{
			/*
			 * The line pointer is not used, or it's invalid. Set the rest of
			 * the fields to NULL
			 */
			int			i;

			for (i = 4; i <= 13; i++)
				nulls[i] = true;
		}

		/* Build and return the result tuple. */
		resultTuple = heap_form_tuple(inter_call_data->tupd, values, nulls);
		result = HeapTupleGetDatum(resultTuple);

		inter_call_data->offset++;

		SRF_RETURN_NEXT(fctx, result);
	}
	else
		SRF_RETURN_DONE(fctx);
}
/*
 * tuple_data_split_internal
 *
 * Break the raw data area of a heap tuple into one bytea per attribute of
 * relation 'relid' and return the pieces as a bytea[] array Datum.
 * Attributes that are NULL according to 't_bits', or that lie beyond the
 * attribute count stored in 't_infomask2', become NULL array elements.
 * With 'do_detoast' set, varlena attributes are passed through
 * DatumGetByteaPCopy() instead of being copied verbatim.
 *
 * This is a reimplementation of nocachegetattr() in heaptuple.c, simplified
 * for educational purposes.
 *
 * Raises ERRCODE_DATA_CORRUPTED if the header claims more attributes than
 * the relation has, if a varlena header looks invalid, if an attribute runs
 * past 'tupdata_len', or if data is left over after the last attribute.
 */
static Datum
tuple_data_split_internal(Oid relid, char *tupdata,
						  uint16 tupdata_len, uint16 t_infomask,
						  uint16 t_infomask2, bits8 *t_bits,
						  bool do_detoast)
{
	/* Get tuple descriptor from relation OID */
	Relation	rel = relation_open(relid, AccessShareLock);
	TupleDesc	tupdesc = RelationGetDescr(rel);
	ArrayBuildState *pieces = initArrayResult(BYTEAOID, CurrentMemoryContext, false);
	int			desc_natts = tupdesc->natts;
	int			tuple_natts = (t_infomask2 & HEAP_NATTS_MASK);
	int			attno;
	int			off = 0;

	if (desc_natts < tuple_natts)
		ereport(ERROR,
				(errcode(ERRCODE_DATA_CORRUPTED),
				 errmsg("number of attributes in tuple header is greater than number of attributes in tuple descriptor")));

	for (attno = 0; attno < desc_natts; attno++)
	{
		Form_pg_attribute attr = TupleDescAttr(tupdesc, attno);
		bytea	   *attr_data;
		bool		is_null;
		int			len;

		/*
		 * The tuple header may list fewer attributes than the descriptor:
		 * ALTER TABLE ADD COLUMN without DEFAULT does not rewrite existing
		 * tuples, so anything past the header's attribute count is NULL.
		 */
		if (attno < tuple_natts)
			is_null = (t_infomask & HEAP_HASNULL) && att_isnull(attno, t_bits);
		else
			is_null = true;

		if (is_null)
		{
			/* NULL attributes occupy no space in the data area */
			pieces = accumArrayResult(pieces, PointerGetDatum(NULL),
									  is_null, BYTEAOID, CurrentMemoryContext);
			continue;
		}

		if (attr->attlen != -1)
		{
			/* fixed-width attribute: length comes from the catalog */
			off = att_align_nominal(off, attr->attalign);
			len = attr->attlen;
		}
		else
		{
			off = att_align_pointer(off, attr->attalign, -1,
									tupdata + off);

			/*
			 * VARSIZE_ANY throws an exception if VARTAG_SIZE cannot
			 * recognize the kind of external storage, so repeat that check
			 * here first to report a nicer error.
			 */
			if (VARATT_IS_EXTERNAL(tupdata + off) &&
				!VARATT_IS_EXTERNAL_ONDISK(tupdata + off) &&
				!VARATT_IS_EXTERNAL_INDIRECT(tupdata + off))
				ereport(ERROR,
						(errcode(ERRCODE_DATA_CORRUPTED),
						 errmsg("first byte of varlena attribute is incorrect for attribute %d", attno)));

			len = VARSIZE_ANY(tupdata + off);
		}

		if (tupdata_len < off + len)
			ereport(ERROR,
					(errcode(ERRCODE_DATA_CORRUPTED),
					 errmsg("unexpected end of tuple data")));

		if (attr->attlen == -1 && do_detoast)
			attr_data = DatumGetByteaPCopy(tupdata + off);
		else
		{
			/* wrap the raw bytes in a fresh varlena */
			attr_data = (bytea *) palloc(len + VARHDRSZ);
			SET_VARSIZE(attr_data, len + VARHDRSZ);
			memcpy(VARDATA(attr_data), tupdata + off, len);
		}

		off = att_addlength_pointer(off, attr->attlen,
									tupdata + off);

		pieces = accumArrayResult(pieces, PointerGetDatum(attr_data),
								  is_null, BYTEAOID, CurrentMemoryContext);

		/* the array builder was handed the value above; drop our copy */
		pfree(attr_data);
	}

	if (tupdata_len != off)
		ereport(ERROR,
				(errcode(ERRCODE_DATA_CORRUPTED),
				 errmsg("end of tuple reached without looking at all its data")));

	relation_close(rel, AccessShareLock);

	return makeArrayResult(pieces, CurrentMemoryContext);
}
/*
 * tuple_data_split
 *
 * Split raw tuple data taken directly from page into distinct elements
 * taking into account null values.
 *
 * Arguments: 0 = relation OID, 1 = raw tuple data (bytea), 2 = t_infomask,
 * 3 = t_infomask2, 4 = t_bits as a string of '0'/'1' characters, and an
 * optional 5 = do_detoast flag (defaults to false when not supplied).
 *
 * Returns NULL if the raw tuple data is NULL, otherwise the array built by
 * tuple_data_split_internal().
 *
 * Raises ERRCODE_INSUFFICIENT_PRIVILEGE for non-superusers, and
 * ERRCODE_DATA_CORRUPTED when the t_bits argument is inconsistent with
 * t_infomask: it must be present, with exactly one character per bitmap
 * bit, when HEAP_HASNULL is set, and must be NULL otherwise.
 */
PG_FUNCTION_INFO_V1(tuple_data_split);

Datum
tuple_data_split(PG_FUNCTION_ARGS)
{
	Oid			relid;
	bytea	   *raw_data;
	uint16		t_infomask;
	uint16		t_infomask2;
	char	   *t_bits_str;
	bool		do_detoast = false;
	bits8	   *t_bits = NULL;
	Datum		res;

	relid = PG_GETARG_OID(0);
	raw_data = PG_ARGISNULL(1) ? NULL : PG_GETARG_BYTEA_P(1);
	t_infomask = PG_GETARG_INT16(2);
	t_infomask2 = PG_GETARG_INT16(3);
	t_bits_str = PG_ARGISNULL(4) ? NULL :
		text_to_cstring(PG_GETARG_TEXT_PP(4));

	/* the detoast flag is only present in the six-argument form */
	if (PG_NARGS() >= 6)
		do_detoast = PG_GETARG_BOOL(5);

	if (!superuser())
		ereport(ERROR,
				(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
				 errmsg("must be superuser to use raw page functions")));

	if (!raw_data)
		PG_RETURN_NULL();

	/*
	 * Convert t_bits string back to the bits8 array as represented in the
	 * tuple header.
	 */
	if (t_infomask & HEAP_HASNULL)
	{
		int			bits_str_len;
		int			bits_len;

		/* one character is expected per bit of the on-disk null bitmap */
		bits_len = BITMAPLEN(t_infomask2 & HEAP_NATTS_MASK) * BITS_PER_BYTE;
		if (!t_bits_str)
			ereport(ERROR,
					(errcode(ERRCODE_DATA_CORRUPTED),
					 errmsg("argument of t_bits is null, but it is expected to be not null and %d characters long",
							bits_len)));

		bits_str_len = (int) strlen(t_bits_str);
		if (bits_len != bits_str_len)
			ereport(ERROR,
					(errcode(ERRCODE_DATA_CORRUPTED),
					 errmsg("unexpected length of t_bits %d, expected %d",
							bits_str_len, bits_len)));

		/* do the conversion */
		t_bits = text_to_bits(t_bits_str, bits_str_len);
	}
	else
	{
		/* without HEAP_HASNULL there is no bitmap, so none may be supplied */
		if (t_bits_str)
			ereport(ERROR,
					(errcode(ERRCODE_DATA_CORRUPTED),
					 errmsg("t_bits string is expected to be NULL, but instead it is %zu bytes length",
							strlen(t_bits_str))));
	}

	/* Split tuple data */
	res = tuple_data_split_internal(relid, (char *) raw_data + VARHDRSZ,
									VARSIZE(raw_data) - VARHDRSZ,
									t_infomask, t_infomask2, t_bits,
									do_detoast);

	if (t_bits)
		pfree(t_bits);

	PG_RETURN_ARRAYTYPE_P(res);
}