Use materialize SRF mode in brin_page_items

This function was using the single-value-per-call mechanism, but the
code relied on a relcache entry that wasn't kept open across calls.
This manifested as weird errors in the buildfarm during the short time that
the "brin-1" isolation test lived.

Backpatch to 9.5, where it was introduced.
This commit is contained in:
Alvaro Herrera 2015-08-13 13:02:10 -03:00
parent 36e863bbd4
commit 94d626ff5a
1 changed file with 104 additions and 125 deletions

View File

@ -37,18 +37,6 @@ typedef struct brin_column_state
FmgrInfo outputFn[FLEXIBLE_ARRAY_MEMBER]; FmgrInfo outputFn[FLEXIBLE_ARRAY_MEMBER];
} brin_column_state; } brin_column_state;
typedef struct brin_page_state
{
BrinDesc *bdesc;
Page page;
OffsetNumber offset;
bool unusedItem;
bool done;
AttrNumber attno;
BrinMemTuple *dtup;
brin_column_state *columns[FLEXIBLE_ARRAY_MEMBER];
} brin_page_state;
static Page verify_brin_page(bytea *raw_page, uint16 type, static Page verify_brin_page(bytea *raw_page, uint16 type,
const char *strtype); const char *strtype);
@ -119,89 +107,89 @@ verify_brin_page(bytea *raw_page, uint16 type, const char *strtype)
Datum Datum
brin_page_items(PG_FUNCTION_ARGS) brin_page_items(PG_FUNCTION_ARGS)
{ {
brin_page_state *state; bytea *raw_page = PG_GETARG_BYTEA_P(0);
FuncCallContext *fctx; Oid indexRelid = PG_GETARG_OID(1);
ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo;
TupleDesc tupdesc;
MemoryContext oldcontext;
Tuplestorestate *tupstore;
Relation indexRel;
brin_column_state **columns;
BrinDesc *bdesc;
BrinMemTuple *dtup;
Page page;
OffsetNumber offset;
AttrNumber attno;
bool unusedItem;
if (!superuser()) if (!superuser())
ereport(ERROR, ereport(ERROR,
(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
(errmsg("must be superuser to use raw page functions")))); (errmsg("must be superuser to use raw page functions"))));
if (SRF_IS_FIRSTCALL()) /* check to see if caller supports us returning a tuplestore */
if (rsinfo == NULL || !IsA(rsinfo, ReturnSetInfo))
ereport(ERROR,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("set-valued function called in context that cannot accept a set")));
if (!(rsinfo->allowedModes & SFRM_Materialize) ||
rsinfo->expectedDesc == NULL)
ereport(ERROR,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("materialize mode required, but it is not allowed in this context")));
/* Build a tuple descriptor for our result type */
if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE)
elog(ERROR, "return type must be a row type");
/* Build tuplestore to hold the result rows */
oldcontext = MemoryContextSwitchTo(rsinfo->econtext->ecxt_per_query_memory);
tupstore = tuplestore_begin_heap(true, false, work_mem);
rsinfo->returnMode = SFRM_Materialize;
rsinfo->setResult = tupstore;
rsinfo->setDesc = tupdesc;
MemoryContextSwitchTo(oldcontext);
indexRel = index_open(indexRelid, AccessShareLock);
bdesc = brin_build_desc(indexRel);
/* minimally verify the page we got */
page = verify_brin_page(raw_page, BRIN_PAGETYPE_REGULAR, "regular");
/*
* Initialize output functions for all indexed datatypes; simplifies
* calling them later.
*/
columns = palloc(sizeof(brin_column_state *) * RelationGetDescr(indexRel)->natts);
for (attno = 1; attno <= bdesc->bd_tupdesc->natts; attno++)
{ {
bytea *raw_page = PG_GETARG_BYTEA_P(0); Oid output;
Oid indexRelid = PG_GETARG_OID(1); bool isVarlena;
Page page; BrinOpcInfo *opcinfo;
TupleDesc tupdesc; int i;
MemoryContext mctx; brin_column_state *column;
Relation indexRel;
AttrNumber attno;
/* minimally verify the page we got */ opcinfo = bdesc->bd_info[attno - 1];
page = verify_brin_page(raw_page, BRIN_PAGETYPE_REGULAR, "regular"); column = palloc(offsetof(brin_column_state, outputFn) +
sizeof(FmgrInfo) * opcinfo->oi_nstored);
/* create a function context for cross-call persistence */ column->nstored = opcinfo->oi_nstored;
fctx = SRF_FIRSTCALL_INIT(); for (i = 0; i < opcinfo->oi_nstored; i++)
/* switch to memory context appropriate for multiple function calls */
mctx = MemoryContextSwitchTo(fctx->multi_call_memory_ctx);
/* Build a tuple descriptor for our result type */
if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE)
elog(ERROR, "return type must be a row type");
indexRel = index_open(indexRelid, AccessShareLock);
state = palloc(offsetof(brin_page_state, columns) +
sizeof(brin_column_state) * RelationGetDescr(indexRel)->natts);
state->bdesc = brin_build_desc(indexRel);
state->page = page;
state->offset = FirstOffsetNumber;
state->unusedItem = false;
state->done = false;
state->dtup = NULL;
/*
* Initialize output functions for all indexed datatypes; simplifies
* calling them later.
*/
for (attno = 1; attno <= state->bdesc->bd_tupdesc->natts; attno++)
{ {
Oid output; getTypeOutputInfo(opcinfo->oi_typcache[i]->type_id, &output, &isVarlena);
bool isVarlena; fmgr_info(output, &column->outputFn[i]);
BrinOpcInfo *opcinfo;
int i;
brin_column_state *column;
opcinfo = state->bdesc->bd_info[attno - 1];
column = palloc(offsetof(brin_column_state, outputFn) +
sizeof(FmgrInfo) * opcinfo->oi_nstored);
column->nstored = opcinfo->oi_nstored;
for (i = 0; i < opcinfo->oi_nstored; i++)
{
getTypeOutputInfo(opcinfo->oi_typcache[i]->type_id, &output, &isVarlena);
fmgr_info(output, &column->outputFn[i]);
}
state->columns[attno - 1] = column;
} }
index_close(indexRel, AccessShareLock); columns[attno - 1] = column;
fctx->user_fctx = state;
fctx->tuple_desc = BlessTupleDesc(tupdesc);
MemoryContextSwitchTo(mctx);
} }
fctx = SRF_PERCALL_SETUP(); offset = FirstOffsetNumber;
state = fctx->user_fctx; unusedItem = false;
dtup = NULL;
if (!state->done) for (;;)
{ {
HeapTuple result;
Datum values[7]; Datum values[7];
bool nulls[7]; bool nulls[7];
@ -211,39 +199,30 @@ brin_page_items(PG_FUNCTION_ARGS)
* signal for obtaining and decoding the next one. If that's not the * signal for obtaining and decoding the next one. If that's not the
* case, we output the next attribute. * case, we output the next attribute.
*/ */
if (state->dtup == NULL) if (dtup == NULL)
{ {
BrinTuple *tup;
MemoryContext mctx;
ItemId itemId; ItemId itemId;
/* deformed tuple must live across calls */
mctx = MemoryContextSwitchTo(fctx->multi_call_memory_ctx);
/* verify item status: if there's no data, we can't decode */ /* verify item status: if there's no data, we can't decode */
itemId = PageGetItemId(state->page, state->offset); itemId = PageGetItemId(page, offset);
if (ItemIdIsUsed(itemId)) if (ItemIdIsUsed(itemId))
{ {
tup = (BrinTuple *) PageGetItem(state->page, dtup = brin_deform_tuple(bdesc,
PageGetItemId(state->page, (BrinTuple *) PageGetItem(page, itemId));
state->offset)); attno = 1;
state->dtup = brin_deform_tuple(state->bdesc, tup); unusedItem = false;
state->attno = 1;
state->unusedItem = false;
} }
else else
state->unusedItem = true; unusedItem = true;
MemoryContextSwitchTo(mctx);
} }
else else
state->attno++; attno++;
MemSet(nulls, 0, sizeof(nulls)); MemSet(nulls, 0, sizeof(nulls));
if (state->unusedItem) if (unusedItem)
{ {
values[0] = UInt16GetDatum(state->offset); values[0] = UInt16GetDatum(offset);
nulls[1] = true; nulls[1] = true;
nulls[2] = true; nulls[2] = true;
nulls[3] = true; nulls[3] = true;
@ -253,17 +232,17 @@ brin_page_items(PG_FUNCTION_ARGS)
} }
else else
{ {
int att = state->attno - 1; int att = attno - 1;
values[0] = UInt16GetDatum(state->offset); values[0] = UInt16GetDatum(offset);
values[1] = UInt32GetDatum(state->dtup->bt_blkno); values[1] = UInt32GetDatum(dtup->bt_blkno);
values[2] = UInt16GetDatum(state->attno); values[2] = UInt16GetDatum(attno);
values[3] = BoolGetDatum(state->dtup->bt_columns[att].bv_allnulls); values[3] = BoolGetDatum(dtup->bt_columns[att].bv_allnulls);
values[4] = BoolGetDatum(state->dtup->bt_columns[att].bv_hasnulls); values[4] = BoolGetDatum(dtup->bt_columns[att].bv_hasnulls);
values[5] = BoolGetDatum(state->dtup->bt_placeholder); values[5] = BoolGetDatum(dtup->bt_placeholder);
if (!state->dtup->bt_columns[att].bv_allnulls) if (!dtup->bt_columns[att].bv_allnulls)
{ {
BrinValues *bvalues = &state->dtup->bt_columns[att]; BrinValues *bvalues = &dtup->bt_columns[att];
StringInfoData s; StringInfoData s;
bool first; bool first;
int i; int i;
@ -272,14 +251,14 @@ brin_page_items(PG_FUNCTION_ARGS)
appendStringInfoChar(&s, '{'); appendStringInfoChar(&s, '{');
first = true; first = true;
for (i = 0; i < state->columns[att]->nstored; i++) for (i = 0; i < columns[att]->nstored; i++)
{ {
char *val; char *val;
if (!first) if (!first)
appendStringInfoString(&s, " .. "); appendStringInfoString(&s, " .. ");
first = false; first = false;
val = OutputFunctionCall(&state->columns[att]->outputFn[i], val = OutputFunctionCall(&columns[att]->outputFn[i],
bvalues->bv_values[i]); bvalues->bv_values[i]);
appendStringInfoString(&s, val); appendStringInfoString(&s, val);
pfree(val); pfree(val);
@ -295,35 +274,35 @@ brin_page_items(PG_FUNCTION_ARGS)
} }
} }
result = heap_form_tuple(fctx->tuple_desc, values, nulls); tuplestore_putvalues(tupstore, tupdesc, values, nulls);
/* /*
* If the item was unused, jump straight to the next one; otherwise, * If the item was unused, jump straight to the next one; otherwise,
* the only cleanup needed here is to set our signal to go to the next * the only cleanup needed here is to set our signal to go to the next
* tuple in the following iteration, by freeing the current one. * tuple in the following iteration, by freeing the current one.
*/ */
if (state->unusedItem) if (unusedItem)
state->offset = OffsetNumberNext(state->offset); offset = OffsetNumberNext(offset);
else if (state->attno >= state->bdesc->bd_tupdesc->natts) else if (attno >= bdesc->bd_tupdesc->natts)
{ {
pfree(state->dtup); pfree(dtup);
state->dtup = NULL; dtup = NULL;
state->offset = OffsetNumberNext(state->offset); offset = OffsetNumberNext(offset);
} }
/* /*
* If we're beyond the end of the page, set flag to end the function * If we're beyond the end of the page, we're done.
* in the following iteration.
*/ */
if (state->offset > PageGetMaxOffsetNumber(state->page)) if (offset > PageGetMaxOffsetNumber(page))
state->done = true; break;
SRF_RETURN_NEXT(fctx, HeapTupleGetDatum(result));
} }
brin_free_desc(state->bdesc); /* clean up and return the tuplestore */
brin_free_desc(bdesc);
tuplestore_donestoring(tupstore);
index_close(indexRel, AccessShareLock);
SRF_RETURN_DONE(fctx); return (Datum) 0;
} }
Datum Datum