From 9b88f27cb42fe8ff59ddc75e29c005624b8850a2 Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Mon, 27 Feb 2017 17:20:34 -0500 Subject: [PATCH] Allow index AMs to return either HeapTuple or IndexTuple format during IOS. Previously, only IndexTuple format was supported for the output data of an index-only scan. This is fine for btree, which is just returning a verbatim index tuple anyway. It's not so fine for SP-GiST, which can return reconstructed data that's much larger than a page. To fix, extend the index AM API so that index-only scan data can be returned in either HeapTuple or IndexTuple format. There are other ways we could have done it, but this way avoids an API break for index AMs that aren't concerned with the issue, and it costs little except a couple more fields in IndexScanDescs. I changed both GiST and SP-GiST to use the HeapTuple method. I'm not very clear on whether GiST can reconstruct data that's too large for an IndexTuple, but that seems possible, and it's not much of a code change to fix. Per a complaint from Vik Fearing. Reviewed by Jason Li. 
Discussion: https://postgr.es/m/49527f79-530d-0bfe-3dad-d183596afa92@2ndquadrant.fr --- doc/src/sgml/indexam.sgml | 16 ++++++++++------ src/backend/access/gist/gistget.c | 17 +++++++++-------- src/backend/access/gist/gistscan.c | 5 +++-- src/backend/access/gist/gistutil.c | 6 +++--- src/backend/access/index/genam.c | 2 ++ src/backend/access/index/indexam.c | 4 ++-- src/backend/access/spgist/spgscan.c | 24 ++++++++++++------------ src/backend/executor/nodeIndexonlyscan.c | 21 +++++++++++++++++++-- src/include/access/gist_private.h | 4 ++-- src/include/access/relscan.h | 9 ++++++++- src/include/access/spgist_private.h | 2 +- 11 files changed, 71 insertions(+), 39 deletions(-) diff --git a/doc/src/sgml/indexam.sgml b/doc/src/sgml/indexam.sgml index 401b11598e..ac512588e2 100644 --- a/doc/src/sgml/indexam.sgml +++ b/doc/src/sgml/indexam.sgml @@ -551,15 +551,19 @@ amgettuple (IndexScanDesc scan, If the index supports index-only scans (i.e., amcanreturn returns TRUE for it), - then on success the AM must also check - scan->xs_want_itup, and if that is true it must return - the original indexed data for the index entry, in the form of an + then on success the AM must also check scan->xs_want_itup, + and if that is true it must return the originally indexed data for the + index entry. The data can be returned in the form of an IndexTuple pointer stored at scan->xs_itup, - with tuple descriptor scan->xs_itupdesc. - (Management of the data referenced by the pointer is the access method's + with tuple descriptor scan->xs_itupdesc; or in the form of + a HeapTuple pointer stored at scan->xs_hitup, + with tuple descriptor scan->xs_hitupdesc. (The latter + format should be used when reconstructing data that might possibly not fit + into an IndexTuple.) In either case, + management of the data referenced by the pointer is the access method's responsibility. The data must remain good at least until the next amgettuple, amrescan, or amendscan - call for the scan.) 
+ call for the scan. diff --git a/src/backend/access/gist/gistget.c b/src/backend/access/gist/gistget.c index eea366b1ad..122dc38db5 100644 --- a/src/backend/access/gist/gistget.c +++ b/src/backend/access/gist/gistget.c @@ -441,12 +441,13 @@ gistScanPage(IndexScanDesc scan, GISTSearchItem *pageItem, double *myDistances, so->pageData[so->nPageData].offnum = i; /* - * In an index-only scan, also fetch the data from the tuple. + * In an index-only scan, also fetch the data from the tuple. The + * reconstructed tuples are stored in pageDataCxt. */ if (scan->xs_want_itup) { oldcxt = MemoryContextSwitchTo(so->pageDataCxt); - so->pageData[so->nPageData].ftup = + so->pageData[so->nPageData].recontup = gistFetchTuple(giststate, r, it); MemoryContextSwitchTo(oldcxt); } @@ -478,7 +479,7 @@ gistScanPage(IndexScanDesc scan, GISTSearchItem *pageItem, double *myDistances, * In an index-only scan, also fetch the data from the tuple. */ if (scan->xs_want_itup) - item->data.heap.ftup = gistFetchTuple(giststate, r, it); + item->data.heap.recontup = gistFetchTuple(giststate, r, it); } else { @@ -540,11 +541,11 @@ getNextNearest(IndexScanDesc scan) bool res = false; int i; - if (scan->xs_itup) + if (scan->xs_hitup) { /* free previously returned tuple */ - pfree(scan->xs_itup); - scan->xs_itup = NULL; + pfree(scan->xs_hitup); + scan->xs_hitup = NULL; } do @@ -601,7 +602,7 @@ getNextNearest(IndexScanDesc scan) /* in an index-only scan, also return the reconstructed tuple. 
*/ if (scan->xs_want_itup) - scan->xs_itup = item->data.heap.ftup; + scan->xs_hitup = item->data.heap.recontup; res = true; } else @@ -685,7 +686,7 @@ gistgettuple(IndexScanDesc scan, ScanDirection dir) /* in an index-only scan, also return the reconstructed tuple */ if (scan->xs_want_itup) - scan->xs_itup = so->pageData[so->curPageData].ftup; + scan->xs_hitup = so->pageData[so->curPageData].recontup; so->curPageData++; diff --git a/src/backend/access/gist/gistscan.c b/src/backend/access/gist/gistscan.c index 33b388906a..81ff8fc8b6 100644 --- a/src/backend/access/gist/gistscan.c +++ b/src/backend/access/gist/gistscan.c @@ -155,7 +155,7 @@ gistrescan(IndexScanDesc scan, ScanKey key, int nkeys, * tuple descriptor to represent the returned index tuples and create a * memory context to hold them during the scan. */ - if (scan->xs_want_itup && !scan->xs_itupdesc) + if (scan->xs_want_itup && !scan->xs_hitupdesc) { int natts; int attno; @@ -174,8 +174,9 @@ gistrescan(IndexScanDesc scan, ScanKey key, int nkeys, scan->indexRelation->rd_opcintype[attno - 1], -1, 0); } - scan->xs_itupdesc = so->giststate->fetchTupdesc; + scan->xs_hitupdesc = so->giststate->fetchTupdesc; + /* Also create a memory context that will hold the returned tuples */ so->pageDataCxt = AllocSetContextCreate(so->giststate->scanCxt, "GiST page data context", ALLOCSET_DEFAULT_SIZES); diff --git a/src/backend/access/gist/gistutil.c b/src/backend/access/gist/gistutil.c index f92baedffd..75845ba0e7 100644 --- a/src/backend/access/gist/gistutil.c +++ b/src/backend/access/gist/gistutil.c @@ -624,9 +624,9 @@ gistFetchAtt(GISTSTATE *giststate, int nkey, Datum k, Relation r) /* * Fetch all keys in tuple. - * returns new IndexTuple that contains GISTENTRY with fetched data + * Returns a new HeapTuple containing the originally-indexed data. 
*/ -IndexTuple +HeapTuple gistFetchTuple(GISTSTATE *giststate, Relation r, IndexTuple tuple) { MemoryContext oldcxt = MemoryContextSwitchTo(giststate->tempCxt); @@ -660,7 +660,7 @@ gistFetchTuple(GISTSTATE *giststate, Relation r, IndexTuple tuple) } MemoryContextSwitchTo(oldcxt); - return index_form_tuple(giststate->fetchTupdesc, fetchatt, isnull); + return heap_form_tuple(giststate->fetchTupdesc, fetchatt, isnull); } float diff --git a/src/backend/access/index/genam.c b/src/backend/access/index/genam.c index c4a393f34e..3599476930 100644 --- a/src/backend/access/index/genam.c +++ b/src/backend/access/index/genam.c @@ -119,6 +119,8 @@ RelationGetIndexScan(Relation indexRelation, int nkeys, int norderbys) scan->xs_itup = NULL; scan->xs_itupdesc = NULL; + scan->xs_hitup = NULL; + scan->xs_hitupdesc = NULL; ItemPointerSetInvalid(&scan->xs_ctup.t_self); scan->xs_ctup.t_data = NULL; diff --git a/src/backend/access/index/indexam.c b/src/backend/access/index/indexam.c index 4e7eca73cc..cc5ac8b857 100644 --- a/src/backend/access/index/indexam.c +++ b/src/backend/access/index/indexam.c @@ -535,8 +535,8 @@ index_getnext_tid(IndexScanDesc scan, ScanDirection direction) /* * The AM's amgettuple proc finds the next index entry matching the scan * keys, and puts the TID into scan->xs_ctup.t_self. It should also set - * scan->xs_recheck and possibly scan->xs_itup, though we pay no attention - * to those fields here. + * scan->xs_recheck and possibly scan->xs_itup/scan->xs_hitup, though we + * pay no attention to those fields here. 
*/ found = scan->indexRelation->rd_amroutine->amgettuple(scan, direction); diff --git a/src/backend/access/spgist/spgscan.c b/src/backend/access/spgist/spgscan.c index 139d998600..2d96c0094e 100644 --- a/src/backend/access/spgist/spgscan.c +++ b/src/backend/access/spgist/spgscan.c @@ -92,11 +92,11 @@ resetSpGistScanOpaque(SpGistScanOpaque so) if (so->want_itup) { - /* Must pfree IndexTuples to avoid memory leak */ + /* Must pfree reconstructed tuples to avoid memory leak */ int i; for (i = 0; i < so->nPtrs; i++) - pfree(so->indexTups[i]); + pfree(so->reconTups[i]); } so->iPtr = so->nPtrs = 0; } @@ -195,8 +195,8 @@ spgbeginscan(Relation rel, int keysz, int orderbysz) "SP-GiST search temporary context", ALLOCSET_DEFAULT_SIZES); - /* Set up indexTupDesc and xs_itupdesc in case it's an index-only scan */ - so->indexTupDesc = scan->xs_itupdesc = RelationGetDescr(rel); + /* Set up indexTupDesc and xs_hitupdesc in case it's an index-only scan */ + so->indexTupDesc = scan->xs_hitupdesc = RelationGetDescr(rel); scan->opaque = so; @@ -591,12 +591,12 @@ storeGettuple(SpGistScanOpaque so, ItemPointer heapPtr, if (so->want_itup) { /* - * Reconstruct desired IndexTuple. We have to copy the datum out of - * the temp context anyway, so we may as well create the tuple here. + * Reconstruct index data. We have to copy the datum out of the temp + * context anyway, so we may as well create the tuple here. 
*/ - so->indexTups[so->nPtrs] = index_form_tuple(so->indexTupDesc, - &leafValue, - &isnull); + so->reconTups[so->nPtrs] = heap_form_tuple(so->indexTupDesc, + &leafValue, + &isnull); } so->nPtrs++; } @@ -619,18 +619,18 @@ spggettuple(IndexScanDesc scan, ScanDirection dir) /* continuing to return tuples from a leaf page */ scan->xs_ctup.t_self = so->heapPtrs[so->iPtr]; scan->xs_recheck = so->recheck[so->iPtr]; - scan->xs_itup = so->indexTups[so->iPtr]; + scan->xs_hitup = so->reconTups[so->iPtr]; so->iPtr++; return true; } if (so->want_itup) { - /* Must pfree IndexTuples to avoid memory leak */ + /* Must pfree reconstructed tuples to avoid memory leak */ int i; for (i = 0; i < so->nPtrs; i++) - pfree(so->indexTups[i]); + pfree(so->reconTups[i]); } so->iPtr = so->nPtrs = 0; diff --git a/src/backend/executor/nodeIndexonlyscan.c b/src/backend/executor/nodeIndexonlyscan.c index 66c2ad66d7..4a7f39a7c7 100644 --- a/src/backend/executor/nodeIndexonlyscan.c +++ b/src/backend/executor/nodeIndexonlyscan.c @@ -149,9 +149,26 @@ IndexOnlyNext(IndexOnlyScanState *node) } /* - * Fill the scan tuple slot with data from the index. + * Fill the scan tuple slot with data from the index. This might be + * provided in either HeapTuple or IndexTuple format. Conceivably an + * index AM might fill both fields, in which case we prefer the heap + * format, since it's probably a bit cheaper to fill a slot from. */ - StoreIndexTuple(slot, scandesc->xs_itup, scandesc->xs_itupdesc); + if (scandesc->xs_hitup) + { + /* + * We don't take the trouble to verify that the provided tuple has + * exactly the slot's format, but it seems worth doing a quick + * check on the number of fields. 
+ */ + Assert(slot->tts_tupleDescriptor->natts == + scandesc->xs_hitupdesc->natts); + ExecStoreTuple(scandesc->xs_hitup, slot, InvalidBuffer, false); + } + else if (scandesc->xs_itup) + StoreIndexTuple(slot, scandesc->xs_itup, scandesc->xs_itupdesc); + else + elog(ERROR, "no data returned for index-only scan"); /* * If the index was lossy, we have to recheck the index quals. diff --git a/src/include/access/gist_private.h b/src/include/access/gist_private.h index 5b3303056b..1ad4ed6da7 100644 --- a/src/include/access/gist_private.h +++ b/src/include/access/gist_private.h @@ -119,7 +119,7 @@ typedef struct GISTSearchHeapItem ItemPointerData heapPtr; bool recheck; /* T if quals must be rechecked */ bool recheckDistances; /* T if distances must be rechecked */ - IndexTuple ftup; /* data fetched back from the index, used in + HeapTuple recontup; /* data reconstructed from the index, used in * index-only scans */ OffsetNumber offnum; /* track offset in page to mark tuple as * LP_DEAD */ @@ -477,7 +477,7 @@ extern void gistMakeUnionItVec(GISTSTATE *giststate, IndexTuple *itvec, int len, extern bool gistKeyIsEQ(GISTSTATE *giststate, int attno, Datum a, Datum b); extern void gistDeCompressAtt(GISTSTATE *giststate, Relation r, IndexTuple tuple, Page p, OffsetNumber o, GISTENTRY *attdata, bool *isnull); -extern IndexTuple gistFetchTuple(GISTSTATE *giststate, Relation r, +extern HeapTuple gistFetchTuple(GISTSTATE *giststate, Relation r, IndexTuple tuple); extern void gistMakeUnionKey(GISTSTATE *giststate, int attno, GISTENTRY *entry1, bool isnull1, diff --git a/src/include/access/relscan.h b/src/include/access/relscan.h index ce3ca8d4ac..3fc726d712 100644 --- a/src/include/access/relscan.h +++ b/src/include/access/relscan.h @@ -104,9 +104,16 @@ typedef struct IndexScanDescData /* index access method's private state */ void *opaque; /* access-method-specific info */ - /* in an index-only scan, this is valid after a successful amgettuple */ + /* + * In an index-only scan, a 
successful amgettuple call must fill either + * xs_itup (and xs_itupdesc) or xs_hitup (and xs_hitupdesc) to provide the + * data returned by the scan. It can fill both, in which case the heap + * format will be used. + */ IndexTuple xs_itup; /* index tuple returned by AM */ TupleDesc xs_itupdesc; /* rowtype descriptor of xs_itup */ + HeapTuple xs_hitup; /* index data returned by AM, as HeapTuple */ + TupleDesc xs_hitupdesc; /* rowtype descriptor of xs_hitup */ /* xs_ctup/xs_cbuf/xs_recheck are valid after a successful index_getnext */ HeapTupleData xs_ctup; /* current heap tuple, if any */ diff --git a/src/include/access/spgist_private.h b/src/include/access/spgist_private.h index e42079b09f..4072c050de 100644 --- a/src/include/access/spgist_private.h +++ b/src/include/access/spgist_private.h @@ -159,7 +159,7 @@ typedef struct SpGistScanOpaqueData int iPtr; /* index for scanning through same */ ItemPointerData heapPtrs[MaxIndexTuplesPerPage]; /* TIDs from cur page */ bool recheck[MaxIndexTuplesPerPage]; /* their recheck flags */ - IndexTuple indexTups[MaxIndexTuplesPerPage]; /* reconstructed tuples */ + HeapTuple reconTups[MaxIndexTuplesPerPage]; /* reconstructed tuples */ /* * Note: using MaxIndexTuplesPerPage above is a bit hokey since