diff --git a/src/backend/access/gin/ginget.c b/src/backend/access/gin/ginget.c index d455981534..182981498c 100644 --- a/src/backend/access/gin/ginget.c +++ b/src/backend/access/gin/ginget.c @@ -8,7 +8,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/access/gin/ginget.c,v 1.21 2009/01/01 17:23:34 momjian Exp $ + * $PostgreSQL: pgsql/src/backend/access/gin/ginget.c,v 1.22 2009/01/10 21:08:36 tgl Exp $ *------------------------------------------------------------------------- */ @@ -290,6 +290,7 @@ startScanEntry(Relation index, GinState *ginstate, GinScanEntry entry) entry->list = NULL; entry->nlist = 0; entry->partialMatch = NULL; + entry->partialMatchIterator = NULL; entry->partialMatchResult = NULL; entry->reduceResult = FALSE; entry->predictNumberResult = 0; @@ -311,6 +312,9 @@ startScanEntry(Relation index, GinState *ginstate, GinScanEntry entry) */ if ( entry->partialMatch ) { + if (entry->partialMatchIterator) + tbm_end_iterate(entry->partialMatchIterator); + entry->partialMatchIterator = NULL; tbm_free( entry->partialMatch ); entry->partialMatch = NULL; } @@ -323,7 +327,7 @@ startScanEntry(Relation index, GinState *ginstate, GinScanEntry entry) if ( entry->partialMatch && !tbm_is_empty(entry->partialMatch) ) { - tbm_begin_iterate(entry->partialMatch); + entry->partialMatchIterator = tbm_begin_iterate(entry->partialMatch); entry->isFinished = FALSE; } } @@ -534,11 +538,13 @@ entryGetItem(Relation index, GinScanEntry entry) { if ( entry->partialMatchResult == NULL || entry->offset >= entry->partialMatchResult->ntuples ) { - entry->partialMatchResult = tbm_iterate( entry->partialMatch ); + entry->partialMatchResult = tbm_iterate( entry->partialMatchIterator ); if ( entry->partialMatchResult == NULL ) { ItemPointerSet(&entry->curItem, InvalidBlockNumber, InvalidOffsetNumber); + tbm_end_iterate(entry->partialMatchIterator); + entry->partialMatchIterator = NULL; entry->isFinished = TRUE; break; } diff --git a/src/backend/access/gin/ginscan.c b/src/backend/access/gin/ginscan.c index 9c122cb526..ba37741922 100644 --- a/src/backend/access/gin/ginscan.c +++ b/src/backend/access/gin/ginscan.c @@ -8,7 +8,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/access/gin/ginscan.c,v 1.20 2009/01/01 17:23:34 momjian Exp $ + * $PostgreSQL: pgsql/src/backend/access/gin/ginscan.c,v 1.21 2009/01/10 21:08:36 tgl Exp $ *------------------------------------------------------------------------- */ @@ -61,6 +61,8 @@ fillScanKey(GinState *ginstate, GinScanKey key, OffsetNumber attnum, Datum query key->scanEntry[i].offset = InvalidOffsetNumber; key->scanEntry[i].buffer = InvalidBuffer; key->scanEntry[i].partialMatch = NULL; + key->scanEntry[i].partialMatchIterator = NULL; + key->scanEntry[i].partialMatchResult = NULL; key->scanEntry[i].strategy = strategy; key->scanEntry[i].list = NULL; key->scanEntry[i].nlist = 0; @@ -107,6 +109,7 @@ resetScanKeys(GinScanKey keys, uint32 nkeys) key->scanEntry[j].list = NULL; key->scanEntry[j].nlist = 0; key->scanEntry[j].partialMatch = NULL; + key->scanEntry[j].partialMatchIterator = NULL; key->scanEntry[j].partialMatchResult = NULL; } } @@ -132,6 +135,8 @@ freeScanKeys(GinScanKey keys, uint32 nkeys) ReleaseBuffer(key->scanEntry[j].buffer); if (key->scanEntry[j].list) pfree(key->scanEntry[j].list); + if (key->scanEntry[j].partialMatchIterator) + tbm_end_iterate(key->scanEntry[j].partialMatchIterator); if (key->scanEntry[j].partialMatch) tbm_free(key->scanEntry[j].partialMatch); } diff --git a/src/backend/executor/nodeBitmapHeapscan.c b/src/backend/executor/nodeBitmapHeapscan.c index a74efe686c..880b9c9590 100644 --- a/src/backend/executor/nodeBitmapHeapscan.c +++ b/src/backend/executor/nodeBitmapHeapscan.c @@ -21,7 +21,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/executor/nodeBitmapHeapscan.c,v 1.31 2009/01/01 17:23:41 momjian Exp $ + * $PostgreSQL: pgsql/src/backend/executor/nodeBitmapHeapscan.c,v 1.32 2009/01/10 21:08:36 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -65,6 +65,7 @@ BitmapHeapNext(BitmapHeapScanState *node) HeapScanDesc scan; Index scanrelid; TIDBitmap *tbm; + TBMIterator *tbmiterator; TBMIterateResult *tbmres; OffsetNumber targoffset; TupleTableSlot *slot; @@ -78,6 +79,7 @@ BitmapHeapNext(BitmapHeapScanState *node) scan = node->ss.ss_currentScanDesc; scanrelid = ((BitmapHeapScan *) node->ss.ps.plan)->scan.scanrelid; tbm = node->tbm; + tbmiterator = node->tbmiterator; tbmres = node->tbmres; /* @@ -111,7 +113,7 @@ BitmapHeapNext(BitmapHeapScanState *node) /* * If we haven't yet performed the underlying index scan, do it, and - * prepare the bitmap to be iterated over. + * begin the iteration over the bitmap. */ if (tbm == NULL) { @@ -121,9 +123,8 @@ BitmapHeapNext(BitmapHeapScanState *node) elog(ERROR, "unrecognized result from subplan"); node->tbm = tbm; + node->tbmiterator = tbmiterator = tbm_begin_iterate(tbm); node->tbmres = tbmres = NULL; - - tbm_begin_iterate(tbm); } for (;;) @@ -136,7 +137,7 @@ BitmapHeapNext(BitmapHeapScanState *node) */ if (tbmres == NULL) { - node->tbmres = tbmres = tbm_iterate(tbm); + node->tbmres = tbmres = tbm_iterate(tbmiterator); if (tbmres == NULL) { /* no more entries in the bitmap */ @@ -376,9 +377,12 @@ ExecBitmapHeapReScan(BitmapHeapScanState *node, ExprContext *exprCtxt) /* rescan to release any page pin */ heap_rescan(node->ss.ss_currentScanDesc, NULL); + if (node->tbmiterator) + tbm_end_iterate(node->tbmiterator); if (node->tbm) tbm_free(node->tbm); node->tbm = NULL; + node->tbmiterator = NULL; node->tbmres = NULL; /* @@ -423,6 +427,8 @@ ExecEndBitmapHeapScan(BitmapHeapScanState *node) /* * release bitmap if any */ + if (node->tbmiterator) + tbm_end_iterate(node->tbmiterator); if (node->tbm) tbm_free(node->tbm); @@ -466,6 +472,7 @@ ExecInitBitmapHeapScan(BitmapHeapScan *node, EState *estate, int eflags) scanstate->ss.ps.state = estate; scanstate->tbm = NULL; + scanstate->tbmiterator = NULL; scanstate->tbmres = NULL; /* diff --git a/src/backend/nodes/tidbitmap.c b/src/backend/nodes/tidbitmap.c index 54acf18fbf..e214bbb763 100644 --- a/src/backend/nodes/tidbitmap.c +++ b/src/backend/nodes/tidbitmap.c @@ -32,7 +32,7 @@ * Copyright (c) 2003-2009, PostgreSQL Global Development Group * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/nodes/tidbitmap.c,v 1.16 2009/01/01 17:23:43 momjian Exp $ + * $PostgreSQL: pgsql/src/backend/nodes/tidbitmap.c,v 1.17 2009/01/10 21:08:36 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -136,9 +136,20 @@ struct TIDBitmap int nchunks; /* number of lossy entries in pagetable */ bool iterating; /* tbm_begin_iterate called? */ PagetableEntry entry1; /* used when status == TBM_ONE_PAGE */ - /* the remaining fields are used while producing sorted output: */ + /* these are valid when iterating is true: */ PagetableEntry **spages; /* sorted exact-page list, or NULL */ PagetableEntry **schunks; /* sorted lossy-chunk list, or NULL */ +}; + +/* + * When iterating over a bitmap in sorted order, a TBMIterator is used to + * track our progress. There can be several iterators scanning the same + * bitmap concurrently. Note that the bitmap becomes read-only as soon as + * any iterator is created. + */ +struct TBMIterator +{ + TIDBitmap *tbm; /* TIDBitmap we're iterating over */ int spageptr; /* next spages index */ int schunkptr; /* next schunks index */ int schunkbit; /* next bit to check in current schunk */ @@ -172,16 +183,9 @@ tbm_create(long maxbytes) TIDBitmap *tbm; long nbuckets; - /* - * Create the TIDBitmap struct, with enough trailing space to serve the - * needs of the TBMIterateResult sub-struct. - */ - tbm = (TIDBitmap *) palloc(sizeof(TIDBitmap) + - MAX_TUPLES_PER_PAGE * sizeof(OffsetNumber)); - /* Zero all the fixed fields */ - MemSetAligned(tbm, 0, sizeof(TIDBitmap)); + /* Create the TIDBitmap struct and zero all its fields */ + tbm = makeNode(TIDBitmap); - tbm->type = T_TIDBitmap; /* Set NodeTag */ tbm->mcxt = CurrentMemoryContext; tbm->status = TBM_EMPTY; @@ -533,60 +537,80 @@ tbm_is_empty(const TIDBitmap *tbm) /* * tbm_begin_iterate - prepare to iterate through a TIDBitmap * + * The TBMIterator struct is created in the caller's memory context. + * For a clean shutdown of the iteration, call tbm_end_iterate; but it's + * okay to just allow the memory context to be released, too. It is caller's + * responsibility not to touch the TBMIterator anymore once the TIDBitmap + * is freed. + * * NB: after this is called, it is no longer allowed to modify the contents * of the bitmap. However, you can call this multiple times to scan the - * contents repeatedly. + * contents repeatedly, including parallel scans. */ -void +TBMIterator * tbm_begin_iterate(TIDBitmap *tbm) { - HASH_SEQ_STATUS status; - PagetableEntry *page; - int npages; - int nchunks; + TBMIterator *iterator; + + /* + * Create the TBMIterator struct, with enough trailing space to serve the + * needs of the TBMIterateResult sub-struct. + */ + iterator = (TBMIterator *) palloc(sizeof(TBMIterator) + + MAX_TUPLES_PER_PAGE * sizeof(OffsetNumber)); + iterator->tbm = tbm; + + /* + * Initialize iteration pointers. + */ + iterator->spageptr = 0; + iterator->schunkptr = 0; + iterator->schunkbit = 0; + + /* + * If we have a hashtable, create and fill the sorted page lists, + * unless we already did that for a previous iterator. Note that the + * lists are attached to the bitmap not the iterator, so they can be + * used by more than one iterator. + */ + if (tbm->status == TBM_HASH && !tbm->iterating) + { + HASH_SEQ_STATUS status; + PagetableEntry *page; + int npages; + int nchunks; + + if (!tbm->spages && tbm->npages > 0) + tbm->spages = (PagetableEntry **) + MemoryContextAlloc(tbm->mcxt, + tbm->npages * sizeof(PagetableEntry *)); + if (!tbm->schunks && tbm->nchunks > 0) + tbm->schunks = (PagetableEntry **) + MemoryContextAlloc(tbm->mcxt, + tbm->nchunks * sizeof(PagetableEntry *)); + + hash_seq_init(&status, tbm->pagetable); + npages = nchunks = 0; + while ((page = (PagetableEntry *) hash_seq_search(&status)) != NULL) + { + if (page->ischunk) + tbm->schunks[nchunks++] = page; + else + tbm->spages[npages++] = page; + } + Assert(npages == tbm->npages); + Assert(nchunks == tbm->nchunks); + if (npages > 1) + qsort(tbm->spages, npages, sizeof(PagetableEntry *), + tbm_comparator); + if (nchunks > 1) + qsort(tbm->schunks, nchunks, sizeof(PagetableEntry *), + tbm_comparator); + } tbm->iterating = true; - /* - * Reset iteration pointers. - */ - tbm->spageptr = 0; - tbm->schunkptr = 0; - tbm->schunkbit = 0; - - /* - * Nothing else to do if no entries, nor if we don't have a hashtable. - */ - if (tbm->nentries == 0 || tbm->status != TBM_HASH) - return; - - /* - * Create and fill the sorted page lists if we didn't already. - */ - if (!tbm->spages && tbm->npages > 0) - tbm->spages = (PagetableEntry **) - MemoryContextAlloc(tbm->mcxt, - tbm->npages * sizeof(PagetableEntry *)); - if (!tbm->schunks && tbm->nchunks > 0) - tbm->schunks = (PagetableEntry **) - MemoryContextAlloc(tbm->mcxt, - tbm->nchunks * sizeof(PagetableEntry *)); - - hash_seq_init(&status, tbm->pagetable); - npages = nchunks = 0; - while ((page = (PagetableEntry *) hash_seq_search(&status)) != NULL) - { - if (page->ischunk) - tbm->schunks[nchunks++] = page; - else - tbm->spages[npages++] = page; - } - Assert(npages == tbm->npages); - Assert(nchunks == tbm->nchunks); - if (npages > 1) - qsort(tbm->spages, npages, sizeof(PagetableEntry *), tbm_comparator); - if (nchunks > 1) - qsort(tbm->schunks, nchunks, sizeof(PagetableEntry *), tbm_comparator); + return iterator; } /* @@ -602,9 +626,10 @@ tbm_begin_iterate(TIDBitmap *tbm) * testing, recheck is always set true when ntuples < 0.) */ TBMIterateResult * -tbm_iterate(TIDBitmap *tbm) +tbm_iterate(TBMIterator *iterator) { - TBMIterateResult *output = &(tbm->output); + TIDBitmap *tbm = iterator->tbm; + TBMIterateResult *output = &(iterator->output); Assert(tbm->iterating); @@ -612,10 +637,10 @@ tbm_iterate(TIDBitmap *tbm) * If lossy chunk pages remain, make sure we've advanced schunkptr/ * schunkbit to the next set bit. */ - while (tbm->schunkptr < tbm->nchunks) + while (iterator->schunkptr < tbm->nchunks) { - PagetableEntry *chunk = tbm->schunks[tbm->schunkptr]; - int schunkbit = tbm->schunkbit; + PagetableEntry *chunk = tbm->schunks[iterator->schunkptr]; + int schunkbit = iterator->schunkbit; while (schunkbit < PAGES_PER_CHUNK) { @@ -628,37 +653,37 @@ tbm_iterate(TIDBitmap *tbm) } if (schunkbit < PAGES_PER_CHUNK) { - tbm->schunkbit = schunkbit; + iterator->schunkbit = schunkbit; break; } /* advance to next chunk */ - tbm->schunkptr++; - tbm->schunkbit = 0; + iterator->schunkptr++; + iterator->schunkbit = 0; } /* * If both chunk and per-page data remain, must output the numerically * earlier page. */ - if (tbm->schunkptr < tbm->nchunks) + if (iterator->schunkptr < tbm->nchunks) { - PagetableEntry *chunk = tbm->schunks[tbm->schunkptr]; + PagetableEntry *chunk = tbm->schunks[iterator->schunkptr]; BlockNumber chunk_blockno; - chunk_blockno = chunk->blockno + tbm->schunkbit; - if (tbm->spageptr >= tbm->npages || - chunk_blockno < tbm->spages[tbm->spageptr]->blockno) + chunk_blockno = chunk->blockno + iterator->schunkbit; + if (iterator->spageptr >= tbm->npages || + chunk_blockno < tbm->spages[iterator->spageptr]->blockno) { /* Return a lossy page indicator from the chunk */ output->blockno = chunk_blockno; output->ntuples = -1; output->recheck = true; - tbm->schunkbit++; + iterator->schunkbit++; return output; } } - if (tbm->spageptr < tbm->npages) + if (iterator->spageptr < tbm->npages) { PagetableEntry *page; int ntuples; @@ -668,7 +693,7 @@ tbm_iterate(TIDBitmap *tbm) if (tbm->status == TBM_ONE_PAGE) page = &tbm->entry1; else - page = tbm->spages[tbm->spageptr]; + page = tbm->spages[iterator->spageptr]; /* scan bitmap to extract individual offset numbers */ ntuples = 0; @@ -692,7 +717,7 @@ tbm_iterate(TIDBitmap *tbm) output->blockno = page->blockno; output->ntuples = ntuples; output->recheck = page->recheck; - tbm->spageptr++; + iterator->spageptr++; return output; } @@ -700,6 +725,19 @@ tbm_iterate(TIDBitmap *tbm) return NULL; } +/* + * tbm_end_iterate - finish an iteration over a TIDBitmap + * + * Currently this is just a pfree, but it might do more someday. (For + * instance, it could be useful to count open iterators and allow the + * bitmap to return to read/write status when there are no more iterators.) + */ +void +tbm_end_iterate(TBMIterator *iterator) +{ + pfree(iterator); +} + /* * tbm_find_pageentry - find a PagetableEntry for the pageno * diff --git a/src/include/access/gin.h b/src/include/access/gin.h index 78269a415a..1425333221 100644 --- a/src/include/access/gin.h +++ b/src/include/access/gin.h @@ -4,7 +4,7 @@ * * Copyright (c) 2006-2009, PostgreSQL Global Development Group * - * $PostgreSQL: pgsql/src/include/access/gin.h,v 1.27 2009/01/01 17:23:55 momjian Exp $ + * $PostgreSQL: pgsql/src/include/access/gin.h,v 1.28 2009/01/10 21:08:36 tgl Exp $ *-------------------------------------------------------------------------- */ @@ -380,6 +380,7 @@ typedef struct GinScanEntryData /* partial match support */ bool isPartialMatch; TIDBitmap *partialMatch; + TBMIterator *partialMatchIterator; TBMIterateResult *partialMatchResult; StrategyNumber strategy; diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h index 4b2b64c300..506605df00 100644 --- a/src/include/nodes/execnodes.h +++ b/src/include/nodes/execnodes.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/nodes/execnodes.h,v 1.199 2009/01/01 17:23:59 momjian Exp $ + * $PostgreSQL: pgsql/src/include/nodes/execnodes.h,v 1.200 2009/01/10 21:08:36 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -1152,6 +1152,7 @@ typedef struct BitmapIndexScanState * * bitmapqualorig execution state for bitmapqualorig expressions * tbm bitmap obtained from child index scan(s) + * tbmiterator iterator for scanning current pages * tbmres current-page data * ---------------- */ @@ -1160,6 +1161,7 @@ typedef struct BitmapHeapScanState ScanState ss; /* its first field is NodeTag */ List *bitmapqualorig; TIDBitmap *tbm; + TBMIterator *tbmiterator; TBMIterateResult *tbmres; } BitmapHeapScanState; diff --git a/src/include/nodes/tidbitmap.h b/src/include/nodes/tidbitmap.h index e6ce0db892..93658543e4 100644 --- a/src/include/nodes/tidbitmap.h +++ b/src/include/nodes/tidbitmap.h @@ -15,7 +15,7 @@ * * Copyright (c) 2003-2009, PostgreSQL Global Development Group * - * $PostgreSQL: pgsql/src/include/nodes/tidbitmap.h,v 1.8 2009/01/01 17:24:00 momjian Exp $ + * $PostgreSQL: pgsql/src/include/nodes/tidbitmap.h,v 1.9 2009/01/10 21:08:36 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -31,6 +31,9 @@ */ typedef struct TIDBitmap TIDBitmap; +/* Likewise, TBMIterator is private */ +typedef struct TBMIterator TBMIterator; + /* Result structure for tbm_iterate */ typedef struct { @@ -55,7 +58,8 @@ extern void tbm_intersect(TIDBitmap *a, const TIDBitmap *b); extern bool tbm_is_empty(const TIDBitmap *tbm); -extern void tbm_begin_iterate(TIDBitmap *tbm); -extern TBMIterateResult *tbm_iterate(TIDBitmap *tbm); +extern TBMIterator *tbm_begin_iterate(TIDBitmap *tbm); +extern TBMIterateResult *tbm_iterate(TBMIterator *iterator); +extern void tbm_end_iterate(TBMIterator *iterator); #endif /* TIDBITMAP_H */