Account for the effect of lossy pages when costing bitmap scans.
Dilip Kumar, reviewed by Alexander Kumenkov, Amul Sul, and me. Some final adjustments by me. Discussion: http://postgr.es/m/CAFiTN-sYtqUOXQ4SpuhTv0Z9gD0si3YxZGv_PQAAMX8qbOotcg@mail.gmail.com
This commit is contained in:
parent
0c98d0dd5c
commit
5edc63bda6
|
@ -265,7 +265,6 @@ TIDBitmap *
|
||||||
tbm_create(long maxbytes, dsa_area *dsa)
|
tbm_create(long maxbytes, dsa_area *dsa)
|
||||||
{
|
{
|
||||||
TIDBitmap *tbm;
|
TIDBitmap *tbm;
|
||||||
long nbuckets;
|
|
||||||
|
|
||||||
/* Create the TIDBitmap struct and zero all its fields */
|
/* Create the TIDBitmap struct and zero all its fields */
|
||||||
tbm = makeNode(TIDBitmap);
|
tbm = makeNode(TIDBitmap);
|
||||||
|
@ -273,17 +272,7 @@ tbm_create(long maxbytes, dsa_area *dsa)
|
||||||
tbm->mcxt = CurrentMemoryContext;
|
tbm->mcxt = CurrentMemoryContext;
|
||||||
tbm->status = TBM_EMPTY;
|
tbm->status = TBM_EMPTY;
|
||||||
|
|
||||||
/*
|
tbm->maxentries = (int) tbm_calculate_entries(maxbytes);
|
||||||
* Estimate number of hashtable entries we can have within maxbytes. This
|
|
||||||
* estimates the hash cost as sizeof(PagetableEntry), which is good enough
|
|
||||||
* for our purpose. Also count an extra Pointer per entry for the arrays
|
|
||||||
* created during iteration readout.
|
|
||||||
*/
|
|
||||||
nbuckets = maxbytes /
|
|
||||||
(sizeof(PagetableEntry) + sizeof(Pointer) + sizeof(Pointer));
|
|
||||||
nbuckets = Min(nbuckets, INT_MAX - 1); /* safety limit */
|
|
||||||
nbuckets = Max(nbuckets, 16); /* sanity limit */
|
|
||||||
tbm->maxentries = (int) nbuckets;
|
|
||||||
tbm->lossify_start = 0;
|
tbm->lossify_start = 0;
|
||||||
tbm->dsa = dsa;
|
tbm->dsa = dsa;
|
||||||
tbm->dsapagetable = InvalidDsaPointer;
|
tbm->dsapagetable = InvalidDsaPointer;
|
||||||
|
@ -1546,3 +1535,27 @@ pagetable_free(pagetable_hash *pagetable, void *pointer)
|
||||||
tbm->dsapagetableold = InvalidDsaPointer;
|
tbm->dsapagetableold = InvalidDsaPointer;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* tbm_calculate_entries
|
||||||
|
*
|
||||||
|
* Estimate number of hashtable entries we can have within maxbytes.
|
||||||
|
*/
|
||||||
|
long
|
||||||
|
tbm_calculate_entries(double maxbytes)
|
||||||
|
{
|
||||||
|
long nbuckets;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Estimate number of hashtable entries we can have within maxbytes. This
|
||||||
|
* estimates the hash cost as sizeof(PagetableEntry), which is good enough
|
||||||
|
* for our purpose. Also count an extra Pointer per entry for the arrays
|
||||||
|
* created during iteration readout.
|
||||||
|
*/
|
||||||
|
nbuckets = maxbytes /
|
||||||
|
(sizeof(PagetableEntry) + sizeof(Pointer) + sizeof(Pointer));
|
||||||
|
nbuckets = Min(nbuckets, INT_MAX - 1); /* safety limit */
|
||||||
|
nbuckets = Max(nbuckets, 16); /* sanity limit */
|
||||||
|
|
||||||
|
return nbuckets;
|
||||||
|
}
|
||||||
|
|
|
@ -5171,6 +5171,8 @@ compute_bitmap_pages(PlannerInfo *root, RelOptInfo *baserel, Path *bitmapqual,
|
||||||
double T;
|
double T;
|
||||||
double pages_fetched;
|
double pages_fetched;
|
||||||
double tuples_fetched;
|
double tuples_fetched;
|
||||||
|
double heap_pages;
|
||||||
|
long maxentries;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Fetch total cost of obtaining the bitmap, as well as its total
|
* Fetch total cost of obtaining the bitmap, as well as its total
|
||||||
|
@ -5185,6 +5187,24 @@ compute_bitmap_pages(PlannerInfo *root, RelOptInfo *baserel, Path *bitmapqual,
|
||||||
|
|
||||||
T = (baserel->pages > 1) ? (double) baserel->pages : 1.0;
|
T = (baserel->pages > 1) ? (double) baserel->pages : 1.0;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* For a single scan, the number of heap pages that need to be fetched is
|
||||||
|
* the same as the Mackert and Lohman formula for the case T <= b (ie, no
|
||||||
|
* re-reads needed).
|
||||||
|
*/
|
||||||
|
pages_fetched = (2.0 * T * tuples_fetched) / (2.0 * T + tuples_fetched);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Calculate the number of pages fetched from the heap. Then based on
|
||||||
|
* current work_mem estimate get the estimated maxentries in the bitmap.
|
||||||
|
* (Note that we always do this calculation based on the number of pages
|
||||||
|
* that would be fetched in a single iteration, even if loop_count > 1.
|
||||||
|
* That's correct, because only that number of entries will be stored in
|
||||||
|
* the bitmap at one time.)
|
||||||
|
*/
|
||||||
|
heap_pages = Min(pages_fetched, baserel->pages);
|
||||||
|
maxentries = tbm_calculate_entries(work_mem * 1024L);
|
||||||
|
|
||||||
if (loop_count > 1)
|
if (loop_count > 1)
|
||||||
{
|
{
|
||||||
/*
|
/*
|
||||||
|
@ -5199,22 +5219,41 @@ compute_bitmap_pages(PlannerInfo *root, RelOptInfo *baserel, Path *bitmapqual,
|
||||||
root);
|
root);
|
||||||
pages_fetched /= loop_count;
|
pages_fetched /= loop_count;
|
||||||
}
|
}
|
||||||
else
|
|
||||||
{
|
|
||||||
/*
|
|
||||||
* For a single scan, the number of heap pages that need to be fetched
|
|
||||||
* is the same as the Mackert and Lohman formula for the case T <= b
|
|
||||||
* (ie, no re-reads needed).
|
|
||||||
*/
|
|
||||||
pages_fetched =
|
|
||||||
(2.0 * T * tuples_fetched) / (2.0 * T + tuples_fetched);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (pages_fetched >= T)
|
if (pages_fetched >= T)
|
||||||
pages_fetched = T;
|
pages_fetched = T;
|
||||||
else
|
else
|
||||||
pages_fetched = ceil(pages_fetched);
|
pages_fetched = ceil(pages_fetched);
|
||||||
|
|
||||||
|
if (maxentries < heap_pages)
|
||||||
|
{
|
||||||
|
double exact_pages;
|
||||||
|
double lossy_pages;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Crude approximation of the number of lossy pages. Because of the
|
||||||
|
* way tbm_lossify() is coded, the number of lossy pages increases
|
||||||
|
* very sharply as soon as we run short of memory; this formula has
|
||||||
|
* that property and seems to perform adequately in testing, but it's
|
||||||
|
* possible we could do better somehow.
|
||||||
|
*/
|
||||||
|
lossy_pages = Max(0, heap_pages - maxentries / 2);
|
||||||
|
exact_pages = heap_pages - lossy_pages;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* If there are lossy pages then recompute the number of tuples
|
||||||
|
* processed by the bitmap heap node. We assume here that the chance
|
||||||
|
* of a given tuple coming from an exact page is the same as the
|
||||||
|
* chance that a given page is exact. This might not be true, but
|
||||||
|
* it's not clear how we can do any better.
|
||||||
|
*/
|
||||||
|
if (lossy_pages > 0)
|
||||||
|
tuples_fetched =
|
||||||
|
clamp_row_est(indexSelectivity *
|
||||||
|
(exact_pages / heap_pages) * baserel->tuples +
|
||||||
|
(lossy_pages / heap_pages) * baserel->tuples);
|
||||||
|
}
|
||||||
|
|
||||||
if (cost)
|
if (cost)
|
||||||
*cost = indexTotalCost;
|
*cost = indexTotalCost;
|
||||||
if (tuple)
|
if (tuple)
|
||||||
|
|
|
@ -70,5 +70,6 @@ extern void tbm_end_iterate(TBMIterator *iterator);
|
||||||
extern void tbm_end_shared_iterate(TBMSharedIterator *iterator);
|
extern void tbm_end_shared_iterate(TBMSharedIterator *iterator);
|
||||||
extern TBMSharedIterator *tbm_attach_shared_iterate(dsa_area *dsa,
|
extern TBMSharedIterator *tbm_attach_shared_iterate(dsa_area *dsa,
|
||||||
dsa_pointer dp);
|
dsa_pointer dp);
|
||||||
|
extern long tbm_calculate_entries(double maxbytes);
|
||||||
|
|
||||||
#endif /* TIDBITMAP_H */
|
#endif /* TIDBITMAP_H */
|
||||||
|
|
Loading…
Reference in New Issue