From 5edc63bda68a77c4d38f0cbeae1c4271f9ef4100 Mon Sep 17 00:00:00 2001 From: Robert Haas Date: Fri, 10 Nov 2017 16:50:50 -0500 Subject: [PATCH] Account for the effect of lossy pages when costing bitmap scans. Dilip Kumar, reviewed by Alexander Kumenkov, Amul Sul, and me. Some final adjustments by me. Discussion: http://postgr.es/m/CAFiTN-sYtqUOXQ4SpuhTv0Z9gD0si3YxZGv_PQAAMX8qbOotcg@mail.gmail.com --- src/backend/nodes/tidbitmap.c | 37 +++++++++++------ src/backend/optimizer/path/costsize.c | 59 ++++++++++++++++++++++----- src/include/nodes/tidbitmap.h | 1 + 3 files changed, 75 insertions(+), 22 deletions(-) diff --git a/src/backend/nodes/tidbitmap.c b/src/backend/nodes/tidbitmap.c index c47d5849ef..acfe6b263c 100644 --- a/src/backend/nodes/tidbitmap.c +++ b/src/backend/nodes/tidbitmap.c @@ -265,7 +265,6 @@ TIDBitmap * tbm_create(long maxbytes, dsa_area *dsa) { TIDBitmap *tbm; - long nbuckets; /* Create the TIDBitmap struct and zero all its fields */ tbm = makeNode(TIDBitmap); @@ -273,17 +272,7 @@ tbm_create(long maxbytes, dsa_area *dsa) tbm->mcxt = CurrentMemoryContext; tbm->status = TBM_EMPTY; - /* - * Estimate number of hashtable entries we can have within maxbytes. This - * estimates the hash cost as sizeof(PagetableEntry), which is good enough - * for our purpose. Also count an extra Pointer per entry for the arrays - * created during iteration readout. - */ - nbuckets = maxbytes / - (sizeof(PagetableEntry) + sizeof(Pointer) + sizeof(Pointer)); - nbuckets = Min(nbuckets, INT_MAX - 1); /* safety limit */ - nbuckets = Max(nbuckets, 16); /* sanity limit */ - tbm->maxentries = (int) nbuckets; + tbm->maxentries = (int) tbm_calculate_entries(maxbytes); tbm->lossify_start = 0; tbm->dsa = dsa; tbm->dsapagetable = InvalidDsaPointer; @@ -1546,3 +1535,27 @@ pagetable_free(pagetable_hash *pagetable, void *pointer) tbm->dsapagetableold = InvalidDsaPointer; } } + +/* + * tbm_calculate_entries + * + * Estimate number of hashtable entries we can have within maxbytes. + */ +long +tbm_calculate_entries(double maxbytes) +{ + long nbuckets; + + /* + * Estimate number of hashtable entries we can have within maxbytes. This + * estimates the hash cost as sizeof(PagetableEntry), which is good enough + * for our purpose. Also count an extra Pointer per entry for the arrays + * created during iteration readout. + */ + nbuckets = maxbytes / + (sizeof(PagetableEntry) + sizeof(Pointer) + sizeof(Pointer)); + nbuckets = Min(nbuckets, INT_MAX - 1); /* safety limit */ + nbuckets = Max(nbuckets, 16); /* sanity limit */ + + return nbuckets; +} diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c index 98fb16e85a..2d2df60886 100644 --- a/src/backend/optimizer/path/costsize.c +++ b/src/backend/optimizer/path/costsize.c @@ -5171,6 +5171,8 @@ compute_bitmap_pages(PlannerInfo *root, RelOptInfo *baserel, Path *bitmapqual, double T; double pages_fetched; double tuples_fetched; + double heap_pages; + long maxentries; /* * Fetch total cost of obtaining the bitmap, as well as its total @@ -5185,6 +5187,24 @@ compute_bitmap_pages(PlannerInfo *root, RelOptInfo *baserel, Path *bitmapqual, T = (baserel->pages > 1) ? (double) baserel->pages : 1.0; + /* + * For a single scan, the number of heap pages that need to be fetched is + * the same as the Mackert and Lohman formula for the case T <= b (ie, no + * re-reads needed). + */ + pages_fetched = (2.0 * T * tuples_fetched) / (2.0 * T + tuples_fetched); + + /* + * Calculate the number of pages fetched from the heap. Then based on + * current work_mem estimate get the estimated maxentries in the bitmap. + * (Note that we always do this calculation based on the number of pages + * that would be fetched in a single iteration, even if loop_count > 1. + * That's correct, because only that number of entries will be stored in + * the bitmap at one time.) + */ + heap_pages = Min(pages_fetched, baserel->pages); + maxentries = tbm_calculate_entries(work_mem * 1024L); + if (loop_count > 1) { /* @@ -5199,22 +5219,41 @@ compute_bitmap_pages(PlannerInfo *root, RelOptInfo *baserel, Path *bitmapqual, root); pages_fetched /= loop_count; } - else - { - /* - * For a single scan, the number of heap pages that need to be fetched - * is the same as the Mackert and Lohman formula for the case T <= b - * (ie, no re-reads needed). - */ - pages_fetched = - (2.0 * T * tuples_fetched) / (2.0 * T + tuples_fetched); - } if (pages_fetched >= T) pages_fetched = T; else pages_fetched = ceil(pages_fetched); + if (maxentries < heap_pages) + { + double exact_pages; + double lossy_pages; + + /* + * Crude approximation of the number of lossy pages. Because of the + * way tbm_lossify() is coded, the number of lossy pages increases + * very sharply as soon as we run short of memory; this formula has + * that property and seems to perform adequately in testing, but it's + * possible we could do better somehow. + */ + lossy_pages = Max(0, heap_pages - maxentries / 2); + exact_pages = heap_pages - lossy_pages; + + /* + * If there are lossy pages then recompute the number of tuples + * processed by the bitmap heap node. We assume here that the chance + * of a given tuple coming from an exact page is the same as the + * chance that a given page is exact. This might not be true, but + * it's not clear how we can do any better. + */ + if (lossy_pages > 0) + tuples_fetched = + clamp_row_est(indexSelectivity * + (exact_pages / heap_pages) * baserel->tuples + + (lossy_pages / heap_pages) * baserel->tuples); + } + if (cost) *cost = indexTotalCost; if (tuple) diff --git a/src/include/nodes/tidbitmap.h b/src/include/nodes/tidbitmap.h index f9a1902da8..d3ad0a5566 100644 --- a/src/include/nodes/tidbitmap.h +++ b/src/include/nodes/tidbitmap.h @@ -70,5 +70,6 @@ extern void tbm_end_iterate(TBMIterator *iterator); extern void tbm_end_shared_iterate(TBMSharedIterator *iterator); extern TBMSharedIterator *tbm_attach_shared_iterate(dsa_area *dsa, dsa_pointer dp); +extern long tbm_calculate_entries(double maxbytes); #endif /* TIDBITMAP_H */