Account for the effect of lossy pages when costing bitmap scans.

Dilip Kumar, reviewed by Alexander Kumenkov, Amul Sul, and me. Some final adjustments by me. Discussion: http://postgr.es/m/CAFiTN-sYtqUOXQ4SpuhTv0Z9gD0si3YxZGv_PQAAMX8qbOotcg@mail.gmail.com
2017-11-10 16:50:50 -05:00 · 2017-11-10 16:50:50 -05:00 · 5edc63bda6
parent 0c98d0dd5c
commit 5edc63bda6
3 changed files with 75 additions and 22 deletions
--- a/src/backend/nodes/tidbitmap.c
+++ b/src/backend/nodes/tidbitmap.c
@ -265,7 +265,6 @@ TIDBitmap *
 tbm_create(long maxbytes, dsa_area *dsa)
 {
 	TIDBitmap  *tbm;
 	long		nbuckets;
 	/* Create the TIDBitmap struct and zero all its fields */
 	tbm = makeNode(TIDBitmap);
@ -273,17 +272,7 @@ tbm_create(long maxbytes, dsa_area *dsa)
 	tbm->mcxt = CurrentMemoryContext;
 	tbm->status = TBM_EMPTY;
-	/*
+	tbm->maxentries = (int) tbm_calculate_entries(maxbytes);
 	 * Estimate number of hashtable entries we can have within maxbytes. This
 	 * estimates the hash cost as sizeof(PagetableEntry), which is good enough
 	 * for our purpose.  Also count an extra Pointer per entry for the arrays
 	 * created during iteration readout.
 	 */
 	nbuckets = maxbytes /
 		(sizeof(PagetableEntry) + sizeof(Pointer) + sizeof(Pointer));
 	nbuckets = Min(nbuckets, INT_MAX - 1);	/* safety limit */
 	nbuckets = Max(nbuckets, 16);	/* sanity limit */
 	tbm->maxentries = (int) nbuckets;
 	tbm->lossify_start = 0;
 	tbm->dsa = dsa;
 	tbm->dsapagetable = InvalidDsaPointer;
@ -1546,3 +1535,27 @@ pagetable_free(pagetable_hash *pagetable, void *pointer)
 		tbm->dsapagetableold = InvalidDsaPointer;
 	}
 }
 /*
 * tbm_calculate_entries
 *
 * Estimate number of hashtable entries we can have within maxbytes.
 *
 * maxbytes is the memory budget in bytes.  The result is always within
 * the range 16 .. INT_MAX - 1, whatever value is passed in.
 */
 long
 tbm_calculate_entries(double maxbytes)
 {
 	double		nbuckets;
 	/*
 	 * Estimate number of hashtable entries we can have within maxbytes. This
 	 * estimates the hash cost as sizeof(PagetableEntry), which is good enough
 	 * for our purpose.  Also count an extra Pointer per entry for the arrays
 	 * created during iteration readout.
 	 */
 	nbuckets = maxbytes /
 		(sizeof(PagetableEntry) + sizeof(Pointer) + sizeof(Pointer));
 	/*
 	 * Apply the limits while the value is still a double.  Converting a
 	 * double that is NaN or does not fit in a long is undefined behavior, so
 	 * the clamping has to happen before the conversion, not after it.  The
 	 * negated comparison is deliberate: it is also true for NaN.
 	 */
 	if (!(nbuckets >= 16))
 		return 16;				/* sanity limit */
 	if (nbuckets > INT_MAX - 1)
 		return INT_MAX - 1;		/* safety limit */
 	/* In range, so truncation toward zero is well defined here. */
 	return (long) nbuckets;
 }
--- a/src/backend/optimizer/path/costsize.c
+++ b/src/backend/optimizer/path/costsize.c
@ -5171,6 +5171,8 @@ compute_bitmap_pages(PlannerInfo *root, RelOptInfo *baserel, Path *bitmapqual,
 	double		T;
 	double		pages_fetched;
 	double		tuples_fetched;
 	double		heap_pages;
 	long		maxentries;
 	/*
 	 * Fetch total cost of obtaining the bitmap, as well as its total
@ -5185,6 +5187,24 @@ compute_bitmap_pages(PlannerInfo *root, RelOptInfo *baserel, Path *bitmapqual,
 	T = (baserel->pages > 1) ? (double) baserel->pages : 1.0;
 	/*
 	 * For a single scan, the number of heap pages that need to be fetched is
 	 * the same as the Mackert and Lohman formula for the case T <= b (ie, no
 	 * re-reads needed).
 	 */
 	pages_fetched = (2.0 * T * tuples_fetched) / (2.0 * T + tuples_fetched);
 	/*
 	 * Calculate the number of pages fetched from the heap.  Then based on
 	 * current work_mem estimate get the estimated maxentries in the bitmap.
 	 * (Note that we always do this calculation based on the number of pages
 	 * that would be fetched in a single iteration, even if loop_count > 1.
 	 * That's correct, because only that number of entries will be stored in
 	 * the bitmap at one time.)
 	 */
 	heap_pages = Min(pages_fetched, baserel->pages);
 	maxentries = tbm_calculate_entries(work_mem * 1024L);
 	if (loop_count > 1)
 	{
 		/*
@ -5199,22 +5219,41 @@ compute_bitmap_pages(PlannerInfo *root, RelOptInfo *baserel, Path *bitmapqual,
 											root);
 		pages_fetched /= loop_count;
 	}
 	else
 	{
 		/*
 		 * For a single scan, the number of heap pages that need to be fetched
 		 * is the same as the Mackert and Lohman formula for the case T <= b
 		 * (ie, no re-reads needed).
 		 */
 		pages_fetched =
 			(2.0 * T * tuples_fetched) / (2.0 * T + tuples_fetched);
 	}
 	if (pages_fetched >= T)
 		pages_fetched = T;
 	else
 		pages_fetched = ceil(pages_fetched);
 	if (maxentries < heap_pages)
 	{
 		double		exact_pages;
 		double		lossy_pages;
 		/*
 		 * Crude approximation of the number of lossy pages.  Because of the
 		 * way tbm_lossify() is coded, the number of lossy pages increases
 		 * very sharply as soon as we run short of memory; this formula has
 		 * that property and seems to perform adequately in testing, but it's
 		 * possible we could do better somehow.
 		 */
 		lossy_pages = Max(0, heap_pages - maxentries / 2);
 		exact_pages = heap_pages - lossy_pages;
 		/*
 		 * If there are lossy pages then recompute the number of tuples
 		 * processed by the bitmap heap node.  We assume here that the chance
 		 * of a given tuple coming from an exact page is the same as the
 		 * chance that a given page is exact.  This might not be true, but
 		 * it's not clear how we can do any better.
 		 */
 		if (lossy_pages > 0)
 			tuples_fetched =
 				clamp_row_est(indexSelectivity *
 							  (exact_pages / heap_pages) * baserel->tuples +
 							  (lossy_pages / heap_pages) * baserel->tuples);
 	}
 	if (cost)
 		*cost = indexTotalCost;
 	if (tuple)
--- a/src/include/nodes/tidbitmap.h
+++ b/src/include/nodes/tidbitmap.h
@ -70,5 +70,6 @@ extern void tbm_end_iterate(TBMIterator *iterator);
 extern void tbm_end_shared_iterate(TBMSharedIterator *iterator);
 extern TBMSharedIterator *tbm_attach_shared_iterate(dsa_area *dsa,
 						  dsa_pointer dp);
 /* Estimate how many hashtable entries fit within maxbytes (result is clamped to 16 .. INT_MAX - 1). */
 extern long tbm_calculate_entries(double maxbytes);
 #endif							/* TIDBITMAP_H */