Invent qsort_interruptible().

Justin Pryzby reported that some scenarios could cause gathering
of extended statistics to spend many seconds in an un-cancelable
qsort() operation.  To fix, invent qsort_interruptible(), which is
just like qsort_arg() except that it will also do CHECK_FOR_INTERRUPTS
every so often.  This bloats the backend by a couple of kB, which
seems like a good investment.  (We considered just enabling
CHECK_FOR_INTERRUPTS in the existing qsort and qsort_arg functions,
but there are some callers for which that'd demonstrably be unsafe.
Opt-in seems like a better way.)

For now, just apply qsort_interruptible() in statistics collection.
There's probably more places where it could be useful, but we can
always change other call sites as we find problems.

Back-patch to v14.  Before that we didn't have extended stats on
expressions, so that the problem was less severe.  Also, this patch
depends on the sort_template infrastructure introduced in v14.

Tom Lane and Justin Pryzby

Discussion: https://postgr.es/m/20220509000108.GQ28830@telsasoft.com
This commit is contained in:
Tom Lane 2022-07-12 16:30:36 -04:00
parent 5e7608e81e
commit af72b08894
10 changed files with 80 additions and 55 deletions

View File

@ -99,7 +99,7 @@ static VacAttrStats *examine_attribute(Relation onerel, int attnum,
static int acquire_sample_rows(Relation onerel, int elevel,
HeapTuple *rows, int targrows,
double *totalrows, double *totaldeadrows);
static int compare_rows(const void *a, const void *b);
static int compare_rows(const void *a, const void *b, void *arg);
static int acquire_inherited_sample_rows(Relation onerel, int elevel,
HeapTuple *rows, int targrows,
double *totalrows, double *totaldeadrows);
@ -1324,7 +1324,8 @@ acquire_sample_rows(Relation onerel, int elevel,
* tuples are already sorted.
*/
if (numrows == targrows)
qsort((void *) rows, numrows, sizeof(HeapTuple), compare_rows);
qsort_interruptible((void *) rows, numrows, sizeof(HeapTuple),
compare_rows, NULL);
/*
* Estimate total numbers of live and dead rows in relation, extrapolating
@ -1360,10 +1361,10 @@ acquire_sample_rows(Relation onerel, int elevel,
}
/*
* qsort comparator for sorting rows[] array
* Comparator for sorting rows[] array
*/
static int
compare_rows(const void *a, const void *b)
compare_rows(const void *a, const void *b, void *arg)
{
HeapTuple ha = *(const HeapTuple *) a;
HeapTuple hb = *(const HeapTuple *) b;
@ -1857,7 +1858,7 @@ static void compute_scalar_stats(VacAttrStatsP stats,
int samplerows,
double totalrows);
static int compare_scalars(const void *a, const void *b, void *arg);
static int compare_mcvs(const void *a, const void *b);
static int compare_mcvs(const void *a, const void *b, void *arg);
static int analyze_mcv_list(int *mcv_counts,
int num_mcv,
double stadistinct,
@ -2494,8 +2495,8 @@ compute_scalar_stats(VacAttrStatsP stats,
/* Sort the collected values */
cxt.ssup = &ssup;
cxt.tupnoLink = tupnoLink;
qsort_arg((void *) values, values_cnt, sizeof(ScalarItem),
compare_scalars, (void *) &cxt);
qsort_interruptible((void *) values, values_cnt, sizeof(ScalarItem),
compare_scalars, (void *) &cxt);
/*
* Now scan the values in order, find the most common ones, and also
@ -2739,8 +2740,8 @@ compute_scalar_stats(VacAttrStatsP stats,
deltafrac;
/* Sort the MCV items into position order to speed next loop */
qsort((void *) track, num_mcv,
sizeof(ScalarMCVItem), compare_mcvs);
qsort_interruptible((void *) track, num_mcv, sizeof(ScalarMCVItem),
compare_mcvs, NULL);
/*
* Collapse out the MCV items from the values[] array.
@ -2903,7 +2904,7 @@ compute_scalar_stats(VacAttrStatsP stats,
}
/*
* qsort_arg comparator for sorting ScalarItems
* Comparator for sorting ScalarItems
*
* Aside from sorting the items, we update the tupnoLink[] array
* whenever two ScalarItems are found to contain equal datums. The array
@ -2940,10 +2941,10 @@ compare_scalars(const void *a, const void *b, void *arg)
}
/*
* qsort comparator for sorting ScalarMCVItems by position
* Comparator for sorting ScalarMCVItems by position
*/
static int
compare_mcvs(const void *a, const void *b)
compare_mcvs(const void *a, const void *b, void *arg)
{
int da = ((const ScalarMCVItem *) a)->first;
int db = ((const ScalarMCVItem *) b)->first;

View File

@ -1133,8 +1133,8 @@ build_sorted_items(StatsBuildData *data, int *nitems,
}
/* do the sort, using the multi-sort */
qsort_arg((void *) items, nrows, sizeof(SortItem),
multi_sort_compare, mss);
qsort_interruptible((void *) items, nrows, sizeof(SortItem),
multi_sort_compare, mss);
return items;
}

View File

@ -404,7 +404,7 @@ count_distinct_groups(int numrows, SortItem *items, MultiSortSupport mss)
* order.
*/
static int
compare_sort_item_count(const void *a, const void *b)
compare_sort_item_count(const void *a, const void *b, void *arg)
{
SortItem *ia = (SortItem *) a;
SortItem *ib = (SortItem *) b;
@ -457,8 +457,8 @@ build_distinct_groups(int numrows, SortItem *items, MultiSortSupport mss,
Assert(j + 1 == ngroups);
/* Sort the distinct groups by frequency (in descending order). */
pg_qsort((void *) groups, ngroups, sizeof(SortItem),
compare_sort_item_count);
qsort_interruptible((void *) groups, ngroups, sizeof(SortItem),
compare_sort_item_count, NULL);
*ndistinct = ngroups;
return groups;
@ -528,8 +528,8 @@ build_column_frequencies(SortItem *groups, int ngroups,
}
/* sort the values, deduplicate */
qsort_arg((void *) result[dim], ngroups, sizeof(SortItem),
sort_item_compare, ssup);
qsort_interruptible((void *) result[dim], ngroups, sizeof(SortItem),
sort_item_compare, ssup);
/*
* Identify distinct values, compute frequency (there might be
@ -695,8 +695,8 @@ statext_mcv_serialize(MCVList *mcvlist, VacAttrStats **stats)
PrepareSortSupportFromOrderingOp(typentry->lt_opr, &ssup[dim]);
qsort_arg(values[dim], counts[dim], sizeof(Datum),
compare_scalars_simple, &ssup[dim]);
qsort_interruptible(values[dim], counts[dim], sizeof(Datum),
compare_scalars_simple, &ssup[dim]);
/*
* Walk through the array and eliminate duplicate values, but keep the

View File

@ -488,8 +488,8 @@ ndistinct_for_combination(double totalrows, StatsBuildData *data,
}
/* We can sort the array now ... */
qsort_arg((void *) items, numrows, sizeof(SortItem),
multi_sort_compare, mss);
qsort_interruptible((void *) items, numrows, sizeof(SortItem),
multi_sort_compare, mss);
/* ... and count the number of distinct combinations */

View File

@ -44,8 +44,10 @@ static void prune_lexemes_hashtable(HTAB *lexemes_tab, int b_current);
static uint32 lexeme_hash(const void *key, Size keysize);
static int lexeme_match(const void *key1, const void *key2, Size keysize);
static int lexeme_compare(const void *key1, const void *key2);
static int trackitem_compare_frequencies_desc(const void *e1, const void *e2);
static int trackitem_compare_lexemes(const void *e1, const void *e2);
static int trackitem_compare_frequencies_desc(const void *e1, const void *e2,
void *arg);
static int trackitem_compare_lexemes(const void *e1, const void *e2,
void *arg);
/*
@ -347,8 +349,8 @@ compute_tsvector_stats(VacAttrStats *stats,
*/
if (num_mcelem < track_len)
{
qsort(sort_table, track_len, sizeof(TrackItem *),
trackitem_compare_frequencies_desc);
qsort_interruptible(sort_table, track_len, sizeof(TrackItem *),
trackitem_compare_frequencies_desc, NULL);
/* reset minfreq to the smallest frequency we're keeping */
minfreq = sort_table[num_mcelem - 1]->frequency;
}
@ -376,8 +378,8 @@ compute_tsvector_stats(VacAttrStats *stats,
* presorted we can employ binary search for that. See
* ts_selfuncs.c for a real usage scenario.
*/
qsort(sort_table, num_mcelem, sizeof(TrackItem *),
trackitem_compare_lexemes);
qsort_interruptible(sort_table, num_mcelem, sizeof(TrackItem *),
trackitem_compare_lexemes, NULL);
/* Must copy the target values into anl_context */
old_context = MemoryContextSwitchTo(stats->anl_context);
@ -510,10 +512,10 @@ lexeme_compare(const void *key1, const void *key2)
}
/*
* qsort() comparator for sorting TrackItems on frequencies (descending sort)
* Comparator for sorting TrackItems on frequencies (descending sort)
*/
static int
trackitem_compare_frequencies_desc(const void *e1, const void *e2)
trackitem_compare_frequencies_desc(const void *e1, const void *e2, void *arg)
{
const TrackItem *const *t1 = (const TrackItem *const *) e1;
const TrackItem *const *t2 = (const TrackItem *const *) e2;
@ -522,10 +524,10 @@ trackitem_compare_frequencies_desc(const void *e1, const void *e2)
}
/*
* qsort() comparator for sorting TrackItems on lexemes
* Comparator for sorting TrackItems on lexemes
*/
static int
trackitem_compare_lexemes(const void *e1, const void *e2)
trackitem_compare_lexemes(const void *e1, const void *e2, void *arg)
{
const TrackItem *const *t1 = (const TrackItem *const *) e1;
const TrackItem *const *t2 = (const TrackItem *const *) e2;

View File

@ -86,9 +86,9 @@ static void prune_element_hashtable(HTAB *elements_tab, int b_current);
static uint32 element_hash(const void *key, Size keysize);
static int element_match(const void *key1, const void *key2, Size keysize);
static int element_compare(const void *key1, const void *key2);
static int trackitem_compare_frequencies_desc(const void *e1, const void *e2);
static int trackitem_compare_element(const void *e1, const void *e2);
static int countitem_compare_count(const void *e1, const void *e2);
static int trackitem_compare_frequencies_desc(const void *e1, const void *e2, void *arg);
static int trackitem_compare_element(const void *e1, const void *e2, void *arg);
static int countitem_compare_count(const void *e1, const void *e2, void *arg);
/*
@ -502,8 +502,8 @@ compute_array_stats(VacAttrStats *stats, AnalyzeAttrFetchFunc fetchfunc,
*/
if (num_mcelem < track_len)
{
qsort(sort_table, track_len, sizeof(TrackItem *),
trackitem_compare_frequencies_desc);
qsort_interruptible(sort_table, track_len, sizeof(TrackItem *),
trackitem_compare_frequencies_desc, NULL);
/* reset minfreq to the smallest frequency we're keeping */
minfreq = sort_table[num_mcelem - 1]->frequency;
}
@ -522,8 +522,8 @@ compute_array_stats(VacAttrStats *stats, AnalyzeAttrFetchFunc fetchfunc,
* the element type's default comparison function. This permits
* fast binary searches in selectivity estimation functions.
*/
qsort(sort_table, num_mcelem, sizeof(TrackItem *),
trackitem_compare_element);
qsort_interruptible(sort_table, num_mcelem, sizeof(TrackItem *),
trackitem_compare_element, NULL);
/* Must copy the target values into anl_context */
old_context = MemoryContextSwitchTo(stats->anl_context);
@ -599,8 +599,9 @@ compute_array_stats(VacAttrStats *stats, AnalyzeAttrFetchFunc fetchfunc,
{
sorted_count_items[j++] = count_item;
}
qsort(sorted_count_items, count_items_count,
sizeof(DECountItem *), countitem_compare_count);
qsort_interruptible(sorted_count_items, count_items_count,
sizeof(DECountItem *),
countitem_compare_count, NULL);
/*
* Prepare to fill stanumbers with the histogram, followed by the
@ -751,10 +752,10 @@ element_compare(const void *key1, const void *key2)
}
/*
* qsort() comparator for sorting TrackItems by frequencies (descending sort)
* Comparator for sorting TrackItems by frequencies (descending sort)
*/
static int
trackitem_compare_frequencies_desc(const void *e1, const void *e2)
trackitem_compare_frequencies_desc(const void *e1, const void *e2, void *arg)
{
const TrackItem *const *t1 = (const TrackItem *const *) e1;
const TrackItem *const *t2 = (const TrackItem *const *) e2;
@ -763,10 +764,10 @@ trackitem_compare_frequencies_desc(const void *e1, const void *e2)
}
/*
* qsort() comparator for sorting TrackItems by element values
* Comparator for sorting TrackItems by element values
*/
static int
trackitem_compare_element(const void *e1, const void *e2)
trackitem_compare_element(const void *e1, const void *e2, void *arg)
{
const TrackItem *const *t1 = (const TrackItem *const *) e1;
const TrackItem *const *t2 = (const TrackItem *const *) e2;
@ -775,10 +776,10 @@ trackitem_compare_element(const void *e1, const void *e2)
}
/*
* qsort() comparator for sorting DECountItems by count
* Comparator for sorting DECountItems by count
*/
static int
countitem_compare_count(const void *e1, const void *e2)
countitem_compare_count(const void *e1, const void *e2, void *arg)
{
const DECountItem *const *t1 = (const DECountItem *const *) e1;
const DECountItem *const *t2 = (const DECountItem *const *) e2;

View File

@ -32,7 +32,7 @@
#include "utils/rangetypes.h"
#include "utils/multirangetypes.h"
static int float8_qsort_cmp(const void *a1, const void *a2);
static int float8_qsort_cmp(const void *a1, const void *a2, void *arg);
static int range_bound_qsort_cmp(const void *a1, const void *a2, void *arg);
static void compute_range_stats(VacAttrStats *stats,
AnalyzeAttrFetchFunc fetchfunc, int samplerows,
@ -93,7 +93,7 @@ multirange_typanalyze(PG_FUNCTION_ARGS)
* Comparison function for sorting float8s, used for range lengths.
*/
static int
float8_qsort_cmp(const void *a1, const void *a2)
float8_qsort_cmp(const void *a1, const void *a2, void *arg)
{
const float8 *f1 = (const float8 *) a1;
const float8 *f2 = (const float8 *) a2;
@ -280,10 +280,10 @@ compute_range_stats(VacAttrStats *stats, AnalyzeAttrFetchFunc fetchfunc,
if (non_empty_cnt >= 2)
{
/* Sort bound values */
qsort_arg(lowers, non_empty_cnt, sizeof(RangeBound),
range_bound_qsort_cmp, typcache);
qsort_arg(uppers, non_empty_cnt, sizeof(RangeBound),
range_bound_qsort_cmp, typcache);
qsort_interruptible(lowers, non_empty_cnt, sizeof(RangeBound),
range_bound_qsort_cmp, typcache);
qsort_interruptible(uppers, non_empty_cnt, sizeof(RangeBound),
range_bound_qsort_cmp, typcache);
num_hist = non_empty_cnt;
if (num_hist > num_bins)
@ -345,7 +345,8 @@ compute_range_stats(VacAttrStats *stats, AnalyzeAttrFetchFunc fetchfunc,
* Ascending sort of range lengths for further filling of
* histogram
*/
qsort(lengths, non_empty_cnt, sizeof(float8), float8_qsort_cmp);
qsort_interruptible(lengths, non_empty_cnt, sizeof(float8),
float8_qsort_cmp, NULL);
num_hist = non_empty_cnt;
if (num_hist > num_bins)

View File

@ -16,6 +16,7 @@ override CPPFLAGS := -I. -I$(srcdir) $(CPPFLAGS)
OBJS = \
logtape.o \
qsort_interruptible.o \
sharedtuplestore.o \
sortsupport.o \
tuplesort.o \

View File

@ -0,0 +1,16 @@
/*
* qsort_interruptible.c: qsort_arg that includes CHECK_FOR_INTERRUPTS
*/
#include "postgres.h"
#include "miscadmin.h"
#define ST_SORT qsort_interruptible
#define ST_ELEMENT_TYPE_VOID
#define ST_COMPARATOR_TYPE_NAME qsort_arg_comparator
#define ST_COMPARE_RUNTIME_POINTER
#define ST_COMPARE_ARG_TYPE void
#define ST_SCOPE
#define ST_DEFINE
#define ST_CHECK_FOR_INTERRUPTS
#include "lib/sort_template.h"

View File

@ -508,6 +508,9 @@ typedef int (*qsort_arg_comparator) (const void *a, const void *b, void *arg);
extern void qsort_arg(void *base, size_t nel, size_t elsize,
qsort_arg_comparator cmp, void *arg);
extern void qsort_interruptible(void *base, size_t nel, size_t elsize,
qsort_arg_comparator cmp, void *arg);
extern void *bsearch_arg(const void *key, const void *base,
size_t nmemb, size_t size,
int (*compar) (const void *, const void *, void *),