Fix efficiency problems in tuplestore_trim().
The original coding in tuplestore_trim() was only meant to work efficiently in cases where each trim call deleted most of the tuples in the store, which, in fact, was the pattern of the original usage with a Material node supporting mark/restore operations underneath a MergeJoin. However, WindowAgg now uses tuplestores, and it has considerably less friendly trimming behavior: in particular, it can attempt to trim one tuple at a time off a large tuplestore. tuplestore_trim() had O(N^2) runtime in this situation because it repeatedly shifted its tuple pointer array. Fix by avoiding shifting the array until a reasonably large number of tuples have been deleted. This can waste some pointer space, but we do still reclaim the tuples themselves, so the percentage wastage should be pretty small. Per Jie Li's report of slow percent_rank() evaluation. cume_dist() and ntile() would certainly be affected as well, along with any other window function that has a moving frame start and requires reading substantially ahead of the current row. Back-patch to 8.4, where window functions were introduced; there's no need to change earlier branches.
This commit is contained in:
parent
663fc32e26
commit
244407a710
|
@ -145,8 +145,15 @@ struct Tuplestorestate
|
||||||
/*
|
/*
|
||||||
* This array holds pointers to tuples in memory if we are in state INMEM.
|
* This array holds pointers to tuples in memory if we are in state INMEM.
|
||||||
* In states WRITEFILE and READFILE it's not used.
|
* In states WRITEFILE and READFILE it's not used.
|
||||||
|
*
|
||||||
|
* When memtupdeleted > 0, the first memtupdeleted pointers are already
|
||||||
|
* released due to a tuplestore_trim() operation, but we haven't expended
|
||||||
|
* the effort to slide the remaining pointers down. These unused pointers
|
||||||
|
* are set to NULL to catch any invalid accesses. Note that memtupcount
|
||||||
|
* includes the deleted pointers.
|
||||||
*/
|
*/
|
||||||
void **memtuples; /* array of pointers to palloc'd tuples */
|
void **memtuples; /* array of pointers to palloc'd tuples */
|
||||||
|
int memtupdeleted; /* the first N slots are currently unused */
|
||||||
int memtupcount; /* number of tuples currently present */
|
int memtupcount; /* number of tuples currently present */
|
||||||
int memtupsize; /* allocated length of memtuples array */
|
int memtupsize; /* allocated length of memtuples array */
|
||||||
|
|
||||||
|
@ -252,6 +259,7 @@ tuplestore_begin_common(int eflags, bool interXact, int maxKBytes)
|
||||||
state->context = CurrentMemoryContext;
|
state->context = CurrentMemoryContext;
|
||||||
state->resowner = CurrentResourceOwner;
|
state->resowner = CurrentResourceOwner;
|
||||||
|
|
||||||
|
state->memtupdeleted = 0;
|
||||||
state->memtupcount = 0;
|
state->memtupcount = 0;
|
||||||
state->memtupsize = 1024; /* initial guess */
|
state->memtupsize = 1024; /* initial guess */
|
||||||
state->memtuples = (void **) palloc(state->memtupsize * sizeof(void *));
|
state->memtuples = (void **) palloc(state->memtupsize * sizeof(void *));
|
||||||
|
@ -401,7 +409,7 @@ tuplestore_clear(Tuplestorestate *state)
|
||||||
state->myfile = NULL;
|
state->myfile = NULL;
|
||||||
if (state->memtuples)
|
if (state->memtuples)
|
||||||
{
|
{
|
||||||
for (i = 0; i < state->memtupcount; i++)
|
for (i = state->memtupdeleted; i < state->memtupcount; i++)
|
||||||
{
|
{
|
||||||
FREEMEM(state, GetMemoryChunkSpace(state->memtuples[i]));
|
FREEMEM(state, GetMemoryChunkSpace(state->memtuples[i]));
|
||||||
pfree(state->memtuples[i]);
|
pfree(state->memtuples[i]);
|
||||||
|
@ -409,6 +417,7 @@ tuplestore_clear(Tuplestorestate *state)
|
||||||
}
|
}
|
||||||
state->status = TSS_INMEM;
|
state->status = TSS_INMEM;
|
||||||
state->truncated = false;
|
state->truncated = false;
|
||||||
|
state->memtupdeleted = 0;
|
||||||
state->memtupcount = 0;
|
state->memtupcount = 0;
|
||||||
readptr = state->readptrs;
|
readptr = state->readptrs;
|
||||||
for (i = 0; i < state->readptrcount; readptr++, i++)
|
for (i = 0; i < state->readptrcount; readptr++, i++)
|
||||||
|
@ -432,7 +441,7 @@ tuplestore_end(Tuplestorestate *state)
|
||||||
BufFileClose(state->myfile);
|
BufFileClose(state->myfile);
|
||||||
if (state->memtuples)
|
if (state->memtuples)
|
||||||
{
|
{
|
||||||
for (i = 0; i < state->memtupcount; i++)
|
for (i = state->memtupdeleted; i < state->memtupcount; i++)
|
||||||
pfree(state->memtuples[i]);
|
pfree(state->memtuples[i]);
|
||||||
pfree(state->memtuples);
|
pfree(state->memtuples);
|
||||||
}
|
}
|
||||||
|
@ -774,14 +783,14 @@ tuplestore_gettuple(Tuplestorestate *state, bool forward,
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
if (readptr->current <= 0)
|
if (readptr->current <= state->memtupdeleted)
|
||||||
{
|
{
|
||||||
Assert(!state->truncated);
|
Assert(!state->truncated);
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
readptr->current--; /* last returned tuple */
|
readptr->current--; /* last returned tuple */
|
||||||
}
|
}
|
||||||
if (readptr->current <= 0)
|
if (readptr->current <= state->memtupdeleted)
|
||||||
{
|
{
|
||||||
Assert(!state->truncated);
|
Assert(!state->truncated);
|
||||||
return NULL;
|
return NULL;
|
||||||
|
@ -969,7 +978,7 @@ dumptuples(Tuplestorestate *state)
|
||||||
{
|
{
|
||||||
int i;
|
int i;
|
||||||
|
|
||||||
for (i = 0;; i++)
|
for (i = state->memtupdeleted;; i++)
|
||||||
{
|
{
|
||||||
TSReadPointer *readptr = state->readptrs;
|
TSReadPointer *readptr = state->readptrs;
|
||||||
int j;
|
int j;
|
||||||
|
@ -984,6 +993,7 @@ dumptuples(Tuplestorestate *state)
|
||||||
break;
|
break;
|
||||||
WRITETUP(state, state->memtuples[i]);
|
WRITETUP(state, state->memtuples[i]);
|
||||||
}
|
}
|
||||||
|
state->memtupdeleted = 0;
|
||||||
state->memtupcount = 0;
|
state->memtupcount = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1153,24 +1163,36 @@ tuplestore_trim(Tuplestorestate *state)
|
||||||
nremove = oldest - 1;
|
nremove = oldest - 1;
|
||||||
if (nremove <= 0)
|
if (nremove <= 0)
|
||||||
return; /* nothing to do */
|
return; /* nothing to do */
|
||||||
|
|
||||||
|
Assert(nremove >= state->memtupdeleted);
|
||||||
Assert(nremove <= state->memtupcount);
|
Assert(nremove <= state->memtupcount);
|
||||||
|
|
||||||
/* Release no-longer-needed tuples */
|
/* Release no-longer-needed tuples */
|
||||||
for (i = 0; i < nremove; i++)
|
for (i = state->memtupdeleted; i < nremove; i++)
|
||||||
{
|
{
|
||||||
FREEMEM(state, GetMemoryChunkSpace(state->memtuples[i]));
|
FREEMEM(state, GetMemoryChunkSpace(state->memtuples[i]));
|
||||||
pfree(state->memtuples[i]);
|
pfree(state->memtuples[i]);
|
||||||
|
state->memtuples[i] = NULL;
|
||||||
}
|
}
|
||||||
|
state->memtupdeleted = nremove;
|
||||||
|
|
||||||
|
/* mark tuplestore as truncated (used for Assert crosschecks only) */
|
||||||
|
state->truncated = true;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Slide the array down and readjust pointers. This may look pretty
|
* If nremove is less than 1/8th memtupcount, just stop here, leaving the
|
||||||
* stupid, but we expect that there will usually not be very many
|
* "deleted" slots as NULL. This prevents us from expending O(N^2) time
|
||||||
* tuple-pointers to move, so this isn't that expensive; and it keeps a
|
* repeatedly memmove-ing a large pointer array. The worst case space
|
||||||
* lot of other logic simple.
|
* wastage is pretty small, since it's just pointers and not whole tuples.
|
||||||
|
*/
|
||||||
|
if (nremove < state->memtupcount / 8)
|
||||||
|
return;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Slide the array down and readjust pointers.
|
||||||
*
|
*
|
||||||
* In fact, in the current usage for merge joins, it's demonstrable that
|
* In mergejoin's current usage, it's demonstrable that there will always
|
||||||
* there will always be exactly one non-removed tuple; so optimize that
|
* be exactly one non-removed tuple; so optimize that case.
|
||||||
* case.
|
|
||||||
*/
|
*/
|
||||||
if (nremove + 1 == state->memtupcount)
|
if (nremove + 1 == state->memtupcount)
|
||||||
state->memtuples[0] = state->memtuples[nremove];
|
state->memtuples[0] = state->memtuples[nremove];
|
||||||
|
@ -1178,15 +1200,13 @@ tuplestore_trim(Tuplestorestate *state)
|
||||||
memmove(state->memtuples, state->memtuples + nremove,
|
memmove(state->memtuples, state->memtuples + nremove,
|
||||||
(state->memtupcount - nremove) * sizeof(void *));
|
(state->memtupcount - nremove) * sizeof(void *));
|
||||||
|
|
||||||
|
state->memtupdeleted = 0;
|
||||||
state->memtupcount -= nremove;
|
state->memtupcount -= nremove;
|
||||||
for (i = 0; i < state->readptrcount; i++)
|
for (i = 0; i < state->readptrcount; i++)
|
||||||
{
|
{
|
||||||
if (!state->readptrs[i].eof_reached)
|
if (!state->readptrs[i].eof_reached)
|
||||||
state->readptrs[i].current -= nremove;
|
state->readptrs[i].current -= nremove;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* mark tuplestore as truncated (used for Assert crosschecks only) */
|
|
||||||
state->truncated = true;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
|
Loading…
Reference in New Issue