Add approximated Zipfian-distributed random generator to pgbench.
Generator helps to make close to real-world tests. Author: Alik Khilazhev Reviewed-By: Fabien COELHO Discussion: https://www.postgresql.org/message-id/flat/BF3B6F54-68C3-417A-BFAB-FB4D66F2B410@postgrespro.ru
This commit is contained in:
parent
538d114f6d
commit
1fcd0adeb3
|
@ -1092,6 +1092,14 @@ pgbench <optional> <replaceable>options</replaceable> </optional> <replaceable>d
|
|||
<entry><literal>random_gaussian(1, 10, 2.5)</literal></entry>
|
||||
<entry>an integer between <literal>1</literal> and <literal>10</literal></entry>
|
||||
</row>
|
||||
<row>
|
||||
<entry><literal><function>random_zipfian(<replaceable>lb</replaceable>, <replaceable>ub</replaceable>, <replaceable>parameter</replaceable>)</function></literal></entry>
|
||||
<entry>integer</entry>
|
||||
<entry>Zipfian-distributed random integer in <literal>[lb, ub]</literal>,
|
||||
see below</entry>
|
||||
<entry><literal>random_zipfian(1, 10, 1.5)</literal></entry>
|
||||
<entry>an integer between <literal>1</literal> and <literal>10</literal></entry>
|
||||
</row>
|
||||
<row>
|
||||
<entry><literal><function>sqrt(<replaceable>x</replaceable>)</function></literal></entry>
|
||||
<entry>double</entry>
|
||||
|
@ -1173,6 +1181,27 @@ f(x) = PHI(2.0 * parameter * (x - mu) / (max - min + 1)) /
|
|||
of the Box-Muller transform.
|
||||
</para>
|
||||
</listitem>
|
||||
<listitem>
|
||||
<para>
|
||||
<literal>random_zipfian</literal> generates an approximated bounded zipfian
|
||||
distribution. For <replaceable>parameter</replaceable> in (0, 1), an
|
||||
approximated algorithm is taken from
|
||||
"Quickly Generating Billion-Record Synthetic Databases",
|
||||
Jim Gray et al, SIGMOD 1994. For <replaceable>parameter</replaceable>
|
||||
in (1, 1000), a rejection method is used, based on
|
||||
"Non-Uniform Random Variate Generation", Luc Devroye, p. 550-551,
|
||||
Springer 1986. The distribution is not defined when the parameter's
|
||||
value is 1.0. The drawing performance is poor for parameter values
|
||||
close and above 1.0 and on a small range.
|
||||
</para>
|
||||
<para>
|
||||
<replaceable>parameter</replaceable>
|
||||
defines how skewed the distribution is. The larger the <replaceable>parameter</replaceable>, the more
|
||||
frequently values to the beginning of the interval are drawn.
|
||||
The closer to 0 <replaceable>parameter</replaceable> is,
|
||||
the flatter (more uniform) the access distribution.
|
||||
</para>
|
||||
</listitem>
|
||||
</itemizedlist>
|
||||
|
||||
<para>
|
||||
|
|
|
@ -191,6 +191,9 @@ static const struct
|
|||
{
|
||||
"random_exponential", 3, PGBENCH_RANDOM_EXPONENTIAL
|
||||
},
|
||||
{
|
||||
"random_zipfian", 3, PGBENCH_RANDOM_ZIPFIAN
|
||||
},
|
||||
/* keep as last array element */
|
||||
{
|
||||
NULL, 0, 0
|
||||
|
|
|
@ -95,7 +95,10 @@ static int pthread_join(pthread_t th, void **thread_return);
|
|||
#define LOG_STEP_SECONDS 5 /* seconds between log messages */
|
||||
#define DEFAULT_NXACTS 10 /* default nxacts */
|
||||
|
||||
#define ZIPF_CACHE_SIZE 15 /* cache cells number */
|
||||
|
||||
#define MIN_GAUSSIAN_PARAM 2.0 /* minimum parameter for gauss */
|
||||
#define MAX_ZIPFIAN_PARAM 1000 /* maximum parameter for zipfian */
|
||||
|
||||
int nxacts = 0; /* number of transactions per client */
|
||||
int duration = 0; /* duration in seconds */
|
||||
|
@ -330,6 +333,35 @@ typedef struct
|
|||
int ecnt; /* error count */
|
||||
} CState;
|
||||
|
||||
/*
|
||||
* Cache cell for zipfian_random call
|
||||
*/
|
||||
typedef struct
|
||||
{
|
||||
/* cell keys */
|
||||
double s; /* s - parameter of zipfan_random function */
|
||||
int64 n; /* number of elements in range (max - min + 1) */
|
||||
|
||||
double harmonicn; /* generalizedHarmonicNumber(n, s) */
|
||||
double alpha;
|
||||
double beta;
|
||||
double eta;
|
||||
|
||||
uint64 last_used; /* last used logical time */
|
||||
} ZipfCell;
|
||||
|
||||
/*
|
||||
* Zipf cache for zeta values
|
||||
*/
|
||||
typedef struct
|
||||
{
|
||||
uint64 current; /* counter for LRU cache replacement algorithm */
|
||||
|
||||
int nb_cells; /* number of filled cells */
|
||||
int overflowCount; /* number of cache overflows */
|
||||
ZipfCell cells[ZIPF_CACHE_SIZE];
|
||||
} ZipfCache;
|
||||
|
||||
/*
|
||||
* Thread state
|
||||
*/
|
||||
|
@ -342,6 +374,8 @@ typedef struct
|
|||
unsigned short random_state[3]; /* separate randomness for each thread */
|
||||
int64 throttle_trigger; /* previous/next throttling (us) */
|
||||
FILE *logfile; /* where to log, or NULL */
|
||||
ZipfCache zipf_cache; /* for thread-safe zipfian random number
|
||||
* generation */
|
||||
|
||||
/* per thread collected stats */
|
||||
instr_time start_time; /* thread start time */
|
||||
|
@ -746,6 +780,137 @@ getPoissonRand(TState *thread, int64 center)
|
|||
return (int64) (-log(uniform) * ((double) center) + 0.5);
|
||||
}
|
||||
|
||||
/* helper function for getZipfianRand */
|
||||
static double
|
||||
generalizedHarmonicNumber(int64 n, double s)
|
||||
{
|
||||
int i;
|
||||
double ans = 0.0;
|
||||
|
||||
for (i = n; i > 1; i--)
|
||||
ans += pow(i, -s);
|
||||
return ans + 1.0;
|
||||
}
|
||||
|
||||
/* set harmonicn and other parameters to cache cell */
|
||||
static void
|
||||
zipfSetCacheCell(ZipfCell * cell, int64 n, double s)
|
||||
{
|
||||
double harmonic2;
|
||||
|
||||
cell->n = n;
|
||||
cell->s = s;
|
||||
|
||||
harmonic2 = generalizedHarmonicNumber(2, s);
|
||||
cell->harmonicn = generalizedHarmonicNumber(n, s);
|
||||
|
||||
cell->alpha = 1.0 / (1.0 - s);
|
||||
cell->beta = pow(0.5, s);
|
||||
cell->eta = (1.0 - pow(2.0 / n, 1.0 - s)) / (1.0 - harmonic2 / cell->harmonicn);
|
||||
}
|
||||
|
||||
/*
|
||||
* search for cache cell with keys (n, s)
|
||||
* and create new cell if it does not exist
|
||||
*/
|
||||
static ZipfCell *
|
||||
zipfFindOrCreateCacheCell(ZipfCache * cache, int64 n, double s)
|
||||
{
|
||||
int i,
|
||||
least_recently_used = 0;
|
||||
ZipfCell *cell;
|
||||
|
||||
/* search cached cell for given parameters */
|
||||
for (i = 0; i < cache->nb_cells; i++)
|
||||
{
|
||||
cell = &cache->cells[i];
|
||||
if (cell->n == n && cell->s == s)
|
||||
return &cache->cells[i];
|
||||
|
||||
if (cell->last_used < cache->cells[least_recently_used].last_used)
|
||||
least_recently_used = i;
|
||||
}
|
||||
|
||||
/* create new one if it does not exist */
|
||||
if (cache->nb_cells < ZIPF_CACHE_SIZE)
|
||||
i = cache->nb_cells++;
|
||||
else
|
||||
{
|
||||
/* replace LRU cell if cache is full */
|
||||
i = least_recently_used;
|
||||
cache->overflowCount++;
|
||||
}
|
||||
|
||||
zipfSetCacheCell(&cache->cells[i], n, s);
|
||||
|
||||
cache->cells[i].last_used = cache->current++;
|
||||
return &cache->cells[i];
|
||||
}
|
||||
|
||||
/*
|
||||
* Computing zipfian using rejection method, based on
|
||||
* "Non-Uniform Random Variate Generation",
|
||||
* Luc Devroye, p. 550-551, Springer 1986.
|
||||
*/
|
||||
static int64
|
||||
computeIterativeZipfian(TState *thread, int64 n, double s)
|
||||
{
|
||||
double b = pow(2.0, s - 1.0);
|
||||
double x,
|
||||
t,
|
||||
u,
|
||||
v;
|
||||
|
||||
while (true)
|
||||
{
|
||||
/* random variates */
|
||||
u = pg_erand48(thread->random_state);
|
||||
v = pg_erand48(thread->random_state);
|
||||
|
||||
x = floor(pow(u, -1.0 / (s - 1.0)));
|
||||
|
||||
t = pow(1.0 + 1.0 / x, s - 1.0);
|
||||
/* reject if too large or out of bound */
|
||||
if (v * x * (t - 1.0) / (b - 1.0) <= t / b && x <= n)
|
||||
break;
|
||||
}
|
||||
return (int64) x;
|
||||
}
|
||||
|
||||
/*
|
||||
* Computing zipfian using harmonic numbers, based on algorithm described in
|
||||
* "Quickly Generating Billion-Record Synthetic Databases",
|
||||
* Jim Gray et al, SIGMOD 1994
|
||||
*/
|
||||
static int64
|
||||
computeHarmonicZipfian(TState *thread, int64 n, double s)
|
||||
{
|
||||
ZipfCell *cell = zipfFindOrCreateCacheCell(&thread->zipf_cache, n, s);
|
||||
double uniform = pg_erand48(thread->random_state);
|
||||
double uz = uniform * cell->harmonicn;
|
||||
|
||||
if (uz < 1.0)
|
||||
return 1;
|
||||
if (uz < 1.0 + cell->beta)
|
||||
return 2;
|
||||
return 1 + (int64) (cell->n * pow(cell->eta * uniform - cell->eta + 1.0, cell->alpha));
|
||||
}
|
||||
|
||||
/* random number generator: zipfian distribution from min to max inclusive */
|
||||
static int64
|
||||
getZipfianRand(TState *thread, int64 min, int64 max, double s)
|
||||
{
|
||||
int64 n = max - min + 1;
|
||||
|
||||
/* abort if parameter is invalid */
|
||||
Assert(s > 0.0 && s != 1.0 && s <= MAX_ZIPFIAN_PARAM);
|
||||
|
||||
|
||||
return min - 1 + ((s > 1)
|
||||
? computeIterativeZipfian(thread, n, s)
|
||||
: computeHarmonicZipfian(thread, n, s));
|
||||
}
|
||||
|
||||
/*
|
||||
* Initialize the given SimpleStats struct to all zeroes
|
||||
*/
|
||||
|
@ -1303,7 +1468,6 @@ coerceToDouble(PgBenchValue *pval, double *dval)
|
|||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
/* assign an integer value */
|
||||
static void
|
||||
setIntValue(PgBenchValue *pv, int64 ival)
|
||||
|
@ -1605,6 +1769,7 @@ evalFunc(TState *thread, CState *st,
|
|||
case PGBENCH_RANDOM:
|
||||
case PGBENCH_RANDOM_EXPONENTIAL:
|
||||
case PGBENCH_RANDOM_GAUSSIAN:
|
||||
case PGBENCH_RANDOM_ZIPFIAN:
|
||||
{
|
||||
int64 imin,
|
||||
imax;
|
||||
|
@ -1655,6 +1820,18 @@ evalFunc(TState *thread, CState *st,
|
|||
setIntValue(retval,
|
||||
getGaussianRand(thread, imin, imax, param));
|
||||
}
|
||||
else if (func == PGBENCH_RANDOM_ZIPFIAN)
|
||||
{
|
||||
if (param <= 0.0 || param == 1.0 || param > MAX_ZIPFIAN_PARAM)
|
||||
{
|
||||
fprintf(stderr,
|
||||
"zipfian parameter must be in range (0, 1) U (1, %d]"
|
||||
" (got %f)\n", MAX_ZIPFIAN_PARAM, param);
|
||||
return false;
|
||||
}
|
||||
setIntValue(retval,
|
||||
getZipfianRand(thread, imin, imax, param));
|
||||
}
|
||||
else /* exponential */
|
||||
{
|
||||
if (param <= 0.0)
|
||||
|
@ -3683,6 +3860,8 @@ printResults(TState *threads, StatsData *total, instr_time total_time,
|
|||
tps_include,
|
||||
tps_exclude;
|
||||
int64 ntx = total->cnt - total->skipped;
|
||||
int i,
|
||||
totalCacheOverflows = 0;
|
||||
|
||||
time_include = INSTR_TIME_GET_DOUBLE(total_time);
|
||||
|
||||
|
@ -3710,6 +3889,15 @@ printResults(TState *threads, StatsData *total, instr_time total_time,
|
|||
printf("number of transactions actually processed: " INT64_FORMAT "\n",
|
||||
ntx);
|
||||
}
|
||||
/* Report zipfian cache overflow */
|
||||
for (i = 0; i < nthreads; i++)
|
||||
{
|
||||
totalCacheOverflows += threads[i].zipf_cache.overflowCount;
|
||||
}
|
||||
if (totalCacheOverflows > 0)
|
||||
{
|
||||
printf("zipfian cache array overflowed %d time(s)\n", totalCacheOverflows);
|
||||
}
|
||||
|
||||
/* Remaining stats are nonsensical if we failed to execute any xacts */
|
||||
if (total->cnt <= 0)
|
||||
|
@ -4513,6 +4701,9 @@ main(int argc, char **argv)
|
|||
thread->random_state[2] = random();
|
||||
thread->logfile = NULL; /* filled in later */
|
||||
thread->latency_late = 0;
|
||||
thread->zipf_cache.nb_cells = 0;
|
||||
thread->zipf_cache.current = 0;
|
||||
thread->zipf_cache.overflowCount = 0;
|
||||
initStats(&thread->stats, 0);
|
||||
|
||||
nclients_dealt += thread->nstate;
|
||||
|
|
|
@ -75,7 +75,8 @@ typedef enum PgBenchFunction
|
|||
PGBENCH_SQRT,
|
||||
PGBENCH_RANDOM,
|
||||
PGBENCH_RANDOM_GAUSSIAN,
|
||||
PGBENCH_RANDOM_EXPONENTIAL
|
||||
PGBENCH_RANDOM_EXPONENTIAL,
|
||||
PGBENCH_RANDOM_ZIPFIAN
|
||||
} PgBenchFunction;
|
||||
|
||||
typedef struct PgBenchExpr PgBenchExpr;
|
||||
|
|
|
@ -231,7 +231,8 @@ pgbench(
|
|||
qr{command=18.: double 18\b},
|
||||
qr{command=19.: double 19\b},
|
||||
qr{command=20.: double 20\b},
|
||||
qr{command=21.: int 9223372036854775807\b}, ],
|
||||
qr{command=21.: int 9223372036854775807\b},
|
||||
qr{command=23.: int [1-9]\b}, ],
|
||||
'pgbench expressions',
|
||||
{ '001_pgbench_expressions' => q{-- integer functions
|
||||
\set i1 debug(random(1, 100))
|
||||
|
@ -261,6 +262,8 @@ pgbench(
|
|||
\set maxint debug(:minint - 1)
|
||||
-- reset a variable
|
||||
\set i1 0
|
||||
-- yet another integer function
|
||||
\set id debug(random_zipfian(1, 9, 1.3))
|
||||
} });
|
||||
|
||||
# backslash commands
|
||||
|
@ -371,6 +374,14 @@ SELECT LEAST(:i, :i, :i, :i, :i, :i, :i, :i, :i, :i, :i);
|
|||
0,
|
||||
[qr{exponential parameter must be greater }],
|
||||
q{\set i random_exponential(0, 10, 0.0)} ],
|
||||
[ 'set zipfian param to 1',
|
||||
0,
|
||||
[qr{zipfian parameter must be in range \(0, 1\) U \(1, \d+\]}],
|
||||
q{\set i random_zipfian(0, 10, 1)} ],
|
||||
[ 'set zipfian param too large',
|
||||
0,
|
||||
[qr{zipfian parameter must be in range \(0, 1\) U \(1, \d+\]}],
|
||||
q{\set i random_zipfian(0, 10, 1000000)} ],
|
||||
[ 'set non numeric value', 0,
|
||||
[qr{malformed variable "foo" value: "bla"}], q{\set i :foo + 1} ],
|
||||
[ 'set no expression', 1, [qr{syntax error}], q{\set i} ],
|
||||
|
@ -412,6 +423,31 @@ for my $e (@errors)
|
|||
{ $n => $script });
|
||||
}
|
||||
|
||||
# zipfian cache array overflow
|
||||
pgbench(
|
||||
'-t 1', 0,
|
||||
[ qr{processed: 1/1}, qr{zipfian cache array overflowed 1 time\(s\)} ],
|
||||
[ qr{^} ],
|
||||
'pgbench zipfian array overflow on random_zipfian',
|
||||
{ '001_pgbench_random_zipfian' => q{
|
||||
\set i random_zipfian(1, 100, 0.5)
|
||||
\set i random_zipfian(2, 100, 0.5)
|
||||
\set i random_zipfian(3, 100, 0.5)
|
||||
\set i random_zipfian(4, 100, 0.5)
|
||||
\set i random_zipfian(5, 100, 0.5)
|
||||
\set i random_zipfian(6, 100, 0.5)
|
||||
\set i random_zipfian(7, 100, 0.5)
|
||||
\set i random_zipfian(8, 100, 0.5)
|
||||
\set i random_zipfian(9, 100, 0.5)
|
||||
\set i random_zipfian(10, 100, 0.5)
|
||||
\set i random_zipfian(11, 100, 0.5)
|
||||
\set i random_zipfian(12, 100, 0.5)
|
||||
\set i random_zipfian(13, 100, 0.5)
|
||||
\set i random_zipfian(14, 100, 0.5)
|
||||
\set i random_zipfian(15, 100, 0.5)
|
||||
\set i random_zipfian(16, 100, 0.5)
|
||||
} });
|
||||
|
||||
# throttling
|
||||
pgbench(
|
||||
'-t 100 -S --rate=100000 --latency-limit=1000000 -c 2 -n -r',
|
||||
|
|
Loading…
Reference in New Issue