Set random seed for pgbench.
Setting the random seed explicitly can make test runs more reproducible in some cases. The patch provides three sources for the seed: the current time (the default), a strong random generator (if available), or an unsigned integer constant. The seed can be set from the command line or from an environment variable. Author: Fabien Coelho Reviewed by: Chapman Flack Discussion: https://www.postgresql.org/message-id/flat/20160407082711.q7iq3ykffqxcszkv@alap3.anarazel.de
This commit is contained in:
parent
530bcf7581
commit
64f85894ad
|
@ -679,6 +679,43 @@ pgbench <optional> <replaceable>options</replaceable> </optional> <replaceable>d
|
||||||
</listitem>
|
</listitem>
|
||||||
</varlistentry>
|
</varlistentry>
|
||||||
|
|
||||||
|
<varlistentry>
|
||||||
|
<term><option>--random-seed=</option><replaceable>SEED</replaceable></term>
|
||||||
|
<listitem>
|
||||||
|
<para>
|
||||||
|
Set random generator seed. Seeds the system random number generator,
|
||||||
|
which then produces a sequence of initial generator states, one for
|
||||||
|
each thread.
|
||||||
|
Values for <replaceable>SEED</replaceable> may be:
|
||||||
|
<literal>time</literal> (the default, the seed is based on the current time),
|
||||||
|
<literal>rand</literal> (use a strong random source, failing if none
|
||||||
|
is available), or an unsigned decimal integer value.
|
||||||
|
The random generator is invoked explicitly from a pgbench script
|
||||||
|
(<literal>random...</literal> functions) or implicitly (for instance option
|
||||||
|
<option>--rate</option> uses it to schedule transactions).
|
||||||
|
When explicitly set, the value used for seeding is shown on the terminal.
|
||||||
|
Any value allowed for <replaceable>SEED</replaceable> may also be
|
||||||
|
provided through the environment variable
|
||||||
|
<literal>PGBENCH_RANDOM_SEED</literal>.
|
||||||
|
To ensure that the provided seed impacts all possible uses, put this option
|
||||||
|
first or use the environment variable.
|
||||||
|
</para>
|
||||||
|
<para>
|
||||||
|
Setting the seed explicitly makes it possible to reproduce a <command>pgbench</command>
|
||||||
|
run exactly, as far as random numbers are concerned.
|
||||||
|
As the random state is managed per thread, this means the exact same
|
||||||
|
<command>pgbench</command> run for an identical invocation if there is one
|
||||||
|
client per thread and there are no external or data dependencies.
|
||||||
|
From a statistical viewpoint reproducing runs exactly is a bad idea because
|
||||||
|
it can hide the performance variability or improve performance unduly,
|
||||||
|
e.g. by hitting the same pages as a previous run.
|
||||||
|
However, it may also be of great help for debugging, for instance
|
||||||
|
re-running a tricky case which leads to an error.
|
||||||
|
Use wisely.
|
||||||
|
</para>
|
||||||
|
</listitem>
|
||||||
|
</varlistentry>
|
||||||
|
|
||||||
<varlistentry>
|
<varlistentry>
|
||||||
<term><option>--sampling-rate=<replaceable>rate</replaceable></option></term>
|
<term><option>--sampling-rate=<replaceable>rate</replaceable></option></term>
|
||||||
<listitem>
|
<listitem>
|
||||||
|
@ -883,6 +920,11 @@ pgbench <optional> <replaceable>options</replaceable> </optional> <replaceable>d
|
||||||
<entry>seed used in hash functions by default</entry>
|
<entry>seed used in hash functions by default</entry>
|
||||||
</row>
|
</row>
|
||||||
|
|
||||||
|
<row>
|
||||||
|
<entry> <literal>random_seed</literal> </entry>
|
||||||
|
<entry>random generator seed (unless overwritten with <option>-D</option>)</entry>
|
||||||
|
</row>
|
||||||
|
|
||||||
<row>
|
<row>
|
||||||
<entry> <literal>scale</literal> </entry>
|
<entry> <literal>scale</literal> </entry>
|
||||||
<entry>current scale factor</entry>
|
<entry>current scale factor</entry>
|
||||||
|
|
|
@ -155,6 +155,9 @@ int64 latency_limit = 0;
|
||||||
char *tablespace = NULL;
|
char *tablespace = NULL;
|
||||||
char *index_tablespace = NULL;
|
char *index_tablespace = NULL;
|
||||||
|
|
||||||
|
/* random seed used when calling srandom() */
|
||||||
|
int64 random_seed = -1;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* end of configurable parameters
|
* end of configurable parameters
|
||||||
*********************************************************************/
|
*********************************************************************/
|
||||||
|
@ -579,6 +582,7 @@ usage(void)
|
||||||
" --log-prefix=PREFIX prefix for transaction time log file\n"
|
" --log-prefix=PREFIX prefix for transaction time log file\n"
|
||||||
" (default: \"pgbench_log\")\n"
|
" (default: \"pgbench_log\")\n"
|
||||||
" --progress-timestamp use Unix epoch timestamps for progress\n"
|
" --progress-timestamp use Unix epoch timestamps for progress\n"
|
||||||
|
" --random-seed=SEED set random seed (\"time\", \"rand\", integer)\n"
|
||||||
" --sampling-rate=NUM fraction of transactions to log (e.g., 0.01 for 1%%)\n"
|
" --sampling-rate=NUM fraction of transactions to log (e.g., 0.01 for 1%%)\n"
|
||||||
"\nCommon options:\n"
|
"\nCommon options:\n"
|
||||||
" -d, --debug print debugging output\n"
|
" -d, --debug print debugging output\n"
|
||||||
|
@ -4664,6 +4668,49 @@ printResults(TState *threads, StatsData *total, instr_time total_time,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* call srandom based on some seed. NULL triggers the default behavior. */
|
||||||
|
static void
|
||||||
|
set_random_seed(const char *seed, const char *origin)
|
||||||
|
{
|
||||||
|
/* srandom expects an unsigned int */
|
||||||
|
unsigned int iseed;
|
||||||
|
|
||||||
|
if (seed == NULL || strcmp(seed, "time") == 0)
|
||||||
|
{
|
||||||
|
/* rely on current time */
|
||||||
|
instr_time now;
|
||||||
|
INSTR_TIME_SET_CURRENT(now);
|
||||||
|
iseed = (unsigned int) INSTR_TIME_GET_MICROSEC(now);
|
||||||
|
}
|
||||||
|
else if (strcmp(seed, "rand") == 0)
|
||||||
|
{
|
||||||
|
/* use some "strong" random source */
|
||||||
|
if (!pg_strong_random(&iseed, sizeof(iseed)))
|
||||||
|
{
|
||||||
|
fprintf(stderr, "cannot seed random from a strong source\n");
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
/* parse seed unsigned int value */
|
||||||
|
char garbage;
|
||||||
|
if (sscanf(seed, "%u%c", &iseed, &garbage) != 1)
|
||||||
|
{
|
||||||
|
fprintf(stderr,
|
||||||
|
"error while scanning '%s' from %s, expecting an unsigned integer, 'time' or 'rand'\n",
|
||||||
|
seed, origin);
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (seed != NULL)
|
||||||
|
fprintf(stderr, "setting random seed to %u\n", iseed);
|
||||||
|
srandom(iseed);
|
||||||
|
/* no precision loss: 32 bit unsigned int cast to 64 bit int */
|
||||||
|
random_seed = iseed;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
int
|
int
|
||||||
main(int argc, char **argv)
|
main(int argc, char **argv)
|
||||||
|
@ -4706,6 +4753,7 @@ main(int argc, char **argv)
|
||||||
{"progress-timestamp", no_argument, NULL, 6},
|
{"progress-timestamp", no_argument, NULL, 6},
|
||||||
{"log-prefix", required_argument, NULL, 7},
|
{"log-prefix", required_argument, NULL, 7},
|
||||||
{"foreign-keys", no_argument, NULL, 8},
|
{"foreign-keys", no_argument, NULL, 8},
|
||||||
|
{"random-seed", required_argument, NULL, 9},
|
||||||
{NULL, 0, NULL, 0}
|
{NULL, 0, NULL, 0}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -4774,6 +4822,9 @@ main(int argc, char **argv)
|
||||||
state = (CState *) pg_malloc(sizeof(CState));
|
state = (CState *) pg_malloc(sizeof(CState));
|
||||||
memset(state, 0, sizeof(CState));
|
memset(state, 0, sizeof(CState));
|
||||||
|
|
||||||
|
/* set random seed early, because it may be used while parsing scripts. */
|
||||||
|
set_random_seed(getenv("PGBENCH_RANDOM_SEED"), "PGBENCH_RANDOM_SEED environment variable");
|
||||||
|
|
||||||
while ((c = getopt_long(argc, argv, "iI:h:nvp:dqb:SNc:j:Crs:t:T:U:lf:D:F:M:P:R:L:", long_options, &optindex)) != -1)
|
while ((c = getopt_long(argc, argv, "iI:h:nvp:dqb:SNc:j:Crs:t:T:U:lf:D:F:M:P:R:L:", long_options, &optindex)) != -1)
|
||||||
{
|
{
|
||||||
char *script;
|
char *script;
|
||||||
|
@ -5046,6 +5097,10 @@ main(int argc, char **argv)
|
||||||
initialization_option_set = true;
|
initialization_option_set = true;
|
||||||
foreign_keys = true;
|
foreign_keys = true;
|
||||||
break;
|
break;
|
||||||
|
case 9: /* random-seed */
|
||||||
|
benchmarking_option_set = true;
|
||||||
|
set_random_seed(optarg, "--random-seed option");
|
||||||
|
break;
|
||||||
default:
|
default:
|
||||||
fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname);
|
fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname);
|
||||||
exit(1);
|
exit(1);
|
||||||
|
@ -5280,10 +5335,6 @@ main(int argc, char **argv)
|
||||||
exit(1);
|
exit(1);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* set random seed */
|
|
||||||
INSTR_TIME_SET_CURRENT(start_time);
|
|
||||||
srandom((unsigned int) INSTR_TIME_GET_MICROSEC(start_time));
|
|
||||||
|
|
||||||
if (internal_script_used)
|
if (internal_script_used)
|
||||||
{
|
{
|
||||||
/*
|
/*
|
||||||
|
@ -5339,10 +5390,8 @@ main(int argc, char **argv)
|
||||||
if (lookupVariable(&state[0], "client_id") == NULL)
|
if (lookupVariable(&state[0], "client_id") == NULL)
|
||||||
{
|
{
|
||||||
for (i = 0; i < nclients; i++)
|
for (i = 0; i < nclients; i++)
|
||||||
{
|
|
||||||
if (!putVariableInt(&state[i], "startup", "client_id", i))
|
if (!putVariableInt(&state[i], "startup", "client_id", i))
|
||||||
exit(1);
|
exit(1);
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/* set default seed for hash functions */
|
/* set default seed for hash functions */
|
||||||
|
@ -5358,6 +5407,14 @@ main(int argc, char **argv)
|
||||||
exit(1);
|
exit(1);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* set random seed unless overwritten */
|
||||||
|
if (lookupVariable(&state[0], "random_seed") == NULL)
|
||||||
|
{
|
||||||
|
for (i = 0; i < nclients; i++)
|
||||||
|
if (!putVariableInt(&state[i], "startup", "random_seed", random_seed))
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
|
||||||
if (!is_no_vacuum)
|
if (!is_no_vacuum)
|
||||||
{
|
{
|
||||||
fprintf(stderr, "starting vacuum...");
|
fprintf(stderr, "starting vacuum...");
|
||||||
|
|
|
@ -29,6 +29,12 @@ sub pgbench
|
||||||
$filename =~ s/\@\d+$//;
|
$filename =~ s/\@\d+$//;
|
||||||
|
|
||||||
#push @filenames, $filename;
|
#push @filenames, $filename;
|
||||||
|
# filenames are expected to be unique on a test
|
||||||
|
if (-e $filename)
|
||||||
|
{
|
||||||
|
ok(0, "$filename must not already exists");
|
||||||
|
unlink $filename or die "cannot unlink $filename: $!";
|
||||||
|
}
|
||||||
append_to_file($filename, $$files{$fn});
|
append_to_file($filename, $$files{$fn});
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -210,14 +216,18 @@ COMMIT;
|
||||||
} });
|
} });
|
||||||
|
|
||||||
# test expressions
|
# test expressions
|
||||||
|
# commands 1..3 and 20 depend on the random seed, which is used to call srandom.
|
||||||
pgbench(
|
pgbench(
|
||||||
'-t 1 -Dfoo=-10.1 -Dbla=false -Di=+3 -Dminint=-9223372036854775808 -Dn=null -Dt=t -Df=of -Dd=1.0',
|
'--random-seed=5432 -t 1 -Dfoo=-10.1 -Dbla=false -Di=+3 -Dminint=-9223372036854775808 -Dn=null -Dt=t -Df=of -Dd=1.0',
|
||||||
0,
|
0,
|
||||||
[ qr{type: .*/001_pgbench_expressions}, qr{processed: 1/1} ],
|
[ qr{type: .*/001_pgbench_expressions}, qr{processed: 1/1} ],
|
||||||
[ qr{command=1.: int 1\d\b},
|
[ qr{setting random seed to 5432\b},
|
||||||
qr{command=2.: int 1\d\d\b},
|
# After explicit seeding, the four * random checks (1-3,20) should be
|
||||||
qr{command=3.: int 1\d\d\d\b},
|
# deterministic, but not necessarily portable.
|
||||||
qr{command=4.: int 4\b},
|
qr{command=1.: int 1\d\b}, # uniform random: 12 on linux
|
||||||
|
qr{command=2.: int 1\d\d\b}, # exponential random: 106 on linux
|
||||||
|
qr{command=3.: int 1\d\d\d\b}, # gaussian random: 1462 on linux
|
||||||
|
qr{command=4.: int 4\b},
|
||||||
qr{command=5.: int 5\b},
|
qr{command=5.: int 5\b},
|
||||||
qr{command=6.: int 6\b},
|
qr{command=6.: int 6\b},
|
||||||
qr{command=7.: int 7\b},
|
qr{command=7.: int 7\b},
|
||||||
|
@ -230,7 +240,7 @@ pgbench(
|
||||||
qr{command=16.: double 16\b},
|
qr{command=16.: double 16\b},
|
||||||
qr{command=17.: double 17\b},
|
qr{command=17.: double 17\b},
|
||||||
qr{command=18.: int 9223372036854775807\b},
|
qr{command=18.: int 9223372036854775807\b},
|
||||||
qr{command=20.: int [1-9]\b},
|
qr{command=20.: int \d\b}, # zipfian random: 1 on linux
|
||||||
qr{command=21.: double -27\b},
|
qr{command=21.: double -27\b},
|
||||||
qr{command=22.: double 1024\b},
|
qr{command=22.: double 1024\b},
|
||||||
qr{command=23.: double 1\b},
|
qr{command=23.: double 1\b},
|
||||||
|
@ -270,6 +280,9 @@ pgbench(
|
||||||
qr{command=86.: int 86\b},
|
qr{command=86.: int 86\b},
|
||||||
qr{command=93.: int 93\b},
|
qr{command=93.: int 93\b},
|
||||||
qr{command=95.: int 0\b},
|
qr{command=95.: int 0\b},
|
||||||
|
qr{command=96.: int 1\b}, # :scale
|
||||||
|
qr{command=97.: int 0\b}, # :client_id
|
||||||
|
qr{command=98.: int 5432\b}, # :random_seed
|
||||||
],
|
],
|
||||||
'pgbench expressions',
|
'pgbench expressions',
|
||||||
{ '001_pgbench_expressions' => q{-- integer functions
|
{ '001_pgbench_expressions' => q{-- integer functions
|
||||||
|
@ -390,8 +403,52 @@ SELECT :v0, :v1, :v2, :v3;
|
||||||
\endif
|
\endif
|
||||||
-- must be zero if false branches where skipped
|
-- must be zero if false branches where skipped
|
||||||
\set nope debug(:nope)
|
\set nope debug(:nope)
|
||||||
|
-- check automatic variables
|
||||||
|
\set sc debug(:scale)
|
||||||
|
\set ci debug(:client_id)
|
||||||
|
\set rs debug(:random_seed)
|
||||||
} });
|
} });
|
||||||
|
|
||||||
|
# Random determinism when seeded: run the same script twice with the same
# explicit seed, have each run store the values it drew in a table, then
# check that both runs stored identical rows.
$node->safe_psql('postgres',
	'CREATE UNLOGGED TABLE seeded_random(seed INT8 NOT NULL, rand TEXT NOT NULL, val INTEGER NOT NULL);');

# The same seed value is used for both runs to check for determinism.
my $seed = int(rand(1000000000));
for my $i (1, 2)
{
	# The script file name carries the run number so each run uses a
	# distinct file name.
	pgbench("--random-seed=$seed -t 1",
		0,
		[qr{processed: 1/1}],
		[qr{setting random seed to $seed\b}],
		"random seeded with $seed",
		{ "001_pgbench_random_seed_$i" => q{-- test random functions
\set ur random(1000, 1999)
\set er random_exponential(2000, 2999, 2.0)
\set gr random_gaussian(3000, 3999, 3.0)
\set zr random_zipfian(4000, 4999, 2.5)
INSERT INTO seeded_random(seed, rand, val) VALUES
(:random_seed, 'uniform', :ur),
(:random_seed, 'exponential', :er),
(:random_seed, 'gaussian', :gr),
(:random_seed, 'zipfian', :zr);
} });
}

# Check that all runs generated the same 4 values: grouping by
# (seed, rand, val) must give one row per distribution with a count of 2.
my ($ret, $out, $err) =
  $node->psql('postgres',
	'SELECT seed, rand, val, COUNT(*) FROM seeded_random GROUP BY seed, rand, val');

# cmp_ok/is/like report the actual value on failure, unlike a bare ok().
cmp_ok($ret, '==', 0, "psql seeded_random count ok");
is($err, '', "psql seeded_random count stderr is empty");
like($out, qr/\b$seed\|uniform\|1\d\d\d\|2\b/, "psql seeded_random count uniform");
like($out, qr/\b$seed\|exponential\|2\d\d\d\|2\b/, "psql seeded_random count exponential");
like($out, qr/\b$seed\|gaussian\|3\d\d\d\|2\b/, "psql seeded_random count gaussian");
like($out, qr/\b$seed\|zipfian\|4\d\d\d\|2\b/, "psql seeded_random count zipfian");

$node->safe_psql('postgres', 'DROP TABLE seeded_random;');
|
||||||
|
|
||||||
# backslash commands
|
# backslash commands
|
||||||
pgbench(
|
pgbench(
|
||||||
'-t 1', 0,
|
'-t 1', 0,
|
||||||
|
|
|
@ -110,6 +110,8 @@ my @options = (
|
||||||
[ 'invalid init step', '-i -I dta',
|
[ 'invalid init step', '-i -I dta',
|
||||||
[qr{unrecognized initialization step},
|
[qr{unrecognized initialization step},
|
||||||
qr{allowed steps are} ] ],
|
qr{allowed steps are} ] ],
|
||||||
|
[ 'bad random seed', '--random-seed=one',
|
||||||
|
[qr{error while scanning 'one' from --random-seed option, expecting an unsigned integer} ] ],
|
||||||
|
|
||||||
# logging sub-options
|
# logging sub-options
|
||||||
[ 'sampling => log', '--sampling-rate=0.01',
|
[ 'sampling => log', '--sampling-rate=0.01',
|
||||||
|
|
Loading…
Reference in New Issue