Add --sampling-rate option to pgbench.

This allows logging only some fraction of transactions, greatly reducing
the amount of log generated.

Tomas Vondra, reviewed by Robert Haas and Jeff Janes.
This commit is contained in:
Heikki Linnakangas 2012-10-03 15:37:42 +03:00
parent 7ae1815961
commit e1be1df51f
2 changed files with 67 additions and 11 deletions

View File

@ -129,6 +129,11 @@ int foreign_keys = 0;
*/
int unlogged_tables = 0;
/*
* log sampling rate: fraction of transactions written to the transaction
* log, set with --sampling-rate and validated to be in the range (0.0, 1.0].
* 1.0 = log everything; 0.0 = option not given, which also logs everything.
*/
double sample_rate = 0.0;
/*
* tablespace selection
*/
@ -370,6 +375,8 @@ usage(void)
" -f FILENAME read transaction script from FILENAME\n"
" -j NUM number of threads (default: 1)\n"
" -l write transaction times to log file\n"
" --sampling-rate NUM\n"
" fraction of transactions to log (e.g. 0.01 for 1%% sample)\n"
" -M simple|extended|prepared\n"
" protocol for submitting queries to server (default: simple)\n"
" -n do not run VACUUM before tests\n"
@ -883,21 +890,30 @@ top:
instr_time diff;
double usec;
INSTR_TIME_SET_CURRENT(now);
diff = now;
INSTR_TIME_SUBTRACT(diff, st->txn_begin);
usec = (double) INSTR_TIME_GET_MICROSEC(diff);
/*
* write the log entry if this row belongs to the random sample,
* or if no sampling rate was given, which means log everything.
*/
if (sample_rate == 0.0 ||
pg_erand48(thread->random_state) <= sample_rate)
{
INSTR_TIME_SET_CURRENT(now);
diff = now;
INSTR_TIME_SUBTRACT(diff, st->txn_begin);
usec = (double) INSTR_TIME_GET_MICROSEC(diff);
#ifndef WIN32
/* This is more than we really ought to know about instr_time */
fprintf(logfile, "%d %d %.0f %d %ld %ld\n",
st->id, st->cnt, usec, st->use_file,
(long) now.tv_sec, (long) now.tv_usec);
/* This is more than we really ought to know about instr_time */
fprintf(logfile, "%d %d %.0f %d %ld %ld\n",
st->id, st->cnt, usec, st->use_file,
(long) now.tv_sec, (long) now.tv_usec);
#else
/* On Windows, instr_time doesn't provide a timestamp anyway */
fprintf(logfile, "%d %d %.0f %d 0 0\n",
st->id, st->cnt, usec, st->use_file);
/* On Windows, instr_time doesn't provide a timestamp anyway */
fprintf(logfile, "%d %d %.0f %d 0 0\n",
st->id, st->cnt, usec, st->use_file);
#endif
}
}
if (commands[st->state]->type == SQL_COMMAND)
@ -1926,6 +1942,7 @@ main(int argc, char **argv)
{"index-tablespace", required_argument, NULL, 3},
{"tablespace", required_argument, NULL, 2},
{"unlogged-tables", no_argument, &unlogged_tables, 1},
{"sampling-rate", required_argument, NULL, 4},
{NULL, 0, NULL, 0}
};
@ -2131,6 +2148,14 @@ main(int argc, char **argv)
case 3: /* index-tablespace */
index_tablespace = optarg;
break;
case 4:
sample_rate = atof(optarg);
if (sample_rate <= 0.0 || sample_rate > 1.0)
{
fprintf(stderr, "invalid sampling rate: %f\n", sample_rate);
exit(1);
}
break;
default:
fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname);
exit(1);
@ -2166,6 +2191,13 @@ main(int argc, char **argv)
exit(1);
}
/* --sampling-rate may be used only with -l */
if (sample_rate > 0.0 && !use_log)
{
fprintf(stderr, "log sampling rate is allowed only when logging transactions (-l) \n");
exit(1);
}
/*
* is_latencies only works with multiple threads in thread-based
* implementations, not fork-based ones, because it supposes that the

View File

@ -316,6 +316,24 @@ pgbench <optional> <replaceable>options</> </optional> <replaceable>dbname</>
</listitem>
</varlistentry>
<varlistentry>
<term><option>--sampling-rate</option> <replaceable>rate</></term>
<listitem>
<para>
Sampling rate, used when writing data into the log, to reduce the
amount of log generated. If this option is given, only the specified
fraction of transactions are logged. 1.0 means all transactions will
be logged, 0.05 means only 5% of the transactions will be logged.
</para>
<para>
Remember to take the sampling rate into account when processing the
log file. For example, when computing tps values, you need to scale
the numbers by the inverse of the sampling rate (e.g. with a 0.01 sample
rate, the log contains only 1/100 of the transactions, so the tps computed
from it must be multiplied by 100 to get the actual tps).
</para>
</listitem>
</varlistentry>
<varlistentry>
<term><option>-M</option> <replaceable>querymode</></term>
<listitem>
@ -750,6 +768,12 @@ END;
0 201 2513 0 1175850569 608
0 202 2038 0 1175850569 2663
</screen></para>
<para>
When running a long test on hardware that can handle a lot of transactions,
the log files can become very large. The <option>--sampling-rate</> option
can be used to log only a random sample of transactions.
</para>
</refsect2>
<refsect2>