Add the ability to compute per-statement latencies (ie, average execution

times) to pgbench.

Florian Pflug, reviewed by Greg Smith
This commit is contained in:
Tom Lane 2010-08-12 20:39:39 +00:00
parent 47eeb5e662
commit 5a4e19abe6
2 changed files with 256 additions and 66 deletions

View File

@ -4,7 +4,7 @@
* A simple benchmark program for PostgreSQL
* Originally written by Tatsuo Ishii and enhanced by many contributors.
*
* $PostgreSQL: pgsql/contrib/pgbench/pgbench.c,v 1.99 2010/07/06 19:18:55 momjian Exp $
* $PostgreSQL: pgsql/contrib/pgbench/pgbench.c,v 1.100 2010/08/12 20:39:39 tgl Exp $
* Copyright (c) 2000-2010, PostgreSQL Global Development Group
* ALL RIGHTS RESERVED;
*
@ -133,6 +133,7 @@ int fillfactor = 100;
bool use_log; /* log transaction latencies to a file */
bool is_connect; /* establish connection for each transaction */
bool is_latencies; /* report per-command latencies */
int main_pid; /* main process id used in log filename */
char *pghost = "";
@ -171,7 +172,8 @@ typedef struct
int64 until; /* napping until (usec) */
Variable *variables; /* array of variable definitions */
int nvariables;
instr_time txn_begin; /* used for measuring latencies */
instr_time txn_begin; /* used for measuring transaction latencies */
instr_time stmt_begin; /* used for measuring statement latencies */
int use_file; /* index in sql_files for this client */
bool prepared[MAX_FILES];
} CState;
@ -186,6 +188,8 @@ typedef struct
CState *state; /* array of CState */
int nstate; /* length of state[] */
instr_time start_time; /* thread start time */
instr_time *exec_elapsed; /* time spent executing cmds (per Command) */
int *exec_count; /* number of cmd executions (per Command) */
} TState;
#define INVALID_THREAD ((pthread_t) 0)
@ -216,13 +220,16 @@ static const char *QUERYMODE[] = {"simple", "extended", "prepared"};
typedef struct
{
char *line; /* full text of command line */
int command_num; /* unique index of this Command struct */
int type; /* command type (SQL_COMMAND or META_COMMAND) */
int argc; /* number of commands */
char *argv[MAX_ARGS]; /* command list */
int argc; /* number of command words */
char *argv[MAX_ARGS]; /* command word list */
} Command;
static Command **sql_files[MAX_FILES]; /* SQL script files */
static int num_files; /* number of script files */
static int num_commands = 0; /* total number of Command structs */
static int debug = 0; /* debug flag */
/* default scenario */
@ -287,6 +294,7 @@ usage(const char *progname)
" define variable for use by custom script\n"
" -f FILENAME read transaction script from FILENAME\n"
" -j NUM number of threads (default: 1)\n"
" -r report average latency per command\n"
" -l write transaction times to log file\n"
" -M {simple|extended|prepared}\n"
" protocol for submitting queries to server (default: simple)\n"
@ -629,11 +637,13 @@ runShellCommand(CState *st, char *variable, char **argv, int argc)
char *endptr;
int retval;
/*
* Join arguments with whilespace separaters. Arguments starting with
* exactly one colon are treated as variables: name - append a string
* "name" :var - append a variable named 'var'. ::name - append a string
* ":name"
/*----------
* Join arguments with whitespace separators. Arguments starting with
* exactly one colon are treated as variables:
* name - append a string "name"
* :var - append a variable named 'var'
* ::name - append a string ":name"
*----------
*/
for (i = 0; i < argc; i++)
{
@ -740,7 +750,7 @@ clientDone(CState *st, bool ok)
/* return false iff client should be disconnected */
static bool
doCustom(CState *st, instr_time *conn_time, FILE *logfile)
doCustom(TState *thread, CState *st, instr_time *conn_time, FILE *logfile)
{
PGresult *res;
Command **commands;
@ -775,7 +785,22 @@ top:
}
/*
* transaction finished: record the time it took in the log
* command finished: accumulate per-command execution times in
* thread-local data structure, if per-command latencies are requested
*/
if (is_latencies)
{
instr_time now;
int cnum = commands[st->state]->command_num;
INSTR_TIME_SET_CURRENT(now);
INSTR_TIME_ACCUM_DIFF(thread->exec_elapsed[cnum],
now, st->stmt_begin);
thread->exec_count[cnum]++;
}
/*
* if transaction finished, record the time it took in the log
*/
if (logfile && commands[st->state + 1] == NULL)
{
@ -802,6 +827,10 @@ top:
if (commands[st->state]->type == SQL_COMMAND)
{
/*
* Read and discard the query result; note this is not included
* in the statement latency numbers.
*/
res = PQgetResult(st->con);
switch (PQresultStatus(res))
{
@ -856,9 +885,14 @@ top:
INSTR_TIME_ACCUM_DIFF(*conn_time, end, start);
}
/* Record transaction start time if logging is enabled */
if (logfile && st->state == 0)
INSTR_TIME_SET_CURRENT(st->txn_begin);
/* Record statement start time if per-command latencies are requested */
if (is_latencies)
INSTR_TIME_SET_CURRENT(st->stmt_begin);
if (commands[st->state]->type == SQL_COMMAND)
{
const Command *command = commands[st->state];
@ -1351,6 +1385,7 @@ parseQuery(Command *cmd, const char *raw_sql)
return true;
}
/* Parse a command; return a Command struct, or NULL if it's a comment */
static Command *
process_commands(char *buf)
{
@ -1361,24 +1396,28 @@ process_commands(char *buf)
char *p,
*tok;
/* Make the string buf end at the next newline */
if ((p = strchr(buf, '\n')) != NULL)
*p = '\0';
/* Skip leading whitespace */
p = buf;
while (isspace((unsigned char) *p))
p++;
/* If the line is empty or actually a comment, we're done */
if (*p == '\0' || strncmp(p, "--", 2) == 0)
{
return NULL;
}
/* Allocate and initialize Command structure */
my_commands = (Command *) malloc(sizeof(Command));
if (my_commands == NULL)
{
return NULL;
}
my_commands->line = strdup(buf);
if (my_commands->line == NULL)
return NULL;
my_commands->command_num = num_commands++;
my_commands->type = 0; /* until set */
my_commands->argc = 0;
if (*p == '\\')
@ -1547,26 +1586,13 @@ process_file(char *filename)
while (fgets(buf, sizeof(buf), fd) != NULL)
{
Command *commands;
int i;
Command *command;
i = 0;
while (isspace((unsigned char) buf[i]))
i++;
if (buf[i] != '\0' && strncmp(&buf[i], "--", 2) != 0)
{
commands = process_commands(&buf[i]);
if (commands == NULL)
{
fclose(fd);
return false;
}
}
else
command = process_commands(buf);
if (command == NULL)
continue;
my_commands[lineno] = commands;
my_commands[lineno] = command;
lineno++;
if (lineno >= alloc_num)
@ -1612,7 +1638,7 @@ process_builtin(char *tb)
for (;;)
{
char *p;
Command *commands;
Command *command;
p = buf;
while (*tb && *tb != '\n')
@ -1626,13 +1652,11 @@ process_builtin(char *tb)
*p = '\0';
commands = process_commands(buf);
if (commands == NULL)
{
return NULL;
}
command = process_commands(buf);
if (command == NULL)
continue;
my_commands[lineno] = commands;
my_commands[lineno] = command;
lineno++;
if (lineno >= alloc_num)
@ -1653,7 +1677,8 @@ process_builtin(char *tb)
/* print out results */
static void
printResults(int ttype, int normal_xacts, int nclients, int nthreads,
printResults(int ttype, int normal_xacts, int nclients,
TState *threads, int nthreads,
instr_time total_time, instr_time conn_total_time)
{
double time_include,
@ -1694,6 +1719,51 @@ printResults(int ttype, int normal_xacts, int nclients, int nthreads,
}
printf("tps = %f (including connections establishing)\n", tps_include);
printf("tps = %f (excluding connections establishing)\n", tps_exclude);
/* Report per-command latencies */
if (is_latencies)
{
int i;
for (i = 0; i < num_files; i++)
{
Command **commands;
if (num_files > 1)
printf("statement latencies in milliseconds, file %d:\n", i+1);
else
printf("statement latencies in milliseconds:\n");
for (commands = sql_files[i]; *commands != NULL; commands++)
{
Command *command = *commands;
int cnum = command->command_num;
double total_time;
instr_time total_exec_elapsed;
int total_exec_count;
int t;
/* Accumulate per-thread data for command */
INSTR_TIME_SET_ZERO(total_exec_elapsed);
total_exec_count = 0;
for (t = 0; t < nthreads; t++)
{
TState *thread = &threads[t];
INSTR_TIME_ADD(total_exec_elapsed,
thread->exec_elapsed[cnum]);
total_exec_count += thread->exec_count[cnum];
}
if (total_exec_count > 0)
total_time = INSTR_TIME_GET_MILLISEC(total_exec_elapsed) / (double) total_exec_count;
else
total_time = 0.0;
printf("\t%f\t%s\n", total_time, command->line);
}
}
}
}
@ -1770,7 +1840,7 @@ main(int argc, char **argv)
memset(state, 0, sizeof(*state));
while ((c = getopt(argc, argv, "ih:nvp:dSNc:Cs:t:T:U:lf:D:F:M:j:")) != -1)
while ((c = getopt(argc, argv, "ih:nvp:dSNc:j:Crs:t:T:U:lf:D:F:M:")) != -1)
{
switch (c)
{
@ -1834,6 +1904,9 @@ main(int argc, char **argv)
case 'C':
is_connect = true;
break;
case 'r':
is_latencies = true;
break;
case 's':
scale_given = true;
scale = atoi(optarg);
@ -1954,6 +2027,22 @@ main(int argc, char **argv)
exit(1);
}
/*
* is_latencies only works with multiple threads in thread-based
* implementations, not fork-based ones, because it supposes that the
* parent can see changes made to the per-thread execution stats by child
* threads. It seems useful enough to accept despite this limitation,
* but perhaps we should FIXME someday (by passing the stats data back
* up through the parent-to-child pipes).
*/
#ifndef ENABLE_THREAD_SAFETY
if (is_latencies && nthreads > 1)
{
fprintf(stderr, "-r does not work with -j larger than 1 on this platform.\n");
exit(1);
}
#endif
/*
* save main process id in the global variable because process id will be
* changed after fork.
@ -2091,6 +2180,39 @@ main(int argc, char **argv)
break;
}
/* set up thread data structures */
threads = (TState *) malloc(sizeof(TState) * nthreads);
for (i = 0; i < nthreads; i++)
{
TState *thread = &threads[i];
thread->tid = i;
thread->state = &state[nclients / nthreads * i];
thread->nstate = nclients / nthreads;
if (is_latencies)
{
/* Reserve memory for the thread to store per-command latencies */
int t;
thread->exec_elapsed = (instr_time *)
malloc(sizeof(instr_time) * num_commands);
thread->exec_count = (int *)
malloc(sizeof(int) * num_commands);
for (t = 0; t < num_commands; t++)
{
INSTR_TIME_SET_ZERO(thread->exec_elapsed[t]);
thread->exec_count[t] = 0;
}
}
else
{
thread->exec_elapsed = NULL;
thread->exec_count = NULL;
}
}
/* get start up time */
INSTR_TIME_SET_CURRENT(start_time);
@ -2099,20 +2221,18 @@ main(int argc, char **argv)
setalarm(duration);
/* start threads */
threads = (TState *) malloc(sizeof(TState) * nthreads);
for (i = 0; i < nthreads; i++)
{
threads[i].tid = i;
threads[i].state = &state[nclients / nthreads * i];
threads[i].nstate = nclients / nthreads;
INSTR_TIME_SET_CURRENT(threads[i].start_time);
TState *thread = &threads[i];
INSTR_TIME_SET_CURRENT(thread->start_time);
/* the first thread (i = 0) is executed by main thread */
if (i > 0)
{
int err = pthread_create(&threads[i].thread, NULL, threadRun, &threads[i]);
int err = pthread_create(&thread->thread, NULL, threadRun, thread);
if (err != 0 || threads[i].thread == INVALID_THREAD)
if (err != 0 || thread->thread == INVALID_THREAD)
{
fprintf(stderr, "cannot create thread: %s\n", strerror(err));
exit(1);
@ -2120,7 +2240,7 @@ main(int argc, char **argv)
}
else
{
threads[i].thread = INVALID_THREAD;
thread->thread = INVALID_THREAD;
}
}
@ -2150,7 +2270,8 @@ main(int argc, char **argv)
/* get end time */
INSTR_TIME_SET_CURRENT(total_time);
INSTR_TIME_SUBTRACT(total_time, start_time);
printResults(ttype, total_xacts, nclients, nthreads, total_time, conn_total_time);
printResults(ttype, total_xacts, nclients, threads, nthreads,
total_time, conn_total_time);
return 0;
}
@ -2211,7 +2332,7 @@ threadRun(void *arg)
int prev_ecnt = st->ecnt;
st->use_file = getrand(0, num_files - 1);
if (!doCustom(st, &result->conn_time, logfile))
if (!doCustom(thread, st, &result->conn_time, logfile))
remains--; /* I've aborted */
if (st->ecnt > prev_ecnt && commands[st->state]->type == META_COMMAND)
@ -2313,7 +2434,7 @@ threadRun(void *arg)
if (st->con && (FD_ISSET(PQsocket(st->con), &input_mask)
|| commands[st->state]->type == META_COMMAND))
{
if (!doCustom(st, &result->conn_time, logfile))
if (!doCustom(thread, st, &result->conn_time, logfile))
remains--; /* I've aborted */
}

View File

@ -1,4 +1,4 @@
<!-- $PostgreSQL: pgsql/doc/src/sgml/pgbench.sgml,v 1.17 2010/07/29 19:34:40 petere Exp $ -->
<!-- $PostgreSQL: pgsql/doc/src/sgml/pgbench.sgml,v 1.18 2010/08/12 20:39:39 tgl Exp $ -->
<sect1 id="pgbench">
<title>pgbench</title>
@ -38,7 +38,9 @@ tps = 85.296346 (excluding connections establishing)
settings. The next line reports the number of transactions completed
and intended (the latter being just the product of number of clients
and number of transactions per client); these will be equal unless the run
failed before completion. The last two lines report the TPS rate,
failed before completion. (In <literal>-T</> mode, only the actual
number of transactions is printed.)
The last two lines report the TPS rate,
figured with and without counting the time to start database sessions.
</para>
@ -125,6 +127,15 @@ pgbench <optional> <replaceable>options</> </optional> <replaceable>dbname</>
<variablelist>
<varlistentry>
<term><option>-i</option></term>
<listitem>
<para>
Required to invoke initialization mode.
</para>
</listitem>
</varlistentry>
<varlistentry>
<term><option>-F</option> <replaceable>fillfactor</></term>
<listitem>
@ -137,15 +148,6 @@ pgbench <optional> <replaceable>options</> </optional> <replaceable>dbname</>
</listitem>
</varlistentry>
<varlistentry>
<term><option>-i</option></term>
<listitem>
<para>
Required to invoke initialization mode.
</para>
</listitem>
</varlistentry>
<varlistentry>
<term><option>-s</option> <replaceable>scale_factor</></term>
<listitem>
@ -294,6 +296,17 @@ pgbench <optional> <replaceable>options</> </optional> <replaceable>dbname</>
</listitem>
</varlistentry>
<varlistentry>
<term><option>-r</option></term>
<listitem>
<para>
Report the average per-statement latency (execution time from the
perspective of the client) of each command after the benchmark
finishes. See below for details.
</para>
</listitem>
</varlistentry>
<varlistentry>
<term><option>-s</option> <replaceable>scale_factor</></term>
<listitem>
@ -618,7 +631,7 @@ END;
<replaceable>nnn</> is the PID of the pgbench process.
If the <literal>-j</> option is 2 or higher, creating multiple worker
threads, each will have its own log file. The first worker will use the
same name for its log file as in the standard single worker case.
same name for its log file as in the standard single worker case.
The additional log files for the other workers will be named
<filename>pgbench_log.<replaceable>nnn</>.<replaceable>mmm</></filename>,
where <replaceable>mmm</> is a sequential number for each worker starting
@ -632,7 +645,7 @@ END;
<replaceable>client_id</> <replaceable>transaction_no</> <replaceable>time</> <replaceable>file_no</> <replaceable>time_epoch</> <replaceable>time_us</>
</synopsis>
where <replaceable>time</> is the elapsed transaction time in microseconds,
where <replaceable>time</> is the total elapsed transaction time in microseconds,
<replaceable>file_no</> identifies which script file was used
(useful when multiple scripts were specified with <literal>-f</>),
and <replaceable>time_epoch</>/<replaceable>time_us</> are a
@ -653,6 +666,62 @@ END;
</para>
</sect2>
<sect2>
<title>Per-statement latencies</title>
<para>
With the <literal>-r</> option, <application>pgbench</> collects
the elapsed transaction time of each statement executed by every
client. It then reports an average of those values, referred to
as the latency for each statement, after the benchmark has finished.
</para>
<para>
For the default script, the output will look similar to this:
<screen>
starting vacuum...end.
transaction type: TPC-B (sort of)
scaling factor: 1
query mode: simple
number of clients: 10
number of threads: 1
number of transactions per client: 1000
number of transactions actually processed: 10000/10000
tps = 618.764555 (including connections establishing)
tps = 622.977698 (excluding connections establishing)
statement latencies in milliseconds:
0.004386 \set nbranches 1 * :scale
0.001343 \set ntellers 10 * :scale
0.001212 \set naccounts 100000 * :scale
0.001310 \setrandom aid 1 :naccounts
0.001073 \setrandom bid 1 :nbranches
0.001005 \setrandom tid 1 :ntellers
0.001078 \setrandom delta -5000 5000
0.326152 BEGIN;
0.603376 UPDATE pgbench_accounts SET abalance = abalance + :delta WHERE aid = :aid;
0.454643 SELECT abalance FROM pgbench_accounts WHERE aid = :aid;
5.528491 UPDATE pgbench_tellers SET tbalance = tbalance + :delta WHERE tid = :tid;
7.335435 UPDATE pgbench_branches SET bbalance = bbalance + :delta WHERE bid = :bid;
0.371851 INSERT INTO pgbench_history (tid, bid, aid, delta, mtime) VALUES (:tid, :bid, :aid, :delta, CURRENT_TIMESTAMP);
1.212976 END;
</screen>
</para>
<para>
If multiple script files are specified, the averages are reported
separately for each script file.
</para>
<para>
Note that collecting the additional timing information needed for
per-statement latency computation adds some overhead. This will slow
average execution speed and lower the computed TPS. The amount
of slowdown varies significantly depending on platform and hardware.
Comparing average TPS values with and without latency reporting enabled
is a good way to measure if the timing overhead is significant.
</para>
</sect2>
<sect2>
<title>Good Practices</title>