pgbench: Improve time logic.

Instead of instr_time (struct timespec) and the INSTR_XXX macros,
introduce pg_time_usec_t and use integer arithmetic.  Don't include the
connection time in TPS unless using -C mode, but report it separately.

Author: Fabien COELHO <coelho@cri.ensmp.fr>
Reviewed-by: Kyotaro Horiguchi <horikyota.ntt@gmail.com>
Reviewed-by: Hayato Kuroda <kuroda.hayato@fujitsu.com>
Discussion: https://postgr.es/m/20200227180100.zyvjwzcpiokfsqm2%40alap3.anarazel.de
This commit is contained in:
Thomas Munro 2021-03-10 16:09:50 +13:00
parent b1d6a8f868
commit 547f04e734
4 changed files with 232 additions and 245 deletions

View File

@ -58,8 +58,10 @@ number of clients: 10
number of threads: 1 number of threads: 1
number of transactions per client: 1000 number of transactions per client: 1000
number of transactions actually processed: 10000/10000 number of transactions actually processed: 10000/10000
tps = 85.184871 (including connections establishing) latency average = 11.013 ms
tps = 85.296346 (excluding connections establishing) latency stddev = 7.351 ms
initial connection time = 45.758 ms
tps = 896.967014 (without initial connection time)
</screen> </screen>
The first six lines report some of the most important parameter The first six lines report some of the most important parameter
@ -68,8 +70,7 @@ tps = 85.296346 (excluding connections establishing)
and number of transactions per client); these will be equal unless the run and number of transactions per client); these will be equal unless the run
failed before completion. (In <option>-T</option> mode, only the actual failed before completion. (In <option>-T</option> mode, only the actual
number of transactions is printed.) number of transactions is printed.)
The last two lines report the number of transactions per second, The last line reports the number of transactions per second.
figured with and without counting the time to start database sessions.
</para> </para>
<para> <para>
@ -2257,22 +2258,22 @@ number of clients: 10
number of threads: 1 number of threads: 1
number of transactions per client: 1000 number of transactions per client: 1000
number of transactions actually processed: 10000/10000 number of transactions actually processed: 10000/10000
latency average = 15.844 ms latency average = 10.870 ms
latency stddev = 2.715 ms latency stddev = 7.341 ms
tps = 618.764555 (including connections establishing) initial connection time = 30.954 ms
tps = 622.977698 (excluding connections establishing) tps = 907.949122 (without initial connection time)
statement latencies in milliseconds: statement latencies in milliseconds:
0.002 \set aid random(1, 100000 * :scale) 0.001 \set aid random(1, 100000 * :scale)
0.005 \set bid random(1, 1 * :scale) 0.001 \set bid random(1, 1 * :scale)
0.002 \set tid random(1, 10 * :scale) 0.001 \set tid random(1, 10 * :scale)
0.001 \set delta random(-5000, 5000) 0.000 \set delta random(-5000, 5000)
0.326 BEGIN; 0.046 BEGIN;
0.603 UPDATE pgbench_accounts SET abalance = abalance + :delta WHERE aid = :aid; 0.151 UPDATE pgbench_accounts SET abalance = abalance + :delta WHERE aid = :aid;
0.454 SELECT abalance FROM pgbench_accounts WHERE aid = :aid; 0.107 SELECT abalance FROM pgbench_accounts WHERE aid = :aid;
5.528 UPDATE pgbench_tellers SET tbalance = tbalance + :delta WHERE tid = :tid; 4.241 UPDATE pgbench_tellers SET tbalance = tbalance + :delta WHERE tid = :tid;
7.335 UPDATE pgbench_branches SET bbalance = bbalance + :delta WHERE bid = :bid; 5.245 UPDATE pgbench_branches SET bbalance = bbalance + :delta WHERE bid = :bid;
0.371 INSERT INTO pgbench_history (tid, bid, aid, delta, mtime) VALUES (:tid, :bid, :aid, :delta, CURRENT_TIMESTAMP); 0.102 INSERT INTO pgbench_history (tid, bid, aid, delta, mtime) VALUES (:tid, :bid, :aid, :delta, CURRENT_TIMESTAMP);
1.212 END; 0.974 END;
</screen> </screen>
</para> </para>

View File

@ -307,7 +307,7 @@ typedef struct SimpleStats
*/ */
typedef struct StatsData typedef struct StatsData
{ {
time_t start_time; /* interval start time, for aggregates */ pg_time_usec_t start_time; /* interval start time, for aggregates */
int64 cnt; /* number of transactions, including skipped */ int64 cnt; /* number of transactions, including skipped */
int64 skipped; /* number of transactions skipped under --rate int64 skipped; /* number of transactions skipped under --rate
* and --latency-limit */ * and --latency-limit */
@ -432,11 +432,11 @@ typedef struct
int nvariables; /* number of variables */ int nvariables; /* number of variables */
bool vars_sorted; /* are variables sorted by name? */ bool vars_sorted; /* are variables sorted by name? */
/* various times about current transaction */ /* various times about current transaction in microseconds */
int64 txn_scheduled; /* scheduled start time of transaction (usec) */ pg_time_usec_t txn_scheduled; /* scheduled start time of transaction */
int64 sleep_until; /* scheduled start time of next cmd (usec) */ pg_time_usec_t sleep_until; /* scheduled start time of next cmd */
instr_time txn_begin; /* used for measuring schedule lag times */ pg_time_usec_t txn_begin; /* used for measuring schedule lag times */
instr_time stmt_begin; /* used for measuring statement latencies */ pg_time_usec_t stmt_begin; /* used for measuring statement latencies */
bool prepared[MAX_SCRIPTS]; /* whether client prepared the script */ bool prepared[MAX_SCRIPTS]; /* whether client prepared the script */
@ -466,11 +466,15 @@ typedef struct
int64 throttle_trigger; /* previous/next throttling (us) */ int64 throttle_trigger; /* previous/next throttling (us) */
FILE *logfile; /* where to log, or NULL */ FILE *logfile; /* where to log, or NULL */
/* per thread collected stats */ /* per thread collected stats in microseconds */
instr_time start_time; /* thread start time */ pg_time_usec_t create_time; /* thread creation time */
instr_time conn_time; pg_time_usec_t started_time; /* thread is running */
pg_time_usec_t bench_start; /* thread is benchmarking */
pg_time_usec_t conn_duration; /* cumulated connection and deconnection
* delays */
StatsData stats; StatsData stats;
int64 latency_late; /* executed but late transactions */ int64 latency_late; /* count executed but late transactions */
} TState; } TState;
/* /*
@ -610,10 +614,10 @@ static void setIntValue(PgBenchValue *pv, int64 ival);
static void setDoubleValue(PgBenchValue *pv, double dval); static void setDoubleValue(PgBenchValue *pv, double dval);
static bool evaluateExpr(CState *st, PgBenchExpr *expr, static bool evaluateExpr(CState *st, PgBenchExpr *expr,
PgBenchValue *retval); PgBenchValue *retval);
static ConnectionStateEnum executeMetaCommand(CState *st, instr_time *now); static ConnectionStateEnum executeMetaCommand(CState *st, pg_time_usec_t *now);
static void doLog(TState *thread, CState *st, static void doLog(TState *thread, CState *st,
StatsData *agg, bool skipped, double latency, double lag); StatsData *agg, bool skipped, double latency, double lag);
static void processXactStats(TState *thread, CState *st, instr_time *now, static void processXactStats(TState *thread, CState *st, pg_time_usec_t *now,
bool skipped, StatsData *agg); bool skipped, StatsData *agg);
static void addScript(ParsedScript script); static void addScript(ParsedScript script);
static THREAD_FUNC_RETURN_TYPE threadRun(void *arg); static THREAD_FUNC_RETURN_TYPE threadRun(void *arg);
@ -1117,9 +1121,9 @@ mergeSimpleStats(SimpleStats *acc, SimpleStats *ss)
* the given value. * the given value.
*/ */
static void static void
initStats(StatsData *sd, time_t start_time) initStats(StatsData *sd, pg_time_usec_t start)
{ {
sd->start_time = start_time; sd->start_time = start;
sd->cnt = 0; sd->cnt = 0;
sd->skipped = 0; sd->skipped = 0;
initSimpleStats(&sd->latency); initSimpleStats(&sd->latency);
@ -2898,7 +2902,6 @@ evaluateSleep(CState *st, int argc, char **argv, int *usecs)
static void static void
advanceConnectionState(TState *thread, CState *st, StatsData *agg) advanceConnectionState(TState *thread, CState *st, StatsData *agg)
{ {
instr_time now;
/* /*
* gettimeofday() isn't free, so we get the current timestamp lazily the * gettimeofday() isn't free, so we get the current timestamp lazily the
@ -2908,7 +2911,7 @@ advanceConnectionState(TState *thread, CState *st, StatsData *agg)
* means "not set yet". Reset "now" when we execute shell commands or * means "not set yet". Reset "now" when we execute shell commands or
* expressions, which might take a non-negligible amount of time, though. * expressions, which might take a non-negligible amount of time, though.
*/ */
INSTR_TIME_SET_ZERO(now); pg_time_usec_t now = 0;
/* /*
* Loop in the state machine, until we have to wait for a result from the * Loop in the state machine, until we have to wait for a result from the
@ -2943,29 +2946,30 @@ advanceConnectionState(TState *thread, CState *st, StatsData *agg)
/* Start new transaction (script) */ /* Start new transaction (script) */
case CSTATE_START_TX: case CSTATE_START_TX:
pg_time_now_lazy(&now);
/* establish connection if needed, i.e. under --connect */ /* establish connection if needed, i.e. under --connect */
if (st->con == NULL) if (st->con == NULL)
{ {
instr_time start; pg_time_usec_t start = now;
INSTR_TIME_SET_CURRENT_LAZY(now);
start = now;
if ((st->con = doConnect()) == NULL) if ((st->con = doConnect()) == NULL)
{ {
pg_log_error("client %d aborted while establishing connection", st->id); pg_log_error("client %d aborted while establishing connection", st->id);
st->state = CSTATE_ABORTED; st->state = CSTATE_ABORTED;
break; break;
} }
INSTR_TIME_SET_CURRENT(now);
INSTR_TIME_ACCUM_DIFF(thread->conn_time, now, start); /* reset now after connection */
now = pg_time_now();
thread->conn_duration += now - start;
/* Reset session-local state */ /* Reset session-local state */
memset(st->prepared, 0, sizeof(st->prepared)); memset(st->prepared, 0, sizeof(st->prepared));
} }
/* record transaction start time */ /* record transaction start time */
INSTR_TIME_SET_CURRENT_LAZY(now);
st->txn_begin = now; st->txn_begin = now;
/* /*
@ -2973,7 +2977,7 @@ advanceConnectionState(TState *thread, CState *st, StatsData *agg)
* scheduled start time. * scheduled start time.
*/ */
if (!throttle_delay) if (!throttle_delay)
st->txn_scheduled = INSTR_TIME_GET_MICROSEC(now); st->txn_scheduled = now;
/* Begin with the first command */ /* Begin with the first command */
st->state = CSTATE_START_COMMAND; st->state = CSTATE_START_COMMAND;
@ -3009,12 +3013,9 @@ advanceConnectionState(TState *thread, CState *st, StatsData *agg)
*/ */
if (latency_limit) if (latency_limit)
{ {
int64 now_us; pg_time_now_lazy(&now);
INSTR_TIME_SET_CURRENT_LAZY(now); while (thread->throttle_trigger < now - latency_limit &&
now_us = INSTR_TIME_GET_MICROSEC(now);
while (thread->throttle_trigger < now_us - latency_limit &&
(nxacts <= 0 || st->cnt < nxacts)) (nxacts <= 0 || st->cnt < nxacts))
{ {
processXactStats(thread, st, &now, true, agg); processXactStats(thread, st, &now, true, agg);
@ -3047,9 +3048,9 @@ advanceConnectionState(TState *thread, CState *st, StatsData *agg)
* Wait until it's time to start next transaction. * Wait until it's time to start next transaction.
*/ */
case CSTATE_THROTTLE: case CSTATE_THROTTLE:
INSTR_TIME_SET_CURRENT_LAZY(now); pg_time_now_lazy(&now);
if (INSTR_TIME_GET_MICROSEC(now) < st->txn_scheduled) if (now < st->txn_scheduled)
return; /* still sleeping, nothing to do here */ return; /* still sleeping, nothing to do here */
/* done sleeping, but don't start transaction if we're done */ /* done sleeping, but don't start transaction if we're done */
@ -3072,7 +3073,7 @@ advanceConnectionState(TState *thread, CState *st, StatsData *agg)
/* record begin time of next command, and initiate it */ /* record begin time of next command, and initiate it */
if (report_per_command) if (report_per_command)
{ {
INSTR_TIME_SET_CURRENT_LAZY(now); pg_time_now_lazy(&now);
st->stmt_begin = now; st->stmt_begin = now;
} }
@ -3237,8 +3238,8 @@ advanceConnectionState(TState *thread, CState *st, StatsData *agg)
* instead of CSTATE_START_TX. * instead of CSTATE_START_TX.
*/ */
case CSTATE_SLEEP: case CSTATE_SLEEP:
INSTR_TIME_SET_CURRENT_LAZY(now); pg_time_now_lazy(&now);
if (INSTR_TIME_GET_MICROSEC(now) < st->sleep_until) if (now < st->sleep_until)
return; /* still sleeping, nothing to do here */ return; /* still sleeping, nothing to do here */
/* Else done sleeping. */ /* Else done sleeping. */
st->state = CSTATE_END_COMMAND; st->state = CSTATE_END_COMMAND;
@ -3258,13 +3259,12 @@ advanceConnectionState(TState *thread, CState *st, StatsData *agg)
{ {
Command *command; Command *command;
INSTR_TIME_SET_CURRENT_LAZY(now); pg_time_now_lazy(&now);
command = sql_script[st->use_file].commands[st->command]; command = sql_script[st->use_file].commands[st->command];
/* XXX could use a mutex here, but we choose not to */ /* XXX could use a mutex here, but we choose not to */
addToSimpleStats(&command->stats, addToSimpleStats(&command->stats,
INSTR_TIME_GET_DOUBLE(now) - PG_TIME_GET_DOUBLE(now - st->stmt_begin));
INSTR_TIME_GET_DOUBLE(st->stmt_begin));
} }
/* Go ahead with next command, to be executed or skipped */ /* Go ahead with next command, to be executed or skipped */
@ -3290,7 +3290,7 @@ advanceConnectionState(TState *thread, CState *st, StatsData *agg)
if (is_connect) if (is_connect)
{ {
finishCon(st); finishCon(st);
INSTR_TIME_SET_ZERO(now); now = 0;
} }
if ((st->cnt >= nxacts && duration <= 0) || timer_exceeded) if ((st->cnt >= nxacts && duration <= 0) || timer_exceeded)
@ -3328,7 +3328,7 @@ advanceConnectionState(TState *thread, CState *st, StatsData *agg)
* take no time to execute. * take no time to execute.
*/ */
static ConnectionStateEnum static ConnectionStateEnum
executeMetaCommand(CState *st, instr_time *now) executeMetaCommand(CState *st, pg_time_usec_t *now)
{ {
Command *command = sql_script[st->use_file].commands[st->command]; Command *command = sql_script[st->use_file].commands[st->command];
int argc; int argc;
@ -3370,8 +3370,8 @@ executeMetaCommand(CState *st, instr_time *now)
return CSTATE_ABORTED; return CSTATE_ABORTED;
} }
INSTR_TIME_SET_CURRENT_LAZY(*now); pg_time_now_lazy(now);
st->sleep_until = INSTR_TIME_GET_MICROSEC(*now) + usec; st->sleep_until = (*now) + usec;
return CSTATE_SLEEP; return CSTATE_SLEEP;
} }
else if (command->meta == META_SET) else if (command->meta == META_SET)
@ -3474,7 +3474,7 @@ executeMetaCommand(CState *st, instr_time *now)
* executing the expression or shell command might have taken a * executing the expression or shell command might have taken a
* non-negligible amount of time, so reset 'now' * non-negligible amount of time, so reset 'now'
*/ */
INSTR_TIME_SET_ZERO(*now); *now = 0;
return CSTATE_END_COMMAND; return CSTATE_END_COMMAND;
} }
@ -3484,14 +3484,15 @@ executeMetaCommand(CState *st, instr_time *now)
* *
* We print Unix-epoch timestamps in the log, so that entries can be * We print Unix-epoch timestamps in the log, so that entries can be
* correlated against other logs. On some platforms this could be obtained * correlated against other logs. On some platforms this could be obtained
* from the instr_time reading the caller has, but rather than get entangled * from the caller, but rather than get entangled with that, we just eat
* with that, we just eat the cost of an extra syscall in all cases. * the cost of an extra syscall in all cases.
*/ */
static void static void
doLog(TState *thread, CState *st, doLog(TState *thread, CState *st,
StatsData *agg, bool skipped, double latency, double lag) StatsData *agg, bool skipped, double latency, double lag)
{ {
FILE *logfile = thread->logfile; FILE *logfile = thread->logfile;
pg_time_usec_t now = pg_time_now();
Assert(use_log); Assert(use_log);
@ -3511,13 +3512,12 @@ doLog(TState *thread, CState *st,
* any empty intervals in between (this may happen with very low tps, * any empty intervals in between (this may happen with very low tps,
* e.g. --rate=0.1). * e.g. --rate=0.1).
*/ */
time_t now = time(NULL);
while (agg->start_time + agg_interval <= now) while (agg->start_time + agg_interval <= now)
{ {
/* print aggregated report to logfile */ /* print aggregated report to logfile */
fprintf(logfile, "%ld " INT64_FORMAT " %.0f %.0f %.0f %.0f", fprintf(logfile, INT64_FORMAT " " INT64_FORMAT " %.0f %.0f %.0f %.0f",
(long) agg->start_time, agg->start_time,
agg->cnt, agg->cnt,
agg->latency.sum, agg->latency.sum,
agg->latency.sum2, agg->latency.sum2,
@ -3545,17 +3545,13 @@ doLog(TState *thread, CState *st,
else else
{ {
/* no, print raw transactions */ /* no, print raw transactions */
struct timeval tv;
gettimeofday(&tv, NULL);
if (skipped) if (skipped)
fprintf(logfile, "%d " INT64_FORMAT " skipped %d %ld %ld", fprintf(logfile, "%d " INT64_FORMAT " skipped %d %ld %ld",
st->id, st->cnt, st->use_file, st->id, st->cnt, st->use_file, now / 1000000, now % 1000000);
(long) tv.tv_sec, (long) tv.tv_usec);
else else
fprintf(logfile, "%d " INT64_FORMAT " %.0f %d %ld %ld", fprintf(logfile, "%d " INT64_FORMAT " %.0f %d %ld %ld",
st->id, st->cnt, latency, st->use_file, st->id, st->cnt, latency, st->use_file,
(long) tv.tv_sec, (long) tv.tv_usec); now / 1000000, now % 1000000);
if (throttle_delay) if (throttle_delay)
fprintf(logfile, " %.0f", lag); fprintf(logfile, " %.0f", lag);
fputc('\n', logfile); fputc('\n', logfile);
@ -3569,7 +3565,7 @@ doLog(TState *thread, CState *st,
* Note that even skipped transactions are counted in the "cnt" fields.) * Note that even skipped transactions are counted in the "cnt" fields.)
*/ */
static void static void
processXactStats(TState *thread, CState *st, instr_time *now, processXactStats(TState *thread, CState *st, pg_time_usec_t *now,
bool skipped, StatsData *agg) bool skipped, StatsData *agg)
{ {
double latency = 0.0, double latency = 0.0,
@ -3579,11 +3575,11 @@ processXactStats(TState *thread, CState *st, instr_time *now,
if (detailed && !skipped) if (detailed && !skipped)
{ {
INSTR_TIME_SET_CURRENT_LAZY(*now); pg_time_now_lazy(now);
/* compute latency & lag */ /* compute latency & lag */
latency = INSTR_TIME_GET_MICROSEC(*now) - st->txn_scheduled; latency = (*now) - st->txn_scheduled;
lag = INSTR_TIME_GET_MICROSEC(st->txn_begin) - st->txn_scheduled; lag = st->txn_begin - st->txn_scheduled;
} }
if (thread_details) if (thread_details)
@ -3834,10 +3830,7 @@ initGenerateDataClientSide(PGconn *con)
int64 k; int64 k;
/* used to track elapsed time and estimate of the remaining time */ /* used to track elapsed time and estimate of the remaining time */
instr_time start, pg_time_usec_t start;
diff;
double elapsed_sec,
remaining_sec;
int log_interval = 1; int log_interval = 1;
/* Stay on the same line if reporting to a terminal */ /* Stay on the same line if reporting to a terminal */
@ -3889,7 +3882,7 @@ initGenerateDataClientSide(PGconn *con)
} }
PQclear(res); PQclear(res);
INSTR_TIME_SET_CURRENT(start); start = pg_time_now();
for (k = 0; k < (int64) naccounts * scale; k++) for (k = 0; k < (int64) naccounts * scale; k++)
{ {
@ -3914,11 +3907,8 @@ initGenerateDataClientSide(PGconn *con)
*/ */
if ((!use_quiet) && (j % 100000 == 0)) if ((!use_quiet) && (j % 100000 == 0))
{ {
INSTR_TIME_SET_CURRENT(diff); double elapsed_sec = PG_TIME_GET_DOUBLE(pg_time_now() - start);
INSTR_TIME_SUBTRACT(diff, start); double remaining_sec = ((double) scale * naccounts - j) * elapsed_sec / j;
elapsed_sec = INSTR_TIME_GET_DOUBLE(diff);
remaining_sec = ((double) scale * naccounts - j) * elapsed_sec / j;
fprintf(stderr, INT64_FORMAT " of " INT64_FORMAT " tuples (%d%%) done (elapsed %.2f s, remaining %.2f s)%c", fprintf(stderr, INT64_FORMAT " of " INT64_FORMAT " tuples (%d%%) done (elapsed %.2f s, remaining %.2f s)%c",
j, (int64) naccounts * scale, j, (int64) naccounts * scale,
@ -3928,11 +3918,8 @@ initGenerateDataClientSide(PGconn *con)
/* let's not call the timing for each row, but only each 100 rows */ /* let's not call the timing for each row, but only each 100 rows */
else if (use_quiet && (j % 100 == 0)) else if (use_quiet && (j % 100 == 0))
{ {
INSTR_TIME_SET_CURRENT(diff); double elapsed_sec = PG_TIME_GET_DOUBLE(pg_time_now() - start);
INSTR_TIME_SUBTRACT(diff, start); double remaining_sec = ((double) scale * naccounts - j) * elapsed_sec / j;
elapsed_sec = INSTR_TIME_GET_DOUBLE(diff);
remaining_sec = ((double) scale * naccounts - j) * elapsed_sec / j;
/* have we reached the next interval (or end)? */ /* have we reached the next interval (or end)? */
if ((j == scale * naccounts) || (elapsed_sec >= log_interval * LOG_STEP_SECONDS)) if ((j == scale * naccounts) || (elapsed_sec >= log_interval * LOG_STEP_SECONDS))
@ -4137,10 +4124,8 @@ runInitSteps(const char *initialize_steps)
for (step = initialize_steps; *step != '\0'; step++) for (step = initialize_steps; *step != '\0'; step++)
{ {
instr_time start;
char *op = NULL; char *op = NULL;
pg_time_usec_t start = pg_time_now();
INSTR_TIME_SET_CURRENT(start);
switch (*step) switch (*step)
{ {
@ -4182,12 +4167,7 @@ runInitSteps(const char *initialize_steps)
if (op != NULL) if (op != NULL)
{ {
instr_time diff; double elapsed_sec = PG_TIME_GET_DOUBLE(pg_time_now() - start);
double elapsed_sec;
INSTR_TIME_SET_CURRENT(diff);
INSTR_TIME_SUBTRACT(diff, start);
elapsed_sec = INSTR_TIME_GET_DOUBLE(diff);
if (!first) if (!first)
appendPQExpBufferStr(&stats, ", "); appendPQExpBufferStr(&stats, ", ");
@ -5109,12 +5089,12 @@ addScript(ParsedScript script)
* progress report. On exit, they are updated with the new stats. * progress report. On exit, they are updated with the new stats.
*/ */
static void static void
printProgressReport(TState *threads, int64 test_start, int64 now, printProgressReport(TState *threads, int64 test_start, pg_time_usec_t now,
StatsData *last, int64 *last_report) StatsData *last, int64 *last_report)
{ {
/* generate and show report */ /* generate and show report */
int64 run = now - *last_report, pg_time_usec_t run = now - *last_report;
ntx; int64 ntx;
double tps, double tps,
total_run, total_run,
latency, latency,
@ -5161,16 +5141,7 @@ printProgressReport(TState *threads, int64 test_start, int64 now,
if (progress_timestamp) if (progress_timestamp)
{ {
/* snprintf(tbuf, sizeof(tbuf), "%.3f s", PG_TIME_GET_DOUBLE(now));
* On some platforms the current system timestamp is available in
* now_time, but rather than get entangled with that, we just eat the
* cost of an extra syscall in all cases.
*/
struct timeval tv;
gettimeofday(&tv, NULL);
snprintf(tbuf, sizeof(tbuf), "%ld.%03ld s",
(long) tv.tv_sec, (long) (tv.tv_usec / 1000));
} }
else else
{ {
@ -5210,21 +5181,18 @@ printSimpleStats(const char *prefix, SimpleStats *ss)
/* print out results */ /* print out results */
static void static void
printResults(StatsData *total, instr_time total_time, printResults(StatsData *total,
instr_time conn_total_time, int64 latency_late) pg_time_usec_t total_duration, /* benchmarking time */
pg_time_usec_t conn_total_duration, /* is_connect */
pg_time_usec_t conn_elapsed_duration, /* !is_connect */
int64 latency_late)
{ {
double time_include, /* tps is about actually executed transactions during benchmarking */
tps_include,
tps_exclude;
int64 ntx = total->cnt - total->skipped; int64 ntx = total->cnt - total->skipped;
double bench_duration = PG_TIME_GET_DOUBLE(total_duration);
double tps = ntx / bench_duration;
time_include = INSTR_TIME_GET_DOUBLE(total_time); printf("pgbench (PostgreSQL) %d.%d\n", PG_VERSION_NUM / 10000, PG_VERSION_NUM % 100);
/* tps is about actually executed transactions */
tps_include = ntx / time_include;
tps_exclude = ntx /
(time_include - (INSTR_TIME_GET_DOUBLE(conn_total_time) / nclients));
/* Report test parameters. */ /* Report test parameters. */
printf("transaction type: %s\n", printf("transaction type: %s\n",
num_scripts == 1 ? sql_script[0].desc : "multiple scripts"); num_scripts == 1 ? sql_script[0].desc : "multiple scripts");
@ -5255,8 +5223,7 @@ printResults(StatsData *total, instr_time total_time,
if (throttle_delay && latency_limit) if (throttle_delay && latency_limit)
printf("number of transactions skipped: " INT64_FORMAT " (%.3f %%)\n", printf("number of transactions skipped: " INT64_FORMAT " (%.3f %%)\n",
total->skipped, total->skipped, 100.0 * total->skipped / total->cnt);
100.0 * total->skipped / total->cnt);
if (latency_limit) if (latency_limit)
printf("number of transactions above the %.1f ms latency limit: " INT64_FORMAT "/" INT64_FORMAT " (%.3f %%)\n", printf("number of transactions above the %.1f ms latency limit: " INT64_FORMAT "/" INT64_FORMAT " (%.3f %%)\n",
@ -5269,7 +5236,7 @@ printResults(StatsData *total, instr_time total_time,
{ {
/* no measurement, show average latency computed from run time */ /* no measurement, show average latency computed from run time */
printf("latency average = %.3f ms\n", printf("latency average = %.3f ms\n",
1000.0 * time_include * nclients / total->cnt); 0.001 * total_duration * nclients / total->cnt);
} }
if (throttle_delay) if (throttle_delay)
@ -5284,8 +5251,25 @@ printResults(StatsData *total, instr_time total_time,
0.001 * total->lag.sum / total->cnt, 0.001 * total->lag.max); 0.001 * total->lag.sum / total->cnt, 0.001 * total->lag.max);
} }
printf("tps = %f (including connections establishing)\n", tps_include); /*
printf("tps = %f (excluding connections establishing)\n", tps_exclude); * Under -C/--connect, each transaction incurs a significant connection
* cost, it would not make much sense to ignore it in tps, and it would
* not be tps anyway.
*
* Otherwise connections are made just once at the beginning of the run
* and should not impact performance but for very short run, so they are
* (right)fully ignored in tps.
*/
if (is_connect)
{
printf("average connection time = %.3f ms\n", 0.001 * conn_total_duration / total->cnt);
printf("tps = %f (including reconnection times)\n", tps);
}
else
{
printf("initial connection time = %.3f ms\n", 0.001 * conn_elapsed_duration);
printf("tps = %f (without initial connection time)\n", tps);
}
/* Report per-script/command statistics */ /* Report per-script/command statistics */
if (per_script_stats || report_per_command) if (per_script_stats || report_per_command)
@ -5306,7 +5290,7 @@ printResults(StatsData *total, instr_time total_time,
100.0 * sql_script[i].weight / total_weight, 100.0 * sql_script[i].weight / total_weight,
sstats->cnt, sstats->cnt,
100.0 * sstats->cnt / total->cnt, 100.0 * sstats->cnt / total->cnt,
(sstats->cnt - sstats->skipped) / time_include); (sstats->cnt - sstats->skipped) / bench_duration);
if (throttle_delay && latency_limit && sstats->cnt > 0) if (throttle_delay && latency_limit && sstats->cnt > 0)
printf(" - number of transactions skipped: " INT64_FORMAT " (%.3f%%)\n", printf(" - number of transactions skipped: " INT64_FORMAT " (%.3f%%)\n",
@ -5354,10 +5338,7 @@ set_random_seed(const char *seed)
if (seed == NULL || strcmp(seed, "time") == 0) if (seed == NULL || strcmp(seed, "time") == 0)
{ {
/* rely on current time */ /* rely on current time */
instr_time now; iseed = pg_time_now();
INSTR_TIME_SET_CURRENT(now);
iseed = (uint64) INSTR_TIME_GET_MICROSEC(now);
} }
else if (strcmp(seed, "rand") == 0) else if (strcmp(seed, "rand") == 0)
{ {
@ -5460,9 +5441,11 @@ main(int argc, char **argv)
CState *state; /* status of clients */ CState *state; /* status of clients */
TState *threads; /* array of thread */ TState *threads; /* array of thread */
instr_time start_time; /* start up time */ pg_time_usec_t
instr_time total_time; start_time, /* start up time */
instr_time conn_total_time; bench_start = 0, /* first recorded benchmarking time */
conn_total_duration; /* cumulated connection time in
* threads */
int64 latency_late = 0; int64 latency_late = 0;
StatsData stats; StatsData stats;
int weight; int weight;
@ -6131,62 +6114,51 @@ main(int argc, char **argv)
/* all clients must be assigned to a thread */ /* all clients must be assigned to a thread */
Assert(nclients_dealt == nclients); Assert(nclients_dealt == nclients);
/* get start up time */ /* get start up time for the whole computation */
INSTR_TIME_SET_CURRENT(start_time); start_time = pg_time_now();
/* set alarm if duration is specified. */ /* set alarm if duration is specified. */
if (duration > 0) if (duration > 0)
setalarm(duration); setalarm(duration);
/* start threads */
#ifdef ENABLE_THREAD_SAFETY #ifdef ENABLE_THREAD_SAFETY
for (i = 0; i < nthreads; i++) /* start all threads but thread 0 which is executed directly later */
for (i = 1; i < nthreads; i++)
{ {
TState *thread = &threads[i]; TState *thread = &threads[i];
INSTR_TIME_SET_CURRENT(thread->start_time); thread->create_time = pg_time_now();
errno = THREAD_CREATE(&thread->thread, threadRun, thread);
/* compute when to stop */ if (errno != 0)
if (duration > 0)
end_time = INSTR_TIME_GET_MICROSEC(thread->start_time) +
(int64) 1000000 * duration;
/* the first thread (i = 0) is executed by main thread */
if (i > 0)
{ {
errno = THREAD_CREATE(&thread->thread, threadRun, thread); pg_log_fatal("could not create thread: %m");
exit(1);
if (errno != 0)
{
pg_log_fatal("could not create thread: %m");
exit(1);
}
} }
} }
#else #else
INSTR_TIME_SET_CURRENT(threads[0].start_time); Assert(nthreads == 1);
/* compute when to stop */
if (duration > 0)
end_time = INSTR_TIME_GET_MICROSEC(threads[0].start_time) +
(int64) 1000000 * duration;
#endif /* ENABLE_THREAD_SAFETY */ #endif /* ENABLE_THREAD_SAFETY */
/* wait for threads and accumulate results */ /* compute when to stop */
threads[0].create_time = pg_time_now();
if (duration > 0)
end_time = threads[0].create_time + (int64) 1000000 * duration;
/* run thread 0 directly */
(void) threadRun(&threads[0]);
/* wait for other threads and accumulate results */
initStats(&stats, 0); initStats(&stats, 0);
INSTR_TIME_SET_ZERO(conn_total_time); conn_total_duration = 0;
for (i = 0; i < nthreads; i++) for (i = 0; i < nthreads; i++)
{ {
TState *thread = &threads[i]; TState *thread = &threads[i];
#ifdef ENABLE_THREAD_SAFETY #ifdef ENABLE_THREAD_SAFETY
if (i == 0) if (i > 0)
/* actually run this thread directly in the main thread */
(void) threadRun(thread);
else
/* wait of other threads. should check that 0 is returned? */
THREAD_JOIN(thread->thread); THREAD_JOIN(thread->thread);
#else
(void) threadRun(thread);
#endif /* ENABLE_THREAD_SAFETY */ #endif /* ENABLE_THREAD_SAFETY */
for (int j = 0; j < thread->nstate; j++) for (int j = 0; j < thread->nstate; j++)
@ -6199,23 +6171,25 @@ main(int argc, char **argv)
stats.cnt += thread->stats.cnt; stats.cnt += thread->stats.cnt;
stats.skipped += thread->stats.skipped; stats.skipped += thread->stats.skipped;
latency_late += thread->latency_late; latency_late += thread->latency_late;
INSTR_TIME_ADD(conn_total_time, thread->conn_time); conn_total_duration += thread->conn_duration;
/* first recorded benchmarking start time */
if (bench_start == 0 || thread->bench_start < bench_start)
bench_start = thread->bench_start;
} }
/* XXX should this be connection time? */
disconnect_all(state, nclients); disconnect_all(state, nclients);
/* /*
* XXX We compute results as though every client of every thread started * Beware that performance of short benchmarks with many threads and
* and finished at the same time. That model can diverge noticeably from * possibly long transactions can be deceptive because threads do not
* reality for a short benchmark run involving relatively many threads. * start and finish at the exact same time. The total duration computed
* The first thread may process notably many transactions before the last * here encompasses all transactions so that tps shown is somehow slightly
* thread begins. Improving the model alone would bring limited benefit, * underestimated.
* because performance during those periods of partial thread count can
* easily exceed steady state performance. This is one of the many ways
* short runs convey deceptive performance figures.
*/ */
INSTR_TIME_SET_CURRENT(total_time); printResults(&stats, pg_time_now() - bench_start, conn_total_duration,
INSTR_TIME_SUBTRACT(total_time, start_time); bench_start - start_time, latency_late);
printResults(&stats, total_time, conn_total_time, latency_late);
if (exit_code != 0) if (exit_code != 0)
pg_log_fatal("Run was aborted; the above results are incomplete."); pg_log_fatal("Run was aborted; the above results are incomplete.");
@ -6228,34 +6202,16 @@ threadRun(void *arg)
{ {
TState *thread = (TState *) arg; TState *thread = (TState *) arg;
CState *state = thread->state; CState *state = thread->state;
instr_time start, pg_time_usec_t start;
end;
int nstate = thread->nstate; int nstate = thread->nstate;
int remains = nstate; /* number of remaining clients */ int remains = nstate; /* number of remaining clients */
socket_set *sockets = alloc_socket_set(nstate); socket_set *sockets = alloc_socket_set(nstate);
int i; int64 thread_start,
last_report,
/* for reporting progress: */ next_report;
int64 thread_start = INSTR_TIME_GET_MICROSEC(thread->start_time);
int64 last_report = thread_start;
int64 next_report = last_report + (int64) progress * 1000000;
StatsData last, StatsData last,
aggs; aggs;
/*
* Initialize throttling rate target for all of the thread's clients. It
* might be a little more accurate to reset thread->start_time here too.
* The possible drift seems too small relative to typical throttle delay
* times to worry about it.
*/
INSTR_TIME_SET_CURRENT(start);
thread->throttle_trigger = INSTR_TIME_GET_MICROSEC(start);
INSTR_TIME_SET_ZERO(thread->conn_time);
initStats(&aggs, time(NULL));
last = aggs;
/* open log file if requested */ /* open log file if requested */
if (use_log) if (use_log)
{ {
@ -6276,32 +6232,49 @@ threadRun(void *arg)
} }
} }
/* explicitly initialize the state machines */
for (int i = 0; i < nstate; i++)
state[i].state = CSTATE_CHOOSE_SCRIPT;
/* READY */
thread_start = pg_time_now();
thread->started_time = thread_start;
last_report = thread_start;
next_report = last_report + (int64) 1000000 * progress;
/* STEADY */
if (!is_connect) if (!is_connect)
{ {
/* make connections to the database before starting */ /* make connections to the database before starting */
for (i = 0; i < nstate; i++) for (int i = 0; i < nstate; i++)
{ {
if ((state[i].con = doConnect()) == NULL) if ((state[i].con = doConnect()) == NULL)
goto done; goto done;
} }
/* compute connection delay */
thread->conn_duration = pg_time_now() - thread->started_time;
} }
else
/* time after thread and connections set up */
INSTR_TIME_SET_CURRENT(thread->conn_time);
INSTR_TIME_SUBTRACT(thread->conn_time, thread->start_time);
/* explicitly initialize the state machines */
for (i = 0; i < nstate; i++)
{ {
state[i].state = CSTATE_CHOOSE_SCRIPT; /* no connection delay to record */
thread->conn_duration = 0;
} }
start = pg_time_now();
thread->bench_start = start;
thread->throttle_trigger = start;
initStats(&aggs, start);
last = aggs;
/* loop till all clients have terminated */ /* loop till all clients have terminated */
while (remains > 0) while (remains > 0)
{ {
int nsocks; /* number of sockets to be waited for */ int nsocks; /* number of sockets to be waited for */
int64 min_usec; pg_time_usec_t min_usec;
int64 now_usec = 0; /* set this only if needed */ pg_time_usec_t now = 0; /* set this only if needed */
/* /*
* identify which client sockets should be checked for input, and * identify which client sockets should be checked for input, and
@ -6310,27 +6283,21 @@ threadRun(void *arg)
clear_socket_set(sockets); clear_socket_set(sockets);
nsocks = 0; nsocks = 0;
min_usec = PG_INT64_MAX; min_usec = PG_INT64_MAX;
for (i = 0; i < nstate; i++) for (int i = 0; i < nstate; i++)
{ {
CState *st = &state[i]; CState *st = &state[i];
if (st->state == CSTATE_SLEEP || st->state == CSTATE_THROTTLE) if (st->state == CSTATE_SLEEP || st->state == CSTATE_THROTTLE)
{ {
/* a nap from the script, or under throttling */ /* a nap from the script, or under throttling */
int64 this_usec; pg_time_usec_t this_usec;
/* get current time if needed */ /* get current time if needed */
if (now_usec == 0) pg_time_now_lazy(&now);
{
instr_time now;
INSTR_TIME_SET_CURRENT(now);
now_usec = INSTR_TIME_GET_MICROSEC(now);
}
/* min_usec should be the minimum delay across all clients */ /* min_usec should be the minimum delay across all clients */
this_usec = (st->state == CSTATE_SLEEP ? this_usec = (st->state == CSTATE_SLEEP ?
st->sleep_until : st->txn_scheduled) - now_usec; st->sleep_until : st->txn_scheduled) - now;
if (min_usec > this_usec) if (min_usec > this_usec)
min_usec = this_usec; min_usec = this_usec;
} }
@ -6365,19 +6332,12 @@ threadRun(void *arg)
/* also wake up to print the next progress report on time */ /* also wake up to print the next progress report on time */
if (progress && min_usec > 0 && thread->tid == 0) if (progress && min_usec > 0 && thread->tid == 0)
{ {
/* get current time if needed */ pg_time_now_lazy(&now);
if (now_usec == 0)
{
instr_time now;
INSTR_TIME_SET_CURRENT(now); if (now >= next_report)
now_usec = INSTR_TIME_GET_MICROSEC(now);
}
if (now_usec >= next_report)
min_usec = 0; min_usec = 0;
else if ((next_report - now_usec) < min_usec) else if ((next_report - now) < min_usec)
min_usec = next_report - now_usec; min_usec = next_report - now;
} }
/* /*
@ -6426,7 +6386,7 @@ threadRun(void *arg)
/* ok, advance the state machine of each connection */ /* ok, advance the state machine of each connection */
nsocks = 0; nsocks = 0;
for (i = 0; i < nstate; i++) for (int i = 0; i < nstate; i++)
{ {
CState *st = &state[i]; CState *st = &state[i];
@ -6464,11 +6424,8 @@ threadRun(void *arg)
/* progress report is made by thread 0 for all threads */ /* progress report is made by thread 0 for all threads */
if (progress && thread->tid == 0) if (progress && thread->tid == 0)
{ {
instr_time now_time; pg_time_usec_t now = pg_time_now();
int64 now;
INSTR_TIME_SET_CURRENT(now_time);
now = INSTR_TIME_GET_MICROSEC(now_time);
if (now >= next_report) if (now >= next_report)
{ {
/* /*
@ -6486,17 +6443,17 @@ threadRun(void *arg)
*/ */
do do
{ {
next_report += (int64) progress * 1000000; next_report += (int64) 1000000 * progress;
} while (now >= next_report); } while (now >= next_report);
} }
} }
} }
done: done:
INSTR_TIME_SET_CURRENT(start); start = pg_time_now();
disconnect_all(state, nstate); disconnect_all(state, nstate);
INSTR_TIME_SET_CURRENT(end); thread->conn_duration += pg_time_now() - start;
INSTR_TIME_ACCUM_DIFF(thread->conn_time, end, start);
if (thread->logfile) if (thread->logfile)
{ {
if (agg_interval > 0) if (agg_interval > 0)

View File

@ -253,4 +253,32 @@ GetTimerFrequency(void)
#define INSTR_TIME_SET_CURRENT_LAZY(t) \ #define INSTR_TIME_SET_CURRENT_LAZY(t) \
(INSTR_TIME_IS_ZERO(t) ? INSTR_TIME_SET_CURRENT(t), true : false) (INSTR_TIME_IS_ZERO(t) ? INSTR_TIME_SET_CURRENT(t), true : false)
/*
* Simpler convenient interface
*
* The instr_time type is expensive when dealing with time arithmetic.
* Define a type to hold microseconds on top of this, suitable for
* benchmarking performance measures, eg in "pgbench".
*
* Type int64 is good enough for about 584500 years.
*/
typedef int64 pg_time_usec_t;
static inline pg_time_usec_t
pg_time_now(void)
{
instr_time now;
INSTR_TIME_SET_CURRENT(now);
return (pg_time_usec_t) INSTR_TIME_GET_MICROSEC(now);
}
static inline void
pg_time_now_lazy(pg_time_usec_t *now)
{
if ((*now) == 0)
(*now) = pg_time_now();
}
#define PG_TIME_GET_DOUBLE(t) (0.000001 * (t))
#endif /* INSTR_TIME_H */ #endif /* INSTR_TIME_H */

View File

@ -3225,6 +3225,7 @@ pg_sha512_ctx
pg_snapshot pg_snapshot
pg_stack_base_t pg_stack_base_t
pg_time_t pg_time_t
pg_time_usec_t
pg_tz pg_tz
pg_tz_cache pg_tz_cache
pg_tzenum pg_tzenum