/*
 * strtoint64 -- convert a string to 64-bit integer
 *
 * This function is a modified version of scanint8() from
 * src/backend/utils/adt/int8.c.
 *
 * Accepted syntax: optional leading whitespace, an optional '+' or '-'
 * sign, one or more decimal digits, optional trailing whitespace.
 *
 * Problems are reported on stderr, exactly once per call, and a well-defined
 * value is still returned so that callers need no changes:
 *	- no digits at all:  "invalid input syntax", returns 0
 *	- value out of range: "out of range", returns the nearest representable
 *	  value (the int64 minimum or maximum), as strtoll() does
 *	- trailing garbage:  "invalid input syntax", returns the value parsed
 *	  up to the garbage
 */
static int64
strtoint64(const char *str)
{
	const int64 int64_max = INT64CONST(0x7fffffffffffffff);
	const int64 int64_min = -INT64CONST(0x7fffffffffffffff) - 1;
	const char *ptr = str;
	int64		result = 0;
	bool		negative = false;

	/*
	 * Do our own scan, rather than relying on sscanf which might be broken
	 * for long long.
	 */

	/* skip leading spaces */
	while (*ptr && isspace((unsigned char) *ptr))
		ptr++;

	/* handle sign */
	if (*ptr == '-')
	{
		negative = true;
		ptr++;
	}
	else if (*ptr == '+')
		ptr++;

	/* require at least one digit */
	if (!isdigit((unsigned char) *ptr))
	{
		fprintf(stderr, "invalid input syntax for integer: \"%s\"\n", str);
		return 0;
	}

	/*
	 * Process digits.  The value is accumulated as a negative number because
	 * the negative range of int64 is one larger than the positive range, so
	 * the minimum value needs no special case.  The range test is done before
	 * the arithmetic so that signed overflow never actually happens.
	 */
	while (isdigit((unsigned char) *ptr))
	{
		int			digit = *ptr++ - '0';

		/* would result * 10 - digit fall below the int64 minimum? */
		if (result < (int64_min + digit) / 10)
		{
			fprintf(stderr, "value \"%s\" is out of range for type bigint\n", str);
			return negative ? int64_min : int64_max;
		}
		result = result * 10 - digit;
	}

	if (!negative)
	{
		/* the magnitude of the minimum has no positive counterpart */
		if (result == int64_min)
		{
			fprintf(stderr, "value \"%s\" is out of range for type bigint\n", str);
			return int64_max;
		}
		result = -result;
	}

	/* allow trailing whitespace, but not other trailing chars */
	while (*ptr != '\0' && isspace((unsigned char) *ptr))
		ptr++;

	if (*ptr != '\0')
		fprintf(stderr, "invalid input syntax for integer: \"%s\"\n", str);

	return result;
}
Since we know max > min, * we can detect overflow just by checking for a negative result. * But we must check both that the subtraction doesn't overflow, * and that adding one to the result doesn't overflow either. @@ -1141,9 +1218,9 @@ top: } #ifdef DEBUG - printf("min: %d max: %d random: %d\n", min, max, getrand(thread, min, max)); + printf("min: " INT64_FORMAT " max: " INT64_FORMAT " random: " INT64_FORMAT "\n", min, max, getrand(thread, min, max)); #endif - snprintf(res, sizeof(res), "%d", getrand(thread, min, max)); + snprintf(res, sizeof(res), INT64_FORMAT, getrand(thread, min, max)); if (!putVariable(st, argv[0], argv[1], res)) { @@ -1156,7 +1233,7 @@ top: else if (pg_strcasecmp(argv[0], "set") == 0) { char *var; - int ope1, + int64 ope1, ope2; char res[64]; @@ -1168,13 +1245,13 @@ top: st->ecnt++; return true; } - ope1 = atoi(var); + ope1 = strtoint64(var); } else - ope1 = atoi(argv[2]); + ope1 = strtoint64(argv[2]); if (argc < 5) - snprintf(res, sizeof(res), "%d", ope1); + snprintf(res, sizeof(res), INT64_FORMAT, ope1); else { if (*argv[4] == ':') @@ -1185,17 +1262,17 @@ top: st->ecnt++; return true; } - ope2 = atoi(var); + ope2 = strtoint64(var); } else - ope2 = atoi(argv[4]); + ope2 = strtoint64(argv[4]); if (strcmp(argv[3], "+") == 0) - snprintf(res, sizeof(res), "%d", ope1 + ope2); + snprintf(res, sizeof(res), INT64_FORMAT, ope1 + ope2); else if (strcmp(argv[3], "-") == 0) - snprintf(res, sizeof(res), "%d", ope1 - ope2); + snprintf(res, sizeof(res), INT64_FORMAT, ope1 - ope2); else if (strcmp(argv[3], "*") == 0) - snprintf(res, sizeof(res), "%d", ope1 * ope2); + snprintf(res, sizeof(res), INT64_FORMAT, ope1 * ope2); else if (strcmp(argv[3], "/") == 0) { if (ope2 == 0) @@ -1204,7 +1281,7 @@ top: st->ecnt++; return true; } - snprintf(res, sizeof(res), "%d", ope1 / ope2); + snprintf(res, sizeof(res), INT64_FORMAT, ope1 / ope2); } else { @@ -1311,6 +1388,15 @@ disconnect_all(CState *state, int length) static void init(bool is_no_vacuum) { + +/* The scale 
factor at/beyond which 32bit integers are incapable of storing + * 64bit values. + * + * Although the actual threshold is 21474, we use 20000 because it is easier to + * document and remember, and isn't that far away from the real threshold. + */ +#define SCALE_32BIT_THRESHOLD 20000 + /* * Note: TPC-B requires at least 100 bytes per row, and the "filler" * fields in these table declarations were intended to comply with that. @@ -1329,7 +1415,9 @@ init(bool is_no_vacuum) struct ddlinfo DDLs[] = { { "pgbench_history", - "tid int,bid int,aid int,delta int,mtime timestamp,filler char(22)", + scale >= SCALE_32BIT_THRESHOLD + ? "tid int,bid int,aid bigint,delta int,mtime timestamp,filler char(22)" + : "tid int,bid int,aid int,delta int,mtime timestamp,filler char(22)", 0 }, { @@ -1339,7 +1427,9 @@ init(bool is_no_vacuum) }, { "pgbench_accounts", - "aid int not null,bid int,abalance int,filler char(84)", + scale >= SCALE_32BIT_THRESHOLD + ? "aid bigint not null,bid int,abalance int,filler char(84)" + : "aid int not null,bid int,abalance int,filler char(84)", 1 }, { @@ -1365,6 +1455,7 @@ init(bool is_no_vacuum) PGresult *res; char sql[256]; int i; + int64 k; /* used to track elapsed time and estimate of the remaining time */ instr_time start, diff; @@ -1441,11 +1532,11 @@ init(bool is_no_vacuum) INSTR_TIME_SET_CURRENT(start); - for (i = 0; i < naccounts * scale; i++) + for (k = 0; k < (int64) naccounts * scale; k++) { - int j = i + 1; + int64 j = k + 1; - snprintf(sql, 256, "%d\t%d\t%d\t\n", j, i / naccounts + 1, 0); + snprintf(sql, 256, INT64_FORMAT "\t" INT64_FORMAT "\t%d\t\n", j, k / naccounts + 1, 0); if (PQputline(con, sql)) { fprintf(stderr, "PQputline failed\n"); @@ -1462,8 +1553,8 @@ init(bool is_no_vacuum) elapsed_sec = INSTR_TIME_GET_DOUBLE(diff); remaining_sec = (scale * naccounts - j) * elapsed_sec / j; - fprintf(stderr, "%d of %d tuples (%d%%) done (elapsed %.2f s, remaining %.2f s).\n", - j, naccounts * scale, + fprintf(stderr, INT64_FORMAT " of " 
INT64_FORMAT " tuples (%d%%) done (elapsed %.2f s, remaining %.2f s).\n", + j, (int64)naccounts * scale, (int) (((int64) j * 100) / (naccounts * scale)), elapsed_sec, remaining_sec); } @@ -1479,8 +1570,8 @@ init(bool is_no_vacuum) /* have we reached the next interval (or end)? */ if ((j == scale * naccounts) || (elapsed_sec >= log_interval * LOG_STEP_SECONDS)) { - fprintf(stderr, "%d of %d tuples (%d%%) done (elapsed %.2f s, remaining %.2f s).\n", - j, naccounts * scale, + fprintf(stderr, INT64_FORMAT " of " INT64_FORMAT " tuples (%d%%) done (elapsed %.2f s, remaining %.2f s).\n", + j, (int64)naccounts * scale, (int) (((int64) j * 100) / (naccounts * scale)), elapsed_sec, remaining_sec); /* skip to the next interval */ diff --git a/doc/src/sgml/pgbench.sgml b/doc/src/sgml/pgbench.sgml index 58686b1a8b..9ed8b76963 100644 --- a/doc/src/sgml/pgbench.sgml +++ b/doc/src/sgml/pgbench.sgml @@ -185,6 +185,11 @@ pgbench options dbname Multiply the number of rows generated by the scale factor. For example, -s 100 will create 10,000,000 rows in the pgbench_accounts table. Default is 1. + When the scale is 20,000 or larger, the columns used to + hold account identifiers (aid columns) + will switch to using larger integers (bigint), + in order to be big enough to hold the range of account + identifiers.