From a386942bd29b0ef0c9df061392659880d22cdf43 Mon Sep 17 00:00:00 2001 From: Fujii Masao Date: Wed, 6 Nov 2019 11:02:30 +0900 Subject: [PATCH] Add "G" (server-side data generation) as an initialization step in pgbench. This commit allows --init-steps option in pgbench to accept "G" character meaning server-side data generation as an initialization step. With "G", only limited queries are sent from pgbench client and then data is actually generated in the server. This might make the initialization phase faster if the bandwidth between pgbench client and the server is low. Author: Fabien Coelho Reviewed-by: Anna Endo, Ibrar Ahmed, Fujii Masao Discussion: https://postgr.es/m/alpine.DEB.2.21.1904061826420.3678@lancre --- doc/src/sgml/ref/pgbench.sgml | 30 +++++- src/bin/pgbench/pgbench.c | 98 +++++++++++++++----- src/bin/pgbench/t/001_pgbench_with_server.pl | 4 +- src/bin/pgbench/t/002_pgbench_no_server.pl | 2 +- 4 files changed, 108 insertions(+), 26 deletions(-) diff --git a/doc/src/sgml/ref/pgbench.sgml b/doc/src/sgml/ref/pgbench.sgml index e3a0abb4c7..4c48a58ed2 100644 --- a/doc/src/sgml/ref/pgbench.sgml +++ b/doc/src/sgml/ref/pgbench.sgml @@ -193,12 +193,34 @@ pgbench options d - g (Generate data) + g or G (Generate data, client-side or server-side) Generate data and load it into the standard tables, replacing any data already present. + + With g (client-side data generation), + data is generated in pgbench client and then + sent to the server. This uses the client/server bandwidth + extensively through a COPY. + Using g causes logging to print one message + every 100,000 rows when generating data into + pgbench_accounts table. + + + With G (server-side data generation), + only limited queries are sent from pgbench + client and then data is actually generated in the server. + No significant bandwidth is required for this variant, but + the server will do more work. 
+ Using G causes logging not to print any progress + message when generating data into + pgbench_accounts table. + + + + @@ -262,9 +284,13 @@ pgbench options d Switch logging to quiet mode, producing only one progress message per 5 - seconds. The default logging prints one message each 100000 rows, which + seconds. The default logging prints one message each 100,000 rows, which often outputs many lines per second (especially on good hardware). + + This setting has no effect if G is specified + in -I. + diff --git a/src/bin/pgbench/pgbench.c b/src/bin/pgbench/pgbench.c index 03bcd22996..14dbc4510c 100644 --- a/src/bin/pgbench/pgbench.c +++ b/src/bin/pgbench/pgbench.c @@ -132,6 +132,7 @@ static int pthread_join(pthread_t th, void **thread_return); * some configurable parameters */ #define DEFAULT_INIT_STEPS "dtgvp" /* default -I setting */ +#define ALL_INIT_STEPS "dtgGvpf" /* all possible steps */ #define LOG_STEP_SECONDS 5 /* seconds between log messages */ #define DEFAULT_NXACTS 10 /* default nxacts */ @@ -627,7 +628,7 @@ usage(void) " %s [OPTION]... 
[DBNAME]\n" "\nInitialization options:\n" " -i, --initialize invokes initialization mode\n" - " -I, --init-steps=[dtgvpf]+ (default \"dtgvp\")\n" + " -I, --init-steps=[" ALL_INIT_STEPS "]+ (default \"" DEFAULT_INIT_STEPS "\")\n" " run selected initialization steps\n" " -F, --fillfactor=NUM set fill factor\n" " -n, --no-vacuum do not run VACUUM during initialization\n" @@ -3803,10 +3804,23 @@ append_fillfactor(char *opts, int len) } /* - * Fill the standard tables with some data + * Truncate away any old data, in one command in case there are foreign keys */ static void -initGenerateData(PGconn *con) +initTruncateTables(PGconn *con) +{ + executeStatement(con, "truncate table " + "pgbench_accounts, " + "pgbench_branches, " + "pgbench_history, " + "pgbench_tellers"); +} + +/* + * Fill the standard tables with some data generated and sent from the client + */ +static void +initGenerateDataClientSide(PGconn *con) { char sql[256]; PGresult *res; @@ -3820,7 +3834,7 @@ initGenerateData(PGconn *con) remaining_sec; int log_interval = 1; - fprintf(stderr, "generating data...\n"); + fprintf(stderr, "generating data (client-side)...\n"); /* * we do all of this in one transaction to enable the backend's @@ -3828,15 +3842,8 @@ initGenerateData(PGconn *con) */ executeStatement(con, "begin"); - /* - * truncate away any old data, in one command in case there are foreign - * keys - */ - executeStatement(con, "truncate table " - "pgbench_accounts, " - "pgbench_branches, " - "pgbench_history, " - "pgbench_tellers"); + /* truncate away any old data */ + initTruncateTables(con); /* * fill branches, tellers, accounts in that order in case foreign keys @@ -3940,6 +3947,51 @@ initGenerateData(PGconn *con) executeStatement(con, "commit"); } +/* + * Fill the standard tables with some data generated on the server + * + * As already the case with the client-side data generation, the filler + * column defaults to NULL in pgbench_branches and pgbench_tellers, + * and is a blank-padded string in 
pgbench_accounts. + */ +static void +initGenerateDataServerSide(PGconn *con) +{ + char sql[256]; + + fprintf(stderr, "generating data (server-side)...\n"); + + /* + * we do all of this in one transaction to enable the backend's + * data-loading optimizations + */ + executeStatement(con, "begin"); + + /* truncate away any old data */ + initTruncateTables(con); + + snprintf(sql, sizeof(sql), + "insert into pgbench_branches(bid,bbalance) " + "select bid, 0 " + "from generate_series(1, %d) as bid", nbranches * scale); + executeStatement(con, sql); + + snprintf(sql, sizeof(sql), + "insert into pgbench_tellers(tid,bid,tbalance) " + "select tid, (tid - 1) / %d + 1, 0 " + "from generate_series(1, %d) as tid", ntellers, ntellers * scale); + executeStatement(con, sql); + + snprintf(sql, sizeof(sql), + "insert into pgbench_accounts(aid,bid,abalance,filler) " + "select aid, (aid - 1) / %d + 1, 0, '' " + "from generate_series(1, "INT64_FORMAT") as aid", + naccounts, (int64) naccounts * scale); + executeStatement(con, sql); + + executeStatement(con, "commit"); +} + /* * Invoke vacuum on the standard tables */ @@ -4020,21 +4072,21 @@ initCreateFKeys(PGconn *con) static void checkInitSteps(const char *initialize_steps) { - const char *step; - if (initialize_steps[0] == '\0') { fprintf(stderr, "no initialization steps specified\n"); exit(1); } - for (step = initialize_steps; *step != '\0'; step++) + for (const char *step = initialize_steps; *step != '\0'; step++) { - if (strchr("dtgvpf ", *step) == NULL) + if (strchr(ALL_INIT_STEPS " ", *step) == NULL) { - fprintf(stderr, "unrecognized initialization step \"%c\"\n", + fprintf(stderr, + "unrecognized initialization step \"%c\"\n", *step); - fprintf(stderr, "allowed steps are: \"d\", \"t\", \"g\", \"v\", \"p\", \"f\"\n"); + fprintf(stderr, + "Allowed step characters are: \"" ALL_INIT_STEPS "\".\n"); exit(1); } } @@ -4075,8 +4127,12 @@ runInitSteps(const char *initialize_steps) initCreateTables(con); break; case 'g': - op = 
"generate"; - initGenerateData(con); + op = "client-side generate"; + initGenerateDataClientSide(con); + break; + case 'G': + op = "server-side generate"; + initGenerateDataServerSide(con); break; case 'v': op = "vacuum"; diff --git a/src/bin/pgbench/t/001_pgbench_with_server.pl b/src/bin/pgbench/t/001_pgbench_with_server.pl index c441626d7c..1845869016 100644 --- a/src/bin/pgbench/t/001_pgbench_with_server.pl +++ b/src/bin/pgbench/t/001_pgbench_with_server.pl @@ -130,7 +130,7 @@ pgbench( # Test interaction of --init-steps with legacy step-selection options pgbench( - '--initialize --init-steps=dtpvgvv --no-vacuum --foreign-keys --unlogged-tables --partitions=3', + '--initialize --init-steps=dtpvGvv --no-vacuum --foreign-keys --unlogged-tables --partitions=3', 0, [qr{^$}], [ @@ -138,7 +138,7 @@ pgbench( qr{creating tables}, qr{creating 3 partitions}, qr{creating primary keys}, - qr{.* of .* tuples \(.*\) done}, + qr{generating data \(server-side\)}, qr{creating foreign keys}, qr{(?!vacuuming)}, # no vacuum qr{done in \d+\.\d\d s } diff --git a/src/bin/pgbench/t/002_pgbench_no_server.pl b/src/bin/pgbench/t/002_pgbench_no_server.pl index 1e9542af3f..8b6d442812 100644 --- a/src/bin/pgbench/t/002_pgbench_no_server.pl +++ b/src/bin/pgbench/t/002_pgbench_no_server.pl @@ -147,7 +147,7 @@ my @options = ( [ 'invalid init step', '-i -I dta', - [ qr{unrecognized initialization step}, qr{allowed steps are} ] + [ qr{unrecognized initialization step}, qr{Allowed step characters are} ] ], [ 'bad random seed',