Speed up pg_regress server readiness testing.

Instead of connecting to the server with psql to check if it is ready
for running tests, this changes pg_regress to use PQPing which avoids
performing system() calls which are expensive on some platforms, like
Windows. The frequency of tests is also increased in order to connect
to the server faster.

This patch is part of a larger effort to make testing consume fewer
resources in order to be able to fit more tests into the available
CI system constraints.

Reviewed-by: Andres Freund <andres@anarazel.de>
Discussion: https://postgr.es/m/20230823192239.jxew5s3sjru63lio@awork3.anarazel.de
This commit is contained in:
Daniel Gustafsson 2023-10-24 21:37:35 +02:00
parent 387f9ed0a0
commit 66d6086cbc
7 changed files with 59 additions and 32 deletions

View File

@ -10,6 +10,7 @@ include $(top_builddir)/src/Makefile.global
override CPPFLAGS := \
'-I$(top_builddir)/src/port' \
'-I$(top_srcdir)/src/test/regress' \
'-I$(libpq_srcdir)' \
'-DHOST_TUPLE="$(host_tuple)"' \
'-DSHELLPROG="$(SHELL)"' \
$(CPPFLAGS)
@ -45,7 +46,7 @@ clean distclean maintainer-clean:
all: pg_regress$(X)
pg_regress$(X): pg_regress_ecpg.o $(WIN32RES) $(top_builddir)/src/test/regress/pg_regress.o
$(CC) $(CFLAGS) $^ $(LDFLAGS) $(LDFLAGS_EX) $(LIBS) -o $@
$(CC) $(CFLAGS) $^ $(libpq_pgport) $(LDFLAGS) $(LDFLAGS_EX) $(LIBS) -o $@
$(top_builddir)/src/test/regress/pg_regress.o:
$(MAKE) -C $(dir $@) $(notdir $@)

View File

@ -18,7 +18,7 @@ pg_regress_ecpg = executable('pg_regress_ecpg',
pg_regress_ecpg_sources,
c_args: pg_regress_cflags,
include_directories: [pg_regress_inc, include_directories('.')],
dependencies: [frontend_code],
dependencies: [frontend_code, libpq],
kwargs: default_bin_args + {
'install': false
},

View File

@ -38,7 +38,7 @@ pg_regress.o: | submake-regress
rm -f $@ && $(LN_S) $(top_builddir)/src/test/regress/pg_regress.o .
pg_isolation_regress$(X): isolation_main.o pg_regress.o $(WIN32RES)
$(CC) $(CFLAGS) $^ $(LDFLAGS) $(LDFLAGS_EX) $(LIBS) -o $@
$(CC) $(CFLAGS) $^ $(libpq_pgport) $(LDFLAGS) $(LDFLAGS_EX) $(LIBS) -o $@
isolationtester$(X): $(OBJS) | submake-libpq submake-libpgport
$(CC) $(CFLAGS) $^ $(libpq_pgport) $(LDFLAGS) $(LDFLAGS_EX) $(LIBS) -o $@

View File

@ -35,7 +35,7 @@ pg_isolation_regress = executable('pg_isolation_regress',
isolation_sources,
c_args: pg_regress_cflags,
include_directories: pg_regress_inc,
dependencies: frontend_code,
dependencies: [frontend_code, libpq],
kwargs: default_bin_args + {
'install_dir': dir_pgxs / 'src/test/isolation',
},

View File

@ -36,11 +36,11 @@ EXTRADEFS = '-DHOST_TUPLE="$(host_tuple)"' \
all: pg_regress$(X)
pg_regress$(X): pg_regress.o pg_regress_main.o $(WIN32RES) | submake-libpgport
$(CC) $(CFLAGS) $^ $(LDFLAGS) $(LDFLAGS_EX) $(LIBS) -o $@
$(CC) $(CFLAGS) $^ $(libpq_pgport) $(LDFLAGS) $(LDFLAGS_EX) $(LIBS) -o $@
# dependencies ensure that path changes propagate
pg_regress.o: pg_regress.c $(top_builddir)/src/port/pg_config_paths.h
pg_regress.o: override CPPFLAGS += -I$(top_builddir)/src/port $(EXTRADEFS)
pg_regress.o: override CPPFLAGS += -I$(top_builddir)/src/port -I$(libpq_srcdir) $(EXTRADEFS)
# note: because of the submake dependency, this rule's action is really a no-op
$(top_builddir)/src/port/pg_config_paths.h: | submake-libpgport

View File

@ -30,7 +30,7 @@ endif
pg_regress = executable('pg_regress',
regress_sources,
c_args: pg_regress_cflags,
dependencies: [frontend_code],
dependencies: [frontend_code, libpq],
kwargs: default_bin_args + {
'install_dir': dir_pgxs / 'src/test/regress',
},

View File

@ -32,6 +32,7 @@
#include "common/username.h"
#include "getopt_long.h"
#include "lib/stringinfo.h"
#include "libpq-fe.h"
#include "libpq/pqcomm.h" /* needed for UNIXSOCK_PATH() */
#include "pg_config_paths.h"
#include "pg_regress.h"
@ -75,6 +76,12 @@ const char *pretty_diff_opts = "-w -U3";
*/
#define TESTNAME_WIDTH 36
/*
* The number times per second that pg_regress checks to see if the test
* instance server has started and is available for connection.
*/
#define WAIT_TICKS_PER_SECOND 20
typedef enum TAPtype
{
DIAG = 0,
@ -107,6 +114,7 @@ static bool nolocale = false;
static bool use_existing = false;
static char *hostname = NULL;
static int port = -1;
static char portstr[16];
static bool port_specified_by_user = false;
static char *dlpath = PKGLIBDIR;
static char *user = NULL;
@ -2107,7 +2115,6 @@ regression_main(int argc, char *argv[],
int i;
int option_index;
char buf[MAXPGPATH * 4];
char buf2[MAXPGPATH * 4];
pg_logging_init(argv[0]);
progname = get_progname(argv[0]);
@ -2296,6 +2303,9 @@ regression_main(int argc, char *argv[],
const char *env_wait;
int wait_seconds;
const char *initdb_template_dir;
const char *keywords[4];
const char *values[4];
PGPing rv;
/*
* Prepare the temp instance
@ -2435,22 +2445,29 @@ regression_main(int argc, char *argv[],
}
#endif
/*
* Prepare the connection params for checking the state of the server
* before starting the tests.
*/
sprintf(portstr, "%d", port);
keywords[0] = "dbname";
values[0] = "postgres";
keywords[1] = "port";
values[1] = portstr;
keywords[2] = "host";
values[2] = hostname ? hostname : sockdir;
keywords[3] = NULL;
values[3] = NULL;
/*
* Check if there is a postmaster running already.
*/
snprintf(buf2, sizeof(buf2),
"\"%s%spsql\" -X postgres <%s 2>%s",
bindir ? bindir : "",
bindir ? "/" : "",
DEVNULL, DEVNULL);
for (i = 0; i < 16; i++)
{
fflush(NULL);
if (system(buf2) == 0)
{
char s[16];
rv = PQpingParams(keywords, values, 1);
if (rv == PQPING_OK)
{
if (port_specified_by_user || i == 15)
{
note("port %d apparently in use", port);
@ -2461,8 +2478,8 @@ regression_main(int argc, char *argv[],
note("port %d apparently in use, trying %d", port, port + 1);
port++;
sprintf(s, "%d", port);
setenv("PGPORT", s, 1);
sprintf(portstr, "%d", port);
setenv("PGPORT", portstr, 1);
}
else
break;
@ -2485,11 +2502,11 @@ regression_main(int argc, char *argv[],
bail("could not spawn postmaster: %s", strerror(errno));
/*
* Wait till postmaster is able to accept connections; normally this
* is only a second or so, but Cygwin is reportedly *much* slower, and
* test builds using Valgrind or similar tools might be too. Hence,
* allow the default timeout of 60 seconds to be overridden from the
* PGCTLTIMEOUT environment variable.
* Wait till postmaster is able to accept connections; normally takes
* only a fraction of a second or so, but Cygwin is reportedly *much*
* slower, and test builds using Valgrind or similar tools might be
* too. Hence, allow the default timeout of 60 seconds to be
* overridden from the PGCTLTIMEOUT environment variable.
*/
env_wait = getenv("PGCTLTIMEOUT");
if (env_wait != NULL)
@ -2501,13 +2518,24 @@ regression_main(int argc, char *argv[],
else
wait_seconds = 60;
for (i = 0; i < wait_seconds; i++)
for (i = 0; i < wait_seconds * WAIT_TICKS_PER_SECOND; i++)
{
/* Done if psql succeeds */
fflush(NULL);
if (system(buf2) == 0)
/*
* It's fairly unlikely that the server is responding immediately
* so we start with sleeping before checking instead of the other
* way around.
*/
pg_usleep(1000000L / WAIT_TICKS_PER_SECOND);
rv = PQpingParams(keywords, values, 1);
/* Done if the server is running and accepts connections */
if (rv == PQPING_OK)
break;
if (rv == PQPING_NO_ATTEMPT)
bail("attempting to connect to postmaster failed");
/*
* Fail immediately if postmaster has exited
*/
@ -2520,10 +2548,8 @@ regression_main(int argc, char *argv[],
bail("postmaster failed, examine \"%s/log/postmaster.log\" for the reason",
outputdir);
}
pg_usleep(1000000L);
}
if (i >= wait_seconds)
if (i >= wait_seconds * WAIT_TICKS_PER_SECOND)
{
diag("postmaster did not respond within %d seconds, examine \"%s/log/postmaster.log\" for the reason",
wait_seconds, outputdir);