diff --git a/.cirrus.yml b/.cirrus.yml index 505c50f328..04786174ed 100644 --- a/.cirrus.yml +++ b/.cirrus.yml @@ -25,7 +25,7 @@ env: MTEST_ARGS: --print-errorlogs --no-rebuild -C build PGCTLTIMEOUT: 120 # avoids spurious failures during parallel tests TEMP_CONFIG: ${CIRRUS_WORKING_DIR}/src/tools/ci/pg_ci_base.conf - PG_TEST_EXTRA: kerberos ldap ssl + PG_TEST_EXTRA: kerberos ldap ssl load_balance # What files to preserve in case tests fail @@ -313,6 +313,14 @@ task: mkdir -m 770 /tmp/cores chown root:postgres /tmp/cores sysctl kernel.core_pattern='/tmp/cores/%e-%s-%p.core' + + setup_hosts_file_script: | + cat >> /etc/hosts <<-EOF + 127.0.0.1 pg-loadbalancetest + 127.0.0.2 pg-loadbalancetest + 127.0.0.3 pg-loadbalancetest + EOF + setup_additional_packages_script: | #apt-get update #DEBIAN_FRONTEND=noninteractive apt-get -y install ... @@ -564,6 +572,12 @@ task: setup_additional_packages_script: | REM choco install -y --no-progress ... + setup_hosts_file_script: | + echo 127.0.0.1 pg-loadbalancetest >> c:\Windows\System32\Drivers\etc\hosts + echo 127.0.0.2 pg-loadbalancetest >> c:\Windows\System32\Drivers\etc\hosts + echo 127.0.0.3 pg-loadbalancetest >> c:\Windows\System32\Drivers\etc\hosts + type c:\Windows\System32\Drivers\etc\hosts + # Use /DEBUG:FASTLINK to avoid high memory usage during linking configure_script: | vcvarsall x64 diff --git a/doc/src/sgml/libpq.sgml b/doc/src/sgml/libpq.sgml index 8579dcac95..9f72dd29d8 100644 --- a/doc/src/sgml/libpq.sgml +++ b/doc/src/sgml/libpq.sgml @@ -2115,6 +2115,67 @@ postgresql://%2Fvar%2Flib%2Fpostgresql/dbname + + + load_balance_hosts + + + Controls the order in which the client tries to connect to the available + hosts and addresses. Once a connection attempt is successful no other + hosts and addresses will be tried. This parameter is typically used in + combination with multiple host names or a DNS record that returns + multiple IPs. 
This parameter can be used in combination with + + to, for example, load balance over standby servers only. Once successfully + connected, subsequent queries on the returned connection will all be + sent to the same server. There are currently two modes: + + + disable (default) + + + No load balancing across hosts is performed. Hosts are tried in + the order in which they are provided and addresses are tried in + the order they are received from DNS or a hosts file. + + + + + + random + + + Hosts and addresses are tried in random order. This value is mostly + useful when opening multiple connections at the same time, possibly + from different machines. This way connections can be load balanced + across multiple PostgreSQL servers. + + + While random load balancing, due to its random nature, will almost + never result in a completely uniform distribution, it statistically + gets quite close. One important aspect here is that this algorithm + uses two levels of random choices: First the hosts + will be resolved in random order. Then secondly, before resolving + the next host, all resolved addresses for the current host will be + tried in random order. This behaviour can skew the amount of + connections each node gets greatly in certain cases, for instance + when some hosts resolve to more addresses than others. But such a + skew can also be used on purpose, e.g. to increase the number of + connections a larger server gets by providing its hostname multiple + times in the host string. + + + When using this value it's recommended to also configure a reasonable + value for . Because then, + if one of the nodes that are used for load balancing is not responding, + a new node will be tried. 
+ + + + + + + diff --git a/doc/src/sgml/regress.sgml b/doc/src/sgml/regress.sgml index 719e0a7698..8a0b384dec 100644 --- a/doc/src/sgml/regress.sgml +++ b/doc/src/sgml/regress.sgml @@ -256,7 +256,7 @@ make check-world -j8 >/dev/null PG_TEST_EXTRA to a whitespace-separated list, for example: -make check-world PG_TEST_EXTRA='kerberos ldap ssl' +make check-world PG_TEST_EXTRA='kerberos ldap ssl load_balance' The following values are currently supported: @@ -290,6 +290,17 @@ make check-world PG_TEST_EXTRA='kerberos ldap ssl' + + load_balance + + + Runs the test src/interfaces/libpq/t/004_load_balance_dns.pl. + This requires editing the system hosts file and + opens TCP/IP listen sockets. + + + + wal_consistency_checking diff --git a/src/interfaces/libpq/fe-connect.c b/src/interfaces/libpq/fe-connect.c index 4e798e1672..a13ec16b32 100644 --- a/src/interfaces/libpq/fe-connect.c +++ b/src/interfaces/libpq/fe-connect.c @@ -123,6 +123,7 @@ static int ldapServiceLookup(const char *purl, PQconninfoOption *options, #define DefaultChannelBinding "disable" #endif #define DefaultTargetSessionAttrs "any" +#define DefaultLoadBalanceHosts "disable" #ifdef USE_SSL #define DefaultSSLMode "prefer" #define DefaultSSLCertMode "allow" @@ -351,6 +352,11 @@ static const internalPQconninfoOption PQconninfoOptions[] = { "Target-Session-Attrs", "", 15, /* sizeof("prefer-standby") = 15 */ offsetof(struct pg_conn, target_session_attrs)}, + {"load_balance_hosts", "PGLOADBALANCEHOSTS", + DefaultLoadBalanceHosts, NULL, + "Load-Balance-Hosts", "", 8, /* sizeof("disable") = 8 */ + offsetof(struct pg_conn, load_balance_hosts)}, + /* Terminating entry --- MUST BE LAST */ {NULL, NULL, NULL, NULL, NULL, NULL, 0} @@ -435,6 +441,8 @@ static void pgpassfileWarning(PGconn *conn); static void default_threadlock(int acquire); static bool sslVerifyProtocolVersion(const char *version); static bool sslVerifyProtocolRange(const char *min, const char *max); +static bool parse_int_param(const char *value, int 
*result, PGconn *conn, + const char *context); /* global variable because fe-auth.c needs to access it */ @@ -1020,6 +1028,31 @@ parse_comma_separated_list(char **startptr, bool *more) return p; } +/* + * Initializes the prng_state field of the connection. We want something + * unpredictable, so if possible, use high-quality random bits for the + * seed. Otherwise, fall back to a seed based on the connection address, + * timestamp and PID. + */ +static void +libpq_prng_init(PGconn *conn) +{ + uint64 rseed; + struct timeval tval = {0}; + + if (pg_prng_strong_seed(&conn->prng_state)) + return; + + gettimeofday(&tval, NULL); + + rseed = ((uintptr_t) conn) ^ + ((uint64) getpid()) ^ + ((uint64) tval.tv_usec) ^ + ((uint64) tval.tv_sec); + + pg_prng_seed(&conn->prng_state, rseed); +} + /* * connectOptions2 * @@ -1619,6 +1652,49 @@ connectOptions2(PGconn *conn) else conn->target_server_type = SERVER_TYPE_ANY; + /* + * validate load_balance_hosts option, and set load_balance_type + */ + if (conn->load_balance_hosts) + { + if (strcmp(conn->load_balance_hosts, "disable") == 0) + conn->load_balance_type = LOAD_BALANCE_DISABLE; + else if (strcmp(conn->load_balance_hosts, "random") == 0) + conn->load_balance_type = LOAD_BALANCE_RANDOM; + else + { + conn->status = CONNECTION_BAD; + libpq_append_conn_error(conn, "invalid %s value: \"%s\"", + "load_balance_hosts", + conn->load_balance_hosts); + return false; + } + } + else + conn->load_balance_type = LOAD_BALANCE_DISABLE; + + if (conn->load_balance_type == LOAD_BALANCE_RANDOM) + { + libpq_prng_init(conn); + + /* + * This is the "inside-out" variant of the Fisher-Yates shuffle + * algorithm. Notionally, we append each new value to the array and + * then swap it with a randomly-chosen array element (possibly + * including itself, else we fail to generate permutations with the + * last integer last). The swap step can be optimized by combining it + * with the insertion. 
+ */ + for (i = 1; i < conn->nconnhost; i++) + { + int j = pg_prng_uint64_range(&conn->prng_state, 0, i); + pg_conn_host temp = conn->connhost[j]; + + conn->connhost[j] = conn->connhost[i]; + conn->connhost[i] = temp; + } + } + /* * Resolve special "auto" client_encoding from the locale */ @@ -2626,6 +2702,32 @@ keep_going: /* We will come back to here until there is if (ret) goto error_return; /* message already logged */ + /* + * If random load balancing is enabled we shuffle the addresses. + */ + if (conn->load_balance_type == LOAD_BALANCE_RANDOM) + { + /* + * This is the "inside-out" variant of the Fisher-Yates shuffle + * algorithm. Notionally, we append each new value to the array + * and then swap it with a randomly-chosen array element (possibly + * including itself, else we fail to generate permutations with + * the last integer last). The swap step can be optimized by + * combining it with the insertion. + * + * We don't need to initialize conn->prng_state here, because that + * already happened in connectOptions2. 
+ */ + for (int i = 1; i < conn->naddr; i++) + { + int j = pg_prng_uint64_range(&conn->prng_state, 0, i); + AddrInfo temp = conn->addr[j]; + + conn->addr[j] = conn->addr[i]; + conn->addr[i] = temp; + } + } + reset_connection_state_machine = true; conn->try_next_host = false; } @@ -4320,6 +4422,7 @@ freePGconn(PGconn *conn) free(conn->outBuffer); free(conn->rowBuf); free(conn->target_session_attrs); + free(conn->load_balance_hosts); termPQExpBuffer(&conn->errorMessage); termPQExpBuffer(&conn->workBuffer); diff --git a/src/interfaces/libpq/libpq-int.h b/src/interfaces/libpq/libpq-int.h index 7d09147525..d93e976ca5 100644 --- a/src/interfaces/libpq/libpq-int.h +++ b/src/interfaces/libpq/libpq-int.h @@ -26,7 +26,8 @@ #include #include #include -#ifndef WIN32 +/* MinGW has sys/time.h, but MSVC doesn't */ +#ifndef _MSC_VER #include #endif @@ -82,6 +83,8 @@ typedef struct #endif #endif /* USE_OPENSSL */ +#include "common/pg_prng.h" + /* * POSTGRES backend dependent Constants. */ @@ -242,6 +245,13 @@ typedef enum SERVER_TYPE_PREFER_STANDBY_PASS2 /* second pass - behaves same as ANY */ } PGTargetServerType; +/* Load balance type (decoded value of load_balance_hosts) */ +typedef enum +{ + LOAD_BALANCE_DISABLE = 0, /* Use the existing host order (default) */ + LOAD_BALANCE_RANDOM, /* Randomly shuffle the hosts */ +} PGLoadBalanceType; + /* Boolean value plus a not-known state, for GUCs we might have to fetch */ typedef enum { @@ -398,6 +408,7 @@ struct pg_conn char *ssl_max_protocol_version; /* maximum TLS protocol version */ char *target_session_attrs; /* desired session properties */ char *require_auth; /* name of the expected auth method */ + char *load_balance_hosts; /* load balance over hosts */ /* Optional file to write trace info to */ FILE *Pfdebug; @@ -469,6 +480,8 @@ struct pg_conn /* Transient state needed while establishing connection */ PGTargetServerType target_server_type; /* desired session properties */ + PGLoadBalanceType load_balance_type; /* desired load 
balancing + * algorithm */ bool try_next_addr; /* time to advance to next address/host? */ bool try_next_host; /* time to advance to next connhost[]? */ int naddr; /* number of addresses returned by getaddrinfo */ @@ -488,6 +501,8 @@ struct pg_conn PGVerbosity verbosity; /* error/notice message verbosity */ PGContextVisibility show_context; /* whether to show CONTEXT field */ PGlobjfuncs *lobjfuncs; /* private state for large-object access fns */ + pg_prng_state prng_state; /* prng state for load balancing connections */ + /* Buffer for data received from backend and not yet processed */ char *inBuffer; /* currently allocated buffer */ diff --git a/src/interfaces/libpq/meson.build b/src/interfaces/libpq/meson.build index 3cd0ddb494..80e6a15adf 100644 --- a/src/interfaces/libpq/meson.build +++ b/src/interfaces/libpq/meson.build @@ -116,6 +116,8 @@ tests += { 'tests': [ 't/001_uri.pl', 't/002_api.pl', + 't/003_load_balance_host_list.pl', + 't/004_load_balance_dns.pl', ], 'env': {'with_ssl': ssl_library}, }, diff --git a/src/interfaces/libpq/t/003_load_balance_host_list.pl b/src/interfaces/libpq/t/003_load_balance_host_list.pl new file mode 100644 index 0000000000..6963ef3849 --- /dev/null +++ b/src/interfaces/libpq/t/003_load_balance_host_list.pl @@ -0,0 +1,81 @@ +# Copyright (c) 2023, PostgreSQL Global Development Group +use strict; +use warnings; +use Config; +use PostgreSQL::Test::Utils; +use PostgreSQL::Test::Cluster; +use Test::More; + +# This tests load balancing across the list of different hosts in the host +# parameter of the connection string. 
+ +# Cluster setup which is shared for testing both load balancing methods +my $node1 = PostgreSQL::Test::Cluster->new('node1'); +my $node2 = PostgreSQL::Test::Cluster->new('node2', own_host => 1); +my $node3 = PostgreSQL::Test::Cluster->new('node3', own_host => 1); + +# Create a data directory with initdb +$node1->init(); +$node2->init(); +$node3->init(); + +# Start the PostgreSQL server +$node1->start(); +$node2->start(); +$node3->start(); + +# Start the tests for load balancing method 1 +my $hostlist = $node1->host . ',' . $node2->host . ',' . $node3->host; +my $portlist = $node1->port . ',' . $node2->port . ',' . $node3->port; + +$node1->connect_fails( + "host=$hostlist port=$portlist load_balance_hosts=doesnotexist", + "load_balance_hosts doesn't accept unknown values", + expected_stderr => qr/invalid load_balance_hosts value: "doesnotexist"/); + +# load_balance_hosts=disable should always choose the first one. +$node1->connect_ok("host=$hostlist port=$portlist load_balance_hosts=disable", + "load_balance_hosts=disable connects to the first node", + sql => "SELECT 'connect2'", + log_like => [qr/statement: SELECT 'connect2'/]); + +# Statistically the following loop with load_balance_hosts=random will almost +# certainly connect at least once to each of the nodes. The chance of that not +# happening is so small that it's negligible: (2/3)^50 = 1.56832855e-9 +foreach my $i (1 .. 
50) { + $node1->connect_ok("host=$hostlist port=$portlist load_balance_hosts=random", + "repeated connections with random load balancing", + sql => "SELECT 'connect1'"); +} + +my $node1_occurences = () = $node1->log_content() =~ /statement: SELECT 'connect1'/g; +my $node2_occurences = () = $node2->log_content() =~ /statement: SELECT 'connect1'/g; +my $node3_occurences = () = $node3->log_content() =~ /statement: SELECT 'connect1'/g; + +my $total_occurences = $node1_occurences + $node2_occurences + $node3_occurences; + +ok($node1_occurences > 0, "received at least one connection on node1"); +ok($node2_occurences > 0, "received at least one connection on node2"); +ok($node3_occurences > 0, "received at least one connection on node3"); +ok($total_occurences == 50, "received 50 connections across all nodes"); + +$node1->stop(); +$node2->stop(); + +# load_balance_hosts=disable should continue trying hosts until it finds a +# working one. +$node3->connect_ok("host=$hostlist port=$portlist load_balance_hosts=disable", + "load_balance_hosts=disable continues until it connects to the a working node", + sql => "SELECT 'connect3'", + log_like => [qr/statement: SELECT 'connect3'/]); + +# Also with load_balance_hosts=random we continue to the next nodes if previous +# ones are down. Connect a few times to make sure it's not just lucky. +foreach my $i (1 .. 
5) { + $node3->connect_ok("host=$hostlist port=$portlist load_balance_hosts=random", + "load_balance_hosts=random continues until it connects to the a working node", + sql => "SELECT 'connect4'", + log_like => [qr/statement: SELECT 'connect4'/]); +} + +done_testing(); diff --git a/src/interfaces/libpq/t/004_load_balance_dns.pl b/src/interfaces/libpq/t/004_load_balance_dns.pl new file mode 100644 index 0000000000..f914916dd2 --- /dev/null +++ b/src/interfaces/libpq/t/004_load_balance_dns.pl @@ -0,0 +1,124 @@ +# Copyright (c) 2023, PostgreSQL Global Development Group +use strict; +use warnings; +use Config; +use PostgreSQL::Test::Utils; +use PostgreSQL::Test::Cluster; +use Test::More; + +if (!$ENV{PG_TEST_EXTRA} || $ENV{PG_TEST_EXTRA} !~ /\bload_balance\b/) +{ + plan skip_all => + 'Potentially unsafe test load_balance not enabled in PG_TEST_EXTRA'; +} + +# This tests load balancing based on a DNS entry that contains multiple records +# for different IPs. Since setting up a DNS server is more effort than we +# consider reasonable to run this test, this situation is instead imitated by +# using a hosts file where a single hostname maps to multiple different IP +# addresses. This test requires the administrator to add the following lines to +# the hosts file (if we detect that this hasn't happened we skip the test): +# +# 127.0.0.1 pg-loadbalancetest +# 127.0.0.2 pg-loadbalancetest +# 127.0.0.3 pg-loadbalancetest +# +# Windows or Linux are required to run this test because these OSes allow +# binding to 127.0.0.2 and 127.0.0.3 addresses by default, but other OSes +# don't. We need to bind to different IP addresses, so that we can use these +# different IP addresses in the hosts file. +# +# The hosts file needs to be prepared before running this test. We don't do it +# on the fly, because it requires root permissions to change the hosts file. In +# CI we set up the previously mentioned rules in the hosts file, so that this +# load balancing method is tested. 
+ +# Cluster setup which is shared for testing both load balancing methods +my $can_bind_to_127_0_0_2 = $Config{osname} eq 'linux' || $PostgreSQL::Test::Utils::windows_os; + +# Checks for the requirements for testing load balancing method 2 +if (!$can_bind_to_127_0_0_2) { + plan skip_all => 'load_balance test only supported on Linux and Windows'; +} + +my $hosts_path; +if ($windows_os) { + $hosts_path = 'c:\Windows\System32\Drivers\etc\hosts'; +} +else +{ + $hosts_path = '/etc/hosts'; +} + +my $hosts_content = PostgreSQL::Test::Utils::slurp_file($hosts_path); + +my $hosts_count = () = $hosts_content =~ /127\.0\.0\.[1-3] pg-loadbalancetest/g; +if ($hosts_count != 3) { + # Host file is not prepared for this test + plan skip_all => "hosts file was not prepared for DNS load balance test" +} + +$PostgreSQL::Test::Cluster::use_tcp = 1; +$PostgreSQL::Test::Cluster::test_pghost = '127.0.0.1'; +my $port = PostgreSQL::Test::Cluster::get_free_port(); +my $node1 = PostgreSQL::Test::Cluster->new('node1', port => $port); +my $node2 = PostgreSQL::Test::Cluster->new('node2', port => $port, own_host => 1); +my $node3 = PostgreSQL::Test::Cluster->new('node3', port => $port, own_host => 1); + +# Create a data directory with initdb +$node1->init(); +$node2->init(); +$node3->init(); + +# Start the PostgreSQL server +$node1->start(); +$node2->start(); +$node3->start(); + +# load_balance_hosts=disable should always choose the first one. +$node1->connect_ok("host=pg-loadbalancetest port=$port load_balance_hosts=disable", + "load_balance_hosts=disable connects to the first node", + sql => "SELECT 'connect2'", + log_like => [qr/statement: SELECT 'connect2'/]); + + +# Statistically the following loop with load_balance_hosts=random will almost +# certainly connect at least once to each of the nodes. The chance of that not +# happening is so small that it's negligible: (2/3)^50 = 1.56832855e-9 +foreach my $i (1 .. 
50) { + $node1->connect_ok("host=pg-loadbalancetest port=$port load_balance_hosts=random", + "repeated connections with random load balancing", + sql => "SELECT 'connect1'"); +} + +my $node1_occurences = () = $node1->log_content() =~ /statement: SELECT 'connect1'/g; +my $node2_occurences = () = $node2->log_content() =~ /statement: SELECT 'connect1'/g; +my $node3_occurences = () = $node3->log_content() =~ /statement: SELECT 'connect1'/g; + +my $total_occurences = $node1_occurences + $node2_occurences + $node3_occurences; + +ok($node1_occurences > 0, "received at least one connection on node1"); +ok($node2_occurences > 0, "received at least one connection on node2"); +ok($node3_occurences > 0, "received at least one connection on node3"); +ok($total_occurences == 50, "received 50 connections across all nodes"); + +$node1->stop(); +$node2->stop(); + +# load_balance_hosts=disable should continue trying hosts until it finds a +# working one. +$node3->connect_ok("host=pg-loadbalancetest port=$port load_balance_hosts=disable", + "load_balance_hosts=disable continues until it connects to the a working node", + sql => "SELECT 'connect3'", + log_like => [qr/statement: SELECT 'connect3'/]); + +# Also with load_balance_hosts=random we continue to the next nodes if previous +# ones are down. Connect a few times to make sure it's not just lucky. +foreach my $i (1 .. 
5) { + $node3->connect_ok("host=pg-loadbalancetest port=$port load_balance_hosts=random", + "load_balance_hosts=random continues until it connects to the a working node", + sql => "SELECT 'connect4'", + log_like => [qr/statement: SELECT 'connect4'/]); +} + +done_testing(); diff --git a/src/test/perl/PostgreSQL/Test/Cluster.pm b/src/test/perl/PostgreSQL/Test/Cluster.pm index 3e2a27fb71..a3aef8b5e9 100644 --- a/src/test/perl/PostgreSQL/Test/Cluster.pm +++ b/src/test/perl/PostgreSQL/Test/Cluster.pm @@ -2567,6 +2567,22 @@ sub issues_sql_like return; } +=pod + +=item $node->log_content() + +Returns the contents of the node's log file. + +=cut + +sub log_content +{ + my ($self) = @_; + return + PostgreSQL::Test::Utils::slurp_file($self->logfile); +} + + =pod =item $node->run_log(...) diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list index d4f4987829..75a296920e 100644 --- a/src/tools/pgindent/typedefs.list +++ b/src/tools/pgindent/typedefs.list @@ -1705,6 +1705,7 @@ PGFileType PGFunction PGLZ_HistEntry PGLZ_Strategy +PGLoadBalanceType PGMessageField PGModuleMagicFunction PGNoticeHooks