From db96e1ccfcb8f3514eef931bea9435974ec7ad28 Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Sat, 27 Nov 2010 01:30:34 -0500 Subject: [PATCH] Rewrite PQping to be more like what we agreed to last week. Basically, we want to distinguish all cases where the connection was not made from those where it was. A convenient proxy for this is to see if we got a message with a SQLSTATE code back from the postmaster. This presumes that the postmaster will always send us a SQLSTATE in a failure message, which is true for 7.4 and later postmasters in every case except fork failure. (We could possibly complicate the postmaster code to do something about that, but it seems not worth the trouble, especially since pg_ctl's response for that case should be to keep waiting anyway.) If we did get a SQLSTATE from the postmaster, there are basically only two cases, as per last week's discussion: ERRCODE_CANNOT_CONNECT_NOW and everything else. Any other error code implies that the postmaster is in principle willing to accept connections, it just didn't like or couldn't handle this particular request. We want to make a special case for ERRCODE_CANNOT_CONNECT_NOW so that "pg_ctl start -w" knows it should keep waiting. In passing, pick names for the enum constants that are a tad less likely to present collision hazards in future. --- doc/src/sgml/libpq.sgml | 152 +++++++++++++++------------- src/bin/pg_ctl/pg_ctl.c | 93 +++++++++-------- src/interfaces/libpq/fe-connect.c | 82 +++++++++++---- src/interfaces/libpq/fe-protocol3.c | 10 +- src/interfaces/libpq/libpq-fe.h | 7 +- src/interfaces/libpq/libpq-int.h | 1 + 6 files changed, 210 insertions(+), 135 deletions(-) diff --git a/doc/src/sgml/libpq.sgml b/doc/src/sgml/libpq.sgml index d2a4811315..18199b06ce 100644 --- a/doc/src/sgml/libpq.sgml +++ b/doc/src/sgml/libpq.sgml @@ -1060,6 +1060,90 @@ PostgresPollingStatusType PQresetPoll(PGconn *conn); + + PQpingParamsPQpingParams + + + PQpingParams reports the status of the + server. It accepts connection parameters identical to those of + PQconnectdbParams, described above. It is not, however, + necessary to supply correct user name, password, or database name + values to obtain the server status. + + +PGPing PQpingParams(const char **keywords, const char **values, int expand_dbname); + + + The function returns one of the following values: + + + + PQPING_OK + + + The server is running and appears to be accepting connections. + + + + + + PQPING_REJECT + + + The server is running but is in a state that disallows connections + (startup, shutdown, or crash recovery). + + + + + + PQPING_NO_RESPONSE + + + The server could not be contacted. + + + + + + PQPING_NO_ATTEMPT + + + No attempt was made to contact the server, because the supplied + parameters were incorrect or there was some client-side problem + (for example, out of memory). + + + + + + + + + + + + PQpingPQping + + + PQping reports the status of the + server. It accepts connection parameters identical to those of + PQconnectdb, described above. It is not, however, + necessary to supply correct user name, password, or database name + values to obtain the server status. + + +PGPing PQping(const char *conninfo); + + + + + The return values are the same as for PQpingParams. + + + + + @@ -1511,74 +1595,6 @@ int PQbackendPID(const PGconn *conn); - - PQpingParamsPQpingParams - - - PQpingParams indicates the status of the - server. The currently recognized parameter key words are the - same as PQconnectParams. - - -PGPing PQpingParams(const char **keywords, const char **values, int expand_dbname); - - - It returns one of the following values: - - - - PQACCESS - - - The server is running and allows access. - - - - - - PQREJECT - - - The server is running but rejected a connection request. - - - - - - PQNORESPONSE - - - The server did not respond. - - - - - - - - - - - - PQpingPQping - - - Returns the status of the server. - - -PGPing PQping(const char *conninfo); - - - - - This function uses the same conninfo parameter - key words as PQconnectdb. It returns the same - values as PQpingParams above. - - - - - PQconnectionNeedsPasswordPQconnectionNeedsPassword diff --git a/src/bin/pg_ctl/pg_ctl.c b/src/bin/pg_ctl/pg_ctl.c index dc30f5de9f..3cf2afcc27 100644 --- a/src/bin/pg_ctl/pg_ctl.c +++ b/src/bin/pg_ctl/pg_ctl.c @@ -397,20 +397,21 @@ start_postmaster(void) /* * Find the pgport and try a connection + * * Note that the checkpoint parameter enables a Windows service control * manager checkpoint, it's got nothing to do with database checkpoints!! */ static PGPing test_postmaster_connection(bool do_checkpoint) { - PGPing ret = PQACCESS; /* assume success for zero wait */ + PGPing ret = PQPING_OK; /* assume success for wait == zero */ int i; char portstr[32]; char *p; char *q; char connstr[128]; /* Should be way more than enough! */ - *portstr = '\0'; + portstr[0] = '\0'; /* * Look in post_opts for a -p switch. @@ -453,7 +454,7 @@ test_postmaster_connection(bool do_checkpoint) * This parsing code isn't amazingly bright either, but it should be okay * for valid port settings. */ - if (!*portstr) + if (!portstr[0]) { char **optlines; @@ -491,11 +492,11 @@ test_postmaster_connection(bool do_checkpoint) } /* Check environment */ - if (!*portstr && getenv("PGPORT") != NULL) + if (!portstr[0] && getenv("PGPORT") != NULL) strlcpy(portstr, getenv("PGPORT"), sizeof(portstr)); /* Else use compiled-in default */ - if (!*portstr) + if (!portstr[0]) snprintf(portstr, sizeof(portstr), "%d", DEF_PGPORT); /* @@ -507,34 +508,32 @@ test_postmaster_connection(bool do_checkpoint) for (i = 0; i < wait_seconds; i++) { - if ((ret = PQping(connstr)) != PQNORESPONSE) - return ret; - else - { + ret = PQping(connstr); + if (ret == PQPING_OK || ret == PQPING_NO_ATTEMPT) + break; + /* No response, or startup still in process; wait */ #if defined(WIN32) - if (do_checkpoint) - { - /* - * Increment the wait hint by 6 secs (connection timeout + - * sleep) We must do this to indicate to the SCM that our - * startup time is changing, otherwise it'll usually send a - * stop signal after 20 seconds, despite incrementing the - * checkpoint counter. + if (do_checkpoint) + { + /* + * Increment the wait hint by 6 secs (connection timeout + + * sleep) We must do this to indicate to the SCM that our + * startup time is changing, otherwise it'll usually send a + * stop signal after 20 seconds, despite incrementing the + * checkpoint counter. */ - status.dwWaitHint += 6000; - status.dwCheckPoint++; - SetServiceStatus(hStatus, (LPSERVICE_STATUS) &status); - } - - else -#endif - print_msg("."); - - pg_usleep(1000000); /* 1 sec */ + status.dwWaitHint += 6000; + status.dwCheckPoint++; + SetServiceStatus(hStatus, (LPSERVICE_STATUS) &status); } + else +#endif + print_msg("."); + + pg_usleep(1000000); /* 1 sec */ } - /* value of last call to PQping */ + /* return result of last call to PQping */ return ret; } @@ -738,24 +737,30 @@ do_start(void) if (do_wait) { - int status; - print_msg(_("waiting for server to start...")); - if ((status = test_postmaster_connection(false)) == PQNORESPONSE) + switch (test_postmaster_connection(false)) { - write_stderr(_("%s: could not start server\n" - "Examine the log output.\n"), - progname); - exit(1); - } - else - { - print_msg(_(" done\n")); - print_msg(_("server started\n")); - if (status == PQREJECT) - write_stderr(_("warning: could not connect; might be caused by invalid authentication or\n" - "misconfiguration.\n")); + case PQPING_OK: + print_msg(_(" done\n")); + print_msg(_("server started\n")); + break; + case PQPING_REJECT: + print_msg(_(" stopped waiting\n")); + print_msg(_("server is still starting up\n")); + break; + case PQPING_NO_RESPONSE: + print_msg(_(" stopped waiting\n")); + write_stderr(_("%s: could not start server\n" + "Examine the log output.\n"), + progname); + exit(1); + break; + case PQPING_NO_ATTEMPT: + print_msg(_(" failed\n")); + write_stderr(_("%s: could not wait for server because of misconfiguration\n"), + progname); + exit(1); } } else @@ -1289,7 +1294,7 @@ pgwin32_ServiceMain(DWORD argc, LPTSTR *argv) if (do_wait) { write_eventlog(EVENTLOG_INFORMATION_TYPE, _("Waiting for server startup...\n")); - if (test_postmaster_connection(true) == false) + if (test_postmaster_connection(true) != PQPING_OK) { write_eventlog(EVENTLOG_INFORMATION_TYPE, _("Timed out waiting for server startup\n")); pgwin32_SetServiceStatus(SERVICE_STOPPED); diff --git a/src/interfaces/libpq/fe-connect.c b/src/interfaces/libpq/fe-connect.c index 43969ebbd1..bff9b478f6 100644 --- a/src/interfaces/libpq/fe-connect.c +++ b/src/interfaces/libpq/fe-connect.c @@ -99,6 +99,8 @@ static int ldapServiceLookup(const char *purl, PQconninfoOption *options, /* This is part of the protocol so just define it */ #define ERRCODE_INVALID_PASSWORD "28P01" +/* This too */ +#define ERRCODE_CANNOT_CONNECT_NOW "57P03" /* * fall back options if they are not specified by arguments or defined @@ -376,10 +378,15 @@ PQconnectdbParams(const char **keywords, } +/* + * PQpingParams + * + * check server status, accepting parameters identical to PQconnectdbParams + */ PGPing PQpingParams(const char **keywords, - const char **values, - int expand_dbname) + const char **values, + int expand_dbname) { PGconn *conn = PQconnectStartParams(keywords, values, expand_dbname); PGPing ret; @@ -423,6 +430,11 @@ PQconnectdb(const char *conninfo) return conn; } +/* + * PQping + * + * check server status, accepting parameters identical to PQconnectdb + */ PGPing PQping(const char *conninfo) { @@ -1261,6 +1273,7 @@ connectDBStart(PGconn *conn) appendPQExpBuffer(&conn->errorMessage, libpq_gettext("invalid port number: \"%s\"\n"), conn->pgport); + conn->options_valid = false; goto connect_errReturn; } } @@ -1309,6 +1322,7 @@ connectDBStart(PGconn *conn) portstr, gai_strerror(ret)); if (addrs) pg_freeaddrinfo_all(hint.ai_family, addrs); + conn->options_valid = false; goto connect_errReturn; } @@ -2555,27 +2569,61 @@ error_return: /* * internal_ping - * Determine if a server is running and if we can connect to it. + * Determine if a server is running and if we can connect to it. + * + * The argument is a connection that's been started, but not completed. */ PGPing internal_ping(PGconn *conn) { - if (conn && conn->status != CONNECTION_BAD) - { + /* Say "no attempt" if we never got to PQconnectPoll */ + if (!conn || !conn->options_valid) + return PQPING_NO_ATTEMPT; + + /* Attempt to complete the connection */ + if (conn->status != CONNECTION_BAD) (void) connectDBComplete(conn); - /* - * If the connection needs a password, we can consider the - * server as accepting connections. - */ - if (conn && (conn->status != CONNECTION_BAD || - PQconnectionNeedsPassword(conn))) - return PQACCESS; - else - return PQREJECT; - } - else - return PQNORESPONSE; + /* Definitely OK if we succeeded */ + if (conn->status != CONNECTION_BAD) + return PQPING_OK; + + /* + * Here is the interesting part of "ping": determine the cause of the + * failure in sufficient detail to decide what to return. We do not want + * to report that the server is not up just because we didn't have a valid + * password, for example. + * + * If we failed to get any ERROR response from the postmaster, report + * PQPING_NO_RESPONSE. This result could be somewhat misleading for a + * pre-7.4 server, since it won't send back a SQLSTATE, but those are long + * out of support. Another corner case where the server could return a + * failure without a SQLSTATE is fork failure, but NO_RESPONSE isn't + * totally unreasonable for that anyway. We expect that every other + * failure case in a modern server will produce a report with a SQLSTATE. + * + * NOTE: whenever we get around to making libpq generate SQLSTATEs for + * client-side errors, we should either not store those into + * last_sqlstate, or add an extra flag so we can tell client-side errors + * apart from server-side ones. + */ + if (strlen(conn->last_sqlstate) != 5) + return PQPING_NO_RESPONSE; + + /* + * Report PQPING_REJECT if server says it's not accepting connections. + * (We distinguish this case mainly for the convenience of pg_ctl.) + */ + if (strcmp(conn->last_sqlstate, ERRCODE_CANNOT_CONNECT_NOW) == 0) + return PQPING_REJECT; + + /* + * Any other SQLSTATE can be taken to indicate that the server is up. + * Presumably it didn't like our username, password, or database name; or + * perhaps it had some transient failure, but that should not be taken as + * meaning "it's down". + */ + return PQPING_OK; } diff --git a/src/interfaces/libpq/fe-protocol3.c b/src/interfaces/libpq/fe-protocol3.c index b8e4bee31b..cf9407de7a 100644 --- a/src/interfaces/libpq/fe-protocol3.c +++ b/src/interfaces/libpq/fe-protocol3.c @@ -758,15 +758,19 @@ pqGetErrorNotice3(PGconn *conn, bool isError) /* * Now build the "overall" error message for PQresultErrorMessage. + * + * Also, save the SQLSTATE in conn->last_sqlstate. */ resetPQExpBuffer(&workBuf); val = PQresultErrorField(res, PG_DIAG_SEVERITY); if (val) appendPQExpBuffer(&workBuf, "%s: ", val); - if (conn->verbosity == PQERRORS_VERBOSE) + val = PQresultErrorField(res, PG_DIAG_SQLSTATE); + if (val) { - val = PQresultErrorField(res, PG_DIAG_SQLSTATE); - if (val) + if (strlen(val) < sizeof(conn->last_sqlstate)) + strcpy(conn->last_sqlstate, val); + if (conn->verbosity == PQERRORS_VERBOSE) appendPQExpBuffer(&workBuf, "%s: ", val); } val = PQresultErrorField(res, PG_DIAG_MESSAGE_PRIMARY); diff --git a/src/interfaces/libpq/libpq-fe.h b/src/interfaces/libpq/libpq-fe.h index d1a6dd413a..d9e3067894 100644 --- a/src/interfaces/libpq/libpq-fe.h +++ b/src/interfaces/libpq/libpq-fe.h @@ -109,9 +109,10 @@ typedef enum typedef enum { - PQACCESS, /* connected to server */ - PQREJECT, /* server rejected access */ - PQNORESPONSE /* server did not respond */ + PQPING_OK, /* server is accepting connections */ + PQPING_REJECT, /* server is alive but rejecting connections */ + PQPING_NO_RESPONSE, /* could not establish connection */ + PQPING_NO_ATTEMPT /* connection not attempted (bad params) */ } PGPing; /* PGconn encapsulates a connection to the backend. diff --git a/src/interfaces/libpq/libpq-int.h b/src/interfaces/libpq/libpq-int.h index 5dc7a122f3..f6a7f39065 100644 --- a/src/interfaces/libpq/libpq-int.h +++ b/src/interfaces/libpq/libpq-int.h @@ -333,6 +333,7 @@ struct pg_conn PGTransactionStatusType xactStatus; /* never changes to ACTIVE */ PGQueryClass queryclass; char *last_query; /* last SQL command, or NULL if unknown */ + char last_sqlstate[6]; /* last reported SQLSTATE */ bool options_valid; /* true if OK to attempt connection */ bool nonblocking; /* whether this connection is using nonblock * sending semantics */