From 2d612abd4d330cd9d3f5a3f5936b20a6e837a69c Mon Sep 17 00:00:00 2001 From: Peter Eisentraut Date: Mon, 28 May 2012 22:44:34 +0300 Subject: [PATCH] libpq: URI parsing fixes Drop special handling of host component with slashes to mean Unix-domain socket. Specify it as separate parameter or using percent-encoding now. Allow omitting username, password, and port even if the corresponding designators are present in URI. Handle percent-encoding in query parameter keywords. Alex Shulgin some documentation improvements by myself --- doc/src/sgml/libpq.sgml | 219 ++++++++++++---------- src/interfaces/libpq/fe-connect.c | 248 +++++++++++-------------- src/interfaces/libpq/test/expected.out | 48 +++-- src/interfaces/libpq/test/regress.in | 16 +- 4 files changed, 272 insertions(+), 259 deletions(-) diff --git a/doc/src/sgml/libpq.sgml b/doc/src/sgml/libpq.sgml index 8a820ac007..ef1d95ed20 100644 --- a/doc/src/sgml/libpq.sgml +++ b/doc/src/sgml/libpq.sgml @@ -711,6 +711,124 @@ PGPing PQping(const char *conninfo); + + Connection Strings + + + conninfo + + + + URI + + + + Several libpq functions parse a user-specified string to obtain + connection parameters. There are two accepted formats for these strings: + plain keyword = value strings + and RFC + 3986 URIs. + + + + Keyword/Value Connection Strings + + + In the first format, each parameter setting is in the form + keyword = value. Spaces around the equal sign are + optional. To write an empty value, or a value containing spaces, surround it + with single quotes, e.g., keyword = 'a value'. Single + quotes and backslashes within + the value must be escaped with a backslash, i.e., \' and + \\. + + + + Example: + +host=localhost port=5432 dbname=mydb connect_timeout=10 + + + + + The recognized parameter key words are listed in . + + + + + Connection URIs + + + The general form for a connection URI is: + +postgresql://[user[:password]@][netloc][:port][/dbname][?param1=value1&...] 
+ + + + + The URI scheme designator can be either + postgresql:// or postgres://. Each + of the URI parts is optional. The following examples + illustrate valid URI syntax uses: + +postgresql:// +postgresql://localhost +postgresql://localhost:5433 +postgresql://localhost/mydb +postgresql://user@localhost +postgresql://user:secret@localhost +postgresql://other@localhost/otherdb?connect_timeout=10&application_name=myapp + + Components of the hierarchical part of the URI can also + be given as parameters. For example: + +postgresql:///mydb?host=localhost&port=5433 + + + + + Percent-encoding may be used to include symbols with special meaning in any + of the URI parts. + + + + Any connection parameters not corresponding to key words listed in <xref linkend="libpq-paramkeywords"> are ignored and a warning message about them + is sent to stderr. + + + + For improved compatibility with JDBC connection URIs, + instances of parameter ssl=true are translated into + sslmode=require. + + + + The host part may be either a host name or an IP address. To specify an + IPv6 host address, enclose it in square brackets: + +postgresql://[2001:db8::1234]/database + + + + + The host component is interpreted as described for the parameter <xref linkend="libpq-connect-host">. In particular, a Unix-domain socket + connection is chosen if the host part is either empty or starts with a + slash, otherwise a TCP/IP connection is initiated. Note, however, that the + slash is a reserved character in the hierarchical part of the URI. So, to + specify a non-standard Unix-domain socket directory, either omit the host + specification in the URI and specify the host as a parameter, or + percent-encode the path in the host component of the URI: + +postgresql:///dbname?host=/var/lib/postgresql +postgresql://%2Fvar%2Flib%2Fpostgresql/dbname + + + + + Parameter Key Words @@ -1220,107 +1338,6 @@ PGPing PQping(const char *conninfo); - - - Connection Strings - - - conninfo - - - - URI - - - - Several libpq functions parse a user-specified string to obtain - connection parameters. 
There are two accepted formats for these strings: - plain keyword = value strings, and URIs. - - - - In the first format, each parameter setting is in the form - keyword = value. Spaces around the equal sign are - optional. To write an empty value, or a value containing spaces, surround it - with single quotes, e.g., keyword = 'a value'. Single - quotes and backslashes within - the value must be escaped with a backslash, i.e., \' and - \\. - - - - The currently recognized parameter key words are listed in - . - - - - The general form for connection URI is the - following: - -postgresql://[user[:password]@][unix-socket][:port[/dbname]][?param1=value1&...] -postgresql://[user[:password]@][net-location][:port][/dbname][?param1=value1&...] - - - - - The URI designator can be either - postgresql:// or postgres:// and - each of the URI parts is optional. The following - examples illustrate valid URI syntax uses: - -postgresql:// -postgresql://localhost -postgresql://localhost:5433 -postgresql://localhost/mydb -postgresql://user@localhost -postgresql://user:secret@localhost -postgresql://other@localhost/otherdb - - - - - Percent-encoding may be used to include a symbol with special meaning in - any of the URI parts. - - - - Additional connection parameters may optionally follow the base URI. - Any connection parameters not corresponding to key words listed - in are ignored and a warning message - about them is sent to stderr. - - - - For improved compatibility with JDBC connection URI - syntax, instances of parameter ssl=true are translated - into sslmode=require (see above.) - - - - The host part may be either hostname or an IP address. 
To specify an - IPv6 host address, enclose it in square brackets: - -postgresql://[2001:db8::1234]/database - - As a special case, a host part which starts with / is - treated as a local Unix socket directory to look for the connection - socket special file: - -postgresql:///path/to/pgsql/socket/dir - - The whole connection string up to the extra parameters designator - (?) or the port designator (:) is treated - as the absolute path to the socket directory - (/path/to/pgsql/socket/dir in this example.) To specify - a non-default database name in this case you can use either of the following - syntaxes: - -postgresql:///path/to/pgsql/socket/dir?dbname=otherdb -postgresql:///path/to/pgsql/socket/dir:5432/otherdb - - - - diff --git a/src/interfaces/libpq/fe-connect.c b/src/interfaces/libpq/fe-connect.c index d0b2ea47cb..960811f233 100644 --- a/src/interfaces/libpq/fe-connect.c +++ b/src/interfaces/libpq/fe-connect.c @@ -4544,18 +4544,15 @@ conninfo_uri_parse(const char *uri, PQExpBuffer errorMessage, * options from the URI. * If not successful, returns false and fills errorMessage accordingly. * - * Parses the connection URI string in 'uri' according to the URI syntax: + * Parses the connection URI string in 'uri' according to the URI syntax (RFC + * 3986): * - * postgresql://[user[:pwd]@][unix-socket][:port[/dbname]][?param1=value1&...] - * postgresql://[user[:pwd]@][net-location][:port][/dbname][?param1=value1&...] + * postgresql://[user[:password]@][netloc][:port][/dbname][?param1=value1&...] * - * "net-location" is a hostname, an IPv4 address, or an IPv6 address surrounded - * by literal square brackets. To be recognized as a unix-domain socket, the - * value must start with a slash '/'. Note slight inconsistency in that dbname - * can always be specified after net-location, but after unix-socket it can only - * be specified if there is a port specification. 
+ * where "netloc" is a hostname, an IPv4 address, or an IPv6 address surrounded + * by literal square brackets. * - * Any of those elements might be percent-encoded (%xy). + * Any of the URI parts might use percent-encoding (%xy). */ static bool conninfo_uri_parse_options(PQconninfoOption *options, const char *uri, @@ -4566,6 +4563,8 @@ conninfo_uri_parse_options(PQconninfoOption *options, const char *uri, char *buf = strdup(uri); /* need a modifiable copy of the input URI */ char *start = buf; char prevchar = '\0'; + char *user = NULL; + char *host = NULL; bool retval = false; if (buf == NULL) @@ -4593,8 +4592,6 @@ conninfo_uri_parse_options(PQconninfoOption *options, const char *uri, ++p; if (*p == '@') { - char *user; - /* * Found username/password designator, so URI should be of the form * "scheme://user[:password]@[netloc]". @@ -4609,14 +4606,8 @@ conninfo_uri_parse_options(PQconninfoOption *options, const char *uri, prevchar = *p; *p = '\0'; - if (!*user) - { - printfPQExpBuffer(errorMessage, - libpq_gettext("invalid empty username specifier in URI: %s\n"), - uri); - goto cleanup; - } - if (!conninfo_storeval(options, "user", user, + if (*user && + !conninfo_storeval(options, "user", user, errorMessage, false, true)) goto cleanup; @@ -4628,15 +4619,8 @@ conninfo_uri_parse_options(PQconninfoOption *options, const char *uri, ++p; *p = '\0'; - if (!*password) - { - printfPQExpBuffer(errorMessage, - libpq_gettext("invalid empty password specifier in URI: %s\n"), - uri); - goto cleanup; - } - - if (!conninfo_storeval(options, "password", password, + if (*password && + !conninfo_storeval(options, "password", password, errorMessage, false, true)) goto cleanup; } @@ -4656,88 +4640,66 @@ conninfo_uri_parse_options(PQconninfoOption *options, const char *uri, * "p" has been incremented past optional URI credential information at * this point and now points at the "netloc" part of the URI. * - * Check for local unix socket dir. + * Look for IPv6 address. 
*/ - if (*p == '/') + if (*p == '[') { - const char *socket = p; - - /* Look for possible port specifier or query parameters */ - while (*p && *p != ':' && *p != '?') + host = ++p; + while (*p && *p != ']') ++p; - prevchar = *p; - *p = '\0'; - - if (!conninfo_storeval(options, "host", socket, - errorMessage, false, true)) + if (!*p) + { + printfPQExpBuffer(errorMessage, + libpq_gettext("end of string reached when looking for matching ']' in IPv6 host address in URI: %s\n"), + uri); goto cleanup; + } + if (p == host) + { + printfPQExpBuffer(errorMessage, + libpq_gettext("IPv6 host address may not be empty in URI: %s\n"), + uri); + goto cleanup; + } + + /* Cut off the bracket and advance */ + *(p++) = '\0'; + + /* + * The address may be followed by a port specifier or a slash or a + * query. + */ + if (*p && *p != ':' && *p != '/' && *p != '?') + { + printfPQExpBuffer(errorMessage, + libpq_gettext("unexpected '%c' at position %d in URI (expecting ':' or '/'): %s\n"), + *p, (int) (p - buf + 1), uri); + goto cleanup; + } } else { - /* Not a unix socket dir: parse as host name or address */ - const char *host; + /* not an IPv6 address: DNS-named or IPv4 netloc */ + host = p; /* - * - * Look for IPv6 address + * Look for port specifier (colon) or end of host specifier + * (slash), or query (question mark). */ - if (*p == '[') - { - host = ++p; - while (*p && *p != ']') - ++p; - if (!*p) - { - printfPQExpBuffer(errorMessage, - libpq_gettext("end of string reached when looking for matching ']' in IPv6 host address in URI: %s\n"), - uri); - goto cleanup; - } - if (p == host) - { - printfPQExpBuffer(errorMessage, - libpq_gettext("IPv6 host address may not be empty in URI: %s\n"), - uri); - goto cleanup; - } - - /* Cut off the bracket and advance */ - *(p++) = '\0'; - - /* - * The address may be followed by a port specifier or a slash or a - * query. 
- */ - if (*p && *p != ':' && *p != '/' && *p != '?') - { - printfPQExpBuffer(errorMessage, - libpq_gettext("unexpected '%c' at position %d in URI (expecting ':' or '/'): %s\n"), - *p, (int) (p - buf + 1), uri); - goto cleanup; - } - } - else - { - /* not an IPv6 address: DNS-named or IPv4 netloc */ - host = p; - - /* - * Look for port specifier (colon) or end of host specifier - * (slash), or query (question mark). - */ - while (*p && *p != ':' && *p != '/' && *p != '?') - ++p; - } - - /* Save the hostname terminator before we null it */ - prevchar = *p; - *p = '\0'; - - if (!conninfo_storeval(options, "host", host, - errorMessage, false, true)) - goto cleanup; + while (*p && *p != ':' && *p != '/' && *p != '?') + ++p; } + /* Save the hostname terminator before we null it */ + prevchar = *p; + *p = '\0'; + + if (*host && + !conninfo_storeval(options, "host", host, + errorMessage, false, true)) + goto cleanup; + + if (prevchar == ':') { const char *port = ++p; /* advance past host terminator */ @@ -4748,14 +4710,8 @@ conninfo_uri_parse_options(PQconninfoOption *options, const char *uri, prevchar = *p; *p = '\0'; - if (!*port) - { - printfPQExpBuffer(errorMessage, - libpq_gettext("missing port specifier in URI: %s\n"), - uri); - goto cleanup; - } - if (!conninfo_storeval(options, "port", port, + if (*port && + !conninfo_storeval(options, "port", port, errorMessage, false, true)) goto cleanup; } @@ -4813,9 +4769,10 @@ conninfo_uri_parse_params(char *params, { while (*params) { - const char *keyword = params; - const char *value = NULL; + char *keyword = params; + char *value = NULL; char *p = params; + bool malloced = false; /* * Scan the params string for '=' and '&', marking the end of keyword @@ -4866,35 +4823,66 @@ conninfo_uri_parse_params(char *params, ++p; } + keyword = conninfo_uri_decode(keyword, errorMessage); + if (keyword == NULL) + { + /* conninfo_uri_decode already set an error message */ + return false; + } + value = conninfo_uri_decode(value, 
errorMessage); + if (value == NULL) + { + /* conninfo_uri_decode already set an error message */ + free(keyword); + return false; + } + malloced = true; + /* - * Special keyword handling for improved JDBC compatibility. Note - * we fail to detect URI-encoded values here, but we don't care. + * Special keyword handling for improved JDBC compatibility. */ if (strcmp(keyword, "ssl") == 0 && strcmp(value, "true") == 0) { + free(keyword); + free(value); + malloced = false; + keyword = "sslmode"; value = "require"; } /* * Store the value if the corresponding option exists; ignore - * otherwise. + * otherwise. At this point both keyword and value are not + * URI-encoded. */ if (!conninfo_storeval(connOptions, keyword, value, - errorMessage, true, true)) + errorMessage, true, false)) { /* * Check if there was a hard error when decoding or storing the * option. */ if (errorMessage->len != 0) + { + if (malloced) + { + free(keyword); + free(value); + } return false; + } fprintf(stderr, libpq_gettext("WARNING: ignoring unrecognized URI query parameter: %s\n"), keyword); } + if (malloced) + { + free(keyword); + free(value); + } /* Proceed to next key=value pair */ params = p; @@ -5017,7 +5005,8 @@ conninfo_getval(PQconninfoOption *connOptions, * Store a (new) value for an option corresponding to the keyword in * connOptions array. * - * If uri_decode is true, keyword and value are URI-decoded. + * If uri_decode is true, the value is URI-decoded. The keyword is always + * assumed to be non URI-encoded. * * If successful, returns a pointer to the corresponding PQconninfoOption, * which value is replaced with a strdup'd copy of the passed value string. @@ -5034,32 +5023,16 @@ conninfo_storeval(PQconninfoOption *connOptions, bool uri_decode) { PQconninfoOption *option; - char *value_copy; - char *keyword_copy = NULL; + char *value_copy; - /* - * Decode the keyword. XXX this is seldom necessary as keywords do not - * normally need URI-escaping. 
It'd be good to do away with the - * malloc/free overhead and the general ugliness, but I don't see a - * better way to handle it. - */ - if (uri_decode) - { - keyword_copy = conninfo_uri_decode(keyword, errorMessage); - if (keyword_copy == NULL) - /* conninfo_uri_decode already set an error message */ - goto failed; - } - - option = conninfo_find(connOptions, - keyword_copy != NULL ? keyword_copy : keyword); + option = conninfo_find(connOptions, keyword); if (option == NULL) { if (!ignoreMissing) printfPQExpBuffer(errorMessage, libpq_gettext("invalid connection option \"%s\"\n"), keyword); - goto failed; + return NULL; } if (uri_decode) @@ -5067,7 +5040,7 @@ conninfo_storeval(PQconninfoOption *connOptions, value_copy = conninfo_uri_decode(value, errorMessage); if (value_copy == NULL) /* conninfo_uri_decode already set an error message */ - goto failed; + return NULL; } else { @@ -5076,7 +5049,7 @@ conninfo_storeval(PQconninfoOption *connOptions, if (value_copy == NULL) { printfPQExpBuffer(errorMessage, libpq_gettext("out of memory\n")); - goto failed; + return NULL; } } @@ -5084,14 +5057,7 @@ conninfo_storeval(PQconninfoOption *connOptions, free(option->val); option->val = value_copy; - if (keyword_copy != NULL) - free(keyword_copy); return option; - -failed: - if (keyword_copy != NULL) - free(keyword_copy); - return NULL; } /* diff --git a/src/interfaces/libpq/test/expected.out b/src/interfaces/libpq/test/expected.out index 54a6291bc4..6d5077d762 100644 --- a/src/interfaces/libpq/test/expected.out +++ b/src/interfaces/libpq/test/expected.out @@ -20,7 +20,7 @@ trying postgresql://uri-user@host/ user='uri-user' host='host' (inet) trying postgresql://uri-user@ -user='uri-user' host='' (local) +user='uri-user' (local) trying postgresql://host:12345/ host='host' port='12345' (inet) @@ -38,10 +38,10 @@ trying postgresql://host host='host' (inet) trying postgresql:// -host='' (local) +(local) trying postgresql://?hostaddr=127.0.0.1 -host='' hostaddr='127.0.0.1' (inet) 
+hostaddr='127.0.0.1' (inet) trying postgresql://example.com?hostaddr=63.1.2.4 host='example.com' hostaddr='63.1.2.4' (inet) @@ -59,7 +59,7 @@ trying postgresql://host/db?u%73er=someotheruser&port=12345 user='someotheruser' dbname='db' host='host' port='12345' (inet) trying postgresql://host/db?u%7aer=someotheruser&port=12345 -WARNING: ignoring unrecognized URI query parameter: u%7aer +WARNING: ignoring unrecognized URI query parameter: uzer dbname='db' host='host' port='12345' (inet) trying postgresql://host:12345?user=uri-user @@ -87,10 +87,19 @@ trying postgresql://[::1] host='::1' (inet) trying postgres:// -host='' (local) +(local) -trying postgres:///tmp -host='/tmp' (local) +trying postgres:/// +(local) + +trying postgres:///db +dbname='db' (local) + +trying postgres://uri-user@/db +user='uri-user' dbname='db' (local) + +trying postgres://?host=/path/to/socket/dir +host='/path/to/socket/dir' (local) trying postgresql://host?uzer= WARNING: ignoring unrecognized URI query parameter: uzer @@ -145,19 +154,32 @@ uri-regress: invalid percent-encoded token: % trying postgres://@host -uri-regress: invalid empty username specifier in URI: postgres://@host - +host='host' (inet) trying postgres://host:/ -uri-regress: missing port specifier in URI: postgres://host:/ +host='host' (inet) +trying postgres://:12345/ +port='12345' (local) -trying postgres://otheruser@/no/such/directory +trying postgres://otheruser@?host=/no/such/directory user='otheruser' host='/no/such/directory' (local) -trying postgres://otheruser@/no/such/socket/path:12345 +trying postgres://otheruser@/?host=/no/such/directory +user='otheruser' host='/no/such/directory' (local) + +trying postgres://otheruser@:12345?host=/no/such/socket/path user='otheruser' host='/no/such/socket/path' port='12345' (local) -trying postgres://otheruser@/path/to/socket:12345/db +trying postgres://otheruser@:12345/db?host=/path/to/socket user='otheruser' dbname='db' host='/path/to/socket' port='12345' (local) +trying 
postgres://:12345/db?host=/path/to/socket +dbname='db' host='/path/to/socket' port='12345' (local) + +trying postgres://:12345?host=/path/to/socket +host='/path/to/socket' port='12345' (local) + +trying postgres://%2Fvar%2Flib%2Fpostgresql/dbname +dbname='dbname' host='/var/lib/postgresql' (local) + diff --git a/src/interfaces/libpq/test/regress.in b/src/interfaces/libpq/test/regress.in index 8d33ae1ac1..de034f3914 100644 --- a/src/interfaces/libpq/test/regress.in +++ b/src/interfaces/libpq/test/regress.in @@ -28,7 +28,10 @@ postgresql://[2001:db8::1234]/ postgresql://[200z:db8::1234]/ postgresql://[::1] postgres:// -postgres:///tmp +postgres:/// +postgres:///db +postgres://uri-user@/db +postgres://?host=/path/to/socket/dir postgresql://host?uzer= postgre:// postgres://[::1 @@ -44,6 +47,11 @@ postgresql://%1 postgresql://% postgres://@host postgres://host:/ -postgres://otheruser@/no/such/directory -postgres://otheruser@/no/such/socket/path:12345 -postgres://otheruser@/path/to/socket:12345/db +postgres://:12345/ +postgres://otheruser@?host=/no/such/directory +postgres://otheruser@/?host=/no/such/directory +postgres://otheruser@:12345?host=/no/such/socket/path +postgres://otheruser@:12345/db?host=/path/to/socket +postgres://:12345/db?host=/path/to/socket +postgres://:12345?host=/path/to/socket +postgres://%2Fvar%2Flib%2Fpostgresql/dbname