postgresql/src/backend/libpq/pqcomm.c

915 lines
23 KiB
C
Raw Normal View History

1998-09-10 06:07:59 +02:00
/*-------------------------------------------------------------------------
*
* pqcomm.c
* Communication functions between the Frontend and the Backend
*
* These routines handle the low-level details of communication between
* frontend and backend. They just shove data across the communication
* channel, and are ignorant of the semantics of the data --- or would be,
* except for major brain damage in the design of the old COPY OUT protocol.
* Unfortunately, COPY OUT was designed to commandeer the communication
* channel (it just transfers data without wrapping it into messages).
* No other messages can be sent while COPY OUT is in progress; and if the
* copy is aborted by an elog(ERROR), we need to close out the copy so that
* the frontend gets back into sync. Therefore, these routines have to be
* aware of COPY OUT state. (New COPY-OUT is message-based and does *not*
* set the DoingCopyOut flag.)
*
* NOTE: generally, it's a bad idea to emit outgoing messages directly with
* pq_putbytes(), especially if the message would require multiple calls
* to send. Instead, use the routines in pqformat.c to construct the message
* in a buffer and then emit it in one call to pq_putmessage. This ensures
* that the channel will not be clogged by an incomplete message if execution
* is aborted by elog(ERROR) partway through the message. The only non-libpq
* code that should call pq_putbytes directly is old-style COPY OUT.
*
* At one time, libpq was shared between frontend and backend, but now
* the backend's "backend/libpq" is quite separate from "interfaces/libpq".
* All that remains is similarities of names to trap the unwary...
*
2002-06-20 22:29:54 +02:00
* Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* $Header: /cvsroot/pgsql/src/backend/libpq/pqcomm.c,v 1.152 2003/04/25 01:24:00 momjian Exp $
*
*-------------------------------------------------------------------------
*/
/*------------------------
* INTERFACE ROUTINES
*
* setup/teardown:
* StreamServerPort - Open postmaster's server port
* StreamConnection - Create new connection with client
* StreamClose - Close a client/backend connection
* TouchSocketFile - Protect socket file against /tmp cleaners
* pq_init - initialize libpq at backend startup
* pq_close - shutdown libpq at backend exit
*
* low-level I/O:
* pq_getbytes - get a known number of bytes from connection
* pq_getstring - get a null terminated string from connection
* pq_getmessage - get a message with length word from connection
* pq_getbyte - get next byte from connection
* pq_peekbyte - peek at next byte from connection
* pq_putbytes - send bytes to connection (not flushed until pq_flush)
* pq_flush - flush pending output
*
* message-level I/O (and old-style-COPY-OUT cruft):
* pq_putmessage - send a normal message (suppressed in COPY OUT mode)
1999-05-25 18:15:34 +02:00
* pq_startcopyout - inform libpq that a COPY OUT transfer is beginning
* pq_endcopyout - end a COPY OUT transfer
*
*------------------------
*/
#include "postgres.h"
#include <signal.h>
1996-11-06 09:48:33 +01:00
#include <errno.h>
#include <fcntl.h>
#include <grp.h>
1999-07-16 05:14:30 +02:00
#include <unistd.h>
#include <sys/file.h>
#include <sys/socket.h>
#include <sys/stat.h>
#include <sys/time.h>
#include <netdb.h>
#include <netinet/in.h>
#ifdef HAVE_NETINET_TCP_H
2001-03-22 05:01:46 +01:00
#include <netinet/tcp.h>
#endif
1996-11-06 09:48:33 +01:00
#include <arpa/inet.h>
#ifdef HAVE_UTIME_H
#include <utime.h>
#endif
1999-07-16 05:14:30 +02:00
#include "libpq/libpq.h"
#include "miscadmin.h"
#include "storage/ipc.h"
static void pq_close(void);
#ifdef HAVE_UNIX_SOCKETS
static int Lock_AF_UNIX(unsigned short portNumber, char *unixSocketName);
static int Setup_AF_UNIX(void);
#endif /* HAVE_UNIX_SOCKETS */
/*
* Configuration options
*/
2001-03-22 05:01:46 +01:00
int Unix_socket_permissions;
char *Unix_socket_group;
/*
* Buffers for low-level I/O
*/
#define PQ_BUFFER_SIZE 8192
static unsigned char PqSendBuffer[PQ_BUFFER_SIZE];
1999-05-25 18:15:34 +02:00
static int PqSendPointer; /* Next index to store a byte in
* PqSendBuffer */
static unsigned char PqRecvBuffer[PQ_BUFFER_SIZE];
1999-05-25 18:15:34 +02:00
static int PqRecvPointer; /* Next index to read a byte from
* PqRecvBuffer */
static int PqRecvLength; /* End of data available in PqRecvBuffer */
/*
* Message status
*/
static bool DoingCopyOut;
/* --------------------------------
* pq_init - initialize libpq at backend startup
* --------------------------------
*/
void
pq_init(void)
{
PqSendPointer = PqRecvPointer = PqRecvLength = 0;
DoingCopyOut = false;
on_proc_exit(pq_close, 0);
}
/* --------------------------------
* pq_close - shutdown libpq at backend exit
*
* Note: in a standalone backend MyProcPort will be null,
* don't crash during exit...
* --------------------------------
*/
static void
pq_close(void)
{
if (MyProcPort != NULL)
{
UPDATED PATCH: Attached are a revised set of SSL patches. Many of these patches are motivated by security concerns, it's not just bug fixes. The key differences (from stock 7.2.1) are: *) almost all code that directly uses the OpenSSL library is in two new files, src/interfaces/libpq/fe-ssl.c src/backend/postmaster/be-ssl.c in the long run, it would be nice to merge these two files. *) the legacy code to read and write network data have been encapsulated into read_SSL() and write_SSL(). These functions should probably be renamed - they handle both SSL and non-SSL cases. the remaining code should eliminate the problems identified earlier, albeit not very cleanly. *) both front- and back-ends will send a SSL shutdown via the new close_SSL() function. This is necessary for sessions to work properly. (Sessions are not yet fully supported, but by cleanly closing the SSL connection instead of just sending a TCP FIN packet other SSL tools will be much happier.) *) The client certificate and key are now expected in a subdirectory of the user's home directory. Specifically, - the directory .postgresql must be owned by the user, and allow no access by 'group' or 'other.' - the file .postgresql/postgresql.crt must be a regular file owned by the user. - the file .postgresql/postgresql.key must be a regular file owned by the user, and allow no access by 'group' or 'other'. At the current time encrypted private keys are not supported. There should also be a way to support multiple client certs/keys. *) the front-end performs minimal validation of the back-end cert. Self-signed certs are permitted, but the common name *must* match the hostname used by the front-end. (The cert itself should always use a fully qualified domain name (FDQN) in its common name field.) This means that psql -h eris db will fail, but psql -h eris.example.com db will succeed. At the current time this must be an exact match; future patches may support any FQDN that resolves to the address returned by getpeername(2). Another common "problem" is expiring certs. For now, it may be a good idea to use a very-long-lived self-signed cert. As a compile-time option, the front-end can specify a file containing valid root certificates, but it is not yet required. *) the back-end performs minimal validation of the client cert. It allows self-signed certs. It checks for expiration. It supports a compile-time option specifying a file containing valid root certificates. *) both front- and back-ends default to TLSv1, not SSLv3/SSLv2. *) both front- and back-ends support DSA keys. DSA keys are moderately more expensive on startup, but many people consider them preferable than RSA keys. (E.g., SSH2 prefers DSA keys.) *) if /dev/urandom exists, both client and server will read 16k of randomization data from it. *) the server can read empheral DH parameters from the files $DataDir/dh512.pem $DataDir/dh1024.pem $DataDir/dh2048.pem $DataDir/dh4096.pem if none are provided, the server will default to hardcoded parameter files provided by the OpenSSL project. Remaining tasks: *) the select() clauses need to be revisited - the SSL abstraction layer may need to absorb more of the current code to avoid rare deadlock conditions. This also touches on a true solution to the pg_eof() problem. *) the SIGPIPE signal handler may need to be revisited. *) support encrypted private keys. *) sessions are not yet fully supported. (SSL sessions can span multiple "connections," and allow the client and server to avoid costly renegotiations.) *) makecert - a script that creates back-end certs. *) pgkeygen - a tool that creates front-end certs. *) the whole protocol issue, SASL, etc. *) certs are fully validated - valid root certs must be available. This is a hassle, but it means that you *can* trust the identity of the server. *) the client library can handle hardcoded root certificates, to avoid the need to copy these files. *) host name of server cert must resolve to IP address, or be a recognized alias. This is more liberal than the previous iteration. *) the number of bytes transferred is tracked, and the session key is periodically renegotiated. *) basic cert generation scripts (mkcert.sh, pgkeygen.sh). The configuration files have reasonable defaults for each type of use. Bear Giles
2002-06-14 06:23:17 +02:00
secure_close(MyProcPort);
closesocket(MyProcPort->sock);
/* make sure any subsequent attempts to do I/O fail cleanly */
MyProcPort->sock = -1;
}
}
/*
* Streams -- wrapper around Unix socket system calls
*
*
* Stream functions are used for vanilla TCP connection protocol.
*/
static char sock_path[MAXPGPATH];
/* StreamDoUnlink()
* Shutdown routine for backend connection
* If a Unix socket is used for communication, explicitly close it.
*/
#ifdef HAVE_UNIX_SOCKETS
static void
StreamDoUnlink(void)
{
Assert(sock_path[0]);
unlink(sock_path);
}
#endif /* HAVE_UNIX_SOCKETS */
/*
* StreamServerPort -- open a sock stream "listening" port.
*
* This initializes the Postmaster's connection-accepting port *fdP.
*
* RETURNS: STATUS_OK or STATUS_ERROR
*/
int
2000-11-14 02:15:06 +01:00
StreamServerPort(int family, char *hostName, unsigned short portNumber,
UUNET is looking into offering PostgreSQL as a part of a managed web hosting product, on both shared and dedicated machines. We currently offer Oracle and MySQL, and it would be a nice middle-ground. However, as shipped, PostgreSQL lacks the following features we need that MySQL has: 1. The ability to listen only on a particular IP address. Each hosting customer has their own IP address, on which all of their servers (http, ftp, real media, etc.) run. 2. The ability to place the Unix-domain socket in a mode 700 directory. This allows us to automatically create an empty database, with an empty DBA password, for new or upgrading customers without having to interactively set a DBA password and communicate it to (or from) the customer. This in turn cuts down our install and upgrade times. 3. The ability to connect to the Unix-domain socket from within a change-rooted environment. We run CGI programs chrooted to the user's home directory, which is another reason why we need to be able to specify where the Unix-domain socket is, instead of /tmp. 4. The ability to, if run as root, open a pid file in /var/run as root, and then setuid to the desired user. (mysqld -u can almost do this; I had to patch it, too). The patch below fixes problem 1-3. I plan to address #4, also, but haven't done so yet. These diffs are big enough that they should give the PG development team something to think about in the meantime :-) Also, I'm about to leave for 2 weeks' vacation, so I thought I'd get out what I have, which works (for the problems it tackles), now. With these changes, we can set up and run PostgreSQL with scripts the same way we can with apache or proftpd or mysql. In summary, this patch makes the following enhancements: 1. Adds an environment variable PGUNIXSOCKET, analogous to MYSQL_UNIX_PORT, and command line options -k --unix-socket to the relevant programs. 2. Adds a -h option to postmaster to set the hostname or IP address to listen on instead of the default INADDR_ANY. 3. Extends some library interfaces to support the above. 4. Fixes a few memory leaks in PQconnectdb(). The default behavior is unchanged from stock 7.0.2; if you don't use any of these new features, they don't change the operation. David J. MacKenzie
2000-11-13 16:18:15 +01:00
char *unixSocketName, int *fdP)
{
int fd,
err;
int maxconn;
int one = 1;
int ret;
char portNumberStr[64];
char *service;
struct addrinfo *addrs = NULL;
struct addrinfo hint;
1999-05-10 18:10:51 +02:00
#ifdef HAVE_UNIX_SOCKETS
Assert(family == AF_UNIX || isAF_INETx(family));
#else
Assert(isAF_INETx(family));
#endif
/* Initialize hint structure */
MemSet(&hint, 0, sizeof(hint));
hint.ai_family = family;
hint.ai_flags = AI_PASSIVE;
hint.ai_socktype = SOCK_STREAM;
#ifdef HAVE_UNIX_SOCKETS
if (family == AF_UNIX)
{
if (Lock_AF_UNIX(portNumber, unixSocketName) != STATUS_OK)
return STATUS_ERROR;
service = sock_path;
}
else
#endif /* HAVE_UNIX_SOCKETS */
{
snprintf(portNumberStr, sizeof(portNumberStr), "%d", portNumber);
service = portNumberStr;
}
ret = getaddrinfo2(hostName, service, &hint, &addrs);
if (ret || addrs == NULL)
{
elog(LOG, "server socket failure: getaddrinfo2(): %s",
gai_strerror(ret));
if (addrs != NULL)
freeaddrinfo2(hint.ai_family, addrs);
return STATUS_ERROR;
}
if ((fd = socket(family, SOCK_STREAM, 0)) < 0)
{
elog(LOG, "server socket failure: socket(): %s",
strerror(errno));
freeaddrinfo2(hint.ai_family, addrs);
return STATUS_ERROR;
}
if (isAF_INETx(family))
{
if ((setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, (char *) &one,
sizeof(one))) == -1)
{
elog(LOG, "server socket failure: setsockopt(SO_REUSEADDR): %s",
strerror(errno));
freeaddrinfo2(hint.ai_family, addrs);
return STATUS_ERROR;
}
}
Assert(addrs->ai_next == NULL && addrs->ai_family == family);
err = bind(fd, addrs->ai_addr, addrs->ai_addrlen);
if (err < 0)
{
elog(LOG, "server socket failure: bind(): %s\n"
"\tIs another postmaster already running on port %d?",
strerror(errno), (int) portNumber);
if (family == AF_UNIX)
elog(LOG, "\tIf not, remove socket node (%s) and retry.",
sock_path);
else
elog(LOG, "\tIf not, wait a few seconds and retry.");
freeaddrinfo2(hint.ai_family, addrs);
return STATUS_ERROR;
}
#ifdef HAVE_UNIX_SOCKETS
if (family == AF_UNIX)
{
if (Setup_AF_UNIX() != STATUS_OK)
{
freeaddrinfo2(hint.ai_family, addrs);
return STATUS_ERROR;
}
}
#endif
/*
* Select appropriate accept-queue length limit. PG_SOMAXCONN is only
* intended to provide a clamp on the request on platforms where an
* overly large request provokes a kernel error (are there any?).
*/
maxconn = MaxBackends * 2;
if (maxconn > PG_SOMAXCONN)
maxconn = PG_SOMAXCONN;
err = listen(fd, maxconn);
if (err < 0)
{
elog(LOG, "server socket failure: listen(): %s",
strerror(errno));
freeaddrinfo2(hint.ai_family, addrs);
return STATUS_ERROR;
}
*fdP = fd;
freeaddrinfo2(hint.ai_family, addrs);
return STATUS_OK;
}
#ifdef HAVE_UNIX_SOCKETS
/*
* Lock_AF_UNIX -- configure unix socket file path
*/
static int
Lock_AF_UNIX(unsigned short portNumber, char *unixSocketName)
{
SockAddr saddr; /* just used to get socket path */
UNIXSOCK_PATH(saddr.un, portNumber, unixSocketName);
strcpy(sock_path, saddr.un.sun_path);
/*
* Grab an interlock file associated with the socket file.
*/
if (!CreateSocketLockFile(sock_path, true))
return STATUS_ERROR;
/*
* Once we have the interlock, we can safely delete any pre-existing
* socket file to avoid failure at bind() time.
*/
unlink(sock_path);
return STATUS_OK;
}
/*
* Setup_AF_UNIX -- configure unix socket permissions
*/
static int
Setup_AF_UNIX(void)
{
/* Arrange to unlink the socket file at exit */
on_proc_exit(StreamDoUnlink, 0);
/*
* Fix socket ownership/permission if requested. Note we must do this
* before we listen() to avoid a window where unwanted connections
* could get accepted.
*/
Assert(Unix_socket_group);
if (Unix_socket_group[0] != '\0')
{
char *endptr;
unsigned long int val;
gid_t gid;
val = strtoul(Unix_socket_group, &endptr, 10);
if (*endptr == '\0')
{ /* numeric group id */
gid = val;
}
else
{ /* convert group name to id */
struct group *gr;
gr = getgrnam(Unix_socket_group);
if (!gr)
{
elog(LOG, "server socket failure: no such group '%s'",
Unix_socket_group);
return STATUS_ERROR;
}
gid = gr->gr_gid;
}
if (chown(sock_path, -1, gid) == -1)
{
elog(LOG, "server socket failure: could not set group of %s: %s",
sock_path, strerror(errno));
return STATUS_ERROR;
}
}
if (chmod(sock_path, Unix_socket_permissions) == -1)
{
elog(LOG, "server socket failure: could not set permissions on %s: %s",
sock_path, strerror(errno));
return STATUS_ERROR;
}
return STATUS_OK;
}
#endif /* HAVE_UNIX_SOCKETS */
/*
* StreamConnection -- create a new connection with client using
* server port.
*
* ASSUME: that this doesn't need to be non-blocking because
* the Postmaster uses select() to tell when the server master
* socket is ready for accept().
*
* RETURNS: STATUS_OK or STATUS_ERROR
*/
int
StreamConnection(int server_fd, Port *port)
{
ACCEPT_TYPE_ARG3 addrlen;
/* accept connection (and fill in the client (remote) address) */
addrlen = sizeof(port->raddr);
if ((port->sock = accept(server_fd,
(struct sockaddr *) &port->raddr,
1997-11-10 06:16:00 +01:00
&addrlen)) < 0)
{
elog(LOG, "StreamConnection: accept() failed: %m");
1998-09-01 05:29:17 +02:00
return STATUS_ERROR;
}
1997-11-10 06:16:00 +01:00
#ifdef SCO_ACCEPT_BUG
/*
2001-03-22 05:01:46 +01:00
* UnixWare 7+ and OpenServer 5.0.4 are known to have this bug, but it
* shouldn't hurt to catch it for all versions of those platforms.
*/
if (port->raddr.sa.sa_family == 0)
port->raddr.sa.sa_family = AF_UNIX;
#endif
/* fill in the server (local) address */
addrlen = sizeof(port->laddr);
if (getsockname(port->sock, (struct sockaddr *) & port->laddr,
&addrlen) < 0)
{
elog(LOG, "StreamConnection: getsockname() failed: %m");
1998-09-01 05:29:17 +02:00
return STATUS_ERROR;
}
/* select NODELAY and KEEPALIVE options if it's a TCP connection */
if (isAF_INETx(port->laddr.sa.sa_family))
{
int on = 1;
if (setsockopt(port->sock, IPPROTO_TCP, TCP_NODELAY,
(char *) &on, sizeof(on)) < 0)
{
elog(LOG, "StreamConnection: setsockopt(TCP_NODELAY) failed: %m");
return STATUS_ERROR;
}
if (setsockopt(port->sock, SOL_SOCKET, SO_KEEPALIVE,
(char *) &on, sizeof(on)) < 0)
{
elog(LOG, "StreamConnection: setsockopt(SO_KEEPALIVE) failed: %m");
1998-09-01 05:29:17 +02:00
return STATUS_ERROR;
}
}
1998-09-01 05:29:17 +02:00
return STATUS_OK;
}
/*
* StreamClose -- close a client/backend connection
*/
void
StreamClose(int sock)
{
closesocket(sock);
}
/*
* TouchSocketFile -- mark socket file as recently accessed
*
* This routine should be called every so often to ensure that the socket
* file has a recent mod date (ordinary operations on sockets usually won't
* change the mod date). That saves it from being removed by
* overenthusiastic /tmp-directory-cleaner daemons. (Another reason we should
* never have put the socket file in /tmp...)
*/
void
TouchSocketFile(void)
{
/* Do nothing if we did not create a socket... */
if (sock_path[0] != '\0')
{
/*
* utime() is POSIX standard, utimes() is a common alternative.
* If we have neither, there's no way to affect the mod or access
* time of the socket :-(
*
* In either path, we ignore errors; there's no point in complaining.
*/
#ifdef HAVE_UTIME
utime(sock_path, NULL);
#else /* !HAVE_UTIME */
#ifdef HAVE_UTIMES
utimes(sock_path, NULL);
#endif /* HAVE_UTIMES */
#endif /* HAVE_UTIME */
}
}
/* --------------------------------
* Low-level I/O routines begin here.
*
* These routines communicate with a frontend client across a connection
* already established by the preceding routines.
* --------------------------------
*/
/* --------------------------------
* pq_recvbuf - load some bytes into the input buffer
*
* returns 0 if OK, EOF if trouble
* --------------------------------
*/
static int
pq_recvbuf(void)
{
if (PqRecvPointer > 0)
{
if (PqRecvLength > PqRecvPointer)
{
/* still some unread data, left-justify it in the buffer */
1999-05-25 18:15:34 +02:00
memmove(PqRecvBuffer, PqRecvBuffer + PqRecvPointer,
PqRecvLength - PqRecvPointer);
PqRecvLength -= PqRecvPointer;
PqRecvPointer = 0;
}
else
PqRecvLength = PqRecvPointer = 0;
}
/* Can fill buffer from PqRecvLength and upwards */
for (;;)
{
int r;
UPDATED PATCH: Attached are a revised set of SSL patches. Many of these patches are motivated by security concerns, it's not just bug fixes. The key differences (from stock 7.2.1) are: *) almost all code that directly uses the OpenSSL library is in two new files, src/interfaces/libpq/fe-ssl.c src/backend/postmaster/be-ssl.c in the long run, it would be nice to merge these two files. *) the legacy code to read and write network data have been encapsulated into read_SSL() and write_SSL(). These functions should probably be renamed - they handle both SSL and non-SSL cases. the remaining code should eliminate the problems identified earlier, albeit not very cleanly. *) both front- and back-ends will send a SSL shutdown via the new close_SSL() function. This is necessary for sessions to work properly. (Sessions are not yet fully supported, but by cleanly closing the SSL connection instead of just sending a TCP FIN packet other SSL tools will be much happier.) *) The client certificate and key are now expected in a subdirectory of the user's home directory. Specifically, - the directory .postgresql must be owned by the user, and allow no access by 'group' or 'other.' - the file .postgresql/postgresql.crt must be a regular file owned by the user. - the file .postgresql/postgresql.key must be a regular file owned by the user, and allow no access by 'group' or 'other'. At the current time encrypted private keys are not supported. There should also be a way to support multiple client certs/keys. *) the front-end performs minimal validation of the back-end cert. Self-signed certs are permitted, but the common name *must* match the hostname used by the front-end. (The cert itself should always use a fully qualified domain name (FDQN) in its common name field.) This means that psql -h eris db will fail, but psql -h eris.example.com db will succeed. At the current time this must be an exact match; future patches may support any FQDN that resolves to the address returned by getpeername(2). Another common "problem" is expiring certs. For now, it may be a good idea to use a very-long-lived self-signed cert. As a compile-time option, the front-end can specify a file containing valid root certificates, but it is not yet required. *) the back-end performs minimal validation of the client cert. It allows self-signed certs. It checks for expiration. It supports a compile-time option specifying a file containing valid root certificates. *) both front- and back-ends default to TLSv1, not SSLv3/SSLv2. *) both front- and back-ends support DSA keys. DSA keys are moderately more expensive on startup, but many people consider them preferable than RSA keys. (E.g., SSH2 prefers DSA keys.) *) if /dev/urandom exists, both client and server will read 16k of randomization data from it. *) the server can read empheral DH parameters from the files $DataDir/dh512.pem $DataDir/dh1024.pem $DataDir/dh2048.pem $DataDir/dh4096.pem if none are provided, the server will default to hardcoded parameter files provided by the OpenSSL project. Remaining tasks: *) the select() clauses need to be revisited - the SSL abstraction layer may need to absorb more of the current code to avoid rare deadlock conditions. This also touches on a true solution to the pg_eof() problem. *) the SIGPIPE signal handler may need to be revisited. *) support encrypted private keys. *) sessions are not yet fully supported. (SSL sessions can span multiple "connections," and allow the client and server to avoid costly renegotiations.) *) makecert - a script that creates back-end certs. *) pgkeygen - a tool that creates front-end certs. *) the whole protocol issue, SASL, etc. *) certs are fully validated - valid root certs must be available. This is a hassle, but it means that you *can* trust the identity of the server. *) the client library can handle hardcoded root certificates, to avoid the need to copy these files. *) host name of server cert must resolve to IP address, or be a recognized alias. This is more liberal than the previous iteration. *) the number of bytes transferred is tracked, and the session key is periodically renegotiated. *) basic cert generation scripts (mkcert.sh, pgkeygen.sh). The configuration files have reasonable defaults for each type of use. Bear Giles
2002-06-14 06:23:17 +02:00
r = secure_read(MyProcPort, PqRecvBuffer + PqRecvLength,
PQ_BUFFER_SIZE - PqRecvLength);
1999-05-25 18:15:34 +02:00
if (r < 0)
{
if (errno == EINTR)
continue; /* Ok if interrupted */
1999-05-25 18:15:34 +02:00
/*
2002-09-04 22:31:48 +02:00
* Careful: an elog() that tries to write to the client would
* cause recursion to here, leading to stack overflow and core
* dump! This message must go *only* to the postmaster log.
*/
elog(COMMERROR, "pq_recvbuf: recv() failed: %m");
return EOF;
}
if (r == 0)
{
/*
* EOF detected. We used to write a log message here, but it's
* better to expect the ultimate caller to do that.
*/
return EOF;
}
/* r contains number of bytes read, so just incr length */
PqRecvLength += r;
return 0;
}
}
/* --------------------------------
* pq_getbyte - get a single byte from connection, or return EOF
* --------------------------------
*/
int
pq_getbyte(void)
{
while (PqRecvPointer >= PqRecvLength)
{
if (pq_recvbuf()) /* If nothing in buffer, then recv some */
return EOF; /* Failed to recv data */
}
return PqRecvBuffer[PqRecvPointer++];
}
1998-09-10 06:07:59 +02:00
/* --------------------------------
* pq_peekbyte - peek at next byte from connection
*
* Same as pq_getbyte() except we don't advance the pointer.
* --------------------------------
*/
int
pq_peekbyte(void)
{
while (PqRecvPointer >= PqRecvLength)
{
if (pq_recvbuf()) /* If nothing in buffer, then recv some */
return EOF; /* Failed to recv data */
}
return PqRecvBuffer[PqRecvPointer];
}
1998-09-10 06:07:59 +02:00
/* --------------------------------
* pq_getbytes - get a known number of bytes from connection
*
* returns 0 if OK, EOF if trouble
* --------------------------------
*/
int
pq_getbytes(char *s, size_t len)
{
1999-05-25 18:15:34 +02:00
size_t amount;
while (len > 0)
{
while (PqRecvPointer >= PqRecvLength)
{
if (pq_recvbuf()) /* If nothing in buffer, then recv some */
return EOF; /* Failed to recv data */
}
amount = PqRecvLength - PqRecvPointer;
if (amount > len)
amount = len;
memcpy(s, PqRecvBuffer + PqRecvPointer, amount);
PqRecvPointer += amount;
s += amount;
len -= amount;
}
return 0;
}
/* --------------------------------
* pq_getstring - get a null terminated string from connection
*
* The return value is placed in an expansible StringInfo, which has
* already been initialized by the caller.
*
* This is used only for dealing with old-protocol clients. The idea
* is to produce a StringInfo that looks the same as we would get from
* pq_getmessage() with a newer client; we will then process it with
* pq_getmsgstring. Therefore, no character set conversion is done here,
* even though this is presumably useful only for text.
*
* returns 0 if OK, EOF if trouble
* --------------------------------
*/
int
pq_getstring(StringInfo s)
{
int i;
/* Reset string to empty */
s->len = 0;
s->data[0] = '\0';
s->cursor = 0;
/* Read until we get the terminating '\0' */
2002-09-04 22:31:48 +02:00
for (;;)
{
while (PqRecvPointer >= PqRecvLength)
{
2002-09-04 22:31:48 +02:00
if (pq_recvbuf()) /* If nothing in buffer, then recv some */
return EOF; /* Failed to recv data */
}
for (i = PqRecvPointer; i < PqRecvLength; i++)
2002-04-21 03:03:33 +02:00
{
if (PqRecvBuffer[i] == '\0')
{
/* include the '\0' in the copy */
appendBinaryStringInfo(s, PqRecvBuffer + PqRecvPointer,
i - PqRecvPointer + 1);
2002-09-04 22:31:48 +02:00
PqRecvPointer = i + 1; /* advance past \0 */
return 0;
}
2002-04-21 03:03:33 +02:00
}
/* If we're here we haven't got the \0 in the buffer yet. */
appendBinaryStringInfo(s, PqRecvBuffer + PqRecvPointer,
PqRecvLength - PqRecvPointer);
PqRecvPointer = PqRecvLength;
}
}
/* --------------------------------
* pq_getmessage - get a message with length word from connection
*
* The return value is placed in an expansible StringInfo, which has
* already been initialized by the caller.
* Only the message body is placed in the StringInfo; the length word
* is removed. Also, s->cursor is initialized to zero for convenience
* in scanning the message contents.
*
* If maxlen is not zero, it is an upper limit on the length of the
* message we are willing to accept. We abort the connection (by
* returning EOF) if client tries to send more than that.
*
* returns 0 if OK, EOF if trouble
* --------------------------------
*/
int
pq_getmessage(StringInfo s, int maxlen)
{
int32 len;
/* Reset message buffer to empty */
s->len = 0;
s->data[0] = '\0';
s->cursor = 0;
/* Read message length word */
if (pq_getbytes((char *) &len, 4) == EOF)
{
elog(COMMERROR, "unexpected EOF within message length word");
return EOF;
}
len = ntohl(len);
len -= 4; /* discount length itself */
if (len < 0 ||
(maxlen > 0 && len > maxlen))
{
elog(COMMERROR, "invalid message length");
return EOF;
}
if (len > 0)
{
/* Allocate space for message */
enlargeStringInfo(s, len);
/* And grab the message */
if (pq_getbytes(s->data, len) == EOF)
{
elog(COMMERROR, "incomplete client message");
return EOF;
}
s->len = len;
/* Place a trailing null per StringInfo convention */
s->data[len] = '\0';
}
return 0;
}
/* --------------------------------
* pq_putbytes - send bytes to connection (not flushed until pq_flush)
*
* returns 0 if OK, EOF if trouble
* --------------------------------
*/
int
pq_putbytes(const char *s, size_t len)
{
1999-05-25 18:15:34 +02:00
size_t amount;
while (len > 0)
{
if (PqSendPointer >= PQ_BUFFER_SIZE)
if (pq_flush()) /* If buffer is full, then flush it out */
return EOF;
amount = PQ_BUFFER_SIZE - PqSendPointer;
if (amount > len)
amount = len;
memcpy(PqSendBuffer + PqSendPointer, s, amount);
PqSendPointer += amount;
s += amount;
len -= amount;
}
return 0;
}
/* --------------------------------
* pq_flush - flush pending output
*
* returns 0 if OK, EOF if trouble
* --------------------------------
*/
int
pq_flush(void)
{
2002-09-04 22:31:48 +02:00
static int last_reported_send_errno = 0;
unsigned char *bufptr = PqSendBuffer;
unsigned char *bufend = PqSendBuffer + PqSendPointer;
while (bufptr < bufend)
{
int r;
UPDATED PATCH: Attached are a revised set of SSL patches. Many of these patches are motivated by security concerns, it's not just bug fixes. The key differences (from stock 7.2.1) are: *) almost all code that directly uses the OpenSSL library is in two new files, src/interfaces/libpq/fe-ssl.c src/backend/postmaster/be-ssl.c in the long run, it would be nice to merge these two files. *) the legacy code to read and write network data have been encapsulated into read_SSL() and write_SSL(). These functions should probably be renamed - they handle both SSL and non-SSL cases. the remaining code should eliminate the problems identified earlier, albeit not very cleanly. *) both front- and back-ends will send a SSL shutdown via the new close_SSL() function. This is necessary for sessions to work properly. (Sessions are not yet fully supported, but by cleanly closing the SSL connection instead of just sending a TCP FIN packet other SSL tools will be much happier.) *) The client certificate and key are now expected in a subdirectory of the user's home directory. Specifically, - the directory .postgresql must be owned by the user, and allow no access by 'group' or 'other.' - the file .postgresql/postgresql.crt must be a regular file owned by the user. - the file .postgresql/postgresql.key must be a regular file owned by the user, and allow no access by 'group' or 'other'. At the current time encrypted private keys are not supported. There should also be a way to support multiple client certs/keys. *) the front-end performs minimal validation of the back-end cert. Self-signed certs are permitted, but the common name *must* match the hostname used by the front-end. (The cert itself should always use a fully qualified domain name (FDQN) in its common name field.) This means that psql -h eris db will fail, but psql -h eris.example.com db will succeed. At the current time this must be an exact match; future patches may support any FQDN that resolves to the address returned by getpeername(2). Another common "problem" is expiring certs. For now, it may be a good idea to use a very-long-lived self-signed cert. As a compile-time option, the front-end can specify a file containing valid root certificates, but it is not yet required. *) the back-end performs minimal validation of the client cert. It allows self-signed certs. It checks for expiration. It supports a compile-time option specifying a file containing valid root certificates. *) both front- and back-ends default to TLSv1, not SSLv3/SSLv2. *) both front- and back-ends support DSA keys. DSA keys are moderately more expensive on startup, but many people consider them preferable than RSA keys. (E.g., SSH2 prefers DSA keys.) *) if /dev/urandom exists, both client and server will read 16k of randomization data from it. *) the server can read empheral DH parameters from the files $DataDir/dh512.pem $DataDir/dh1024.pem $DataDir/dh2048.pem $DataDir/dh4096.pem if none are provided, the server will default to hardcoded parameter files provided by the OpenSSL project. Remaining tasks: *) the select() clauses need to be revisited - the SSL abstraction layer may need to absorb more of the current code to avoid rare deadlock conditions. This also touches on a true solution to the pg_eof() problem. *) the SIGPIPE signal handler may need to be revisited. *) support encrypted private keys. *) sessions are not yet fully supported. (SSL sessions can span multiple "connections," and allow the client and server to avoid costly renegotiations.) *) makecert - a script that creates back-end certs. *) pgkeygen - a tool that creates front-end certs. *) the whole protocol issue, SASL, etc. *) certs are fully validated - valid root certs must be available. This is a hassle, but it means that you *can* trust the identity of the server. *) the client library can handle hardcoded root certificates, to avoid the need to copy these files. *) host name of server cert must resolve to IP address, or be a recognized alias. This is more liberal than the previous iteration. *) the number of bytes transferred is tracked, and the session key is periodically renegotiated. *) basic cert generation scripts (mkcert.sh, pgkeygen.sh). The configuration files have reasonable defaults for each type of use. Bear Giles
2002-06-14 06:23:17 +02:00
r = secure_write(MyProcPort, bufptr, bufend - bufptr);
if (r <= 0)
{
if (errno == EINTR)
continue; /* Ok if we were interrupted */
1999-05-25 18:15:34 +02:00
/*
2002-09-04 22:31:48 +02:00
* Careful: an elog() that tries to write to the client would
* cause recursion to here, leading to stack overflow and core
* dump! This message must go *only* to the postmaster log.
*
2002-09-04 22:31:48 +02:00
* If a client disconnects while we're in the midst of output, we
* might write quite a bit of data before we get to a safe
* query abort point. So, suppress duplicate log messages.
*/
if (errno != last_reported_send_errno)
{
last_reported_send_errno = errno;
elog(COMMERROR, "pq_flush: send() failed: %m");
}
1999-05-25 18:15:34 +02:00
/*
* We drop the buffered data anyway so that processing can
* continue, even though we'll probably quit soon.
*/
PqSendPointer = 0;
return EOF;
}
2002-09-04 22:31:48 +02:00
last_reported_send_errno = 0; /* reset after any successful send */
bufptr += r;
}
PqSendPointer = 0;
return 0;
}
/* --------------------------------
* Message-level I/O routines begin here.
*
* These routines understand about the old-style COPY OUT protocol.
* --------------------------------
*/
/* --------------------------------
* pq_putmessage - send a normal message (suppressed in COPY OUT mode)
*
* If msgtype is not '\0', it is a message type code to place before
* the message body. If msgtype is '\0', then the message has no type
* code (this is only valid in pre-3.0 protocols).
*
* len is the length of the message body data at *s. In protocol 3.0
* and later, a message length word (equal to len+4 because it counts
* itself too) is inserted by this routine.
*
* All normal messages are suppressed while old-style COPY OUT is in
* progress. (In practice only a few notice messages might get emitted
* then; dropping them is annoying, but at least they will still appear
* in the postmaster log.)
*
* returns 0 if OK, EOF if trouble
* --------------------------------
*/
int
pq_putmessage(char msgtype, const char *s, size_t len)
{
if (DoingCopyOut)
return 0;
if (msgtype)
if (pq_putbytes(&msgtype, 1))
return EOF;
if (PG_PROTOCOL_MAJOR(FrontendProtocol) >= 3)
{
uint32 n32;
n32 = htonl((uint32) (len + 4));
if (pq_putbytes((char *) &n32, 4))
return EOF;
}
return pq_putbytes(s, len);
}
/* --------------------------------
* pq_startcopyout - inform libpq that an old-style COPY OUT transfer
* is beginning
* --------------------------------
*/
void
pq_startcopyout(void)
{
DoingCopyOut = true;
}
/* --------------------------------
* pq_endcopyout - end an old-style COPY OUT transfer
*
* If errorAbort is indicated, we are aborting a COPY OUT due to an error,
* and must send a terminator line. Since a partial data line might have
* been emitted, send a couple of newlines first (the first one could
* get absorbed by a backslash...) Note that old-style COPY OUT does
* not allow binary transfers, so a textual terminator is always correct.
* --------------------------------
*/
void
pq_endcopyout(bool errorAbort)
{
1999-05-25 18:15:34 +02:00
if (!DoingCopyOut)
return;
DoingCopyOut = false;
if (errorAbort)
pq_putbytes("\n\n\\.\n", 5);
/* in non-error case, copy.c will have emitted the terminator line */
}