/*
 * postgresql/src/backend/port/sysv_sema.c
 *
 * (Repository web-view header: 524 lines, 15 KiB, C; Raw / Normal View / History)
 */

/*-------------------------------------------------------------------------
*
* sysv_sema.c
* Implement PGSemaphores using SysV semaphore facilities
*
*
*
* Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/port/sysv_sema.c,v 1.5 2003/03/25 16:15:44 petere Exp $
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#include <errno.h>
#include <signal.h>
#include <unistd.h>
#include <sys/file.h>
#ifdef HAVE_SYS_IPC_H
#include <sys/ipc.h>
#endif
#ifdef HAVE_SYS_SEM_H
#include <sys/sem.h>
#endif
#ifdef HAVE_KERNEL_OS_H
#include <kernel/OS.h>
#endif
#include "miscadmin.h"
#include "storage/ipc.h"
#include "storage/pg_sema.h"
#ifndef HAVE_UNION_SEMUN
/*
 * Fallback declaration of union semun, the type of the fourth argument
 * passed to semctl(2) throughout this file.  It is only compiled when
 * HAVE_UNION_SEMUN is not defined, i.e. when the build configuration says
 * the system headers do not supply the union themselves.
 */
union semun
{
int val; /* integer argument; this file sets it for SETVAL */
struct semid_ds *buf; /* not referenced by the code in this file's visible part */
unsigned short *array; /* not referenced by the code in this file's visible part */
};
#endif
typedef uint32 IpcSemaphoreKey; /* semaphore key passed to semget(2) */
typedef int IpcSemaphoreId; /* semaphore ID returned by semget(2) */
/*
 * SEMAS_PER_SET is the number of useful semaphores in each semaphore set
 * we allocate. It must be *less than* your kernel's SEMMSL (max semaphores
 * per set) parameter, which is often around 25. (Less than, because we
 * allocate one extra sema in each set for identification purposes.)
 */
#define SEMAS_PER_SET 16
/* Permission bits OR'ed into the semget(2) flags when creating a set. */
#define IPCProtection (0600) /* access/modify by user only */
/*
 * Value stored in the extra (identification) semaphore of each set, and
 * checked by IpcSemaphoreCreate to recognize sets created by this code.
 */
#define PGSemaMagic 537 /* must be less than SEMVMX */
/*
 * File-local bookkeeping for the semaphore sets acquired by this process.
 * PGReserveSemaphores initializes all of these; PGSemaphoreCreate advances
 * them as semaphores are handed out.
 */
static IpcSemaphoreId *mySemaSets;	/* IDs of sema sets acquired so far */
static int	numSemaSets;		/* number of sema sets acquired so far */
static int	maxSemaSets;		/* allocated size of mySemaSets array */
static IpcSemaphoreKey nextSemaKey; /* next key to try using */
static int	nextSemaNumber;		/* next free sem num in last sema set */

/* Forward declarations of the file-local helper routines */
static IpcSemaphoreId InternalIpcSemaphoreCreate(IpcSemaphoreKey semKey,
						   int numSems);
static void IpcSemaphoreInitialize(IpcSemaphoreId semId, int semNum,
					   int value);
static void IpcSemaphoreKill(IpcSemaphoreId semId);
static int	IpcSemaphoreGetValue(IpcSemaphoreId semId, int semNum);
static pid_t IpcSemaphoreGetLastPID(IpcSemaphoreId semId, int semNum);
static IpcSemaphoreId IpcSemaphoreCreate(int numSems);
static void ReleaseSemaphores(int status, Datum arg);
/*
 * InternalIpcSemaphoreCreate
 *
 * Attempt to create a new semaphore set with the specified key.
 *
 * semKey:	key handed to semget(2)
 * numSems: number of semaphores to request in the set
 *
 * Returns the new set's ID on success.  Returns -1 if the failure looks
 * like a collision with an existing set (EEXIST, EACCES, or EIDRM where
 * that code exists).
 *
 * If we fail with a failure code other than collision-with-existing-set,
 * print out an error and abort via proc_exit(1).  Other types of errors
 * suggest nonrecoverable problems.
 */
static IpcSemaphoreId
InternalIpcSemaphoreCreate(IpcSemaphoreKey semKey, int numSems)
{
	int			semId;

	semId = semget(semKey, numSems, IPC_CREAT | IPC_EXCL | IPCProtection);

	if (semId < 0)
	{
		/*
		 * Capture errno immediately: the fprintf() calls below are allowed
		 * to modify it, which would otherwise make the later ENOSPC test
		 * unreliable.
		 */
		int			saved_errno = errno;

		/*
		 * Fail quietly if error indicates a collision with existing set.
		 * One would expect EEXIST, given that we said IPC_EXCL, but
		 * perhaps we could get a permission violation instead?  Also,
		 * EIDRM might occur if an old set is slated for destruction but
		 * not gone yet.
		 */
		if (saved_errno == EEXIST || saved_errno == EACCES
#ifdef EIDRM
			|| saved_errno == EIDRM
#endif
			)
			return -1;

		/*
		 * Else complain and abort
		 */
		fprintf(stderr, "IpcSemaphoreCreate: semget(key=%d, num=%d, 0%o) failed: %s\n",
				(int) semKey, numSems, (IPC_CREAT | IPC_EXCL | IPCProtection),
				strerror(saved_errno));

		if (saved_errno == ENOSPC)
			fprintf(stderr,
					"\nThis error does *not* mean that you have run out of disk space.\n"
					"\n"
					"It occurs when either the system limit for the maximum number of\n"
					"semaphore sets (SEMMNI), or the system wide maximum number of\n"
					"semaphores (SEMMNS), would be exceeded. You need to raise the\n"
					"respective kernel parameter. Alternatively, reduce PostgreSQL's\n"
					"consumption of semaphores by reducing its max_connections parameter\n"
					"(currently %d).\n"
					"\n"
					"The PostgreSQL documentation contains more information about\n"
					"configuring your system for PostgreSQL.\n\n",
					MaxBackends);

		proc_exit(1);
	}

	return semId;
}
/*
 * Initialize a semaphore to the specified value.
 *
 * semId:	ID of the semaphore set
 * semNum:	index of the semaphore within the set
 * value:	count to assign with semctl(SETVAL)
 *
 * On failure, reports the problem on stderr and exits via proc_exit(1).
 */
static void
IpcSemaphoreInitialize(IpcSemaphoreId semId, int semNum, int value)
{
	union semun semun;

	semun.val = value;
	if (semctl(semId, semNum, SETVAL, semun) < 0)
	{
		/*
		 * Remember errno before calling fprintf(), which may overwrite it;
		 * otherwise the ERANGE hint below could be skipped or mis-issued.
		 */
		int			saved_errno = errno;

		fprintf(stderr, "IpcSemaphoreInitialize: semctl(id=%d, %d, SETVAL, %d) failed: %s\n",
				semId, semNum, value, strerror(saved_errno));

		if (saved_errno == ERANGE)
			fprintf(stderr,
					"You possibly need to raise your kernel's SEMVMX value to be at least\n"
					"%d. Look into the PostgreSQL documentation for details.\n",
					value);

		proc_exit(1);
	}
}
/*
 * IpcSemaphoreKill(semId) - removes a semaphore set
 *
 * A failure of the removal is only noted on stderr; the function returns
 * normally either way.  (We used to report a failure via elog(WARNING),
 * but that's pretty pointless considering any client has long since
 * disconnected ...)
 */
static void
IpcSemaphoreKill(IpcSemaphoreId semId)
{
	union semun ignoredArg;
	int			rc;

	ignoredArg.val = 0;			/* unused, but keep compiler quiet */

	rc = semctl(semId, 0, IPC_RMID, ignoredArg);
	if (rc >= 0)
		return;					/* set removed, nothing to report */

	fprintf(stderr, "IpcSemaphoreKill: semctl(%d, 0, IPC_RMID, ...) failed: %s\n",
			semId, strerror(errno));
}
/*
 * Fetch the current value (semval) of semaphore semNum in set semId.
 *
 * The result of semctl(GETVAL) is passed back unchanged, so a failure of
 * that call shows up as a negative return value.
 */
static int
IpcSemaphoreGetValue(IpcSemaphoreId semId, int semNum)
{
	union semun unusedArg;		/* for Solaris */
	int			semval;

	unusedArg.val = 0;			/* unused */
	semval = semctl(semId, semNum, GETVAL, unusedArg);

	return semval;
}
/*
 * Fetch the PID of the last process to do semop() on semaphore semNum of
 * set semId.
 *
 * The result of semctl(GETPID) is passed back unchanged (converted to
 * pid_t); callers in this file treat a value <= 0 as failure.
 */
static pid_t
IpcSemaphoreGetLastPID(IpcSemaphoreId semId, int semNum)
{
	union semun unusedArg;		/* for Solaris */
	pid_t		lastPid;

	unusedArg.val = 0;			/* unused */
	lastPid = semctl(semId, semNum, GETPID, unusedArg);

	return lastPid;
}
/*
* Create a semaphore set with the given number of useful semaphores
* (an additional sema is actually allocated to serve as identifier).
* Dead Postgres sema sets are recycled if found, but we do not fail
* upon collision with non-Postgres sema sets.
*
* The idea here is to detect and re-use keys that may have been assigned
* by a crashed postmaster or backend.
*/
static IpcSemaphoreId
IpcSemaphoreCreate(int numSems)
{
IpcSemaphoreId semId;
union semun semun;
PGSemaphoreData mysema;
/* Loop till we find a free IPC key */
2002-09-04 22:31:48 +02:00
for (nextSemaKey++;; nextSemaKey++)
{
pid_t creatorPID;
/* Try to create new semaphore set */
semId = InternalIpcSemaphoreCreate(nextSemaKey, numSems + 1);
if (semId >= 0)
break; /* successful create */
/* See if it looks to be leftover from a dead Postgres process */
semId = semget(nextSemaKey, numSems + 1, 0);
if (semId < 0)
continue; /* failed: must be some other app's */
if (IpcSemaphoreGetValue(semId, numSems) != PGSemaMagic)
continue; /* sema belongs to a non-Postgres app */
/*
* If the creator PID is my own PID or does not belong to any
* extant process, it's safe to zap it.
*/
creatorPID = IpcSemaphoreGetLastPID(semId, numSems);
if (creatorPID <= 0)
continue; /* oops, GETPID failed */
if (creatorPID != getpid())
{
if (kill(creatorPID, 0) == 0 ||
errno != ESRCH)
continue; /* sema belongs to a live process */
}
/*
* The sema set appears to be from a dead Postgres process, or
* from a previous cycle of life in this same process. Zap it, if
* possible. This probably shouldn't fail, but if it does, assume
* the sema set belongs to someone else after all, and continue
* quietly.
*/
semun.val = 0; /* unused, but keep compiler quiet */
if (semctl(semId, 0, IPC_RMID, semun) < 0)
continue;
/*
* Now try again to create the sema set.
*/
semId = InternalIpcSemaphoreCreate(nextSemaKey, numSems + 1);
if (semId >= 0)
break; /* successful create */
/*
* Can only get here if some other process managed to create the
* same sema key before we did. Let him have that one, loop
* around to try next key.
*/
}
/*
* OK, we created a new sema set. Mark it as created by this process.
* We do this by setting the spare semaphore to PGSemaMagic-1 and then
* incrementing it with semop(). That leaves it with value
* PGSemaMagic and sempid referencing this process.
*/
IpcSemaphoreInitialize(semId, numSems, PGSemaMagic - 1);
mysema.semId = semId;
mysema.semNum = numSems;
PGSemaphoreUnlock(&mysema);
return semId;
}
/*
* PGReserveSemaphores --- initialize semaphore support
*
* This is called during postmaster start or shared memory reinitialization.
* It should do whatever is needed to be able to support up to maxSemas
2002-09-04 22:31:48 +02:00
* subsequent PGSemaphoreCreate calls. Also, if any system resources
* are acquired here or in PGSemaphoreCreate, register an on_shmem_exit
* callback to release them.
*
* The port number is passed for possible use as a key (for SysV, we use
2002-09-04 22:31:48 +02:00
* it to generate the starting semaphore key). In a standalone backend,
* zero will be passed.
*
* In the SysV implementation, we acquire semaphore sets on-demand; the
* maxSemas parameter is just used to size the array that keeps track of
* acquired sets for subsequent releasing.
*/
void
PGReserveSemaphores(int maxSemas, int port)
{
2002-09-04 22:31:48 +02:00
maxSemaSets = (maxSemas + SEMAS_PER_SET - 1) / SEMAS_PER_SET;
mySemaSets = (IpcSemaphoreId *)
malloc(maxSemaSets * sizeof(IpcSemaphoreId));
if (mySemaSets == NULL)
elog(PANIC, "Out of memory in PGReserveSemaphores");
numSemaSets = 0;
nextSemaKey = port * 1000;
2002-09-04 22:31:48 +02:00
nextSemaNumber = SEMAS_PER_SET; /* force sema set alloc on 1st
* call */
on_shmem_exit(ReleaseSemaphores, 0);
}
/*
 * Release semaphores at shutdown or shmem reinitialization
 *
 * (called as an on_shmem_exit callback, hence funny argument list;
 * neither status nor arg is examined)
 *
 * Every set recorded in mySemaSets is removed and the tracking array is
 * freed.  The bookkeeping variables are then reset so that they no longer
 * refer to the freed array: a PGSemaphoreCreate call made without a fresh
 * PGReserveSemaphores will hit its "too many semaphores" check instead of
 * touching freed memory.
 */
static void
ReleaseSemaphores(int status, Datum arg)
{
	int			i;

	for (i = 0; i < numSemaSets; i++)
		IpcSemaphoreKill(mySemaSets[i]);
	free(mySemaSets);

	/* Don't leave a dangling pointer or stale counts behind */
	mySemaSets = NULL;
	numSemaSets = 0;
	maxSemaSets = 0;
	nextSemaNumber = SEMAS_PER_SET;
}
/*
 * PGSemaphoreCreate
 *
 * Initialize a PGSemaphore structure to represent a sema with count 1
 *
 * sema: caller-supplied structure; its semId and semNum fields are filled
 *		 in here.
 *
 * Semaphores are handed out sequentially from the most recently acquired
 * set; a new set is acquired whenever the current one is used up.  Raises
 * PANIC if more sets would be needed than PGReserveSemaphores allowed for.
 */
void
PGSemaphoreCreate(PGSemaphore sema)
{
	/* Can't do this in a backend, because static state is postmaster's */
	Assert(!IsUnderPostmaster);

	if (nextSemaNumber >= SEMAS_PER_SET)
	{
		/* Time to allocate another semaphore set */
		if (numSemaSets >= maxSemaSets)
			elog(PANIC, "PGSemaphoreCreate: too many semaphores created");
		mySemaSets[numSemaSets] = IpcSemaphoreCreate(SEMAS_PER_SET);
		numSemaSets++;
		nextSemaNumber = 0;
	}

	/* Assign the next free semaphore in the current set */
	sema->semId = mySemaSets[numSemaSets - 1];
	sema->semNum = nextSemaNumber++;

	/* Initialize it to count 1 */
	IpcSemaphoreInitialize(sema->semId, sema->semNum, 1);
}
/*
 * PGSemaphoreReset
 *
 * Reset a previously-initialized PGSemaphore to have count 0
 *
 * This simply re-initializes the underlying SysV semaphore, so failures
 * are handled the way IpcSemaphoreInitialize handles them.
 */
void
PGSemaphoreReset(PGSemaphore sema)
{
	const int	resetCount = 0;

	IpcSemaphoreInitialize(sema->semId, sema->semNum, resetCount);
}
/*
 * PGSemaphoreLock
 *
 * Lock a semaphore (decrement count), blocking if count would be < 0
 *
 * sema:		semaphore to lock; its semId/semNum select the SysV
 *				semaphore operated on.
 * interruptOK: value assigned to ImmediateInterruptOK for the duration of
 *				each wait attempt (see the long comment below for when a
 *				caller may pass true).
 *
 * Returns only once the decrement has succeeded.  A semop() failure other
 * than EINTR is reported on stderr and ends the process via proc_exit(255).
 */
void
PGSemaphoreLock(PGSemaphore sema, bool interruptOK)
{
int errStatus;
struct sembuf sops;
/* Describe a single blocking decrement of the target semaphore */
sops.sem_op = -1; /* decrement */
sops.sem_flg = 0;
sops.sem_num = sema->semNum;
/*
 * Note: if errStatus is -1 and errno == EINTR then it means we
 * returned from the operation prematurely because we were sent a
 * signal. So we try and lock the semaphore again.
 *
 * Each time around the loop, we check for a cancel/die interrupt. We
 * assume that if such an interrupt comes in while we are waiting, it
 * will cause the semop() call to exit with errno == EINTR, so that we
 * will be able to service the interrupt (if not in a critical section
 * already).
 *
 * Once we acquire the lock, we do NOT check for an interrupt before
 * returning. The caller needs to be able to record ownership of the
 * lock before any interrupt can be accepted.
 *
 * There is a window of a few instructions between CHECK_FOR_INTERRUPTS
 * and entering the semop() call. If a cancel/die interrupt occurs in
 * that window, we would fail to notice it until after we acquire the
 * lock (or get another interrupt to escape the semop()). We can
 * avoid this problem by temporarily setting ImmediateInterruptOK to
 * true before we do CHECK_FOR_INTERRUPTS; then, a die() interrupt in
 * this interval will execute directly. However, there is a huge
 * pitfall: there is another window of a few instructions after the
 * semop() before we are able to reset ImmediateInterruptOK. If an
 * interrupt occurs then, we'll lose control, which means that the
 * lock has been acquired but our caller did not get a chance to
 * record the fact. Therefore, we only set ImmediateInterruptOK if the
 * caller tells us it's OK to do so, ie, the caller does not need to
 * record acquiring the lock. (This is currently true for lockmanager
 * locks, since the process that granted us the lock did all the
 * necessary state updates. It's not true for SysV semaphores used to
 * implement LW locks or emulate spinlocks --- but the wait time for
 * such locks should not be very long, anyway.)
 */
do
{
/* The order of these four statements matters; see the comment above */
ImmediateInterruptOK = interruptOK;
CHECK_FOR_INTERRUPTS();
errStatus = semop(sema->semId, &sops, 1);
ImmediateInterruptOK = false;
} while (errStatus < 0 && errno == EINTR);
/* Any failure that was not a signal interruption is treated as fatal */
if (errStatus < 0)
{
fprintf(stderr, "PGSemaphoreLock: semop(id=%d) failed: %s\n",
sema->semId, strerror(errno));
proc_exit(255);
}
}
/*
 * PGSemaphoreUnlock
 *
 * Unlock a semaphore (increment count)
 *
 * A semop() failure other than EINTR is reported on stderr and ends the
 * process via proc_exit(255).
 */
void
PGSemaphoreUnlock(PGSemaphore sema)
{
	struct sembuf incrementOp;
	int			rc;

	incrementOp.sem_num = sema->semNum;
	incrementOp.sem_op = 1;		/* increment */
	incrementOp.sem_flg = 0;

	/*
	 * Retry for as long as the call is merely cut short by a signal
	 * (result -1 with errno == EINTR).  Not clear this can really happen
	 * for an increment, but might as well cope.
	 */
	for (;;)
	{
		rc = semop(sema->semId, &incrementOp, 1);
		if (rc >= 0 || errno != EINTR)
			break;
	}

	if (rc < 0)
	{
		fprintf(stderr, "PGSemaphoreUnlock: semop(id=%d) failed: %s\n",
				sema->semId, strerror(errno));
		proc_exit(255);
	}
}
/*
 * PGSemaphoreTryLock
 *
 * Lock a semaphore only if able to do so without blocking
 *
 * Returns true if the semaphore was decremented, false if doing so would
 * have required waiting.  Any other semop() failure is reported on stderr
 * and ends the process via proc_exit(255).
 */
bool
PGSemaphoreTryLock(PGSemaphore sema)
{
	struct sembuf tryOp;
	int			rc;

	tryOp.sem_num = sema->semNum;
	tryOp.sem_op = -1;			/* decrement */
	tryOp.sem_flg = IPC_NOWAIT; /* but don't block */

	/*
	 * A result of -1 with errno == EINTR means a signal cut the call
	 * short before it completed; simply issue it again.
	 */
	for (;;)
	{
		rc = semop(sema->semId, &tryOp, 1);
		if (rc >= 0 || errno != EINTR)
			break;
	}

	if (rc >= 0)
		return true;			/* got the lock */

	/* Expect EAGAIN or EWOULDBLOCK (platform-dependent) */
#ifdef EAGAIN
	if (errno == EAGAIN)
		return false;			/* failed to lock it */
#endif
#if defined(EWOULDBLOCK) && (!defined(EAGAIN) || (EWOULDBLOCK != EAGAIN))
	if (errno == EWOULDBLOCK)
		return false;			/* failed to lock it */
#endif

	/* Otherwise we got trouble */
	fprintf(stderr, "PGSemaphoreTryLock: semop(id=%d) failed: %s\n",
			sema->semId, strerror(errno));
	proc_exit(255);

	return true;				/* same fall-through value as before */
}