diff --git a/contrib/start-scripts/linux b/contrib/start-scripts/linux index 4ad66917e6..b950cf512c 100644 --- a/contrib/start-scripts/linux +++ b/contrib/start-scripts/linux @@ -42,10 +42,14 @@ PGLOG="$PGDATA/serverlog" # It's often a good idea to protect the postmaster from being killed by the # OOM killer (which will tend to preferentially kill the postmaster because -# of the way it accounts for shared memory). Setting the OOM_ADJ value to -# -17 will disable OOM kill altogether. If you enable this, you probably want -# to compile PostgreSQL with "-DLINUX_OOM_ADJ=0", so that individual backends -# can still be killed by the OOM killer. +# of the way it accounts for shared memory). Setting the OOM_SCORE_ADJ value +# to -1000 will disable OOM kill altogether. If you enable this, you probably +# want to compile PostgreSQL with "-DLINUX_OOM_SCORE_ADJ=0", so that +# individual backends can still be killed by the OOM killer. +#OOM_SCORE_ADJ=-1000 +# Older Linux kernels may not have /proc/self/oom_score_adj, but instead +# /proc/self/oom_adj, which works similarly except the disable value is -17. +# For such a system, enable this and compile with "-DLINUX_OOM_ADJ=0". #OOM_ADJ=-17 ## STOP EDITING HERE @@ -78,6 +82,7 @@ test -x $DAEMON || case $1 in start) echo -n "Starting PostgreSQL: " + test x"$OOM_SCORE_ADJ" != x && echo "$OOM_SCORE_ADJ" > /proc/self/oom_score_adj test x"$OOM_ADJ" != x && echo "$OOM_ADJ" > /proc/self/oom_adj su - $PGUSER -c "$DAEMON -D '$PGDATA' &" >>$PGLOG 2>&1 echo "ok" @@ -90,6 +95,7 @@ case $1 in restart) echo -n "Restarting PostgreSQL: " su - $PGUSER -c "$PGCTL stop -D '$PGDATA' -s -m fast -w" + test x"$OOM_SCORE_ADJ" != x && echo "$OOM_SCORE_ADJ" > /proc/self/oom_score_adj test x"$OOM_ADJ" != x && echo "$OOM_ADJ" > /proc/self/oom_adj su - $PGUSER -c "$DAEMON -D '$PGDATA' &" >>$PGLOG 2>&1 echo "ok" diff --git a/doc/src/sgml/runtime.sgml b/doc/src/sgml/runtime.sgml index 05da723508..7ba18f0319 100644 --- a/doc/src/sgml/runtime.sgml +++ b/doc/src/sgml/runtime.sgml @@ -1268,7 +1268,7 @@ default:\ In Linux 2.4 and later, the default virtual memory behavior is not optimal for PostgreSQL. Because of the way that the kernel implements memory overcommit, the kernel might - terminate the PostgreSQL server (the + terminate the PostgreSQL postmaster (the master server process) if the memory demands of another process cause the system to run out of virtual memory. @@ -1317,22 +1317,31 @@ sysctl -w vm.overcommit_memory=2 Another approach, which can be used with or without altering vm.overcommit_memory, is to set the process-specific - oom_adj value for the postmaster process to -17, - thereby guaranteeing it will not be targeted by the OOM killer. The - simplest way to do this is to execute + oom_score_adj value for the postmaster process to + -1000, thereby guaranteeing it will not be targeted by the OOM + killer. The simplest way to do this is to execute -echo -17 > /proc/self/oom_adj +echo -1000 > /proc/self/oom_score_adj in the postmaster's startup script just before invoking the postmaster. Note that this action must be done as root, or it will have no effect; so a root-owned startup script is the easiest place to do it. If you do this, you may also wish to build PostgreSQL - with -DLINUX_OOM_ADJ=0 added to CPPFLAGS. + with -DLINUX_OOM_SCORE_ADJ=0 added to CPPFLAGS. That will cause postmaster child processes to run with the normal - oom_adj value of zero, so that the OOM killer can still + oom_score_adj value of zero, so that the OOM killer can still target them at need. + + Older Linux kernels do not offer /proc/self/oom_score_adj, + but may have a previous version of the same functionality called + /proc/self/oom_adj. This works the same except the disable + value is -17 not -1000. The corresponding + build flag for PostgreSQL is + -DLINUX_OOM_ADJ=0. + + Some vendors' Linux 2.4 kernels are reported to have early versions diff --git a/src/backend/postmaster/fork_process.c b/src/backend/postmaster/fork_process.c index c99ed43265..2a415e844b 100644 --- a/src/backend/postmaster/fork_process.c +++ b/src/backend/postmaster/fork_process.c @@ -68,12 +68,40 @@ fork_process(void) * process sizes *including shared memory*. (This is unbelievably * stupid, but the kernel hackers seem uninterested in improving it.) * Therefore it's often a good idea to protect the postmaster by - * setting its oom_adj value negative (which has to be done in a - * root-owned startup script). If you just do that much, all child + * setting its oom_score_adj value negative (which has to be done in a + * root-owned startup script). If you just do that much, all child * processes will also be protected against OOM kill, which might not - * be desirable. You can then choose to build with LINUX_OOM_ADJ - * #defined to 0, or some other value that you want child processes to - * adopt here. + * be desirable. You can then choose to build with + * LINUX_OOM_SCORE_ADJ #defined to 0, or to some other value that you + * want child processes to adopt here. + */ +#ifdef LINUX_OOM_SCORE_ADJ + { + /* + * Use open() not stdio, to ensure we control the open flags. Some + * Linux security environments reject anything but O_WRONLY. + */ + int fd = open("/proc/self/oom_score_adj", O_WRONLY, 0); + + /* We ignore all errors */ + if (fd >= 0) + { + char buf[16]; + int rc; + + snprintf(buf, sizeof(buf), "%d\n", LINUX_OOM_SCORE_ADJ); + rc = write(fd, buf, strlen(buf)); + (void) rc; + close(fd); + } + } +#endif /* LINUX_OOM_SCORE_ADJ */ + + /* + * Older Linux kernels have oom_adj not oom_score_adj. This works + * similarly except with a different scale of adjustment values. + * If it's necessary to build Postgres to work with either API, + * you can define both LINUX_OOM_SCORE_ADJ and LINUX_OOM_ADJ. */ #ifdef LINUX_OOM_ADJ {