diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml index 14ed6c7a53..e7c255987d 100644 --- a/doc/src/sgml/config.sgml +++ b/doc/src/sgml/config.sgml @@ -1107,6 +1107,43 @@ include 'filename' + + huge_tlb_pages (enum) + + huge_tlb_pages configuration parameter + + + + Enables/disables the use of huge TLB pages. Valid values are + try (the default), on, + and off. + + + + At present, this feature is supported only on Linux. The setting + is ignored on other systems when set to try; on is rejected there with an error. + + + + The use of huge TLB pages results in smaller page tables and + less CPU time spent on memory management, increasing performance. For + more details, see + the Debian wiki. + Remember that you will need at least shared_buffers / huge page size + + 1 huge TLB pages. So, for example, for a system with 6GB of shared buffers + and a huge page size of 2MB, you will need at least 3073 huge pages. + + + + With huge_tlb_pages set to try, + the server will try to use huge pages, but fall back to using + normal allocation if that fails. With on, failure + to use huge pages will prevent the server from starting up. With + off, huge pages will not be used. 
+ + + + temp_buffers (integer) diff --git a/src/backend/port/sysv_shmem.c b/src/backend/port/sysv_shmem.c index 0d01617e2f..f7596bf6e0 100644 --- a/src/backend/port/sysv_shmem.c +++ b/src/backend/port/sysv_shmem.c @@ -32,6 +32,7 @@ #include "portability/mem.h" #include "storage/ipc.h" #include "storage/pg_shmem.h" +#include "utils/guc.h" typedef key_t IpcMemoryKey; /* shared memory key passed to shmget(2) */ @@ -41,7 +42,7 @@ typedef int IpcMemoryId; /* shared memory ID returned by shmget(2) */ unsigned long UsedShmemSegID = 0; void *UsedShmemSegAddr = NULL; static Size AnonymousShmemSize; -static void *AnonymousShmem; +static void *AnonymousShmem = NULL; static void *InternalIpcMemoryCreate(IpcMemoryKey memKey, Size size); static void IpcMemoryDetach(int status, Datum shmaddr); @@ -317,6 +318,80 @@ PGSharedMemoryIsInUse(unsigned long id1, unsigned long id2) return true; } +/* + * Creates an anonymous mmap()ed shared memory segment. + * + * Pass the requested size in *size. This function will modify *size to the + * actual size of the allocation, if it ends up allocating a segment that is + * larger than requested. + */ +#ifndef EXEC_BACKEND +static void * +CreateAnonymousSegment(Size *size) +{ + Size allocsize; + void *ptr = MAP_FAILED; + +#ifndef MAP_HUGETLB + if (huge_tlb_pages == HUGE_TLB_ON) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("huge TLB pages not supported on this platform"))); +#else + if (huge_tlb_pages == HUGE_TLB_ON || huge_tlb_pages == HUGE_TLB_TRY) + { + /* + * Round up the request size to a suitable large value. + * + * Some Linux kernel versions are known to have a bug, which causes + * mmap() with MAP_HUGETLB to fail if the request size is not a + * multiple of any supported huge page size. To work around that, we + * round up the request size to nearest 2MB. 2MB is the most common + * huge page page size on affected systems. 
+ * + * Aside from that bug, even with a kernel that does the allocation + * correctly, rounding it up ourselves avoids wasting memory. Without + * it, if we for example make an allocation of 2MB + 1 bytes, the + * kernel might decide to use two 2MB huge pages for that, and waste 2 + * MB - 1 of memory. When we do the rounding ourselves, we can use + * that space for allocations. + */ + int hugepagesize = 2 * 1024 * 1024; + + allocsize = *size; + if (allocsize % hugepagesize != 0) + allocsize += hugepagesize - (allocsize % hugepagesize); + + ptr = mmap(NULL, *size, PROT_READ | PROT_WRITE, + PG_MMAP_FLAGS | MAP_HUGETLB, -1, 0); + if (huge_tlb_pages == HUGE_TLB_TRY && ptr == MAP_FAILED) + elog(DEBUG1, "mmap with MAP_HUGETLB failed, huge pages disabled: %m"); + } +#endif + + if (huge_tlb_pages == HUGE_TLB_OFF || + (huge_tlb_pages == HUGE_TLB_TRY && ptr == MAP_FAILED)) + { + allocsize = *size; + ptr = mmap(NULL, *size, PROT_READ | PROT_WRITE, PG_MMAP_FLAGS, -1, 0); + } + + if (ptr == MAP_FAILED) + ereport(FATAL, + (errmsg("could not map anonymous shared memory: %m"), + (errno == ENOMEM) ? + errhint("This error usually means that PostgreSQL's request " + "for a shared memory segment exceeded available memory, " + "swap space or huge pages. To reduce the request size " + "(currently %zu bytes), reduce PostgreSQL's shared " + "memory usage, perhaps by reducing shared_buffers or " + "max_connections.", + *size) : 0)); + + *size = allocsize; + return ptr; +} +#endif /* * PGSharedMemoryCreate @@ -344,7 +419,14 @@ PGSharedMemoryCreate(Size size, bool makePrivate, int port) PGShmemHeader *hdr; IpcMemoryId shmid; struct stat statbuf; - Size sysvsize = size; + Size sysvsize; + +#if defined(EXEC_BACKEND) || !defined(MAP_HUGETLB) + if (huge_tlb_pages == HUGE_TLB_ON) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("huge TLB pages not supported on this platform"))); +#endif /* Room for a header? 
*/ Assert(size > MAXALIGN(sizeof(PGShmemHeader))); @@ -359,6 +441,12 @@ PGSharedMemoryCreate(Size size, bool makePrivate, int port) * to run many copies of PostgreSQL without needing to adjust system * settings. * + * We assume that no one will attempt to run PostgreSQL 9.3 or later on + * systems that are ancient enough that anonymous shared memory is not + * supported, such as pre-2.4 versions of Linux. If that turns out to be + * false, we might need to add a run-time test here and do this only if + * the running kernel supports it. + * * However, we disable this logic in the EXEC_BACKEND case, and fall back * to the old method of allocating the entire segment using System V * shared memory, because there's no way to attach an mmap'd segment to a @@ -366,44 +454,13 @@ PGSharedMemoryCreate(Size size, bool makePrivate, int port) * developer use, this shouldn't be a big problem. */ #ifndef EXEC_BACKEND - { - long pagesize = sysconf(_SC_PAGE_SIZE); + AnonymousShmem = CreateAnonymousSegment(&size); + AnonymousShmemSize = size; - /* - * Ensure request size is a multiple of pagesize. - * - * pagesize will, for practical purposes, always be a power of two. - * But just in case it isn't, we do it this way instead of using - * TYPEALIGN(). - */ - if (pagesize > 0 && size % pagesize != 0) - size += pagesize - (size % pagesize); - - /* - * We assume that no one will attempt to run PostgreSQL 9.3 or later - * on systems that are ancient enough that anonymous shared memory is - * not supported, such as pre-2.4 versions of Linux. If that turns - * out to be false, we might need to add a run-time test here and do - * this only if the running kernel supports it. - */ - AnonymousShmem = mmap(NULL, size, PROT_READ | PROT_WRITE, PG_MMAP_FLAGS, - -1, 0); - if (AnonymousShmem == MAP_FAILED) - ereport(FATAL, - (errmsg("could not map anonymous shared memory: %m"), - (errno == ENOMEM) ? 
- errhint("This error usually means that PostgreSQL's request " - "for a shared memory segment exceeded available memory " - "or swap space. To reduce the request size (currently " - "%zu bytes), reduce PostgreSQL's shared memory usage, " - "perhaps by reducing shared_buffers or " - "max_connections.", - size) : 0)); - AnonymousShmemSize = size; - - /* Now we need only allocate a minimal-sized SysV shmem block. */ - sysvsize = sizeof(PGShmemHeader); - } + /* Now we need only allocate a minimal-sized SysV shmem block. */ + sysvsize = sizeof(PGShmemHeader); +#else + sysvsize = size; #endif /* Make sure PGSharedMemoryAttach doesn't fail without need */ diff --git a/src/backend/port/win32_shmem.c b/src/backend/port/win32_shmem.c index 80f198277a..9b0cceb530 100644 --- a/src/backend/port/win32_shmem.c +++ b/src/backend/port/win32_shmem.c @@ -128,6 +128,11 @@ PGSharedMemoryCreate(Size size, bool makePrivate, int port) DWORD size_high; DWORD size_low; + if (huge_tlb_pages == HUGE_TLB_ON) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("huge TLB pages not supported on this platform"))); + /* Room for a header? */ Assert(size > MAXALIGN(sizeof(PGShmemHeader))); diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c index 2cc8f90e6d..a9b9794965 100644 --- a/src/backend/utils/misc/guc.c +++ b/src/backend/utils/misc/guc.c @@ -64,6 +64,7 @@ #include "storage/dsm_impl.h" #include "storage/standby.h" #include "storage/fd.h" +#include "storage/pg_shmem.h" #include "storage/proc.h" #include "storage/predicate.h" #include "tcop/tcopprot.h" @@ -387,6 +388,23 @@ static const struct config_enum_entry synchronous_commit_options[] = { {NULL, 0, false} }; +/* + * Although only "on", "off", "try" are documented, we accept all the likely + * variants of "on" and "off". 
+ */ +static const struct config_enum_entry huge_tlb_options[] = { + {"off", HUGE_TLB_OFF, false}, /* the three documented spellings come first */ + {"on", HUGE_TLB_ON, false}, + {"try", HUGE_TLB_TRY, false}, + {"true", HUGE_TLB_ON, true}, /* undocumented aliases of "on"/"off" from here on */ + {"false", HUGE_TLB_OFF, true}, + {"yes", HUGE_TLB_ON, true}, + {"no", HUGE_TLB_OFF, true}, + {"1", HUGE_TLB_ON, true}, + {"0", HUGE_TLB_OFF, true}, + {NULL, 0, false} +}; + /* * Options for enum values stored in other modules */ @@ -447,6 +465,12 @@ int tcp_keepalives_idle; int tcp_keepalives_interval; int tcp_keepalives_count; +/* + * This really belongs in pg_shmem.c, but is defined here so that it doesn't + * need to be duplicated in all the different implementations of pg_shmem.c. It holds one of the HUGE_TLB_* values and is registered below with HUGE_TLB_TRY as its default. + */ +int huge_tlb_pages; + /* * These variables are all dummies that don't do anything, except in some * cases provide the value for SHOW to display. The real state is elsewhere @@ -3430,6 +3454,15 @@ static struct config_enum ConfigureNamesEnum[] = NULL, NULL, NULL }, + { + {"huge_tlb_pages", PGC_POSTMASTER, RESOURCES_MEM, + gettext_noop("Use of huge TLB pages on Linux"), + NULL + }, + &huge_tlb_pages, + HUGE_TLB_TRY, huge_tlb_options, + NULL, NULL, NULL + }, /* End-of-list marker */ { diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample index 7ad6b7cb45..c8673b382d 100644 --- a/src/backend/utils/misc/postgresql.conf.sample +++ b/src/backend/utils/misc/postgresql.conf.sample @@ -115,6 +115,8 @@ #shared_buffers = 32MB # min 128kB # (change requires restart) +#huge_tlb_pages = try # on, off, or try + # (change requires restart) #temp_buffers = 8MB # min 800kB #max_prepared_transactions = 0 # zero disables the feature # (change requires restart) diff --git a/src/include/storage/pg_shmem.h b/src/include/storage/pg_shmem.h index 22ef901e89..df094e801d 100644 --- a/src/include/storage/pg_shmem.h +++ b/src/include/storage/pg_shmem.h @@ -38,6 +38,16 @@ typedef struct PGShmemHeader /* standard header for all Postgres shmem */ #endif } 
PGShmemHeader; +/* GUC variable: huge page policy for the shared memory segment, one of the HugeTlbType values below */ +extern int huge_tlb_pages; + +/* Possible values for huge_tlb_pages */ +typedef enum +{ + HUGE_TLB_OFF, /* do not use huge pages */ + HUGE_TLB_ON, /* require huge pages; an error is raised if they cannot be used */ + HUGE_TLB_TRY /* try huge pages, fall back to normal allocation on failure */ +} HugeTlbType; #ifdef EXEC_BACKEND #ifndef WIN32