Keep heavily-contended fields in XLogCtlInsert on different cache lines.
Performance testing shows that if the insertpos_lck spinlock and the fields it protects share a cache line with other frequently accessed variables, the resulting false sharing can hurt performance significantly. Keep them apart by adding some padding.
This commit is contained in:
parent
cc52d5b33f
commit
375d8526f2
|
@ -408,7 +408,7 @@ typedef struct
|
|||
typedef union XLogInsertSlotPadded
|
||||
{
|
||||
XLogInsertSlot slot;
|
||||
char pad[64];
|
||||
char pad[CACHE_LINE_SIZE];
|
||||
} XLogInsertSlotPadded;
|
||||
|
||||
/*
|
||||
|
@ -428,8 +428,14 @@ typedef struct XLogCtlInsert
|
|||
uint64 CurrBytePos;
|
||||
uint64 PrevBytePos;
|
||||
|
||||
/* insertion slots, see above for details */
|
||||
XLogInsertSlotPadded *insertSlots;
|
||||
/*
|
||||
* Make sure the above heavily-contended spinlock and byte positions are
|
||||
* on their own cache line. In particular, the RedoRecPtr and full page
|
||||
* write variables below should be on a different cache line. They are
|
||||
* read on every WAL insertion, but updated rarely, and we don't want
|
||||
* those reads to steal the cache line containing Curr/PrevBytePos.
|
||||
*/
|
||||
char pad[CACHE_LINE_SIZE];
|
||||
|
||||
/*
|
||||
* fullPageWrites is the master copy used by all backends to determine
|
||||
|
@ -455,6 +461,9 @@ typedef struct XLogCtlInsert
|
|||
bool exclusiveBackup;
|
||||
int nonExclusiveBackups;
|
||||
XLogRecPtr lastBackupStart;
|
||||
|
||||
/* insertion slots, see XLogInsertSlot struct above for details */
|
||||
XLogInsertSlotPadded *insertSlots;
|
||||
} XLogCtlInsert;
|
||||
|
||||
/*
|
||||
|
|
|
@ -199,6 +199,17 @@
|
|||
#define USE_PPC_LWSYNC
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Assumed cache line size. This doesn't affect correctness, but can be
|
||||
* used for low-level optimizations. Currently, this is only used to pad
|
||||
* some data structures in xlog.c, to ensure that highly-contended fields
|
||||
* are on different cache lines. Too small a value can hurt performance due
|
||||
* to false sharing, while the only downside of too large a value is a few
|
||||
* bytes of wasted memory. The default is 128, which should be large enough
|
||||
* for all supported platforms.
|
||||
*/
|
||||
#define CACHE_LINE_SIZE 128
|
||||
|
||||
/*
|
||||
*------------------------------------------------------------------------
|
||||
* The following symbols are for enabling debugging code, not for
|
||||
|
|
Loading…
Reference in New Issue