postgresql/src/backend/port/atomics.c

/*-------------------------------------------------------------------------
 *
 * atomics.c
 *	   Non-Inline parts of the atomics implementation
 *
 * Portions Copyright (c) 2013-2015, PostgreSQL Global Development Group
 *
 *
 * IDENTIFICATION
 *	  src/backend/port/atomics.c
 *
 *-------------------------------------------------------------------------
 */
#include "postgres.h"

/*
 * We want the functions below to be inline; but if the compiler doesn't
 * support that, fall back on providing them as regular functions.	See
 * STATIC_IF_INLINE in c.h.
 */
#define ATOMICS_INCLUDE_DEFINITIONS

#include "port/atomics.h"
#include "storage/spin.h"

#ifdef PG_HAVE_MEMORY_BARRIER_EMULATION
/*
 * pg_spinlock_barrier
 *
 * Out-of-line fallback compiled only when PG_HAVE_MEMORY_BARRIER_EMULATION
 * is defined.  It acquires dummy_spinlock and immediately releases it again;
 * the lock protects no data here, so the acquire/release pair is presumably
 * relied upon purely for the memory ordering it implies.
 *
 * NOTE(review): this looks non-reentrant -- if it could ever run in a signal
 * handler that interrupts the window between S_LOCK and S_UNLOCK, the nested
 * S_LOCK would wait on a lock held by the interrupted code.  Confirm that no
 * caller can reach this from a signal handler.
 */
void
pg_spinlock_barrier(void)
{
	/* Acquire, then immediately release; nothing is done while holding it. */
	S_LOCK(&dummy_spinlock);
	S_UNLOCK(&dummy_spinlock);
}
#endif

#ifdef PG_HAVE_COMPILER_BARRIER_EMULATION
/*
 * pg_extern_compiler_barrier
 *
 * Fallback compiled only when PG_HAVE_COMPILER_BARRIER_EMULATION is defined,
 * i.e. when the compiler/architecture combination provides no compiler
 * barrier of its own.  The body is intentionally empty: the barrier effect
 * is meant to come from the call into an externally defined function, which
 * the compiler cannot see through.  That does not hold for compilers doing
 * inter-translation-unit/global optimization; those need a real compiler
 * barrier instead.
 */
void
pg_extern_compiler_barrier(void)
{
	/* do nothing */
}
#endif


#ifdef PG_HAVE_ATOMIC_FLAG_SIMULATION

void
pg_atomic_init_flag_impl(volatile pg_atomic_flag *ptr)
{
	StaticAssertStmt(sizeof(ptr->sema) >= sizeof(slock_t),
					 "size mismatch of atomic_flag vs slock_t");

#ifndef HAVE_SPINLOCKS
	/*
	 * NB: If we're using semaphore based TAS emulation, be careful to use a
	 * separate set of semaphores. Otherwise we'd get in trouble if an atomic
	 * var would be manipulated while spinlock is held.
	 */
	s_init_lock_sema((slock_t *) &ptr->sema, true);
#else
	SpinLockInit((slock_t *) &ptr->sema);
#endif
}

/*
 * pg_atomic_test_set_flag_impl
 *
 * Try to set a simulated atomic flag, using the spinlock stored in its
 * 'sema' member as the flag itself (locked == set).
 *
 * Returns true if the flag was clear and has now been set by this call,
 * false if it was already set.
 */
bool
pg_atomic_test_set_flag_impl(volatile pg_atomic_flag *ptr)
{
	/*
	 * TAS() follows the spinlock convention: it yields 0 when the lock was
	 * acquired and nonzero when it was already held.  That is the opposite
	 * of what this function has to report, so the result must be inverted
	 * rather than returned as-is.
	 */
	return TAS((slock_t *) &ptr->sema) == 0;
}

void
pg_atomic_clear_flag_impl(volatile pg_atomic_flag *ptr)
{
	S_UNLOCK((slock_t *) &ptr->sema);
}

#endif /* PG_HAVE_ATOMIC_FLAG_SIMULATION */

#ifdef PG_HAVE_ATOMIC_U32_SIMULATION
/*
 * pg_atomic_init_u32_impl
 *
 * Set up a simulated 32 bit atomic: initialize the spinlock stored in the
 * 'sema' member, then store the initial value.
 *
 *	ptr		the atomic variable to initialize
 *	val_	the value it starts out with
 */
void
pg_atomic_init_u32_impl(volatile pg_atomic_uint32 *ptr, uint32 val_)
{
	/* 'sema' doubles as spinlock storage, so it must be able to hold one. */
	StaticAssertStmt(sizeof(ptr->sema) >= sizeof(slock_t),
					 "size mismatch of atomic_uint32 vs slock_t");

	/*
	 * If we're using semaphore based atomic flags, be careful about nested
	 * usage of atomics while a spinlock is held.
	 */
#ifndef HAVE_SPINLOCKS
	s_init_lock_sema((slock_t *) &ptr->sema, true);
#else
	SpinLockInit((slock_t *) &ptr->sema);
#endif

	/* Plain store, done without taking the lock that was just initialized. */
	ptr->value = val_;
}

/*
 * pg_atomic_compare_exchange_u32_impl
 *
 * Spinlock-protected emulation of a 32 bit compare-and-exchange.
 *
 * If the atomic's value equals *expected it is replaced by newval and true
 * is returned; otherwise the atomic is left alone and false is returned.
 * In both cases *expected is overwritten with the value read from the
 * atomic before any replacement.
 */
bool
pg_atomic_compare_exchange_u32_impl(volatile pg_atomic_uint32 *ptr,
									uint32 *expected, uint32 newval)
{
	slock_t    *lock = (slock_t *) &ptr->sema;
	bool		swapped;

	/*
	 * We wait for the lock instead of giving up when it is busy.  Giving up
	 * would only yield a 'weak' compare and swap, i.e. one that may fail
	 * spuriously.  Several algorithms depend on the strong variant, and most
	 * major architectures implement it efficiently, so emulate it here too.
	 */
	SpinLockAcquire(lock);

	/* compare, report the value seen, and exchange only on a match */
	swapped = (ptr->value == *expected);
	*expected = ptr->value;
	if (swapped)
		ptr->value = newval;

	SpinLockRelease(lock);

	return swapped;
}

/*
 * pg_atomic_fetch_add_u32_impl
 *
 * Spinlock-protected emulation of a 32 bit fetch-and-add: adds add_ to the
 * atomic's value and returns the value it held before the addition.
 */
uint32
pg_atomic_fetch_add_u32_impl(volatile pg_atomic_uint32 *ptr, int32 add_)
{
	slock_t    *lock = (slock_t *) &ptr->sema;
	uint32		before;

	SpinLockAcquire(lock);

	/* remember the old value, then store the sum while still holding the lock */
	before = ptr->value;
	ptr->value = ptr->value + add_;

	SpinLockRelease(lock);

	return before;
}

#endif /* PG_HAVE_ATOMIC_U32_SIMULATION */
Add a basic atomic ops API abstracting away platform/architecture details. Several upcoming performance/scalability improvements require atomic operations. This new API avoids the need to splatter compiler and architecture dependent code over all the locations employing atomic ops. For several of the potential usages it'd be problematic to maintain both an atomics-using implementation and one using spinlocks or similar. In all likelihood one of the implementations would not get tested regularly under concurrency. To avoid that scenario the new API provides an automatic fallback of atomic operations to spinlocks. All properties of atomic operations are maintained. This fallback - obviously - isn't as fast as just using atomic ops, but it's not bad either. For one of the future users the atomics on top of spinlocks implementation was actually slightly faster than the old purely spinlock using implementation. That's important because it reduces the fear of regressing older platforms when improving the scalability for new ones. The API, loosely modeled after the C11 atomics support, currently provides 'atomic flags' and 32 bit unsigned integers. If the platform efficiently supports atomic 64 bit unsigned integers those are also provided. To implement atomics support for a platform/architecture/compiler for a type of atomics, 32 bit compare and exchange needs to be implemented. If available and more efficient, native support for flags, 32 bit atomic addition, and corresponding 64 bit operations may also be provided. Additional useful atomic operations are implemented generically on top of these. The implementations for various versions of gcc, msvc and sun studio have been tested. Additional existing stub implementations for * Intel icc * HP-UX acc * IBM xlc are included but have never been tested. These will likely require fixes based on buildfarm and user feedback. 
As atomic operations also require barriers for some operations the existing barrier support has been moved into the atomics code. Author: Andres Freund with contributions from Oskari Saarenmaa Reviewed-By: Amit Kapila, Robert Haas, Heikki Linnakangas and Álvaro Herrera Discussion: CA+TgmoYBW+ux5-8Ja=Mcyuy8=VXAnVRHp3Kess6Pn3DMXAPAEA@mail.gmail.com, 20131015123303.GH5300@awork2.anarazel.de, 20131028205522.GI20248@awork2.anarazel.de 2014-09-25 23:49:05 +02:00			`/*-------------------------------------------------------------------------`
			`*`
			`* atomics.c`
			`* Non-Inline parts of the atomics implementation`
			`*`
Update copyright for 2015 Backpatch certain files through 9.0 2015-01-06 17:43:47 +01:00			`* Portions Copyright (c) 2013-2015, PostgreSQL Global Development Group`
Add a basic atomic ops API abstracting away platform/architecture details. Several upcoming performance/scalability improvements require atomic operations. This new API avoids the need to splatter compiler and architecture dependent code over all the locations employing atomic ops. For several of the potential usages it'd be problematic to maintain both an atomics-using implementation and one using spinlocks or similar. In all likelihood one of the implementations would not get tested regularly under concurrency. To avoid that scenario the new API provides an automatic fallback of atomic operations to spinlocks. All properties of atomic operations are maintained. This fallback - obviously - isn't as fast as just using atomic ops, but it's not bad either. For one of the future users the atomics on top of spinlocks implementation was actually slightly faster than the old purely spinlock using implementation. That's important because it reduces the fear of regressing older platforms when improving the scalability for new ones. The API, loosely modeled after the C11 atomics support, currently provides 'atomic flags' and 32 bit unsigned integers. If the platform efficiently supports atomic 64 bit unsigned integers those are also provided. To implement atomics support for a platform/architecture/compiler for a type of atomics, 32 bit compare and exchange needs to be implemented. If available and more efficient, native support for flags, 32 bit atomic addition, and corresponding 64 bit operations may also be provided. Additional useful atomic operations are implemented generically on top of these. The implementations for various versions of gcc, msvc and sun studio have been tested. Additional existing stub implementations for * Intel icc * HP-UX acc * IBM xlc are included but have never been tested. These will likely require fixes based on buildfarm and user feedback. 
As atomic operations also require barriers for some operations the existing barrier support has been moved into the atomics code. Author: Andres Freund with contributions from Oskari Saarenmaa Reviewed-By: Amit Kapila, Robert Haas, Heikki Linnakangas and Álvaro Herrera Discussion: CA+TgmoYBW+ux5-8Ja=Mcyuy8=VXAnVRHp3Kess6Pn3DMXAPAEA@mail.gmail.com, 20131015123303.GH5300@awork2.anarazel.de, 20131028205522.GI20248@awork2.anarazel.de 2014-09-25 23:49:05 +02:00			`*`
			`*`
			`* IDENTIFICATION`
			`* src/backend/port/atomics.c`
			`*`
			`*-------------------------------------------------------------------------`
			`*/`
			`#include "postgres.h"`

			`/*`
			`* We want the functions below to be inline; but if the compiler doesn't`
			`* support that, fall back on providing them as regular functions. See`
			`* STATIC_IF_INLINE in c.h.`
			`*/`
			`#define ATOMICS_INCLUDE_DEFINITIONS`

			`#include "port/atomics.h"`
			`#include "storage/spin.h"`

			`#ifdef PG_HAVE_MEMORY_BARRIER_EMULATION`
			`void`
			`pg_spinlock_barrier(void)`
			`{`
			`S_LOCK(&dummy_spinlock);`
			`S_UNLOCK(&dummy_spinlock);`
			`}`
			`#endif`

Provide a generic fallback for pg_compiler_barrier using an extern function. If the compiler/arch combination does not provide compiler barriers, provide a fallback. That fallback simply consists of a function call into an externally defined function. That should guarantee compiler barrier semantics except for compilers that do inter translation unit/global optimization - those better provide an actual compiler barrier. Hopefully this fixes Tom's report of linker failures due to pg_compiler_barrier_impl not being provided. I'm not backpatching this commit as it builds on the new atomics infrastructure. If we decide an equivalent fix needs to be backpatched, I'll do so in a separate commit. Discussion: 27746.1420930690@sss.pgh.pa.us Per report from Tom Lane. 2015-01-11 01:15:29 +01:00			`#ifdef PG_HAVE_COMPILER_BARRIER_EMULATION`
			`void`
			`pg_extern_compiler_barrier(void)`
			`{`
			`/* do nothing */`
			`}`
			`#endif`

Add a basic atomic ops API abstracting away platform/architecture details. Several upcoming performance/scalability improvements require atomic operations. This new API avoids the need to splatter compiler and architecture dependent code over all the locations employing atomic ops. For several of the potential usages it'd be problematic to maintain both an atomics-using implementation and one using spinlocks or similar. In all likelihood one of the implementations would not get tested regularly under concurrency. To avoid that scenario the new API provides an automatic fallback of atomic operations to spinlocks. All properties of atomic operations are maintained. This fallback - obviously - isn't as fast as just using atomic ops, but it's not bad either. For one of the future users the atomics on top of spinlocks implementation was actually slightly faster than the old purely spinlock using implementation. That's important because it reduces the fear of regressing older platforms when improving the scalability for new ones. The API, loosely modeled after the C11 atomics support, currently provides 'atomic flags' and 32 bit unsigned integers. If the platform efficiently supports atomic 64 bit unsigned integers those are also provided. To implement atomics support for a platform/architecture/compiler for a type of atomics, 32 bit compare and exchange needs to be implemented. If available and more efficient, native support for flags, 32 bit atomic addition, and corresponding 64 bit operations may also be provided. Additional useful atomic operations are implemented generically on top of these. The implementations for various versions of gcc, msvc and sun studio have been tested. Additional existing stub implementations for * Intel icc * HP-UX acc * IBM xlc are included but have never been tested. These will likely require fixes based on buildfarm and user feedback. 
As atomic operations also require barriers for some operations the existing barrier support has been moved into the atomics code. Author: Andres Freund with contributions from Oskari Saarenmaa Reviewed-By: Amit Kapila, Robert Haas, Heikki Linnakangas and Álvaro Herrera Discussion: CA+TgmoYBW+ux5-8Ja=Mcyuy8=VXAnVRHp3Kess6Pn3DMXAPAEA@mail.gmail.com, 20131015123303.GH5300@awork2.anarazel.de, 20131028205522.GI20248@awork2.anarazel.de 2014-09-25 23:49:05 +02:00
			`#ifdef PG_HAVE_ATOMIC_FLAG_SIMULATION`

			`void`
			`pg_atomic_init_flag_impl(volatile pg_atomic_flag *ptr)`
			`{`
			`StaticAssertStmt(sizeof(ptr->sema) >= sizeof(slock_t),`
			`"size mismatch of atomic_flag vs slock_t");`

			`#ifndef HAVE_SPINLOCKS`
			`/*`
			`* NB: If we're using semaphore based TAS emulation, be careful to use a`
Collection of typo fixes. Use "a" and "an" correctly, mostly in comments. Two error messages were also fixed (they were just elogs, so no translation work required). Two function comments in pg_proc.h were also fixed. Etsuro Fujita reported one of these, but I found a lot more with grep. Also fix a few other typos spotted while grepping for the a/an typos. For example, "consists out of ..." -> "consists of ...". Plus a "though"/ "through" mixup reported by Euler Taveira. Many of these typos were in old code, which would be nice to backpatch to make future backpatching easier. But much of the code was new, and I didn't feel like crafting separate patches for each branch. So no backpatching. 2015-05-20 15:18:11 +02:00			`* separate set of semaphores. Otherwise we'd get in trouble if an atomic`
Add a basic atomic ops API abstracting away platform/architecture details. Several upcoming performance/scalability improvements require atomic operations. This new API avoids the need to splatter compiler and architecture dependent code over all the locations employing atomic ops. For several of the potential usages it'd be problematic to maintain both an atomics-using implementation and one using spinlocks or similar. In all likelihood one of the implementations would not get tested regularly under concurrency. To avoid that scenario the new API provides an automatic fallback of atomic operations to spinlocks. All properties of atomic operations are maintained. This fallback - obviously - isn't as fast as just using atomic ops, but it's not bad either. For one of the future users the atomics on top of spinlocks implementation was actually slightly faster than the old purely spinlock using implementation. That's important because it reduces the fear of regressing older platforms when improving the scalability for new ones. The API, loosely modeled after the C11 atomics support, currently provides 'atomic flags' and 32 bit unsigned integers. If the platform efficiently supports atomic 64 bit unsigned integers those are also provided. To implement atomics support for a platform/architecture/compiler for a type of atomics, 32 bit compare and exchange needs to be implemented. If available and more efficient, native support for flags, 32 bit atomic addition, and corresponding 64 bit operations may also be provided. Additional useful atomic operations are implemented generically on top of these. The implementations for various versions of gcc, msvc and sun studio have been tested. Additional existing stub implementations for * Intel icc * HP-UX acc * IBM xlc are included but have never been tested. These will likely require fixes based on buildfarm and user feedback. 
As atomic operations also require barriers for some operations the existing barrier support has been moved into the atomics code. Author: Andres Freund with contributions from Oskari Saarenmaa Reviewed-By: Amit Kapila, Robert Haas, Heikki Linnakangas and Álvaro Herrera Discussion: CA+TgmoYBW+ux5-8Ja=Mcyuy8=VXAnVRHp3Kess6Pn3DMXAPAEA@mail.gmail.com, 20131015123303.GH5300@awork2.anarazel.de, 20131028205522.GI20248@awork2.anarazel.de 2014-09-25 23:49:05 +02:00			`* var would be manipulated while spinlock is held.`
			`*/`
			`s_init_lock_sema((slock_t *) &ptr->sema, true);`
			`#else`
			`SpinLockInit((slock_t *) &ptr->sema);`
			`#endif`
			`}`

			`bool`
			`pg_atomic_test_set_flag_impl(volatile pg_atomic_flag *ptr)`
			`{`
			`return TAS((slock_t *) &ptr->sema);`
			`}`

			`void`
			`pg_atomic_clear_flag_impl(volatile pg_atomic_flag *ptr)`
			`{`
			`S_UNLOCK((slock_t *) &ptr->sema);`
			`}`

			`#endif /* PG_HAVE_ATOMIC_FLAG_SIMULATION */`

			`#ifdef PG_HAVE_ATOMIC_U32_SIMULATION`
			`void`
			`pg_atomic_init_u32_impl(volatile pg_atomic_uint32 *ptr, uint32 val_)`
			`{`
			`StaticAssertStmt(sizeof(ptr->sema) >= sizeof(slock_t),`
			`"size mismatch of atomic_flag vs slock_t");`

			`/*`
			`* If we're using semaphore based atomic flags, be careful about nested`
			`* usage of atomics while a spinlock is held.`
			`*/`
			`#ifndef HAVE_SPINLOCKS`
			`s_init_lock_sema((slock_t *) &ptr->sema, true);`
			`#else`
			`SpinLockInit((slock_t *) &ptr->sema);`
			`#endif`
			`ptr->value = val_;`
			`}`

			`bool`
			`pg_atomic_compare_exchange_u32_impl(volatile pg_atomic_uint32 *ptr,`
			`uint32 *expected, uint32 newval)`
			`{`
			`bool ret;`
			`/*`
			`* Do atomic op under a spinlock. It might look like we could just skip`
			`* the cmpxchg if the lock isn't available, but that'd just emulate a`
			`* 'weak' compare and swap. I.e. one that allows spurious failures. Since`
			`* several algorithms rely on a strong variant and that is efficiently`
			`* implementable on most major architectures let's emulate it here as`
			`* well.`
			`*/`
			`SpinLockAcquire((slock_t *) &ptr->sema);`

			`/* perform compare/exchange logic*/`
			`ret = ptr->value == *expected;`
			`*expected = ptr->value;`
			`if (ret)`
			`ptr->value = newval;`

			`/* and release lock */`
			`SpinLockRelease((slock_t *) &ptr->sema);`

			`return ret;`
			`}`

			`uint32`
			`pg_atomic_fetch_add_u32_impl(volatile pg_atomic_uint32 *ptr, int32 add_)`
			`{`
			`uint32 oldval;`
			`SpinLockAcquire((slock_t *) &ptr->sema);`
			`oldval = ptr->value;`
			`ptr->value += add_;`
			`SpinLockRelease((slock_t *) &ptr->sema);`
			`return oldval;`
			`}`

			`#endif /* PG_HAVE_ATOMIC_U32_SIMULATION */`