Add a basic atomic ops API abstracting away platform/architecture details.
Several upcoming performance/scalability improvements require atomic
operations. This new API avoids the need to splatter compiler- and
architecture-dependent code over all the locations employing atomic
ops.
For several of the potential usages it'd be problematic to maintain
both an atomics-using implementation and one using spinlocks or
similar. In all likelihood one of the implementations would not get
tested regularly under concurrency. To avoid that scenario the new API
provides an automatic fallback of atomic operations to spinlocks. All
properties of atomic operations are maintained. This fallback -
obviously - isn't as fast as just using atomic ops, but it's not bad
either. For one of the future users the atomics-on-top-of-spinlocks
implementation was actually slightly faster than the old purely
spinlock-based implementation. That's important because it reduces the
fear of regressing older platforms when improving the scalability for
new ones.
The API, loosely modeled after the C11 atomics support, currently
provides 'atomic flags' and 32 bit unsigned integers. If the platform
efficiently supports atomic 64 bit unsigned integers those are also
provided.
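As a rough usage sketch (not part of the original commit message; the
struct and helper names below are invented for illustration, only the
pg_atomic_* calls come from the new port/atomics.h API), a shared
counter plus a simple claim flag could look like this:

    #include "postgres.h"
    #include "port/atomics.h"

    /* hypothetical shared-memory struct, for illustration only */
    typedef struct SharedCounter
    {
        pg_atomic_uint32 nrequests;      /* lock-free counter */
        pg_atomic_flag   in_maintenance; /* one-owner claim flag */
    } SharedCounter;

    static void
    counter_init(SharedCounter *cnt)
    {
        pg_atomic_init_u32(&cnt->nrequests, 0);
        pg_atomic_init_flag(&cnt->in_maintenance);  /* starts cleared */
    }

    static void
    counter_bump(SharedCounter *cnt)
    {
        /* same call whether the platform has native atomics or uses the
         * spinlock fallback */
        pg_atomic_fetch_add_u32(&cnt->nrequests, 1);
    }

    static bool
    counter_try_claim(SharedCounter *cnt)
    {
        /* returns true only for the caller that actually set the flag */
        return pg_atomic_test_set_flag(&cnt->in_maintenance);
    }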
To implement atomics support for a platform/architecture/compiler,
only 32 bit compare-and-exchange needs to be implemented for a given
type of atomics. If available and more efficient, native support for
flags, 32 bit atomic addition, and corresponding 64 bit operations may
also be provided. Additional useful atomic operations are implemented
generically on top of these.
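To sketch what "implemented generically on top of these" means (a
simplified illustration, not the literal generic implementation), a
fetch-add can be emulated with nothing but the strong 32 bit
compare-and-exchange:

    /* hypothetical sketch of a generic fetch-add built on CAS */
    static uint32
    fetch_add_via_cas(volatile pg_atomic_uint32 *ptr, int32 add_)
    {
        uint32 old = pg_atomic_read_u32(ptr);

        /* on failure the CAS updates 'old' to the current value, so retry */
        while (!pg_atomic_compare_exchange_u32(ptr, &old, old + add_))
            ;
        return old;
    }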
The implementations for various versions of gcc, msvc and sun studio
have been tested. Additional existing stub implementations for
* Intel icc
* HP-UX acc
* IBM xlc
are included but have never been tested. These will likely require
fixes based on buildfarm and user feedback.
As some atomic operations also require barriers, the existing barrier
support has been moved into the atomics code.
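For instance (an illustrative sketch, not from this commit; the Mailbox
struct is invented), the write barrier from the moved support is what
makes the classic publish-then-flag pattern safe:

    #include "postgres.h"
    #include "port/atomics.h"   /* barrier macros now live with the atomics */

    typedef struct Mailbox
    {
        uint32 payload;
        bool   ready;
    } Mailbox;

    /* hypothetical writer; a reader would pair this with pg_read_barrier() */
    static void
    mailbox_publish(Mailbox *mbox, uint32 value)
    {
        mbox->payload = value;
        pg_write_barrier();     /* make payload visible before the flag */
        mbox->ready = true;
    }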
Author: Andres Freund with contributions from Oskari Saarenmaa
Reviewed-By: Amit Kapila, Robert Haas, Heikki Linnakangas and Álvaro Herrera
Discussion: CA+TgmoYBW+ux5-8Ja=Mcyuy8=VXAnVRHp3Kess6Pn3DMXAPAEA@mail.gmail.com,
20131015123303.GH5300@awork2.anarazel.de,
20131028205522.GI20248@awork2.anarazel.de
2014-09-25 23:49:05 +02:00
/*-------------------------------------------------------------------------
 *
 * atomics.c
 *	   Non-Inline parts of the atomics implementation
 *
 * Portions Copyright (c) 2013-2015, PostgreSQL Global Development Group
 *
 *
 * IDENTIFICATION
 *	  src/backend/port/atomics.c
 *
 *-------------------------------------------------------------------------
 */
#include "postgres.h"

/*
 * We want the functions below to be inline; but if the compiler doesn't
 * support that, fall back on providing them as regular functions. See
 * STATIC_IF_INLINE in c.h.
 */
#define ATOMICS_INCLUDE_DEFINITIONS

#include "port/atomics.h"
#include "storage/spin.h"

#ifdef PG_HAVE_MEMORY_BARRIER_EMULATION
void
pg_spinlock_barrier(void)
{
	S_LOCK(&dummy_spinlock);
	S_UNLOCK(&dummy_spinlock);
}
#endif

#ifdef PG_HAVE_COMPILER_BARRIER_EMULATION
void
pg_extern_compiler_barrier(void)
{
	/* do nothing */
}
#endif

#ifdef PG_HAVE_ATOMIC_FLAG_SIMULATION

void
pg_atomic_init_flag_impl(volatile pg_atomic_flag *ptr)
{
	StaticAssertStmt(sizeof(ptr->sema) >= sizeof(slock_t),
					 "size mismatch of atomic_flag vs slock_t");

#ifndef HAVE_SPINLOCKS
	/*
	 * NB: If we're using semaphore based TAS emulation, be careful to use a
	 * separate set of semaphores. Otherwise we'd get in trouble if an
	 * atomic var were manipulated while a spinlock is held.
	 */
	s_init_lock_sema((slock_t *) &ptr->sema, true);
#else
	SpinLockInit((slock_t *) &ptr->sema);
#endif
}

bool
pg_atomic_test_set_flag_impl(volatile pg_atomic_flag *ptr)
{
	return TAS((slock_t *) &ptr->sema);
}

void
pg_atomic_clear_flag_impl(volatile pg_atomic_flag *ptr)
{
	S_UNLOCK((slock_t *) &ptr->sema);
}

#endif /* PG_HAVE_ATOMIC_FLAG_SIMULATION */

#ifdef PG_HAVE_ATOMIC_U32_SIMULATION
void
pg_atomic_init_u32_impl(volatile pg_atomic_uint32 *ptr, uint32 val_)
{
	StaticAssertStmt(sizeof(ptr->sema) >= sizeof(slock_t),
					 "size mismatch of atomic_uint32 vs slock_t");

	/*
	 * If we're using semaphore based atomic flags, be careful about nested
	 * usage of atomics while a spinlock is held.
	 */
#ifndef HAVE_SPINLOCKS
	s_init_lock_sema((slock_t *) &ptr->sema, true);
#else
	SpinLockInit((slock_t *) &ptr->sema);
#endif
	ptr->value = val_;
}

bool
pg_atomic_compare_exchange_u32_impl(volatile pg_atomic_uint32 *ptr,
									uint32 *expected, uint32 newval)
{
	bool		ret;

	/*
	 * Do atomic op under a spinlock. It might look like we could just skip
	 * the cmpxchg if the lock isn't available, but that'd just emulate a
	 * 'weak' compare and swap. I.e. one that allows spurious failures. Since
	 * several algorithms rely on a strong variant and that is efficiently
	 * implementable on most major architectures let's emulate it here as
	 * well.
	 */
	SpinLockAcquire((slock_t *) &ptr->sema);

	/* perform compare/exchange logic */
	ret = ptr->value == *expected;
	*expected = ptr->value;
	if (ret)
		ptr->value = newval;

	/* and release lock */
	SpinLockRelease((slock_t *) &ptr->sema);

	return ret;
}

uint32
pg_atomic_fetch_add_u32_impl(volatile pg_atomic_uint32 *ptr, int32 add_)
{
	uint32		oldval;

	SpinLockAcquire((slock_t *) &ptr->sema);
	oldval = ptr->value;
	ptr->value += add_;
	SpinLockRelease((slock_t *) &ptr->sema);
	return oldval;
}

#endif /* PG_HAVE_ATOMIC_U32_SIMULATION */