Add memory barriers for PgBackendStatus.st_changecount protocol.

st_changecount protocol needs the memory barriers to ensure that
the apparent order of execution is as it desires. Otherwise,
for example, the CPU might rearrange the code so that st_changecount
is incremented twice before the modification on a machine with
weak memory ordering. This surprising result can lead to bugs.

This commit introduces the macros to load and store st_changecount
with the memory barriers. These are called before and after
PgBackendStatus entries are modified or copied into private memory,
in order to prevent CPU from reordering PgBackendStatus access.

Per discussion on pgsql-hackers, we decided not to back-patch this
to 9.4 or before until we get an actual bug report about this.

Patch by me. Review by Robert Haas.
This commit is contained in:
Fujii Masao 2014-12-18 23:07:51 +09:00
parent 19e065c049
commit 38628db8d8
2 changed files with 71 additions and 24 deletions

View File

@ -2563,7 +2563,7 @@ pgstat_bestart(void)
beentry = MyBEEntry;
do
{
beentry->st_changecount++;
pgstat_increment_changecount_before(beentry);
} while ((beentry->st_changecount & 1) == 0);
beentry->st_procpid = MyProcPid;
@ -2588,8 +2588,7 @@ pgstat_bestart(void)
beentry->st_appname[NAMEDATALEN - 1] = '\0';
beentry->st_activity[pgstat_track_activity_query_size - 1] = '\0';
beentry->st_changecount++;
Assert((beentry->st_changecount & 1) == 0);
pgstat_increment_changecount_after(beentry);
/* Update app name to current GUC setting */
if (application_name)
@ -2624,12 +2623,11 @@ pgstat_beshutdown_hook(int code, Datum arg)
* before and after. We use a volatile pointer here to ensure the
* compiler doesn't try to get cute.
*/
beentry->st_changecount++;
pgstat_increment_changecount_before(beentry);
beentry->st_procpid = 0; /* mark invalid */
beentry->st_changecount++;
Assert((beentry->st_changecount & 1) == 0);
pgstat_increment_changecount_after(beentry);
}
@ -2666,7 +2664,7 @@ pgstat_report_activity(BackendState state, const char *cmd_str)
* non-disabled state. As our final update, change the state and
* clear fields we will not be updating anymore.
*/
beentry->st_changecount++;
pgstat_increment_changecount_before(beentry);
beentry->st_state = STATE_DISABLED;
beentry->st_state_start_timestamp = 0;
beentry->st_activity[0] = '\0';
@ -2674,8 +2672,7 @@ pgstat_report_activity(BackendState state, const char *cmd_str)
/* st_xact_start_timestamp and st_waiting are also disabled */
beentry->st_xact_start_timestamp = 0;
beentry->st_waiting = false;
beentry->st_changecount++;
Assert((beentry->st_changecount & 1) == 0);
pgstat_increment_changecount_after(beentry);
}
return;
}
@ -2695,7 +2692,7 @@ pgstat_report_activity(BackendState state, const char *cmd_str)
/*
* Now update the status entry
*/
beentry->st_changecount++;
pgstat_increment_changecount_before(beentry);
beentry->st_state = state;
beentry->st_state_start_timestamp = current_timestamp;
@ -2707,8 +2704,7 @@ pgstat_report_activity(BackendState state, const char *cmd_str)
beentry->st_activity_start_timestamp = start_timestamp;
}
beentry->st_changecount++;
Assert((beentry->st_changecount & 1) == 0);
pgstat_increment_changecount_after(beentry);
}
/* ----------
@ -2734,13 +2730,12 @@ pgstat_report_appname(const char *appname)
* st_changecount before and after. We use a volatile pointer here to
* ensure the compiler doesn't try to get cute.
*/
beentry->st_changecount++;
pgstat_increment_changecount_before(beentry);
memcpy((char *) beentry->st_appname, appname, len);
beentry->st_appname[len] = '\0';
beentry->st_changecount++;
Assert((beentry->st_changecount & 1) == 0);
pgstat_increment_changecount_after(beentry);
}
/*
@ -2760,10 +2755,9 @@ pgstat_report_xact_timestamp(TimestampTz tstamp)
* st_changecount before and after. We use a volatile pointer here to
* ensure the compiler doesn't try to get cute.
*/
beentry->st_changecount++;
pgstat_increment_changecount_before(beentry);
beentry->st_xact_start_timestamp = tstamp;
beentry->st_changecount++;
Assert((beentry->st_changecount & 1) == 0);
pgstat_increment_changecount_after(beentry);
}
/* ----------
@ -2839,7 +2833,10 @@ pgstat_read_current_status(void)
*/
for (;;)
{
int save_changecount = beentry->st_changecount;
int before_changecount;
int after_changecount;
pgstat_save_changecount_before(beentry, before_changecount);
localentry->backendStatus.st_procpid = beentry->st_procpid;
if (localentry->backendStatus.st_procpid > 0)
@ -2856,8 +2853,9 @@ pgstat_read_current_status(void)
localentry->backendStatus.st_activity = localactivity;
}
if (save_changecount == beentry->st_changecount &&
(save_changecount & 1) == 0)
pgstat_save_changecount_after(beentry, after_changecount);
if (before_changecount == after_changecount &&
(before_changecount & 1) == 0)
break;
/* Make sure we can break out of loop if stuck... */
@ -2927,12 +2925,17 @@ pgstat_get_backend_current_activity(int pid, bool checkUser)
for (;;)
{
int save_changecount = vbeentry->st_changecount;
int before_changecount;
int after_changecount;
pgstat_save_changecount_before(vbeentry, before_changecount);
found = (vbeentry->st_procpid == pid);
if (save_changecount == vbeentry->st_changecount &&
(save_changecount & 1) == 0)
pgstat_save_changecount_after(vbeentry, after_changecount);
if (before_changecount == after_changecount &&
(before_changecount & 1) == 0)
break;
/* Make sure we can break out of loop if stuck... */

View File

@ -16,6 +16,7 @@
#include "libpq/pqcomm.h"
#include "portability/instr_time.h"
#include "postmaster/pgarch.h"
#include "storage/barrier.h"
#include "utils/hsearch.h"
#include "utils/relcache.h"
@ -714,6 +715,12 @@ typedef struct PgBackendStatus
* st_changecount again. If the value hasn't changed, and if it's even,
* the copy is valid; otherwise start over. This makes updates cheap
* while reads are potentially expensive, but that's the tradeoff we want.
*
* The above protocol needs the memory barriers to ensure that
* the apparent order of execution is as it desires. Otherwise,
* for example, the CPU might rearrange the code so that st_changecount
* is incremented twice before the modification on a machine with
* weak memory ordering. This surprising result can lead to bugs.
*/
int st_changecount;
@ -745,6 +752,43 @@ typedef struct PgBackendStatus
char *st_activity;
} PgBackendStatus;
/*
* Macros to load and store st_changecount with the memory barriers.
*
* pgstat_increment_changecount_before() and
* pgstat_increment_changecount_after() need to be called before and after
* PgBackendStatus entries are modified, respectively. This makes sure that
* st_changecount is incremented around the modification.
*
* Also pgstat_save_changecount_before() and pgstat_save_changecount_after()
* need to be called before and after PgBackendStatus entries are copied into
* private memory, respectively.
*/
#define pgstat_increment_changecount_before(beentry) \
do { \
beentry->st_changecount++; \
pg_write_barrier(); \
} while (0)
#define pgstat_increment_changecount_after(beentry) \
do { \
pg_write_barrier(); \
beentry->st_changecount++; \
Assert((beentry->st_changecount & 1) == 0); \
} while (0)
#define pgstat_save_changecount_before(beentry, save_changecount) \
do { \
save_changecount = beentry->st_changecount; \
pg_read_barrier(); \
} while (0)
#define pgstat_save_changecount_after(beentry, save_changecount) \
do { \
pg_read_barrier(); \
save_changecount = beentry->st_changecount; \
} while (0)
/* ----------
* LocalPgBackendStatus
*