From 38628db8d8caff21eb6cf8d775c0b2d04cf07b9b Mon Sep 17 00:00:00 2001 From: Fujii Masao Date: Thu, 18 Dec 2014 23:07:51 +0900 Subject: [PATCH] Add memory barriers for PgBackendStatus.st_changecount protocol. st_changecount protocol needs the memory barriers to ensure that the apparent order of execution is as it desires. Otherwise, for example, the CPU might rearrange the code so that st_changecount is incremented twice before the modification on a machine with weak memory ordering. This surprising result can lead to bugs. This commit introduces the macros to load and store st_changecount with the memory barriers. These are called before and after PgBackendStatus entries are modified or copied into private memory, in order to prevent CPU from reordering PgBackendStatus access. Per discussion on pgsql-hackers, we decided not to back-patch this to 9.4 or before until we get an actual bug report about this. Patch by me. Review by Robert Haas. --- src/backend/postmaster/pgstat.c | 51 +++++++++++++++++---------------- src/include/pgstat.h | 44 ++++++++++++++++++++++++++++ 2 files changed, 71 insertions(+), 24 deletions(-) diff --git a/src/backend/postmaster/pgstat.c b/src/backend/postmaster/pgstat.c index f71fdeb142..6672b8bae5 100644 --- a/src/backend/postmaster/pgstat.c +++ b/src/backend/postmaster/pgstat.c @@ -2563,7 +2563,7 @@ pgstat_bestart(void) beentry = MyBEEntry; do { - beentry->st_changecount++; + pgstat_increment_changecount_before(beentry); } while ((beentry->st_changecount & 1) == 0); beentry->st_procpid = MyProcPid; @@ -2588,8 +2588,7 @@ pgstat_bestart(void) beentry->st_appname[NAMEDATALEN - 1] = '\0'; beentry->st_activity[pgstat_track_activity_query_size - 1] = '\0'; - beentry->st_changecount++; - Assert((beentry->st_changecount & 1) == 0); + pgstat_increment_changecount_after(beentry); /* Update app name to current GUC setting */ if (application_name) @@ -2624,12 +2623,11 @@ pgstat_beshutdown_hook(int code, Datum arg) * before and after. We use a volatile pointer here to ensure the * compiler doesn't try to get cute. */ - beentry->st_changecount++; + pgstat_increment_changecount_before(beentry); beentry->st_procpid = 0; /* mark invalid */ - beentry->st_changecount++; - Assert((beentry->st_changecount & 1) == 0); + pgstat_increment_changecount_after(beentry); } @@ -2666,7 +2664,7 @@ pgstat_report_activity(BackendState state, const char *cmd_str) * non-disabled state. As our final update, change the state and * clear fields we will not be updating anymore. */ - beentry->st_changecount++; + pgstat_increment_changecount_before(beentry); beentry->st_state = STATE_DISABLED; beentry->st_state_start_timestamp = 0; beentry->st_activity[0] = '\0'; @@ -2674,8 +2672,7 @@ pgstat_report_activity(BackendState state, const char *cmd_str) /* st_xact_start_timestamp and st_waiting are also disabled */ beentry->st_xact_start_timestamp = 0; beentry->st_waiting = false; - beentry->st_changecount++; - Assert((beentry->st_changecount & 1) == 0); + pgstat_increment_changecount_after(beentry); } return; } @@ -2695,7 +2692,7 @@ pgstat_report_activity(BackendState state, const char *cmd_str) /* * Now update the status entry */ - beentry->st_changecount++; + pgstat_increment_changecount_before(beentry); beentry->st_state = state; beentry->st_state_start_timestamp = current_timestamp; @@ -2707,8 +2704,7 @@ pgstat_report_activity(BackendState state, const char *cmd_str) beentry->st_activity_start_timestamp = start_timestamp; } - beentry->st_changecount++; - Assert((beentry->st_changecount & 1) == 0); + pgstat_increment_changecount_after(beentry); } /* ---------- @@ -2734,13 +2730,12 @@ pgstat_report_appname(const char *appname) * st_changecount before and after. We use a volatile pointer here to * ensure the compiler doesn't try to get cute. */ - beentry->st_changecount++; + pgstat_increment_changecount_before(beentry); memcpy((char *) beentry->st_appname, appname, len); beentry->st_appname[len] = '\0'; - beentry->st_changecount++; - Assert((beentry->st_changecount & 1) == 0); + pgstat_increment_changecount_after(beentry); } /* @@ -2760,10 +2755,9 @@ pgstat_report_xact_timestamp(TimestampTz tstamp) * st_changecount before and after. We use a volatile pointer here to * ensure the compiler doesn't try to get cute. */ - beentry->st_changecount++; + pgstat_increment_changecount_before(beentry); beentry->st_xact_start_timestamp = tstamp; - beentry->st_changecount++; - Assert((beentry->st_changecount & 1) == 0); + pgstat_increment_changecount_after(beentry); } /* ---------- @@ -2839,7 +2833,10 @@ pgstat_read_current_status(void) */ for (;;) { - int save_changecount = beentry->st_changecount; + int before_changecount; + int after_changecount; + + pgstat_save_changecount_before(beentry, before_changecount); localentry->backendStatus.st_procpid = beentry->st_procpid; if (localentry->backendStatus.st_procpid > 0) @@ -2856,8 +2853,9 @@ pgstat_read_current_status(void) localentry->backendStatus.st_activity = localactivity; } - if (save_changecount == beentry->st_changecount && - (save_changecount & 1) == 0) + pgstat_save_changecount_after(beentry, after_changecount); + if (before_changecount == after_changecount && + (before_changecount & 1) == 0) break; /* Make sure we can break out of loop if stuck... */ @@ -2927,12 +2925,17 @@ pgstat_get_backend_current_activity(int pid, bool checkUser) for (;;) { - int save_changecount = vbeentry->st_changecount; + int before_changecount; + int after_changecount; + + pgstat_save_changecount_before(vbeentry, before_changecount); found = (vbeentry->st_procpid == pid); - if (save_changecount == vbeentry->st_changecount && - (save_changecount & 1) == 0) + pgstat_save_changecount_after(vbeentry, after_changecount); + + if (before_changecount == after_changecount && + (before_changecount & 1) == 0) break; /* Make sure we can break out of loop if stuck... */ diff --git a/src/include/pgstat.h b/src/include/pgstat.h index 08925336d1..7285f3ee16 100644 --- a/src/include/pgstat.h +++ b/src/include/pgstat.h @@ -16,6 +16,7 @@ #include "libpq/pqcomm.h" #include "portability/instr_time.h" #include "postmaster/pgarch.h" +#include "storage/barrier.h" #include "utils/hsearch.h" #include "utils/relcache.h" @@ -714,6 +715,12 @@ typedef struct PgBackendStatus * st_changecount again. If the value hasn't changed, and if it's even, * the copy is valid; otherwise start over. This makes updates cheap * while reads are potentially expensive, but that's the tradeoff we want. + * + * The above protocol needs the memory barriers to ensure that + * the apparent order of execution is as it desires. Otherwise, + * for example, the CPU might rearrange the code so that st_changecount + * is incremented twice before the modification on a machine with + * weak memory ordering. This surprising result can lead to bugs. */ int st_changecount; @@ -745,6 +752,43 @@ typedef struct PgBackendStatus char *st_activity; } PgBackendStatus; +/* + * Macros to load and store st_changecount with the memory barriers. + * + * pgstat_increment_changecount_before() and + * pgstat_increment_changecount_after() need to be called before and after + * PgBackendStatus entries are modified, respectively. This makes sure that + * st_changecount is incremented around the modification. + * + * Also pgstat_save_changecount_before() and pgstat_save_changecount_after() + * need to be called before and after PgBackendStatus entries are copied into + * private memory, respectively. + */ +#define pgstat_increment_changecount_before(beentry) \ + do { \ + beentry->st_changecount++; \ + pg_write_barrier(); \ + } while (0) + +#define pgstat_increment_changecount_after(beentry) \ + do { \ + pg_write_barrier(); \ + beentry->st_changecount++; \ + Assert((beentry->st_changecount & 1) == 0); \ + } while (0) + +#define pgstat_save_changecount_before(beentry, save_changecount) \ + do { \ + save_changecount = beentry->st_changecount; \ + pg_read_barrier(); \ + } while (0) + +#define pgstat_save_changecount_after(beentry, save_changecount) \ + do { \ + pg_read_barrier(); \ + save_changecount = beentry->st_changecount; \ + } while (0) + /* ---------- * LocalPgBackendStatus *