pgindent run for 9.4
This includes removing tabs after periods in C comments, which was applied to back branches, so this change should not affect backpatching.
This commit is contained in:
parent
fb85cd4320
commit
0a78320057
|
@ -2,4 +2,8 @@
|
|||
* For the raison d'etre of this file, check the comment above the definition
|
||||
* of the PGAC_C_INLINE macro in config/c-compiler.m4.
|
||||
*/
|
||||
static inline int fun () { return 0; }
|
||||
static inline int
|
||||
fun()
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
|
|
@ -795,6 +795,7 @@ cube_inter(PG_FUNCTION_ARGS)
|
|||
if (DIM(a) < DIM(b))
|
||||
{
|
||||
NDBOX *tmp = b;
|
||||
|
||||
b = a;
|
||||
a = tmp;
|
||||
swapped = true;
|
||||
|
@ -1236,14 +1237,14 @@ cube_distance(PG_FUNCTION_ARGS)
|
|||
/* compute within the dimensions of (b) */
|
||||
for (i = 0; i < DIM(b); i++)
|
||||
{
|
||||
d = distance_1D(LL_COORD(a,i), UR_COORD(a,i), LL_COORD(b,i), UR_COORD(b,i));
|
||||
d = distance_1D(LL_COORD(a, i), UR_COORD(a, i), LL_COORD(b, i), UR_COORD(b, i));
|
||||
distance += d * d;
|
||||
}
|
||||
|
||||
/* compute distance to zero for those dimensions in (a) absent in (b) */
|
||||
for (i = DIM(b); i < DIM(a); i++)
|
||||
{
|
||||
d = distance_1D(LL_COORD(a,i), UR_COORD(a,i), 0.0, 0.0);
|
||||
d = distance_1D(LL_COORD(a, i), UR_COORD(a, i), 0.0, 0.0);
|
||||
distance += d * d;
|
||||
}
|
||||
|
||||
|
@ -1297,11 +1298,11 @@ cube_is_point_internal(NDBOX *cube)
|
|||
return true;
|
||||
|
||||
/*
|
||||
* Even if the point-flag is not set, all the lower-left coordinates
|
||||
* might match the upper-right coordinates, so that the value is in
|
||||
* fact a point. Such values don't arise with current code - the point
|
||||
* flag is always set if appropriate - but they might be present on-disk
|
||||
* in clusters upgraded from pre-9.4 versions.
|
||||
* Even if the point-flag is not set, all the lower-left coordinates might
|
||||
* match the upper-right coordinates, so that the value is in fact a
|
||||
* point. Such values don't arise with current code - the point flag is
|
||||
* always set if appropriate - but they might be present on-disk in
|
||||
* clusters upgraded from pre-9.4 versions.
|
||||
*/
|
||||
for (i = 0; i < DIM(cube); i++)
|
||||
{
|
||||
|
@ -1317,6 +1318,7 @@ cube_dim(PG_FUNCTION_ARGS)
|
|||
{
|
||||
NDBOX *c = PG_GETARG_NDBOX(0);
|
||||
int dim = DIM(c);
|
||||
|
||||
PG_FREE_IF_COPY(c, 0);
|
||||
PG_RETURN_INT32(dim);
|
||||
}
|
||||
|
@ -1330,7 +1332,7 @@ cube_ll_coord(PG_FUNCTION_ARGS)
|
|||
double result;
|
||||
|
||||
if (DIM(c) >= n && n > 0)
|
||||
result = Min(LL_COORD(c, n-1), UR_COORD(c, n-1));
|
||||
result = Min(LL_COORD(c, n - 1), UR_COORD(c, n - 1));
|
||||
else
|
||||
result = 0;
|
||||
|
||||
|
@ -1347,7 +1349,7 @@ cube_ur_coord(PG_FUNCTION_ARGS)
|
|||
double result;
|
||||
|
||||
if (DIM(c) >= n && n > 0)
|
||||
result = Max(LL_COORD(c, n-1), UR_COORD(c, n-1));
|
||||
result = Max(LL_COORD(c, n - 1), UR_COORD(c, n - 1));
|
||||
else
|
||||
result = 0;
|
||||
|
||||
|
@ -1382,15 +1384,15 @@ cube_enlarge(PG_FUNCTION_ARGS)
|
|||
|
||||
for (i = 0, j = dim; i < DIM(a); i++, j++)
|
||||
{
|
||||
if (LL_COORD(a,i) >= UR_COORD(a,i))
|
||||
if (LL_COORD(a, i) >= UR_COORD(a, i))
|
||||
{
|
||||
result->x[i] = UR_COORD(a,i) - r;
|
||||
result->x[j] = LL_COORD(a,i) + r;
|
||||
result->x[i] = UR_COORD(a, i) - r;
|
||||
result->x[j] = LL_COORD(a, i) + r;
|
||||
}
|
||||
else
|
||||
{
|
||||
result->x[i] = LL_COORD(a,i) - r;
|
||||
result->x[j] = UR_COORD(a,i) + r;
|
||||
result->x[i] = LL_COORD(a, i) - r;
|
||||
result->x[j] = UR_COORD(a, i) + r;
|
||||
}
|
||||
if (result->x[i] > result->x[j])
|
||||
{
|
||||
|
@ -1503,7 +1505,7 @@ cube_c_f8(PG_FUNCTION_ARGS)
|
|||
result->x[DIM(result) + i] = cube->x[DIM(cube) + i];
|
||||
}
|
||||
result->x[DIM(result) - 1] = x;
|
||||
result->x[2*DIM(result) - 1] = x;
|
||||
result->x[2 * DIM(result) - 1] = x;
|
||||
}
|
||||
|
||||
PG_FREE_IF_COPY(cube, 0);
|
||||
|
@ -1521,7 +1523,8 @@ cube_c_f8_f8(PG_FUNCTION_ARGS)
|
|||
int size;
|
||||
int i;
|
||||
|
||||
if (IS_POINT(cube) && (x1 == x2)){
|
||||
if (IS_POINT(cube) && (x1 == x2))
|
||||
{
|
||||
size = POINT_SIZE((DIM(cube) + 1));
|
||||
result = (NDBOX *) palloc0(size);
|
||||
SET_VARSIZE(result, size);
|
||||
|
|
|
@ -70,6 +70,7 @@ static const struct FileFdwOption valid_options[] = {
|
|||
{"encoding", ForeignTableRelationId},
|
||||
{"force_not_null", AttributeRelationId},
|
||||
{"force_null", AttributeRelationId},
|
||||
|
||||
/*
|
||||
* force_quote is not supported by file_fdw because it's for COPY TO.
|
||||
*/
|
||||
|
@ -253,6 +254,7 @@ file_fdw_validator(PG_FUNCTION_ARGS)
|
|||
errmsg("conflicting or redundant options")));
|
||||
filename = defGetString(def);
|
||||
}
|
||||
|
||||
/*
|
||||
* force_not_null is a boolean option; after validation we can discard
|
||||
* it - it will be retrieved later in get_file_fdw_attribute_options()
|
||||
|
@ -443,12 +445,15 @@ get_file_fdw_attribute_options(Oid relid)
|
|||
|
||||
heap_close(rel, AccessShareLock);
|
||||
|
||||
/* Return DefElem only when some column(s) have force_not_null / force_null options set */
|
||||
/*
|
||||
* Return DefElem only when some column(s) have force_not_null /
|
||||
* force_null options set
|
||||
*/
|
||||
if (fnncolumns != NIL)
|
||||
options = lappend(options, makeDefElem("force_not_null", (Node *) fnncolumns));
|
||||
|
||||
if (fncolumns != NIL)
|
||||
options = lappend(options,makeDefElem("force_null", (Node *) fncolumns));
|
||||
options = lappend(options, makeDefElem("force_null", (Node *) fncolumns));
|
||||
|
||||
return options;
|
||||
}
|
||||
|
|
|
@ -1245,7 +1245,7 @@ hstore_to_json_loose(PG_FUNCTION_ARGS)
|
|||
dst;
|
||||
|
||||
if (count == 0)
|
||||
PG_RETURN_TEXT_P(cstring_to_text_with_len("{}",2));
|
||||
PG_RETURN_TEXT_P(cstring_to_text_with_len("{}", 2));
|
||||
|
||||
initStringInfo(&tmp);
|
||||
initStringInfo(&dst);
|
||||
|
@ -1335,7 +1335,7 @@ hstore_to_json(PG_FUNCTION_ARGS)
|
|||
dst;
|
||||
|
||||
if (count == 0)
|
||||
PG_RETURN_TEXT_P(cstring_to_text_with_len("{}",2));
|
||||
PG_RETURN_TEXT_P(cstring_to_text_with_len("{}", 2));
|
||||
|
||||
initStringInfo(&tmp);
|
||||
initStringInfo(&dst);
|
||||
|
@ -1381,7 +1381,8 @@ hstore_to_jsonb(PG_FUNCTION_ARGS)
|
|||
|
||||
for (i = 0; i < count; i++)
|
||||
{
|
||||
JsonbValue key, val;
|
||||
JsonbValue key,
|
||||
val;
|
||||
|
||||
key.estSize = sizeof(JEntry);
|
||||
key.type = jbvString;
|
||||
|
@ -1432,7 +1433,8 @@ hstore_to_jsonb_loose(PG_FUNCTION_ARGS)
|
|||
|
||||
for (i = 0; i < count; i++)
|
||||
{
|
||||
JsonbValue key, val;
|
||||
JsonbValue key,
|
||||
val;
|
||||
|
||||
key.estSize = sizeof(JEntry);
|
||||
key.type = jbvString;
|
||||
|
@ -1508,6 +1510,7 @@ hstore_to_jsonb_loose(PG_FUNCTION_ARGS)
|
|||
val.type = jbvNumeric;
|
||||
val.val.numeric = DatumGetNumeric(
|
||||
DirectFunctionCall3(numeric_in, CStringGetDatum(tmp.data), 0, -1));
|
||||
|
||||
val.estSize += VARSIZE_ANY(val.val.numeric) +sizeof(JEntry);
|
||||
}
|
||||
else
|
||||
|
|
|
@ -209,6 +209,7 @@ page_header(PG_FUNCTION_ARGS)
|
|||
if (tupdesc->attrs[0]->atttypid == TEXTOID)
|
||||
{
|
||||
char lsnchar[64];
|
||||
|
||||
snprintf(lsnchar, sizeof(lsnchar), "%X/%X",
|
||||
(uint32) (lsn >> 32), (uint32) lsn);
|
||||
values[0] = CStringGetTextDatum(lsnchar);
|
||||
|
|
|
@ -369,11 +369,12 @@ test_sync(int writes_per_op)
|
|||
{
|
||||
for (writes = 0; writes < writes_per_op; writes++)
|
||||
if (write(tmpfile, buf, XLOG_BLCKSZ) != XLOG_BLCKSZ)
|
||||
|
||||
/*
|
||||
* This can generate write failures if the filesystem
|
||||
* has a large block size, e.g. 4k, and there is no
|
||||
* support for O_DIRECT writes smaller than the
|
||||
* file system block size, e.g. XFS.
|
||||
* This can generate write failures if the filesystem has
|
||||
* a large block size, e.g. 4k, and there is no support
|
||||
* for O_DIRECT writes smaller than the file system block
|
||||
* size, e.g. XFS.
|
||||
*/
|
||||
die("write failed");
|
||||
if (lseek(tmpfile, 0, SEEK_SET) == -1)
|
||||
|
|
|
@ -34,8 +34,8 @@ generate_old_dump(void)
|
|||
|
||||
/*
|
||||
* Set umask for this function, all functions it calls, and all
|
||||
* subprocesses/threads it creates. We can't use fopen_priv()
|
||||
* as Windows uses threads and umask is process-global.
|
||||
* subprocesses/threads it creates. We can't use fopen_priv() as Windows
|
||||
* uses threads and umask is process-global.
|
||||
*/
|
||||
old_umask = umask(S_IRWXG | S_IRWXO);
|
||||
|
||||
|
|
|
@ -52,7 +52,7 @@ exec_prog(const char *log_file, const char *opt_log_file,
|
|||
va_list ap;
|
||||
|
||||
#ifdef WIN32
|
||||
static DWORD mainThreadId = 0;
|
||||
static DWORD mainThreadId = 0;
|
||||
|
||||
/* We assume we are called from the primary thread first */
|
||||
if (mainThreadId == 0)
|
||||
|
@ -73,14 +73,15 @@ static DWORD mainThreadId = 0;
|
|||
pg_log(PG_VERBOSE, "%s\n", cmd);
|
||||
|
||||
#ifdef WIN32
|
||||
|
||||
/*
|
||||
* For some reason, Windows issues a file-in-use error if we write data
|
||||
* to the log file from a non-primary thread just before we create a
|
||||
* subprocess that also writes to the same log file. One fix is to
|
||||
* sleep for 100ms. A cleaner fix is to write to the log file _after_
|
||||
* the subprocess has completed, so we do this only when writing from
|
||||
* a non-primary thread. fflush(), running system() twice, and
|
||||
* pre-creating the file do not seem to help.
|
||||
* For some reason, Windows issues a file-in-use error if we write data to
|
||||
* the log file from a non-primary thread just before we create a
|
||||
* subprocess that also writes to the same log file. One fix is to sleep
|
||||
* for 100ms. A cleaner fix is to write to the log file _after_ the
|
||||
* subprocess has completed, so we do this only when writing from a
|
||||
* non-primary thread. fflush(), running system() twice, and pre-creating
|
||||
* the file do not seem to help.
|
||||
*/
|
||||
if (mainThreadId != GetCurrentThreadId())
|
||||
result = system(cmd);
|
||||
|
@ -154,6 +155,7 @@ static DWORD mainThreadId = 0;
|
|||
}
|
||||
|
||||
#ifndef WIN32
|
||||
|
||||
/*
|
||||
* We can't do this on Windows because it will keep the "pg_ctl start"
|
||||
* output filename open until the server stops, so we do the \n\n above on
|
||||
|
|
|
@ -270,7 +270,8 @@ get_rel_infos(ClusterInfo *cluster, DbInfo *dbinfo)
|
|||
i_relfilenode,
|
||||
i_reltablespace;
|
||||
char query[QUERY_ALLOC];
|
||||
char *last_namespace = NULL, *last_tablespace = NULL;
|
||||
char *last_namespace = NULL,
|
||||
*last_tablespace = NULL;
|
||||
|
||||
/*
|
||||
* pg_largeobject contains user data that does not appear in pg_dumpall
|
||||
|
@ -373,9 +374,9 @@ get_rel_infos(ClusterInfo *cluster, DbInfo *dbinfo)
|
|||
curr->nsp_alloc = false;
|
||||
|
||||
/*
|
||||
* Many of the namespace and tablespace strings are identical,
|
||||
* so we try to reuse the allocated string pointers where possible
|
||||
* to reduce memory consumption.
|
||||
* Many of the namespace and tablespace strings are identical, so we
|
||||
* try to reuse the allocated string pointers where possible to reduce
|
||||
* memory consumption.
|
||||
*/
|
||||
/* Can we reuse the previous string allocation? */
|
||||
if (last_namespace && strcmp(nspname, last_namespace) == 0)
|
||||
|
|
|
@ -213,6 +213,7 @@ parseCommandLine(int argc, char *argv[])
|
|||
{
|
||||
char *pgoptions = psprintf("%s %s", FIX_DEFAULT_READ_ONLY,
|
||||
getenv("PGOPTIONS"));
|
||||
|
||||
pg_putenv("PGOPTIONS", pgoptions);
|
||||
pfree(pgoptions);
|
||||
}
|
||||
|
|
|
@ -339,10 +339,10 @@ reap_child(bool wait_for_child)
|
|||
thread_handles[thread_num] = thread_handles[parallel_jobs - 1];
|
||||
|
||||
/*
|
||||
* Move last active thread arg struct into the now-dead slot,
|
||||
* and the now-dead slot to the end for reuse by the next thread.
|
||||
* Though the thread struct is in use by another thread, we can
|
||||
* safely swap the struct pointers within the array.
|
||||
* Move last active thread arg struct into the now-dead slot, and the
|
||||
* now-dead slot to the end for reuse by the next thread. Though the
|
||||
* thread struct is in use by another thread, we can safely swap the
|
||||
* struct pointers within the array.
|
||||
*/
|
||||
tmp_args = cur_thread_args[thread_num];
|
||||
cur_thread_args[thread_num] = cur_thread_args[parallel_jobs - 1];
|
||||
|
|
|
@ -167,7 +167,8 @@ typedef struct
|
|||
{
|
||||
Oid db_oid; /* oid of the database */
|
||||
char *db_name; /* database name */
|
||||
char db_tablespace[MAXPGPATH]; /* database default tablespace path */
|
||||
char db_tablespace[MAXPGPATH]; /* database default tablespace
|
||||
* path */
|
||||
RelInfoArr rel_arr; /* array of all user relinfos */
|
||||
} DbInfo;
|
||||
|
||||
|
@ -454,7 +455,7 @@ pg_log(eLogType type, const char *fmt,...)
|
|||
__attribute__((format(PG_PRINTF_ATTRIBUTE, 2, 3)));
|
||||
void
|
||||
pg_fatal(const char *fmt,...)
|
||||
__attribute__((format(PG_PRINTF_ATTRIBUTE, 1, 2),noreturn));
|
||||
__attribute__((format(PG_PRINTF_ATTRIBUTE, 1, 2), noreturn));
|
||||
void end_progress_output(void);
|
||||
void
|
||||
prep_status(const char *fmt,...)
|
||||
|
|
|
@ -240,28 +240,26 @@ start_postmaster(ClusterInfo *cluster, bool throw_error)
|
|||
return false;
|
||||
|
||||
/*
|
||||
* We set this here to make sure atexit() shuts down the server,
|
||||
* but only if we started the server successfully. We do it
|
||||
* before checking for connectivity in case the server started but
|
||||
* there is a connectivity failure. If pg_ctl did not return success,
|
||||
* we will exit below.
|
||||
* We set this here to make sure atexit() shuts down the server, but only
|
||||
* if we started the server successfully. We do it before checking for
|
||||
* connectivity in case the server started but there is a connectivity
|
||||
* failure. If pg_ctl did not return success, we will exit below.
|
||||
*
|
||||
* Pre-9.1 servers do not have PQping(), so we could be leaving the server
|
||||
* running if authentication was misconfigured, so someday we might want to
|
||||
* be more aggressive about doing server shutdowns even if pg_ctl fails,
|
||||
* but now (2013-08-14) it seems prudent to be cautious. We don't want to
|
||||
* shutdown a server that might have been accidentally started during the
|
||||
* upgrade.
|
||||
* running if authentication was misconfigured, so someday we might want
|
||||
* to be more aggressive about doing server shutdowns even if pg_ctl
|
||||
* fails, but now (2013-08-14) it seems prudent to be cautious. We don't
|
||||
* want to shutdown a server that might have been accidentally started
|
||||
* during the upgrade.
|
||||
*/
|
||||
if (pg_ctl_return)
|
||||
os_info.running_cluster = cluster;
|
||||
|
||||
/*
|
||||
* pg_ctl -w might have failed because the server couldn't be started,
|
||||
* or there might have been a connection problem in _checking_ if the
|
||||
* server has started. Therefore, even if pg_ctl failed, we continue
|
||||
* and test for connectivity in case we get a connection reason for the
|
||||
* failure.
|
||||
* pg_ctl -w might have failed because the server couldn't be started, or
|
||||
* there might have been a connection problem in _checking_ if the server
|
||||
* has started. Therefore, even if pg_ctl failed, we continue and test
|
||||
* for connectivity in case we get a connection reason for the failure.
|
||||
*/
|
||||
if ((conn = get_db_conn(cluster, "template1")) == NULL ||
|
||||
PQstatus(conn) != CONNECTION_OK)
|
||||
|
@ -278,7 +276,8 @@ start_postmaster(ClusterInfo *cluster, bool throw_error)
|
|||
|
||||
/*
|
||||
* If pg_ctl failed, and the connection didn't fail, and throw_error is
|
||||
* enabled, fail now. This could happen if the server was already running.
|
||||
* enabled, fail now. This could happen if the server was already
|
||||
* running.
|
||||
*/
|
||||
if (!pg_ctl_return)
|
||||
pg_fatal("pg_ctl failed to start the %s server, or connection failed\n",
|
||||
|
|
|
@ -78,10 +78,9 @@ get_tablespace_paths(void)
|
|||
* Effectively, this is checking only for tables/indexes in
|
||||
* non-existent tablespace directories. Databases located in
|
||||
* non-existent tablespaces already throw a backend error.
|
||||
* Non-existent tablespace directories can occur when a data
|
||||
* directory that contains user tablespaces is moved as part
|
||||
* of pg_upgrade preparation and the symbolic links are not
|
||||
* updated.
|
||||
* Non-existent tablespace directories can occur when a data directory
|
||||
* that contains user tablespaces is moved as part of pg_upgrade
|
||||
* preparation and the symbolic links are not updated.
|
||||
*/
|
||||
if (stat(os_info.old_tablespaces[tblnum], &statBuf) != 0)
|
||||
{
|
||||
|
|
|
@ -82,7 +82,7 @@ prep_status(const char *fmt,...)
|
|||
|
||||
|
||||
static
|
||||
__attribute__((format(PG_PRINTF_ATTRIBUTE, 2, 0)))
|
||||
__attribute__((format(PG_PRINTF_ATTRIBUTE, 2, 0)))
|
||||
void
|
||||
pg_log_v(eLogType type, const char *fmt, va_list ap)
|
||||
{
|
||||
|
|
|
@ -163,8 +163,10 @@ bool use_quiet; /* quiet logging onto stderr */
|
|||
int agg_interval; /* log aggregates instead of individual
|
||||
* transactions */
|
||||
int progress = 0; /* thread progress report every this seconds */
|
||||
int progress_nclients = 0; /* number of clients for progress report */
|
||||
int progress_nthreads = 0; /* number of threads for progress report */
|
||||
int progress_nclients = 0; /* number of clients for progress
|
||||
* report */
|
||||
int progress_nthreads = 0; /* number of threads for progress
|
||||
* report */
|
||||
bool is_connect; /* establish connection for each transaction */
|
||||
bool is_latencies; /* report per-command latencies */
|
||||
int main_pid; /* main process id used in log filename */
|
||||
|
@ -913,28 +915,28 @@ top:
|
|||
commands = sql_files[st->use_file];
|
||||
|
||||
/*
|
||||
* Handle throttling once per transaction by sleeping. It is simpler
|
||||
* to do this here rather than at the end, because so much complicated
|
||||
* logic happens below when statements finish.
|
||||
* Handle throttling once per transaction by sleeping. It is simpler to
|
||||
* do this here rather than at the end, because so much complicated logic
|
||||
* happens below when statements finish.
|
||||
*/
|
||||
if (throttle_delay && ! st->is_throttled)
|
||||
if (throttle_delay && !st->is_throttled)
|
||||
{
|
||||
/*
|
||||
* Use inverse transform sampling to randomly generate a delay, such
|
||||
* that the series of delays will approximate a Poisson distribution
|
||||
* centered on the throttle_delay time.
|
||||
*
|
||||
* 10000 implies a 9.2 (-log(1/10000)) to 0.0 (log 1) delay multiplier,
|
||||
* and results in a 0.055 % target underestimation bias:
|
||||
* 10000 implies a 9.2 (-log(1/10000)) to 0.0 (log 1) delay
|
||||
* multiplier, and results in a 0.055 % target underestimation bias:
|
||||
*
|
||||
* SELECT 1.0/AVG(-LN(i/10000.0)) FROM generate_series(1,10000) AS i;
|
||||
* = 1.000552717032611116335474
|
||||
*
|
||||
* If transactions are too slow or a given wait is shorter than
|
||||
* a transaction, the next transaction will start right away.
|
||||
* If transactions are too slow or a given wait is shorter than a
|
||||
* transaction, the next transaction will start right away.
|
||||
*/
|
||||
int64 wait = (int64) (throttle_delay *
|
||||
1.00055271703 * -log(getrand(thread, 1, 10000)/10000.0));
|
||||
1.00055271703 * -log(getrand(thread, 1, 10000) / 10000.0));
|
||||
|
||||
thread->throttle_trigger += wait;
|
||||
|
||||
|
@ -943,7 +945,7 @@ top:
|
|||
st->throttling = true;
|
||||
st->is_throttled = true;
|
||||
if (debug)
|
||||
fprintf(stderr, "client %d throttling "INT64_FORMAT" us\n",
|
||||
fprintf(stderr, "client %d throttling " INT64_FORMAT " us\n",
|
||||
st->id, wait);
|
||||
}
|
||||
|
||||
|
@ -961,6 +963,7 @@ top:
|
|||
{
|
||||
/* Measure lag of throttled transaction relative to target */
|
||||
int64 lag = now_us - st->until;
|
||||
|
||||
thread->throttle_lag += lag;
|
||||
if (lag > thread->throttle_lag_max)
|
||||
thread->throttle_lag_max = lag;
|
||||
|
@ -1011,6 +1014,7 @@ top:
|
|||
INSTR_TIME_SUBTRACT(diff, st->txn_begin);
|
||||
latency = INSTR_TIME_GET_MICROSEC(diff);
|
||||
st->txn_latencies += latency;
|
||||
|
||||
/*
|
||||
* XXX In a long benchmark run of high-latency transactions, this
|
||||
* int64 addition eventually overflows. For example, 100 threads
|
||||
|
@ -1174,14 +1178,16 @@ top:
|
|||
st->use_file = (int) getrand(thread, 0, num_files - 1);
|
||||
commands = sql_files[st->use_file];
|
||||
st->is_throttled = false;
|
||||
|
||||
/*
|
||||
* No transaction is underway anymore, which means there is nothing
|
||||
* to listen to right now. When throttling rate limits are active,
|
||||
* a sleep will happen next, as the next transaction starts. And
|
||||
* then in any case the next SQL command will set listen back to 1.
|
||||
* No transaction is underway anymore, which means there is
|
||||
* nothing to listen to right now. When throttling rate limits
|
||||
* are active, a sleep will happen next, as the next transaction
|
||||
* starts. And then in any case the next SQL command will set
|
||||
* listen back to 1.
|
||||
*/
|
||||
st->listen = 0;
|
||||
trans_needs_throttle = (throttle_delay>0);
|
||||
trans_needs_throttle = (throttle_delay > 0);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1201,11 +1207,12 @@ top:
|
|||
}
|
||||
|
||||
/*
|
||||
* This ensures that a throttling delay is inserted before proceeding
|
||||
* with sql commands, after the first transaction. The first transaction
|
||||
* This ensures that a throttling delay is inserted before proceeding with
|
||||
* sql commands, after the first transaction. The first transaction
|
||||
* throttling is performed when first entering doCustom.
|
||||
*/
|
||||
if (trans_needs_throttle) {
|
||||
if (trans_needs_throttle)
|
||||
{
|
||||
trans_needs_throttle = false;
|
||||
goto top;
|
||||
}
|
||||
|
@ -1553,12 +1560,12 @@ init(bool is_no_vacuum)
|
|||
* Note: TPC-B requires at least 100 bytes per row, and the "filler"
|
||||
* fields in these table declarations were intended to comply with that.
|
||||
* The pgbench_accounts table complies with that because the "filler"
|
||||
* column is set to blank-padded empty string. But for all other tables the
|
||||
* column defaults to NULL and so don't actually take any space. We could
|
||||
* fix that by giving them non-null default values. However, that would
|
||||
* completely break comparability of pgbench results with prior versions.
|
||||
* Since pgbench has never pretended to be fully TPC-B compliant anyway, we
|
||||
* stick with the historical behavior.
|
||||
* column is set to blank-padded empty string. But for all other tables
|
||||
* the column defaults to NULL and so don't actually take any space. We
|
||||
* could fix that by giving them non-null default values. However, that
|
||||
* would completely break comparability of pgbench results with prior
|
||||
* versions. Since pgbench has never pretended to be fully TPC-B compliant
|
||||
* anyway, we stick with the historical behavior.
|
||||
*/
|
||||
struct ddlinfo
|
||||
{
|
||||
|
@ -2211,6 +2218,7 @@ printResults(int ttype, int normal_xacts, int nclients,
|
|||
/* compute and show latency average and standard deviation */
|
||||
double latency = 0.001 * total_latencies / normal_xacts;
|
||||
double sqlat = (double) total_sqlats / normal_xacts;
|
||||
|
||||
printf("latency average: %.3f ms\n"
|
||||
"latency stddev: %.3f ms\n",
|
||||
latency, 0.001 * sqrt(sqlat - 1000000.0 * latency * latency));
|
||||
|
@ -2288,7 +2296,7 @@ int
|
|||
main(int argc, char **argv)
|
||||
{
|
||||
static struct option long_options[] = {
|
||||
/* systematic long/short named options*/
|
||||
/* systematic long/short named options */
|
||||
{"client", required_argument, NULL, 'c'},
|
||||
{"connect", no_argument, NULL, 'C'},
|
||||
{"debug", no_argument, NULL, 'd'},
|
||||
|
@ -2559,6 +2567,7 @@ main(int argc, char **argv)
|
|||
{
|
||||
/* get a double from the beginning of option value */
|
||||
double throttle_value = atof(optarg);
|
||||
|
||||
if (throttle_value <= 0.0)
|
||||
{
|
||||
fprintf(stderr, "invalid rate limit: %s\n", optarg);
|
||||
|
@ -2963,11 +2972,15 @@ threadRun(void *arg)
|
|||
int nstate = thread->nstate;
|
||||
int remains = nstate; /* number of remaining clients */
|
||||
int i;
|
||||
|
||||
/* for reporting progress: */
|
||||
int64 thread_start = INSTR_TIME_GET_MICROSEC(thread->start_time);
|
||||
int64 last_report = thread_start;
|
||||
int64 next_report = last_report + (int64) progress * 1000000;
|
||||
int64 last_count = 0, last_lats = 0, last_sqlats = 0, last_lags = 0;
|
||||
int64 last_count = 0,
|
||||
last_lats = 0,
|
||||
last_sqlats = 0,
|
||||
last_lags = 0;
|
||||
|
||||
AggVals aggs;
|
||||
|
||||
|
@ -3162,17 +3175,25 @@ threadRun(void *arg)
|
|||
{
|
||||
instr_time now_time;
|
||||
int64 now;
|
||||
|
||||
INSTR_TIME_SET_CURRENT(now_time);
|
||||
now = INSTR_TIME_GET_MICROSEC(now_time);
|
||||
if (now >= next_report)
|
||||
{
|
||||
/* generate and show report */
|
||||
int64 count = 0, lats = 0, sqlats = 0;
|
||||
int64 count = 0,
|
||||
lats = 0,
|
||||
sqlats = 0;
|
||||
int64 lags = thread->throttle_lag;
|
||||
int64 run = now - last_report;
|
||||
double tps, total_run, latency, sqlat, stdev, lag;
|
||||
double tps,
|
||||
total_run,
|
||||
latency,
|
||||
sqlat,
|
||||
stdev,
|
||||
lag;
|
||||
|
||||
for (i = 0 ; i < nstate ; i++)
|
||||
for (i = 0; i < nstate; i++)
|
||||
{
|
||||
count += state[i].cnt;
|
||||
lats += state[i].txn_latencies;
|
||||
|
@ -3202,7 +3223,7 @@ threadRun(void *arg)
|
|||
last_sqlats = sqlats;
|
||||
last_lags = lags;
|
||||
last_report = now;
|
||||
next_report += (int64) progress * 1000000;
|
||||
next_report += (int64) progress *1000000;
|
||||
}
|
||||
}
|
||||
#else
|
||||
|
@ -3211,23 +3232,32 @@ threadRun(void *arg)
|
|||
{
|
||||
instr_time now_time;
|
||||
int64 now;
|
||||
|
||||
INSTR_TIME_SET_CURRENT(now_time);
|
||||
now = INSTR_TIME_GET_MICROSEC(now_time);
|
||||
if (now >= next_report)
|
||||
{
|
||||
/* generate and show report */
|
||||
int64 count = 0, lats = 0, sqlats = 0, lags = 0;
|
||||
int64 count = 0,
|
||||
lats = 0,
|
||||
sqlats = 0,
|
||||
lags = 0;
|
||||
int64 run = now - last_report;
|
||||
double tps, total_run, latency, sqlat, lag, stdev;
|
||||
double tps,
|
||||
total_run,
|
||||
latency,
|
||||
sqlat,
|
||||
lag,
|
||||
stdev;
|
||||
|
||||
for (i = 0 ; i < progress_nclients ; i++)
|
||||
for (i = 0; i < progress_nclients; i++)
|
||||
{
|
||||
count += state[i].cnt;
|
||||
lats += state[i].txn_latencies;
|
||||
sqlats += state[i].txn_sqlats;
|
||||
}
|
||||
|
||||
for (i = 0 ; i < progress_nthreads ; i++)
|
||||
for (i = 0; i < progress_nthreads; i++)
|
||||
lags += thread[i].throttle_lag;
|
||||
|
||||
total_run = (now - thread_start) / 1000000.0;
|
||||
|
@ -3253,7 +3283,7 @@ threadRun(void *arg)
|
|||
last_sqlats = sqlats;
|
||||
last_lags = lags;
|
||||
last_report = now;
|
||||
next_report += (int64) progress * 1000000;
|
||||
next_report += (int64) progress *1000000;
|
||||
}
|
||||
}
|
||||
#endif /* PTHREAD_FORK_EMULATION */
|
||||
|
|
|
@ -429,8 +429,8 @@ bf_init(PX_Cipher *c, const uint8 *key, unsigned klen, const uint8 *iv)
|
|||
|
||||
/*
|
||||
* Test if key len is supported. BF_set_key silently cut large keys and it
|
||||
* could be a problem when user transfer crypted data from one server
|
||||
* to another.
|
||||
* could be a problem when user transfer crypted data from one server to
|
||||
* another.
|
||||
*/
|
||||
|
||||
if (bf_is_strong == -1)
|
||||
|
|
|
@ -319,6 +319,7 @@ tuple_to_stringinfo(StringInfo s, TupleDesc tupdesc, HeapTuple tuple, bool skip_
|
|||
else
|
||||
{
|
||||
Datum val; /* definitely detoasted Datum */
|
||||
|
||||
val = PointerGetDatum(PG_DETOAST_DATUM(origval));
|
||||
print_literal(s, typid, OidOutputFunctionCall(typoutput, val));
|
||||
}
|
||||
|
|
|
@ -69,8 +69,8 @@ test_shm_mq_setup(int64 queue_size, int32 nworkers, dsm_segment **segp,
|
|||
wait_for_workers_to_become_ready(wstate, hdr);
|
||||
|
||||
/*
|
||||
* Once we reach this point, all workers are ready. We no longer need
|
||||
* to kill them if we die; they'll die on their own as the message queues
|
||||
* Once we reach this point, all workers are ready. We no longer need to
|
||||
* kill them if we die; they'll die on their own as the message queues
|
||||
* shut down.
|
||||
*/
|
||||
cancel_on_dsm_detach(seg, cleanup_background_workers,
|
||||
|
@ -194,16 +194,16 @@ setup_background_workers(int nworkers, dsm_segment *seg)
|
|||
* Arrange to kill all the workers if we abort before all workers are
|
||||
* finished hooking themselves up to the dynamic shared memory segment.
|
||||
*
|
||||
* If we die after all the workers have finished hooking themselves up
|
||||
* to the dynamic shared memory segment, we'll mark the two queues to
|
||||
* which we're directly connected as detached, and the worker(s)
|
||||
* connected to those queues will exit, marking any other queues to
|
||||
* which they are connected as detached. This will cause any
|
||||
* as-yet-unaware workers connected to those queues to exit in their
|
||||
* turn, and so on, until everybody exits.
|
||||
* If we die after all the workers have finished hooking themselves up to
|
||||
* the dynamic shared memory segment, we'll mark the two queues to which
|
||||
* we're directly connected as detached, and the worker(s) connected to
|
||||
* those queues will exit, marking any other queues to which they are
|
||||
* connected as detached. This will cause any as-yet-unaware workers
|
||||
* connected to those queues to exit in their turn, and so on, until
|
||||
* everybody exits.
|
||||
*
|
||||
* But suppose the workers which are supposed to connect to the queues
|
||||
* to which we're directly attached exit due to some error before they
|
||||
* But suppose the workers which are supposed to connect to the queues to
|
||||
* which we're directly attached exit due to some error before they
|
||||
* actually attach the queues. The remaining workers will have no way of
|
||||
* knowing this. From their perspective, they're still waiting for those
|
||||
* workers to start, when in fact they've already died.
|
||||
|
|
|
@ -18,8 +18,7 @@
|
|||
|
||||
#include "test_shm_mq.h"
|
||||
|
||||
PG_MODULE_MAGIC;
|
||||
PG_FUNCTION_INFO_V1(test_shm_mq);
|
||||
PG_MODULE_MAGIC; PG_FUNCTION_INFO_V1(test_shm_mq);
|
||||
PG_FUNCTION_INFO_V1(test_shm_mq_pipelined);
|
||||
|
||||
void _PG_init(void);
|
||||
|
@ -59,8 +58,8 @@ test_shm_mq(PG_FUNCTION_ARGS)
|
|||
|
||||
/*
|
||||
* Since this test sends data using the blocking interfaces, it cannot
|
||||
* send data to itself. Therefore, a minimum of 1 worker is required.
|
||||
* Of course, a negative worker count is nonsensical.
|
||||
* send data to itself. Therefore, a minimum of 1 worker is required. Of
|
||||
* course, a negative worker count is nonsensical.
|
||||
*/
|
||||
if (nworkers < 1)
|
||||
ereport(ERROR,
|
||||
|
@ -224,10 +223,10 @@ test_shm_mq_pipelined(PG_FUNCTION_ARGS)
|
|||
if (wait)
|
||||
{
|
||||
/*
|
||||
* If we made no progress, wait for one of the other processes
|
||||
* to which we are connected to set our latch, indicating that
|
||||
* they have read or written data and therefore there may now be
|
||||
* work for us to do.
|
||||
* If we made no progress, wait for one of the other processes to
|
||||
* which we are connected to set our latch, indicating that they
|
||||
* have read or written data and therefore there may now be work
|
||||
* for us to do.
|
||||
*/
|
||||
WaitLatch(&MyProc->procLatch, WL_LATCH_SET, 0);
|
||||
CHECK_FOR_INTERRUPTS();
|
||||
|
|
|
@ -58,12 +58,12 @@ test_shm_mq_main(Datum main_arg)
|
|||
/*
|
||||
* Establish signal handlers.
|
||||
*
|
||||
* We want CHECK_FOR_INTERRUPTS() to kill off this worker process just
|
||||
* as it would a normal user backend. To make that happen, we establish
|
||||
* a signal handler that is a stripped-down version of die(). We don't
|
||||
* have any equivalent of the backend's command-read loop, where interrupts
|
||||
* can be processed immediately, so make sure ImmediateInterruptOK is
|
||||
* turned off.
|
||||
* We want CHECK_FOR_INTERRUPTS() to kill off this worker process just as
|
||||
* it would a normal user backend. To make that happen, we establish a
|
||||
* signal handler that is a stripped-down version of die(). We don't have
|
||||
* any equivalent of the backend's command-read loop, where interrupts can
|
||||
* be processed immediately, so make sure ImmediateInterruptOK is turned
|
||||
* off.
|
||||
*/
|
||||
pqsignal(SIGTERM, handle_sigterm);
|
||||
ImmediateInterruptOK = false;
|
||||
|
@ -76,8 +76,8 @@ test_shm_mq_main(Datum main_arg)
|
|||
* memory segment to which we must attach for further instructions. In
|
||||
* order to attach to dynamic shared memory, we need a resource owner.
|
||||
* Once we've mapped the segment in our address space, attach to the table
|
||||
* of contents so we can locate the various data structures we'll need
|
||||
* to find within the segment.
|
||||
* of contents so we can locate the various data structures we'll need to
|
||||
* find within the segment.
|
||||
*/
|
||||
CurrentResourceOwner = ResourceOwnerCreate(NULL, "test_shm_mq worker");
|
||||
seg = dsm_attach(DatumGetInt32(main_arg));
|
||||
|
@ -114,8 +114,8 @@ test_shm_mq_main(Datum main_arg)
|
|||
attach_to_queues(seg, toc, myworkernumber, &inqh, &outqh);
|
||||
|
||||
/*
|
||||
* Indicate that we're fully initialized and ready to begin the main
|
||||
* part of the parallel operation.
|
||||
* Indicate that we're fully initialized and ready to begin the main part
|
||||
* of the parallel operation.
|
||||
*
|
||||
* Once we signal that we're ready, the user backend is entitled to assume
|
||||
* that our on_dsm_detach callbacks will fire before we disconnect from
|
||||
|
|
|
@ -279,6 +279,7 @@ ginarraytriconsistent(PG_FUNCTION_ARGS)
|
|||
res = GIN_MAYBE;
|
||||
break;
|
||||
case GinEqualStrategy:
|
||||
|
||||
/*
|
||||
* Must have all elements in check[] true; no discrimination
|
||||
* against nulls here. This is because array_contain_compare and
|
||||
|
|
|
@ -251,6 +251,7 @@ ginFindParents(GinBtree btree, GinBtreeStack *stack)
|
|||
Assert(blkno != btree->rootBlkno);
|
||||
ptr->blkno = blkno;
|
||||
ptr->buffer = buffer;
|
||||
|
||||
/*
|
||||
* parent may be wrong, but if so, the ginFinishSplit call will
|
||||
* recurse to call ginFindParents again to fix it.
|
||||
|
@ -328,7 +329,8 @@ ginPlaceToPage(GinBtree btree, GinBtreeStack *stack,
|
|||
GinPlaceToPageRC rc;
|
||||
uint16 xlflags = 0;
|
||||
Page childpage = NULL;
|
||||
Page newlpage = NULL, newrpage = NULL;
|
||||
Page newlpage = NULL,
|
||||
newrpage = NULL;
|
||||
|
||||
if (GinPageIsData(page))
|
||||
xlflags |= GIN_INSERT_ISDATA;
|
||||
|
@ -346,8 +348,8 @@ ginPlaceToPage(GinBtree btree, GinBtreeStack *stack,
|
|||
}
|
||||
|
||||
/*
|
||||
* Try to put the incoming tuple on the page. placeToPage will decide
|
||||
* if the page needs to be split.
|
||||
* Try to put the incoming tuple on the page. placeToPage will decide if
|
||||
* the page needs to be split.
|
||||
*/
|
||||
rc = btree->placeToPage(btree, stack->buffer, stack,
|
||||
insertdata, updateblkno,
|
||||
|
@ -450,6 +452,7 @@ ginPlaceToPage(GinBtree btree, GinBtreeStack *stack,
|
|||
if (childbuf != InvalidBuffer)
|
||||
{
|
||||
Page childpage = BufferGetPage(childbuf);
|
||||
|
||||
GinPageGetOpaque(childpage)->flags &= ~GIN_INCOMPLETE_SPLIT;
|
||||
|
||||
data.leftChildBlkno = BufferGetBlockNumber(childbuf);
|
||||
|
@ -505,8 +508,8 @@ ginPlaceToPage(GinBtree btree, GinBtreeStack *stack,
|
|||
|
||||
/*
|
||||
* Construct a new root page containing downlinks to the new left
|
||||
* and right pages. (do this in a temporary copy first rather
|
||||
* than overwriting the original page directly, so that we can still
|
||||
* and right pages. (do this in a temporary copy first rather than
|
||||
* overwriting the original page directly, so that we can still
|
||||
* abort gracefully if this fails.)
|
||||
*/
|
||||
newrootpg = PageGetTempPage(newrpage);
|
||||
|
@ -627,8 +630,8 @@ ginFinishSplit(GinBtree btree, GinBtreeStack *stack, bool freestack,
|
|||
bool first = true;
|
||||
|
||||
/*
|
||||
* freestack == false when we encounter an incompletely split page during a
|
||||
* scan, while freestack == true is used in the normal scenario that a
|
||||
* freestack == false when we encounter an incompletely split page during
|
||||
* a scan, while freestack == true is used in the normal scenario that a
|
||||
* split is finished right after the initial insert.
|
||||
*/
|
||||
if (!freestack)
|
||||
|
@ -650,8 +653,8 @@ ginFinishSplit(GinBtree btree, GinBtreeStack *stack, bool freestack,
|
|||
* then continue with the current one.
|
||||
*
|
||||
* Note: we have to finish *all* incomplete splits we encounter, even
|
||||
* if we have to move right. Otherwise we might choose as the target
|
||||
* a page that has no downlink in the parent, and splitting it further
|
||||
* if we have to move right. Otherwise we might choose as the target a
|
||||
* page that has no downlink in the parent, and splitting it further
|
||||
* would fail.
|
||||
*/
|
||||
if (GinPageIsIncompleteSplit(BufferGetPage(parent->buffer)))
|
||||
|
|
|
@ -49,8 +49,8 @@ typedef struct
|
|||
dlist_head segments; /* a list of leafSegmentInfos */
|
||||
|
||||
/*
|
||||
* The following fields represent how the segments are split across
|
||||
* pages, if a page split is required. Filled in by leafRepackItems.
|
||||
* The following fields represent how the segments are split across pages,
|
||||
* if a page split is required. Filled in by leafRepackItems.
|
||||
*/
|
||||
dlist_node *lastleft; /* last segment on left page */
|
||||
int lsize; /* total size on left page */
|
||||
|
@ -83,9 +83,9 @@ typedef struct
|
|||
int nmodifieditems;
|
||||
|
||||
/*
|
||||
* The following fields represent the items in this segment. If 'items'
|
||||
* is not NULL, it contains a palloc'd array of the items in this segment.
|
||||
* If 'seg' is not NULL, it contains the items in an already-compressed
|
||||
* The following fields represent the items in this segment. If 'items' is
|
||||
* not NULL, it contains a palloc'd array of the items in this segment. If
|
||||
* 'seg' is not NULL, it contains the items in an already-compressed
|
||||
* format. It can point to an on-disk page (!modified), or a palloc'd
|
||||
* segment in memory. If both are set, they must represent the same items.
|
||||
*/
|
||||
|
@ -386,7 +386,7 @@ GinDataPageAddPostingItem(Page page, PostingItem *data, OffsetNumber offset)
|
|||
if (offset != maxoff + 1)
|
||||
memmove(ptr + sizeof(PostingItem),
|
||||
ptr,
|
||||
(maxoff - offset + 1) * sizeof(PostingItem));
|
||||
(maxoff - offset + 1) *sizeof(PostingItem));
|
||||
}
|
||||
memcpy(ptr, data, sizeof(PostingItem));
|
||||
|
||||
|
@ -464,8 +464,8 @@ dataPlaceToPageLeaf(GinBtree btree, Buffer buf, GinBtreeStack *stack,
|
|||
{
|
||||
/*
|
||||
* This needs to go to some other location in the tree. (The
|
||||
* caller should've chosen the insert location so that at least
|
||||
* the first item goes here.)
|
||||
* caller should've chosen the insert location so that at
|
||||
* least the first item goes here.)
|
||||
*/
|
||||
Assert(i > 0);
|
||||
break;
|
||||
|
@ -769,16 +769,16 @@ ginVacuumPostingTreeLeaf(Relation indexrel, Buffer buffer, GinVacuumState *gvs)
|
|||
* We don't try to re-encode the segments here, even though some of them
|
||||
* might be really small now that we've removed some items from them. It
|
||||
* seems like a waste of effort, as there isn't really any benefit from
|
||||
* larger segments per se; larger segments only help to pack more items
|
||||
* in the same space. We might as well delay doing that until the next
|
||||
* larger segments per se; larger segments only help to pack more items in
|
||||
* the same space. We might as well delay doing that until the next
|
||||
* insertion, which will need to re-encode at least part of the page
|
||||
* anyway.
|
||||
*
|
||||
* Also note if the page was in uncompressed, pre-9.4 format before, it
|
||||
* is now represented as one huge segment that contains all the items.
|
||||
* It might make sense to split that, to speed up random access, but we
|
||||
* don't bother. You'll have to REINDEX anyway if you want the full gain
|
||||
* of the new tighter index format.
|
||||
* Also note if the page was in uncompressed, pre-9.4 format before, it is
|
||||
* now represented as one huge segment that contains all the items. It
|
||||
* might make sense to split that, to speed up random access, but we don't
|
||||
* bother. You'll have to REINDEX anyway if you want the full gain of the
|
||||
* new tighter index format.
|
||||
*/
|
||||
if (removedsomething)
|
||||
{
|
||||
|
@ -795,6 +795,7 @@ ginVacuumPostingTreeLeaf(Relation indexrel, Buffer buffer, GinVacuumState *gvs)
|
|||
{
|
||||
leafSegmentInfo *seginfo = dlist_container(leafSegmentInfo, node,
|
||||
iter.cur);
|
||||
|
||||
if (seginfo->action != GIN_SEGMENT_UNMODIFIED)
|
||||
modified = true;
|
||||
if (modified && seginfo->action != GIN_SEGMENT_DELETE)
|
||||
|
@ -863,7 +864,8 @@ constructLeafRecompressWALData(Buffer buf, disassembledLeaf *leaf)
|
|||
|
||||
walbufbegin = palloc(
|
||||
sizeof(ginxlogRecompressDataLeaf) +
|
||||
BLCKSZ + /* max size needed to hold the segment data */
|
||||
BLCKSZ + /* max size needed to hold the segment
|
||||
* data */
|
||||
nmodified * 2 + /* (segno + action) per action */
|
||||
sizeof(XLogRecData));
|
||||
walbufend = walbufbegin;
|
||||
|
@ -965,9 +967,9 @@ dataPlaceToPageLeafRecompress(Buffer buf, disassembledLeaf *leaf)
|
|||
int segsize;
|
||||
|
||||
/*
|
||||
* If the page was in pre-9.4 format before, convert the header, and
|
||||
* force all segments to be copied to the page whether they were modified
|
||||
* or not.
|
||||
* If the page was in pre-9.4 format before, convert the header, and force
|
||||
* all segments to be copied to the page whether they were modified or
|
||||
* not.
|
||||
*/
|
||||
if (!GinPageIsCompressed(page))
|
||||
{
|
||||
|
@ -1022,6 +1024,7 @@ dataPlaceToPageLeafSplit(Buffer buf, disassembledLeaf *leaf,
|
|||
dlist_node *node;
|
||||
dlist_node *firstright;
|
||||
leafSegmentInfo *seginfo;
|
||||
|
||||
/* these must be static so they can be returned to caller */
|
||||
static ginxlogSplitDataLeaf split_xlog;
|
||||
static XLogRecData rdata[3];
|
||||
|
@ -1121,6 +1124,7 @@ dataPlaceToPageInternal(GinBtree btree, Buffer buf, GinBtreeStack *stack,
|
|||
Page page = BufferGetPage(buf);
|
||||
OffsetNumber off = stack->off;
|
||||
PostingItem *pitem;
|
||||
|
||||
/* these must be static so they can be returned to caller */
|
||||
static XLogRecData rdata;
|
||||
static ginxlogInsertDataInternal data;
|
||||
|
@ -1216,8 +1220,8 @@ dataSplitPageInternal(GinBtree btree, Buffer origbuf,
|
|||
*prdata = rdata;
|
||||
|
||||
/*
|
||||
* First construct a new list of PostingItems, which includes all the
|
||||
* old items, and the new item.
|
||||
* First construct a new list of PostingItems, which includes all the old
|
||||
* items, and the new item.
|
||||
*/
|
||||
memcpy(allitems, GinDataPageGetPostingItem(oldpage, FirstOffsetNumber),
|
||||
(off - 1) * sizeof(PostingItem));
|
||||
|
@ -1402,8 +1406,8 @@ addItemsToLeaf(disassembledLeaf *leaf, ItemPointer newItems, int nNewItems)
|
|||
leafSegmentInfo *newseg;
|
||||
|
||||
/*
|
||||
* If the page is completely empty, just construct one new segment to
|
||||
* hold all the new items.
|
||||
* If the page is completely empty, just construct one new segment to hold
|
||||
* all the new items.
|
||||
*/
|
||||
if (dlist_is_empty(&leaf->segments))
|
||||
{
|
||||
|
@ -1567,10 +1571,10 @@ leafRepackItems(disassembledLeaf *leaf, ItemPointer remaining)
|
|||
if (npacked != seginfo->nitems)
|
||||
{
|
||||
/*
|
||||
* Too large. Compress again to the target size, and create
|
||||
* a new segment to represent the remaining items. The new
|
||||
* segment is inserted after this one, so it will be
|
||||
* processed in the next iteration of this loop.
|
||||
* Too large. Compress again to the target size, and
|
||||
* create a new segment to represent the remaining items.
|
||||
* The new segment is inserted after this one, so it will
|
||||
* be processed in the next iteration of this loop.
|
||||
*/
|
||||
if (seginfo->seg)
|
||||
pfree(seginfo->seg);
|
||||
|
@ -1741,8 +1745,8 @@ createPostingTree(Relation index, ItemPointerData *items, uint32 nitems,
|
|||
GinPageGetOpaque(tmppage)->rightlink = InvalidBlockNumber;
|
||||
|
||||
/*
|
||||
* Write as many of the items to the root page as fit. In segments
|
||||
* of max GinPostingListSegmentMaxSize bytes each.
|
||||
* Write as many of the items to the root page as fit. In segments of max
|
||||
* GinPostingListSegmentMaxSize bytes each.
|
||||
*/
|
||||
nrootitems = 0;
|
||||
rootsize = 0;
|
||||
|
|
|
@ -136,6 +136,7 @@ GinFormTuple(GinState *ginstate,
|
|||
if (data)
|
||||
{
|
||||
char *ptr = GinGetPosting(itup);
|
||||
|
||||
memcpy(ptr, data, dataSize);
|
||||
}
|
||||
|
||||
|
|
|
@ -86,6 +86,7 @@ scanPostingTree(Relation index, GinScanEntry scanEntry,
|
|||
if ((GinPageGetOpaque(page)->flags & GIN_DELETED) == 0)
|
||||
{
|
||||
int n = GinDataLeafPageGetItemsToTbm(page, scanEntry->matchBitmap);
|
||||
|
||||
scanEntry->predictNumberResult += n;
|
||||
}
|
||||
|
||||
|
@ -463,11 +464,11 @@ startScanKey(GinState *ginstate, GinScanOpaque so, GinScanKey key)
|
|||
* considerably, if the frequent term can be put in the additional set.
|
||||
*
|
||||
* There can be many legal ways to divide the entries into these two
|
||||
* sets. A conservative division is to just put everything in the
|
||||
* required set, but the more you can put in the additional set, the more
|
||||
* you can skip during the scan. To maximize skipping, we try to put as
|
||||
* many frequent items as possible into additional, and less frequent
|
||||
* ones into required. To do that, sort the entries by frequency
|
||||
* sets. A conservative division is to just put everything in the required
|
||||
* set, but the more you can put in the additional set, the more you can
|
||||
* skip during the scan. To maximize skipping, we try to put as many
|
||||
* frequent items as possible into additional, and less frequent ones into
|
||||
* required. To do that, sort the entries by frequency
|
||||
* (predictNumberResult), and put entries into the required set in that
|
||||
* order, until the consistent function says that none of the remaining
|
||||
* entries can form a match, without any items from the required set. The
|
||||
|
@ -635,8 +636,8 @@ entryLoadMoreItems(GinState *ginstate, GinScanEntry entry, ItemPointerData advan
|
|||
if (stepright)
|
||||
{
|
||||
/*
|
||||
* We've processed all the entries on this page. If it was the last
|
||||
* page in the tree, we're done.
|
||||
* We've processed all the entries on this page. If it was the
|
||||
* last page in the tree, we're done.
|
||||
*/
|
||||
if (GinPageRightMost(page))
|
||||
{
|
||||
|
@ -647,8 +648,8 @@ entryLoadMoreItems(GinState *ginstate, GinScanEntry entry, ItemPointerData advan
|
|||
}
|
||||
|
||||
/*
|
||||
* Step to next page, following the right link. then find the first
|
||||
* ItemPointer greater than advancePast.
|
||||
* Step to next page, following the right link. then find the
|
||||
* first ItemPointer greater than advancePast.
|
||||
*/
|
||||
entry->buffer = ginStepRight(entry->buffer,
|
||||
ginstate->index,
|
||||
|
@ -781,6 +782,7 @@ entryGetItem(GinState *ginstate, GinScanEntry entry,
|
|||
gotitem = true;
|
||||
break;
|
||||
}
|
||||
|
||||
/*
|
||||
* Not a lossy page. Skip over any offsets <= advancePast, and
|
||||
* return that.
|
||||
|
@ -788,8 +790,9 @@ entryGetItem(GinState *ginstate, GinScanEntry entry,
|
|||
if (entry->matchResult->blockno == advancePastBlk)
|
||||
{
|
||||
/*
|
||||
* First, do a quick check against the last offset on the page.
|
||||
* If that's > advancePast, so are all the other offsets.
|
||||
* First, do a quick check against the last offset on the
|
||||
* page. If that's > advancePast, so are all the other
|
||||
* offsets.
|
||||
*/
|
||||
if (entry->matchResult->offsets[entry->matchResult->ntuples - 1] <= advancePastOff)
|
||||
{
|
||||
|
@ -890,8 +893,8 @@ keyGetItem(GinState *ginstate, MemoryContext tempCtx, GinScanKey key,
|
|||
|
||||
/*
|
||||
* We might have already tested this item; if so, no need to repeat work.
|
||||
* (Note: the ">" case can happen, if advancePast is exact but we previously
|
||||
* had to set curItem to a lossy-page pointer.)
|
||||
* (Note: the ">" case can happen, if advancePast is exact but we
|
||||
* previously had to set curItem to a lossy-page pointer.)
|
||||
*/
|
||||
if (ginCompareItemPointers(&key->curItem, &advancePast) > 0)
|
||||
return;
|
||||
|
@ -942,8 +945,8 @@ keyGetItem(GinState *ginstate, MemoryContext tempCtx, GinScanKey key,
|
|||
/*
|
||||
* Ok, we now know that there are no matches < minItem.
|
||||
*
|
||||
* If minItem is lossy, it means that there were no exact items on
|
||||
* the page among requiredEntries, because lossy pointers sort after exact
|
||||
* If minItem is lossy, it means that there were no exact items on the
|
||||
* page among requiredEntries, because lossy pointers sort after exact
|
||||
* items. However, there might be exact items for the same page among
|
||||
* additionalEntries, so we mustn't advance past them.
|
||||
*/
|
||||
|
@ -1085,6 +1088,7 @@ keyGetItem(GinState *ginstate, MemoryContext tempCtx, GinScanKey key,
|
|||
if (entry->isFinished)
|
||||
key->entryRes[i] = GIN_FALSE;
|
||||
#if 0
|
||||
|
||||
/*
|
||||
* This case can't currently happen, because we loaded all the entries
|
||||
* for this item earlier.
|
||||
|
@ -1119,6 +1123,7 @@ keyGetItem(GinState *ginstate, MemoryContext tempCtx, GinScanKey key,
|
|||
break;
|
||||
|
||||
default:
|
||||
|
||||
/*
|
||||
* the 'default' case shouldn't happen, but if the consistent
|
||||
* function returns something bogus, this is the safe result
|
||||
|
@ -1129,11 +1134,10 @@ keyGetItem(GinState *ginstate, MemoryContext tempCtx, GinScanKey key,
|
|||
}
|
||||
|
||||
/*
|
||||
* We have a tuple, and we know if it matches or not. If it's a
|
||||
* non-match, we could continue to find the next matching tuple, but
|
||||
* let's break out and give scanGetItem a chance to advance the other
|
||||
* keys. They might be able to skip past to a much higher TID, allowing
|
||||
* us to save work.
|
||||
* We have a tuple, and we know if it matches or not. If it's a non-match,
|
||||
* we could continue to find the next matching tuple, but let's break out
|
||||
* and give scanGetItem a chance to advance the other keys. They might be
|
||||
* able to skip past to a much higher TID, allowing us to save work.
|
||||
*/
|
||||
|
||||
/* clean up after consistentFn calls */
|
||||
|
@ -1205,12 +1209,11 @@ scanGetItem(IndexScanDesc scan, ItemPointerData advancePast,
|
|||
}
|
||||
|
||||
/*
|
||||
* It's a match. We can conclude that nothing < matches, so
|
||||
* the other key streams can skip to this item.
|
||||
* It's a match. We can conclude that nothing < matches, so the
|
||||
* other key streams can skip to this item.
|
||||
*
|
||||
* Beware of lossy pointers, though; from a lossy pointer, we
|
||||
* can only conclude that nothing smaller than this *block*
|
||||
* matches.
|
||||
* Beware of lossy pointers, though; from a lossy pointer, we can
|
||||
* only conclude that nothing smaller than this *block* matches.
|
||||
*/
|
||||
if (ItemPointerIsLossyPage(&key->curItem))
|
||||
{
|
||||
|
@ -1229,8 +1232,8 @@ scanGetItem(IndexScanDesc scan, ItemPointerData advancePast,
|
|||
}
|
||||
|
||||
/*
|
||||
* If this is the first key, remember this location as a
|
||||
* potential match, and proceed to check the rest of the keys.
|
||||
* If this is the first key, remember this location as a potential
|
||||
* match, and proceed to check the rest of the keys.
|
||||
*
|
||||
* Otherwise, check if this is the same item that we checked the
|
||||
* previous keys for (or a lossy pointer for the same page). If
|
||||
|
@ -1247,7 +1250,7 @@ scanGetItem(IndexScanDesc scan, ItemPointerData advancePast,
|
|||
if (ItemPointerIsLossyPage(&key->curItem) ||
|
||||
ItemPointerIsLossyPage(item))
|
||||
{
|
||||
Assert (GinItemPointerGetBlockNumber(&key->curItem) >= GinItemPointerGetBlockNumber(item));
|
||||
Assert(GinItemPointerGetBlockNumber(&key->curItem) >= GinItemPointerGetBlockNumber(item));
|
||||
match = (GinItemPointerGetBlockNumber(&key->curItem) ==
|
||||
GinItemPointerGetBlockNumber(item));
|
||||
}
|
||||
|
@ -1264,8 +1267,8 @@ scanGetItem(IndexScanDesc scan, ItemPointerData advancePast,
|
|||
|
||||
/*
|
||||
* Now *item contains the first ItemPointer after previous result that
|
||||
* satisfied all the keys for that exact TID, or a lossy reference
|
||||
* to the same page.
|
||||
* satisfied all the keys for that exact TID, or a lossy reference to the
|
||||
* same page.
|
||||
*
|
||||
* We must return recheck = true if any of the keys are marked recheck.
|
||||
*/
|
||||
|
|
|
@ -115,6 +115,7 @@ static bool
|
|||
shimBoolConsistentFn(GinScanKey key)
|
||||
{
|
||||
GinTernaryValue result;
|
||||
|
||||
result = DatumGetGinTernaryValue(FunctionCall7Coll(
|
||||
key->triConsistentFmgrInfo,
|
||||
key->collation,
|
||||
|
|
|
@ -210,7 +210,7 @@ ginCompressPostingList(const ItemPointer ipd, int nipd, int maxsize,
|
|||
uint64 val = itemptr_to_uint64(&ipd[totalpacked]);
|
||||
uint64 delta = val - prev;
|
||||
|
||||
Assert (val > prev);
|
||||
Assert(val > prev);
|
||||
|
||||
if (endptr - ptr >= 6)
|
||||
encode_varbyte(delta, &ptr);
|
||||
|
@ -374,8 +374,8 @@ ginMergeItemPointers(ItemPointerData *a, uint32 na,
|
|||
dst = (ItemPointer) palloc((na + nb) * sizeof(ItemPointerData));
|
||||
|
||||
/*
|
||||
* If the argument arrays don't overlap, we can just append them to
|
||||
* each other.
|
||||
* If the argument arrays don't overlap, we can just append them to each
|
||||
* other.
|
||||
*/
|
||||
if (na == 0 || nb == 0 || ginCompareItemPointers(&a[na - 1], &b[0]) < 0)
|
||||
{
|
||||
|
|
|
@ -67,6 +67,7 @@ initGinState(GinState *state, Relation index)
|
|||
fmgr_info_copy(&(state->extractQueryFn[i]),
|
||||
index_getprocinfo(index, i + 1, GIN_EXTRACTQUERY_PROC),
|
||||
CurrentMemoryContext);
|
||||
|
||||
/*
|
||||
* Check opclass capability to do tri-state or binary logic consistent
|
||||
* check.
|
||||
|
|
|
@ -208,8 +208,8 @@ ginVacuumPostingTreeLeaves(GinVacuumState *gvs, BlockNumber blkno, bool isRoot,
|
|||
}
|
||||
|
||||
/*
|
||||
* if we have root and there are empty pages in tree, then we don't release
|
||||
* lock to go further processing and guarantee that tree is unused
|
||||
* if we have root and there are empty pages in tree, then we don't
|
||||
* release lock to go further processing and guarantee that tree is unused
|
||||
*/
|
||||
if (!(isRoot && hasVoidPage))
|
||||
{
|
||||
|
@ -302,11 +302,11 @@ ginDeletePage(GinVacuumState *gvs, BlockNumber deleteBlkno, BlockNumber leftBlkn
|
|||
data.rightLink = GinPageGetOpaque(page)->rightlink;
|
||||
|
||||
/*
|
||||
* We can't pass buffer_std = TRUE, because we didn't set pd_lower
|
||||
* on pre-9.4 versions. The page might've been binary-upgraded from
|
||||
* an older version, and hence not have pd_lower set correctly.
|
||||
* Ditto for the left page, but removing the item from the parent
|
||||
* updated its pd_lower, so we know that's OK at this point.
|
||||
* We can't pass buffer_std = TRUE, because we didn't set pd_lower on
|
||||
* pre-9.4 versions. The page might've been binary-upgraded from an
|
||||
* older version, and hence not have pd_lower set correctly. Ditto for
|
||||
* the left page, but removing the item from the parent updated its
|
||||
* pd_lower, so we know that's OK at this point.
|
||||
*/
|
||||
rdata[0].buffer = dBuffer;
|
||||
rdata[0].buffer_std = FALSE;
|
||||
|
@ -538,7 +538,8 @@ ginVacuumEntryPage(GinVacuumState *gvs, Buffer buffer, BlockNumber *roots, uint3
|
|||
}
|
||||
|
||||
/*
|
||||
* if we already created a temporary page, make changes in place
|
||||
* if we already created a temporary page, make changes in
|
||||
* place
|
||||
*/
|
||||
if (tmppage == origpage)
|
||||
{
|
||||
|
|
|
@ -341,8 +341,8 @@ ginRedoInsert(XLogRecPtr lsn, XLogRecord *record)
|
|||
payload = XLogRecGetData(record) + sizeof(ginxlogInsert);
|
||||
|
||||
/*
|
||||
* First clear incomplete-split flag on child page if this finishes
|
||||
* a split.
|
||||
* First clear incomplete-split flag on child page if this finishes a
|
||||
* split.
|
||||
*/
|
||||
if (!isLeaf)
|
||||
{
|
||||
|
@ -472,8 +472,8 @@ ginRedoSplit(XLogRecPtr lsn, XLogRecord *record)
|
|||
payload = XLogRecGetData(record) + sizeof(ginxlogSplit);
|
||||
|
||||
/*
|
||||
* First clear incomplete-split flag on child page if this finishes
|
||||
* a split
|
||||
* First clear incomplete-split flag on child page if this finishes a
|
||||
* split
|
||||
*/
|
||||
if (!isLeaf)
|
||||
{
|
||||
|
@ -711,9 +711,9 @@ ginRedoUpdateMetapage(XLogRecPtr lsn, XLogRecord *record)
|
|||
Buffer buffer;
|
||||
|
||||
/*
|
||||
* Restore the metapage. This is essentially the same as a full-page image,
|
||||
* so restore the metapage unconditionally without looking at the LSN, to
|
||||
* avoid torn page hazards.
|
||||
* Restore the metapage. This is essentially the same as a full-page
|
||||
* image, so restore the metapage unconditionally without looking at the
|
||||
* LSN, to avoid torn page hazards.
|
||||
*/
|
||||
metabuffer = XLogReadBuffer(data->node, GIN_METAPAGE_BLKNO, false);
|
||||
if (!BufferIsValid(metabuffer))
|
||||
|
|
|
@ -387,6 +387,7 @@ gistXLogSplit(RelFileNode node, BlockNumber blkno, bool page_is_leaf,
|
|||
|
||||
for (ptr = dist; ptr; ptr = ptr->next)
|
||||
npage++;
|
||||
|
||||
/*
|
||||
* the caller should've checked this already, but doesn't hurt to check
|
||||
* again.
|
||||
|
|
|
@ -2123,8 +2123,8 @@ heap_insert(Relation relation, HeapTuple tup, CommandId cid,
|
|||
bool need_tuple_data;
|
||||
|
||||
/*
|
||||
* For logical decoding, we need the tuple even if we're doing a
|
||||
* full page write, so make sure to log it separately. (XXX We could
|
||||
* For logical decoding, we need the tuple even if we're doing a full
|
||||
* page write, so make sure to log it separately. (XXX We could
|
||||
* alternatively store a pointer into the FPW).
|
||||
*
|
||||
* Also, if this is a catalog, we need to transmit combocids to
|
||||
|
@ -2165,9 +2165,9 @@ heap_insert(Relation relation, HeapTuple tup, CommandId cid,
|
|||
rdata[2].next = NULL;
|
||||
|
||||
/*
|
||||
* Make a separate rdata entry for the tuple's buffer if we're
|
||||
* doing logical decoding, so that an eventual FPW doesn't
|
||||
* remove the tuple's data.
|
||||
* Make a separate rdata entry for the tuple's buffer if we're doing
|
||||
* logical decoding, so that an eventual FPW doesn't remove the
|
||||
* tuple's data.
|
||||
*/
|
||||
if (need_tuple_data)
|
||||
{
|
||||
|
@ -2487,9 +2487,9 @@ heap_multi_insert(Relation relation, HeapTuple *tuples, int ntuples,
|
|||
rdata[1].next = NULL;
|
||||
|
||||
/*
|
||||
* Make a separate rdata entry for the tuple's buffer if
|
||||
* we're doing logical decoding, so that an eventual FPW
|
||||
* doesn't remove the tuple's data.
|
||||
* Make a separate rdata entry for the tuple's buffer if we're
|
||||
* doing logical decoding, so that an eventual FPW doesn't remove
|
||||
* the tuple's data.
|
||||
*/
|
||||
if (need_tuple_data)
|
||||
{
|
||||
|
@ -2919,7 +2919,7 @@ l1:
|
|||
xlhdr.t_hoff = old_key_tuple->t_data->t_hoff;
|
||||
|
||||
rdata[1].next = &(rdata[2]);
|
||||
rdata[2].data = (char*)&xlhdr;
|
||||
rdata[2].data = (char *) &xlhdr;
|
||||
rdata[2].len = SizeOfHeapHeader;
|
||||
rdata[2].buffer = InvalidBuffer;
|
||||
rdata[2].next = NULL;
|
||||
|
@ -3951,8 +3951,7 @@ HeapSatisfiesHOTandKeyUpdate(Relation relation, Bitmapset *hot_attrs,
|
|||
/*
|
||||
* Since the HOT attributes are a superset of the key attributes and
|
||||
* the key attributes are a superset of the id attributes, this logic
|
||||
* is guaranteed to identify the next column that needs to be
|
||||
* checked.
|
||||
* is guaranteed to identify the next column that needs to be checked.
|
||||
*/
|
||||
if (hot_result && next_hot_attnum > FirstLowInvalidHeapAttributeNumber)
|
||||
check_now = next_hot_attnum;
|
||||
|
@ -3981,12 +3980,11 @@ HeapSatisfiesHOTandKeyUpdate(Relation relation, Bitmapset *hot_attrs,
|
|||
}
|
||||
|
||||
/*
|
||||
* Advance the next attribute numbers for the sets that contain
|
||||
* the attribute we just checked. As we work our way through the
|
||||
* columns, the next_attnum values will rise; but when each set
|
||||
* becomes empty, bms_first_member() will return -1 and the attribute
|
||||
* number will end up with a value less than
|
||||
* FirstLowInvalidHeapAttributeNumber.
|
||||
* Advance the next attribute numbers for the sets that contain the
|
||||
* attribute we just checked. As we work our way through the columns,
|
||||
* the next_attnum values will rise; but when each set becomes empty,
|
||||
* bms_first_member() will return -1 and the attribute number will end
|
||||
* up with a value less than FirstLowInvalidHeapAttributeNumber.
|
||||
*/
|
||||
if (hot_result && check_now == next_hot_attnum)
|
||||
{
|
||||
|
@ -4929,12 +4927,13 @@ l5:
|
|||
if (xmax == add_to_xmax)
|
||||
{
|
||||
/*
|
||||
* Note that it's not possible for the original tuple to be updated:
|
||||
* we wouldn't be here because the tuple would have been invisible and
|
||||
* we wouldn't try to update it. As a subtlety, this code can also
|
||||
* run when traversing an update chain to lock future versions of a
|
||||
* tuple. But we wouldn't be here either, because the add_to_xmax
|
||||
* would be different from the original updater.
|
||||
* Note that it's not possible for the original tuple to be
|
||||
* updated: we wouldn't be here because the tuple would have been
|
||||
* invisible and we wouldn't try to update it. As a subtlety,
|
||||
* this code can also run when traversing an update chain to lock
|
||||
* future versions of a tuple. But we wouldn't be here either,
|
||||
* because the add_to_xmax would be different from the original
|
||||
* updater.
|
||||
*/
|
||||
Assert(HEAP_XMAX_IS_LOCKED_ONLY(old_infomask));
|
||||
|
||||
|
@ -5026,18 +5025,18 @@ test_lockmode_for_conflict(MultiXactStatus status, TransactionId xid,
|
|||
if (TransactionIdIsCurrentTransactionId(xid))
|
||||
{
|
||||
/*
|
||||
* Updated by our own transaction? Just return failure. This shouldn't
|
||||
* normally happen.
|
||||
* Updated by our own transaction? Just return failure. This
|
||||
* shouldn't normally happen.
|
||||
*/
|
||||
return HeapTupleSelfUpdated;
|
||||
}
|
||||
else if (TransactionIdIsInProgress(xid))
|
||||
{
|
||||
/*
|
||||
* If the locking transaction is running, what we do depends on whether
|
||||
* the lock modes conflict: if they do, then we must wait for it to
|
||||
* finish; otherwise we can fall through to lock this tuple version
|
||||
* without waiting.
|
||||
* If the locking transaction is running, what we do depends on
|
||||
* whether the lock modes conflict: if they do, then we must wait for
|
||||
* it to finish; otherwise we can fall through to lock this tuple
|
||||
* version without waiting.
|
||||
*/
|
||||
if (DoLockModesConflict(LOCKMODE_from_mxstatus(status),
|
||||
LOCKMODE_from_mxstatus(wantedstatus)))
|
||||
|
@ -5046,8 +5045,8 @@ test_lockmode_for_conflict(MultiXactStatus status, TransactionId xid,
|
|||
}
|
||||
|
||||
/*
|
||||
* If we set needwait above, then this value doesn't matter; otherwise,
|
||||
* this value signals to caller that it's okay to proceed.
|
||||
* If we set needwait above, then this value doesn't matter;
|
||||
* otherwise, this value signals to caller that it's okay to proceed.
|
||||
*/
|
||||
return HeapTupleMayBeUpdated;
|
||||
}
|
||||
|
@ -5133,8 +5132,8 @@ l4:
|
|||
LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
|
||||
|
||||
/*
|
||||
* Check the tuple XMIN against prior XMAX, if any. If we reached
|
||||
* the end of the chain, we're done, so return success.
|
||||
* Check the tuple XMIN against prior XMAX, if any. If we reached the
|
||||
* end of the chain, we're done, so return success.
|
||||
*/
|
||||
if (TransactionIdIsValid(priorXmax) &&
|
||||
!TransactionIdEquals(HeapTupleHeaderGetXmin(mytup.t_data),
|
||||
|
@ -5219,9 +5218,9 @@ l4:
|
|||
else
|
||||
{
|
||||
/*
|
||||
* LOCK_ONLY present alone (a pg_upgraded tuple
|
||||
* marked as share-locked in the old cluster) shouldn't
|
||||
* be seen in the middle of an update chain.
|
||||
* LOCK_ONLY present alone (a pg_upgraded tuple marked
|
||||
* as share-locked in the old cluster) shouldn't be
|
||||
* seen in the middle of an update chain.
|
||||
*/
|
||||
elog(ERROR, "invalid lock status in tuple");
|
||||
}
|
||||
|
@ -5801,11 +5800,11 @@ heap_prepare_freeze_tuple(HeapTupleHeader tuple, TransactionId cutoff_xid,
|
|||
else if (flags & FRM_RETURN_IS_XID)
|
||||
{
|
||||
/*
|
||||
* NB -- some of these transformations are only valid because
|
||||
* we know the return Xid is a tuple updater (i.e. not merely a
|
||||
* NB -- some of these transformations are only valid because we
|
||||
* know the return Xid is a tuple updater (i.e. not merely a
|
||||
* locker.) Also note that the only reason we don't explicitly
|
||||
* worry about HEAP_KEYS_UPDATED is because it lives in t_infomask2
|
||||
* rather than t_infomask.
|
||||
* worry about HEAP_KEYS_UPDATED is because it lives in
|
||||
* t_infomask2 rather than t_infomask.
|
||||
*/
|
||||
frz->t_infomask &= ~HEAP_XMAX_BITS;
|
||||
frz->xmax = newxmax;
|
||||
|
@ -6674,10 +6673,10 @@ log_heap_update(Relation reln, Buffer oldbuf,
|
|||
info = XLOG_HEAP_UPDATE;
|
||||
|
||||
/*
|
||||
* If the old and new tuple are on the same page, we only need to log
|
||||
* the parts of the new tuple that were changed. That saves on the amount
|
||||
* of WAL we need to write. Currently, we just count any unchanged bytes
|
||||
* in the beginning and end of the tuple. That's quick to check, and
|
||||
* If the old and new tuple are on the same page, we only need to log the
|
||||
* parts of the new tuple that were changed. That saves on the amount of
|
||||
* WAL we need to write. Currently, we just count any unchanged bytes in
|
||||
* the beginning and end of the tuple. That's quick to check, and
|
||||
* perfectly covers the common case that only one field is updated.
|
||||
*
|
||||
* We could do this even if the old and new tuple are on different pages,
|
||||
|
@ -6688,10 +6687,10 @@ log_heap_update(Relation reln, Buffer oldbuf,
|
|||
* updates tend to create the new tuple version on the same page, there
|
||||
* isn't much to be gained by doing this across pages anyway.
|
||||
*
|
||||
* Skip this if we're taking a full-page image of the new page, as we don't
|
||||
* include the new tuple in the WAL record in that case. Also disable if
|
||||
* wal_level='logical', as logical decoding needs to be able to read the
|
||||
* new tuple in whole from the WAL record alone.
|
||||
* Skip this if we're taking a full-page image of the new page, as we
|
||||
* don't include the new tuple in the WAL record in that case. Also
|
||||
* disable if wal_level='logical', as logical decoding needs to be able to
|
||||
* read the new tuple in whole from the WAL record alone.
|
||||
*/
|
||||
if (oldbuf == newbuf && !need_tuple_data &&
|
||||
!XLogCheckBufferNeedsBackup(newbuf))
|
||||
|
@ -6707,6 +6706,7 @@ log_heap_update(Relation reln, Buffer oldbuf,
|
|||
if (newp[prefixlen] != oldp[prefixlen])
|
||||
break;
|
||||
}
|
||||
|
||||
/*
|
||||
* Storing the length of the prefix takes 2 bytes, so we need to save
|
||||
* at least 3 bytes or there's no point.
|
||||
|
@ -6793,8 +6793,8 @@ log_heap_update(Relation reln, Buffer oldbuf,
|
|||
xlhdr.header.t_infomask2 = newtup->t_data->t_infomask2;
|
||||
xlhdr.header.t_infomask = newtup->t_data->t_infomask;
|
||||
xlhdr.header.t_hoff = newtup->t_data->t_hoff;
|
||||
Assert(offsetof(HeapTupleHeaderData, t_bits) + prefixlen + suffixlen <= newtup->t_len);
|
||||
xlhdr.t_len = newtup->t_len - offsetof(HeapTupleHeaderData, t_bits) - prefixlen - suffixlen;
|
||||
Assert(offsetof(HeapTupleHeaderData, t_bits) +prefixlen + suffixlen <= newtup->t_len);
|
||||
xlhdr.t_len = newtup->t_len - offsetof(HeapTupleHeaderData, t_bits) -prefixlen - suffixlen;
|
||||
|
||||
/*
|
||||
* As with insert records, we need not store this rdata segment if we
|
||||
|
@ -6816,7 +6816,7 @@ log_heap_update(Relation reln, Buffer oldbuf,
|
|||
if (prefixlen == 0)
|
||||
{
|
||||
rdata[nr].data = ((char *) newtup->t_data) + offsetof(HeapTupleHeaderData, t_bits);
|
||||
rdata[nr].len = newtup->t_len - offsetof(HeapTupleHeaderData, t_bits) - suffixlen;
|
||||
rdata[nr].len = newtup->t_len - offsetof(HeapTupleHeaderData, t_bits) -suffixlen;
|
||||
rdata[nr].buffer = need_tuple_data ? InvalidBuffer : newbufref;
|
||||
rdata[nr].buffer_std = true;
|
||||
rdata[nr].next = NULL;
|
||||
|
@ -6829,7 +6829,7 @@ log_heap_update(Relation reln, Buffer oldbuf,
|
|||
* two separate rdata entries.
|
||||
*/
|
||||
/* bitmap [+ padding] [+ oid] */
|
||||
if (newtup->t_data->t_hoff - offsetof(HeapTupleHeaderData, t_bits) > 0)
|
||||
if (newtup->t_data->t_hoff - offsetof(HeapTupleHeaderData, t_bits) >0)
|
||||
{
|
||||
rdata[nr - 1].next = &(rdata[nr]);
|
||||
rdata[nr].data = ((char *) newtup->t_data) + offsetof(HeapTupleHeaderData, t_bits);
|
||||
|
@ -6992,8 +6992,8 @@ log_newpage(RelFileNode *rnode, ForkNumber forkNum, BlockNumber blkno,
|
|||
recptr = XLogInsert(RM_HEAP_ID, XLOG_HEAP_NEWPAGE, rdata);
|
||||
|
||||
/*
|
||||
* The page may be uninitialized. If so, we can't set the LSN because
|
||||
* that would corrupt the page.
|
||||
* The page may be uninitialized. If so, we can't set the LSN because that
|
||||
* would corrupt the page.
|
||||
*/
|
||||
if (!PageIsNew(page))
|
||||
{
|
||||
|
@ -7179,8 +7179,8 @@ ExtractReplicaIdentity(Relation relation, HeapTuple tp, bool key_changed, bool *
|
|||
{
|
||||
/*
|
||||
* The OID column can appear in an index definition, but that's
|
||||
* OK, because we always copy the OID if present (see below).
|
||||
* Other system columns may not.
|
||||
* OK, because we always copy the OID if present (see below). Other
|
||||
* system columns may not.
|
||||
*/
|
||||
if (attno == ObjectIdAttributeNumber)
|
||||
continue;
|
||||
|
@ -7211,6 +7211,7 @@ ExtractReplicaIdentity(Relation relation, HeapTuple tp, bool key_changed, bool *
|
|||
if (HeapTupleHasExternal(key_tuple))
|
||||
{
|
||||
HeapTuple oldtup = key_tuple;
|
||||
|
||||
key_tuple = toast_flatten_tuple(oldtup, RelationGetDescr(relation));
|
||||
heap_freetuple(oldtup);
|
||||
}
|
||||
|
@ -8169,7 +8170,7 @@ newsame:;
|
|||
if (suffixlen > 0)
|
||||
memcpy(newp, (char *) oldtup.t_data + oldtup.t_len - suffixlen, suffixlen);
|
||||
|
||||
newlen = offsetof(HeapTupleHeaderData, t_bits) + xlhdr.t_len + prefixlen + suffixlen;
|
||||
newlen = offsetof(HeapTupleHeaderData, t_bits) +xlhdr.t_len + prefixlen + suffixlen;
|
||||
htup->t_infomask2 = xlhdr.header.t_infomask2;
|
||||
htup->t_infomask = xlhdr.header.t_infomask;
|
||||
htup->t_hoff = xlhdr.header.t_hoff;
|
||||
|
@ -8444,6 +8445,7 @@ heap2_redo(XLogRecPtr lsn, XLogRecord *record)
|
|||
heap_xlog_lock_updated(lsn, record);
|
||||
break;
|
||||
case XLOG_HEAP2_NEW_CID:
|
||||
|
||||
/*
|
||||
* Nothing to do on a real replay, only used during logical
|
||||
* decoding.
|
||||
|
|
|
@ -496,9 +496,10 @@ heap_prune_chain(Relation relation, Buffer buffer, OffsetNumber rootoffnum,
|
|||
break;
|
||||
|
||||
case HEAPTUPLE_DELETE_IN_PROGRESS:
|
||||
|
||||
/*
|
||||
* This tuple may soon become DEAD. Update the hint field
|
||||
* so that the page is reconsidered for pruning in future.
|
||||
* This tuple may soon become DEAD. Update the hint field so
|
||||
* that the page is reconsidered for pruning in future.
|
||||
*/
|
||||
heap_prune_record_prunable(prstate,
|
||||
HeapTupleHeaderGetUpdateXid(htup));
|
||||
|
|
|
@ -962,14 +962,14 @@ logical_end_heap_rewrite(RewriteState state)
|
|||
return;
|
||||
|
||||
/* write out remaining in-memory entries */
|
||||
if (state->rs_num_rewrite_mappings > 0 )
|
||||
if (state->rs_num_rewrite_mappings > 0)
|
||||
logical_heap_rewrite_flush_mappings(state);
|
||||
|
||||
/* Iterate over all mappings we have written and fsync the files. */
|
||||
hash_seq_init(&seq_status, state->rs_logical_mappings);
|
||||
while ((src = (RewriteMappingFile *) hash_seq_search(&seq_status)) != NULL)
|
||||
{
|
||||
if(FileSync(src->vfd) != 0)
|
||||
if (FileSync(src->vfd) != 0)
|
||||
ereport(ERROR,
|
||||
(errcode_for_file_access(),
|
||||
errmsg("could not fsync file \"%s\": %m", src->path)));
|
||||
|
@ -1041,7 +1041,7 @@ logical_rewrite_log_mapping(RewriteState state, TransactionId xid,
|
|||
* Write out buffer every time we've too many in-memory entries across all
|
||||
* mapping files.
|
||||
*/
|
||||
if (state->rs_num_rewrite_mappings >= 1000 /* arbitrary number */)
|
||||
if (state->rs_num_rewrite_mappings >= 1000 /* arbitrary number */ )
|
||||
logical_heap_rewrite_flush_mappings(state);
|
||||
}
|
||||
|
||||
|
@ -1148,6 +1148,7 @@ heap_xlog_logical_rewrite(XLogRecPtr lsn, XLogRecord *r)
|
|||
ereport(ERROR,
|
||||
(errcode_for_file_access(),
|
||||
errmsg("could not create file \"%s\": %m", path)));
|
||||
|
||||
/*
|
||||
* Truncate all data that's not guaranteed to have been safely fsynced (by
|
||||
* previous record or by the last checkpoint).
|
||||
|
@ -1174,6 +1175,7 @@ heap_xlog_logical_rewrite(XLogRecPtr lsn, XLogRecord *r)
|
|||
ereport(ERROR,
|
||||
(errcode_for_file_access(),
|
||||
errmsg("could not write to file \"%s\": %m", path)));
|
||||
|
||||
/*
|
||||
* Now fsync all previously written data. We could improve things and only
|
||||
* do this for the last write to a file, but the required bookkeeping
|
||||
|
@ -1228,7 +1230,8 @@ CheckPointLogicalRewriteHeap(void)
|
|||
XLogRecPtr lsn;
|
||||
TransactionId rewrite_xid;
|
||||
TransactionId create_xid;
|
||||
uint32 hi, lo;
|
||||
uint32 hi,
|
||||
lo;
|
||||
|
||||
if (strcmp(mapping_de->d_name, ".") == 0 ||
|
||||
strcmp(mapping_de->d_name, "..") == 0)
|
||||
|
@ -1244,7 +1247,7 @@ CheckPointLogicalRewriteHeap(void)
|
|||
|
||||
if (sscanf(mapping_de->d_name, LOGICAL_REWRITE_FORMAT,
|
||||
&dboid, &relid, &hi, &lo, &rewrite_xid, &create_xid) != 6)
|
||||
elog(ERROR,"could not parse filename \"%s\"", mapping_de->d_name);
|
||||
elog(ERROR, "could not parse filename \"%s\"", mapping_de->d_name);
|
||||
|
||||
lsn = ((uint64) hi) << 32 | lo;
|
||||
|
||||
|
@ -1269,6 +1272,7 @@ CheckPointLogicalRewriteHeap(void)
|
|||
ereport(ERROR,
|
||||
(errcode_for_file_access(),
|
||||
errmsg("could not open file \"%s\": %m", path)));
|
||||
|
||||
/*
|
||||
* We could try to avoid fsyncing files that either haven't
|
||||
* changed or have only been created since the checkpoint's start,
|
||||
|
|
|
@ -91,8 +91,9 @@ heap_tuple_fetch_attr(struct varlena * attr)
|
|||
* to persist a Datum for unusually long time, like in a HOLD cursor.
|
||||
*/
|
||||
struct varatt_indirect redirect;
|
||||
|
||||
VARATT_EXTERNAL_GET_POINTER(redirect, attr);
|
||||
attr = (struct varlena *)redirect.pointer;
|
||||
attr = (struct varlena *) redirect.pointer;
|
||||
|
||||
/* nested indirect Datums aren't allowed */
|
||||
Assert(!VARATT_IS_EXTERNAL_INDIRECT(attr));
|
||||
|
@ -147,8 +148,9 @@ heap_tuple_untoast_attr(struct varlena * attr)
|
|||
else if (VARATT_IS_EXTERNAL_INDIRECT(attr))
|
||||
{
|
||||
struct varatt_indirect redirect;
|
||||
|
||||
VARATT_EXTERNAL_GET_POINTER(redirect, attr);
|
||||
attr = (struct varlena *)redirect.pointer;
|
||||
attr = (struct varlena *) redirect.pointer;
|
||||
|
||||
/* nested indirect Datums aren't allowed */
|
||||
Assert(!VARATT_IS_EXTERNAL_INDIRECT(attr));
|
||||
|
@ -217,6 +219,7 @@ heap_tuple_untoast_attr_slice(struct varlena * attr,
|
|||
else if (VARATT_IS_EXTERNAL_INDIRECT(attr))
|
||||
{
|
||||
struct varatt_indirect redirect;
|
||||
|
||||
VARATT_EXTERNAL_GET_POINTER(redirect, attr);
|
||||
|
||||
/* nested indirect Datums aren't allowed */
|
||||
|
@ -299,6 +302,7 @@ toast_raw_datum_size(Datum value)
|
|||
else if (VARATT_IS_EXTERNAL_INDIRECT(attr))
|
||||
{
|
||||
struct varatt_indirect toast_pointer;
|
||||
|
||||
VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr);
|
||||
|
||||
/* nested indirect Datums aren't allowed */
|
||||
|
@ -354,6 +358,7 @@ toast_datum_size(Datum value)
|
|||
else if (VARATT_IS_EXTERNAL_INDIRECT(attr))
|
||||
{
|
||||
struct varatt_indirect toast_pointer;
|
||||
|
||||
VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr);
|
||||
|
||||
/* nested indirect Datums aren't allowed */
|
||||
|
@ -2127,6 +2132,7 @@ toast_open_indexes(Relation toastrel,
|
|||
for (i = 0; i < *num_indexes; i++)
|
||||
{
|
||||
Relation toastidx = (*toastidxs)[i];
|
||||
|
||||
if (toastidx->rd_index->indisvalid)
|
||||
{
|
||||
res = i;
|
||||
|
@ -2136,14 +2142,14 @@ toast_open_indexes(Relation toastrel,
|
|||
}
|
||||
|
||||
/*
|
||||
* Free index list, not necessary anymore as relations are opened
|
||||
* and a valid index has been found.
|
||||
* Free index list, not necessary anymore as relations are opened and a
|
||||
* valid index has been found.
|
||||
*/
|
||||
list_free(indexlist);
|
||||
|
||||
/*
|
||||
* The toast relation should have one valid index, so something is
|
||||
* going wrong if there is nothing.
|
||||
* The toast relation should have one valid index, so something is going
|
||||
* wrong if there is nothing.
|
||||
*/
|
||||
if (!found)
|
||||
elog(ERROR, "no valid index found for toast relation with Oid %d",
|
||||
|
|
|
@ -620,10 +620,10 @@ _bt_findinsertloc(Relation rel,
|
|||
lpageop = (BTPageOpaque) PageGetSpecialPointer(page);
|
||||
|
||||
/*
|
||||
* If this page was incompletely split, finish the split now.
|
||||
* We do this while holding a lock on the left sibling, which
|
||||
* is not good because finishing the split could be a fairly
|
||||
* lengthy operation. But this should happen very seldom.
|
||||
* If this page was incompletely split, finish the split now. We
|
||||
* do this while holding a lock on the left sibling, which is not
|
||||
* good because finishing the split could be a fairly lengthy
|
||||
* operation. But this should happen very seldom.
|
||||
*/
|
||||
if (P_INCOMPLETE_SPLIT(lpageop))
|
||||
{
|
||||
|
@ -1330,11 +1330,10 @@ _bt_split(Relation rel, Buffer buf, Buffer cbuf, OffsetNumber firstright,
|
|||
lastrdata++;
|
||||
|
||||
/*
|
||||
* Although we don't need to WAL-log anything on the left page,
|
||||
* we still need XLogInsert to consider storing a full-page image
|
||||
* of the left page, so make an empty entry referencing that
|
||||
* buffer. This also ensures that the left page is always backup
|
||||
* block 1.
|
||||
* Although we don't need to WAL-log anything on the left page, we
|
||||
* still need XLogInsert to consider storing a full-page image of
|
||||
* the left page, so make an empty entry referencing that buffer.
|
||||
* This also ensures that the left page is always backup block 1.
|
||||
*/
|
||||
lastrdata->data = NULL;
|
||||
lastrdata->len = 0;
|
||||
|
|
|
@ -1049,11 +1049,12 @@ _bt_lock_branch_parent(Relation rel, BlockNumber child, BTStack stack,
|
|||
lbuf = _bt_getbuf(rel, leftsib, BT_READ);
|
||||
lpage = BufferGetPage(lbuf);
|
||||
lopaque = (BTPageOpaque) PageGetSpecialPointer(lpage);
|
||||
|
||||
/*
|
||||
* If the left sibling was concurrently split, so that its
|
||||
* next-pointer doesn't point to the current page anymore,
|
||||
* the split that created the current page must be completed.
|
||||
* (We don't allow splitting an incompletely split page again
|
||||
* next-pointer doesn't point to the current page anymore, the
|
||||
* split that created the current page must be completed. (We
|
||||
* don't allow splitting an incompletely split page again
|
||||
* until the previous split has been completed)
|
||||
*/
|
||||
if (lopaque->btpo_next == parent &&
|
||||
|
@ -1112,6 +1113,7 @@ _bt_pagedel(Relation rel, Buffer buf)
|
|||
bool rightsib_empty;
|
||||
Page page;
|
||||
BTPageOpaque opaque;
|
||||
|
||||
/*
|
||||
* "stack" is a search stack leading (approximately) to the target page.
|
||||
* It is initially NULL, but when iterating, we keep it to avoid
|
||||
|
@ -1140,10 +1142,10 @@ _bt_pagedel(Relation rel, Buffer buf)
|
|||
* was never supposed to leave half-dead pages in the tree, it was
|
||||
* just a transient state, but it was nevertheless possible in
|
||||
* error scenarios. We don't know how to deal with them here. They
|
||||
* are harmless as far as searches are considered, but inserts into
|
||||
* the deleted keyspace could add out-of-order downlinks in the
|
||||
* upper levels. Log a notice, hopefully the admin will notice and
|
||||
* reindex.
|
||||
* are harmless as far as searches are considered, but inserts
|
||||
* into the deleted keyspace could add out-of-order downlinks in
|
||||
* the upper levels. Log a notice, hopefully the admin will notice
|
||||
* and reindex.
|
||||
*/
|
||||
if (P_ISHALFDEAD(opaque))
|
||||
ereport(LOG,
|
||||
|
@ -1156,8 +1158,8 @@ _bt_pagedel(Relation rel, Buffer buf)
|
|||
}
|
||||
|
||||
/*
|
||||
* We can never delete rightmost pages nor root pages. While at
|
||||
* it, check that page is not already deleted and is empty.
|
||||
* We can never delete rightmost pages nor root pages. While at it,
|
||||
* check that page is not already deleted and is empty.
|
||||
*
|
||||
* To keep the algorithm simple, we also never delete an incompletely
|
||||
* split page (they should be rare enough that this doesn't make any
|
||||
|
@ -1167,10 +1169,10 @@ _bt_pagedel(Relation rel, Buffer buf)
|
|||
* left half of an incomplete split, but ensuring that it's not the
|
||||
* right half is more complicated. For that, we have to check that
|
||||
* the left sibling doesn't have its INCOMPLETE_SPLIT flag set. On
|
||||
* the first iteration, we temporarily release the lock on the
|
||||
* current page, and check the left sibling and also construct a
|
||||
* search stack to. On subsequent iterations, we know we stepped right
|
||||
* from a page that passed these tests, so it's OK.
|
||||
* the first iteration, we temporarily release the lock on the current
|
||||
* page, and check the left sibling and also construct a search stack
|
||||
* to. On subsequent iterations, we know we stepped right from a page
|
||||
* that passed these tests, so it's OK.
|
||||
*/
|
||||
if (P_RIGHTMOST(opaque) || P_ISROOT(opaque) || P_ISDELETED(opaque) ||
|
||||
P_FIRSTDATAKEY(opaque) <= PageGetMaxOffsetNumber(page) ||
|
||||
|
@ -1184,9 +1186,9 @@ _bt_pagedel(Relation rel, Buffer buf)
|
|||
}
|
||||
|
||||
/*
|
||||
* First, remove downlink pointing to the page (or a parent of the page,
|
||||
* if we are going to delete a taller branch), and mark the page as
|
||||
* half-dead.
|
||||
* First, remove downlink pointing to the page (or a parent of the
|
||||
* page, if we are going to delete a taller branch), and mark the page
|
||||
* as half-dead.
|
||||
*/
|
||||
if (!P_ISHALFDEAD(opaque))
|
||||
{
|
||||
|
@ -1219,9 +1221,9 @@ _bt_pagedel(Relation rel, Buffer buf)
|
|||
LockBuffer(buf, BUFFER_LOCK_UNLOCK);
|
||||
|
||||
/*
|
||||
* Fetch the left sibling, to check that it's not marked
|
||||
* with INCOMPLETE_SPLIT flag. That would mean that the
|
||||
* page to-be-deleted doesn't have a downlink, and the page
|
||||
* Fetch the left sibling, to check that it's not marked with
|
||||
* INCOMPLETE_SPLIT flag. That would mean that the page
|
||||
* to-be-deleted doesn't have a downlink, and the page
|
||||
* deletion algorithm isn't prepared to handle that.
|
||||
*/
|
||||
if (!P_LEFTMOST(opaque))
|
||||
|
@ -1267,7 +1269,7 @@ _bt_pagedel(Relation rel, Buffer buf)
|
|||
|
||||
/*
|
||||
* Then unlink it from its siblings. Each call to
|
||||
*_bt_unlink_halfdead_page unlinks the topmost page from the branch,
|
||||
* _bt_unlink_halfdead_page unlinks the topmost page from the branch,
|
||||
* making it shallower. Iterate until the leaf page is gone.
|
||||
*/
|
||||
rightsib_empty = false;
|
||||
|
@ -1291,8 +1293,8 @@ _bt_pagedel(Relation rel, Buffer buf)
|
|||
* is that it was the rightmost child of the parent. Now that we
|
||||
* removed the downlink for this page, the right sibling might now be
|
||||
* the only child of the parent, and could be removed. It would be
|
||||
* picked up by the next vacuum anyway, but might as well try to remove
|
||||
* it now, so loop back to process the right sibling.
|
||||
* picked up by the next vacuum anyway, but might as well try to
|
||||
* remove it now, so loop back to process the right sibling.
|
||||
*/
|
||||
if (!rightsib_empty)
|
||||
break;
|
||||
|
@ -1605,9 +1607,9 @@ _bt_unlink_halfdead_page(Relation rel, Buffer leafbuf, bool *rightsib_empty)
|
|||
opaque = (BTPageOpaque) PageGetSpecialPointer(page);
|
||||
|
||||
/*
|
||||
* Check page is still empty etc, else abandon deletion. This is just
|
||||
* for paranoia's sake; a half-dead page cannot resurrect because there
|
||||
* can be only one vacuum process running at a time.
|
||||
* Check page is still empty etc, else abandon deletion. This is just for
|
||||
* paranoia's sake; a half-dead page cannot resurrect because there can be
|
||||
* only one vacuum process running at a time.
|
||||
*/
|
||||
if (P_RIGHTMOST(opaque) || P_ISROOT(opaque) || P_ISDELETED(opaque))
|
||||
{
|
||||
|
|
|
@ -40,9 +40,9 @@ _bt_restore_page(Page page, char *from, int len)
|
|||
int nitems;
|
||||
|
||||
/*
|
||||
* To get the items back in the original order, we add them to the page
|
||||
* in reverse. To figure out where one tuple ends and another begins,
|
||||
* we have to scan them in forward order first.
|
||||
* To get the items back in the original order, we add them to the page in
|
||||
* reverse. To figure out where one tuple ends and another begins, we
|
||||
* have to scan them in forward order first.
|
||||
*/
|
||||
i = 0;
|
||||
while (from < end)
|
||||
|
@ -128,6 +128,7 @@ _bt_clear_incomplete_split(XLogRecPtr lsn, XLogRecord *record,
|
|||
if (lsn > PageGetLSN(page))
|
||||
{
|
||||
BTPageOpaque pageop = (BTPageOpaque) PageGetSpecialPointer(page);
|
||||
|
||||
Assert((pageop->btpo_flags & BTP_INCOMPLETE_SPLIT) != 0);
|
||||
pageop->btpo_flags &= ~BTP_INCOMPLETE_SPLIT;
|
||||
|
||||
|
@ -153,6 +154,7 @@ btree_xlog_insert(bool isleaf, bool ismeta,
|
|||
|
||||
datapos = (char *) xlrec + SizeOfBtreeInsert;
|
||||
datalen = record->xl_len - SizeOfBtreeInsert;
|
||||
|
||||
/*
|
||||
* if this insert finishes a split at lower level, extract the block
|
||||
* number of the (left) child.
|
||||
|
@ -172,10 +174,10 @@ btree_xlog_insert(bool isleaf, bool ismeta,
|
|||
}
|
||||
|
||||
/*
|
||||
* Insertion to an internal page finishes an incomplete split at the
|
||||
* child level. Clear the incomplete-split flag in the child. Note:
|
||||
* during normal operation, the child and parent pages are locked at the
|
||||
* same time, so that clearing the flag and inserting the downlink appear
|
||||
* Insertion to an internal page finishes an incomplete split at the child
|
||||
* level. Clear the incomplete-split flag in the child. Note: during
|
||||
* normal operation, the child and parent pages are locked at the same
|
||||
* time, so that clearing the flag and inserting the downlink appear
|
||||
* atomic to other backends. We don't bother with that during replay,
|
||||
* because readers don't care about the incomplete-split flag and there
|
||||
* cannot be updates happening.
|
||||
|
@ -279,9 +281,10 @@ btree_xlog_split(bool onleft, bool isroot,
|
|||
datapos += left_hikeysz;
|
||||
datalen -= left_hikeysz;
|
||||
}
|
||||
|
||||
/*
|
||||
* If this insertion finishes an incomplete split, get the block number
|
||||
* of the child.
|
||||
* If this insertion finishes an incomplete split, get the block number of
|
||||
* the child.
|
||||
*/
|
||||
if (!isleaf && !(record->xl_info & XLR_BKP_BLOCK(1)))
|
||||
{
|
||||
|
@ -716,9 +719,9 @@ btree_xlog_delete_get_latestRemovedXid(xl_btree_delete *xlrec)
|
|||
/*
|
||||
* If all heap tuples were LP_DEAD then we will be returning
|
||||
* InvalidTransactionId here, which avoids conflicts. This matches
|
||||
* existing logic which assumes that LP_DEAD tuples must already be
|
||||
* older than the latestRemovedXid on the cleanup record that
|
||||
* set them as LP_DEAD, hence must already have generated a conflict.
|
||||
* existing logic which assumes that LP_DEAD tuples must already be older
|
||||
* than the latestRemovedXid on the cleanup record that set them as
|
||||
* LP_DEAD, hence must already have generated a conflict.
|
||||
*/
|
||||
return latestRemovedXid;
|
||||
}
|
||||
|
|
|
@ -54,7 +54,7 @@ desc_recompress_leaf(StringInfo buf, ginxlogRecompressDataLeaf *insertData)
|
|||
walbuf += nitems * sizeof(ItemPointerData);
|
||||
}
|
||||
|
||||
switch(a_action)
|
||||
switch (a_action)
|
||||
{
|
||||
case GIN_SEGMENT_ADDITEMS:
|
||||
appendStringInfo(buf, " %d (add %d items)", a_segno, nitems);
|
||||
|
@ -129,6 +129,7 @@ gin_desc(StringInfo buf, uint8 xl_info, char *rec)
|
|||
else
|
||||
{
|
||||
ginxlogInsertDataInternal *insertData = (ginxlogInsertDataInternal *) payload;
|
||||
|
||||
appendStringInfo(buf, " pitem: %u-%u/%u",
|
||||
PostingItemGetBlockNumber(&insertData->newitem),
|
||||
ItemPointerGetBlockNumber(&insertData->newitem.key),
|
||||
|
@ -155,6 +156,7 @@ gin_desc(StringInfo buf, uint8 xl_info, char *rec)
|
|||
case XLOG_GIN_VACUUM_DATA_LEAF_PAGE:
|
||||
{
|
||||
ginxlogVacuumDataLeafPage *xlrec = (ginxlogVacuumDataLeafPage *) rec;
|
||||
|
||||
appendStringInfoString(buf, "Vacuum data leaf page, ");
|
||||
desc_node(buf, xlrec->node, xlrec->blkno);
|
||||
if (xl_info & XLR_BKP_BLOCK(0))
|
||||
|
|
|
@ -579,9 +579,9 @@ MultiXactIdSetOldestMember(void)
|
|||
* back. Which would be wrong.
|
||||
*
|
||||
* Note that a shared lock is sufficient, because it's enough to stop
|
||||
* someone from advancing nextMXact; and nobody else could be trying to
|
||||
* write to our OldestMember entry, only reading (and we assume storing
|
||||
* it is atomic.)
|
||||
* someone from advancing nextMXact; and nobody else could be trying
|
||||
* to write to our OldestMember entry, only reading (and we assume
|
||||
* storing it is atomic.)
|
||||
*/
|
||||
LWLockAcquire(MultiXactGenLock, LW_SHARED);
|
||||
|
||||
|
@ -2399,8 +2399,8 @@ SlruScanDirCbRemoveMembers(SlruCtl ctl, char *filename, int segpage,
|
|||
return false; /* easy case out */
|
||||
|
||||
/*
|
||||
* To ensure that no segment is spuriously removed, we must keep track
|
||||
* of new segments added since the start of the directory scan; to do this,
|
||||
* To ensure that no segment is spuriously removed, we must keep track of
|
||||
* new segments added since the start of the directory scan; to do this,
|
||||
* we update our end-of-range point as we run.
|
||||
*
|
||||
* As an optimization, we can skip looking at shared memory if we know for
|
||||
|
|
|
@ -487,8 +487,8 @@ AssignTransactionId(TransactionState s)
|
|||
|
||||
/*
|
||||
* When wal_level=logical, guarantee that a subtransaction's xid can only
|
||||
* be seen in the WAL stream if its toplevel xid has been logged
|
||||
* before. If necessary we log a xact_assignment record with fewer than
|
||||
* be seen in the WAL stream if its toplevel xid has been logged before.
|
||||
* If necessary we log a xact_assignment record with fewer than
|
||||
* PGPROC_MAX_CACHED_SUBXIDS. Note that it is fine if didLogXid isn't set
|
||||
* for a transaction even though it appears in a WAL record, we just might
|
||||
* superfluously log something. That can happen when an xid is included
|
||||
|
|
|
@ -418,11 +418,11 @@ typedef struct XLogCtlInsert
|
|||
slock_t insertpos_lck; /* protects CurrBytePos and PrevBytePos */
|
||||
|
||||
/*
|
||||
* CurrBytePos is the end of reserved WAL. The next record will be inserted
|
||||
* at that position. PrevBytePos is the start position of the previously
|
||||
* inserted (or rather, reserved) record - it is copied to the prev-link
|
||||
* of the next record. These are stored as "usable byte positions" rather
|
||||
* than XLogRecPtrs (see XLogBytePosToRecPtr()).
|
||||
* CurrBytePos is the end of reserved WAL. The next record will be
|
||||
* inserted at that position. PrevBytePos is the start position of the
|
||||
* previously inserted (or rather, reserved) record - it is copied to the
|
||||
* prev-link of the next record. These are stored as "usable byte
|
||||
* positions" rather than XLogRecPtrs (see XLogBytePosToRecPtr()).
|
||||
*/
|
||||
uint64 CurrBytePos;
|
||||
uint64 PrevBytePos;
|
||||
|
@ -504,10 +504,11 @@ typedef struct XLogCtlData
|
|||
* Latest initialized page in the cache (last byte position + 1).
|
||||
*
|
||||
* To change the identity of a buffer (and InitializedUpTo), you need to
|
||||
* hold WALBufMappingLock. To change the identity of a buffer that's still
|
||||
* dirty, the old page needs to be written out first, and for that you
|
||||
* need WALWriteLock, and you need to ensure that there are no in-progress
|
||||
* insertions to the page by calling WaitXLogInsertionsToFinish().
|
||||
* hold WALBufMappingLock. To change the identity of a buffer that's
|
||||
* still dirty, the old page needs to be written out first, and for that
|
||||
* you need WALWriteLock, and you need to ensure that there are no
|
||||
* in-progress insertions to the page by calling
|
||||
* WaitXLogInsertionsToFinish().
|
||||
*/
|
||||
XLogRecPtr InitializedUpTo;
|
||||
|
||||
|
@ -860,6 +861,7 @@ XLogInsert(RmgrId rmid, uint8 info, XLogRecData *rdata)
|
|||
if (rechdr == NULL)
|
||||
{
|
||||
static char rechdrbuf[SizeOfXLogRecord + MAXIMUM_ALIGNOF];
|
||||
|
||||
rechdr = (XLogRecord *) MAXALIGN(&rechdrbuf);
|
||||
MemSet(rechdr, 0, SizeOfXLogRecord);
|
||||
}
|
||||
|
@ -1232,6 +1234,7 @@ begin:;
|
|||
{
|
||||
TRACE_POSTGRESQL_XLOG_SWITCH();
|
||||
XLogFlush(EndPos);
|
||||
|
||||
/*
|
||||
* Even though we reserved the rest of the segment for us, which is
|
||||
* reflected in EndPos, we return a pointer to just the end of the
|
||||
|
@ -1272,7 +1275,7 @@ begin:;
|
|||
rdt_lastnormal->next = NULL;
|
||||
|
||||
initStringInfo(&recordbuf);
|
||||
for (;rdata != NULL; rdata = rdata->next)
|
||||
for (; rdata != NULL; rdata = rdata->next)
|
||||
appendBinaryStringInfo(&recordbuf, rdata->data, rdata->len);
|
||||
|
||||
appendStringInfoString(&buf, " - ");
|
||||
|
@ -1514,8 +1517,8 @@ CopyXLogRecordToWAL(int write_len, bool isLogSwitch, XLogRecData *rdata,
|
|||
|
||||
/*
|
||||
* If this was an xlog-switch, it's not enough to write the switch record,
|
||||
* we also have to consume all the remaining space in the WAL segment.
|
||||
* We have already reserved it for us, but we still need to make sure it's
|
||||
* we also have to consume all the remaining space in the WAL segment. We
|
||||
* have already reserved it for us, but we still need to make sure it's
|
||||
* allocated and zeroed in the WAL buffers so that when the caller (or
|
||||
* someone else) does XLogWrite(), it can really write out all the zeros.
|
||||
*/
|
||||
|
@ -1556,14 +1559,14 @@ WALInsertLockAcquire(void)
|
|||
|
||||
/*
|
||||
* It doesn't matter which of the WAL insertion locks we acquire, so try
|
||||
* the one we used last time. If the system isn't particularly busy,
|
||||
* it's a good bet that it's still available, and it's good to have some
|
||||
* the one we used last time. If the system isn't particularly busy, it's
|
||||
* a good bet that it's still available, and it's good to have some
|
||||
* affinity to a particular lock so that you don't unnecessarily bounce
|
||||
* cache lines between processes when there's no contention.
|
||||
*
|
||||
* If this is the first time through in this backend, pick a lock
|
||||
* (semi-)randomly. This allows the locks to be used evenly if you have
|
||||
* a lot of very short connections.
|
||||
* (semi-)randomly. This allows the locks to be used evenly if you have a
|
||||
* lot of very short connections.
|
||||
*/
|
||||
static int lockToTry = -1;
|
||||
|
||||
|
@ -1583,10 +1586,10 @@ WALInsertLockAcquire(void)
|
|||
/*
|
||||
* If we couldn't get the lock immediately, try another lock next
|
||||
* time. On a system with more insertion locks than concurrent
|
||||
* inserters, this causes all the inserters to eventually migrate
|
||||
* to a lock that no-one else is using. On a system with more
|
||||
* inserters than locks, it still helps to distribute the inserters
|
||||
* evenly across the locks.
|
||||
* inserters, this causes all the inserters to eventually migrate to a
|
||||
* lock that no-one else is using. On a system with more inserters
|
||||
* than locks, it still helps to distribute the inserters evenly
|
||||
* across the locks.
|
||||
*/
|
||||
lockToTry = (lockToTry + 1) % num_xloginsert_locks;
|
||||
}
|
||||
|
@ -1604,8 +1607,8 @@ WALInsertLockAcquireExclusive(void)
|
|||
/*
|
||||
* When holding all the locks, we only update the last lock's insertingAt
|
||||
* indicator. The others are set to 0xFFFFFFFFFFFFFFFF, which is higher
|
||||
* than any real XLogRecPtr value, to make sure that no-one blocks
|
||||
* waiting on those.
|
||||
* than any real XLogRecPtr value, to make sure that no-one blocks waiting
|
||||
* on those.
|
||||
*/
|
||||
for (i = 0; i < num_xloginsert_locks - 1; i++)
|
||||
{
|
||||
|
@ -1716,15 +1719,16 @@ WaitXLogInsertionsToFinish(XLogRecPtr upto)
|
|||
* Loop through all the locks, sleeping on any in-progress insert older
|
||||
* than 'upto'.
|
||||
*
|
||||
* finishedUpto is our return value, indicating the point upto which
|
||||
* all the WAL insertions have been finished. Initialize it to the head
|
||||
* of reserved WAL, and as we iterate through the insertion locks, back it
|
||||
* finishedUpto is our return value, indicating the point upto which all
|
||||
* the WAL insertions have been finished. Initialize it to the head of
|
||||
* reserved WAL, and as we iterate through the insertion locks, back it
|
||||
* out for any insertion that's still in progress.
|
||||
*/
|
||||
finishedUpto = reservedUpto;
|
||||
for (i = 0; i < num_xloginsert_locks; i++)
|
||||
{
|
||||
XLogRecPtr insertingat = InvalidXLogRecPtr;
|
||||
|
||||
do
|
||||
{
|
||||
/*
|
||||
|
@ -1797,9 +1801,9 @@ GetXLogBuffer(XLogRecPtr ptr)
|
|||
}
|
||||
|
||||
/*
|
||||
* The XLog buffer cache is organized so that a page is always loaded
|
||||
* to a particular buffer. That way we can easily calculate the buffer
|
||||
* a given page must be loaded into, from the XLogRecPtr alone.
|
||||
* The XLog buffer cache is organized so that a page is always loaded to a
|
||||
* particular buffer. That way we can easily calculate the buffer a given
|
||||
* page must be loaded into, from the XLogRecPtr alone.
|
||||
*/
|
||||
idx = XLogRecPtrToBufIdx(ptr);
|
||||
|
||||
|
@ -1827,8 +1831,8 @@ GetXLogBuffer(XLogRecPtr ptr)
|
|||
if (expectedEndPtr != endptr)
|
||||
{
|
||||
/*
|
||||
* Let others know that we're finished inserting the record up
|
||||
* to the page boundary.
|
||||
* Let others know that we're finished inserting the record up to the
|
||||
* page boundary.
|
||||
*/
|
||||
WALInsertLockUpdateInsertingAt(expectedEndPtr - XLOG_BLCKSZ);
|
||||
|
||||
|
@ -1837,7 +1841,7 @@ GetXLogBuffer(XLogRecPtr ptr)
|
|||
|
||||
if (expectedEndPtr != endptr)
|
||||
elog(PANIC, "could not find WAL buffer for %X/%X",
|
||||
(uint32) (ptr >> 32) , (uint32) ptr);
|
||||
(uint32) (ptr >> 32), (uint32) ptr);
|
||||
}
|
||||
else
|
||||
{
|
||||
|
@ -2170,8 +2174,8 @@ AdvanceXLInsertBuffer(XLogRecPtr upto, bool opportunistic)
|
|||
}
|
||||
|
||||
/*
|
||||
* Now the next buffer slot is free and we can set it up to be the next
|
||||
* output page.
|
||||
* Now the next buffer slot is free and we can set it up to be the
|
||||
* next output page.
|
||||
*/
|
||||
NewPageBeginPtr = XLogCtl->InitializedUpTo;
|
||||
NewPageEndPtr = NewPageBeginPtr + XLOG_BLCKSZ;
|
||||
|
@ -2194,6 +2198,7 @@ AdvanceXLInsertBuffer(XLogRecPtr upto, bool opportunistic)
|
|||
/* NewPage->xlp_info = 0; */ /* done by memset */
|
||||
NewPage ->xlp_tli = ThisTimeLineID;
|
||||
NewPage ->xlp_pageaddr = NewPageBeginPtr;
|
||||
|
||||
/* NewPage->xlp_rem_len = 0; */ /* done by memset */
|
||||
|
||||
/*
|
||||
|
@ -2202,12 +2207,12 @@ AdvanceXLInsertBuffer(XLogRecPtr upto, bool opportunistic)
|
|||
* blocks. This allows the WAL archiver to know whether it is safe to
|
||||
* compress archived WAL data by transforming full-block records into
|
||||
* the non-full-block format. It is sufficient to record this at the
|
||||
* page level because we force a page switch (in fact a segment switch)
|
||||
* when starting a backup, so the flag will be off before any records
|
||||
* can be written during the backup. At the end of a backup, the last
|
||||
* page will be marked as all unsafe when perhaps only part is unsafe,
|
||||
* but at worst the archiver would miss the opportunity to compress a
|
||||
* few records.
|
||||
* page level because we force a page switch (in fact a segment
|
||||
* switch) when starting a backup, so the flag will be off before any
|
||||
* records can be written during the backup. At the end of a backup,
|
||||
* the last page will be marked as all unsafe when perhaps only part
|
||||
* is unsafe, but at worst the archiver would miss the opportunity to
|
||||
* compress a few records.
|
||||
*/
|
||||
if (!Insert->forcePageWrites)
|
||||
NewPage ->xlp_info |= XLP_BKP_REMOVABLE;
|
||||
|
@ -2330,6 +2335,7 @@ XLogWrite(XLogwrtRqst WriteRqst, bool flexible)
|
|||
* last page that's been initialized by AdvanceXLInsertBuffer.
|
||||
*/
|
||||
XLogRecPtr EndPtr = XLogCtl->xlblocks[curridx];
|
||||
|
||||
if (LogwrtResult.Write >= EndPtr)
|
||||
elog(PANIC, "xlog write request %X/%X is past end of log %X/%X",
|
||||
(uint32) (LogwrtResult.Write >> 32),
|
||||
|
@ -2617,6 +2623,7 @@ XLogGetReplicationSlotMinimumLSN(void)
|
|||
/* use volatile pointer to prevent code rearrangement */
|
||||
volatile XLogCtlData *xlogctl = XLogCtl;
|
||||
XLogRecPtr retval;
|
||||
|
||||
SpinLockAcquire(&xlogctl->info_lck);
|
||||
retval = xlogctl->replicationSlotMinLSN;
|
||||
SpinLockRelease(&xlogctl->info_lck);
|
||||
|
@ -3828,6 +3835,7 @@ RemoveOldXlogFiles(XLogSegNo segno, XLogRecPtr endptr)
|
|||
xlde->d_name)));
|
||||
|
||||
#ifdef WIN32
|
||||
|
||||
/*
|
||||
* On Windows, if another process (e.g another backend)
|
||||
* holds the file open in FILE_SHARE_DELETE mode, unlink
|
||||
|
@ -4815,7 +4823,7 @@ XLOGShmemInit(void)
|
|||
|
||||
/* WAL insertion locks. Ensure they're aligned to the full padded size */
|
||||
allocptr += sizeof(WALInsertLockPadded) -
|
||||
((uintptr_t) allocptr) % sizeof(WALInsertLockPadded);
|
||||
((uintptr_t) allocptr) %sizeof(WALInsertLockPadded);
|
||||
WALInsertLocks = XLogCtl->Insert.WALInsertLocks =
|
||||
(WALInsertLockPadded *) allocptr;
|
||||
allocptr += sizeof(WALInsertLockPadded) * num_xloginsert_locks;
|
||||
|
@ -4836,8 +4844,8 @@ XLOGShmemInit(void)
|
|||
|
||||
/*
|
||||
* Align the start of the page buffers to a full xlog block size boundary.
|
||||
* This simplifies some calculations in XLOG insertion. It is also required
|
||||
* for O_DIRECT.
|
||||
* This simplifies some calculations in XLOG insertion. It is also
|
||||
* required for O_DIRECT.
|
||||
*/
|
||||
allocptr = (char *) TYPEALIGN(XLOG_BLCKSZ, allocptr);
|
||||
XLogCtl->pages = allocptr;
|
||||
|
@ -5464,8 +5472,8 @@ recoveryStopsBefore(XLogRecord *record)
|
|||
*
|
||||
* when testing for an xid, we MUST test for equality only, since
|
||||
* transactions are numbered in the order they start, not the order
|
||||
* they complete. A higher numbered xid will complete before you
|
||||
* about 50% of the time...
|
||||
* they complete. A higher numbered xid will complete before you about
|
||||
* 50% of the time...
|
||||
*/
|
||||
stopsHere = (record->xl_xid == recoveryTargetXid);
|
||||
}
|
||||
|
@ -5525,8 +5533,8 @@ recoveryStopsAfter(XLogRecord *record)
|
|||
record_info = record->xl_info & ~XLR_INFO_MASK;
|
||||
|
||||
/*
|
||||
* There can be many restore points that share the same name; we stop
|
||||
* at the first one.
|
||||
* There can be many restore points that share the same name; we stop at
|
||||
* the first one.
|
||||
*/
|
||||
if (recoveryTarget == RECOVERY_TARGET_NAME &&
|
||||
record->xl_rmid == RM_XLOG_ID && record_info == XLOG_RESTORE_POINT)
|
||||
|
@ -5688,10 +5696,10 @@ recoveryApplyDelay(XLogRecord *record)
|
|||
/*
|
||||
* Is it a COMMIT record?
|
||||
*
|
||||
* We deliberately choose not to delay aborts since they have no effect
|
||||
* on MVCC. We already allow replay of records that don't have a
|
||||
* timestamp, so there is already opportunity for issues caused by early
|
||||
* conflicts on standbys.
|
||||
* We deliberately choose not to delay aborts since they have no effect on
|
||||
* MVCC. We already allow replay of records that don't have a timestamp,
|
||||
* so there is already opportunity for issues caused by early conflicts on
|
||||
* standbys.
|
||||
*/
|
||||
record_info = record->xl_info & ~XLR_INFO_MASK;
|
||||
if (!(record->xl_rmid == RM_XACT_ID &&
|
||||
|
@ -5711,7 +5719,7 @@ recoveryApplyDelay(XLogRecord *record)
|
|||
*/
|
||||
TimestampDifference(GetCurrentTimestamp(), recoveryDelayUntilTime,
|
||||
&secs, &microsecs);
|
||||
if (secs <= 0 && microsecs <=0)
|
||||
if (secs <= 0 && microsecs <= 0)
|
||||
return false;
|
||||
|
||||
while (true)
|
||||
|
@ -5731,7 +5739,7 @@ recoveryApplyDelay(XLogRecord *record)
|
|||
TimestampDifference(GetCurrentTimestamp(), recoveryDelayUntilTime,
|
||||
&secs, &microsecs);
|
||||
|
||||
if (secs <= 0 && microsecs <=0)
|
||||
if (secs <= 0 && microsecs <= 0)
|
||||
break;
|
||||
|
||||
elog(DEBUG2, "recovery apply delay %ld seconds, %d milliseconds",
|
||||
|
@ -6261,9 +6269,9 @@ StartupXLOG(void)
|
|||
StartupReorderBuffer();
|
||||
|
||||
/*
|
||||
* Startup MultiXact. We need to do this early for two reasons: one
|
||||
* is that we might try to access multixacts when we do tuple freezing,
|
||||
* and the other is we need its state initialized because we attempt
|
||||
* Startup MultiXact. We need to do this early for two reasons: one is
|
||||
* that we might try to access multixacts when we do tuple freezing, and
|
||||
* the other is we need its state initialized because we attempt
|
||||
* truncation during restartpoints.
|
||||
*/
|
||||
StartupMultiXact();
|
||||
|
@ -6517,9 +6525,9 @@ StartupXLOG(void)
|
|||
}
|
||||
|
||||
/*
|
||||
* Initialize shared variables for tracking progress of WAL replay,
|
||||
* as if we had just replayed the record before the REDO location
|
||||
* (or the checkpoint record itself, if it's a shutdown checkpoint).
|
||||
* Initialize shared variables for tracking progress of WAL replay, as
|
||||
* if we had just replayed the record before the REDO location (or the
|
||||
* checkpoint record itself, if it's a shutdown checkpoint).
|
||||
*/
|
||||
SpinLockAcquire(&xlogctl->info_lck);
|
||||
if (checkPoint.redo < RecPtr)
|
||||
|
@ -6646,17 +6654,17 @@ StartupXLOG(void)
|
|||
}
|
||||
|
||||
/*
|
||||
* If we've been asked to lag the master, wait on
|
||||
* latch until enough time has passed.
|
||||
* If we've been asked to lag the master, wait on latch until
|
||||
* enough time has passed.
|
||||
*/
|
||||
if (recoveryApplyDelay(record))
|
||||
{
|
||||
/*
|
||||
* We test for paused recovery again here. If
|
||||
* user sets delayed apply, it may be because
|
||||
* they expect to pause recovery in case of
|
||||
* problems, so we must test again here otherwise
|
||||
* pausing during the delay-wait wouldn't work.
|
||||
* We test for paused recovery again here. If user sets
|
||||
* delayed apply, it may be because they expect to pause
|
||||
* recovery in case of problems, so we must test again
|
||||
* here otherwise pausing during the delay-wait wouldn't
|
||||
* work.
|
||||
*/
|
||||
if (xlogctl->recoveryPause)
|
||||
recoveryPausesHere();
|
||||
|
@ -6996,9 +7004,9 @@ StartupXLOG(void)
|
|||
else
|
||||
{
|
||||
/*
|
||||
* There is no partial block to copy. Just set InitializedUpTo,
|
||||
* and let the first attempt to insert a log record to initialize
|
||||
* the next buffer.
|
||||
* There is no partial block to copy. Just set InitializedUpTo, and
|
||||
* let the first attempt to insert a log record to initialize the next
|
||||
* buffer.
|
||||
*/
|
||||
XLogCtl->InitializedUpTo = EndOfLog;
|
||||
}
|
||||
|
@ -7335,6 +7343,7 @@ RecoveryInProgress(void)
|
|||
pg_memory_barrier();
|
||||
InitXLOGAccess();
|
||||
}
|
||||
|
||||
/*
|
||||
* Note: We don't need a memory barrier when we're still in recovery.
|
||||
* We might exit recovery immediately after return, so the caller
|
||||
|
@ -8131,9 +8140,8 @@ CreateCheckPoint(int flags)
|
|||
* fuzzy: it is possible that we will wait for xacts we didn't really need
|
||||
* to wait for. But the delay should be short and it seems better to make
|
||||
* checkpoint take a bit longer than to hold off insertions longer than
|
||||
* necessary.
|
||||
* (In fact, the whole reason we have this issue is that xact.c does
|
||||
* commit record XLOG insertion and clog update as two separate steps
|
||||
* necessary. (In fact, the whole reason we have this issue is that xact.c
|
||||
* does commit record XLOG insertion and clog update as two separate steps
|
||||
* protected by different locks, but again that seems best on grounds of
|
||||
* minimizing lock contention.)
|
||||
*
|
||||
|
@ -8600,11 +8608,11 @@ CreateRestartPoint(int flags)
|
|||
_logSegNo--;
|
||||
|
||||
/*
|
||||
* Try to recycle segments on a useful timeline. If we've been promoted
|
||||
* since the beginning of this restartpoint, use the new timeline
|
||||
* chosen at end of recovery (RecoveryInProgress() sets ThisTimeLineID
|
||||
* in that case). If we're still in recovery, use the timeline we're
|
||||
* currently replaying.
|
||||
* Try to recycle segments on a useful timeline. If we've been
|
||||
* promoted since the beginning of this restartpoint, use the new
|
||||
* timeline chosen at end of recovery (RecoveryInProgress() sets
|
||||
* ThisTimeLineID in that case). If we're still in recovery, use the
|
||||
* timeline we're currently replaying.
|
||||
*
|
||||
* There is no guarantee that the WAL segments will be useful on the
|
||||
* current timeline; if recovery proceeds to a new timeline right
|
||||
|
@ -8859,8 +8867,9 @@ XLogSaveBufferForHint(Buffer buffer, bool buffer_std)
|
|||
* lsn updates. We assume pd_lower/upper cannot be changed without an
|
||||
* exclusive lock, so the contents bkp are not racy.
|
||||
*
|
||||
* With buffer_std set to false, XLogCheckBuffer() sets hole_length and
|
||||
* hole_offset to 0; so the following code is safe for either case.
|
||||
* With buffer_std set to false, XLogCheckBuffer() sets hole_length
|
||||
* and hole_offset to 0; so the following code is safe for either
|
||||
* case.
|
||||
*/
|
||||
memcpy(copied_buffer, origdata, bkpb.hole_offset);
|
||||
memcpy(copied_buffer + bkpb.hole_offset,
|
||||
|
@ -9262,10 +9271,10 @@ xlog_redo(XLogRecPtr lsn, XLogRecord *record)
|
|||
BkpBlock bkpb;
|
||||
|
||||
/*
|
||||
* Full-page image (FPI) records contain a backup block stored "inline"
|
||||
* in the normal data since the locking when writing hint records isn't
|
||||
* sufficient to use the normal backup block mechanism, which assumes
|
||||
* exclusive lock on the buffer supplied.
|
||||
* Full-page image (FPI) records contain a backup block stored
|
||||
* "inline" in the normal data since the locking when writing hint
|
||||
* records isn't sufficient to use the normal backup block mechanism,
|
||||
* which assumes exclusive lock on the buffer supplied.
|
||||
*
|
||||
* Since the only change in these backup block are hint bits, there
|
||||
* are no recovery conflicts generated.
|
||||
|
|
|
@ -1256,10 +1256,10 @@ index_constraint_create(Relation heapRelation,
|
|||
/*
|
||||
* If needed, mark the index as primary and/or deferred in pg_index.
|
||||
*
|
||||
* Note: When making an existing index into a constraint, caller must
|
||||
* have a table lock that prevents concurrent table updates; otherwise,
|
||||
* there is a risk that concurrent readers of the table will miss seeing
|
||||
* this index at all.
|
||||
* Note: When making an existing index into a constraint, caller must have
|
||||
* a table lock that prevents concurrent table updates; otherwise, there
|
||||
* is a risk that concurrent readers of the table will miss seeing this
|
||||
* index at all.
|
||||
*/
|
||||
if (update_pgindex && (mark_as_primary || deferrable))
|
||||
{
|
||||
|
@ -1443,10 +1443,10 @@ index_drop(Oid indexId, bool concurrent)
|
|||
/*
|
||||
* Now we must wait until no running transaction could be using the
|
||||
* index for a query. Use AccessExclusiveLock here to check for
|
||||
* running transactions that hold locks of any kind on the table.
|
||||
* Note we do not need to worry about xacts that open the table for
|
||||
* reading after this point; they will see the index as invalid when
|
||||
* they open the relation.
|
||||
* running transactions that hold locks of any kind on the table. Note
|
||||
* we do not need to worry about xacts that open the table for reading
|
||||
* after this point; they will see the index as invalid when they open
|
||||
* the relation.
|
||||
*
|
||||
* Note: the reason we use actual lock acquisition here, rather than
|
||||
* just checking the ProcArray and sleeping, is that deadlock is
|
||||
|
|
|
@ -344,7 +344,7 @@ smgrDoPendingDeletes(bool isCommit)
|
|||
if (maxrels == 0)
|
||||
{
|
||||
maxrels = 8;
|
||||
srels = palloc(sizeof(SMgrRelation) * maxrels );
|
||||
srels = palloc(sizeof(SMgrRelation) * maxrels);
|
||||
}
|
||||
else if (maxrels <= nrels)
|
||||
{
|
||||
|
|
|
@ -177,8 +177,8 @@ create_toast_table(Relation rel, Oid toastOid, Oid toastIndexOid,
|
|||
return false;
|
||||
|
||||
/*
|
||||
* If requested check lockmode is sufficient. This is a cross check
|
||||
* in case of errors or conflicting decisions in earlier code.
|
||||
* If requested check lockmode is sufficient. This is a cross check in
|
||||
* case of errors or conflicting decisions in earlier code.
|
||||
*/
|
||||
if (check && lockmode != AccessExclusiveLock)
|
||||
elog(ERROR, "AccessExclusiveLock required to add toast table.");
|
||||
|
|
|
@ -696,11 +696,11 @@ make_new_heap(Oid OIDOldHeap, Oid NewTableSpace, bool forcetemp,
|
|||
*
|
||||
* If the relation doesn't have a TOAST table already, we can't need one
|
||||
* for the new relation. The other way around is possible though: if some
|
||||
* wide columns have been dropped, NewHeapCreateToastTable can decide
|
||||
* that no TOAST table is needed for the new table.
|
||||
* wide columns have been dropped, NewHeapCreateToastTable can decide that
|
||||
* no TOAST table is needed for the new table.
|
||||
*
|
||||
* Note that NewHeapCreateToastTable ends with CommandCounterIncrement,
|
||||
* so that the TOAST table will be visible for insertion.
|
||||
* Note that NewHeapCreateToastTable ends with CommandCounterIncrement, so
|
||||
* that the TOAST table will be visible for insertion.
|
||||
*/
|
||||
toastid = OldHeap->rd_rel->reltoastrelid;
|
||||
if (OidIsValid(toastid))
|
||||
|
@ -1404,7 +1404,8 @@ swap_relation_files(Oid r1, Oid r2, bool target_is_pg_class,
|
|||
relform1->relkind == RELKIND_TOASTVALUE &&
|
||||
relform2->relkind == RELKIND_TOASTVALUE)
|
||||
{
|
||||
Oid toastIndex1, toastIndex2;
|
||||
Oid toastIndex1,
|
||||
toastIndex2;
|
||||
|
||||
/* Get valid index for each relation */
|
||||
toastIndex1 = toast_get_valid_index(r1,
|
||||
|
@ -1511,11 +1512,11 @@ finish_heap_swap(Oid OIDOldHeap, Oid OIDNewHeap,
|
|||
* swap_relation_files()), thus relfrozenxid was not updated. That's
|
||||
* annoying because a potential reason for doing a VACUUM FULL is an
|
||||
* imminent or actual anti-wraparound shutdown. So, now that we can
|
||||
* access the new relation using its indices, update
|
||||
* relfrozenxid. pg_class doesn't have a toast relation, so we don't need
|
||||
* to update the corresponding toast relation. Note that there's little
|
||||
* point moving all relfrozenxid updates here since swap_relation_files()
|
||||
* needs to write to pg_class for non-mapped relations anyway.
|
||||
* access the new relation using its indices, update relfrozenxid.
|
||||
* pg_class doesn't have a toast relation, so we don't need to update the
|
||||
* corresponding toast relation. Note that there's little point moving all
|
||||
* relfrozenxid updates here since swap_relation_files() needs to write to
|
||||
* pg_class for non-mapped relations anyway.
|
||||
*/
|
||||
if (OIDOldHeap == RelationRelationId)
|
||||
{
|
||||
|
|
|
@ -2248,8 +2248,8 @@ CopyFrom(CopyState cstate)
|
|||
{
|
||||
/*
|
||||
* Reset the per-tuple exprcontext. We can only do this if the
|
||||
* tuple buffer is empty. (Calling the context the per-tuple memory
|
||||
* context is a bit of a misnomer now.)
|
||||
* tuple buffer is empty. (Calling the context the per-tuple
|
||||
* memory context is a bit of a misnomer now.)
|
||||
*/
|
||||
ResetPerTupleExprContext(estate);
|
||||
}
|
||||
|
@ -2569,19 +2569,20 @@ BeginCopyFrom(Relation rel,
|
|||
num_defaults++;
|
||||
|
||||
/*
|
||||
* If a default expression looks at the table being loaded, then
|
||||
* it could give the wrong answer when using multi-insert. Since
|
||||
* database access can be dynamic this is hard to test for
|
||||
* exactly, so we use the much wider test of whether the
|
||||
* default expression is volatile. We allow for the special case
|
||||
* of when the default expression is the nextval() of a sequence
|
||||
* which in this specific case is known to be safe for use with
|
||||
* the multi-insert optimisation. Hence we use this special case
|
||||
* function checker rather than the standard check for
|
||||
* If a default expression looks at the table being loaded,
|
||||
* then it could give the wrong answer when using
|
||||
* multi-insert. Since database access can be dynamic this is
|
||||
* hard to test for exactly, so we use the much wider test of
|
||||
* whether the default expression is volatile. We allow for
|
||||
* the special case of when the default expression is the
|
||||
* nextval() of a sequence which in this specific case is
|
||||
* known to be safe for use with the multi-insert
|
||||
* optimisation. Hence we use this special case function
|
||||
* checker rather than the standard check for
|
||||
* contain_volatile_functions().
|
||||
*/
|
||||
if (!volatile_defexprs)
|
||||
volatile_defexprs = contain_volatile_functions_not_nextval((Node *)defexpr);
|
||||
volatile_defexprs = contain_volatile_functions_not_nextval((Node *) defexpr);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -2861,7 +2862,7 @@ NextCopyFrom(CopyState cstate, ExprContext *econtext,
|
|||
|
||||
if (cstate->csv_mode)
|
||||
{
|
||||
if(string == NULL &&
|
||||
if (string == NULL &&
|
||||
cstate->force_notnull_flags[m])
|
||||
{
|
||||
/*
|
||||
|
@ -2870,14 +2871,14 @@ NextCopyFrom(CopyState cstate, ExprContext *econtext,
|
|||
*/
|
||||
string = cstate->null_print;
|
||||
}
|
||||
else if(string != NULL && cstate->force_null_flags[m]
|
||||
&& strcmp(string,cstate->null_print) == 0 )
|
||||
else if (string != NULL && cstate->force_null_flags[m]
|
||||
&& strcmp(string, cstate->null_print) == 0)
|
||||
{
|
||||
/*
|
||||
* FORCE_NULL option is set and column matches the NULL string.
|
||||
* It must have been quoted, or otherwise the string would already
|
||||
* have been set to NULL.
|
||||
* Convert it to NULL as specified.
|
||||
* FORCE_NULL option is set and column matches the NULL
|
||||
* string. It must have been quoted, or otherwise the
|
||||
* string would already have been set to NULL. Convert it
|
||||
* to NULL as specified.
|
||||
*/
|
||||
string = NULL;
|
||||
}
|
||||
|
|
|
@ -359,8 +359,8 @@ intorel_startup(DestReceiver *self, int operation, TupleDesc typeinfo)
|
|||
|
||||
/*
|
||||
* If necessary, create a TOAST table for the target table. Note that
|
||||
* NewRelationCreateToastTable ends with CommandCounterIncrement(), so that
|
||||
* the TOAST table will be visible for insertion.
|
||||
* NewRelationCreateToastTable ends with CommandCounterIncrement(), so
|
||||
* that the TOAST table will be visible for insertion.
|
||||
*/
|
||||
CommandCounterIncrement();
|
||||
|
||||
|
|
|
@ -751,7 +751,8 @@ dropdb(const char *dbname, bool missing_ok)
|
|||
HeapTuple tup;
|
||||
int notherbackends;
|
||||
int npreparedxacts;
|
||||
int nslots, nslots_active;
|
||||
int nslots,
|
||||
nslots_active;
|
||||
|
||||
/*
|
||||
* Look up the target database's OID, and get exclusive lock on it. We
|
||||
|
|
|
@ -321,7 +321,8 @@ ExplainOneQuery(Query *query, IntoClause *into, ExplainState *es,
|
|||
else
|
||||
{
|
||||
PlannedStmt *plan;
|
||||
instr_time planstart, planduration;
|
||||
instr_time planstart,
|
||||
planduration;
|
||||
|
||||
INSTR_TIME_SET_CURRENT(planstart);
|
||||
|
||||
|
|
|
@ -349,11 +349,11 @@ DefineIndex(Oid relationId,
|
|||
* index build; but for concurrent builds we allow INSERT/UPDATE/DELETE
|
||||
* (but not VACUUM).
|
||||
*
|
||||
* NB: Caller is responsible for making sure that relationId refers
|
||||
* to the relation on which the index should be built; except in bootstrap
|
||||
* mode, this will typically require the caller to have already locked
|
||||
* the relation. To avoid lock upgrade hazards, that lock should be at
|
||||
* least as strong as the one we take here.
|
||||
* NB: Caller is responsible for making sure that relationId refers to the
|
||||
* relation on which the index should be built; except in bootstrap mode,
|
||||
* this will typically require the caller to have already locked the
|
||||
* relation. To avoid lock upgrade hazards, that lock should be at least
|
||||
* as strong as the one we take here.
|
||||
*/
|
||||
lockmode = stmt->concurrent ? ShareUpdateExclusiveLock : ShareLock;
|
||||
rel = heap_open(relationId, lockmode);
|
||||
|
|
|
@ -240,9 +240,9 @@ ExecRefreshMatView(RefreshMatViewStmt *stmt, const char *queryString,
|
|||
owner = matviewRel->rd_rel->relowner;
|
||||
|
||||
/*
|
||||
* Create the transient table that will receive the regenerated data.
|
||||
* Lock it against access by any other process until commit (by which time
|
||||
* it will be gone).
|
||||
* Create the transient table that will receive the regenerated data. Lock
|
||||
* it against access by any other process until commit (by which time it
|
||||
* will be gone).
|
||||
*/
|
||||
OIDNewHeap = make_new_heap(matviewOid, tableSpace, concurrent,
|
||||
ExclusiveLock);
|
||||
|
|
|
@ -325,8 +325,8 @@ fill_seq_with_data(Relation rel, HeapTuple tuple)
|
|||
LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
|
||||
|
||||
/*
|
||||
* Since VACUUM does not process sequences, we have to force the tuple
|
||||
* to have xmin = FrozenTransactionId now. Otherwise it would become
|
||||
* Since VACUUM does not process sequences, we have to force the tuple to
|
||||
* have xmin = FrozenTransactionId now. Otherwise it would become
|
||||
* invisible to SELECTs after 2G transactions. It is okay to do this
|
||||
* because if the current transaction aborts, no other xact will ever
|
||||
* examine the sequence tuple anyway.
|
||||
|
@ -1554,13 +1554,13 @@ seq_redo(XLogRecPtr lsn, XLogRecord *record)
|
|||
page = (Page) BufferGetPage(buffer);
|
||||
|
||||
/*
|
||||
* We always reinit the page. However, since this WAL record type is
|
||||
* also used for updating sequences, it's possible that a hot-standby
|
||||
* backend is examining the page concurrently; so we mustn't transiently
|
||||
* trash the buffer. The solution is to build the correct new page
|
||||
* contents in local workspace and then memcpy into the buffer. Then only
|
||||
* bytes that are supposed to change will change, even transiently. We
|
||||
* must palloc the local page for alignment reasons.
|
||||
* We always reinit the page. However, since this WAL record type is also
|
||||
* used for updating sequences, it's possible that a hot-standby backend
|
||||
* is examining the page concurrently; so we mustn't transiently trash the
|
||||
* buffer. The solution is to build the correct new page contents in
|
||||
* local workspace and then memcpy into the buffer. Then only bytes that
|
||||
* are supposed to change will change, even transiently. We must palloc
|
||||
* the local page for alignment reasons.
|
||||
*/
|
||||
localpage = (Page) palloc(BufferGetPageSize(buffer));
|
||||
|
||||
|
|
|
@ -2787,12 +2787,13 @@ AlterTableGetLockLevel(List *cmds)
|
|||
break;
|
||||
|
||||
/*
|
||||
* These subcommands may require addition of toast tables. If we
|
||||
* add a toast table to a table currently being scanned, we
|
||||
* These subcommands may require addition of toast tables. If
|
||||
* we add a toast table to a table currently being scanned, we
|
||||
* might miss data added to the new toast table by concurrent
|
||||
* insert transactions.
|
||||
*/
|
||||
case AT_SetStorage: /* may add toast tables, see ATRewriteCatalogs() */
|
||||
case AT_SetStorage:/* may add toast tables, see
|
||||
* ATRewriteCatalogs() */
|
||||
cmd_lockmode = AccessExclusiveLock;
|
||||
break;
|
||||
|
||||
|
@ -2834,8 +2835,8 @@ AlterTableGetLockLevel(List *cmds)
|
|||
break;
|
||||
|
||||
/*
|
||||
* These subcommands affect write operations only.
|
||||
* XXX Theoretically, these could be ShareRowExclusiveLock.
|
||||
* These subcommands affect write operations only. XXX
|
||||
* Theoretically, these could be ShareRowExclusiveLock.
|
||||
*/
|
||||
case AT_ColumnDefault:
|
||||
case AT_ProcessedConstraint: /* becomes AT_AddConstraint */
|
||||
|
@ -2872,9 +2873,9 @@ AlterTableGetLockLevel(List *cmds)
|
|||
* Cases essentially the same as CREATE INDEX. We
|
||||
* could reduce the lock strength to ShareLock if
|
||||
* we can work out how to allow concurrent catalog
|
||||
* updates.
|
||||
* XXX Might be set down to ShareRowExclusiveLock
|
||||
* but requires further analysis.
|
||||
* updates. XXX Might be set down to
|
||||
* ShareRowExclusiveLock but requires further
|
||||
* analysis.
|
||||
*/
|
||||
cmd_lockmode = AccessExclusiveLock;
|
||||
break;
|
||||
|
@ -2883,10 +2884,9 @@ AlterTableGetLockLevel(List *cmds)
|
|||
/*
|
||||
* We add triggers to both tables when we add a
|
||||
* Foreign Key, so the lock level must be at least
|
||||
* as strong as CREATE TRIGGER.
|
||||
* XXX Might be set down to ShareRowExclusiveLock
|
||||
* though trigger info is accessed by
|
||||
* pg_get_triggerdef
|
||||
* as strong as CREATE TRIGGER. XXX Might be set
|
||||
* down to ShareRowExclusiveLock though trigger
|
||||
* info is accessed by pg_get_triggerdef
|
||||
*/
|
||||
cmd_lockmode = AccessExclusiveLock;
|
||||
break;
|
||||
|
@ -2902,8 +2902,8 @@ AlterTableGetLockLevel(List *cmds)
|
|||
* started before us will continue to see the old inheritance
|
||||
* behaviour, while queries started after we commit will see
|
||||
* new behaviour. No need to prevent reads or writes to the
|
||||
* subtable while we hook it up though.
|
||||
* Changing the TupDesc may be a problem, so keep highest lock.
|
||||
* subtable while we hook it up though. Changing the TupDesc
|
||||
* may be a problem, so keep highest lock.
|
||||
*/
|
||||
case AT_AddInherit:
|
||||
case AT_DropInherit:
|
||||
|
@ -2912,9 +2912,9 @@ AlterTableGetLockLevel(List *cmds)
|
|||
|
||||
/*
|
||||
* These subcommands affect implicit row type conversion. They
|
||||
* have effects similar to CREATE/DROP CAST on queries.
|
||||
* We don't provide for invalidating parse trees as a result of
|
||||
* such changes, so we keep these at AccessExclusiveLock.
|
||||
* have effects similar to CREATE/DROP CAST on queries. We don't
|
||||
* provide for invalidating parse trees as a result of such
|
||||
* changes, so we keep these at AccessExclusiveLock.
|
||||
*/
|
||||
case AT_AddOf:
|
||||
case AT_DropOf:
|
||||
|
@ -2947,22 +2947,25 @@ AlterTableGetLockLevel(List *cmds)
|
|||
cmd_lockmode = ShareUpdateExclusiveLock;
|
||||
break;
|
||||
|
||||
case AT_ValidateConstraint: /* Uses MVCC in getConstraints() */
|
||||
case AT_ValidateConstraint: /* Uses MVCC in
|
||||
* getConstraints() */
|
||||
cmd_lockmode = ShareUpdateExclusiveLock;
|
||||
break;
|
||||
|
||||
/*
|
||||
* Rel options are more complex than first appears. Options
|
||||
* are set here for tables, views and indexes; for historical
|
||||
* reasons these can all be used with ALTER TABLE, so we
|
||||
* can't decide between them using the basic grammar.
|
||||
* reasons these can all be used with ALTER TABLE, so we can't
|
||||
* decide between them using the basic grammar.
|
||||
*
|
||||
* XXX Look in detail at each option to determine lock level,
|
||||
* e.g.
|
||||
* cmd_lockmode = GetRelOptionsLockLevel((List *) cmd->def);
|
||||
* e.g. cmd_lockmode = GetRelOptionsLockLevel((List *)
|
||||
* cmd->def);
|
||||
*/
|
||||
case AT_SetRelOptions: /* Uses MVCC in getIndexes() and getTables() */
|
||||
case AT_ResetRelOptions: /* Uses MVCC in getIndexes() and getTables() */
|
||||
case AT_SetRelOptions: /* Uses MVCC in getIndexes() and
|
||||
* getTables() */
|
||||
case AT_ResetRelOptions: /* Uses MVCC in getIndexes() and
|
||||
* getTables() */
|
||||
cmd_lockmode = AccessExclusiveLock;
|
||||
break;
|
||||
|
||||
|
@ -3946,8 +3949,8 @@ ATRewriteTable(AlteredTableInfo *tab, Oid OIDNewHeap, LOCKMODE lockmode)
|
|||
HeapTupleSetOid(tuple, tupOid);
|
||||
|
||||
/*
|
||||
* Constraints might reference the tableoid column, so initialize
|
||||
* t_tableOid before evaluating them.
|
||||
* Constraints might reference the tableoid column, so
|
||||
* initialize t_tableOid before evaluating them.
|
||||
*/
|
||||
tuple->t_tableOid = RelationGetRelid(oldrel);
|
||||
}
|
||||
|
@ -6374,8 +6377,8 @@ ATExecAlterConstraint(Relation rel, AlterTableCmd *cmd,
|
|||
heap_freetuple(copyTuple);
|
||||
|
||||
/*
|
||||
* Now we need to update the multiple entries in pg_trigger
|
||||
* that implement the constraint.
|
||||
* Now we need to update the multiple entries in pg_trigger that
|
||||
* implement the constraint.
|
||||
*/
|
||||
tgrel = heap_open(TriggerRelationId, RowExclusiveLock);
|
||||
|
||||
|
@ -8150,11 +8153,11 @@ ATPostAlterTypeCleanup(List **wqueue, AlteredTableInfo *tab, LOCKMODE lockmode)
|
|||
* that before dropping. It's safe because the parser won't actually look
|
||||
* at the catalogs to detect the existing entry.
|
||||
*
|
||||
* We can't rely on the output of deparsing to tell us which relation
|
||||
* to operate on, because concurrent activity might have made the name
|
||||
* We can't rely on the output of deparsing to tell us which relation to
|
||||
* operate on, because concurrent activity might have made the name
|
||||
* resolve differently. Instead, we've got to use the OID of the
|
||||
* constraint or index we're processing to figure out which relation
|
||||
* to operate on.
|
||||
* constraint or index we're processing to figure out which relation to
|
||||
* operate on.
|
||||
*/
|
||||
forboth(oid_item, tab->changedConstraintOids,
|
||||
def_item, tab->changedConstraintDefs)
|
||||
|
@ -9099,6 +9102,7 @@ ATExecSetTableSpace(Oid tableOid, Oid newTableSpace, LOCKMODE lockmode)
|
|||
if (OidIsValid(reltoastrelid))
|
||||
{
|
||||
Relation toastRel = relation_open(reltoastrelid, lockmode);
|
||||
|
||||
reltoastidxids = RelationGetIndexList(toastRel);
|
||||
relation_close(toastRel, lockmode);
|
||||
}
|
||||
|
@ -9120,8 +9124,8 @@ ATExecSetTableSpace(Oid tableOid, Oid newTableSpace, LOCKMODE lockmode)
|
|||
FlushRelationBuffers(rel);
|
||||
|
||||
/*
|
||||
* Relfilenodes are not unique in databases across tablespaces, so we
|
||||
* need to allocate a new one in the new tablespace.
|
||||
* Relfilenodes are not unique in databases across tablespaces, so we need
|
||||
* to allocate a new one in the new tablespace.
|
||||
*/
|
||||
newrelfilenode = GetNewRelFileNode(newTableSpace, NULL,
|
||||
rel->rd_rel->relpersistence);
|
||||
|
@ -9236,9 +9240,9 @@ copy_relation_data(SMgrRelation src, SMgrRelation dst,
|
|||
forkNum))));
|
||||
|
||||
/*
|
||||
* WAL-log the copied page. Unfortunately we don't know what kind of
|
||||
* a page this is, so we have to log the full page including any
|
||||
* unused space.
|
||||
* WAL-log the copied page. Unfortunately we don't know what kind of a
|
||||
* page this is, so we have to log the full page including any unused
|
||||
* space.
|
||||
*/
|
||||
if (use_wal)
|
||||
log_newpage(&dst->smgr_rnode.node, forkNum, blkno, page, false);
|
||||
|
@ -10191,8 +10195,8 @@ relation_mark_replica_identity(Relation rel, char ri_type, Oid indexOid,
|
|||
}
|
||||
|
||||
/*
|
||||
* Clear the indisreplident flag from any index that had it previously, and
|
||||
* set it for any index that should have it now.
|
||||
* Clear the indisreplident flag from any index that had it previously,
|
||||
* and set it for any index that should have it now.
|
||||
*/
|
||||
pg_index = heap_open(IndexRelationId, RowExclusiveLock);
|
||||
foreach(index, RelationGetIndexList(rel))
|
||||
|
@ -10261,7 +10265,7 @@ ATExecReplicaIdentity(Relation rel, ReplicaIdentityStmt *stmt, LOCKMODE lockmode
|
|||
}
|
||||
else if (stmt->identity_type == REPLICA_IDENTITY_INDEX)
|
||||
{
|
||||
/* fallthrough */;
|
||||
/* fallthrough */ ;
|
||||
}
|
||||
else
|
||||
elog(ERROR, "unexpected identity type %u", stmt->identity_type);
|
||||
|
|
|
@ -1119,8 +1119,8 @@ AlterTableSpaceMove(AlterTableSpaceMoveStmt *stmt)
|
|||
|
||||
/*
|
||||
* Handle permissions-checking here since we are locking the tables
|
||||
* and also to avoid doing a bunch of work only to fail part-way.
|
||||
* Note that permissions will also be checked by AlterTableInternal().
|
||||
* and also to avoid doing a bunch of work only to fail part-way. Note
|
||||
* that permissions will also be checked by AlterTableInternal().
|
||||
*
|
||||
* Caller must be considered an owner on the table to move it.
|
||||
*/
|
||||
|
|
|
@ -3566,6 +3566,7 @@ AfterTriggerExecute(AfterTriggerEvent event,
|
|||
}
|
||||
/* fall through */
|
||||
case AFTER_TRIGGER_FDW_REUSE:
|
||||
|
||||
/*
|
||||
* Using ExecMaterializeSlot() rather than ExecFetchSlotTuple()
|
||||
* ensures that tg_trigtuple does not reference tuplestore memory.
|
||||
|
|
|
@ -706,10 +706,10 @@ lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats,
|
|||
* It's possible that another backend has extended the heap,
|
||||
* initialized the page, and then failed to WAL-log the page
|
||||
* due to an ERROR. Since heap extension is not WAL-logged,
|
||||
* recovery might try to replay our record setting the
|
||||
* page all-visible and find that the page isn't initialized,
|
||||
* which will cause a PANIC. To prevent that, check whether
|
||||
* the page has been previously WAL-logged, and if not, do that
|
||||
* recovery might try to replay our record setting the page
|
||||
* all-visible and find that the page isn't initialized, which
|
||||
* will cause a PANIC. To prevent that, check whether the
|
||||
* page has been previously WAL-logged, and if not, do that
|
||||
* now.
|
||||
*/
|
||||
if (RelationNeedsWAL(onerel) &&
|
||||
|
@ -834,8 +834,8 @@ lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats,
|
|||
* NB: Like with per-tuple hint bits, we can't set the
|
||||
* PD_ALL_VISIBLE flag if the inserter committed
|
||||
* asynchronously. See SetHintBits for more info. Check
|
||||
* that the tuple is hinted xmin-committed because
|
||||
* of that.
|
||||
* that the tuple is hinted xmin-committed because of
|
||||
* that.
|
||||
*/
|
||||
if (all_visible)
|
||||
{
|
||||
|
|
|
@ -460,8 +460,8 @@ DefineView(ViewStmt *stmt, const char *queryString)
|
|||
}
|
||||
|
||||
/*
|
||||
* If the check option is specified, look to see if the view is
|
||||
* actually auto-updatable or not.
|
||||
* If the check option is specified, look to see if the view is actually
|
||||
* auto-updatable or not.
|
||||
*/
|
||||
if (check_option)
|
||||
{
|
||||
|
|
|
@ -1639,7 +1639,8 @@ ExecWithCheckOptions(ResultRelInfo *resultRelInfo,
|
|||
TupleTableSlot *slot, EState *estate)
|
||||
{
|
||||
ExprContext *econtext;
|
||||
ListCell *l1, *l2;
|
||||
ListCell *l1,
|
||||
*l2;
|
||||
|
||||
/*
|
||||
* We will use the EState's per-tuple context for evaluating constraint
|
||||
|
|
|
@ -449,8 +449,8 @@ ExecInitFunctionScan(FunctionScan *node, EState *estate, int eflags)
|
|||
* Create the combined TupleDesc
|
||||
*
|
||||
* If there is just one function without ordinality, the scan result
|
||||
* tupdesc is the same as the function result tupdesc --- except that
|
||||
* we may stuff new names into it below, so drop any rowtype label.
|
||||
* tupdesc is the same as the function result tupdesc --- except that we
|
||||
* may stuff new names into it below, so drop any rowtype label.
|
||||
*/
|
||||
if (scanstate->simple)
|
||||
{
|
||||
|
|
|
@ -973,6 +973,7 @@ ExecModifyTable(ModifyTableState *node)
|
|||
* ctid!! */
|
||||
tupleid = &tuple_ctid;
|
||||
}
|
||||
|
||||
/*
|
||||
* Use the wholerow attribute, when available, to reconstruct
|
||||
* the old relation tuple.
|
||||
|
@ -1175,6 +1176,7 @@ ExecInitModifyTable(ModifyTable *node, EState *estate, int eflags)
|
|||
WithCheckOption *wco = (WithCheckOption *) lfirst(ll);
|
||||
ExprState *wcoExpr = ExecInitExpr((Expr *) wco->qual,
|
||||
mtstate->mt_plans[i]);
|
||||
|
||||
wcoExprs = lappend(wcoExprs, wcoExpr);
|
||||
}
|
||||
|
||||
|
|
|
@ -692,6 +692,7 @@ StreamConnection(pgsocket server_fd, Port *port)
|
|||
}
|
||||
|
||||
#ifdef WIN32
|
||||
|
||||
/*
|
||||
* This is a Win32 socket optimization. The ideal size is 32k.
|
||||
* http://support.microsoft.com/kb/823764/EN-US/
|
||||
|
|
|
@ -109,6 +109,7 @@ main(int argc, char *argv[])
|
|||
set_pglocale_pgservice(argv[0], PG_TEXTDOMAIN("postgres"));
|
||||
|
||||
#ifdef WIN32
|
||||
|
||||
/*
|
||||
* Windows uses codepages rather than the environment, so we work around
|
||||
* that by querying the environment explicitly first for LC_COLLATE and
|
||||
|
@ -202,6 +203,7 @@ main(int argc, char *argv[])
|
|||
#endif
|
||||
|
||||
#ifdef WIN32
|
||||
|
||||
/*
|
||||
* Start our win32 signal implementation
|
||||
*
|
||||
|
|
|
@ -3300,7 +3300,7 @@ _copyReplicaIdentityStmt(const ReplicaIdentityStmt *from)
|
|||
}
|
||||
|
||||
static AlterSystemStmt *
|
||||
_copyAlterSystemStmt(const AlterSystemStmt * from)
|
||||
_copyAlterSystemStmt(const AlterSystemStmt *from)
|
||||
{
|
||||
AlterSystemStmt *newnode = makeNode(AlterSystemStmt);
|
||||
|
||||
|
|
|
@ -1551,7 +1551,7 @@ _equalReplicaIdentityStmt(const ReplicaIdentityStmt *a, const ReplicaIdentityStm
|
|||
}
|
||||
|
||||
static bool
|
||||
_equalAlterSystemStmt(const AlterSystemStmt * a, const AlterSystemStmt * b)
|
||||
_equalAlterSystemStmt(const AlterSystemStmt *a, const AlterSystemStmt *b)
|
||||
{
|
||||
COMPARE_NODE_FIELD(setstmt);
|
||||
|
||||
|
|
|
@ -1938,8 +1938,8 @@ add_child_rel_equivalences(PlannerInfo *root,
|
|||
continue;
|
||||
|
||||
/*
|
||||
* No point in searching if parent rel not mentioned in eclass; but
|
||||
* we can't tell that for sure if parent rel is itself a child.
|
||||
* No point in searching if parent rel not mentioned in eclass; but we
|
||||
* can't tell that for sure if parent rel is itself a child.
|
||||
*/
|
||||
if (parent_rel->reloptkind == RELOPT_BASEREL &&
|
||||
!bms_is_subset(parent_rel->relids, cur_ec->ec_relids))
|
||||
|
|
|
@ -916,8 +916,8 @@ inheritance_planner(PlannerInfo *root)
|
|||
subplan = grouping_planner(&subroot, 0.0 /* retrieve all tuples */ );
|
||||
|
||||
/*
|
||||
* Planning may have modified the query result relation (if there
|
||||
* were security barrier quals on the result RTE).
|
||||
* Planning may have modified the query result relation (if there were
|
||||
* security barrier quals on the result RTE).
|
||||
*/
|
||||
appinfo->child_relid = subroot.parse->resultRelation;
|
||||
|
||||
|
@ -940,7 +940,8 @@ inheritance_planner(PlannerInfo *root)
|
|||
else
|
||||
{
|
||||
List *tmp_rtable = NIL;
|
||||
ListCell *cell1, *cell2;
|
||||
ListCell *cell1,
|
||||
*cell2;
|
||||
|
||||
/*
|
||||
* Check to see if any of the original RTEs were turned into
|
||||
|
|
|
@ -97,6 +97,7 @@ expand_security_quals(PlannerInfo *root, List *tlist)
|
|||
if (rt_index == parse->resultRelation)
|
||||
{
|
||||
RangeTblEntry *newrte = copyObject(rte);
|
||||
|
||||
parse->rtable = lappend(parse->rtable, newrte);
|
||||
parse->resultRelation = list_length(parse->rtable);
|
||||
|
||||
|
@ -117,11 +118,11 @@ expand_security_quals(PlannerInfo *root, List *tlist)
|
|||
rte->modifiedCols = NULL;
|
||||
|
||||
/*
|
||||
* For the most part, Vars referencing the original relation should
|
||||
* remain as they are, meaning that they pull OLD values from the
|
||||
* expanded RTE. But in the RETURNING list and in any WITH CHECK
|
||||
* OPTION quals, we want such Vars to represent NEW values, so
|
||||
* change them to reference the new RTE.
|
||||
* For the most part, Vars referencing the original relation
|
||||
* should remain as they are, meaning that they pull OLD values
|
||||
* from the expanded RTE. But in the RETURNING list and in any
|
||||
* WITH CHECK OPTION quals, we want such Vars to represent NEW
|
||||
* values, so change them to reference the new RTE.
|
||||
*/
|
||||
ChangeVarNodes((Node *) parse->returningList, rt_index,
|
||||
parse->resultRelation, 0);
|
||||
|
@ -142,6 +143,7 @@ expand_security_quals(PlannerInfo *root, List *tlist)
|
|||
while (rte->securityQuals != NIL)
|
||||
{
|
||||
Node *qual = (Node *) linitial(rte->securityQuals);
|
||||
|
||||
rte->securityQuals = list_delete_first(rte->securityQuals);
|
||||
|
||||
ChangeVarNodes(qual, rt_index, 1, 0);
|
||||
|
@ -182,6 +184,7 @@ expand_security_qual(PlannerInfo *root, List *tlist, int rt_index,
|
|||
switch (rte->rtekind)
|
||||
{
|
||||
case RTE_RELATION:
|
||||
|
||||
/*
|
||||
* Turn the relation RTE into a security barrier subquery RTE,
|
||||
* moving all permissions checks down into the subquery.
|
||||
|
@ -219,9 +222,9 @@ expand_security_qual(PlannerInfo *root, List *tlist, int rt_index,
|
|||
* Note that we can't push the user-defined quals down since they
|
||||
* may include untrusted functions and that means that we will
|
||||
* end up locking all rows which pass the securityQuals, even if
|
||||
* those rows don't pass the user-defined quals. This is currently
|
||||
* documented behavior, but it'd be nice to come up with a better
|
||||
* solution some day.
|
||||
* those rows don't pass the user-defined quals. This is
|
||||
* currently documented behavior, but it'd be nice to come up with
|
||||
* a better solution some day.
|
||||
*/
|
||||
rc = get_plan_rowmark(root->rowMarks, rt_index);
|
||||
if (rc != NULL)
|
||||
|
@ -277,6 +280,7 @@ expand_security_qual(PlannerInfo *root, List *tlist, int rt_index,
|
|||
break;
|
||||
|
||||
case RTE_SUBQUERY:
|
||||
|
||||
/*
|
||||
* Build a new subquery that includes all the same columns as the
|
||||
* original subquery.
|
||||
|
|
|
@ -1708,6 +1708,7 @@ adjust_appendrel_attrs_mutator(Node *node,
|
|||
foreach(lc, fields)
|
||||
{
|
||||
Var *field = (Var *) lfirst(lc);
|
||||
|
||||
field->varlevelsup += context->sublevels_up;
|
||||
}
|
||||
rowexpr = makeNode(RowExpr);
|
||||
|
|
|
@ -2131,7 +2131,8 @@ transformDistinctOnClause(ParseState *pstate, List *distinctlist,
|
|||
}
|
||||
|
||||
/*
|
||||
* An empty result list is impossible here because of grammar restrictions.
|
||||
* An empty result list is impossible here because of grammar
|
||||
* restrictions.
|
||||
*/
|
||||
Assert(result != NIL);
|
||||
|
||||
|
|
|
@ -143,8 +143,8 @@ downcase_truncate_identifier(const char *ident, int len, bool warn)
|
|||
* locale-aware translation. However, there are some locales where this
|
||||
* is not right either (eg, Turkish may do strange things with 'i' and
|
||||
* 'I'). Our current compromise is to use tolower() for characters with
|
||||
* the high bit set, as long as they aren't part of a multi-byte character,
|
||||
* and use an ASCII-only downcasing for 7-bit characters.
|
||||
* the high bit set, as long as they aren't part of a multi-byte
|
||||
* character, and use an ASCII-only downcasing for 7-bit characters.
|
||||
*/
|
||||
for (i = 0; i < len; i++)
|
||||
{
|
||||
|
|
|
@ -374,8 +374,8 @@ CreateAnonymousSegment(Size *size)
|
|||
(huge_pages == HUGE_PAGES_TRY && ptr == MAP_FAILED))
|
||||
{
|
||||
/*
|
||||
* use the original size, not the rounded up value, when falling
|
||||
* back to non-huge pages.
|
||||
* use the original size, not the rounded up value, when falling back
|
||||
* to non-huge pages.
|
||||
*/
|
||||
allocsize = *size;
|
||||
ptr = mmap(NULL, allocsize, PROT_READ | PROT_WRITE,
|
||||
|
@ -512,9 +512,9 @@ PGSharedMemoryCreate(Size size, bool makePrivate, int port,
|
|||
/*
|
||||
* The segment appears to be from a dead Postgres process, or from a
|
||||
* previous cycle of life in this same process. Zap it, if possible,
|
||||
* and any associated dynamic shared memory segments, as well.
|
||||
* This probably shouldn't fail, but if it does, assume the segment
|
||||
* belongs to someone else after all, and continue quietly.
|
||||
* and any associated dynamic shared memory segments, as well. This
|
||||
* probably shouldn't fail, but if it does, assume the segment belongs
|
||||
* to someone else after all, and continue quietly.
|
||||
*/
|
||||
if (hdr->dsm_control != 0)
|
||||
dsm_cleanup_using_control_segment(hdr->dsm_control);
|
||||
|
|
|
@ -127,10 +127,10 @@ BackgroundWorkerShmemInit(void)
|
|||
BackgroundWorkerData->total_slots = max_worker_processes;
|
||||
|
||||
/*
|
||||
* Copy contents of worker list into shared memory. Record the
|
||||
* shared memory slot assigned to each worker. This ensures
|
||||
* a 1-to-1 correspondence between the postmaster's private list and
|
||||
* the array in shared memory.
|
||||
* Copy contents of worker list into shared memory. Record the shared
|
||||
* memory slot assigned to each worker. This ensures a 1-to-1
|
||||
* correspondence between the postmaster's private list and the array
|
||||
* in shared memory.
|
||||
*/
|
||||
slist_foreach(siter, &BackgroundWorkerList)
|
||||
{
|
||||
|
@ -200,8 +200,8 @@ BackgroundWorkerStateChange(void)
|
|||
* The total number of slots stored in shared memory should match our
|
||||
* notion of max_worker_processes. If it does not, something is very
|
||||
* wrong. Further down, we always refer to this value as
|
||||
* max_worker_processes, in case shared memory gets corrupted while
|
||||
* we're looping.
|
||||
* max_worker_processes, in case shared memory gets corrupted while we're
|
||||
* looping.
|
||||
*/
|
||||
if (max_worker_processes != BackgroundWorkerData->total_slots)
|
||||
{
|
||||
|
@ -213,8 +213,8 @@ BackgroundWorkerStateChange(void)
|
|||
}
|
||||
|
||||
/*
|
||||
* Iterate through slots, looking for newly-registered workers or
|
||||
* workers who must die.
|
||||
* Iterate through slots, looking for newly-registered workers or workers
|
||||
* who must die.
|
||||
*/
|
||||
for (slotno = 0; slotno < max_worker_processes; ++slotno)
|
||||
{
|
||||
|
@ -267,8 +267,8 @@ BackgroundWorkerStateChange(void)
|
|||
}
|
||||
|
||||
/*
|
||||
* Copy strings in a paranoid way. If shared memory is corrupted,
|
||||
* the source data might not even be NUL-terminated.
|
||||
* Copy strings in a paranoid way. If shared memory is corrupted, the
|
||||
* source data might not even be NUL-terminated.
|
||||
*/
|
||||
ascii_safe_strlcpy(rw->rw_worker.bgw_name,
|
||||
slot->worker.bgw_name, BGW_MAXLEN);
|
||||
|
@ -280,10 +280,10 @@ BackgroundWorkerStateChange(void)
|
|||
/*
|
||||
* Copy various fixed-size fields.
|
||||
*
|
||||
* flags, start_time, and restart_time are examined by the
|
||||
* postmaster, but nothing too bad will happen if they are
|
||||
* corrupted. The remaining fields will only be examined by the
|
||||
* child process. It might crash, but we won't.
|
||||
* flags, start_time, and restart_time are examined by the postmaster,
|
||||
* but nothing too bad will happen if they are corrupted. The
|
||||
* remaining fields will only be examined by the child process. It
|
||||
* might crash, but we won't.
|
||||
*/
|
||||
rw->rw_worker.bgw_flags = slot->worker.bgw_flags;
|
||||
rw->rw_worker.bgw_start_time = slot->worker.bgw_start_time;
|
||||
|
@ -292,13 +292,13 @@ BackgroundWorkerStateChange(void)
|
|||
rw->rw_worker.bgw_main_arg = slot->worker.bgw_main_arg;
|
||||
|
||||
/*
|
||||
* Copy the PID to be notified about state changes, but only if
|
||||
* the postmaster knows about a backend with that PID. It isn't
|
||||
* an error if the postmaster doesn't know about the PID, because
|
||||
* the backend that requested the worker could have died (or been
|
||||
* killed) just after doing so. Nonetheless, at least until we get
|
||||
* some experience with how this plays out in the wild, log a message
|
||||
* at a relatively high debug level.
|
||||
* Copy the PID to be notified about state changes, but only if the
|
||||
* postmaster knows about a backend with that PID. It isn't an error
|
||||
* if the postmaster doesn't know about the PID, because the backend
|
||||
* that requested the worker could have died (or been killed) just
|
||||
* after doing so. Nonetheless, at least until we get some experience
|
||||
* with how this plays out in the wild, log a message at a relatively
|
||||
* high debug level.
|
||||
*/
|
||||
rw->rw_worker.bgw_notify_pid = slot->worker.bgw_notify_pid;
|
||||
if (!PostmasterMarkPIDForWorkerNotify(rw->rw_worker.bgw_notify_pid))
|
||||
|
@ -633,11 +633,11 @@ StartBackgroundWorker(void)
|
|||
/*
|
||||
* If bgw_main is set, we use that value as the initial entrypoint.
|
||||
* However, if the library containing the entrypoint wasn't loaded at
|
||||
* postmaster startup time, passing it as a direct function pointer is
|
||||
* not possible. To work around that, we allow callers for whom a
|
||||
* function pointer is not available to pass a library name (which will
|
||||
* be loaded, if necessary) and a function name (which will be looked up
|
||||
* in the named library).
|
||||
* postmaster startup time, passing it as a direct function pointer is not
|
||||
* possible. To work around that, we allow callers for whom a function
|
||||
* pointer is not available to pass a library name (which will be loaded,
|
||||
* if necessary) and a function name (which will be looked up in the named
|
||||
* library).
|
||||
*/
|
||||
if (worker->bgw_main != NULL)
|
||||
entrypt = worker->bgw_main;
|
||||
|
@ -761,12 +761,12 @@ RegisterDynamicBackgroundWorker(BackgroundWorker *worker,
|
|||
uint64 generation = 0;
|
||||
|
||||
/*
|
||||
* We can't register dynamic background workers from the postmaster.
|
||||
* If this is a standalone backend, we're the only process and can't
|
||||
* start any more. In a multi-process environment, it might be
|
||||
* theoretically possible, but we don't currently support it due to
|
||||
* locking considerations; see comments on the BackgroundWorkerSlot
|
||||
* data structure.
|
||||
* We can't register dynamic background workers from the postmaster. If
|
||||
* this is a standalone backend, we're the only process and can't start
|
||||
* any more. In a multi-process environment, it might be theoretically
|
||||
* possible, but we don't currently support it due to locking
|
||||
* considerations; see comments on the BackgroundWorkerSlot data
|
||||
* structure.
|
||||
*/
|
||||
if (!IsUnderPostmaster)
|
||||
return false;
|
||||
|
@ -792,8 +792,8 @@ RegisterDynamicBackgroundWorker(BackgroundWorker *worker,
|
|||
generation = slot->generation;
|
||||
|
||||
/*
|
||||
* Make sure postmaster doesn't see the slot as in use before
|
||||
* it sees the new contents.
|
||||
* Make sure postmaster doesn't see the slot as in use before it
|
||||
* sees the new contents.
|
||||
*/
|
||||
pg_write_barrier();
|
||||
|
||||
|
@ -845,10 +845,10 @@ GetBackgroundWorkerPid(BackgroundWorkerHandle *handle, pid_t *pidp)
|
|||
slot = &BackgroundWorkerData->slot[handle->slot];
|
||||
|
||||
/*
|
||||
* We could probably arrange to synchronize access to data using
|
||||
* memory barriers only, but for now, let's just keep it simple and
|
||||
* grab the lock. It seems unlikely that there will be enough traffic
|
||||
* here to result in meaningful contention.
|
||||
* We could probably arrange to synchronize access to data using memory
|
||||
* barriers only, but for now, let's just keep it simple and grab the
|
||||
* lock. It seems unlikely that there will be enough traffic here to
|
||||
* result in meaningful contention.
|
||||
*/
|
||||
LWLockAcquire(BackgroundWorkerLock, LW_SHARED);
|
||||
|
||||
|
|
|
@ -298,11 +298,11 @@ BackgroundWriterMain(void)
|
|||
}
|
||||
|
||||
/*
|
||||
* Log a new xl_running_xacts every now and then so replication can get
|
||||
* into a consistent state faster (think of suboverflowed snapshots)
|
||||
* and clean up resources (locks, KnownXids*) more frequently. The
|
||||
* costs of this are relatively low, so doing it 4 times
|
||||
* (LOG_SNAPSHOT_INTERVAL_MS) a minute seems fine.
|
||||
* Log a new xl_running_xacts every now and then so replication can
|
||||
* get into a consistent state faster (think of suboverflowed
|
||||
* snapshots) and clean up resources (locks, KnownXids*) more
|
||||
* frequently. The costs of this are relatively low, so doing it 4
|
||||
* times (LOG_SNAPSHOT_INTERVAL_MS) a minute seems fine.
|
||||
*
|
||||
* We assume the interval for writing xl_running_xacts is
|
||||
* significantly bigger than BgWriterDelay, so we don't complicate the
|
||||
|
@ -314,20 +314,21 @@ BackgroundWriterMain(void)
|
|||
* we've logged a running xacts.
|
||||
*
|
||||
* We do this logging in the bgwriter as it's the only process that's
|
||||
* run regularly and returns to its mainloop all the
|
||||
* time. E.g. Checkpointer, when active, is barely ever in its
|
||||
* mainloop and thus makes it hard to log regularly.
|
||||
* run regularly and returns to its mainloop all the time. E.g.
|
||||
* Checkpointer, when active, is barely ever in its mainloop and thus
|
||||
* makes it hard to log regularly.
|
||||
*/
|
||||
if (XLogStandbyInfoActive() && !RecoveryInProgress())
|
||||
{
|
||||
TimestampTz timeout = 0;
|
||||
TimestampTz now = GetCurrentTimestamp();
|
||||
|
||||
timeout = TimestampTzPlusMilliseconds(last_snapshot_ts,
|
||||
LOG_SNAPSHOT_INTERVAL_MS);
|
||||
|
||||
/*
|
||||
* only log if enough time has passed and some xlog record has been
|
||||
* inserted.
|
||||
* only log if enough time has passed and some xlog record has
|
||||
* been inserted.
|
||||
*/
|
||||
if (now >= timeout &&
|
||||
last_snapshot_lsn != GetXLogInsertRecPtr())
|
||||
|
|
|
@ -487,14 +487,20 @@ pgarch_ArchiverCopyLoop(void)
|
|||
/* successful */
|
||||
pgarch_archiveDone(xlog);
|
||||
|
||||
/* Tell the collector about the WAL file that we successfully archived */
|
||||
/*
|
||||
* Tell the collector about the WAL file that we successfully
|
||||
* archived
|
||||
*/
|
||||
pgstat_send_archiver(xlog, false);
|
||||
|
||||
break; /* out of inner retry loop */
|
||||
}
|
||||
else
|
||||
{
|
||||
/* Tell the collector about the WAL file that we failed to archive */
|
||||
/*
|
||||
* Tell the collector about the WAL file that we failed to
|
||||
* archive
|
||||
*/
|
||||
pgstat_send_archiver(xlog, true);
|
||||
|
||||
if (++failures >= NUM_ARCHIVE_RETRIES)
|
||||
|
|
|
@ -3912,8 +3912,8 @@ pgstat_read_statsfiles(Oid onlydb, bool permanent, bool deep)
|
|||
HASH_ELEM | HASH_FUNCTION | HASH_CONTEXT);
|
||||
|
||||
/*
|
||||
* Clear out global and archiver statistics so they start from zero
|
||||
* in case we can't load an existing statsfile.
|
||||
* Clear out global and archiver statistics so they start from zero in
|
||||
* case we can't load an existing statsfile.
|
||||
*/
|
||||
memset(&globalStats, 0, sizeof(globalStats));
|
||||
memset(&archiverStats, 0, sizeof(archiverStats));
|
||||
|
|
|
@ -1093,6 +1093,7 @@ PostmasterMain(int argc, char *argv[])
|
|||
InitPostmasterDeathWatchHandle();
|
||||
|
||||
#ifdef WIN32
|
||||
|
||||
/*
|
||||
* Initialize I/O completion port used to deliver list of dead children.
|
||||
*/
|
||||
|
@ -1655,9 +1656,9 @@ ServerLoop(void)
|
|||
|
||||
/*
|
||||
* If we already sent SIGQUIT to children and they are slow to shut
|
||||
* down, it's time to send them SIGKILL. This doesn't happen normally,
|
||||
* but under certain conditions backends can get stuck while shutting
|
||||
* down. This is a last measure to get them unwedged.
|
||||
* down, it's time to send them SIGKILL. This doesn't happen
|
||||
* normally, but under certain conditions backends can get stuck while
|
||||
* shutting down. This is a last measure to get them unwedged.
|
||||
*
|
||||
* Note we also do this during recovery from a process crash.
|
||||
*/
|
||||
|
@ -1671,8 +1672,8 @@ ServerLoop(void)
|
|||
AbortStartTime = 0;
|
||||
|
||||
/*
|
||||
* Additionally, unless we're recovering from a process crash, it's
|
||||
* now the time for postmaster to abandon ship.
|
||||
* Additionally, unless we're recovering from a process crash,
|
||||
* it's now the time for postmaster to abandon ship.
|
||||
*/
|
||||
if (!FatalError)
|
||||
ExitPostmaster(1);
|
||||
|
@ -2884,6 +2885,7 @@ CleanupBackgroundWorker(int pid,
|
|||
#ifdef EXEC_BACKEND
|
||||
ShmemBackendArrayRemove(rw->rw_backend);
|
||||
#endif
|
||||
|
||||
/*
|
||||
* It's possible that this background worker started some OTHER
|
||||
* background worker and asked to be notified when that worker
|
||||
|
@ -2930,6 +2932,7 @@ CleanupBackend(int pid,
|
|||
*/
|
||||
|
||||
#ifdef WIN32
|
||||
|
||||
/*
|
||||
* On win32, also treat ERROR_WAIT_NO_CHILDREN (128) as nonfatal case,
|
||||
* since that sometimes happens under load when the process fails to start
|
||||
|
@ -2974,12 +2977,12 @@ CleanupBackend(int pid,
|
|||
if (bp->bgworker_notify)
|
||||
{
|
||||
/*
|
||||
* This backend may have been slated to receive SIGUSR1
|
||||
* when some background worker started or stopped. Cancel
|
||||
* those notifications, as we don't want to signal PIDs that
|
||||
* are not PostgreSQL backends. This gets skipped in the
|
||||
* (probably very common) case where the backend has never
|
||||
* requested any such notifications.
|
||||
* This backend may have been slated to receive SIGUSR1 when
|
||||
* some background worker started or stopped. Cancel those
|
||||
* notifications, as we don't want to signal PIDs that are not
|
||||
* PostgreSQL backends. This gets skipped in the (probably
|
||||
* very common) case where the backend has never requested any
|
||||
* such notifications.
|
||||
*/
|
||||
BackgroundWorkerStopNotifications(bp->pid);
|
||||
}
|
||||
|
@ -3006,10 +3009,11 @@ HandleChildCrash(int pid, int exitstatus, const char *procname)
|
|||
bool take_action;
|
||||
|
||||
/*
|
||||
* We only log messages and send signals if this is the first process crash
|
||||
* and we're not doing an immediate shutdown; otherwise, we're only here to
|
||||
* update postmaster's idea of live processes. If we have already signalled
|
||||
* children, nonzero exit status is to be expected, so don't clutter log.
|
||||
* We only log messages and send signals if this is the first process
|
||||
* crash and we're not doing an immediate shutdown; otherwise, we're only
|
||||
* here to update postmaster's idea of live processes. If we have already
|
||||
* signalled children, nonzero exit status is to be expected, so don't
|
||||
* clutter log.
|
||||
*/
|
||||
take_action = !FatalError && Shutdown != ImmediateShutdown;
|
||||
|
||||
|
@ -3366,13 +3370,13 @@ PostmasterStateMachine(void)
|
|||
* PM_WAIT_BACKENDS state ends when we have no regular backends
|
||||
* (including autovac workers), no bgworkers (including unconnected
|
||||
* ones), and no walwriter, autovac launcher or bgwriter. If we are
|
||||
* doing crash recovery or an immediate shutdown then we expect
|
||||
* the checkpointer to exit as well, otherwise not. The archiver,
|
||||
* stats, and syslogger processes are disregarded since
|
||||
* they are not connected to shared memory; we also disregard
|
||||
* dead_end children here. Walsenders are also disregarded,
|
||||
* they will be terminated later after writing the checkpoint record,
|
||||
* like the archiver process.
|
||||
* doing crash recovery or an immediate shutdown then we expect the
|
||||
* checkpointer to exit as well, otherwise not. The archiver, stats,
|
||||
* and syslogger processes are disregarded since they are not
|
||||
* connected to shared memory; we also disregard dead_end children
|
||||
* here. Walsenders are also disregarded, they will be terminated
|
||||
* later after writing the checkpoint record, like the archiver
|
||||
* process.
|
||||
*/
|
||||
if (CountChildren(BACKEND_TYPE_NORMAL | BACKEND_TYPE_WORKER) == 0 &&
|
||||
CountUnconnectedWorkers() == 0 &&
|
||||
|
|
|
@ -670,6 +670,7 @@ SysLogger_Start(void)
|
|||
close(syslogPipe[1]);
|
||||
syslogPipe[1] = -1;
|
||||
#else
|
||||
|
||||
/*
|
||||
* open the pipe in binary mode and make sure stderr is binary
|
||||
* after it's been dup'ed into, to avoid disturbing the pipe
|
||||
|
|
|
@ -137,8 +137,8 @@ perform_base_backup(basebackup_options *opt, DIR *tblspcdir)
|
|||
SendXlogRecPtrResult(startptr, starttli);
|
||||
|
||||
/*
|
||||
* Calculate the relative path of temporary statistics directory
|
||||
* in order to skip the files which are located in that directory later.
|
||||
* Calculate the relative path of temporary statistics directory in order
|
||||
* to skip the files which are located in that directory later.
|
||||
*/
|
||||
if (is_absolute_path(pgstat_stat_directory) &&
|
||||
strncmp(pgstat_stat_directory, DataDir, datadirpathlen) == 0)
|
||||
|
@ -231,8 +231,8 @@ perform_base_backup(basebackup_options *opt, DIR *tblspcdir)
|
|||
(int64) opt->maxrate * (int64) 1024 / THROTTLING_FREQUENCY;
|
||||
|
||||
/*
|
||||
* The minimum amount of time for throttling_sample
|
||||
* bytes to be transferred.
|
||||
* The minimum amount of time for throttling_sample bytes to be
|
||||
* transferred.
|
||||
*/
|
||||
elapsed_min_unit = USECS_PER_SEC / THROTTLING_FREQUENCY;
|
||||
|
||||
|
@ -1276,8 +1276,8 @@ throttle(size_t increment)
|
|||
else
|
||||
{
|
||||
/*
|
||||
* The actual transfer rate is below the limit. A negative value would
|
||||
* distort the adjustment of throttled_last.
|
||||
* The actual transfer rate is below the limit. A negative value
|
||||
* would distort the adjustment of throttled_last.
|
||||
*/
|
||||
wait_result = 0;
|
||||
sleep = 0;
|
||||
|
|
|
@ -156,6 +156,7 @@ DecodeXLogOp(LogicalDecodingContext *ctx, XLogRecordBuffer *buf)
|
|||
|
||||
break;
|
||||
case XLOG_CHECKPOINT_ONLINE:
|
||||
|
||||
/*
|
||||
* a RUNNING_XACTS record will have been logged near to this, we
|
||||
* can restart from there.
|
||||
|
@ -292,6 +293,7 @@ DecodeXactOp(LogicalDecodingContext *ctx, XLogRecordBuffer *buf)
|
|||
break;
|
||||
}
|
||||
case XLOG_XACT_PREPARE:
|
||||
|
||||
/*
|
||||
* Currently decoding ignores PREPARE TRANSACTION and will just
|
||||
* decode the transaction when the COMMIT PREPARED is sent or
|
||||
|
@ -321,7 +323,9 @@ DecodeStandbyOp(LogicalDecodingContext *ctx, XLogRecordBuffer *buf)
|
|||
case XLOG_RUNNING_XACTS:
|
||||
{
|
||||
xl_running_xacts *running = (xl_running_xacts *) buf->record_data;
|
||||
|
||||
SnapBuildProcessRunningXacts(builder, buf->origptr, running);
|
||||
|
||||
/*
|
||||
* Abort all transactions that we keep track of, that are
|
||||
* older than the record's oldestRunningXid. This is the most
|
||||
|
@ -364,21 +368,24 @@ DecodeHeap2Op(LogicalDecodingContext *ctx, XLogRecordBuffer *buf)
|
|||
case XLOG_HEAP2_NEW_CID:
|
||||
{
|
||||
xl_heap_new_cid *xlrec;
|
||||
|
||||
xlrec = (xl_heap_new_cid *) buf->record_data;
|
||||
SnapBuildProcessNewCid(builder, xid, buf->origptr, xlrec);
|
||||
|
||||
break;
|
||||
}
|
||||
case XLOG_HEAP2_REWRITE:
|
||||
|
||||
/*
|
||||
* Although these records only exist to serve the needs of logical
|
||||
* decoding, all the work happens as part of crash or archive
|
||||
* recovery, so we don't need to do anything here.
|
||||
*/
|
||||
break;
|
||||
|
||||
/*
|
||||
* Everything else here is just low level physical stuff we're
|
||||
* not interested in.
|
||||
* Everything else here is just low level physical stuff we're not
|
||||
* interested in.
|
||||
*/
|
||||
case XLOG_HEAP2_FREEZE_PAGE:
|
||||
case XLOG_HEAP2_CLEAN:
|
||||
|
@ -429,6 +436,7 @@ DecodeHeapOp(LogicalDecodingContext *ctx, XLogRecordBuffer *buf)
|
|||
break;
|
||||
|
||||
case XLOG_HEAP_NEWPAGE:
|
||||
|
||||
/*
|
||||
* This is only used in places like indexams and CLUSTER which
|
||||
* don't contain changes relevant for logical replication.
|
||||
|
@ -436,6 +444,7 @@ DecodeHeapOp(LogicalDecodingContext *ctx, XLogRecordBuffer *buf)
|
|||
break;
|
||||
|
||||
case XLOG_HEAP_INPLACE:
|
||||
|
||||
/*
|
||||
* Inplace updates are only ever performed on catalog tuples and
|
||||
* can, per definition, not change tuple visibility. Since we
|
||||
|
|
|
@ -117,7 +117,8 @@ StartupDecodingContext(List *output_plugin_options,
|
|||
LogicalOutputPluginWriterWrite do_write)
|
||||
{
|
||||
ReplicationSlot *slot;
|
||||
MemoryContext context, old_context;
|
||||
MemoryContext context,
|
||||
old_context;
|
||||
LogicalDecodingContext *ctx;
|
||||
|
||||
/* shorter lines... */
|
||||
|
@ -133,7 +134,10 @@ StartupDecodingContext(List *output_plugin_options,
|
|||
|
||||
ctx->context = context;
|
||||
|
||||
/* (re-)load output plugins, so we detect a bad (removed) output plugin now. */
|
||||
/*
|
||||
* (re-)load output plugins, so we detect a bad (removed) output plugin
|
||||
* now.
|
||||
*/
|
||||
LoadOutputPlugin(&ctx->callbacks, NameStr(slot->data.plugin));
|
||||
|
||||
/*
|
||||
|
@ -254,7 +258,7 @@ CreateInitDecodingContext(char *plugin,
|
|||
{
|
||||
XLogRecPtr flushptr;
|
||||
|
||||
/* start at current insert position*/
|
||||
/* start at current insert position */
|
||||
slot->data.restart_lsn = GetXLogInsertRecPtr();
|
||||
|
||||
/* make sure we have enough information to start */
|
||||
|
@ -307,8 +311,8 @@ CreateInitDecodingContext(char *plugin,
|
|||
LWLockRelease(ProcArrayLock);
|
||||
|
||||
/*
|
||||
* tell the snapshot builder to only assemble snapshot once reaching
|
||||
* a running_xact's record with the respective xmin.
|
||||
* tell the snapshot builder to only assemble snapshot once reaching a
|
||||
* running_xact's record with the respective xmin.
|
||||
*/
|
||||
xmin_horizon = slot->data.catalog_xmin;
|
||||
|
||||
|
@ -385,14 +389,14 @@ CreateDecodingContext(XLogRecPtr start_lsn,
|
|||
* pretty common for a client to acknowledge a LSN it doesn't have to
|
||||
* do anything for, and thus didn't store persistently, because the
|
||||
* xlog records didn't result in anything relevant for logical
|
||||
* decoding. Clients have to be able to do that to support
|
||||
* synchronous replication.
|
||||
* decoding. Clients have to be able to do that to support synchronous
|
||||
* replication.
|
||||
*/
|
||||
start_lsn = slot->data.confirmed_flush;
|
||||
elog(DEBUG1, "cannot stream from %X/%X, minimum is %X/%X, forwarding",
|
||||
(uint32)(start_lsn >> 32), (uint32)start_lsn,
|
||||
(uint32)(slot->data.confirmed_flush >> 32),
|
||||
(uint32)slot->data.confirmed_flush);
|
||||
(uint32) (start_lsn >> 32), (uint32) start_lsn,
|
||||
(uint32) (slot->data.confirmed_flush >> 32),
|
||||
(uint32) slot->data.confirmed_flush);
|
||||
}
|
||||
|
||||
ctx = StartupDecodingContext(output_plugin_options,
|
||||
|
@ -409,10 +413,10 @@ CreateDecodingContext(XLogRecPtr start_lsn,
|
|||
(errmsg("starting logical decoding for slot %s",
|
||||
NameStr(slot->data.name)),
|
||||
errdetail("streaming transactions committing after %X/%X, reading WAL from %X/%X",
|
||||
(uint32)(slot->data.confirmed_flush >> 32),
|
||||
(uint32)slot->data.confirmed_flush,
|
||||
(uint32)(slot->data.restart_lsn >> 32),
|
||||
(uint32)slot->data.restart_lsn)));
|
||||
(uint32) (slot->data.confirmed_flush >> 32),
|
||||
(uint32) slot->data.confirmed_flush,
|
||||
(uint32) (slot->data.restart_lsn >> 32),
|
||||
(uint32) slot->data.restart_lsn)));
|
||||
|
||||
return ctx;
|
||||
}
|
||||
|
@ -438,8 +442,8 @@ DecodingContextFindStartpoint(LogicalDecodingContext *ctx)
|
|||
startptr = ctx->slot->data.restart_lsn;
|
||||
|
||||
elog(DEBUG1, "searching for logical decoding starting point, starting at %X/%X",
|
||||
(uint32)(ctx->slot->data.restart_lsn >> 32),
|
||||
(uint32)ctx->slot->data.restart_lsn);
|
||||
(uint32) (ctx->slot->data.restart_lsn >> 32),
|
||||
(uint32) ctx->slot->data.restart_lsn);
|
||||
|
||||
/* Wait for a consistent starting point */
|
||||
for (;;)
|
||||
|
@ -543,14 +547,15 @@ static void
|
|||
output_plugin_error_callback(void *arg)
|
||||
{
|
||||
LogicalErrorCallbackState *state = (LogicalErrorCallbackState *) arg;
|
||||
|
||||
/* not all callbacks have an associated LSN */
|
||||
if (state->report_location != InvalidXLogRecPtr)
|
||||
errcontext("slot \"%s\", output plugin \"%s\", in the %s callback, associated LSN %X/%X",
|
||||
NameStr(state->ctx->slot->data.name),
|
||||
NameStr(state->ctx->slot->data.plugin),
|
||||
state->callback_name,
|
||||
(uint32)(state->report_location >> 32),
|
||||
(uint32)state->report_location);
|
||||
(uint32) (state->report_location >> 32),
|
||||
(uint32) state->report_location);
|
||||
else
|
||||
errcontext("slot \"%s\", output plugin \"%s\", in the %s callback",
|
||||
NameStr(state->ctx->slot->data.name),
|
||||
|
@ -690,6 +695,7 @@ change_cb_wrapper(ReorderBuffer *cache, ReorderBufferTXN *txn,
|
|||
/* set output state */
|
||||
ctx->accept_writes = true;
|
||||
ctx->write_xid = txn->xid;
|
||||
|
||||
/*
|
||||
* report this change's lsn so replies from clients can give an up2date
|
||||
* answer. This won't ever be enough (and shouldn't be!) to confirm
|
||||
|
@ -725,16 +731,17 @@ LogicalIncreaseXminForSlot(XLogRecPtr current_lsn, TransactionId xmin)
|
|||
SpinLockAcquire(&slot->mutex);
|
||||
|
||||
/*
|
||||
* don't overwrite if we already have a newer xmin. This can
|
||||
* happen if we restart decoding in a slot.
|
||||
* don't overwrite if we already have a newer xmin. This can happen if we
|
||||
* restart decoding in a slot.
|
||||
*/
|
||||
if (TransactionIdPrecedesOrEquals(xmin, slot->data.catalog_xmin))
|
||||
{
|
||||
}
|
||||
|
||||
/*
|
||||
* If the client has already confirmed up to this lsn, we directly
|
||||
* can mark this as accepted. This can happen if we restart
|
||||
* decoding in a slot.
|
||||
* If the client has already confirmed up to this lsn, we directly can
|
||||
* mark this as accepted. This can happen if we restart decoding in a
|
||||
* slot.
|
||||
*/
|
||||
else if (current_lsn <= slot->data.confirmed_flush)
|
||||
{
|
||||
|
@ -744,6 +751,7 @@ LogicalIncreaseXminForSlot(XLogRecPtr current_lsn, TransactionId xmin)
|
|||
/* our candidate can directly be used */
|
||||
updated_xmin = true;
|
||||
}
|
||||
|
||||
/*
|
||||
* Only increase if the previous values have been applied, otherwise we
|
||||
* might never end up updating if the receiver acks too slowly.
|
||||
|
@ -781,13 +789,14 @@ LogicalIncreaseRestartDecodingForSlot(XLogRecPtr current_lsn, XLogRecPtr restart
|
|||
|
||||
SpinLockAcquire(&slot->mutex);
|
||||
|
||||
/* don't overwrite if have a newer restart lsn*/
|
||||
/* don't overwrite if have a newer restart lsn */
|
||||
if (restart_lsn <= slot->data.restart_lsn)
|
||||
{
|
||||
}
|
||||
|
||||
/*
|
||||
* We might have already flushed far enough to directly accept this lsn, in
|
||||
* this case there is no need to check for existing candidate LSNs
|
||||
* We might have already flushed far enough to directly accept this lsn,
|
||||
* in this case there is no need to check for existing candidate LSNs
|
||||
*/
|
||||
else if (current_lsn <= slot->data.confirmed_flush)
|
||||
{
|
||||
|
@ -797,6 +806,7 @@ LogicalIncreaseRestartDecodingForSlot(XLogRecPtr current_lsn, XLogRecPtr restart
|
|||
/* our candidate can directly be used */
|
||||
updated_lsn = true;
|
||||
}
|
||||
|
||||
/*
|
||||
* Only increase if the previous values have been applied, otherwise we
|
||||
* might never end up updating if the receiver acks too slowly. A missed
|
||||
|
@ -896,6 +906,7 @@ LogicalConfirmReceivedLocation(XLogRecPtr lsn)
|
|||
ReplicationSlotSave();
|
||||
elog(DEBUG1, "updated xmin: %u restart: %u", updated_xmin, updated_restart);
|
||||
}
|
||||
|
||||
/*
|
||||
* Now the new xmin is safely on disk, we can let the global value
|
||||
* advance. We do not take ProcArrayLock or similar since we only
|
||||
|
|
|
@ -42,7 +42,8 @@
|
|||
#include "storage/fd.h"
|
||||
|
||||
/* private data for writing out data */
|
||||
typedef struct DecodingOutputState {
|
||||
typedef struct DecodingOutputState
|
||||
{
|
||||
Tuplestorestate *tupstore;
|
||||
TupleDesc tupdesc;
|
||||
bool binary_output;
|
||||
|
@ -475,6 +476,7 @@ Datum
|
|||
pg_logical_slot_get_changes(PG_FUNCTION_ARGS)
|
||||
{
|
||||
Datum ret = pg_logical_slot_get_changes_guts(fcinfo, true, false);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
@ -485,6 +487,7 @@ Datum
|
|||
pg_logical_slot_peek_changes(PG_FUNCTION_ARGS)
|
||||
{
|
||||
Datum ret = pg_logical_slot_get_changes_guts(fcinfo, false, false);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
@ -495,6 +498,7 @@ Datum
|
|||
pg_logical_slot_get_binary_changes(PG_FUNCTION_ARGS)
|
||||
{
|
||||
Datum ret = pg_logical_slot_get_changes_guts(fcinfo, true, true);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
@ -505,5 +509,6 @@ Datum
|
|||
pg_logical_slot_peek_binary_changes(PG_FUNCTION_ARGS)
|
||||
{
|
||||
Datum ret = pg_logical_slot_get_changes_guts(fcinfo, false, true);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
|
|
@ -1047,8 +1047,8 @@ ReorderBufferCleanupTXN(ReorderBuffer *rb, ReorderBufferTXN *txn)
|
|||
}
|
||||
|
||||
/*
|
||||
* Cleanup the tuplecids we stored for decoding catalog snapshot
|
||||
* access. They are always stored in the toplevel transaction.
|
||||
* Cleanup the tuplecids we stored for decoding catalog snapshot access.
|
||||
* They are always stored in the toplevel transaction.
|
||||
*/
|
||||
dlist_foreach_modify(iter, &txn->tuplecids)
|
||||
{
|
||||
|
@ -1204,9 +1204,9 @@ ReorderBufferCopySnap(ReorderBuffer *rb, Snapshot orig_snap,
|
|||
snap->subxip[i++] = txn->xid;
|
||||
|
||||
/*
|
||||
* nsubxcnt isn't decreased when subtransactions abort, so count
|
||||
* manually. Since it's an upper boundary it is safe to use it for the
|
||||
* allocation above.
|
||||
* nsubxcnt isn't decreased when subtransactions abort, so count manually.
|
||||
* Since it's an upper boundary it is safe to use it for the allocation
|
||||
* above.
|
||||
*/
|
||||
snap->subxcnt = 1;
|
||||
|
||||
|
@ -1309,8 +1309,8 @@ ReorderBufferCommit(ReorderBuffer *rb, TransactionId xid,
|
|||
|
||||
/*
|
||||
* Decoding needs access to syscaches et al., which in turn use
|
||||
* heavyweight locks and such. Thus we need to have enough state around
|
||||
* to keep track of those. The easiest way is to simply use a
|
||||
* heavyweight locks and such. Thus we need to have enough state
|
||||
* around to keep track of those. The easiest way is to simply use a
|
||||
* transaction internally. That also allows us to easily enforce that
|
||||
* nothing writes to the database by checking for xid assignments.
|
||||
*
|
||||
|
@ -1415,6 +1415,7 @@ ReorderBufferCommit(ReorderBuffer *rb, TransactionId xid,
|
|||
ReorderBufferCopySnap(rb, change->data.snapshot,
|
||||
txn, command_id);
|
||||
}
|
||||
|
||||
/*
|
||||
* Restored from disk, need to be careful not to double
|
||||
* free. We could introduce refcounting for that, but for
|
||||
|
@ -1586,7 +1587,7 @@ ReorderBufferAbortOld(ReorderBuffer *rb, TransactionId oldestRunningXid)
|
|||
*/
|
||||
dlist_foreach_modify(it, &rb->toplevel_by_lsn)
|
||||
{
|
||||
ReorderBufferTXN * txn;
|
||||
ReorderBufferTXN *txn;
|
||||
|
||||
txn = dlist_container(ReorderBufferTXN, node, it.cur);
|
||||
|
||||
|
@ -1998,7 +1999,8 @@ ReorderBufferSerializeChange(ReorderBuffer *rb, ReorderBufferTXN *txn,
|
|||
case REORDER_BUFFER_CHANGE_DELETE:
|
||||
{
|
||||
char *data;
|
||||
ReorderBufferTupleBuf *oldtup, *newtup;
|
||||
ReorderBufferTupleBuf *oldtup,
|
||||
*newtup;
|
||||
Size oldlen = 0;
|
||||
Size newlen = 0;
|
||||
|
||||
|
@ -2007,12 +2009,12 @@ ReorderBufferSerializeChange(ReorderBuffer *rb, ReorderBufferTXN *txn,
|
|||
|
||||
if (oldtup)
|
||||
oldlen = offsetof(ReorderBufferTupleBuf, data)
|
||||
+ oldtup->tuple.t_len
|
||||
+oldtup->tuple.t_len
|
||||
- offsetof(HeapTupleHeaderData, t_bits);
|
||||
|
||||
if (newtup)
|
||||
newlen = offsetof(ReorderBufferTupleBuf, data)
|
||||
+ newtup->tuple.t_len
|
||||
+newtup->tuple.t_len
|
||||
- offsetof(HeapTupleHeaderData, t_bits);
|
||||
|
||||
sz += oldlen;
|
||||
|
@ -2884,8 +2886,8 @@ TransactionIdInArray(TransactionId xid, TransactionId *xip, Size num)
|
|||
static int
|
||||
file_sort_by_lsn(const void *a_p, const void *b_p)
|
||||
{
|
||||
RewriteMappingFile *a = *(RewriteMappingFile **)a_p;
|
||||
RewriteMappingFile *b = *(RewriteMappingFile **)b_p;
|
||||
RewriteMappingFile *a = *(RewriteMappingFile **) a_p;
|
||||
RewriteMappingFile *b = *(RewriteMappingFile **) b_p;
|
||||
|
||||
if (a->lsn < b->lsn)
|
||||
return -1;
|
||||
|
@ -2917,14 +2919,15 @@ UpdateLogicalMappings(HTAB *tuplecid_data, Oid relid, Snapshot snapshot)
|
|||
TransactionId f_mapped_xid;
|
||||
TransactionId f_create_xid;
|
||||
XLogRecPtr f_lsn;
|
||||
uint32 f_hi, f_lo;
|
||||
uint32 f_hi,
|
||||
f_lo;
|
||||
RewriteMappingFile *f;
|
||||
|
||||
if (strcmp(mapping_de->d_name, ".") == 0 ||
|
||||
strcmp(mapping_de->d_name, "..") == 0)
|
||||
continue;
|
||||
|
||||
/* Ignore files that aren't ours*/
|
||||
/* Ignore files that aren't ours */
|
||||
if (strncmp(mapping_de->d_name, "map-", 4) != 0)
|
||||
continue;
|
||||
|
||||
|
@ -2971,9 +2974,10 @@ UpdateLogicalMappings(HTAB *tuplecid_data, Oid relid, Snapshot snapshot)
|
|||
qsort(files_a, list_length(files), sizeof(RewriteMappingFile *),
|
||||
file_sort_by_lsn);
|
||||
|
||||
for(off = 0; off < list_length(files); off++)
|
||||
for (off = 0; off < list_length(files); off++)
|
||||
{
|
||||
RewriteMappingFile *f = files_a[off];
|
||||
|
||||
elog(DEBUG1, "applying mapping: \"%s\" in %u", f->fname,
|
||||
snapshot->subxip[0]);
|
||||
ApplyLogicalMappingFile(tuplecid_data, relid, f->fname);
|
||||
|
|
|
@ -692,10 +692,10 @@ SnapBuildProcessNewCid(SnapBuild *builder, TransactionId xid,
|
|||
CommandId cid;
|
||||
|
||||
/*
|
||||
* we only log new_cid's if a catalog tuple was modified, so mark
|
||||
* the transaction as containing catalog modifications
|
||||
* we only log new_cid's if a catalog tuple was modified, so mark the
|
||||
* transaction as containing catalog modifications
|
||||
*/
|
||||
ReorderBufferXidSetCatalogChanges(builder->reorder, xid,lsn);
|
||||
ReorderBufferXidSetCatalogChanges(builder->reorder, xid, lsn);
|
||||
|
||||
ReorderBufferAddNewTupleCids(builder->reorder, xlrec->top_xid, lsn,
|
||||
xlrec->target.node, xlrec->target.tid,
|
||||
|
@ -901,7 +901,7 @@ SnapBuildEndTxn(SnapBuild *builder, XLogRecPtr lsn, TransactionId xid)
|
|||
*/
|
||||
ereport(LOG,
|
||||
(errmsg("logical decoding found consistent point at %X/%X",
|
||||
(uint32)(lsn >> 32), (uint32)lsn),
|
||||
(uint32) (lsn >> 32), (uint32) lsn),
|
||||
errdetail("xid %u finished, no running transactions anymore",
|
||||
xid)));
|
||||
builder->state = SNAPBUILD_CONSISTENT;
|
||||
|
@ -1170,6 +1170,7 @@ SnapBuildProcessRunningXacts(SnapBuild *builder, XLogRecPtr lsn, xl_running_xact
|
|||
*/
|
||||
if (txn != NULL && txn->restart_decoding_lsn != InvalidXLogRecPtr)
|
||||
LogicalIncreaseRestartDecodingForSlot(lsn, txn->restart_decoding_lsn);
|
||||
|
||||
/*
|
||||
* No in-progress transaction, can reuse the last serialized snapshot if
|
||||
* we have one.
|
||||
|
@ -1263,7 +1264,7 @@ SnapBuildFindSnapshot(SnapBuild *builder, XLogRecPtr lsn, xl_running_xacts *runn
|
|||
|
||||
ereport(LOG,
|
||||
(errmsg("logical decoding found consistent point at %X/%X",
|
||||
(uint32)(lsn >> 32), (uint32)lsn),
|
||||
(uint32) (lsn >> 32), (uint32) lsn),
|
||||
errdetail("running xacts with xcnt == 0")));
|
||||
|
||||
return false;
|
||||
|
@ -1274,11 +1275,12 @@ SnapBuildFindSnapshot(SnapBuild *builder, XLogRecPtr lsn, xl_running_xacts *runn
|
|||
/* there won't be any state to cleanup */
|
||||
return false;
|
||||
}
|
||||
|
||||
/*
|
||||
* b) first encounter of a useable xl_running_xacts record. If we had
|
||||
* found one earlier we would either track running transactions
|
||||
* (i.e. builder->running.xcnt != 0) or be consistent (this function
|
||||
* wouldn't get called).
|
||||
* found one earlier we would either track running transactions (i.e.
|
||||
* builder->running.xcnt != 0) or be consistent (this function wouldn't
|
||||
* get called).
|
||||
*/
|
||||
else if (!builder->running.xcnt)
|
||||
{
|
||||
|
@ -1321,7 +1323,7 @@ SnapBuildFindSnapshot(SnapBuild *builder, XLogRecPtr lsn, xl_running_xacts *runn
|
|||
|
||||
ereport(LOG,
|
||||
(errmsg("logical decoding found initial starting point at %X/%X",
|
||||
(uint32)(lsn >> 32), (uint32)lsn),
|
||||
(uint32) (lsn >> 32), (uint32) lsn),
|
||||
errdetail("%u xacts need to finish", (uint32) builder->running.xcnt)));
|
||||
|
||||
/*
|
||||
|
@ -1331,7 +1333,7 @@ SnapBuildFindSnapshot(SnapBuild *builder, XLogRecPtr lsn, xl_running_xacts *runn
|
|||
* isolationtester to notice that we're currently waiting for
|
||||
* something.
|
||||
*/
|
||||
for(off = 0; off < builder->running.xcnt; off++)
|
||||
for (off = 0; off < builder->running.xcnt; off++)
|
||||
{
|
||||
TransactionId xid = builder->running.xip[off];
|
||||
|
||||
|
@ -1471,9 +1473,9 @@ SnapBuildSerialize(SnapBuild *builder, XLogRecPtr lsn)
|
|||
* but remember location, so we don't need to read old data again.
|
||||
*
|
||||
* To be sure it has been synced to disk after the rename() from the
|
||||
* tempfile filename to the real filename, we just repeat the
|
||||
* fsync. That ought to be cheap because in most scenarios it should
|
||||
* already be safely on disk.
|
||||
* tempfile filename to the real filename, we just repeat the fsync.
|
||||
* That ought to be cheap because in most scenarios it should already
|
||||
* be safely on disk.
|
||||
*/
|
||||
fsync_fname(path, false);
|
||||
fsync_fname("pg_llog/snapshots", true);
|
||||
|
@ -1597,8 +1599,8 @@ SnapBuildSerialize(SnapBuild *builder, XLogRecPtr lsn)
|
|||
fsync_fname("pg_llog/snapshots", true);
|
||||
|
||||
/*
|
||||
* Now there's no way we can lose the dumped state anymore, remember
|
||||
* this as a serialization point.
|
||||
* Now there's no way we can lose the dumped state anymore, remember this
|
||||
* as a serialization point.
|
||||
*/
|
||||
builder->last_serialized_snapshot = lsn;
|
||||
|
||||
|
@ -1781,7 +1783,7 @@ SnapBuildRestore(SnapBuild *builder, XLogRecPtr lsn)
|
|||
|
||||
ereport(LOG,
|
||||
(errmsg("logical decoding found consistent point at %X/%X",
|
||||
(uint32)(lsn >> 32), (uint32)lsn),
|
||||
(uint32) (lsn >> 32), (uint32) lsn),
|
||||
errdetail("found initial snapshot in snapbuild file")));
|
||||
return true;
|
||||
|
||||
|
@ -1846,8 +1848,8 @@ CheckPointSnapBuild(void)
|
|||
/*
|
||||
* temporary filenames from SnapBuildSerialize() include the LSN and
|
||||
* everything but are postfixed by .$pid.tmp. We can just remove them
|
||||
* the same as other files because there can be none that are currently
|
||||
* being written that are older than cutoff.
|
||||
* the same as other files because there can be none that are
|
||||
* currently being written that are older than cutoff.
|
||||
*
|
||||
* We just log a message if a file doesn't fit the pattern, it's
|
||||
* probably some editor's lock/state file or similar...
|
||||
|
|
|
@ -81,7 +81,8 @@ ReplicationSlotCtlData *ReplicationSlotCtl = NULL;
|
|||
ReplicationSlot *MyReplicationSlot = NULL;
|
||||
|
||||
/* GUCs */
|
||||
int max_replication_slots = 0; /* the maximum number of replication slots */
|
||||
int max_replication_slots = 0; /* the maximum number of replication
|
||||
* slots */
|
||||
|
||||
static void ReplicationSlotDropAcquired(void);
|
||||
|
||||
|
@ -208,18 +209,18 @@ ReplicationSlotCreate(const char *name, bool db_specific,
|
|||
ReplicationSlotValidateName(name, ERROR);
|
||||
|
||||
/*
|
||||
* If some other backend ran this code concurrently with us, we'd likely
|
||||
* both allocate the same slot, and that would be bad. We'd also be
|
||||
* at risk of missing a name collision. Also, we don't want to try to
|
||||
* create a new slot while somebody's busy cleaning up an old one, because
|
||||
* we might both be monkeying with the same directory.
|
||||
* If some other backend ran this code concurrently with us, we'd likely both
|
||||
* allocate the same slot, and that would be bad. We'd also be at risk of
|
||||
* missing a name collision. Also, we don't want to try to create a new
|
||||
* slot while somebody's busy cleaning up an old one, because we might
|
||||
* both be monkeying with the same directory.
|
||||
*/
|
||||
LWLockAcquire(ReplicationSlotAllocationLock, LW_EXCLUSIVE);
|
||||
|
||||
/*
|
||||
* Check for name collision, and identify an allocatable slot. We need
|
||||
* to hold ReplicationSlotControlLock in shared mode for this, so that
|
||||
* nobody else can change the in_use flags while we're looking at them.
|
||||
* Check for name collision, and identify an allocatable slot. We need to
|
||||
* hold ReplicationSlotControlLock in shared mode for this, so that nobody
|
||||
* else can change the in_use flags while we're looking at them.
|
||||
*/
|
||||
LWLockAcquire(ReplicationSlotControlLock, LW_SHARED);
|
||||
for (i = 0; i < max_replication_slots; i++)
|
||||
|
@ -243,10 +244,10 @@ ReplicationSlotCreate(const char *name, bool db_specific,
|
|||
errhint("Free one or increase max_replication_slots.")));
|
||||
|
||||
/*
|
||||
* Since this slot is not in use, nobody should be looking at any
|
||||
* part of it other than the in_use field unless they're trying to allocate
|
||||
* it. And since we hold ReplicationSlotAllocationLock, nobody except us
|
||||
* can be doing that. So it's safe to initialize the slot.
|
||||
* Since this slot is not in use, nobody should be looking at any part of
|
||||
* it other than the in_use field unless they're trying to allocate it.
|
||||
* And since we hold ReplicationSlotAllocationLock, nobody except us can
|
||||
* be doing that. So it's safe to initialize the slot.
|
||||
*/
|
||||
Assert(!slot->in_use);
|
||||
Assert(!slot->active);
|
||||
|
@ -366,6 +367,7 @@ ReplicationSlotRelease(void)
|
|||
{
|
||||
/* Mark slot inactive. We're not freeing it, just disconnecting. */
|
||||
volatile ReplicationSlot *vslot = slot;
|
||||
|
||||
SpinLockAcquire(&slot->mutex);
|
||||
vslot->active = false;
|
||||
SpinLockRelease(&slot->mutex);
|
||||
|
@ -802,8 +804,8 @@ CheckPointReplicationSlots(void)
|
|||
* Prevent any slot from being created/dropped while we're active. As we
|
||||
* explicitly do *not* want to block iterating over replication_slots or
|
||||
* acquiring a slot we cannot take the control lock - but that's OK,
|
||||
* because holding ReplicationSlotAllocationLock is strictly stronger,
|
||||
* and enough to guarantee that nobody can change the in_use bits on us.
|
||||
* because holding ReplicationSlotAllocationLock is strictly stronger, and
|
||||
* enough to guarantee that nobody can change the in_use bits on us.
|
||||
*/
|
||||
LWLockAcquire(ReplicationSlotAllocationLock, LW_SHARED);
|
||||
|
||||
|
@ -904,11 +906,10 @@ CreateSlotOnDisk(ReplicationSlot *slot)
|
|||
sprintf(tmppath, "pg_replslot/%s.tmp", NameStr(slot->data.name));
|
||||
|
||||
/*
|
||||
* It's just barely possible that some previous effort to create or
|
||||
* drop a slot with this name left a temp directory lying around.
|
||||
* If that seems to be the case, try to remove it. If the rmtree()
|
||||
* fails, we'll error out at the mkdir() below, so we don't bother
|
||||
* checking success.
|
||||
* It's just barely possible that some previous effort to create or drop a
|
||||
* slot with this name left a temp directory lying around. If that seems
|
||||
* to be the case, try to remove it. If the rmtree() fails, we'll error
|
||||
* out at the mkdir() below, so we don't bother checking success.
|
||||
*/
|
||||
if (stat(tmppath, &st) == 0 && S_ISDIR(st.st_mode))
|
||||
rmtree(tmppath, true);
|
||||
|
@ -1003,12 +1004,13 @@ SaveSlotToPath(ReplicationSlot *slot, const char *dir, int elevel)
|
|||
SpinLockRelease(&slot->mutex);
|
||||
|
||||
COMP_CRC32(cp.checksum,
|
||||
(char *)(&cp) + ReplicationSlotOnDiskConstantSize,
|
||||
(char *) (&cp) + ReplicationSlotOnDiskConstantSize,
|
||||
ReplicationSlotOnDiskDynamicSize);
|
||||
|
||||
if ((write(fd, &cp, sizeof(cp))) != sizeof(cp))
|
||||
{
|
||||
int save_errno = errno;
|
||||
|
||||
CloseTransientFile(fd);
|
||||
errno = save_errno;
|
||||
ereport(elevel,
|
||||
|
@ -1022,6 +1024,7 @@ SaveSlotToPath(ReplicationSlot *slot, const char *dir, int elevel)
|
|||
if (pg_fsync(fd) != 0)
|
||||
{
|
||||
int save_errno = errno;
|
||||
|
||||
CloseTransientFile(fd);
|
||||
errno = save_errno;
|
||||
ereport(elevel,
|
||||
|
@ -1162,7 +1165,7 @@ RestoreSlotFromDisk(const char *name)
|
|||
|
||||
/* Now that we know the size, read the entire file */
|
||||
readBytes = read(fd,
|
||||
(char *)&cp + ReplicationSlotOnDiskConstantSize,
|
||||
(char *) &cp + ReplicationSlotOnDiskConstantSize,
|
||||
cp.length);
|
||||
if (readBytes != cp.length)
|
||||
{
|
||||
|
@ -1181,7 +1184,7 @@ RestoreSlotFromDisk(const char *name)
|
|||
/* now verify the CRC32 */
|
||||
INIT_CRC32(checksum);
|
||||
COMP_CRC32(checksum,
|
||||
(char *)&cp + ReplicationSlotOnDiskConstantSize,
|
||||
(char *) &cp + ReplicationSlotOnDiskConstantSize,
|
||||
ReplicationSlotOnDiskDynamicSize);
|
||||
|
||||
if (!EQ_CRC32(checksum, cp.checksum))
|
||||
|
|
|
@ -53,7 +53,7 @@ pg_create_physical_replication_slot(PG_FUNCTION_ARGS)
|
|||
if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE)
|
||||
elog(ERROR, "return type must be a row type");
|
||||
|
||||
/* acquire replication slot, this will check for conflicting names*/
|
||||
/* acquire replication slot, this will check for conflicting names */
|
||||
ReplicationSlotCreate(NameStr(*name), false, RS_PERSISTENT);
|
||||
|
||||
values[0] = NameGetDatum(&MyReplicationSlot->data.name);
|
||||
|
@ -97,8 +97,7 @@ pg_create_logical_replication_slot(PG_FUNCTION_ARGS)
|
|||
Assert(!MyReplicationSlot);
|
||||
|
||||
/*
|
||||
* Acquire a logical decoding slot, this will check for conflicting
|
||||
* names.
|
||||
* Acquire a logical decoding slot, this will check for conflicting names.
|
||||
*/
|
||||
ReplicationSlotCreate(NameStr(*name), true, RS_EPHEMERAL);
|
||||
|
||||
|
|
|
@ -117,8 +117,8 @@ SyncRepWaitForLSN(XLogRecPtr XactCommitLSN)
|
|||
* set. See SyncRepUpdateSyncStandbysDefined.
|
||||
*
|
||||
* Also check that the standby hasn't already replied. Unlikely race
|
||||
* condition but we'll be fetching that cache line anyway so it's likely to
|
||||
* be a low cost check.
|
||||
* condition but we'll be fetching that cache line anyway so it's likely
|
||||
* to be a low cost check.
|
||||
*/
|
||||
if (!WalSndCtl->sync_standbys_defined ||
|
||||
XactCommitLSN <= WalSndCtl->lsn[mode])
|
||||
|
|
|
@ -188,7 +188,7 @@ static void WalSndXLogSendHandler(SIGNAL_ARGS);
|
|||
static void WalSndLastCycleHandler(SIGNAL_ARGS);
|
||||
|
||||
/* Prototypes for private functions */
|
||||
typedef void (*WalSndSendDataCallback)(void);
|
||||
typedef void (*WalSndSendDataCallback) (void);
|
||||
static void WalSndLoop(WalSndSendDataCallback send_data);
|
||||
static void InitWalSenderSlot(void);
|
||||
static void WalSndKill(int code, Datum arg);
|
||||
|
@ -301,8 +301,8 @@ IdentifySystem(void)
|
|||
|
||||
/*
|
||||
* Reply with a result set with one row, four columns. First col is system
|
||||
* ID, second is timeline ID, third is current xlog location and the fourth
|
||||
* contains the database name if we are connected to one.
|
||||
* ID, second is timeline ID, third is current xlog location and the
|
||||
* fourth contains the database name if we are connected to one.
|
||||
*/
|
||||
|
||||
snprintf(sysid, sizeof(sysid), UINT64_FORMAT,
|
||||
|
@ -731,8 +731,8 @@ StartReplication(StartReplicationCmd *cmd)
|
|||
* set every time WAL is flushed.
|
||||
*/
|
||||
static int
|
||||
logical_read_xlog_page(XLogReaderState* state, XLogRecPtr targetPagePtr, int reqLen,
|
||||
XLogRecPtr targetRecPtr, char* cur_page, TimeLineID *pageTLI)
|
||||
logical_read_xlog_page(XLogReaderState *state, XLogRecPtr targetPagePtr, int reqLen,
|
||||
XLogRecPtr targetRecPtr, char *cur_page, TimeLineID *pageTLI)
|
||||
{
|
||||
XLogRecPtr flushptr;
|
||||
int count;
|
||||
|
@ -1013,6 +1013,7 @@ WalSndPrepareWrite(LogicalDecodingContext *ctx, XLogRecPtr lsn, TransactionId xi
|
|||
pq_sendbyte(ctx->out, 'w');
|
||||
pq_sendint64(ctx->out, lsn); /* dataStart */
|
||||
pq_sendint64(ctx->out, lsn); /* walEnd */
|
||||
|
||||
/*
|
||||
* Fill out the sendtime later, just as it's done in XLogSendPhysical, but
|
||||
* reserve space here.
|
||||
|
@ -1035,9 +1036,9 @@ WalSndWriteData(LogicalDecodingContext *ctx, XLogRecPtr lsn, TransactionId xid,
|
|||
pq_putmessage_noblock('d', ctx->out->data, ctx->out->len);
|
||||
|
||||
/*
|
||||
* Fill the send timestamp last, so that it is taken as late as
|
||||
* possible. This is somewhat ugly, but the protocol's set as it's already
|
||||
* used for several releases by streaming physical replication.
|
||||
* Fill the send timestamp last, so that it is taken as late as possible.
|
||||
* This is somewhat ugly, but the protocol's set as it's already used for
|
||||
* several releases by streaming physical replication.
|
||||
*/
|
||||
resetStringInfo(&tmpbuf);
|
||||
pq_sendint64(&tmpbuf, GetCurrentIntegerTimestamp());
|
||||
|
@ -1297,6 +1298,7 @@ exec_replication_command(const char *cmd_string)
|
|||
case T_StartReplicationCmd:
|
||||
{
|
||||
StartReplicationCmd *cmd = (StartReplicationCmd *) cmd_node;
|
||||
|
||||
if (cmd->kind == REPLICATION_KIND_PHYSICAL)
|
||||
StartReplication(cmd);
|
||||
else
|
||||
|
@ -1473,6 +1475,7 @@ static void
|
|||
PhysicalConfirmReceivedLocation(XLogRecPtr lsn)
|
||||
{
|
||||
bool changed = false;
|
||||
|
||||
/* use volatile pointer to prevent code rearrangement */
|
||||
volatile ReplicationSlot *slot = MyReplicationSlot;
|
||||
|
||||
|
@ -1492,9 +1495,9 @@ PhysicalConfirmReceivedLocation(XLogRecPtr lsn)
|
|||
}
|
||||
|
||||
/*
|
||||
* One could argue that the slot should be saved to disk now, but that'd be
|
||||
* energy wasted - the worst lost information can do here is give us wrong
|
||||
* information in a statistics view - we'll just potentially be more
|
||||
* One could argue that the slot should be saved to disk now, but that'd
|
||||
* be energy wasted - the worst lost information can do here is give us
|
||||
* wrong information in a statistics view - we'll just potentially be more
|
||||
* conservative in removing files.
|
||||
*/
|
||||
}
|
||||
|
@ -1566,10 +1569,11 @@ PhysicalReplicationSlotNewXmin(TransactionId feedbackXmin)
|
|||
|
||||
SpinLockAcquire(&slot->mutex);
|
||||
MyPgXact->xmin = InvalidTransactionId;
|
||||
|
||||
/*
|
||||
* For physical replication we don't need the interlock provided
|
||||
* by xmin and effective_xmin since the consequences of a missed increase
|
||||
* are limited to query cancellations, so set both at once.
|
||||
* For physical replication we don't need the interlock provided by xmin
|
||||
* and effective_xmin since the consequences of a missed increase are
|
||||
* limited to query cancellations, so set both at once.
|
||||
*/
|
||||
if (!TransactionIdIsNormal(slot->data.xmin) ||
|
||||
!TransactionIdIsNormal(feedbackXmin) ||
|
||||
|
@ -1667,7 +1671,7 @@ ProcessStandbyHSFeedbackMessage(void)
|
|||
*
|
||||
* If we're using a replication slot we reserve the xmin via that,
|
||||
* otherwise via the walsender's PGXACT entry.
|
||||
|
||||
*
|
||||
* XXX: It might make sense to introduce ephemeral slots and always use
|
||||
* the slot mechanism.
|
||||
*/
|
||||
|
@ -1703,9 +1707,9 @@ WalSndComputeSleeptime(TimestampTz now)
|
|||
wal_sender_timeout);
|
||||
|
||||
/*
|
||||
* If no ping has been sent yet, wakeup when it's time to do
|
||||
* so. WalSndKeepaliveIfNecessary() wants to send a keepalive once
|
||||
* half of the timeout passed without a response.
|
||||
* If no ping has been sent yet, wakeup when it's time to do so.
|
||||
* WalSndKeepaliveIfNecessary() wants to send a keepalive once half of
|
||||
* the timeout passed without a response.
|
||||
*/
|
||||
if (!waiting_for_ping_response)
|
||||
wakeup_time = TimestampTzPlusMilliseconds(last_reply_timestamp,
|
||||
|
@ -1738,8 +1742,8 @@ WalSndCheckTimeOut(TimestampTz now)
|
|||
{
|
||||
/*
|
||||
* Since typically expiration of replication timeout means
|
||||
* communication problem, we don't send the error message to
|
||||
* the standby.
|
||||
* communication problem, we don't send the error message to the
|
||||
* standby.
|
||||
*/
|
||||
ereport(COMMERROR,
|
||||
(errmsg("terminating walsender process due to replication timeout")));
|
||||
|
@ -1839,10 +1843,10 @@ WalSndLoop(WalSndSendDataCallback send_data)
|
|||
|
||||
/*
|
||||
* When SIGUSR2 arrives, we send any outstanding logs up to the
|
||||
* shutdown checkpoint record (i.e., the latest record), wait
|
||||
* for them to be replicated to the standby, and exit.
|
||||
* This may be a normal termination at shutdown, or a promotion,
|
||||
* the walsender is not sure which.
|
||||
* shutdown checkpoint record (i.e., the latest record), wait for
|
||||
* them to be replicated to the standby, and exit. This may be a
|
||||
* normal termination at shutdown, or a promotion, the walsender
|
||||
* is not sure which.
|
||||
*/
|
||||
if (walsender_ready_to_stop)
|
||||
WalSndDone(send_data);
|
||||
|
@ -2416,8 +2420,8 @@ XLogSendLogical(void)
|
|||
else
|
||||
{
|
||||
/*
|
||||
* If the record we just wanted to read is at or beyond the flushed point,
|
||||
* then we're caught up.
|
||||
* If the record we just wanted to read is at or beyond the flushed
|
||||
* point, then we're caught up.
|
||||
*/
|
||||
if (logical_decoding_ctx->reader->EndRecPtr >= GetFlushRecPtr())
|
||||
WalSndCaughtUp = true;
|
||||
|
@ -2452,10 +2456,10 @@ WalSndDone(WalSndSendDataCallback send_data)
|
|||
send_data();
|
||||
|
||||
/*
|
||||
* Check a write location to see whether all the WAL have
|
||||
* successfully been replicated if this walsender is connecting
|
||||
* to a standby such as pg_receivexlog which always returns
|
||||
* an invalid flush location. Otherwise, check a flush location.
|
||||
* Check a write location to see whether all the WAL have successfully
|
||||
* been replicated if this walsender is connecting to a standby such as
|
||||
* pg_receivexlog which always returns an invalid flush location.
|
||||
* Otherwise, check a flush location.
|
||||
*/
|
||||
replicatedPtr = XLogRecPtrIsInvalid(MyWalSnd->flush) ?
|
||||
MyWalSnd->write : MyWalSnd->flush;
|
||||
|
@ -2562,8 +2566,8 @@ WalSndLastCycleHandler(SIGNAL_ARGS)
|
|||
/*
|
||||
* If replication has not yet started, die like with SIGTERM. If
|
||||
* replication is active, only set a flag and wake up the main loop. It
|
||||
* will send any outstanding WAL, wait for it to be replicated to
|
||||
* the standby, and then exit gracefully.
|
||||
* will send any outstanding WAL, wait for it to be replicated to the
|
||||
* standby, and then exit gracefully.
|
||||
*/
|
||||
if (!replication_active)
|
||||
kill(MyProcPid, SIGTERM);
|
||||
|
|
|
@ -2174,8 +2174,8 @@ view_cols_are_auto_updatable(Query *viewquery,
|
|||
ListCell *cell;
|
||||
|
||||
/*
|
||||
* The caller should have verified that this view is auto-updatable and
|
||||
* so there should be a single base relation.
|
||||
* The caller should have verified that this view is auto-updatable and so
|
||||
* there should be a single base relation.
|
||||
*/
|
||||
Assert(list_length(viewquery->jointree->fromlist) == 1);
|
||||
rtr = (RangeTblRef *) linitial(viewquery->jointree->fromlist);
|
||||
|
@ -2354,9 +2354,9 @@ relation_is_updatable(Oid reloid,
|
|||
|
||||
/*
|
||||
* Determine which of the view's columns are updatable. If there
|
||||
* are none within the set of columns we are looking at, then
|
||||
* the view doesn't support INSERT/UPDATE, but it may still
|
||||
* support DELETE.
|
||||
* are none within the set of columns we are looking at, then the
|
||||
* view doesn't support INSERT/UPDATE, but it may still support
|
||||
* DELETE.
|
||||
*/
|
||||
view_cols_are_auto_updatable(viewquery, NULL,
|
||||
&updatable_cols, NULL);
|
||||
|
@ -2703,8 +2703,8 @@ rewriteTargetView(Query *parsetree, Relation view)
|
|||
|
||||
/*
|
||||
* Move any security barrier quals from the view RTE onto the new target
|
||||
* RTE. Any such quals should now apply to the new target RTE and will not
|
||||
* reference the original view RTE in the rewritten query.
|
||||
* RTE. Any such quals should now apply to the new target RTE and will
|
||||
* not reference the original view RTE in the rewritten query.
|
||||
*/
|
||||
new_rte->securityQuals = view_rte->securityQuals;
|
||||
view_rte->securityQuals = NIL;
|
||||
|
@ -2790,8 +2790,8 @@ rewriteTargetView(Query *parsetree, Relation view)
|
|||
* we did with the view targetlist).
|
||||
*
|
||||
* Note that there is special-case handling for the quals of a security
|
||||
* barrier view, since they need to be kept separate from any user-supplied
|
||||
* quals, so these quals are kept on the new target RTE.
|
||||
* barrier view, since they need to be kept separate from any
|
||||
* user-supplied quals, so these quals are kept on the new target RTE.
|
||||
*
|
||||
* For INSERT, the view's quals can be ignored in the main query.
|
||||
*/
|
||||
|
@ -2836,8 +2836,9 @@ rewriteTargetView(Query *parsetree, Relation view)
|
|||
* If the parent view has a cascaded check option, treat this view as
|
||||
* if it also had a cascaded check option.
|
||||
*
|
||||
* New WithCheckOptions are added to the start of the list, so if there
|
||||
* is a cascaded check option, it will be the first item in the list.
|
||||
* New WithCheckOptions are added to the start of the list, so if
|
||||
* there is a cascaded check option, it will be the first item in the
|
||||
* list.
|
||||
*/
|
||||
if (parsetree->withCheckOptions != NIL)
|
||||
{
|
||||
|
|
|
@ -170,10 +170,10 @@ dsm_postmaster_startup(PGShmemHeader *shim)
|
|||
segsize = dsm_control_bytes_needed(maxitems);
|
||||
|
||||
/*
|
||||
* Loop until we find an unused identifier for the new control segment.
|
||||
* We sometimes use 0 as a sentinel value indicating that no control
|
||||
* segment is known to exist, so avoid using that value for a real
|
||||
* control segment.
|
||||
* Loop until we find an unused identifier for the new control segment. We
|
||||
* sometimes use 0 as a sentinel value indicating that no control segment
|
||||
* is known to exist, so avoid using that value for a real control
|
||||
* segment.
|
||||
*/
|
||||
for (;;)
|
||||
{
|
||||
|
@ -224,17 +224,17 @@ dsm_cleanup_using_control_segment(dsm_handle old_control_handle)
|
|||
|
||||
/*
|
||||
* Try to attach the segment. If this fails, it probably just means that
|
||||
* the operating system has been rebooted and the segment no longer exists,
|
||||
* or an unrelated process has used the same shm ID.  So just fall out
|
||||
* quietly.
|
||||
* the operating system has been rebooted and the segment no longer
|
||||
* exists, or an unrelated process has used the same shm ID.  So just fall
|
||||
* out quietly.
|
||||
*/
|
||||
if (!dsm_impl_op(DSM_OP_ATTACH, old_control_handle, 0, &impl_private,
|
||||
&mapped_address, &mapped_size, DEBUG1))
|
||||
return;
|
||||
|
||||
/*
|
||||
* We've managed to reattach it, but the contents might not be sane.
|
||||
* If they aren't, we disregard the segment after all.
|
||||
* We've managed to reattach it, but the contents might not be sane. If
|
||||
* they aren't, we disregard the segment after all.
|
||||
*/
|
||||
old_control = (dsm_control_header *) mapped_address;
|
||||
if (!dsm_control_segment_sane(old_control, mapped_size))
|
||||
|
@ -245,8 +245,8 @@ dsm_cleanup_using_control_segment(dsm_handle old_control_handle)
|
|||
}
|
||||
|
||||
/*
|
||||
* OK, the control segment looks basically valid, so we can use
|
||||
* it to get a list of segments that need to be removed.
|
||||
* OK, the control segment looks basically valid, so we can use it to
|
||||
* get a list of segments that need to be removed.
|
||||
*/
|
||||
nitems = old_control->nitems;
|
||||
for (i = 0; i < nitems; ++i)
|
||||
|
@ -307,6 +307,7 @@ dsm_cleanup_for_mmap(void)
|
|||
strlen(PG_DYNSHMEM_MMAP_FILE_PREFIX)) == 0)
|
||||
{
|
||||
char buf[MAXPGPATH];
|
||||
|
||||
snprintf(buf, MAXPGPATH, PG_DYNSHMEM_DIR "/%s", dent->d_name);
|
||||
|
||||
elog(DEBUG2, "removing file \"%s\"", buf);
|
||||
|
@ -352,8 +353,8 @@ dsm_postmaster_shutdown(int code, Datum arg)
|
|||
* If some other backend exited uncleanly, it might have corrupted the
|
||||
* control segment while it was dying. In that case, we warn and ignore
|
||||
* the contents of the control segment. This may end up leaving behind
|
||||
* stray shared memory segments, but there's not much we can do about
|
||||
* that if the metadata is gone.
|
||||
* stray shared memory segments, but there's not much we can do about that
|
||||
* if the metadata is gone.
|
||||
*/
|
||||
nitems = dsm_control->nitems;
|
||||
if (!dsm_control_segment_sane(dsm_control, dsm_control_mapped_size))
|
||||
|
@ -537,13 +538,13 @@ dsm_attach(dsm_handle h)
|
|||
|
||||
/*
|
||||
* Since this is just a debugging cross-check, we could leave it out
|
||||
* altogether, or include it only in assert-enabled builds. But since
|
||||
* the list of attached segments should normally be very short, let's
|
||||
* include it always for right now.
|
||||
* altogether, or include it only in assert-enabled builds. But since the
|
||||
* list of attached segments should normally be very short, let's include
|
||||
* it always for right now.
|
||||
*
|
||||
* If you're hitting this error, you probably want to attempt to
|
||||
* find an existing mapping via dsm_find_mapping() before calling
|
||||
* dsm_attach() to create a new one.
|
||||
* If you're hitting this error, you probably want to attempt to find an
|
||||
* existing mapping via dsm_find_mapping() before calling dsm_attach() to
|
||||
* create a new one.
|
||||
*/
|
||||
dlist_foreach(iter, &dsm_segment_list)
|
||||
{
|
||||
|
@ -584,10 +585,10 @@ dsm_attach(dsm_handle h)
|
|||
LWLockRelease(DynamicSharedMemoryControlLock);
|
||||
|
||||
/*
|
||||
* If we didn't find the handle we're looking for in the control
|
||||
* segment, it probably means that everyone else who had it mapped,
|
||||
* including the original creator, died before we got to this point.
|
||||
* It's up to the caller to decide what to do about that.
|
||||
* If we didn't find the handle we're looking for in the control segment,
|
||||
* it probably means that everyone else who had it mapped, including the
|
||||
* original creator, died before we got to this point. It's up to the
|
||||
* caller to decide what to do about that.
|
||||
*/
|
||||
if (seg->control_slot == INVALID_CONTROL_SLOT)
|
||||
{
|
||||
|
@ -710,13 +711,12 @@ dsm_detach(dsm_segment *seg)
|
|||
}
|
||||
|
||||
/*
|
||||
* Try to remove the mapping, if one exists. Normally, there will be,
|
||||
* but maybe not, if we failed partway through a create or attach
|
||||
* operation. We remove the mapping before decrementing the reference
|
||||
* count so that the process that sees a zero reference count can be
|
||||
* certain that no remaining mappings exist. Even if this fails, we
|
||||
* pretend that it works, because retrying is likely to fail in the
|
||||
* same way.
|
||||
* Try to remove the mapping, if one exists. Normally, there will be, but
|
||||
* maybe not, if we failed partway through a create or attach operation.
|
||||
* We remove the mapping before decrementing the reference count so that
|
||||
* the process that sees a zero reference count can be certain that no
|
||||
* remaining mappings exist. Even if this fails, we pretend that it
|
||||
* works, because retrying is likely to fail in the same way.
|
||||
*/
|
||||
if (seg->mapped_address != NULL)
|
||||
{
|
||||
|
@ -744,15 +744,15 @@ dsm_detach(dsm_segment *seg)
|
|||
if (refcnt == 1)
|
||||
{
|
||||
/*
|
||||
* If we fail to destroy the segment here, or are killed before
|
||||
* we finish doing so, the reference count will remain at 1, which
|
||||
* If we fail to destroy the segment here, or are killed before we
|
||||
* finish doing so, the reference count will remain at 1, which
|
||||
* will mean that nobody else can attach to the segment. At
|
||||
* postmaster shutdown time, or when a new postmaster is started
|
||||
* after a hard kill, another attempt will be made to remove the
|
||||
* segment.
|
||||
*
|
||||
* The main case we're worried about here is being killed by
|
||||
* a signal before we can finish removing the segment. In that
|
||||
* The main case we're worried about here is being killed by a
|
||||
* signal before we can finish removing the segment. In that
|
||||
* case, it's important to be sure that the segment still gets
|
||||
* removed. If we actually fail to remove the segment for some
|
||||
* other reason, the postmaster may not have any better luck than
|
||||
|
@ -1005,5 +1005,5 @@ static uint64
|
|||
dsm_control_bytes_needed(uint32 nitems)
|
||||
{
|
||||
return offsetof(dsm_control_header, item)
|
||||
+ sizeof(dsm_control_item) * (uint64) nitems;
|
||||
+sizeof(dsm_control_item) * (uint64) nitems;
|
||||
}
|
||||
|
|
|
@ -93,18 +93,18 @@ static int errcode_for_dynamic_shared_memory(void);
|
|||
|
||||
const struct config_enum_entry dynamic_shared_memory_options[] = {
|
||||
#ifdef USE_DSM_POSIX
|
||||
{ "posix", DSM_IMPL_POSIX, false},
|
||||
{"posix", DSM_IMPL_POSIX, false},
|
||||
#endif
|
||||
#ifdef USE_DSM_SYSV
|
||||
{ "sysv", DSM_IMPL_SYSV, false},
|
||||
{"sysv", DSM_IMPL_SYSV, false},
|
||||
#endif
|
||||
#ifdef USE_DSM_WINDOWS
|
||||
{ "windows", DSM_IMPL_WINDOWS, false},
|
||||
{"windows", DSM_IMPL_WINDOWS, false},
|
||||
#endif
|
||||
#ifdef USE_DSM_MMAP
|
||||
{ "mmap", DSM_IMPL_MMAP, false},
|
||||
{"mmap", DSM_IMPL_MMAP, false},
|
||||
#endif
|
||||
{ "none", DSM_IMPL_NONE, false},
|
||||
{"none", DSM_IMPL_NONE, false},
|
||||
{NULL, 0, false}
|
||||
};
|
||||
|
||||
|
@ -367,8 +367,8 @@ dsm_impl_posix(dsm_op op, dsm_handle handle, Size request_size,
|
|||
}
|
||||
|
||||
/* Map it. */
|
||||
address = mmap(NULL, request_size, PROT_READ|PROT_WRITE,
|
||||
MAP_SHARED|MAP_HASSEMAPHORE, fd, 0);
|
||||
address = mmap(NULL, request_size, PROT_READ | PROT_WRITE,
|
||||
MAP_SHARED | MAP_HASSEMAPHORE, fd, 0);
|
||||
if (address == MAP_FAILED)
|
||||
{
|
||||
int save_errno;
|
||||
|
@ -427,27 +427,27 @@ dsm_impl_sysv(dsm_op op, dsm_handle handle, Size request_size,
|
|||
return true;
|
||||
|
||||
/*
|
||||
* POSIX shared memory and mmap-based shared memory identify segments
|
||||
* with names. To avoid needless error message variation, we use the
|
||||
* handle as the name.
|
||||
* POSIX shared memory and mmap-based shared memory identify segments with
|
||||
* names. To avoid needless error message variation, we use the handle as
|
||||
* the name.
|
||||
*/
|
||||
snprintf(name, 64, "%u", handle);
|
||||
|
||||
/*
|
||||
* The System V shared memory namespace is very restricted; names are
|
||||
* of type key_t, which is expected to be some sort of integer data type,
|
||||
* but not necessarily the same one as dsm_handle. Since we use
|
||||
* dsm_handle to identify shared memory segments across processes, this
|
||||
* might seem like a problem, but it's really not. If dsm_handle is
|
||||
* bigger than key_t, the cast below might truncate away some bits from
|
||||
* the handle the user provided, but it'll truncate exactly the same bits
|
||||
* away in exactly the same fashion every time we use that handle, which
|
||||
* is all that really matters. Conversely, if dsm_handle is smaller than
|
||||
* key_t, we won't use the full range of available key space, but that's
|
||||
* no big deal either.
|
||||
* The System V shared memory namespace is very restricted; names are of
|
||||
* type key_t, which is expected to be some sort of integer data type, but
|
||||
* not necessarily the same one as dsm_handle. Since we use dsm_handle to
|
||||
* identify shared memory segments across processes, this might seem like
|
||||
* a problem, but it's really not. If dsm_handle is bigger than key_t,
|
||||
* the cast below might truncate away some bits from the handle the
|
||||
* user provided, but it'll truncate exactly the same bits away in exactly
|
||||
* the same fashion every time we use that handle, which is all that
|
||||
* really matters. Conversely, if dsm_handle is smaller than key_t, we
|
||||
* won't use the full range of available key space, but that's no big deal
|
||||
* either.
|
||||
*
|
||||
* We do make sure that the key isn't negative, because that might not
|
||||
* be portable.
|
||||
* We do make sure that the key isn't negative, because that might not be
|
||||
* portable.
|
||||
*/
|
||||
key = (key_t) handle;
|
||||
if (key < 1) /* avoid compiler warning if type is unsigned */
|
||||
|
@ -455,10 +455,10 @@ dsm_impl_sysv(dsm_op op, dsm_handle handle, Size request_size,
|
|||
|
||||
/*
|
||||
* There's one special key, IPC_PRIVATE, which can't be used. If we end
|
||||
* up with that value by chance during a create operation, just pretend
|
||||
* it already exists, so that caller will retry. If we run into it
|
||||
* anywhere else, the caller has passed a handle that doesn't correspond
|
||||
* to anything we ever created, which should not happen.
|
||||
* up with that value by chance during a create operation, just pretend it
|
||||
* already exists, so that caller will retry. If we run into it anywhere
|
||||
* else, the caller has passed a handle that doesn't correspond to
|
||||
* anything we ever created, which should not happen.
|
||||
*/
|
||||
if (key == IPC_PRIVATE)
|
||||
{
|
||||
|
@ -469,9 +469,9 @@ dsm_impl_sysv(dsm_op op, dsm_handle handle, Size request_size,
|
|||
}
|
||||
|
||||
/*
|
||||
* Before we can do anything with a shared memory segment, we have to
|
||||
* map the shared memory key to a shared memory identifier using shmget().
|
||||
* To avoid repeated lookups, we store the key using impl_private.
|
||||
* Before we can do anything with a shared memory segment, we have to map
|
||||
* the shared memory key to a shared memory identifier using shmget(). To
|
||||
* avoid repeated lookups, we store the key using impl_private.
|
||||
*/
|
||||
if (*impl_private != NULL)
|
||||
{
|
||||
|
@ -507,6 +507,7 @@ dsm_impl_sysv(dsm_op op, dsm_handle handle, Size request_size,
|
|||
if (errno != EEXIST)
|
||||
{
|
||||
int save_errno = errno;
|
||||
|
||||
pfree(ident_cache);
|
||||
errno = save_errno;
|
||||
ereport(elevel,
|
||||
|
@ -631,12 +632,12 @@ dsm_impl_windows(dsm_op op, dsm_handle handle, Size request_size,
|
|||
return true;
|
||||
|
||||
/*
|
||||
* Storing the shared memory segment in the Global\ namespace, can
|
||||
* allow any process running in any session to access that file
|
||||
* mapping object provided that the caller has the required access rights.
|
||||
* But to avoid issues faced in main shared memory, we are using the naming
|
||||
* convention similar to main shared memory. We can change here once
|
||||
* issue mentioned in GetSharedMemName is resolved.
|
||||
* Storing the shared memory segment in the Global\ namespace, can allow
|
||||
* any process running in any session to access that file mapping object
|
||||
* provided that the caller has the required access rights. But to avoid
|
||||
* issues faced in main shared memory, we are using the naming convention
|
||||
* similar to main shared memory. We can change here once issue mentioned
|
||||
* in GetSharedMemName is resolved.
|
||||
*/
|
||||
snprintf(name, 64, "%s.%u", SEGMENT_NAME_PREFIX, handle);
|
||||
|
||||
|
@ -752,9 +753,9 @@ dsm_impl_windows(dsm_op op, dsm_handle handle, Size request_size,
|
|||
}
|
||||
|
||||
/*
|
||||
* VirtualQuery gives size in page_size units, which is 4K for Windows.
|
||||
* We need size only when we are attaching, but it's better to get the
|
||||
* size when creating new segment to keep size consistent both for
|
||||
* VirtualQuery gives size in page_size units, which is 4K for Windows. We
|
||||
* need size only when we are attaching, but it's better to get the size
|
||||
* when creating new segment to keep size consistent both for
|
||||
* DSM_OP_CREATE and DSM_OP_ATTACH.
|
||||
*/
|
||||
if (VirtualQuery(address, &info, sizeof(info)) == 0)
|
||||
|
@ -891,19 +892,19 @@ dsm_impl_mmap(dsm_op op, dsm_handle handle, Size request_size,
|
|||
/*
|
||||
* Allocate a buffer full of zeros.
|
||||
*
|
||||
* Note: palloc zbuffer, instead of just using a local char array,
|
||||
* to ensure it is reasonably well-aligned; this may save a few
|
||||
* cycles transferring data to the kernel.
|
||||
* Note: palloc zbuffer, instead of just using a local char array, to
|
||||
* ensure it is reasonably well-aligned; this may save a few cycles
|
||||
* transferring data to the kernel.
|
||||
*/
|
||||
char *zbuffer = (char *) palloc0(ZBUFFER_SIZE);
|
||||
uint32 remaining = request_size;
|
||||
bool success = true;
|
||||
|
||||
/*
|
||||
* Zero-fill the file. We have to do this the hard way to ensure
|
||||
* that all the file space has really been allocated, so that we
|
||||
* don't later seg fault when accessing the memory mapping. This
|
||||
* is pretty pessimal.
|
||||
* Zero-fill the file. We have to do this the hard way to ensure that
|
||||
* all the file space has really been allocated, so that we don't
|
||||
* later seg fault when accessing the memory mapping. This is pretty
|
||||
* pessimal.
|
||||
*/
|
||||
while (success && remaining > 0)
|
||||
{
|
||||
|
@ -966,8 +967,8 @@ dsm_impl_mmap(dsm_op op, dsm_handle handle, Size request_size,
|
|||
}
|
||||
|
||||
/* Map it. */
|
||||
address = mmap(NULL, request_size, PROT_READ|PROT_WRITE,
|
||||
MAP_SHARED|MAP_HASSEMAPHORE, fd, 0);
|
||||
address = mmap(NULL, request_size, PROT_READ | PROT_WRITE,
|
||||
MAP_SHARED | MAP_HASSEMAPHORE, fd, 0);
|
||||
if (address == MAP_FAILED)
|
||||
{
|
||||
int save_errno;
|
||||
|
|
|
@ -219,10 +219,10 @@ shmem_exit(int code)
|
|||
/*
|
||||
* Call before_shmem_exit callbacks.
|
||||
*
|
||||
* These should be things that need most of the system to still be
|
||||
* up and working, such as cleanup of temp relations, which requires
|
||||
* catalog access; or things that need to be completed because later
|
||||
* cleanup steps depend on them, such as releasing lwlocks.
|
||||
* These should be things that need most of the system to still be up and
|
||||
* working, such as cleanup of temp relations, which requires catalog
|
||||
* access; or things that need to be completed because later cleanup steps
|
||||
* depend on them, such as releasing lwlocks.
|
||||
*/
|
||||
elog(DEBUG3, "shmem_exit(%d): %d before_shmem_exit callbacks to make",
|
||||
code, before_shmem_exit_index);
|
||||
|
@ -241,9 +241,9 @@ shmem_exit(int code)
|
|||
* callback before invoking it, so that we don't get stuck in an infinite
|
||||
* loop if one of those callbacks itself throws an ERROR or FATAL.
|
||||
*
|
||||
* Note that explicitly calling this function here is quite different
|
||||
* from registering it as an on_shmem_exit callback for precisely this
|
||||
* reason: if one dynamic shared memory callback errors out, the remaining
|
||||
* Note that explicitly calling this function here is quite different from
|
||||
* registering it as an on_shmem_exit callback for precisely this reason:
|
||||
* if one dynamic shared memory callback errors out, the remaining
|
||||
* callbacks will still be invoked. Thus, hard-coding this call puts it
|
||||
* on equal footing with callbacks for the main shared memory segment.
|
||||
*/
|
||||
|
|
|
@ -142,7 +142,7 @@ static shm_mq_result shm_mq_send_bytes(shm_mq_handle *mq, Size nbytes,
|
|||
void *data, bool nowait, Size *bytes_written);
|
||||
static shm_mq_result shm_mq_receive_bytes(shm_mq *mq, Size bytes_needed,
|
||||
bool nowait, Size *nbytesp, void **datap);
|
||||
static bool shm_mq_wait_internal(volatile shm_mq *mq, PGPROC * volatile *ptr,
|
||||
static bool shm_mq_wait_internal(volatile shm_mq *mq, PGPROC *volatile * ptr,
|
||||
BackgroundWorkerHandle *handle);
|
||||
static uint64 shm_mq_get_bytes_read(volatile shm_mq *mq, bool *detached);
|
||||
static void shm_mq_inc_bytes_read(volatile shm_mq *mq, Size n);
|
||||
|
@ -153,7 +153,7 @@ static void shm_mq_detach_callback(dsm_segment *seg, Datum arg);
|
|||
|
||||
/* Minimum queue size is enough for header and at least one chunk of data. */
|
||||
const Size shm_mq_minimum_size =
|
||||
MAXALIGN(offsetof(shm_mq, mq_ring)) + MAXIMUM_ALIGNOF;
|
||||
MAXALIGN(offsetof(shm_mq, mq_ring)) + MAXIMUM_ALIGNOF;
|
||||
|
||||
#define MQH_INITIAL_BUFSIZE 8192
|
||||
|
||||
|
@ -328,7 +328,7 @@ shm_mq_send(shm_mq_handle *mqh, Size nbytes, void *data, bool nowait)
|
|||
{
|
||||
Assert(mqh->mqh_partial_bytes < sizeof(Size));
|
||||
res = shm_mq_send_bytes(mqh, sizeof(Size) - mqh->mqh_partial_bytes,
|
||||
((char *) &nbytes) + mqh->mqh_partial_bytes,
|
||||
((char *) &nbytes) +mqh->mqh_partial_bytes,
|
||||
nowait, &bytes_written);
|
||||
mqh->mqh_partial_bytes += bytes_written;
|
||||
if (res != SHM_MQ_SUCCESS)
|
||||
|
@ -441,16 +441,17 @@ shm_mq_receive(shm_mq_handle *mqh, Size *nbytesp, void **datap, bool nowait)
|
|||
{
|
||||
Size needed;
|
||||
|
||||
nbytes = * (Size *) rawdata;
|
||||
nbytes = *(Size *) rawdata;
|
||||
|
||||
/* If we've already got the whole message, we're done. */
|
||||
needed = MAXALIGN(sizeof(Size)) + MAXALIGN(nbytes);
|
||||
if (rb >= needed)
|
||||
{
|
||||
/*
|
||||
* Technically, we could consume the message length information
|
||||
* at this point, but the extra write to shared memory wouldn't
|
||||
* be free and in most cases we would reap no benefit.
|
||||
* Technically, we could consume the message length
|
||||
* information at this point, but the extra write to shared
|
||||
* memory wouldn't be free and in most cases we would reap no
|
||||
* benefit.
|
||||
*/
|
||||
mqh->mqh_consume_pending = needed;
|
||||
*nbytesp = nbytes;
|
||||
|
@ -498,7 +499,7 @@ shm_mq_receive(shm_mq_handle *mqh, Size *nbytesp, void **datap, bool nowait)
|
|||
if (mqh->mqh_partial_bytes >= sizeof(Size))
|
||||
{
|
||||
Assert(mqh->mqh_partial_bytes == sizeof(Size));
|
||||
mqh->mqh_expected_bytes = * (Size *) mqh->mqh_buffer;
|
||||
mqh->mqh_expected_bytes = *(Size *) mqh->mqh_buffer;
|
||||
mqh->mqh_length_word_complete = true;
|
||||
mqh->mqh_partial_bytes = 0;
|
||||
}
|
||||
|
@ -527,8 +528,8 @@ shm_mq_receive(shm_mq_handle *mqh, Size *nbytesp, void **datap, bool nowait)
|
|||
|
||||
/*
|
||||
* The message has wrapped the buffer. We'll need to copy it in order
|
||||
* to return it to the client in one chunk. First, make sure we have a
|
||||
* large enough buffer available.
|
||||
* to return it to the client in one chunk. First, make sure we have
|
||||
* a large enough buffer available.
|
||||
*/
|
||||
if (mqh->mqh_buflen < nbytes)
|
||||
{
|
||||
|
@ -559,10 +560,10 @@ shm_mq_receive(shm_mq_handle *mqh, Size *nbytesp, void **datap, bool nowait)
|
|||
mqh->mqh_partial_bytes += rb;
|
||||
|
||||
/*
|
||||
* Update count of bytes read, with alignment padding. Note
|
||||
* that this will never actually insert any padding except at the
|
||||
* end of a message, because the buffer size is a multiple of
|
||||
* MAXIMUM_ALIGNOF, and each read and write is as well.
|
||||
* Update count of bytes read, with alignment padding. Note that this
|
||||
* will never actually insert any padding except at the end of a
|
||||
* message, because the buffer size is a multiple of MAXIMUM_ALIGNOF,
|
||||
* and each read and write is as well.
|
||||
*/
|
||||
Assert(mqh->mqh_partial_bytes == nbytes || rb == MAXALIGN(rb));
|
||||
shm_mq_inc_bytes_read(mq, MAXALIGN(rb));
|
||||
|
@ -717,11 +718,11 @@ shm_mq_send_bytes(shm_mq_handle *mqh, Size nbytes, void *data, bool nowait,
|
|||
}
|
||||
|
||||
/*
|
||||
* Wait for our latch to be set. It might already be set for
|
||||
* some unrelated reason, but that'll just result in one extra
|
||||
* trip through the loop. It's worth it to avoid resetting the
|
||||
* latch at top of loop, because setting an already-set latch is
|
||||
* much cheaper than setting one that has been reset.
|
||||
* Wait for our latch to be set. It might already be set for some
|
||||
* unrelated reason, but that'll just result in one extra trip
|
||||
* through the loop. It's worth it to avoid resetting the latch
|
||||
* at top of loop, because setting an already-set latch is much
|
||||
* cheaper than setting one that has been reset.
|
||||
*/
|
||||
WaitLatch(&MyProc->procLatch, WL_LATCH_SET, 0);
|
||||
|
||||
|
@ -751,9 +752,9 @@ shm_mq_send_bytes(shm_mq_handle *mqh, Size nbytes, void *data, bool nowait,
|
|||
shm_mq_inc_bytes_written(mq, MAXALIGN(sendnow));
|
||||
|
||||
/*
|
||||
* For efficiency, we don't set the reader's latch here. We'll
|
||||
* do that only when the buffer fills up or after writing an
|
||||
* entire message.
|
||||
* For efficiency, we don't set the reader's latch here. We'll do
|
||||
* that only when the buffer fills up or after writing an entire
|
||||
* message.
|
||||
*/
|
||||
}
|
||||
}
|
||||
|
@ -801,10 +802,10 @@ shm_mq_receive_bytes(shm_mq *mq, Size bytes_needed, bool nowait,
|
|||
/*
|
||||
* Fall out before waiting if the queue has been detached.
|
||||
*
|
||||
* Note that we don't check for this until *after* considering
|
||||
* whether the data already available is enough, since the
|
||||
* receiver can finish receiving a message stored in the buffer
|
||||
* even after the sender has detached.
|
||||
* Note that we don't check for this until *after* considering whether
|
||||
* the data already available is enough, since the receiver can finish
|
||||
* receiving a message stored in the buffer even after the sender has
|
||||
* detached.
|
||||
*/
|
||||
if (detached)
|
||||
return SHM_MQ_DETACHED;
|
||||
|
@ -814,11 +815,11 @@ shm_mq_receive_bytes(shm_mq *mq, Size bytes_needed, bool nowait,
|
|||
return SHM_MQ_WOULD_BLOCK;
|
||||
|
||||
/*
|
||||
* Wait for our latch to be set. It might already be set for
|
||||
* some unrelated reason, but that'll just result in one extra
|
||||
* trip through the loop. It's worth it to avoid resetting the
|
||||
* latch at top of loop, because setting an already-set latch is
|
||||
* much cheaper than setting one that has been reset.
|
||||
* Wait for our latch to be set. It might already be set for some
|
||||
* unrelated reason, but that'll just result in one extra trip through
|
||||
* the loop. It's worth it to avoid resetting the latch at top of
|
||||
* loop, because setting an already-set latch is much cheaper than
|
||||
* setting one that has been reset.
|
||||
*/
|
||||
WaitLatch(&MyProc->procLatch, WL_LATCH_SET, 0);
|
||||
|
||||
|
@ -842,7 +843,7 @@ shm_mq_receive_bytes(shm_mq *mq, Size bytes_needed, bool nowait,
|
|||
* non-NULL when our counterpart attaches to the queue.
|
||||
*/
|
||||
static bool
|
||||
shm_mq_wait_internal(volatile shm_mq *mq, PGPROC * volatile *ptr,
|
||||
shm_mq_wait_internal(volatile shm_mq *mq, PGPROC *volatile * ptr,
|
||||
BackgroundWorkerHandle *handle)
|
||||
{
|
||||
bool save_set_latch_on_sigusr1;
|
||||
|
|
|
@ -96,7 +96,7 @@ shm_toc_allocate(shm_toc *toc, Size nbytes)
|
|||
total_bytes = vtoc->toc_total_bytes;
|
||||
allocated_bytes = vtoc->toc_allocated_bytes;
|
||||
nentry = vtoc->toc_nentry;
|
||||
toc_bytes = offsetof(shm_toc, toc_entry) + nentry * sizeof(shm_toc_entry)
|
||||
toc_bytes = offsetof(shm_toc, toc_entry) +nentry * sizeof(shm_toc_entry)
|
||||
+ allocated_bytes;
|
||||
|
||||
/* Check for memory exhaustion and overflow. */
|
||||
|
@ -132,7 +132,7 @@ shm_toc_freespace(shm_toc *toc)
|
|||
nentry = vtoc->toc_nentry;
|
||||
SpinLockRelease(&toc->toc_mutex);
|
||||
|
||||
toc_bytes = offsetof(shm_toc, toc_entry) + nentry * sizeof(shm_toc_entry);
|
||||
toc_bytes = offsetof(shm_toc, toc_entry) +nentry * sizeof(shm_toc_entry);
|
||||
Assert(allocated_bytes + BUFFERALIGN(toc_bytes) <= total_bytes);
|
||||
return total_bytes - (allocated_bytes + BUFFERALIGN(toc_bytes));
|
||||
}
|
||||
|
@ -176,7 +176,7 @@ shm_toc_insert(shm_toc *toc, uint64 key, void *address)
|
|||
total_bytes = vtoc->toc_total_bytes;
|
||||
allocated_bytes = vtoc->toc_allocated_bytes;
|
||||
nentry = vtoc->toc_nentry;
|
||||
toc_bytes = offsetof(shm_toc, toc_entry) + nentry * sizeof(shm_toc_entry)
|
||||
toc_bytes = offsetof(shm_toc, toc_entry) +nentry * sizeof(shm_toc_entry)
|
||||
+ allocated_bytes;
|
||||
|
||||
/* Check for memory exhaustion and overflow. */
|
||||
|
|
|
@ -889,8 +889,8 @@ LogStandbySnapshot(void)
|
|||
running = GetRunningTransactionData();
|
||||
|
||||
/*
|
||||
* GetRunningTransactionData() acquired ProcArrayLock, we must release
|
||||
* it. For Hot Standby this can be done before inserting the WAL record
|
||||
* GetRunningTransactionData() acquired ProcArrayLock, we must release it.
|
||||
* For Hot Standby this can be done before inserting the WAL record
|
||||
* because ProcArrayApplyRecoveryInfo() rechecks the commit status using
|
||||
* the clog. For logical decoding, though, the lock can't be released
|
||||
* early because the clog might be "in the future" from the POV of the
|
||||
|
@ -977,9 +977,9 @@ LogCurrentRunningXacts(RunningTransactions CurrRunningXacts)
|
|||
/*
|
||||
* Ensure running_xacts information is synced to disk not too far in the
|
||||
* future. We don't want to stall anything though (i.e. use XLogFlush()),
|
||||
* so we let the wal writer do it during normal
|
||||
* operation. XLogSetAsyncXactLSN() conveniently will mark the LSN as
|
||||
* to-be-synced and nudge the WALWriter into action if sleeping. Check
|
||||
* so we let the wal writer do it during normal operation.
|
||||
* XLogSetAsyncXactLSN() conveniently will mark the LSN as to-be-synced
|
||||
* and nudge the WALWriter into action if sleeping. Check
|
||||
* XLogBackgroundFlush() for details why a record might not be flushed
|
||||
* without it.
|
||||
*/
|
||||
|
|
|
@ -266,10 +266,10 @@ inv_open(Oid lobjId, int flags, MemoryContext mcxt)
|
|||
errmsg("large object %u does not exist", lobjId)));
|
||||
|
||||
/*
|
||||
* We must register the snapshot in TopTransaction's resowner, because
|
||||
* it must stay alive until the LO is closed rather than until the
|
||||
* current portal shuts down. Do this after checking that the LO exists,
|
||||
* to avoid leaking the snapshot if an error is thrown.
|
||||
* We must register the snapshot in TopTransaction's resowner, because it
|
||||
* must stay alive until the LO is closed rather than until the current
|
||||
* portal shuts down. Do this after checking that the LO exists, to avoid
|
||||
* leaking the snapshot if an error is thrown.
|
||||
*/
|
||||
if (snapshot)
|
||||
snapshot = RegisterSnapshotOnOwner(snapshot,
|
||||
|
|
|
@ -920,8 +920,8 @@ LWLockWaitForVar(LWLock *l, uint64 *valptr, uint64 oldval, uint64 *newval)
|
|||
return true;
|
||||
|
||||
/*
|
||||
* Lock out cancel/die interrupts while we sleep on the lock. There is
|
||||
* no cleanup mechanism to remove us from the wait queue if we got
|
||||
* Lock out cancel/die interrupts while we sleep on the lock. There is no
|
||||
* cleanup mechanism to remove us from the wait queue if we got
|
||||
* interrupted.
|
||||
*/
|
||||
HOLD_INTERRUPTS();
|
||||
|
|
|
@ -4919,6 +4919,7 @@ is_admin_of_role(Oid member, Oid role)
|
|||
return true;
|
||||
|
||||
if (member == role)
|
||||
|
||||
/*
|
||||
* A role can admin itself when it matches the session user and we're
|
||||
* outside any security-restricted operation, SECURITY DEFINER or
|
||||
|
|
|
@ -1747,6 +1747,7 @@ Datum
|
|||
array_cardinality(PG_FUNCTION_ARGS)
|
||||
{
|
||||
ArrayType *v = PG_GETARG_ARRAYTYPE_P(0);
|
||||
|
||||
PG_RETURN_INT32(ArrayGetNItems(ARR_NDIM(v), ARR_DIMS(v)));
|
||||
}
|
||||
|
||||
|
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue