Fix hash index vs "snapshot too old" problemms

Hash indexes are not WAL-logged, and so do not maintain the LSN of index pages. Since the "snapshot too old" feature counts on detecting error conditions using the LSN of a table and all indexes on it, this makes it impossible to safely do early vacuuming on any table with a hash index, so add this to the tests for whether the xid used to vacuum a table can be adjusted based on old_snapshot_threshold. While at it, add a paragraph to the docs for old_snapshot_threshold which specifically mentions this and other aspects of the feature which may otherwise surprise users. Problem reported and patch reviewed by Amit Kapila
2016-05-06 07:47:12 -05:00 · 2016-05-06 07:47:12 -05:00 · 2cc41acd8f
parent 9b66aa006f
commit 2cc41acd8f
6 changed files with 62 additions and 6 deletions
--- a/doc/src/sgml/config.sgml
+++ b/doc/src/sgml/config.sgml
@ -2077,6 +2077,19 @@ include_dir 'conf.d'
         allowed, please note that in many workloads extreme bloat or
         transaction ID wraparound may occur in much shorter time frames.
        </para>
+
+        <para>
+         This setting does not attempt to guarantee that an error will be
+         generated under any particular circumstances.  In fact, if the
+         correct results can be generated from (for example) a cursor which
+         has materialized a result set, no error will be generated even if the
+         underlying rows in the referenced table have been vacuumed away.
+         Some tables cannot safely be vacuumed early, and so will not be
+         affected by this setting.  Examples include system catalogs and any
+         table which has a hash index.  For such tables this setting will
+         neither reduce bloat nor create a possibility of a <literal>snapshot
+         too old</> error on scanning.
+        </para>
       </listitem>
      </varlistentry>
     </variablelist>
--- a/src/backend/access/hash/hash.c
+++ b/src/backend/access/hash/hash.c
@ -279,7 +279,6 @@ hashgettuple(IndexScanDesc scan, ScanDirection dir)
 		buf = so->hashso_curbuf;
 		Assert(BufferIsValid(buf));
 		page = BufferGetPage(buf);
-		TestForOldSnapshot(scan->xs_snapshot, rel, page);
 		maxoffnum = PageGetMaxOffsetNumber(page);
 		for (offnum = ItemPointerGetOffsetNumber(current);
 			 offnum <= maxoffnum;
--- a/src/backend/access/hash/hashsearch.c
+++ b/src/backend/access/hash/hashsearch.c
@ -189,7 +189,6 @@ _hash_first(IndexScanDesc scan, ScanDirection dir)
 	/* Read the metapage */
 	metabuf = _hash_getbuf(rel, HASH_METAPAGE, HASH_READ, LH_META_PAGE);
 	page = BufferGetPage(metabuf);
-	TestForOldSnapshot(scan->xs_snapshot, rel, page);
 	metap = HashPageGetMeta(page);

 	/*
@ -243,7 +242,6 @@ _hash_first(IndexScanDesc scan, ScanDirection dir)
 	/* Fetch the primary bucket page for the bucket */
 	buf = _hash_getbuf(rel, blkno, HASH_READ, LH_BUCKET_PAGE);
 	page = BufferGetPage(buf);
-	TestForOldSnapshot(scan->xs_snapshot, rel, page);
 	opaque = (HashPageOpaque) PageGetSpecialPointer(page);
 	Assert(opaque->hasho_bucket == bucket);

@ -350,7 +348,6 @@ _hash_step(IndexScanDesc scan, Buffer *bufP, ScanDirection dir)
 					_hash_readnext(rel, &buf, &page, &opaque);
 					if (BufferIsValid(buf))
 					{
-						TestForOldSnapshot(scan->xs_snapshot, rel, page);
 						maxoff = PageGetMaxOffsetNumber(page);
 						offnum = _hash_binsearch(page, so->hashso_sk_hash);
 					}
@ -392,7 +389,6 @@ _hash_step(IndexScanDesc scan, Buffer *bufP, ScanDirection dir)
 					_hash_readprev(rel, &buf, &page, &opaque);
 					if (BufferIsValid(buf))
 					{
-						TestForOldSnapshot(scan->xs_snapshot, rel, page);
 						maxoff = PageGetMaxOffsetNumber(page);
 						offnum = _hash_binsearch_last(page, so->hashso_sk_hash);
 					}
--- a/src/backend/utils/cache/relcache.c
+++ b/src/backend/utils/cache/relcache.c
@ -5312,6 +5312,52 @@ RelationIdIsInInitFile(Oid relationId)
 	return RelationSupportsSysCache(relationId);
 }

+/*
+ * Tells whether any index for the relation is unlogged.
+ *
+ * Any index using the hash AM is implicitly unlogged.
+ *
+ * Note: There doesn't seem to be any way to have an unlogged index attached
+ * to a permanent table except to create a hash index, but it seems best to
+ * keep this general so that it returns sensible results even when they seem
+ * obvious (like for an unlogged table) and to handle possible future unlogged
+ * indexes on permanent tables.
+ */
+bool
+RelationHasUnloggedIndex(Relation rel)
+{
+	List		   *indexoidlist;
+	ListCell	   *indexoidscan;
+	bool			result = false;
+
+	indexoidlist = RelationGetIndexList(rel);
+
+	foreach(indexoidscan, indexoidlist)
+	{
+		Oid			indexoid = lfirst_oid(indexoidscan);
+		HeapTuple	tp;
+		Form_pg_class reltup;
+
+		tp = SearchSysCache1(RELOID, ObjectIdGetDatum(indexoid));
+		if (!HeapTupleIsValid(tp))
+			elog(ERROR, "cache lookup failed for relation %u", indexoid);
+		reltup = (Form_pg_class) GETSTRUCT(tp);
+
+		if (reltup->relpersistence == RELPERSISTENCE_UNLOGGED
+			|| reltup->relam == HASH_AM_OID)
+			result = true;
+
+		ReleaseSysCache(tp);
+
+		if (result == true)
+			break;
+	}
+
+	list_free(indexoidlist);
+
+	return result;
+}
+
 /*
 * Invalidate (remove) the init file during commit of a transaction that
 * changed one or more of the relation cache entries that are kept in the
--- a/src/backend/utils/time/snapmgr.c
+++ b/src/backend/utils/time/snapmgr.c
@ -1590,7 +1590,8 @@ TransactionIdLimitedForOldSnapshots(TransactionId recentXmin,
 		&& old_snapshot_threshold >= 0
 		&& RelationNeedsWAL(relation)
 		&& !IsCatalogRelation(relation)
-		&& !RelationIsAccessibleInLogicalDecoding(relation))
+		&& !RelationIsAccessibleInLogicalDecoding(relation)
+		&& !RelationHasUnloggedIndex(relation))
 	{
 		int64		ts = GetSnapshotCurrentTimestamp();
 		TransactionId xlimit = recentXmin;
--- a/src/include/utils/rel.h
+++ b/src/include/utils/rel.h
@ -505,5 +505,6 @@ typedef struct ViewOptions
 /* routines in utils/cache/relcache.c */
 extern void RelationIncrementReferenceCount(Relation rel);
 extern void RelationDecrementReferenceCount(Relation rel);
+extern bool RelationHasUnloggedIndex(Relation rel);

 #endif   /* REL_H */