Add gin_clean_pending_list function to clean up GIN pending list

This function cleans up the pending list of the GIN index by
moving entries in it to the main GIN data structure in bulk.
It returns the number of pages cleaned up from the pending list.

This function is useful, for example, when the pending list
needs to be cleaned up *quickly* to improve the performance of
searches that use the GIN index. VACUUM can do the same thing,
but it may take days to run on a large table.

Jeff Janes,
reviewed by Julien Rouhaud, Jaime Casanova, Alvaro Herrera and me.

Discussion: CAMkU=1x8zFkpfnozXyt40zmR3Ub_kHu58LtRmwHUKRgQss7=iQ@mail.gmail.com
This commit is contained in:
Fujii Masao 2016-01-28 12:57:52 +09:00
parent eaf7b1f643
commit 7f46eaf035
9 changed files with 108 additions and 4 deletions

View File

@ -18036,9 +18036,16 @@ postgres=# SELECT * FROM pg_xlogfile_name_offset(pg_stop_backup());
<primary>brin_summarize_new_values</primary>
</indexterm>
<indexterm>
<primary>gin_clean_pending_list</primary>
</indexterm>
<para>
<xref linkend="functions-admin-index-table"> shows the functions
available for index maintenance tasks.
These functions cannot be executed during recovery.
Use of these functions is restricted to superusers and the owner
of the given index.
</para>
<table id="functions-admin-index-table">
@ -18056,6 +18063,13 @@ postgres=# SELECT * FROM pg_xlogfile_name_offset(pg_stop_backup());
<entry><type>integer</type></entry>
<entry>summarize page ranges not already summarized</entry>
</row>
<row>
<entry>
<literal><function>gin_clean_pending_list(<parameter>index</> <type>regclass</>)</function></literal>
</entry>
<entry><type>bigint</type></entry>
<entry>move GIN pending list entries into main index structure</entry>
</row>
</tbody>
</tgroup>
</table>
@ -18069,6 +18083,18 @@ postgres=# SELECT * FROM pg_xlogfile_name_offset(pg_stop_backup());
into the index.
</para>
<para>
<function>gin_clean_pending_list</> accepts the OID or name of
a GIN index and cleans up the pending list of the specified GIN index
by moving entries in it to the main GIN data structure in bulk.
It returns the number of pages cleaned up from the pending list.
Note that if the argument is a GIN index built with the
<literal>fastupdate</> option disabled, no cleanup happens and the
return value is 0, because the index doesn't have a pending list.
Please see <xref linkend="gin-fast-update"> and <xref linkend="gin-tips">
for details of the pending list and the <literal>fastupdate</> option.
</para>
</sect2>
<sect2 id="functions-admin-genfile">

View File

@ -734,7 +734,9 @@
from the indexed item). As of <productname>PostgreSQL</productname> 8.4,
<acronym>GIN</> is capable of postponing much of this work by inserting
new tuples into a temporary, unsorted list of pending entries.
When the table is vacuumed, or if the pending list becomes larger than
When the table is vacuumed or autoanalyzed, or when the
<function>gin_clean_pending_list</function> function is called, or if the
pending list becomes larger than
<xref linkend="guc-gin-pending-list-limit">, the entries are moved to the
main <acronym>GIN</acronym> data structure using the same bulk insert
techniques used during initial index creation. This greatly improves

View File

@ -362,8 +362,8 @@ CREATE [ UNIQUE ] INDEX [ CONCURRENTLY ] [ [ IF NOT EXISTS ] <replaceable class=
Turning <literal>fastupdate</> off via <command>ALTER INDEX</> prevents
future insertions from going into the list of pending index entries,
but does not in itself flush previous entries. You might want to
<command>VACUUM</> the table afterward to ensure the pending list is
emptied.
<command>VACUUM</> the table or call the
<function>gin_clean_pending_list</> function afterward to ensure the
pending list is emptied.
</para>
</note>
</listitem>

View File

@ -20,10 +20,13 @@
#include "access/gin_private.h"
#include "access/xloginsert.h"
#include "access/xlog.h"
#include "commands/vacuum.h"
#include "catalog/pg_am.h"
#include "miscadmin.h"
#include "utils/memutils.h"
#include "utils/rel.h"
#include "utils/acl.h"
#include "storage/indexfsm.h"
/* GUC parameter */
@ -958,3 +961,52 @@ ginInsertCleanup(GinState *ginstate,
MemoryContextSwitchTo(oldCtx);
MemoryContextDelete(opCtx);
}
/*
* SQL-callable function to clean the insert pending list
*/
Datum
gin_clean_pending_list(PG_FUNCTION_ARGS)
{
Oid indexoid = PG_GETARG_OID(0);
Relation indexRel = index_open(indexoid, AccessShareLock);
IndexBulkDeleteResult stats;
GinState ginstate;
if (RecoveryInProgress())
ereport(ERROR,
(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
errmsg("recovery is in progress"),
errhint("GIN pending list cannot be cleaned up during recovery.")));
/* Must be a GIN index */
if (indexRel->rd_rel->relkind != RELKIND_INDEX ||
indexRel->rd_rel->relam != GIN_AM_OID)
ereport(ERROR,
(errcode(ERRCODE_WRONG_OBJECT_TYPE),
errmsg("\"%s\" is not a GIN index",
RelationGetRelationName(indexRel))));
/*
* Reject attempts to read non-local temporary relations; we would be
* likely to get wrong data since we have no visibility into the owning
* session's local buffers.
*/
if (RELATION_IS_OTHER_TEMP(indexRel))
ereport(ERROR,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("cannot access temporary indexes of other sessions")));
/* User must own the index (comparable to privileges needed for VACUUM) */
if (!pg_class_ownercheck(indexoid, GetUserId()))
aclcheck_error(ACLCHECK_NOT_OWNER, ACL_KIND_CLASS,
RelationGetRelationName(indexRel));
memset(&stats, 0, sizeof(stats));
initGinState(&ginstate, indexRel);
ginInsertCleanup(&ginstate, true, &stats);
index_close(indexRel, AccessShareLock);
PG_RETURN_INT64((int64) stats.pages_deleted);
}

View File

@ -881,6 +881,9 @@ extern void ginFreeScanKeys(GinScanOpaque so);
/* ginget.c */
extern int64 gingetbitmap(IndexScanDesc scan, TIDBitmap *tbm);
/* ginfast.c */
extern Datum gin_clean_pending_list(PG_FUNCTION_ARGS);
/* ginlogic.c */
extern void ginInitConsistentFunction(GinState *ginstate, GinScanKey key);

View File

@ -53,6 +53,6 @@
*/
/* yyyymmddN */
#define CATALOG_VERSION_NO 201601271
#define CATALOG_VERSION_NO 201601281
#endif

View File

@ -4517,6 +4517,8 @@ DATA(insert OID = 3087 ( gin_extract_tsquery PGNSP PGUID 12 1 0 0 0 f f f f t f
DESCR("GIN tsvector support (obsolete)");
DATA(insert OID = 3088 ( gin_tsquery_consistent PGNSP PGUID 12 1 0 0 0 f f f f t f i s 6 0 16 "2281 21 3615 23 2281 2281" _null_ _null_ _null_ _null_ _null_ gin_tsquery_consistent_6args _null_ _null_ _null_ ));
DESCR("GIN tsvector support (obsolete)");
DATA(insert OID = 3789 ( gin_clean_pending_list PGNSP PGUID 12 1 0 0 0 f f f f t f v s 1 0 20 "2205" _null_ _null_ _null_ _null_ _null_ gin_clean_pending_list _null_ _null_ _null_ ));
DESCR("clean up GIN pending list");
DATA(insert OID = 3662 ( tsquery_lt PGNSP PGUID 12 1 0 0 0 f f f f t f i s 2 0 16 "3615 3615" _null_ _null_ _null_ _null_ _null_ tsquery_lt _null_ _null_ _null_ ));
DATA(insert OID = 3663 ( tsquery_le PGNSP PGUID 12 1 0 0 0 f f f f t f i s 2 0 16 "3615 3615" _null_ _null_ _null_ _null_ _null_ tsquery_le _null_ _null_ _null_ ));

View File

@ -8,7 +8,20 @@ create table gin_test_tbl(i int4[]);
create index gin_test_idx on gin_test_tbl using gin (i) with (fastupdate = on);
insert into gin_test_tbl select array[1, 2, g] from generate_series(1, 20000) g;
insert into gin_test_tbl select array[1, 3, g] from generate_series(1, 1000) g;
select gin_clean_pending_list('gin_test_idx')>10 as many; -- flush the fastupdate buffers
many
------
t
(1 row)
insert into gin_test_tbl select array[3, 1, g] from generate_series(1, 1000) g;
vacuum gin_test_tbl; -- flush the fastupdate buffers
select gin_clean_pending_list('gin_test_idx'); -- nothing to flush
gin_clean_pending_list
------------------------
0
(1 row)
-- Test vacuuming
delete from gin_test_tbl where i @> array[2];
vacuum gin_test_tbl;

View File

@ -10,8 +10,14 @@ create index gin_test_idx on gin_test_tbl using gin (i) with (fastupdate = on);
insert into gin_test_tbl select array[1, 2, g] from generate_series(1, 20000) g;
insert into gin_test_tbl select array[1, 3, g] from generate_series(1, 1000) g;
select gin_clean_pending_list('gin_test_idx')>10 as many; -- flush the fastupdate buffers
insert into gin_test_tbl select array[3, 1, g] from generate_series(1, 1000) g;
vacuum gin_test_tbl; -- flush the fastupdate buffers
select gin_clean_pending_list('gin_test_idx'); -- nothing to flush
-- Test vacuuming
delete from gin_test_tbl where i @> array[2];
vacuum gin_test_tbl;