diff --git a/src/backend/access/common/Makefile b/src/backend/access/common/Makefile index b9aff0ccfd..e78de31265 100644 --- a/src/backend/access/common/Makefile +++ b/src/backend/access/common/Makefile @@ -25,6 +25,7 @@ OBJS = \ scankey.o \ session.o \ syncscan.o \ + tidstore.o \ toast_compression.o \ toast_internals.o \ tupconvert.o \ diff --git a/src/backend/access/common/meson.build b/src/backend/access/common/meson.build index 725041a4ce..14090c728f 100644 --- a/src/backend/access/common/meson.build +++ b/src/backend/access/common/meson.build @@ -13,6 +13,7 @@ backend_sources += files( 'scankey.c', 'session.c', 'syncscan.c', + 'tidstore.c', 'toast_compression.c', 'toast_internals.c', 'tupconvert.c', diff --git a/src/backend/access/common/tidstore.c b/src/backend/access/common/tidstore.c new file mode 100644 index 0000000000..745393806d --- /dev/null +++ b/src/backend/access/common/tidstore.c @@ -0,0 +1,463 @@ +/*------------------------------------------------------------------------- + * + * tidstore.c + * TID (ItemPointerData) storage implementation. + * + * TidStore is a in-memory data structure to store TIDs (ItemPointerData). + * Internally it uses a radix tree as the storage for TIDs. The key is the + * BlockNumber and the value is a bitmap of offsets, BlocktableEntry. + * + * TidStore can be shared among parallel worker processes by passing DSA area + * to TidStoreCreate(). Other backends can attach to the shared TidStore by + * TidStoreAttach(). + * + * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * IDENTIFICATION + * src/backend/access/common/tidstore.c + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include "access/tidstore.h" +#include "miscadmin.h" +#include "nodes/bitmapset.h" +#include "storage/lwlock.h" +#include "utils/dsa.h" + + +#define WORDNUM(x) ((x) / BITS_PER_BITMAPWORD) +#define BITNUM(x) ((x) % BITS_PER_BITMAPWORD) + +/* number of active words for a page: */ +#define WORDS_PER_PAGE(n) ((n) / BITS_PER_BITMAPWORD + 1) + +/* + * This is named similarly to PagetableEntry in tidbitmap.c + * because the two have a similar function. + */ +typedef struct BlocktableEntry +{ + uint16 nwords; + bitmapword words[FLEXIBLE_ARRAY_MEMBER]; +} BlocktableEntry; +#define MaxBlocktableEntrySize \ + offsetof(BlocktableEntry, words) + \ + (sizeof(bitmapword) * WORDS_PER_PAGE(MaxOffsetNumber)) + +#define RT_PREFIX local_ts +#define RT_SCOPE static +#define RT_DECLARE +#define RT_DEFINE +#define RT_VALUE_TYPE BlocktableEntry +#define RT_VARLEN_VALUE_SIZE(page) \ + (offsetof(BlocktableEntry, words) + \ + sizeof(bitmapword) * (page)->nwords) +#include "lib/radixtree.h" + +#define RT_PREFIX shared_ts +#define RT_SHMEM +#define RT_SCOPE static +#define RT_DECLARE +#define RT_DEFINE +#define RT_VALUE_TYPE BlocktableEntry +#define RT_VARLEN_VALUE_SIZE(page) \ + (offsetof(BlocktableEntry, words) + \ + sizeof(bitmapword) * (page)->nwords) +#include "lib/radixtree.h" + +/* Per-backend state for a TidStore */ +struct TidStore +{ + /* MemoryContext where the TidStore is allocated */ + MemoryContext context; + + /* MemoryContext that the radix tree uses */ + MemoryContext rt_context; + + /* Storage for TIDs. Use either one depending on TidStoreIsShared() */ + union + { + local_ts_radix_tree *local; + shared_ts_radix_tree *shared; + } tree; + + /* DSA area for TidStore if using shared memory */ + dsa_area *area; +}; +#define TidStoreIsShared(ts) ((ts)->area != NULL) + +/* Iterator for TidStore */ +struct TidStoreIter +{ + TidStore *ts; + + /* iterator of radix tree. Use either one depending on TidStoreIsShared() */ + union + { + shared_ts_iter *shared; + local_ts_iter *local; + } tree_iter; + + /* output for the caller */ + TidStoreIterResult output; +}; + +static void tidstore_iter_extract_tids(TidStoreIter *iter, BlockNumber blkno, + BlocktableEntry *page); + +/* + * Create a TidStore. The TidStore will live in the memory context that is + * CurrentMemoryContext at the time of this call. The TID storage, backed + * by a radix tree, will live in its child memory context, rt_context. The + * TidStore will be limited to (approximately) max_bytes total memory + * consumption. If the 'area' is non-NULL, the radix tree is created in the + * DSA area. + * + * The returned object is allocated in backend-local memory. + */ +TidStore * +TidStoreCreate(size_t max_bytes, dsa_area *area, int tranche_id) +{ + TidStore *ts; + size_t initBlockSize = ALLOCSET_DEFAULT_INITSIZE; + size_t minContextSize = ALLOCSET_DEFAULT_MINSIZE; + size_t maxBlockSize = ALLOCSET_DEFAULT_MAXSIZE; + + ts = palloc0(sizeof(TidStore)); + ts->context = CurrentMemoryContext; + + /* choose the maxBlockSize to be no larger than 1/16 of max_bytes */ + while (16 * maxBlockSize > max_bytes * 1024L) + maxBlockSize >>= 1; + + if (maxBlockSize < ALLOCSET_DEFAULT_INITSIZE) + maxBlockSize = ALLOCSET_DEFAULT_INITSIZE; + + /* Create a memory context for the TID storage */ + ts->rt_context = AllocSetContextCreate(CurrentMemoryContext, + "TID storage", + minContextSize, + initBlockSize, + maxBlockSize); + + if (area != NULL) + { + ts->tree.shared = shared_ts_create(ts->rt_context, area, + tranche_id); + ts->area = area; + } + else + ts->tree.local = local_ts_create(ts->rt_context); + + return ts; +} + +/* + * Attach to the shared TidStore using the given handle. The returned object + * is allocated in backend-local memory using the CurrentMemoryContext. + */ +TidStore * +TidStoreAttach(dsa_area *area, dsa_pointer handle) +{ + TidStore *ts; + + Assert(area != NULL); + Assert(DsaPointerIsValid(handle)); + + /* create per-backend state */ + ts = palloc0(sizeof(TidStore)); + + /* Find the shared the shared radix tree */ + ts->tree.shared = shared_ts_attach(area, handle); + ts->area = area; + + return ts; +} + +/* + * Detach from a TidStore. This detaches from radix tree and frees the + * backend-local resources. The radix tree will continue to exist until + * it is either explicitly destroyed, or the area that backs it is returned + * to the operating system. + */ +void +TidStoreDetach(TidStore *ts) +{ + Assert(TidStoreIsShared(ts)); + + shared_ts_detach(ts->tree.shared); + pfree(ts); +} + +/* + * Lock support functions. + * + * We can use the radix tree's lock for shared TidStore as the data we + * need to protect is only the shared radix tree. + */ + +void +TidStoreLockExclusive(TidStore *ts) +{ + if (TidStoreIsShared(ts)) + shared_ts_lock_exclusive(ts->tree.shared); +} + +void +TidStoreLockShare(TidStore *ts) +{ + if (TidStoreIsShared(ts)) + shared_ts_lock_share(ts->tree.shared); +} + +void +TidStoreUnlock(TidStore *ts) +{ + if (TidStoreIsShared(ts)) + shared_ts_unlock(ts->tree.shared); +} + +/* + * Destroy a TidStore, returning all memory. + * + * Note that the caller must be certain that no other backend will attempt to + * access the TidStore before calling this function. Other backend must + * explicitly call TidStoreDetach() to free up backend-local memory associated + * with the TidStore. The backend that calls TidStoreDestroy() must not call + * TidStoreDetach(). + */ +void +TidStoreDestroy(TidStore *ts) +{ + /* Destroy underlying radix tree */ + if (TidStoreIsShared(ts)) + shared_ts_free(ts->tree.shared); + else + local_ts_free(ts->tree.local); + + MemoryContextDelete(ts->rt_context); + + pfree(ts); +} + +/* + * Create or replace an entry for the given block and array of offsets. + * + * NB: This function is designed and optimized for vacuum's heap scanning + * phase, so has some limitations: + * + * - The offset numbers "offsets" must be sorted in ascending order. + * - If the block number already exists, the entry will be replaced -- + * there is no way to add or remove offsets from an entry. + */ +void +TidStoreSetBlockOffsets(TidStore *ts, BlockNumber blkno, OffsetNumber *offsets, + int num_offsets) +{ + char data[MaxBlocktableEntrySize]; + BlocktableEntry *page = (BlocktableEntry *) data; + bitmapword word; + int wordnum; + int next_word_threshold; + int idx = 0; + + Assert(num_offsets > 0); + + /* Check if the given offset numbers are ordered */ + for (int i = 1; i < num_offsets; i++) + Assert(offsets[i] > offsets[i - 1]); + + for (wordnum = 0, next_word_threshold = BITS_PER_BITMAPWORD; + wordnum <= WORDNUM(offsets[num_offsets - 1]); + wordnum++, next_word_threshold += BITS_PER_BITMAPWORD) + { + word = 0; + + while (idx < num_offsets) + { + OffsetNumber off = offsets[idx]; + + /* safety check to ensure we don't overrun bit array bounds */ + if (!OffsetNumberIsValid(off)) + elog(ERROR, "tuple offset out of range: %u", off); + + if (off >= next_word_threshold) + break; + + word |= ((bitmapword) 1 << BITNUM(off)); + idx++; + } + + /* write out offset bitmap for this wordnum */ + page->words[wordnum] = word; + } + + page->nwords = wordnum; + Assert(page->nwords == WORDS_PER_PAGE(offsets[num_offsets - 1])); + + if (TidStoreIsShared(ts)) + shared_ts_set(ts->tree.shared, blkno, page); + else + local_ts_set(ts->tree.local, blkno, page); +} + +/* Return true if the given TID is present in the TidStore */ +bool +TidStoreIsMember(TidStore *ts, ItemPointer tid) +{ + int wordnum; + int bitnum; + BlocktableEntry *page; + BlockNumber blk = ItemPointerGetBlockNumber(tid); + OffsetNumber off = ItemPointerGetOffsetNumber(tid); + + if (TidStoreIsShared(ts)) + page = shared_ts_find(ts->tree.shared, blk); + else + page = local_ts_find(ts->tree.local, blk); + + /* no entry for the blk */ + if (page == NULL) + return false; + + wordnum = WORDNUM(off); + bitnum = BITNUM(off); + + /* no bitmap for the off */ + if (wordnum >= page->nwords) + return false; + + return (page->words[wordnum] & ((bitmapword) 1 << bitnum)) != 0; +} + +/* + * Prepare to iterate through a TidStore. + * + * The TidStoreIter struct is created in the caller's memory context, and it + * will be freed in TidStoreEndIterate. + * + * The caller is responsible for locking TidStore until the iteration is + * finished. + */ +TidStoreIter * +TidStoreBeginIterate(TidStore *ts) +{ + TidStoreIter *iter; + + iter = palloc0(sizeof(TidStoreIter)); + iter->ts = ts; + + /* + * We start with an array large enough to contain at least the offsets + * from one completely full bitmap element. + */ + iter->output.max_offset = 2 * BITS_PER_BITMAPWORD; + iter->output.offsets = palloc(sizeof(OffsetNumber) * iter->output.max_offset); + + if (TidStoreIsShared(ts)) + iter->tree_iter.shared = shared_ts_begin_iterate(ts->tree.shared); + else + iter->tree_iter.local = local_ts_begin_iterate(ts->tree.local); + + return iter; +} + + +/* + * Scan the TidStore and return the TIDs of the next block. The offsets in + * each iteration result are ordered, as are the block numbers over all + * iterations. + */ +TidStoreIterResult * +TidStoreIterateNext(TidStoreIter *iter) +{ + uint64 key; + BlocktableEntry *page; + + if (TidStoreIsShared(iter->ts)) + page = shared_ts_iterate_next(iter->tree_iter.shared, &key); + else + page = local_ts_iterate_next(iter->tree_iter.local, &key); + + if (page == NULL) + return NULL; + + /* Collect TIDs from the key-value pair */ + tidstore_iter_extract_tids(iter, (BlockNumber) key, page); + + return &(iter->output); +} + +/* + * Finish the iteration on TidStore. + * + * The caller is responsible for releasing any locks. + */ +void +TidStoreEndIterate(TidStoreIter *iter) +{ + if (TidStoreIsShared(iter->ts)) + shared_ts_end_iterate(iter->tree_iter.shared); + else + local_ts_end_iterate(iter->tree_iter.local); + + pfree(iter->output.offsets); + pfree(iter); +} + +/* + * Return the memory usage of TidStore. + */ +size_t +TidStoreMemoryUsage(TidStore *ts) +{ + if (TidStoreIsShared(ts)) + return shared_ts_memory_usage(ts->tree.shared); + else + return local_ts_memory_usage(ts->tree.local); +} + +dsa_pointer +TidStoreGetHandle(TidStore *ts) +{ + Assert(TidStoreIsShared(ts)); + + return (dsa_pointer) shared_ts_get_handle(ts->tree.shared); +} + +/* Extract TIDs from the given key-value pair */ +static void +tidstore_iter_extract_tids(TidStoreIter *iter, BlockNumber blkno, + BlocktableEntry *page) +{ + TidStoreIterResult *result = (&iter->output); + int wordnum; + + result->num_offsets = 0; + result->blkno = blkno; + + for (wordnum = 0; wordnum < page->nwords; wordnum++) + { + bitmapword w = page->words[wordnum]; + int off = wordnum * BITS_PER_BITMAPWORD; + + /* Make sure there is enough space to add offsets */ + if ((result->num_offsets + BITS_PER_BITMAPWORD) > result->max_offset) + { + result->max_offset *= 2; + result->offsets = repalloc(result->offsets, + sizeof(OffsetNumber) * result->max_offset); + } + + while (w != 0) + { + if (w & 1) + result->offsets[result->num_offsets++] = (OffsetNumber) off; + off++; + w >>= 1; + } + } +} diff --git a/src/include/access/tidstore.h b/src/include/access/tidstore.h new file mode 100644 index 0000000000..8cf4e94f12 --- /dev/null +++ b/src/include/access/tidstore.h @@ -0,0 +1,49 @@ +/*------------------------------------------------------------------------- + * + * tidstore.h + * TidStore interface. + * + * + * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * src/include/access/tidstore.h + * + *------------------------------------------------------------------------- + */ +#ifndef TIDSTORE_H +#define TIDSTORE_H + +#include "storage/itemptr.h" +#include "utils/dsa.h" + +typedef struct TidStore TidStore; +typedef struct TidStoreIter TidStoreIter; + +/* Result struct for TidStoreIterateNext */ +typedef struct TidStoreIterResult +{ + BlockNumber blkno; + int max_offset; + int num_offsets; + OffsetNumber *offsets; +} TidStoreIterResult; + +extern TidStore *TidStoreCreate(size_t max_bytes, dsa_area *dsa, + int tranche_id); +extern TidStore *TidStoreAttach(dsa_area *dsa, dsa_pointer rt_dp); +extern void TidStoreDetach(TidStore *ts); +extern void TidStoreLockExclusive(TidStore *ts); +extern void TidStoreLockShare(TidStore *ts); +extern void TidStoreUnlock(TidStore *ts); +extern void TidStoreDestroy(TidStore *ts); +extern void TidStoreSetBlockOffsets(TidStore *ts, BlockNumber blkno, OffsetNumber *offsets, + int num_offsets); +extern bool TidStoreIsMember(TidStore *ts, ItemPointer tid); +extern TidStoreIter *TidStoreBeginIterate(TidStore *ts); +extern TidStoreIterResult *TidStoreIterateNext(TidStoreIter *iter); +extern void TidStoreEndIterate(TidStoreIter *iter); +extern size_t TidStoreMemoryUsage(TidStore *ts); +extern dsa_pointer TidStoreGetHandle(TidStore *ts); + +#endif /* TIDSTORE_H */ diff --git a/src/test/modules/Makefile b/src/test/modules/Makefile index 875a76d6f1..1cbd532156 100644 --- a/src/test/modules/Makefile +++ b/src/test/modules/Makefile @@ -35,6 +35,7 @@ SUBDIRS = \ test_rls_hooks \ test_shm_mq \ test_slru \ + test_tidstore \ unsafe_tests \ worker_spi \ xid_wraparound diff --git a/src/test/modules/meson.build b/src/test/modules/meson.build index f1d18a1b29..7c11fb97f2 100644 --- a/src/test/modules/meson.build +++ b/src/test/modules/meson.build @@ -34,6 +34,7 @@ subdir('test_resowner') subdir('test_rls_hooks') subdir('test_shm_mq') subdir('test_slru') +subdir('test_tidstore') subdir('unsafe_tests') subdir('worker_spi') subdir('xid_wraparound') diff --git a/src/test/modules/test_tidstore/.gitignore b/src/test/modules/test_tidstore/.gitignore new file mode 100644 index 0000000000..5dcb3ff972 --- /dev/null +++ b/src/test/modules/test_tidstore/.gitignore @@ -0,0 +1,4 @@ +# Generated subdirectories +/log/ +/results/ +/tmp_check/ diff --git a/src/test/modules/test_tidstore/Makefile b/src/test/modules/test_tidstore/Makefile new file mode 100644 index 0000000000..dab107d70c --- /dev/null +++ b/src/test/modules/test_tidstore/Makefile @@ -0,0 +1,23 @@ +# src/test/modules/test_tidstore/Makefile + +MODULE_big = test_tidstore +OBJS = \ + $(WIN32RES) \ + test_tidstore.o +PGFILEDESC = "test_tidstore - test code for src/backend/access/common/tidstore.c" + +EXTENSION = test_tidstore +DATA = test_tidstore--1.0.sql + +REGRESS = test_tidstore + +ifdef USE_PGXS +PG_CONFIG = pg_config +PGXS := $(shell $(PG_CONFIG) --pgxs) +include $(PGXS) +else +subdir = src/test/modules/test_tidstore +top_builddir = ../../../.. +include $(top_builddir)/src/Makefile.global +include $(top_srcdir)/contrib/contrib-global.mk +endif diff --git a/src/test/modules/test_tidstore/expected/test_tidstore.out b/src/test/modules/test_tidstore/expected/test_tidstore.out new file mode 100644 index 0000000000..0ae2f970da --- /dev/null +++ b/src/test/modules/test_tidstore/expected/test_tidstore.out @@ -0,0 +1,97 @@ +-- Note: The test code use an array of TIDs for verification similar +-- to vacuum's dead item array pre-PG17. To avoid adding duplicates, +-- each call to do_set_block_offsets() should use different block +-- numbers. +CREATE EXTENSION test_tidstore; +-- To hide the output of do_set_block_offsets() +CREATE TEMP TABLE hideblocks(blockno bigint); +-- Constant values used in the tests. +\set maxblkno 4294967295 +-- The maximum number of heap tuples (MaxHeapTuplesPerPage) in 8kB block is 291. +-- We use a higher number to test tidstore. +\set maxoffset 512 +SELECT test_create(false); + test_create +------------- + +(1 row) + +-- Test on empty tidstore. +SELECT test_is_full(); + test_is_full +-------------- + f +(1 row) + +SELECT check_set_block_offsets(); + check_set_block_offsets +------------------------- + +(1 row) + +-- Add TIDs. +INSERT INTO hideblocks (blockno) +SELECT do_set_block_offsets(blk, array_agg(off)::int2[]) + FROM + (VALUES (0), (1), (:maxblkno / 2), (:maxblkno - 1), (:maxblkno)) AS blocks(blk), + (VALUES (1), (2), (:maxoffset / 2), (:maxoffset - 1), (:maxoffset)) AS offsets(off) + GROUP BY blk; +-- Add enough TIDs to cause the store to appear "full", compared +-- to the allocated memory it started out with. This is easier +-- with memory contexts in local memory. +INSERT INTO hideblocks (blockno) +SELECT do_set_block_offsets(blk, ARRAY[1,31,32,63,64,200]::int2[]) + FROM generate_series(1000, 2000, 1) blk; +-- Zero offset not allowed +SELECT do_set_block_offsets(1, ARRAY[0]::int2[]); +ERROR: tuple offset out of range: 0 +-- Check TIDs we've added to the store. +SELECT check_set_block_offsets(); + check_set_block_offsets +------------------------- + +(1 row) + +SELECT test_is_full(); + test_is_full +-------------- + t +(1 row) + +-- Re-create the TID store for randommized tests. +SELECT test_destroy(); + test_destroy +-------------- + +(1 row) + +-- Use shared memory this time. We can't do that in test_radixtree.sql, +-- because unused static functions would raise warnings there. +SELECT test_create(true); + test_create +------------- + +(1 row) + +-- Random TIDs test. The offset numbers are randomized and must be +-- unique and ordered. +INSERT INTO hideblocks (blockno) +SELECT do_set_block_offsets(blkno, array_agg(DISTINCT greatest((random() * :maxoffset)::int, 1))::int2[]) + FROM generate_series(1, 100) num_offsets, + generate_series(1000, 1100, 1) blkno +GROUP BY blkno; +-- Check TIDs we've added to the store. +SELECT check_set_block_offsets(); + check_set_block_offsets +------------------------- + +(1 row) + +-- cleanup +SELECT test_destroy(); + test_destroy +-------------- + +(1 row) + +DROP TABLE hideblocks; diff --git a/src/test/modules/test_tidstore/meson.build b/src/test/modules/test_tidstore/meson.build new file mode 100644 index 0000000000..0ed3ea2ef3 --- /dev/null +++ b/src/test/modules/test_tidstore/meson.build @@ -0,0 +1,33 @@ +# Copyright (c) 2024, PostgreSQL Global Development Group + +test_tidstore_sources = files( + 'test_tidstore.c', +) + +if host_system == 'windows' + test_tidstore_sources += rc_lib_gen.process(win32ver_rc, extra_args: [ + '--NAME', 'test_tidstore', + '--FILEDESC', 'test_tidstore - test code for src/backend/access/common/tidstore.c',]) +endif + +test_tidstore = shared_module('test_tidstore', + test_tidstore_sources, + kwargs: pg_test_mod_args, +) +test_install_libs += test_tidstore + +test_install_data += files( + 'test_tidstore.control', + 'test_tidstore--1.0.sql', +) + +tests += { + 'name': 'test_tidstore', + 'sd': meson.current_source_dir(), + 'bd': meson.current_build_dir(), + 'regress': { + 'sql': [ + 'test_tidstore', + ], + }, +} diff --git a/src/test/modules/test_tidstore/sql/test_tidstore.sql b/src/test/modules/test_tidstore/sql/test_tidstore.sql new file mode 100644 index 0000000000..e5edfbb264 --- /dev/null +++ b/src/test/modules/test_tidstore/sql/test_tidstore.sql @@ -0,0 +1,65 @@ +-- Note: The test code use an array of TIDs for verification similar +-- to vacuum's dead item array pre-PG17. To avoid adding duplicates, +-- each call to do_set_block_offsets() should use different block +-- numbers. + +CREATE EXTENSION test_tidstore; + +-- To hide the output of do_set_block_offsets() +CREATE TEMP TABLE hideblocks(blockno bigint); + +-- Constant values used in the tests. +\set maxblkno 4294967295 +-- The maximum number of heap tuples (MaxHeapTuplesPerPage) in 8kB block is 291. +-- We use a higher number to test tidstore. +\set maxoffset 512 + +SELECT test_create(false); + +-- Test on empty tidstore. +SELECT test_is_full(); +SELECT check_set_block_offsets(); + +-- Add TIDs. +INSERT INTO hideblocks (blockno) +SELECT do_set_block_offsets(blk, array_agg(off)::int2[]) + FROM + (VALUES (0), (1), (:maxblkno / 2), (:maxblkno - 1), (:maxblkno)) AS blocks(blk), + (VALUES (1), (2), (:maxoffset / 2), (:maxoffset - 1), (:maxoffset)) AS offsets(off) + GROUP BY blk; + +-- Add enough TIDs to cause the store to appear "full", compared +-- to the allocated memory it started out with. This is easier +-- with memory contexts in local memory. +INSERT INTO hideblocks (blockno) +SELECT do_set_block_offsets(blk, ARRAY[1,31,32,63,64,200]::int2[]) + FROM generate_series(1000, 2000, 1) blk; + +-- Zero offset not allowed +SELECT do_set_block_offsets(1, ARRAY[0]::int2[]); + +-- Check TIDs we've added to the store. +SELECT check_set_block_offsets(); + +SELECT test_is_full(); + +-- Re-create the TID store for randommized tests. +SELECT test_destroy(); +-- Use shared memory this time. We can't do that in test_radixtree.sql, +-- because unused static functions would raise warnings there. +SELECT test_create(true); + +-- Random TIDs test. The offset numbers are randomized and must be +-- unique and ordered. +INSERT INTO hideblocks (blockno) +SELECT do_set_block_offsets(blkno, array_agg(DISTINCT greatest((random() * :maxoffset)::int, 1))::int2[]) + FROM generate_series(1, 100) num_offsets, + generate_series(1000, 1100, 1) blkno +GROUP BY blkno; + +-- Check TIDs we've added to the store. +SELECT check_set_block_offsets(); + +-- cleanup +SELECT test_destroy(); +DROP TABLE hideblocks; diff --git a/src/test/modules/test_tidstore/test_tidstore--1.0.sql b/src/test/modules/test_tidstore/test_tidstore--1.0.sql new file mode 100644 index 0000000000..7e6c60c7bb --- /dev/null +++ b/src/test/modules/test_tidstore/test_tidstore--1.0.sql @@ -0,0 +1,27 @@ +/* src/test/modules/test_tidstore/test_tidstore--1.0.sql */ + +-- complain if script is sourced in psql, rather than via CREATE EXTENSION +\echo Use "CREATE EXTENSION test_tidstore" to load this file. \quit + +CREATE FUNCTION test_create( +shared bool) +RETURNS void STRICT PARALLEL UNSAFE +AS 'MODULE_PATHNAME' LANGUAGE C; + +CREATE FUNCTION do_set_block_offsets( +blkno bigint, +offsets int2[]) +RETURNS bigint STRICT PARALLEL UNSAFE +AS 'MODULE_PATHNAME' LANGUAGE C; + +CREATE FUNCTION check_set_block_offsets() +RETURNS void STRICT PARALLEL UNSAFE +AS 'MODULE_PATHNAME' LANGUAGE C; + +CREATE FUNCTION test_is_full() +RETURNS bool STRICT PARALLEL UNSAFE +AS 'MODULE_PATHNAME' LANGUAGE C; + +CREATE FUNCTION test_destroy() +RETURNS void STRICT PARALLEL UNSAFE +AS 'MODULE_PATHNAME' LANGUAGE C; diff --git a/src/test/modules/test_tidstore/test_tidstore.c b/src/test/modules/test_tidstore/test_tidstore.c new file mode 100644 index 0000000000..c74ad2cf8b --- /dev/null +++ b/src/test/modules/test_tidstore/test_tidstore.c @@ -0,0 +1,317 @@ +/*-------------------------------------------------------------------------- + * + * test_tidstore.c + * Test TidStore data structure. + * + * Note: all locking in this test module is useless since there is only + * a single process to use the TidStore. It is meant to be an example of + * usage. + * + * Copyright (c) 2024, PostgreSQL Global Development Group + * + * IDENTIFICATION + * src/test/modules/test_tidstore/test_tidstore.c + * + * ------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include "access/tidstore.h" +#include "fmgr.h" +#include "funcapi.h" +#include "storage/block.h" +#include "storage/itemptr.h" +#include "storage/lwlock.h" +#include "utils/array.h" +#include "utils/memutils.h" + +PG_MODULE_MAGIC; + +PG_FUNCTION_INFO_V1(test_create); +PG_FUNCTION_INFO_V1(do_set_block_offsets); +PG_FUNCTION_INFO_V1(check_set_block_offsets); +PG_FUNCTION_INFO_V1(test_is_full); +PG_FUNCTION_INFO_V1(test_destroy); + +static TidStore *tidstore = NULL; +static dsa_area *dsa = NULL; +static size_t tidstore_empty_size; + +/* array for verification of some tests */ +typedef struct ItemArray +{ + ItemPointerData *insert_tids; + ItemPointerData *lookup_tids; + ItemPointerData *iter_tids; + int max_tids; + int num_tids; +} ItemArray; + +static ItemArray items; + +/* comparator routine for ItemPointer */ +static int +itemptr_cmp(const void *left, const void *right) +{ + BlockNumber lblk, + rblk; + OffsetNumber loff, + roff; + + lblk = ItemPointerGetBlockNumber((ItemPointer) left); + rblk = ItemPointerGetBlockNumber((ItemPointer) right); + + if (lblk < rblk) + return -1; + if (lblk > rblk) + return 1; + + loff = ItemPointerGetOffsetNumber((ItemPointer) left); + roff = ItemPointerGetOffsetNumber((ItemPointer) right); + + if (loff < roff) + return -1; + if (loff > roff) + return 1; + + return 0; +} + +/* + * Create a TidStore. If shared is false, the tidstore is created + * on TopMemoryContext, otherwise on DSA. Although the tidstore + * is created on DSA, only the same process can subsequently use + * the tidstore. The tidstore handle is not shared anywhere. +*/ +Datum +test_create(PG_FUNCTION_ARGS) +{ + bool shared = PG_GETARG_BOOL(0); + MemoryContext old_ctx; + + /* doesn't really matter, since it's just a hint */ + size_t tidstore_max_size = 2 * 1024 * 1024; + size_t array_init_size = 1024; + + Assert(tidstore == NULL); + Assert(dsa == NULL); + + /* + * Create the TidStore on TopMemoryContext so that the same process use it + * for subsequent tests. + */ + old_ctx = MemoryContextSwitchTo(TopMemoryContext); + + if (shared) + { + int tranche_id; + + tranche_id = LWLockNewTrancheId(); + LWLockRegisterTranche(tranche_id, "test_tidstore"); + + dsa = dsa_create(tranche_id); + + /* + * Remain attached until end of backend or explicitly detached so that + * the same process use the tidstore for subsequent tests. + */ + dsa_pin_mapping(dsa); + + tidstore = TidStoreCreate(tidstore_max_size, dsa, tranche_id); + } + else + tidstore = TidStoreCreate(tidstore_max_size, NULL, 0); + + tidstore_empty_size = TidStoreMemoryUsage(tidstore); + + items.num_tids = 0; + items.max_tids = array_init_size / sizeof(ItemPointerData); + items.insert_tids = (ItemPointerData *) palloc0(array_init_size); + items.lookup_tids = (ItemPointerData *) palloc0(array_init_size); + items.iter_tids = (ItemPointerData *) palloc0(array_init_size); + + MemoryContextSwitchTo(old_ctx); + + PG_RETURN_VOID(); +} + +static void +sanity_check_array(ArrayType *ta) +{ + if (ARR_HASNULL(ta) && array_contains_nulls(ta)) + ereport(ERROR, + (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED), + errmsg("array must not contain nulls"))); + + if (ARR_NDIM(ta) > 1) + ereport(ERROR, + (errcode(ERRCODE_DATA_EXCEPTION), + errmsg("argument must be empty or one-dimensional array"))); +} + +/* Set the given block and offsets pairs */ +Datum +do_set_block_offsets(PG_FUNCTION_ARGS) +{ + BlockNumber blkno = PG_GETARG_INT64(0); + ArrayType *ta = PG_GETARG_ARRAYTYPE_P_COPY(1); + OffsetNumber *offs; + int noffs; + + sanity_check_array(ta); + + noffs = ArrayGetNItems(ARR_NDIM(ta), ARR_DIMS(ta)); + offs = ((OffsetNumber *) ARR_DATA_PTR(ta)); + + /* Set TIDs in the store */ + TidStoreLockExclusive(tidstore); + TidStoreSetBlockOffsets(tidstore, blkno, offs, noffs); + TidStoreUnlock(tidstore); + + /* Set TIDs in verification array */ + for (int i = 0; i < noffs; i++) + { + ItemPointer tid; + int idx = items.num_tids + i; + + /* Enlarge the TID arrays if necessary */ + if (idx >= items.max_tids) + { + items.max_tids *= 2; + items.insert_tids = repalloc(items.insert_tids, sizeof(ItemPointerData) * items.max_tids); + items.lookup_tids = repalloc(items.lookup_tids, sizeof(ItemPointerData) * items.max_tids); + items.iter_tids = repalloc(items.iter_tids, sizeof(ItemPointerData) * items.max_tids); + } + + tid = &(items.insert_tids[idx]); + ItemPointerSet(tid, blkno, offs[i]); + } + + /* Update statistics */ + items.num_tids += noffs; + + PG_RETURN_INT64(blkno); +} + +/* + * Verify TIDs in store against the array. + */ +Datum +check_set_block_offsets(PG_FUNCTION_ARGS) +{ + TidStoreIter *iter; + TidStoreIterResult *iter_result; + int num_iter_tids = 0; + int num_lookup_tids = 0; + BlockNumber prevblkno = 0;; + + /* lookup each member in the verification array */ + for (int i = 0; i < items.num_tids; i++) + if (!TidStoreIsMember(tidstore, &items.insert_tids[i])) + elog(ERROR, "missing TID with block %u, offset %u", + ItemPointerGetBlockNumber(&items.insert_tids[i]), + ItemPointerGetOffsetNumber(&items.insert_tids[i])); + + /* + * Lookup all possible TIDs for each distinct block in the verification + * array and save successful lookups in the lookup array. + */ + + for (int i = 0; i < items.num_tids; i++) + { + BlockNumber blkno = ItemPointerGetBlockNumber(&items.insert_tids[i]); + + if (i > 0 && blkno == prevblkno) + continue; + + for (OffsetNumber offset = FirstOffsetNumber; offset < MaxOffsetNumber; offset++) + { + ItemPointerData tid; + + ItemPointerSet(&tid, blkno, offset); + + TidStoreLockShare(tidstore); + if (TidStoreIsMember(tidstore, &tid)) + ItemPointerSet(&items.lookup_tids[num_lookup_tids++], blkno, offset); + TidStoreUnlock(tidstore); + } + + prevblkno = blkno; + } + + /* Collect TIDs stored in the tidstore, in order */ + + TidStoreLockShare(tidstore); + iter = TidStoreBeginIterate(tidstore); + while ((iter_result = TidStoreIterateNext(iter)) != NULL) + { + for (int i = 0; i < iter_result->num_offsets; i++) + ItemPointerSet(&(items.iter_tids[num_iter_tids++]), iter_result->blkno, + iter_result->offsets[i]); + } + TidStoreEndIterate(iter); + TidStoreUnlock(tidstore); + + /* + * Sort verification and lookup arrays and test that all arrays are the + * same. + */ + + if (num_lookup_tids != items.num_tids) + elog(ERROR, "should have %d TIDs, have %d", items.num_tids, num_lookup_tids); + if (num_iter_tids != items.num_tids) + elog(ERROR, "should have %d TIDs, have %d", items.num_tids, num_iter_tids); + + qsort(items.insert_tids, items.num_tids, sizeof(ItemPointerData), itemptr_cmp); + qsort(items.lookup_tids, items.num_tids, sizeof(ItemPointerData), itemptr_cmp); + for (int i = 0; i < items.num_tids; i++) + { + if (itemptr_cmp((const void *) &items.insert_tids[i], (const void *) &items.iter_tids[i]) != 0) + elog(ERROR, "TID iter array doesn't match verification array, got (%u,%u) expected (%u,%u)", + ItemPointerGetBlockNumber(&items.iter_tids[i]), + ItemPointerGetOffsetNumber(&items.iter_tids[i]), + ItemPointerGetBlockNumber(&items.insert_tids[i]), + ItemPointerGetOffsetNumber(&items.insert_tids[i])); + if (itemptr_cmp((const void *) &items.insert_tids[i], (const void *) &items.lookup_tids[i]) != 0) + elog(ERROR, "TID lookup array doesn't match verification array, got (%u,%u) expected (%u,%u)", + ItemPointerGetBlockNumber(&items.lookup_tids[i]), + ItemPointerGetOffsetNumber(&items.lookup_tids[i]), + ItemPointerGetBlockNumber(&items.insert_tids[i]), + ItemPointerGetOffsetNumber(&items.insert_tids[i])); + } + + PG_RETURN_VOID(); +} + +/* + * In real world use, we care if the memory usage is greater than + * some configured limit. Here we just want to verify that + * TidStoreMemoryUsage is not broken. + */ +Datum +test_is_full(PG_FUNCTION_ARGS) +{ + bool is_full; + + is_full = (TidStoreMemoryUsage(tidstore) > tidstore_empty_size); + + PG_RETURN_BOOL(is_full); +} + +/* Free the tidstore */ +Datum +test_destroy(PG_FUNCTION_ARGS) +{ + TidStoreDestroy(tidstore); + tidstore = NULL; + items.num_tids = 0; + pfree(items.insert_tids); + pfree(items.lookup_tids); + pfree(items.iter_tids); + + if (dsa) + dsa_detach(dsa); + dsa = NULL; + + PG_RETURN_VOID(); +} diff --git a/src/test/modules/test_tidstore/test_tidstore.control b/src/test/modules/test_tidstore/test_tidstore.control new file mode 100644 index 0000000000..9b6bd4638f --- /dev/null +++ b/src/test/modules/test_tidstore/test_tidstore.control @@ -0,0 +1,4 @@ +comment = 'Test code for tidstore' +default_version = '1.0' +module_pathname = '$libdir/test_tidstore' +relocatable = true diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list index e294f8bc4e..c79e7b2eb6 100644 --- a/src/tools/pgindent/typedefs.list +++ b/src/tools/pgindent/typedefs.list @@ -4065,3 +4065,8 @@ rfile ws_options ws_file_info PathKeyInfo +TidStore +TidStoreIter +TidStoreIterResult +BlocktableEntry +ItemArray