postgresql/contrib/pg_prewarm/pg_prewarm.c

Ignoring revisions in .git-blame-ignore-revs. Click here to bypass and see the normal blame view.

243 lines
6.4 KiB
C
Raw Permalink Normal View History

/*-------------------------------------------------------------------------
*
* pg_prewarm.c
* prewarming utilities
*
* Copyright (c) 2010-2024, PostgreSQL Global Development Group
*
* IDENTIFICATION
* contrib/pg_prewarm/pg_prewarm.c
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#include <sys/stat.h>
#include <unistd.h>
#include "access/relation.h"
#include "fmgr.h"
#include "miscadmin.h"
#include "storage/bufmgr.h"
#include "storage/read_stream.h"
#include "storage/smgr.h"
#include "utils/acl.h"
#include "utils/builtins.h"
#include "utils/lsyscache.h"
#include "utils/rel.h"
/* Extension boilerplate: emit the module magic block for this library. */
PG_MODULE_MAGIC;
/* Emit the version-1 function-info record for the pg_prewarm() entry point. */
PG_FUNCTION_INFO_V1(pg_prewarm);
/*
 * Strategy selected by pg_prewarm()'s second ("mode") argument.
 */
typedef enum
{
	PREWARM_PREFETCH = 0,		/* 'prefetch': issue PrefetchBuffer() requests */
	PREWARM_READ,				/* 'read': smgrread() into a scratch buffer */
	PREWARM_BUFFER,				/* 'buffer': pull the blocks into shared_buffers */
} PrewarmType;
Introduce PG_IO_ALIGN_SIZE and align all I/O buffers. In order to have the option to use O_DIRECT/FILE_FLAG_NO_BUFFERING in a later commit, we need the addresses of user space buffers to be well aligned. The exact requirements vary by OS and file system (typically sectors and/or memory pages). The address alignment size is set to 4096, which is enough for currently known systems: it matches modern sectors and common memory page size. There is no standard governing O_DIRECT's requirements so we might eventually have to reconsider this with more information from the field or future systems. Aligning I/O buffers on memory pages is also known to improve regular buffered I/O performance. Three classes of I/O buffers for regular data pages are adjusted: (1) Heap buffers are now allocated with the new palloc_aligned() or MemoryContextAllocAligned() functions introduced by commit 439f6175. (2) Stack buffers now use a new struct PGIOAlignedBlock to respect PG_IO_ALIGN_SIZE, if possible with this compiler. (3) The buffer pool is also aligned in shared memory. WAL buffers were already aligned on XLOG_BLCKSZ. It's possible for XLOG_BLCKSZ to be configured smaller than PG_IO_ALIGN_SIZE and thus for O_DIRECT WAL writes to fail to be well aligned, but that's a pre-existing condition and will be addressed by a later commit. BufFiles are not yet addressed (there's no current plan to use O_DIRECT for those, but they could potentially get some incidental speedup even in plain buffered I/O operations through better alignment). If we can't align stack objects suitably using the compiler extensions we know about, we disable the use of O_DIRECT by setting PG_O_DIRECT to 0. This avoids the need to consider systems that have O_DIRECT but can't align stack objects the way we want; such systems could in theory be supported with more work but we don't currently know of any such machines, so it's easier to pretend there is no O_DIRECT support instead. 
That's an existing and tested class of system. Add assertions that all buffers passed into smgrread(), smgrwrite() and smgrextend() are correctly aligned, unless PG_O_DIRECT is 0 (= stack alignment tricks may be unavailable) or the block size has been set too small to allow arrays of buffers to be all aligned. Author: Thomas Munro <thomas.munro@gmail.com> Author: Andres Freund <andres@anarazel.de> Reviewed-by: Justin Pryzby <pryzby@telsasoft.com> Discussion: https://postgr.es/m/CA+hUKGK1X532hYqJ_MzFWt0n1zt8trz980D79WbjwnT-yYLZpg@mail.gmail.com
2023-04-08 00:38:09 +02:00
/*
 * Scratch block used as the destination of smgrread() in 'read' mode.
 * Nothing in this file reads the contents back.
 */
static PGIOAlignedBlock blockbuffer;
/*
 * Private state for pg_prewarm_read_stream_next_block(), passed to the read
 * stream as its callback_private_data pointer.
 */
struct pg_prewarm_read_stream_private
{
/* next block number the callback will hand out */
BlockNumber blocknum;
/* last block number to hand out (inclusive) */
int64 last_block;
};
/*
 * Read stream callback for 'buffer' mode.
 *
 * Hands out consecutive block numbers, starting at the private state's
 * blocknum and ending with last_block (inclusive).  After that it returns
 * InvalidBlockNumber.  The stream and per_buffer_data arguments are unused.
 */
static BlockNumber
pg_prewarm_read_stream_next_block(ReadStream *stream,
								  void *callback_private_data,
								  void *per_buffer_data)
{
	struct pg_prewarm_read_stream_private *state = callback_private_data;

	/* Requested range exhausted: no more blocks to hand out. */
	if (state->blocknum > state->last_block)
		return InvalidBlockNumber;

	/* Return the current block and advance to the following one. */
	return state->blocknum++;
}
/*
 * pg_prewarm(regclass, mode text, fork text,
 *			  first_block int8, last_block int8)
 *
 * The first argument is the relation to be prewarmed; the second controls
 * how prewarming is done; legal options are 'prefetch', 'read', and 'buffer'.
 * The third is the name of the relation fork to be prewarmed.  The fourth
 * and fifth arguments specify the first and last block to be prewarmed.
 * If the fourth argument is NULL, it will be taken as 0; if the fifth argument
 * is NULL, it will be taken as the last block of the fork (that is, the
 * number of blocks in the fork minus one).  The return value is the number
 * of blocks processed: blocks for which a prefetch was requested in
 * 'prefetch' mode, or blocks read in 'read' and 'buffer' mode.
 *
 * Errors are raised for a NULL relation, mode or fork argument, an unknown
 * mode, a relation without storage, a fork that does not exist, block
 * numbers outside the fork, or (in 'prefetch' mode) a build without
 * USE_PREFETCH.  The caller needs SELECT privilege on the relation.
 */
Datum
pg_prewarm(PG_FUNCTION_ARGS)
{
	Oid			relOid;
	text	   *forkName;
	text	   *type;
	int64		first_block;
	int64		last_block;
	int64		nblocks;
	int64		blocks_done = 0;
	int64		block;
	Relation	rel;
	ForkNumber	forkNumber;
	char	   *forkString;
	char	   *ttype;
	PrewarmType ptype;
	AclResult	aclresult;

	/* Basic sanity checking. */
	if (PG_ARGISNULL(0))
		ereport(ERROR,
				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
				 errmsg("relation cannot be null")));
	relOid = PG_GETARG_OID(0);
	if (PG_ARGISNULL(1))
		ereport(ERROR,
				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
				 errmsg("prewarm type cannot be null")));
	type = PG_GETARG_TEXT_PP(1);
	ttype = text_to_cstring(type);
	if (strcmp(ttype, "prefetch") == 0)
		ptype = PREWARM_PREFETCH;
	else if (strcmp(ttype, "read") == 0)
		ptype = PREWARM_READ;
	else if (strcmp(ttype, "buffer") == 0)
		ptype = PREWARM_BUFFER;
	else
	{
		ereport(ERROR,
				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
				 errmsg("invalid prewarm type"),
				 errhint("Valid prewarm types are \"prefetch\", \"read\", and \"buffer\".")));
		PG_RETURN_INT64(0);		/* Placate compiler. */
	}
	if (PG_ARGISNULL(2))
		ereport(ERROR,
				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
				 errmsg("relation fork cannot be null")));
	forkName = PG_GETARG_TEXT_PP(2);
	forkString = text_to_cstring(forkName);
	forkNumber = forkname_to_number(forkString);

	/* Open relation and check privileges. */
	rel = relation_open(relOid, AccessShareLock);
	aclresult = pg_class_aclcheck(relOid, GetUserId(), ACL_SELECT);
	if (aclresult != ACLCHECK_OK)
		aclcheck_error(aclresult, get_relkind_objtype(rel->rd_rel->relkind), get_rel_name(relOid));

	/*
	 * Check that the relation has storage.  Relation kinds such as views and
	 * partitioned tables have no underlying files, so the storage manager
	 * must not be consulted for them; reject them with a clear error before
	 * the smgr calls below.
	 */
	if (!RELKIND_HAS_STORAGE(rel->rd_rel->relkind))
		ereport(ERROR,
				(errcode(ERRCODE_WRONG_OBJECT_TYPE),
				 errmsg("relation \"%s\" does not have storage",
						RelationGetRelationName(rel)),
				 errdetail_relkind_not_supported(rel->rd_rel->relkind)));

	/* Check that the fork exists. */
	if (!smgrexists(RelationGetSmgr(rel), forkNumber))
		ereport(ERROR,
				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
				 errmsg("fork \"%s\" does not exist for this relation",
						forkString)));

	/* Validate block numbers, or handle nulls. */
	nblocks = RelationGetNumberOfBlocksInFork(rel, forkNumber);
	if (PG_ARGISNULL(3))
		first_block = 0;
	else
	{
		first_block = PG_GETARG_INT64(3);
		if (first_block < 0 || first_block >= nblocks)
			ereport(ERROR,
					(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
					 errmsg("starting block number must be between 0 and %lld",
							(long long) (nblocks - 1))));
	}
	if (PG_ARGISNULL(4))
		last_block = nblocks - 1;
	else
	{
		last_block = PG_GETARG_INT64(4);
		if (last_block < 0 || last_block >= nblocks)
			ereport(ERROR,
					(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
					 errmsg("ending block number must be between 0 and %lld",
							(long long) (nblocks - 1))));
	}

	/*
	 * Now we're ready to do the real work.  Note that if last_block is less
	 * than first_block, none of the loops below execute and we return 0.
	 */
	if (ptype == PREWARM_PREFETCH)
	{
#ifdef USE_PREFETCH

		/*
		 * In prefetch mode, we just hint the OS to read the blocks, but we
		 * don't know whether it really does it, and we don't wait for it to
		 * finish.
		 *
		 * It would probably be better to pass our prefetch requests in chunks
		 * of a megabyte or maybe even a whole segment at a time, but there's
		 * no practical way to do that at present without a gross modularity
		 * violation, so we just do this.
		 */
		for (block = first_block; block <= last_block; ++block)
		{
			CHECK_FOR_INTERRUPTS();
			PrefetchBuffer(rel, forkNumber, block);
			++blocks_done;
		}
#else
		ereport(ERROR,
				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
				 errmsg("prefetch is not supported by this build")));
#endif
	}
	else if (ptype == PREWARM_READ)
	{
		/*
		 * In read mode, we actually read the blocks, but not into shared
		 * buffers.  This is more portable than prefetch mode (it works
		 * everywhere) and is synchronous.
		 */
		for (block = first_block; block <= last_block; ++block)
		{
			CHECK_FOR_INTERRUPTS();
			smgrread(RelationGetSmgr(rel), forkNumber, block, blockbuffer.data);
			++blocks_done;
		}
	}
	else if (ptype == PREWARM_BUFFER)
	{
		struct pg_prewarm_read_stream_private p;
		ReadStream *stream;

		/*
		 * In buffer mode, we actually pull the data into shared_buffers.
		 */

		/* Set up the private state for our streaming buffer read callback. */
		p.blocknum = first_block;
		p.last_block = last_block;

		stream = read_stream_begin_relation(READ_STREAM_FULL,
											NULL,
											rel,
											forkNumber,
											pg_prewarm_read_stream_next_block,
											&p,
											0);

		for (block = first_block; block <= last_block; ++block)
		{
			Buffer		buf;

			CHECK_FOR_INTERRUPTS();
			buf = read_stream_next_buffer(stream, NULL);
			/* We only wanted the block loaded; drop our pin right away. */
			ReleaseBuffer(buf);
			++blocks_done;
		}
		/* The callback must be exhausted once every block was consumed. */
		Assert(read_stream_next_buffer(stream, NULL) == InvalidBuffer);
		read_stream_end(stream);
	}

	/* Close relation, release lock. */
	relation_close(rel, AccessShareLock);

	PG_RETURN_INT64(blocks_done);
}