postgresql/src/backend/backup/backup_manifest.c

/*-------------------------------------------------------------------------
 *
 * backup_manifest.c
 *	  code for generating and sending a backup manifest
 *
 * Portions Copyright (c) 2010-2024, PostgreSQL Global Development Group
 *
 * IDENTIFICATION
 *	  src/backend/backup/backup_manifest.c
 *
 *-------------------------------------------------------------------------
 */
#include "postgres.h"

#include "access/timeline.h"
#include "backup/backup_manifest.h"
#include "backup/basebackup_sink.h"
#include "libpq/libpq.h"
#include "libpq/pqformat.h"
#include "mb/pg_wchar.h"
#include "utils/builtins.h"
#include "utils/json.h"

static void AppendStringToManifest(backup_manifest_info *manifest, const char *s);

/*
 * Does the user want a backup manifest?
 *
 * It's simplest to always have a manifest_info object, so that we don't need
 * checks for NULL pointers in too many places. However, if the user doesn't
 * want a manifest, we set manifest->buffile to NULL.
 */
static inline bool
IsManifestEnabled(backup_manifest_info *manifest)
{
	return (manifest->buffile != NULL);
}

/*
 * Convenience macro for appending data to the backup manifest.
 */
#define AppendToManifest(manifest, ...) \
	{ \
		char *_manifest_s = psprintf(__VA_ARGS__);	\
		AppendStringToManifest(manifest, _manifest_s);	\
		pfree(_manifest_s);	\
	}

/*
 * Initialize state so that we can construct a backup manifest.
 *
 * NB: Although the checksum type for the data files is configurable, the
 * checksum for the manifest itself always uses SHA-256. See comments in
 * SendBackupManifest.
 */
void
InitializeBackupManifest(backup_manifest_info *manifest,
						 backup_manifest_option want_manifest,
						 pg_checksum_type manifest_checksum_type)
{
	memset(manifest, 0, sizeof(backup_manifest_info));
	manifest->checksum_type = manifest_checksum_type;

	if (want_manifest == MANIFEST_OPTION_NO)
		manifest->buffile = NULL;
	else
	{
		manifest->buffile = BufFileCreateTemp(false);
		manifest->manifest_ctx = pg_cryptohash_create(PG_SHA256);
		if (pg_cryptohash_init(manifest->manifest_ctx) < 0)
			elog(ERROR, "failed to initialize checksum of backup manifest: %s",
				 pg_cryptohash_error(manifest->manifest_ctx));
	}

	manifest->manifest_size = UINT64CONST(0);
	manifest->force_encode = (want_manifest == MANIFEST_OPTION_FORCE_ENCODE);
	manifest->first_file = true;
	manifest->still_checksumming = true;

	if (want_manifest != MANIFEST_OPTION_NO)
		AppendToManifest(manifest,
						 "{ \"PostgreSQL-Backup-Manifest-Version\": 1,\n"
						 "\"Files\": [");
}

/*
 * Free resources assigned to a backup manifest constructed.
 */
void
FreeBackupManifest(backup_manifest_info *manifest)
{
	pg_cryptohash_free(manifest->manifest_ctx);
	manifest->manifest_ctx = NULL;
}

/*
 * Add an entry to the backup manifest for a file.
 */
void
AddFileToBackupManifest(backup_manifest_info *manifest, Oid spcoid,
						const char *pathname, size_t size, pg_time_t mtime,
						pg_checksum_context *checksum_ctx)
{
	char		pathbuf[MAXPGPATH];
	int			pathlen;
	StringInfoData buf;

	if (!IsManifestEnabled(manifest))
		return;

	/*
	 * If this file is part of a tablespace, the pathname passed to this
	 * function will be relative to the tar file that contains it. We want the
	 * pathname relative to the data directory (ignoring the intermediate
	 * symlink traversal).
	 */
	if (OidIsValid(spcoid))
	{
		snprintf(pathbuf, sizeof(pathbuf), "pg_tblspc/%u/%s", spcoid,
				 pathname);
		pathname = pathbuf;
	}

	/*
	 * Each file's entry needs to be separated from any entry that follows by
	 * a comma, but there's no comma before the first one or after the last
	 * one. To make that work, adding a file to the manifest starts by
	 * terminating the most recently added line, with a comma if appropriate,
	 * but does not terminate the line inserted for this file.
	 */
	initStringInfo(&buf);
	if (manifest->first_file)
	{
		appendStringInfoChar(&buf, '\n');
		manifest->first_file = false;
	}
	else
		appendStringInfoString(&buf, ",\n");

	/*
	 * Write the relative pathname to this file out to the manifest. The
	 * manifest is always stored in UTF-8, so we have to encode paths that are
	 * not valid in that encoding.
	 */
	pathlen = strlen(pathname);
	if (!manifest->force_encode &&
		pg_verify_mbstr(PG_UTF8, pathname, pathlen, true))
	{
		appendStringInfoString(&buf, "{ \"Path\": ");
		escape_json(&buf, pathname);
		appendStringInfoString(&buf, ", ");
	}
	else
	{
		appendStringInfoString(&buf, "{ \"Encoded-Path\": \"");
		enlargeStringInfo(&buf, 2 * pathlen);
		buf.len += hex_encode(pathname, pathlen,
							  &buf.data[buf.len]);
		appendStringInfoString(&buf, "\", ");
	}

	appendStringInfo(&buf, "\"Size\": %zu, ", size);

	/*
	 * Convert last modification time to a string and append it to the
	 * manifest. Since it's not clear what time zone to use and since time
	 * zone definitions can change, possibly causing confusion, use GMT
	 * always.
	 */
	appendStringInfoString(&buf, "\"Last-Modified\": \"");
	enlargeStringInfo(&buf, 128);
	buf.len += pg_strftime(&buf.data[buf.len], 128, "%Y-%m-%d %H:%M:%S %Z",
						   pg_gmtime(&mtime));
	appendStringInfoChar(&buf, '"');

	/* Add checksum information. */
	if (checksum_ctx->type != CHECKSUM_TYPE_NONE)
	{
		uint8		checksumbuf[PG_CHECKSUM_MAX_LENGTH];
		int			checksumlen;

		checksumlen = pg_checksum_final(checksum_ctx, checksumbuf);
		if (checksumlen < 0)
			elog(ERROR, "could not finalize checksum of file \"%s\"",
				 pathname);

		appendStringInfo(&buf,
						 ", \"Checksum-Algorithm\": \"%s\", \"Checksum\": \"",
						 pg_checksum_type_name(checksum_ctx->type));
		enlargeStringInfo(&buf, 2 * checksumlen);
		buf.len += hex_encode((char *) checksumbuf, checksumlen,
							  &buf.data[buf.len]);
		appendStringInfoChar(&buf, '"');
	}

	/* Close out the object. */
	appendStringInfoString(&buf, " }");

	/* OK, add it to the manifest. */
	AppendStringToManifest(manifest, buf.data);

	/* Avoid leaking memory. */
	pfree(buf.data);
}

/*
 * Add information about the WAL that will need to be replayed when restoring
 * this backup to the manifest.
 */
void
AddWALInfoToBackupManifest(backup_manifest_info *manifest, XLogRecPtr startptr,
						   TimeLineID starttli, XLogRecPtr endptr,
						   TimeLineID endtli)
{
	List	   *timelines;
	ListCell   *lc;
	bool		first_wal_range = true;
	bool		found_start_timeline = false;

	if (!IsManifestEnabled(manifest))
		return;

	/* Terminate the list of files. */
	AppendStringToManifest(manifest, "\n],\n");

	/* Read the timeline history for the ending timeline. */
	timelines = readTimeLineHistory(endtli);

	/* Start a list of LSN ranges. */
	AppendStringToManifest(manifest, "\"WAL-Ranges\": [\n");

	foreach(lc, timelines)
	{
		TimeLineHistoryEntry *entry = lfirst(lc);
		XLogRecPtr	tl_beginptr;

		/*
		 * We only care about timelines that were active during the backup.
		 * Skip any that ended before the backup started. (Note that if
		 * entry->end is InvalidXLogRecPtr, it means that the timeline has not
		 * yet ended.)
		 */
		if (!XLogRecPtrIsInvalid(entry->end) && entry->end < startptr)
			continue;

		/*
		 * Because the timeline history file lists newer timelines before
		 * older ones, the first timeline we encounter that is new enough to
		 * matter ought to match the ending timeline of the backup.
		 */
		if (first_wal_range && endtli != entry->tli)
			ereport(ERROR,
					errmsg("expected end timeline %u but found timeline %u",
						   starttli, entry->tli));

		/*
		 * If this timeline entry matches with the timeline on which the
		 * backup started, WAL needs to be checked from the start LSN of the
		 * backup.  If this entry refers to a newer timeline, WAL needs to be
		 * checked since the beginning of this timeline, so use the LSN where
		 * the timeline began.
		 */
		if (starttli == entry->tli)
			tl_beginptr = startptr;
		else
		{
			tl_beginptr = entry->begin;

			/*
			 * If we reach a TLI that has no valid beginning LSN, there can't
			 * be any more timelines in the history after this point, so we'd
			 * better have arrived at the expected starting TLI. If not,
			 * something's gone horribly wrong.
			 */
			if (XLogRecPtrIsInvalid(entry->begin))
				ereport(ERROR,
						errmsg("expected start timeline %u but found timeline %u",
							   starttli, entry->tli));
		}

		AppendToManifest(manifest,
						 "%s{ \"Timeline\": %u, \"Start-LSN\": \"%X/%X\", \"End-LSN\": \"%X/%X\" }",
						 first_wal_range ? "" : ",\n",
						 entry->tli,
						 LSN_FORMAT_ARGS(tl_beginptr),
						 LSN_FORMAT_ARGS(endptr));

		if (starttli == entry->tli)
		{
			found_start_timeline = true;
			break;
		}

		endptr = entry->begin;
		first_wal_range = false;
	}

	/*
	 * The last entry in the timeline history for the ending timeline should
	 * be the ending timeline itself. Verify that this is what we observed.
	 */
	if (!found_start_timeline)
		ereport(ERROR,
				errmsg("start timeline %u not found in history of timeline %u",
					   starttli, endtli));

	/* Terminate the list of WAL ranges. */
	AppendStringToManifest(manifest, "\n],\n");
}

/*
 * Finalize the backup manifest, and send it to the client.
 */
void
SendBackupManifest(backup_manifest_info *manifest, bbsink *sink)
{
	uint8		checksumbuf[PG_SHA256_DIGEST_LENGTH];
	char		checksumstringbuf[PG_SHA256_DIGEST_STRING_LENGTH];
	size_t		manifest_bytes_done = 0;

	if (!IsManifestEnabled(manifest))
		return;

	/*
	 * Append manifest checksum, so that the problems with the manifest itself
	 * can be detected.
	 *
	 * We always use SHA-256 for this, regardless of what algorithm is chosen
	 * for checksumming the files.  If we ever want to make the checksum
	 * algorithm used for the manifest file variable, the client will need a
	 * way to figure out which algorithm to use as close to the beginning of
	 * the manifest file as possible, to avoid having to read the whole thing
	 * twice.
	 */
	manifest->still_checksumming = false;
	if (pg_cryptohash_final(manifest->manifest_ctx, checksumbuf,
							sizeof(checksumbuf)) < 0)
		elog(ERROR, "failed to finalize checksum of backup manifest: %s",
			 pg_cryptohash_error(manifest->manifest_ctx));
	AppendStringToManifest(manifest, "\"Manifest-Checksum\": \"");

	hex_encode((char *) checksumbuf, sizeof checksumbuf, checksumstringbuf);
	checksumstringbuf[PG_SHA256_DIGEST_STRING_LENGTH - 1] = '\0';

	AppendStringToManifest(manifest, checksumstringbuf);
	AppendStringToManifest(manifest, "\"}\n");

	/*
	 * We've written all the data to the manifest file.  Rewind the file so
	 * that we can read it all back.
	 */
	if (BufFileSeek(manifest->buffile, 0, 0, SEEK_SET))
		ereport(ERROR,
				(errcode_for_file_access(),
				 errmsg("could not rewind temporary file")));


	/*
	 * Send the backup manifest.
	 */
	bbsink_begin_manifest(sink);
	while (manifest_bytes_done < manifest->manifest_size)
	{
		size_t		bytes_to_read;

		bytes_to_read = Min(sink->bbs_buffer_length,
							manifest->manifest_size - manifest_bytes_done);
		BufFileReadExact(manifest->buffile, sink->bbs_buffer, bytes_to_read);
		bbsink_manifest_contents(sink, bytes_to_read);
		manifest_bytes_done += bytes_to_read;
	}
	bbsink_end_manifest(sink);

	/* Release resources */
	BufFileClose(manifest->buffile);
}

/*
 * Append a cstring to the manifest.
 */
static void
AppendStringToManifest(backup_manifest_info *manifest, const char *s)
{
	int			len = strlen(s);

	Assert(manifest != NULL);
	if (manifest->still_checksumming)
	{
		if (pg_cryptohash_update(manifest->manifest_ctx, (uint8 *) s, len) < 0)
			elog(ERROR, "failed to update checksum of backup manifest: %s",
				 pg_cryptohash_error(manifest->manifest_ctx));
	}
	BufFileWrite(manifest->buffile, s, len);
	manifest->manifest_size += len;
}