From cc7f27eae888234d2fda9e0eaadbeb33a48cd274 Mon Sep 17 00:00:00 2001 From: Michael Paquier Date: Fri, 19 Oct 2018 22:44:12 +0900 Subject: [PATCH] Use whitelist to choose files scanned with pg_verify_checksums The original implementation of pg_verify_checksums used a blacklist to decide which files should be skipped for scanning as they do not include data checksums, like pg_internal.init or pg_control. However, this missed two things: - Some files are created within builds of EXEC_BACKEND and these were not listed, causing failures on Windows. - Extensions may create custom files in data folders, causing the tool to equally fail. This commit switches to a whitelist-like method instead by checking if the files to scan are authorized relation files. This is close to a reverse-engineering of what is defined in relpath.c in charge of building the relation paths, and we could consider refactoring what this patch does so as all routines are in a single place. This is left for later. This is based on a suggestion from Andres Freund. TAP tests are updated so as multiple file patterns are tested. The bug has been spotted by various buildfarm members as a result of b34e84f which has introduced the TAP tests of pg_verify_checksums. Author: Michael Paquier Reviewed-by: Andrew Dunstan, Michael Banck Discussion: https://postgr.es/m/20181012005614.GC26424@paquier.xyz Backpatch-through: 11 --- .../pg_verify_checksums/pg_verify_checksums.c | 77 +++++++++++++++---- 1 file changed, 60 insertions(+), 17 deletions(-) diff --git a/src/bin/pg_verify_checksums/pg_verify_checksums.c b/src/bin/pg_verify_checksums/pg_verify_checksums.c index 589a3cc589..36d11ab563 100644 --- a/src/bin/pg_verify_checksums/pg_verify_checksums.c +++ b/src/bin/pg_verify_checksums/pg_verify_checksums.c @@ -15,6 +15,7 @@ #include "catalog/pg_control.h" #include "common/controldata_utils.h" +#include "common/relpath.h" #include "getopt_long.h" #include "pg_getopt.h" #include "storage/bufpage.h" @@ -49,27 +50,69 @@ usage(void) printf(_("Report bugs to .\n")); } -static const char *const skip[] = { - "pg_control", - "pg_filenode.map", - "pg_internal.init", - "PG_VERSION", - NULL, -}; - +/* + * isRelFileName + * + * Check if the given file name is authorized for checksum verification. + */ static bool -skipfile(const char *fn) +isRelFileName(const char *fn) { - const char *const *f; + int pos; - if (strcmp(fn, ".") == 0 || - strcmp(fn, "..") == 0) + /*---------- + * Only files including data checksums are authorized for verification. + * This is guessed based on the file name by reverse-engineering + * GetRelationPath() so make sure to update both code paths if any + * updates are done. The following file name formats are allowed: + * + * . + * _ + * _. + * + * Note that temporary files, beginning with 't', are also skipped. + * + *---------- + */ + + /* A non-empty string of digits should follow */ + for (pos = 0; isdigit((unsigned char) fn[pos]); ++pos) + ; + /* leave if no digits */ + if (pos == 0) + return false; + /* good to go if only digits */ + if (fn[pos] == '\0') return true; - for (f = skip; *f; f++) - if (strcmp(*f, fn) == 0) - return true; - return false; + /* Authorized fork files can be scanned */ + if (fn[pos] == '_') + { + int forkchar = forkname_chars(&fn[pos + 1], NULL); + + if (forkchar <= 0) + return false; + + pos += forkchar + 1; + } + + /* Check for an optional segment number */ + if (fn[pos] == '.') + { + int segchar; + + for (segchar = 1; isdigit((unsigned char) fn[pos + segchar]); ++segchar) + ; + + if (segchar <= 1) + return false; + pos += segchar; + } + + /* Now this should be the end */ + if (fn[pos] != '\0') + return false; + return true; } static void @@ -146,7 +189,7 @@ scan_directory(const char *basedir, const char *subdir) char fn[MAXPGPATH]; struct stat st; - if (skipfile(de->d_name)) + if (!isRelFileName(de->d_name)) continue; snprintf(fn, sizeof(fn), "%s/%s", path, de->d_name);