From 5c47c6546c413d5eb51c1626070a807026e6139d Mon Sep 17 00:00:00 2001 From: Robert Haas Date: Mon, 23 Oct 2023 15:08:53 -0400 Subject: [PATCH] Refactor parse_filename_for_nontemp_relation to parse more. Instead of returning the number of characters in the RelFileNumber, return the RelFileNumber itself. Continue to return the fork number, as before, and additionally return the segment number. parse_filename_for_nontemp_relation now rejects a RelFileNumber or segment number that begins with a leading zero. Before, we accepted such cases as relation filenames, but if we continued to do so after this change, the function might return the same values for two different files (e.g. 1234.5 and 001234.5 or 1234.005) which could be annoying for callers. Since we don't actually ever generate filenames with leading zeroes in the names, any such files that we find must have been created by something other than PostgreSQL, and it is therefore reasonable to treat them as non-relation files. Along the way, change unlogged_relation_entry to store a RelFileNumber rather than an OID. This update should have been made in 851f4cc75cdd8c831f1baa9a7abf8c8248b65890, but it was overlooked. It's trivial to make the update as part of this commit, perhaps more trivial than it would have been without it, so do that. Patch by me, reviewed by David Steele. Discussion: http://postgr.es/m/CA+TgmoZNVeBzoqDL8xvr-nkaepq815jtDR4nJzPew7=3iEuM1g@mail.gmail.com --- src/backend/backup/basebackup.c | 15 ++-- src/backend/storage/file/reinit.c | 139 ++++++++++++++++++------------ src/include/storage/reinit.h | 5 +- 3 files changed, 94 insertions(+), 65 deletions(-) diff --git a/src/backend/backup/basebackup.c b/src/backend/backup/basebackup.c index 7d025bcf38..b126d9c890 100644 --- a/src/backend/backup/basebackup.c +++ b/src/backend/backup/basebackup.c @@ -1197,9 +1197,9 @@ sendDir(bbsink *sink, const char *path, int basepathlen, bool sizeonly, { int excludeIdx; bool excludeFound; - ForkNumber relForkNum; /* Type of fork if file is a relation */ - int relnumchars; /* Chars in filename that are the - * relnumber */ + RelFileNumber relNumber; + ForkNumber relForkNum; + unsigned segno; /* Skip special stuff */ if (strcmp(de->d_name, ".") == 0 || strcmp(de->d_name, "..") == 0) @@ -1249,23 +1249,20 @@ sendDir(bbsink *sink, const char *path, int basepathlen, bool sizeonly, /* Exclude all forks for unlogged tables except the init fork */ if (isDbDir && - parse_filename_for_nontemp_relation(de->d_name, &relnumchars, - &relForkNum)) + parse_filename_for_nontemp_relation(de->d_name, &relNumber, + &relForkNum, &segno)) { /* Never exclude init forks */ if (relForkNum != INIT_FORKNUM) { char initForkFile[MAXPGPATH]; - char relNumber[OIDCHARS + 1]; /* * If any other type of fork, check if there is an init fork * with the same RelFileNumber. If so, the file can be * excluded. */ - memcpy(relNumber, de->d_name, relnumchars); - relNumber[relnumchars] = '\0'; - snprintf(initForkFile, sizeof(initForkFile), "%s/%s_init", + snprintf(initForkFile, sizeof(initForkFile), "%s/%u_init", path, relNumber); if (lstat(initForkFile, &statbuf) == 0) diff --git a/src/backend/storage/file/reinit.c b/src/backend/storage/file/reinit.c index fb55371b1b..5df2517b46 100644 --- a/src/backend/storage/file/reinit.c +++ b/src/backend/storage/file/reinit.c @@ -31,7 +31,7 @@ static void ResetUnloggedRelationsInDbspaceDir(const char *dbspacedirname, typedef struct { - Oid reloid; /* hash key */ + RelFileNumber relnumber; /* hash key */ } unlogged_relation_entry; /* @@ -195,12 +195,13 @@ ResetUnloggedRelationsInDbspaceDir(const char *dbspacedirname, int op) while ((de = ReadDir(dbspace_dir, dbspacedirname)) != NULL) { ForkNumber forkNum; - int relnumchars; + unsigned segno; unlogged_relation_entry ent; /* Skip anything that doesn't look like a relation data file. */ - if (!parse_filename_for_nontemp_relation(de->d_name, &relnumchars, - &forkNum)) + if (!parse_filename_for_nontemp_relation(de->d_name, + &ent.relnumber, + &forkNum, &segno)) continue; /* Also skip it unless this is the init fork. */ @@ -208,10 +209,8 @@ ResetUnloggedRelationsInDbspaceDir(const char *dbspacedirname, int op) continue; /* - * Put the OID portion of the name into the hash table, if it - * isn't already. + * Put the RelFileNumber into the hash table, if it isn't already. */ - ent.reloid = atooid(de->d_name); (void) hash_search(hash, &ent, HASH_ENTER, NULL); } @@ -235,12 +234,13 @@ ResetUnloggedRelationsInDbspaceDir(const char *dbspacedirname, int op) while ((de = ReadDir(dbspace_dir, dbspacedirname)) != NULL) { ForkNumber forkNum; - int relnumchars; + unsigned segno; unlogged_relation_entry ent; /* Skip anything that doesn't look like a relation data file. */ - if (!parse_filename_for_nontemp_relation(de->d_name, &relnumchars, - &forkNum)) + if (!parse_filename_for_nontemp_relation(de->d_name, + &ent.relnumber, + &forkNum, &segno)) continue; /* We never remove the init fork. */ @@ -251,7 +251,6 @@ ResetUnloggedRelationsInDbspaceDir(const char *dbspacedirname, int op) * See whether the OID portion of the name shows up in the hash * table. If so, nuke it! */ - ent.reloid = atooid(de->d_name); if (hash_search(hash, &ent, HASH_FIND, NULL)) { snprintf(rm_path, sizeof(rm_path), "%s/%s", @@ -285,14 +284,14 @@ ResetUnloggedRelationsInDbspaceDir(const char *dbspacedirname, int op) while ((de = ReadDir(dbspace_dir, dbspacedirname)) != NULL) { ForkNumber forkNum; - int relnumchars; - char relnumbuf[OIDCHARS + 1]; + RelFileNumber relNumber; + unsigned segno; char srcpath[MAXPGPATH * 2]; char dstpath[MAXPGPATH]; /* Skip anything that doesn't look like a relation data file. */ - if (!parse_filename_for_nontemp_relation(de->d_name, &relnumchars, - &forkNum)) + if (!parse_filename_for_nontemp_relation(de->d_name, &relNumber, + &forkNum, &segno)) continue; /* Also skip it unless this is the init fork. */ @@ -304,11 +303,12 @@ ResetUnloggedRelationsInDbspaceDir(const char *dbspacedirname, int op) dbspacedirname, de->d_name); /* Construct destination pathname. */ - memcpy(relnumbuf, de->d_name, relnumchars); - relnumbuf[relnumchars] = '\0'; - snprintf(dstpath, sizeof(dstpath), "%s/%s%s", - dbspacedirname, relnumbuf, de->d_name + relnumchars + 1 + - strlen(forkNames[INIT_FORKNUM])); + if (segno == 0) + snprintf(dstpath, sizeof(dstpath), "%s/%u", + dbspacedirname, relNumber); + else + snprintf(dstpath, sizeof(dstpath), "%s/%u.%u", + dbspacedirname, relNumber, segno); /* OK, we're ready to perform the actual copy. */ elog(DEBUG2, "copying %s to %s", srcpath, dstpath); @@ -327,14 +327,14 @@ ResetUnloggedRelationsInDbspaceDir(const char *dbspacedirname, int op) dbspace_dir = AllocateDir(dbspacedirname); while ((de = ReadDir(dbspace_dir, dbspacedirname)) != NULL) { + RelFileNumber relNumber; ForkNumber forkNum; - int relnumchars; - char relnumbuf[OIDCHARS + 1]; + unsigned segno; char mainpath[MAXPGPATH]; /* Skip anything that doesn't look like a relation data file. */ - if (!parse_filename_for_nontemp_relation(de->d_name, &relnumchars, - &forkNum)) + if (!parse_filename_for_nontemp_relation(de->d_name, &relNumber, + &forkNum, &segno)) continue; /* Also skip it unless this is the init fork. */ @@ -342,11 +342,12 @@ ResetUnloggedRelationsInDbspaceDir(const char *dbspacedirname, int op) continue; /* Construct main fork pathname. */ - memcpy(relnumbuf, de->d_name, relnumchars); - relnumbuf[relnumchars] = '\0'; - snprintf(mainpath, sizeof(mainpath), "%s/%s%s", - dbspacedirname, relnumbuf, de->d_name + relnumchars + 1 + - strlen(forkNames[INIT_FORKNUM])); + if (segno == 0) + snprintf(mainpath, sizeof(mainpath), "%s/%u", + dbspacedirname, relNumber); + else + snprintf(mainpath, sizeof(mainpath), "%s/%u.%u", + dbspacedirname, relNumber, segno); fsync_fname(mainpath, false); } @@ -371,52 +372,82 @@ ResetUnloggedRelationsInDbspaceDir(const char *dbspacedirname, int op) * This function returns true if the file appears to be in the correct format * for a non-temporary relation and false otherwise. * - * NB: If this function returns true, the caller is entitled to assume that - * *relnumchars has been set to a value no more than OIDCHARS, and thus - * that a buffer of OIDCHARS+1 characters is sufficient to hold the - * RelFileNumber portion of the filename. This is critical to protect against - * a possible buffer overrun. + * If it returns true, it sets *relnumber, *fork, and *segno to the values + * extracted from the filename. If it returns false, these values are set to + * InvalidRelFileNumber, InvalidForkNumber, and 0, respectively. */ bool -parse_filename_for_nontemp_relation(const char *name, int *relnumchars, - ForkNumber *fork) +parse_filename_for_nontemp_relation(const char *name, RelFileNumber *relnumber, + ForkNumber *fork, unsigned *segno) { - int pos; + unsigned long n, + s; + ForkNumber f; + char *endp; - /* Look for a non-empty string of digits (that isn't too long). */ - for (pos = 0; isdigit((unsigned char) name[pos]); ++pos) - ; - if (pos == 0 || pos > OIDCHARS) + *relnumber = InvalidRelFileNumber; + *fork = InvalidForkNumber; + *segno = 0; + + /* + * Relation filenames should begin with a digit that is not a zero. By + * rejecting cases involving leading zeroes, the caller can assume that + * there's only one possible string of characters that could have produced + * any given value for *relnumber. + * + * (To be clear, we don't expect files with names like 0017.3 to exist at + * all -- but if 0017.3 does exist, it's a non-relation file, not part of + * the main fork for relfilenode 17.) + */ + if (name[0] < '1' || name[0] > '9') return false; - *relnumchars = pos; + + /* + * Parse the leading digit string. If the value is out of range, we + * conclude that this isn't a relation file at all. + */ + errno = 0; + n = strtoul(name, &endp, 10); + if (errno || name == endp || n <= 0 || n > PG_UINT32_MAX) + return false; + name = endp; /* Check for a fork name. */ - if (name[pos] != '_') - *fork = MAIN_FORKNUM; + if (*name != '_') + f = MAIN_FORKNUM; else { int forkchar; - forkchar = forkname_chars(&name[pos + 1], fork); + forkchar = forkname_chars(name + 1, &f); if (forkchar <= 0) return false; - pos += forkchar + 1; + name += forkchar + 1; } /* Check for a segment number. */ - if (name[pos] == '.') + if (*name != '.') + s = 0; + else { - int segchar; - - for (segchar = 1; isdigit((unsigned char) name[pos + segchar]); ++segchar) - ; - if (segchar <= 1) + /* Reject leading zeroes, just like we do for RelFileNumber. */ + if (name[0] < '1' || name[0] > '9') return false; - pos += segchar; + + errno = 0; + s = strtoul(name + 1, &endp, 10); + if (errno || name + 1 == endp || s <= 0 || s > PG_UINT32_MAX) + return false; + name = endp; } /* Now we should be at the end. */ - if (name[pos] != '\0') + if (*name != '\0') return false; + + /* Set out parameters and return. */ + *relnumber = (RelFileNumber) n; + *fork = f; + *segno = (unsigned) s; return true; } diff --git a/src/include/storage/reinit.h b/src/include/storage/reinit.h index e2bbb5abe9..f8eb7ce234 100644 --- a/src/include/storage/reinit.h +++ b/src/include/storage/reinit.h @@ -20,8 +20,9 @@ extern void ResetUnloggedRelations(int op); extern bool parse_filename_for_nontemp_relation(const char *name, - int *relnumchars, - ForkNumber *fork); + RelFileNumber *relnumber, + ForkNumber *fork, + unsigned *segno); #define UNLOGGED_RELATION_CLEANUP 0x0001 #define UNLOGGED_RELATION_INIT 0x0002