postgresql/src/bin/pg_upgrade/version.c

/*
* version.c
*
* Postgres-version-specific routines
*
* Copyright (c) 2010-2018, PostgreSQL Global Development Group
* src/bin/pg_upgrade/version.c
*/
#include "postgres_fe.h"
#include "pg_upgrade.h"
#include "catalog/pg_class_d.h"
#include "fe_utils/string_utils.h"
/*
* new_9_0_populate_pg_largeobject_metadata()
* new >= 9.0, old <= 8.4
* 9.0 has a new pg_largeobject permission table
*/
void
new_9_0_populate_pg_largeobject_metadata(ClusterInfo *cluster, bool check_mode)
{
int dbnum;
FILE *script = NULL;
bool found = false;
char output_path[MAXPGPATH];
prep_status("Checking for large objects");
snprintf(output_path, sizeof(output_path), "pg_largeobject.sql");
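/*
 * Note: with a bare file name, the script is created in pg_upgrade's
 * current working directory, the same convention the other checks in this
 * file follow.
 */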
for (dbnum = 0; dbnum < cluster->dbarr.ndbs; dbnum++)
{
PGresult *res;
int i_count;
DbInfo *active_db = &cluster->dbarr.dbs[dbnum];
PGconn *conn = connectToServer(cluster, active_db->db_name);
/* find if there are any large objects */
res = executeQueryOrDie(conn,
"SELECT count(*) "
"FROM pg_catalog.pg_largeobject ");
i_count = PQfnumber(res, "count");
if (atoi(PQgetvalue(res, 0, i_count)) != 0)
{
found = true;
if (!check_mode)
{
PQExpBufferData connectbuf;
if (script == NULL && (script = fopen_priv(output_path, "w")) == NULL)
pg_fatal("could not open file \"%s\": %s\n", output_path,
strerror(errno));
initPQExpBuffer(&connectbuf);
appendPsqlMetaConnect(&connectbuf, active_db->db_name);
fputs(connectbuf.data, script);
termPQExpBuffer(&connectbuf);
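/*
 * The query written below calls lo_create() once per distinct large
 * object OID; run against the new cluster, that populates the
 * pg_largeobject_metadata table with default ownership and permissions,
 * as the warning text at the end of this function describes.
 */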
fprintf(script,
"SELECT pg_catalog.lo_create(t.loid)\n"
"FROM (SELECT DISTINCT loid FROM pg_catalog.pg_largeobject) AS t;\n");
}
}
PQclear(res);
PQfinish(conn);
}
if (script)
fclose(script);
if (found)
{
report_status(PG_WARNING, "warning");
if (check_mode)
pg_log(PG_WARNING, "\n"
"Your installation contains large objects. The new database has an\n"
"additional large object permission table. After upgrading, you will be\n"
"given a command to populate the pg_largeobject_metadata table with\n"
"default permissions.\n\n");
else
pg_log(PG_WARNING, "\n"
"Your installation contains large objects. The new database has an\n"
"additional large object permission table, so default permissions must be\n"
"defined for all large objects. The file\n"
" %s\n"
"when executed by psql by the database superuser will set the default\n"
"permissions.\n\n",
output_path);
}
else
check_ok();
}
/*
* old_9_3_check_for_line_data_type_usage()
* 9.3 -> 9.4
* Fully implement the 'line' data type in 9.4, which previously returned
* "not enabled" by default and was only functionally enabled with a
* compile-time switch; 9.4 "line" has different binary and text
* representation formats; checks tables and indexes.
*/
void
old_9_3_check_for_line_data_type_usage(ClusterInfo *cluster)
{
int dbnum;
FILE *script = NULL;
bool found = false;
char output_path[MAXPGPATH];
prep_status("Checking for incompatible \"line\" data type");
snprintf(output_path, sizeof(output_path), "tables_using_line.txt");
for (dbnum = 0; dbnum < cluster->dbarr.ndbs; dbnum++)
{
PGresult *res;
bool db_used = false;
int ntups;
int rowno;
int i_nspname,
i_relname,
i_attname;
DbInfo *active_db = &cluster->dbarr.dbs[dbnum];
PGconn *conn = connectToServer(cluster, active_db->db_name);
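/*
 * Find any non-dropped column of type "line".  No relkind filter is
 * applied, so index columns are caught along with table columns; temp
 * and system schemas are excluded.
 */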
res = executeQueryOrDie(conn,
"SELECT n.nspname, c.relname, a.attname "
"FROM pg_catalog.pg_class c, "
" pg_catalog.pg_namespace n, "
" pg_catalog.pg_attribute a "
"WHERE c.oid = a.attrelid AND "
" NOT a.attisdropped AND "
" a.atttypid = 'pg_catalog.line'::pg_catalog.regtype AND "
" c.relnamespace = n.oid AND "
/* exclude possible orphaned temp tables */
" n.nspname !~ '^pg_temp_' AND "
" n.nspname !~ '^pg_toast_temp_' AND "
" n.nspname NOT IN ('pg_catalog', 'information_schema')");
ntups = PQntuples(res);
i_nspname = PQfnumber(res, "nspname");
i_relname = PQfnumber(res, "relname");
i_attname = PQfnumber(res, "attname");
for (rowno = 0; rowno < ntups; rowno++)
{
found = true;
if (script == NULL && (script = fopen_priv(output_path, "w")) == NULL)
pg_fatal("could not open file \"%s\": %s\n", output_path,
strerror(errno));
if (!db_used)
{
fprintf(script, "Database: %s\n", active_db->db_name);
db_used = true;
}
fprintf(script, " %s.%s.%s\n",
PQgetvalue(res, rowno, i_nspname),
PQgetvalue(res, rowno, i_relname),
PQgetvalue(res, rowno, i_attname));
}
PQclear(res);
PQfinish(conn);
}
if (script)
fclose(script);
if (found)
{
pg_log(PG_REPORT, "fatal\n");
pg_fatal("Your installation contains the \"line\" data type in user tables. This\n"
"data type changed its internal and input/output format between your old\n"
"and new clusters so this cluster cannot currently be upgraded. You can\n"
"remove the problem tables and restart the upgrade. A list of the problem\n"
"columns is in the file:\n"
" %s\n\n", output_path);
}
else
check_ok();
}
/*
* old_9_6_check_for_unknown_data_type_usage()
* 9.6 -> 10
* It's no longer allowed to create tables or views with "unknown"-type
* columns. We do not complain about views with such columns, because
* they should get silently converted to "text" columns during the DDL
* dump and reload; it seems unlikely to be worth making users do that
* by hand. However, if there's a table with such a column, the DDL
* reload will fail, so we should pre-detect that rather than failing
* mid-upgrade. Worse, if there's a matview with such a column, the
* DDL reload will silently change it to "text" which won't match the
* on-disk storage (which is like "cstring"). So we *must* reject that.
* Also check composite types, in case they are used for table columns.
* We needn't check indexes, because "unknown" has no opclasses.
*/
void
old_9_6_check_for_unknown_data_type_usage(ClusterInfo *cluster)
{
int dbnum;
FILE *script = NULL;
bool found = false;
char output_path[MAXPGPATH];
prep_status("Checking for invalid \"unknown\" user columns");
snprintf(output_path, sizeof(output_path), "tables_using_unknown.txt");
for (dbnum = 0; dbnum < cluster->dbarr.ndbs; dbnum++)
{
PGresult *res;
bool db_used = false;
int ntups;
int rowno;
int i_nspname,
i_relname,
i_attname;
DbInfo *active_db = &cluster->dbarr.dbs[dbnum];
PGconn *conn = connectToServer(cluster, active_db->db_name);
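/*
 * Find "unknown"-type columns, but only in plain tables, materialized
 * views, and composite types; plain views are deliberately not checked,
 * per the header comment above.
 */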
res = executeQueryOrDie(conn,
"SELECT n.nspname, c.relname, a.attname "
"FROM pg_catalog.pg_class c, "
" pg_catalog.pg_namespace n, "
" pg_catalog.pg_attribute a "
"WHERE c.oid = a.attrelid AND "
" NOT a.attisdropped AND "
" a.atttypid = 'pg_catalog.unknown'::pg_catalog.regtype AND "
" c.relkind IN ("
CppAsString2(RELKIND_RELATION) ", "
CppAsString2(RELKIND_COMPOSITE_TYPE) ", "
CppAsString2(RELKIND_MATVIEW) ") AND "
" c.relnamespace = n.oid AND "
/* exclude possible orphaned temp tables */
" n.nspname !~ '^pg_temp_' AND "
" n.nspname !~ '^pg_toast_temp_' AND "
" n.nspname NOT IN ('pg_catalog', 'information_schema')");
ntups = PQntuples(res);
i_nspname = PQfnumber(res, "nspname");
i_relname = PQfnumber(res, "relname");
i_attname = PQfnumber(res, "attname");
for (rowno = 0; rowno < ntups; rowno++)
{
found = true;
if (script == NULL && (script = fopen_priv(output_path, "w")) == NULL)
pg_fatal("could not open file \"%s\": %s\n", output_path,
strerror(errno));
if (!db_used)
{
fprintf(script, "Database: %s\n", active_db->db_name);
db_used = true;
}
fprintf(script, " %s.%s.%s\n",
PQgetvalue(res, rowno, i_nspname),
PQgetvalue(res, rowno, i_relname),
PQgetvalue(res, rowno, i_attname));
}
PQclear(res);
PQfinish(conn);
}
if (script)
fclose(script);
if (found)
{
pg_log(PG_REPORT, "fatal\n");
pg_fatal("Your installation contains the \"unknown\" data type in user tables. This\n"
"data type is no longer allowed in tables, so this cluster cannot currently\n"
"be upgraded. You can remove the problem tables and restart the upgrade.\n"
"A list of the problem columns is in the file:\n"
" %s\n\n", output_path);
}
else
check_ok();
}
/*
* old_9_6_invalidate_hash_indexes()
* 9.6 -> 10
* Hash index binary format has changed from 9.6->10.0
*/
void
old_9_6_invalidate_hash_indexes(ClusterInfo *cluster, bool check_mode)
{
int dbnum;
FILE *script = NULL;
bool found = false;
char *output_path = "reindex_hash.sql";
prep_status("Checking for hash indexes");
for (dbnum = 0; dbnum < cluster->dbarr.ndbs; dbnum++)
{
PGresult *res;
bool db_used = false;
int ntups;
int rowno;
int i_nspname,
i_relname;
DbInfo *active_db = &cluster->dbarr.dbs[dbnum];
PGconn *conn = connectToServer(cluster, active_db->db_name);
/* find hash indexes */
res = executeQueryOrDie(conn,
"SELECT n.nspname, c.relname "
"FROM pg_catalog.pg_class c, "
" pg_catalog.pg_index i, "
" pg_catalog.pg_am a, "
" pg_catalog.pg_namespace n "
"WHERE i.indexrelid = c.oid AND "
" c.relam = a.oid AND "
" c.relnamespace = n.oid AND "
" a.amname = 'hash'"
);
ntups = PQntuples(res);
i_nspname = PQfnumber(res, "nspname");
i_relname = PQfnumber(res, "relname");
for (rowno = 0; rowno < ntups; rowno++)
{
found = true;
if (!check_mode)
{
if (script == NULL && (script = fopen_priv(output_path, "w")) == NULL)
pg_fatal("could not open file \"%s\": %s\n", output_path,
strerror(errno));
if (!db_used)
{
PQExpBufferData connectbuf;
initPQExpBuffer(&connectbuf);
appendPsqlMetaConnect(&connectbuf, active_db->db_name);
fputs(connectbuf.data, script);
termPQExpBuffer(&connectbuf);
db_used = true;
}
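/*
 * quote_identifier() keeps mixed-case or otherwise unusual schema and
 * index names valid in the generated REINDEX commands.
 */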
fprintf(script, "REINDEX INDEX %s.%s;\n",
quote_identifier(PQgetvalue(res, rowno, i_nspname)),
quote_identifier(PQgetvalue(res, rowno, i_relname)));
}
}
PQclear(res);
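/*
 * In live (non-check) mode, also flag the hash indexes as invalid in the
 * catalogs, so none of them will be used until the REINDEX script written
 * above has been run (see the warning text below).
 */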
if (!check_mode && db_used)
{
/* mark hash indexes as invalid */
PQclear(executeQueryOrDie(conn,
"UPDATE pg_catalog.pg_index i "
"SET indisvalid = false "
"FROM pg_catalog.pg_class c, "
" pg_catalog.pg_am a, "
" pg_catalog.pg_namespace n "
"WHERE i.indexrelid = c.oid AND "
" c.relam = a.oid AND "
" c.relnamespace = n.oid AND "
" a.amname = 'hash'"));
}
PQfinish(conn);
}
if (script)
fclose(script);
if (found)
{
report_status(PG_WARNING, "warning");
if (check_mode)
pg_log(PG_WARNING, "\n"
"Your installation contains hash indexes. These indexes have different\n"
"internal formats between your old and new clusters, so they must be\n"
"reindexed with the REINDEX command. After upgrading, you will be given\n"
"REINDEX instructions.\n\n");
else
pg_log(PG_WARNING, "\n"
"Your installation contains hash indexes. These indexes have different\n"
"internal formats between your old and new clusters, so they must be\n"
"reindexed with the REINDEX command. The file\n"
" %s\n"
"when executed by psql by the database superuser will recreate all invalid\n"
"indexes; until then, none of these indexes will be used.\n\n",
output_path);
}
else
check_ok();
}