postgresql/src/backend/commands/tsearchcmds.c

Ignoring revisions in .git-blame-ignore-revs. Click here to bypass and see the normal blame view.

1871 lines
46 KiB
C
Raw Normal View History

/*-------------------------------------------------------------------------
*
* tsearchcmds.c
*
* Routines for tsearch manipulation commands
*
* Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
*
* IDENTIFICATION
2010-09-20 22:08:53 +02:00
* src/backend/commands/tsearchcmds.c
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#include <ctype.h>
#include "access/genam.h"
#include "access/htup_details.h"
#include "access/table.h"
#include "access/xact.h"
Remove WITH OIDS support, change oid catalog column visibility. Previously tables declared WITH OIDS, including a significant fraction of the catalog tables, stored the oid column not as a normal column, but as part of the tuple header. This special column was not shown by default, which was somewhat odd, as it's often (consider e.g. pg_class.oid) one of the more important parts of a row. Neither pg_dump nor COPY included the contents of the oid column by default. The fact that the oid column was not an ordinary column necessitated a significant amount of special case code to support oid columns. That already was painful for the existing, but upcoming work aiming to make table storage pluggable, would have required expanding and duplicating that "specialness" significantly. WITH OIDS has been deprecated since 2005 (commit ff02d0a05280e0). Remove it. Removing includes: - CREATE TABLE and ALTER TABLE syntax for declaring the table to be WITH OIDS has been removed (WITH (oids[ = true]) will error out) - pg_dump does not support dumping tables declared WITH OIDS and will issue a warning when dumping one (and ignore the oid column). - restoring an pg_dump archive with pg_restore will warn when restoring a table with oid contents (and ignore the oid column) - COPY will refuse to load binary dump that includes oids. - pg_upgrade will error out when encountering tables declared WITH OIDS, they have to be altered to remove the oid column first. - Functionality to access the oid of the last inserted row (like plpgsql's RESULT_OID, spi's SPI_lastoid, ...) has been removed. The syntax for declaring a table WITHOUT OIDS (or WITH (oids = false) for CREATE TABLE) is still supported. While that requires a bit of support code, it seems unnecessary to break applications / dumps that do not use oids, and are explicit about not using them. The biggest user of WITH OID columns was postgres' catalog. This commit changes all 'magic' oid columns to be columns that are normally declared and stored. To reduce unnecessary query breakage all the newly added columns are still named 'oid', even if a table's column naming scheme would indicate 'reloid' or such. This obviously requires adapting a lot code, mostly replacing oid access via HeapTupleGetOid() with access to the underlying Form_pg_*->oid column. The bootstrap process now assigns oids for all oid columns in genbki.pl that do not have an explicit value (starting at the largest oid previously used), only oids assigned later by oids will be above FirstBootstrapObjectId. As the oid column now is a normal column the special bootstrap syntax for oids has been removed. Oids are not automatically assigned during insertion anymore, all backend code explicitly assigns oids with GetNewOidWithIndex(). For the rare case that insertions into the catalog via SQL are called for the new pg_nextoid() function can be used (which only works on catalog tables). The fact that oid columns on system tables are now normal columns means that they will be included in the set of columns expanded by * (i.e. SELECT * FROM pg_class will now include the table's oid, previously it did not). It'd not technically be hard to hide oid column by default, but that'd mean confusing behavior would either have to be carried forward forever, or it'd cause breakage down the line. While it's not unlikely that further adjustments are needed, the scope/invasiveness of the patch makes it worthwhile to get merge this now. It's painful to maintain externally, too complicated to commit after the code code freeze, and a dependency of a number of other patches. Catversion bump, for obvious reasons. Author: Andres Freund, with contributions by John Naylor Discussion: https://postgr.es/m/20180930034810.ywp2c7awz7opzcfr@alap3.anarazel.de
2018-11-21 00:36:57 +01:00
#include "catalog/catalog.h"
#include "catalog/dependency.h"
#include "catalog/indexing.h"
#include "catalog/objectaccess.h"
#include "catalog/pg_namespace.h"
#include "catalog/pg_proc.h"
#include "catalog/pg_ts_config.h"
#include "catalog/pg_ts_config_map.h"
#include "catalog/pg_ts_dict.h"
#include "catalog/pg_ts_parser.h"
#include "catalog/pg_ts_template.h"
#include "catalog/pg_type.h"
#include "commands/defrem.h"
Allow on-the-fly capture of DDL event details This feature lets user code inspect and take action on DDL events. Whenever a ddl_command_end event trigger is installed, DDL actions executed are saved to a list which can be inspected during execution of a function attached to ddl_command_end. The set-returning function pg_event_trigger_ddl_commands can be used to list actions so captured; it returns data about the type of command executed, as well as the affected object. This is sufficient for many uses of this feature. For the cases where it is not, we also provide a "command" column of a new pseudo-type pg_ddl_command, which is a pointer to a C structure that can be accessed by C code. The struct contains all the info necessary to completely inspect and even reconstruct the executed command. There is no actual deparse code here; that's expected to come later. What we have is enough infrastructure that the deparsing can be done in an external extension. The intention is that we will add some deparsing code in a later release, as an in-core extension. A new test module is included. It's probably insufficient as is, but it should be sufficient as a starting point for a more complete and future-proof approach. Authors: Álvaro Herrera, with some help from Andres Freund, Ian Barwick, Abhijit Menon-Sen. Reviews by Andres Freund, Robert Haas, Amit Kapila, Michael Paquier, Craig Ringer, David Steele. Additional input from Chris Browne, Dimitri Fontaine, Stephen Frost, Petr Jelínek, Tom Lane, Jim Nasby, Steven Singer, Pavel Stěhule. Based on original work by Dimitri Fontaine, though I didn't use his code. Discussion: https://www.postgresql.org/message-id/m2txrsdzxa.fsf@2ndQuadrant.fr https://www.postgresql.org/message-id/20131108153322.GU5809@eldon.alvh.no-ip.org https://www.postgresql.org/message-id/20150215044814.GL3391@alvh.no-ip.org
2015-05-12 00:14:31 +02:00
#include "commands/event_trigger.h"
#include "common/string.h"
#include "miscadmin.h"
#include "nodes/makefuncs.h"
#include "parser/parse_func.h"
#include "tsearch/ts_cache.h"
#include "tsearch/ts_public.h"
#include "utils/acl.h"
#include "utils/builtins.h"
#include "utils/fmgroids.h"
#include "utils/lsyscache.h"
#include "utils/rel.h"
#include "utils/syscache.h"
Fix various issues with ALTER TEXT SEARCH CONFIGURATION This commit addresses a set of issues when changing token type mappings in a text search configuration when using duplicated token names: - ADD MAPPING would fail on insertion because of a constraint failure after inserting the same mapping. - ALTER MAPPING with an "overridden" configuration failed with "tuple already updated by self" when the token mappings are removed. - DROP MAPPING failed with "tuple already updated by self", like previously, but in a different code path. The code is refactored so the token names (with their numbers) are handled as a List with unique members rather than an array with numbers, ensuring that no duplicates mess up with the catalog inserts, updates and deletes. The list is generated by getTokenTypes(), with the same error handling as previously while duplicated tokens are discarded from the list used to work on the catalogs. Regression tests are expanded to cover much more ground for the cases fixed by this commit, as there was no coverage for the code touched in this commit. A bit more is done regarding the fact that a token name not supported by a configuration's parser should result in an error even if IF EXISTS is used in a DROP MAPPING clause. This is implied in the code but there was no coverage for that, and it was very easy to miss. These issues exist since at least their introduction in core with 140d4ebcb46e, so backpatch all the way down. Reported-by: Alexander Lakhin Author: Tender Wang, Michael Paquier Discussion: https://postgr.es/m/18310-1eb233c5908189c8@postgresql.org Backpatch-through: 12
2024-01-31 05:15:21 +01:00
/* Single entry of List returned by getTokenTypes() */
typedef struct
{
int num; /* token type number */
char *name; /* token type name */
} TSTokenTypeItem;
static void MakeConfigurationMapping(AlterTSConfigurationStmt *stmt,
HeapTuple tup, Relation relMap);
static void DropConfigurationMapping(AlterTSConfigurationStmt *stmt,
HeapTuple tup, Relation relMap);
static DefElem *buildDefItem(const char *name, const char *val,
bool was_quoted);
/* --------------------- TS Parser commands ------------------------ */
/*
* lookup a parser support function and return its OID (as a Datum)
*
* attnum is the pg_ts_parser column the function will go into
*/
static Datum
get_ts_parser_func(DefElem *defel, int attnum)
{
List *funcName = defGetQualifiedName(defel);
Oid typeId[3];
Oid retTypeId;
int nargs;
Oid procOid;
retTypeId = INTERNALOID; /* correct for most */
typeId[0] = INTERNALOID;
switch (attnum)
{
case Anum_pg_ts_parser_prsstart:
nargs = 2;
typeId[1] = INT4OID;
break;
case Anum_pg_ts_parser_prstoken:
nargs = 3;
typeId[1] = INTERNALOID;
typeId[2] = INTERNALOID;
break;
case Anum_pg_ts_parser_prsend:
nargs = 1;
retTypeId = VOIDOID;
break;
case Anum_pg_ts_parser_prsheadline:
nargs = 3;
typeId[1] = INTERNALOID;
typeId[2] = TSQUERYOID;
break;
case Anum_pg_ts_parser_prslextype:
nargs = 1;
2011-06-09 20:32:50 +02:00
/*
* Note: because the lextype method returns type internal, it must
* have an internal-type argument for security reasons. The
* argument is not actually used, but is just passed as a zero.
*/
break;
default:
/* should not be here */
elog(ERROR, "unrecognized attribute for text search parser: %d",
attnum);
nargs = 0; /* keep compiler quiet */
}
procOid = LookupFuncName(funcName, nargs, typeId, false);
if (get_func_rettype(procOid) != retTypeId)
ereport(ERROR,
(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
errmsg("function %s should return type %s",
func_signature_string(funcName, nargs, NIL, typeId),
format_type_be(retTypeId))));
return ObjectIdGetDatum(procOid);
}
/*
* make pg_depend entries for a new pg_ts_parser entry
*
* Return value is the address of said new entry.
*/
static ObjectAddress
makeParserDependencies(HeapTuple tuple)
{
Form_pg_ts_parser prs = (Form_pg_ts_parser) GETSTRUCT(tuple);
ObjectAddress myself,
referenced;
ObjectAddresses *addrs;
ObjectAddressSet(myself, TSParserRelationId, prs->oid);
/* dependency on extension */
recordDependencyOnCurrentExtension(&myself, false);
addrs = new_object_addresses();
/* dependency on namespace */
ObjectAddressSet(referenced, NamespaceRelationId, prs->prsnamespace);
add_exact_object_address(&referenced, addrs);
/* dependencies on functions */
ObjectAddressSet(referenced, ProcedureRelationId, prs->prsstart);
add_exact_object_address(&referenced, addrs);
referenced.objectId = prs->prstoken;
add_exact_object_address(&referenced, addrs);
referenced.objectId = prs->prsend;
add_exact_object_address(&referenced, addrs);
referenced.objectId = prs->prslextype;
add_exact_object_address(&referenced, addrs);
if (OidIsValid(prs->prsheadline))
{
referenced.objectId = prs->prsheadline;
add_exact_object_address(&referenced, addrs);
}
record_object_address_dependencies(&myself, addrs, DEPENDENCY_NORMAL);
free_object_addresses(addrs);
return myself;
}
/*
* CREATE TEXT SEARCH PARSER
*/
ObjectAddress
DefineTSParser(List *names, List *parameters)
{
char *prsname;
ListCell *pl;
Relation prsRel;
HeapTuple tup;
Datum values[Natts_pg_ts_parser];
bool nulls[Natts_pg_ts_parser];
NameData pname;
Oid prsOid;
Oid namespaceoid;
ObjectAddress address;
if (!superuser())
ereport(ERROR,
(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
errmsg("must be superuser to create text search parsers")));
prsRel = table_open(TSParserRelationId, RowExclusiveLock);
Remove WITH OIDS support, change oid catalog column visibility. Previously tables declared WITH OIDS, including a significant fraction of the catalog tables, stored the oid column not as a normal column, but as part of the tuple header. This special column was not shown by default, which was somewhat odd, as it's often (consider e.g. pg_class.oid) one of the more important parts of a row. Neither pg_dump nor COPY included the contents of the oid column by default. The fact that the oid column was not an ordinary column necessitated a significant amount of special case code to support oid columns. That already was painful for the existing, but upcoming work aiming to make table storage pluggable, would have required expanding and duplicating that "specialness" significantly. WITH OIDS has been deprecated since 2005 (commit ff02d0a05280e0). Remove it. Removing includes: - CREATE TABLE and ALTER TABLE syntax for declaring the table to be WITH OIDS has been removed (WITH (oids[ = true]) will error out) - pg_dump does not support dumping tables declared WITH OIDS and will issue a warning when dumping one (and ignore the oid column). - restoring an pg_dump archive with pg_restore will warn when restoring a table with oid contents (and ignore the oid column) - COPY will refuse to load binary dump that includes oids. - pg_upgrade will error out when encountering tables declared WITH OIDS, they have to be altered to remove the oid column first. - Functionality to access the oid of the last inserted row (like plpgsql's RESULT_OID, spi's SPI_lastoid, ...) has been removed. The syntax for declaring a table WITHOUT OIDS (or WITH (oids = false) for CREATE TABLE) is still supported. While that requires a bit of support code, it seems unnecessary to break applications / dumps that do not use oids, and are explicit about not using them. The biggest user of WITH OID columns was postgres' catalog. This commit changes all 'magic' oid columns to be columns that are normally declared and stored. To reduce unnecessary query breakage all the newly added columns are still named 'oid', even if a table's column naming scheme would indicate 'reloid' or such. This obviously requires adapting a lot code, mostly replacing oid access via HeapTupleGetOid() with access to the underlying Form_pg_*->oid column. The bootstrap process now assigns oids for all oid columns in genbki.pl that do not have an explicit value (starting at the largest oid previously used), only oids assigned later by oids will be above FirstBootstrapObjectId. As the oid column now is a normal column the special bootstrap syntax for oids has been removed. Oids are not automatically assigned during insertion anymore, all backend code explicitly assigns oids with GetNewOidWithIndex(). For the rare case that insertions into the catalog via SQL are called for the new pg_nextoid() function can be used (which only works on catalog tables). The fact that oid columns on system tables are now normal columns means that they will be included in the set of columns expanded by * (i.e. SELECT * FROM pg_class will now include the table's oid, previously it did not). It'd not technically be hard to hide oid column by default, but that'd mean confusing behavior would either have to be carried forward forever, or it'd cause breakage down the line. While it's not unlikely that further adjustments are needed, the scope/invasiveness of the patch makes it worthwhile to get merge this now. It's painful to maintain externally, too complicated to commit after the code code freeze, and a dependency of a number of other patches. Catversion bump, for obvious reasons. Author: Andres Freund, with contributions by John Naylor Discussion: https://postgr.es/m/20180930034810.ywp2c7awz7opzcfr@alap3.anarazel.de
2018-11-21 00:36:57 +01:00
/* Convert list of names to a name and namespace */
namespaceoid = QualifiedNameGetCreationNamespace(names, &prsname);
/* initialize tuple fields with name/namespace */
memset(values, 0, sizeof(values));
memset(nulls, false, sizeof(nulls));
Remove WITH OIDS support, change oid catalog column visibility. Previously tables declared WITH OIDS, including a significant fraction of the catalog tables, stored the oid column not as a normal column, but as part of the tuple header. This special column was not shown by default, which was somewhat odd, as it's often (consider e.g. pg_class.oid) one of the more important parts of a row. Neither pg_dump nor COPY included the contents of the oid column by default. The fact that the oid column was not an ordinary column necessitated a significant amount of special case code to support oid columns. That already was painful for the existing, but upcoming work aiming to make table storage pluggable, would have required expanding and duplicating that "specialness" significantly. WITH OIDS has been deprecated since 2005 (commit ff02d0a05280e0). Remove it. Removing includes: - CREATE TABLE and ALTER TABLE syntax for declaring the table to be WITH OIDS has been removed (WITH (oids[ = true]) will error out) - pg_dump does not support dumping tables declared WITH OIDS and will issue a warning when dumping one (and ignore the oid column). - restoring an pg_dump archive with pg_restore will warn when restoring a table with oid contents (and ignore the oid column) - COPY will refuse to load binary dump that includes oids. - pg_upgrade will error out when encountering tables declared WITH OIDS, they have to be altered to remove the oid column first. - Functionality to access the oid of the last inserted row (like plpgsql's RESULT_OID, spi's SPI_lastoid, ...) has been removed. The syntax for declaring a table WITHOUT OIDS (or WITH (oids = false) for CREATE TABLE) is still supported. While that requires a bit of support code, it seems unnecessary to break applications / dumps that do not use oids, and are explicit about not using them. The biggest user of WITH OID columns was postgres' catalog. This commit changes all 'magic' oid columns to be columns that are normally declared and stored. To reduce unnecessary query breakage all the newly added columns are still named 'oid', even if a table's column naming scheme would indicate 'reloid' or such. This obviously requires adapting a lot code, mostly replacing oid access via HeapTupleGetOid() with access to the underlying Form_pg_*->oid column. The bootstrap process now assigns oids for all oid columns in genbki.pl that do not have an explicit value (starting at the largest oid previously used), only oids assigned later by oids will be above FirstBootstrapObjectId. As the oid column now is a normal column the special bootstrap syntax for oids has been removed. Oids are not automatically assigned during insertion anymore, all backend code explicitly assigns oids with GetNewOidWithIndex(). For the rare case that insertions into the catalog via SQL are called for the new pg_nextoid() function can be used (which only works on catalog tables). The fact that oid columns on system tables are now normal columns means that they will be included in the set of columns expanded by * (i.e. SELECT * FROM pg_class will now include the table's oid, previously it did not). It'd not technically be hard to hide oid column by default, but that'd mean confusing behavior would either have to be carried forward forever, or it'd cause breakage down the line. While it's not unlikely that further adjustments are needed, the scope/invasiveness of the patch makes it worthwhile to get merge this now. It's painful to maintain externally, too complicated to commit after the code code freeze, and a dependency of a number of other patches. Catversion bump, for obvious reasons. Author: Andres Freund, with contributions by John Naylor Discussion: https://postgr.es/m/20180930034810.ywp2c7awz7opzcfr@alap3.anarazel.de
2018-11-21 00:36:57 +01:00
prsOid = GetNewOidWithIndex(prsRel, TSParserOidIndexId,
Anum_pg_ts_parser_oid);
values[Anum_pg_ts_parser_oid - 1] = ObjectIdGetDatum(prsOid);
namestrcpy(&pname, prsname);
values[Anum_pg_ts_parser_prsname - 1] = NameGetDatum(&pname);
values[Anum_pg_ts_parser_prsnamespace - 1] = ObjectIdGetDatum(namespaceoid);
/*
* loop over the definition list and extract the information we need.
*/
foreach(pl, parameters)
{
DefElem *defel = (DefElem *) lfirst(pl);
Avoid unnecessary use of pg_strcasecmp for already-downcased identifiers. We have a lot of code in which option names, which from the user's viewpoint are logically keywords, are passed through the grammar as plain identifiers, and then matched to string literals during command execution. This approach avoids making words into lexer keywords unnecessarily. Some places matched these strings using plain strcmp, some using pg_strcasecmp. But the latter should be unnecessary since identifiers would have been downcased on their way through the parser. Aside from any efficiency concerns (probably not a big factor), the lack of consistency in this area creates a hazard of subtle bugs due to different places coming to different conclusions about whether two option names are the same or different. Hence, standardize on using strcmp() to match any option names that are expected to have been fed through the parser. This does create a user-visible behavioral change, which is that while formerly all of these would work: alter table foo set (fillfactor = 50); alter table foo set (FillFactor = 50); alter table foo set ("fillfactor" = 50); alter table foo set ("FillFactor" = 50); now the last case will fail because that double-quoted identifier is different from the others. However, none of our documentation says that you can use a quoted identifier in such contexts at all, and we should discourage doing so since it would break if we ever decide to parse such constructs as true lexer keywords rather than poor man's substitutes. So this shouldn't create a significant compatibility issue for users. Daniel Gustafsson, reviewed by Michael Paquier, small changes by me Discussion: https://postgr.es/m/29405B24-564E-476B-98C0-677A29805B84@yesql.se
2018-01-27 00:25:02 +01:00
if (strcmp(defel->defname, "start") == 0)
{
values[Anum_pg_ts_parser_prsstart - 1] =
get_ts_parser_func(defel, Anum_pg_ts_parser_prsstart);
}
Avoid unnecessary use of pg_strcasecmp for already-downcased identifiers. We have a lot of code in which option names, which from the user's viewpoint are logically keywords, are passed through the grammar as plain identifiers, and then matched to string literals during command execution. This approach avoids making words into lexer keywords unnecessarily. Some places matched these strings using plain strcmp, some using pg_strcasecmp. But the latter should be unnecessary since identifiers would have been downcased on their way through the parser. Aside from any efficiency concerns (probably not a big factor), the lack of consistency in this area creates a hazard of subtle bugs due to different places coming to different conclusions about whether two option names are the same or different. Hence, standardize on using strcmp() to match any option names that are expected to have been fed through the parser. This does create a user-visible behavioral change, which is that while formerly all of these would work: alter table foo set (fillfactor = 50); alter table foo set (FillFactor = 50); alter table foo set ("fillfactor" = 50); alter table foo set ("FillFactor" = 50); now the last case will fail because that double-quoted identifier is different from the others. However, none of our documentation says that you can use a quoted identifier in such contexts at all, and we should discourage doing so since it would break if we ever decide to parse such constructs as true lexer keywords rather than poor man's substitutes. So this shouldn't create a significant compatibility issue for users. Daniel Gustafsson, reviewed by Michael Paquier, small changes by me Discussion: https://postgr.es/m/29405B24-564E-476B-98C0-677A29805B84@yesql.se
2018-01-27 00:25:02 +01:00
else if (strcmp(defel->defname, "gettoken") == 0)
{
values[Anum_pg_ts_parser_prstoken - 1] =
get_ts_parser_func(defel, Anum_pg_ts_parser_prstoken);
}
Avoid unnecessary use of pg_strcasecmp for already-downcased identifiers. We have a lot of code in which option names, which from the user's viewpoint are logically keywords, are passed through the grammar as plain identifiers, and then matched to string literals during command execution. This approach avoids making words into lexer keywords unnecessarily. Some places matched these strings using plain strcmp, some using pg_strcasecmp. But the latter should be unnecessary since identifiers would have been downcased on their way through the parser. Aside from any efficiency concerns (probably not a big factor), the lack of consistency in this area creates a hazard of subtle bugs due to different places coming to different conclusions about whether two option names are the same or different. Hence, standardize on using strcmp() to match any option names that are expected to have been fed through the parser. This does create a user-visible behavioral change, which is that while formerly all of these would work: alter table foo set (fillfactor = 50); alter table foo set (FillFactor = 50); alter table foo set ("fillfactor" = 50); alter table foo set ("FillFactor" = 50); now the last case will fail because that double-quoted identifier is different from the others. However, none of our documentation says that you can use a quoted identifier in such contexts at all, and we should discourage doing so since it would break if we ever decide to parse such constructs as true lexer keywords rather than poor man's substitutes. So this shouldn't create a significant compatibility issue for users. Daniel Gustafsson, reviewed by Michael Paquier, small changes by me Discussion: https://postgr.es/m/29405B24-564E-476B-98C0-677A29805B84@yesql.se
2018-01-27 00:25:02 +01:00
else if (strcmp(defel->defname, "end") == 0)
{
values[Anum_pg_ts_parser_prsend - 1] =
get_ts_parser_func(defel, Anum_pg_ts_parser_prsend);
}
Avoid unnecessary use of pg_strcasecmp for already-downcased identifiers. We have a lot of code in which option names, which from the user's viewpoint are logically keywords, are passed through the grammar as plain identifiers, and then matched to string literals during command execution. This approach avoids making words into lexer keywords unnecessarily. Some places matched these strings using plain strcmp, some using pg_strcasecmp. But the latter should be unnecessary since identifiers would have been downcased on their way through the parser. Aside from any efficiency concerns (probably not a big factor), the lack of consistency in this area creates a hazard of subtle bugs due to different places coming to different conclusions about whether two option names are the same or different. Hence, standardize on using strcmp() to match any option names that are expected to have been fed through the parser. This does create a user-visible behavioral change, which is that while formerly all of these would work: alter table foo set (fillfactor = 50); alter table foo set (FillFactor = 50); alter table foo set ("fillfactor" = 50); alter table foo set ("FillFactor" = 50); now the last case will fail because that double-quoted identifier is different from the others. However, none of our documentation says that you can use a quoted identifier in such contexts at all, and we should discourage doing so since it would break if we ever decide to parse such constructs as true lexer keywords rather than poor man's substitutes. So this shouldn't create a significant compatibility issue for users. Daniel Gustafsson, reviewed by Michael Paquier, small changes by me Discussion: https://postgr.es/m/29405B24-564E-476B-98C0-677A29805B84@yesql.se
2018-01-27 00:25:02 +01:00
else if (strcmp(defel->defname, "headline") == 0)
{
values[Anum_pg_ts_parser_prsheadline - 1] =
get_ts_parser_func(defel, Anum_pg_ts_parser_prsheadline);
}
Avoid unnecessary use of pg_strcasecmp for already-downcased identifiers. We have a lot of code in which option names, which from the user's viewpoint are logically keywords, are passed through the grammar as plain identifiers, and then matched to string literals during command execution. This approach avoids making words into lexer keywords unnecessarily. Some places matched these strings using plain strcmp, some using pg_strcasecmp. But the latter should be unnecessary since identifiers would have been downcased on their way through the parser. Aside from any efficiency concerns (probably not a big factor), the lack of consistency in this area creates a hazard of subtle bugs due to different places coming to different conclusions about whether two option names are the same or different. Hence, standardize on using strcmp() to match any option names that are expected to have been fed through the parser. This does create a user-visible behavioral change, which is that while formerly all of these would work: alter table foo set (fillfactor = 50); alter table foo set (FillFactor = 50); alter table foo set ("fillfactor" = 50); alter table foo set ("FillFactor" = 50); now the last case will fail because that double-quoted identifier is different from the others. However, none of our documentation says that you can use a quoted identifier in such contexts at all, and we should discourage doing so since it would break if we ever decide to parse such constructs as true lexer keywords rather than poor man's substitutes. So this shouldn't create a significant compatibility issue for users. Daniel Gustafsson, reviewed by Michael Paquier, small changes by me Discussion: https://postgr.es/m/29405B24-564E-476B-98C0-677A29805B84@yesql.se
2018-01-27 00:25:02 +01:00
else if (strcmp(defel->defname, "lextypes") == 0)
{
values[Anum_pg_ts_parser_prslextype - 1] =
get_ts_parser_func(defel, Anum_pg_ts_parser_prslextype);
}
else
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("text search parser parameter \"%s\" not recognized",
defel->defname)));
}
/*
* Validation
*/
if (!OidIsValid(DatumGetObjectId(values[Anum_pg_ts_parser_prsstart - 1])))
ereport(ERROR,
(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
errmsg("text search parser start method is required")));
if (!OidIsValid(DatumGetObjectId(values[Anum_pg_ts_parser_prstoken - 1])))
ereport(ERROR,
(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
errmsg("text search parser gettoken method is required")));
if (!OidIsValid(DatumGetObjectId(values[Anum_pg_ts_parser_prsend - 1])))
ereport(ERROR,
(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
errmsg("text search parser end method is required")));
if (!OidIsValid(DatumGetObjectId(values[Anum_pg_ts_parser_prslextype - 1])))
ereport(ERROR,
(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
errmsg("text search parser lextypes method is required")));
/*
* Looks good, insert
*/
tup = heap_form_tuple(prsRel->rd_att, values, nulls);
Remove WITH OIDS support, change oid catalog column visibility. Previously tables declared WITH OIDS, including a significant fraction of the catalog tables, stored the oid column not as a normal column, but as part of the tuple header. This special column was not shown by default, which was somewhat odd, as it's often (consider e.g. pg_class.oid) one of the more important parts of a row. Neither pg_dump nor COPY included the contents of the oid column by default. The fact that the oid column was not an ordinary column necessitated a significant amount of special case code to support oid columns. That already was painful for the existing, but upcoming work aiming to make table storage pluggable, would have required expanding and duplicating that "specialness" significantly. WITH OIDS has been deprecated since 2005 (commit ff02d0a05280e0). Remove it. Removing includes: - CREATE TABLE and ALTER TABLE syntax for declaring the table to be WITH OIDS has been removed (WITH (oids[ = true]) will error out) - pg_dump does not support dumping tables declared WITH OIDS and will issue a warning when dumping one (and ignore the oid column). - restoring an pg_dump archive with pg_restore will warn when restoring a table with oid contents (and ignore the oid column) - COPY will refuse to load binary dump that includes oids. - pg_upgrade will error out when encountering tables declared WITH OIDS, they have to be altered to remove the oid column first. - Functionality to access the oid of the last inserted row (like plpgsql's RESULT_OID, spi's SPI_lastoid, ...) has been removed. The syntax for declaring a table WITHOUT OIDS (or WITH (oids = false) for CREATE TABLE) is still supported. While that requires a bit of support code, it seems unnecessary to break applications / dumps that do not use oids, and are explicit about not using them. The biggest user of WITH OID columns was postgres' catalog. This commit changes all 'magic' oid columns to be columns that are normally declared and stored. To reduce unnecessary query breakage all the newly added columns are still named 'oid', even if a table's column naming scheme would indicate 'reloid' or such. This obviously requires adapting a lot code, mostly replacing oid access via HeapTupleGetOid() with access to the underlying Form_pg_*->oid column. The bootstrap process now assigns oids for all oid columns in genbki.pl that do not have an explicit value (starting at the largest oid previously used), only oids assigned later by oids will be above FirstBootstrapObjectId. As the oid column now is a normal column the special bootstrap syntax for oids has been removed. Oids are not automatically assigned during insertion anymore, all backend code explicitly assigns oids with GetNewOidWithIndex(). For the rare case that insertions into the catalog via SQL are called for the new pg_nextoid() function can be used (which only works on catalog tables). The fact that oid columns on system tables are now normal columns means that they will be included in the set of columns expanded by * (i.e. SELECT * FROM pg_class will now include the table's oid, previously it did not). It'd not technically be hard to hide oid column by default, but that'd mean confusing behavior would either have to be carried forward forever, or it'd cause breakage down the line. While it's not unlikely that further adjustments are needed, the scope/invasiveness of the patch makes it worthwhile to get merge this now. It's painful to maintain externally, too complicated to commit after the code code freeze, and a dependency of a number of other patches. Catversion bump, for obvious reasons. Author: Andres Freund, with contributions by John Naylor Discussion: https://postgr.es/m/20180930034810.ywp2c7awz7opzcfr@alap3.anarazel.de
2018-11-21 00:36:57 +01:00
CatalogTupleInsert(prsRel, tup);
address = makeParserDependencies(tup);
/* Post creation hook for new text search parser */
InvokeObjectPostCreateHook(TSParserRelationId, prsOid, 0);
heap_freetuple(tup);
table_close(prsRel, RowExclusiveLock);
return address;
}
/* ---------------------- TS Dictionary commands -----------------------*/
/*
* make pg_depend entries for a new pg_ts_dict entry
*
* Return value is address of the new entry
*/
static ObjectAddress
makeDictionaryDependencies(HeapTuple tuple)
{
Form_pg_ts_dict dict = (Form_pg_ts_dict) GETSTRUCT(tuple);
ObjectAddress myself,
referenced;
ObjectAddresses *addrs;
ObjectAddressSet(myself, TSDictionaryRelationId, dict->oid);
/* dependency on owner */
recordDependencyOnOwner(myself.classId, myself.objectId, dict->dictowner);
/* dependency on extension */
recordDependencyOnCurrentExtension(&myself, false);
addrs = new_object_addresses();
/* dependency on namespace */
ObjectAddressSet(referenced, NamespaceRelationId, dict->dictnamespace);
add_exact_object_address(&referenced, addrs);
/* dependency on template */
ObjectAddressSet(referenced, TSTemplateRelationId, dict->dicttemplate);
add_exact_object_address(&referenced, addrs);
record_object_address_dependencies(&myself, addrs, DEPENDENCY_NORMAL);
free_object_addresses(addrs);
return myself;
}
/*
* verify that a template's init method accepts a proposed option list
*/
static void
verify_dictoptions(Oid tmplId, List *dictoptions)
{
HeapTuple tup;
Form_pg_ts_template tform;
Oid initmethod;
/*
* Suppress this test when running in a standalone backend. This is a
* hack to allow initdb to create prefab dictionaries that might not
* actually be usable in template1's encoding (due to using external files
* that can't be translated into template1's encoding). We want to create
* them anyway, since they might be usable later in other databases.
*/
if (!IsUnderPostmaster)
return;
tup = SearchSysCache1(TSTEMPLATEOID, ObjectIdGetDatum(tmplId));
if (!HeapTupleIsValid(tup)) /* should not happen */
elog(ERROR, "cache lookup failed for text search template %u",
tmplId);
tform = (Form_pg_ts_template) GETSTRUCT(tup);
initmethod = tform->tmplinit;
if (!OidIsValid(initmethod))
{
/* If there is no init method, disallow any options */
if (dictoptions)
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("text search template \"%s\" does not accept options",
NameStr(tform->tmplname))));
}
else
{
/*
* Copy the options just in case init method thinks it can scribble on
* them ...
*/
dictoptions = copyObject(dictoptions);
/*
* Call the init method and see if it complains. We don't worry about
* it leaking memory, since our command will soon be over anyway.
*/
(void) OidFunctionCall1(initmethod, PointerGetDatum(dictoptions));
}
ReleaseSysCache(tup);
}
/*
* CREATE TEXT SEARCH DICTIONARY
*/
ObjectAddress
DefineTSDictionary(List *names, List *parameters)
{
ListCell *pl;
Relation dictRel;
HeapTuple tup;
Datum values[Natts_pg_ts_dict];
bool nulls[Natts_pg_ts_dict];
NameData dname;
Oid templId = InvalidOid;
List *dictoptions = NIL;
Oid dictOid;
Oid namespaceoid;
AclResult aclresult;
char *dictname;
ObjectAddress address;
/* Convert list of names to a name and namespace */
namespaceoid = QualifiedNameGetCreationNamespace(names, &dictname);
/* Check we have creation rights in target namespace */
aclresult = object_aclcheck(NamespaceRelationId, namespaceoid, GetUserId(), ACL_CREATE);
if (aclresult != ACLCHECK_OK)
aclcheck_error(aclresult, OBJECT_SCHEMA,
get_namespace_name(namespaceoid));
/*
* loop over the definition list and extract the information we need.
*/
foreach(pl, parameters)
{
DefElem *defel = (DefElem *) lfirst(pl);
Avoid unnecessary use of pg_strcasecmp for already-downcased identifiers. We have a lot of code in which option names, which from the user's viewpoint are logically keywords, are passed through the grammar as plain identifiers, and then matched to string literals during command execution. This approach avoids making words into lexer keywords unnecessarily. Some places matched these strings using plain strcmp, some using pg_strcasecmp. But the latter should be unnecessary since identifiers would have been downcased on their way through the parser. Aside from any efficiency concerns (probably not a big factor), the lack of consistency in this area creates a hazard of subtle bugs due to different places coming to different conclusions about whether two option names are the same or different. Hence, standardize on using strcmp() to match any option names that are expected to have been fed through the parser. This does create a user-visible behavioral change, which is that while formerly all of these would work: alter table foo set (fillfactor = 50); alter table foo set (FillFactor = 50); alter table foo set ("fillfactor" = 50); alter table foo set ("FillFactor" = 50); now the last case will fail because that double-quoted identifier is different from the others. However, none of our documentation says that you can use a quoted identifier in such contexts at all, and we should discourage doing so since it would break if we ever decide to parse such constructs as true lexer keywords rather than poor man's substitutes. So this shouldn't create a significant compatibility issue for users. Daniel Gustafsson, reviewed by Michael Paquier, small changes by me Discussion: https://postgr.es/m/29405B24-564E-476B-98C0-677A29805B84@yesql.se
2018-01-27 00:25:02 +01:00
if (strcmp(defel->defname, "template") == 0)
{
templId = get_ts_template_oid(defGetQualifiedName(defel), false);
}
else
{
/* Assume it's an option for the dictionary itself */
dictoptions = lappend(dictoptions, defel);
}
}
/*
* Validation
*/
if (!OidIsValid(templId))
ereport(ERROR,
(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
errmsg("text search template is required")));
verify_dictoptions(templId, dictoptions);
Remove WITH OIDS support, change oid catalog column visibility. Previously tables declared WITH OIDS, including a significant fraction of the catalog tables, stored the oid column not as a normal column, but as part of the tuple header. This special column was not shown by default, which was somewhat odd, as it's often (consider e.g. pg_class.oid) one of the more important parts of a row. Neither pg_dump nor COPY included the contents of the oid column by default. The fact that the oid column was not an ordinary column necessitated a significant amount of special case code to support oid columns. That already was painful for the existing, but upcoming work aiming to make table storage pluggable, would have required expanding and duplicating that "specialness" significantly. WITH OIDS has been deprecated since 2005 (commit ff02d0a05280e0). Remove it. Removing includes: - CREATE TABLE and ALTER TABLE syntax for declaring the table to be WITH OIDS has been removed (WITH (oids[ = true]) will error out) - pg_dump does not support dumping tables declared WITH OIDS and will issue a warning when dumping one (and ignore the oid column). - restoring an pg_dump archive with pg_restore will warn when restoring a table with oid contents (and ignore the oid column) - COPY will refuse to load binary dump that includes oids. - pg_upgrade will error out when encountering tables declared WITH OIDS, they have to be altered to remove the oid column first. - Functionality to access the oid of the last inserted row (like plpgsql's RESULT_OID, spi's SPI_lastoid, ...) has been removed. The syntax for declaring a table WITHOUT OIDS (or WITH (oids = false) for CREATE TABLE) is still supported. While that requires a bit of support code, it seems unnecessary to break applications / dumps that do not use oids, and are explicit about not using them. The biggest user of WITH OID columns was postgres' catalog. This commit changes all 'magic' oid columns to be columns that are normally declared and stored. To reduce unnecessary query breakage all the newly added columns are still named 'oid', even if a table's column naming scheme would indicate 'reloid' or such. This obviously requires adapting a lot code, mostly replacing oid access via HeapTupleGetOid() with access to the underlying Form_pg_*->oid column. The bootstrap process now assigns oids for all oid columns in genbki.pl that do not have an explicit value (starting at the largest oid previously used), only oids assigned later by oids will be above FirstBootstrapObjectId. As the oid column now is a normal column the special bootstrap syntax for oids has been removed. Oids are not automatically assigned during insertion anymore, all backend code explicitly assigns oids with GetNewOidWithIndex(). For the rare case that insertions into the catalog via SQL are called for the new pg_nextoid() function can be used (which only works on catalog tables). The fact that oid columns on system tables are now normal columns means that they will be included in the set of columns expanded by * (i.e. SELECT * FROM pg_class will now include the table's oid, previously it did not). It'd not technically be hard to hide oid column by default, but that'd mean confusing behavior would either have to be carried forward forever, or it'd cause breakage down the line. While it's not unlikely that further adjustments are needed, the scope/invasiveness of the patch makes it worthwhile to get merge this now. It's painful to maintain externally, too complicated to commit after the code code freeze, and a dependency of a number of other patches. Catversion bump, for obvious reasons. Author: Andres Freund, with contributions by John Naylor Discussion: https://postgr.es/m/20180930034810.ywp2c7awz7opzcfr@alap3.anarazel.de
2018-11-21 00:36:57 +01:00
dictRel = table_open(TSDictionaryRelationId, RowExclusiveLock);
Remove WITH OIDS support, change oid catalog column visibility. Previously tables declared WITH OIDS, including a significant fraction of the catalog tables, stored the oid column not as a normal column, but as part of the tuple header. This special column was not shown by default, which was somewhat odd, as it's often (consider e.g. pg_class.oid) one of the more important parts of a row. Neither pg_dump nor COPY included the contents of the oid column by default. The fact that the oid column was not an ordinary column necessitated a significant amount of special case code to support oid columns. That already was painful for the existing, but upcoming work aiming to make table storage pluggable, would have required expanding and duplicating that "specialness" significantly. WITH OIDS has been deprecated since 2005 (commit ff02d0a05280e0). Remove it. Removing includes: - CREATE TABLE and ALTER TABLE syntax for declaring the table to be WITH OIDS has been removed (WITH (oids[ = true]) will error out) - pg_dump does not support dumping tables declared WITH OIDS and will issue a warning when dumping one (and ignore the oid column). - restoring an pg_dump archive with pg_restore will warn when restoring a table with oid contents (and ignore the oid column) - COPY will refuse to load binary dump that includes oids. - pg_upgrade will error out when encountering tables declared WITH OIDS, they have to be altered to remove the oid column first. - Functionality to access the oid of the last inserted row (like plpgsql's RESULT_OID, spi's SPI_lastoid, ...) has been removed. The syntax for declaring a table WITHOUT OIDS (or WITH (oids = false) for CREATE TABLE) is still supported. While that requires a bit of support code, it seems unnecessary to break applications / dumps that do not use oids, and are explicit about not using them. The biggest user of WITH OID columns was postgres' catalog. This commit changes all 'magic' oid columns to be columns that are normally declared and stored. To reduce unnecessary query breakage all the newly added columns are still named 'oid', even if a table's column naming scheme would indicate 'reloid' or such. This obviously requires adapting a lot code, mostly replacing oid access via HeapTupleGetOid() with access to the underlying Form_pg_*->oid column. The bootstrap process now assigns oids for all oid columns in genbki.pl that do not have an explicit value (starting at the largest oid previously used), only oids assigned later by oids will be above FirstBootstrapObjectId. As the oid column now is a normal column the special bootstrap syntax for oids has been removed. Oids are not automatically assigned during insertion anymore, all backend code explicitly assigns oids with GetNewOidWithIndex(). For the rare case that insertions into the catalog via SQL are called for the new pg_nextoid() function can be used (which only works on catalog tables). The fact that oid columns on system tables are now normal columns means that they will be included in the set of columns expanded by * (i.e. SELECT * FROM pg_class will now include the table's oid, previously it did not). It'd not technically be hard to hide oid column by default, but that'd mean confusing behavior would either have to be carried forward forever, or it'd cause breakage down the line. While it's not unlikely that further adjustments are needed, the scope/invasiveness of the patch makes it worthwhile to get merge this now. It's painful to maintain externally, too complicated to commit after the code code freeze, and a dependency of a number of other patches. Catversion bump, for obvious reasons. Author: Andres Freund, with contributions by John Naylor Discussion: https://postgr.es/m/20180930034810.ywp2c7awz7opzcfr@alap3.anarazel.de
2018-11-21 00:36:57 +01:00
/*
* Looks good, insert
*/
memset(values, 0, sizeof(values));
memset(nulls, false, sizeof(nulls));
Remove WITH OIDS support, change oid catalog column visibility. Previously tables declared WITH OIDS, including a significant fraction of the catalog tables, stored the oid column not as a normal column, but as part of the tuple header. This special column was not shown by default, which was somewhat odd, as it's often (consider e.g. pg_class.oid) one of the more important parts of a row. Neither pg_dump nor COPY included the contents of the oid column by default. The fact that the oid column was not an ordinary column necessitated a significant amount of special case code to support oid columns. That already was painful for the existing, but upcoming work aiming to make table storage pluggable, would have required expanding and duplicating that "specialness" significantly. WITH OIDS has been deprecated since 2005 (commit ff02d0a05280e0). Remove it. Removing includes: - CREATE TABLE and ALTER TABLE syntax for declaring the table to be WITH OIDS has been removed (WITH (oids[ = true]) will error out) - pg_dump does not support dumping tables declared WITH OIDS and will issue a warning when dumping one (and ignore the oid column). - restoring an pg_dump archive with pg_restore will warn when restoring a table with oid contents (and ignore the oid column) - COPY will refuse to load binary dump that includes oids. - pg_upgrade will error out when encountering tables declared WITH OIDS, they have to be altered to remove the oid column first. - Functionality to access the oid of the last inserted row (like plpgsql's RESULT_OID, spi's SPI_lastoid, ...) has been removed. The syntax for declaring a table WITHOUT OIDS (or WITH (oids = false) for CREATE TABLE) is still supported. While that requires a bit of support code, it seems unnecessary to break applications / dumps that do not use oids, and are explicit about not using them. The biggest user of WITH OID columns was postgres' catalog. This commit changes all 'magic' oid columns to be columns that are normally declared and stored. To reduce unnecessary query breakage all the newly added columns are still named 'oid', even if a table's column naming scheme would indicate 'reloid' or such. This obviously requires adapting a lot code, mostly replacing oid access via HeapTupleGetOid() with access to the underlying Form_pg_*->oid column. The bootstrap process now assigns oids for all oid columns in genbki.pl that do not have an explicit value (starting at the largest oid previously used), only oids assigned later by oids will be above FirstBootstrapObjectId. As the oid column now is a normal column the special bootstrap syntax for oids has been removed. Oids are not automatically assigned during insertion anymore, all backend code explicitly assigns oids with GetNewOidWithIndex(). For the rare case that insertions into the catalog via SQL are called for the new pg_nextoid() function can be used (which only works on catalog tables). The fact that oid columns on system tables are now normal columns means that they will be included in the set of columns expanded by * (i.e. SELECT * FROM pg_class will now include the table's oid, previously it did not). It'd not technically be hard to hide oid column by default, but that'd mean confusing behavior would either have to be carried forward forever, or it'd cause breakage down the line. While it's not unlikely that further adjustments are needed, the scope/invasiveness of the patch makes it worthwhile to get merge this now. It's painful to maintain externally, too complicated to commit after the code code freeze, and a dependency of a number of other patches. Catversion bump, for obvious reasons. Author: Andres Freund, with contributions by John Naylor Discussion: https://postgr.es/m/20180930034810.ywp2c7awz7opzcfr@alap3.anarazel.de
2018-11-21 00:36:57 +01:00
dictOid = GetNewOidWithIndex(dictRel, TSDictionaryOidIndexId,
Anum_pg_ts_dict_oid);
values[Anum_pg_ts_dict_oid - 1] = ObjectIdGetDatum(dictOid);
namestrcpy(&dname, dictname);
values[Anum_pg_ts_dict_dictname - 1] = NameGetDatum(&dname);
values[Anum_pg_ts_dict_dictnamespace - 1] = ObjectIdGetDatum(namespaceoid);
values[Anum_pg_ts_dict_dictowner - 1] = ObjectIdGetDatum(GetUserId());
values[Anum_pg_ts_dict_dicttemplate - 1] = ObjectIdGetDatum(templId);
if (dictoptions)
values[Anum_pg_ts_dict_dictinitoption - 1] =
PointerGetDatum(serialize_deflist(dictoptions));
else
nulls[Anum_pg_ts_dict_dictinitoption - 1] = true;
tup = heap_form_tuple(dictRel->rd_att, values, nulls);
Remove WITH OIDS support, change oid catalog column visibility. Previously tables declared WITH OIDS, including a significant fraction of the catalog tables, stored the oid column not as a normal column, but as part of the tuple header. This special column was not shown by default, which was somewhat odd, as it's often (consider e.g. pg_class.oid) one of the more important parts of a row. Neither pg_dump nor COPY included the contents of the oid column by default. The fact that the oid column was not an ordinary column necessitated a significant amount of special case code to support oid columns. That already was painful for the existing, but upcoming work aiming to make table storage pluggable, would have required expanding and duplicating that "specialness" significantly. WITH OIDS has been deprecated since 2005 (commit ff02d0a05280e0). Remove it. Removing includes: - CREATE TABLE and ALTER TABLE syntax for declaring the table to be WITH OIDS has been removed (WITH (oids[ = true]) will error out) - pg_dump does not support dumping tables declared WITH OIDS and will issue a warning when dumping one (and ignore the oid column). - restoring an pg_dump archive with pg_restore will warn when restoring a table with oid contents (and ignore the oid column) - COPY will refuse to load binary dump that includes oids. - pg_upgrade will error out when encountering tables declared WITH OIDS, they have to be altered to remove the oid column first. - Functionality to access the oid of the last inserted row (like plpgsql's RESULT_OID, spi's SPI_lastoid, ...) has been removed. The syntax for declaring a table WITHOUT OIDS (or WITH (oids = false) for CREATE TABLE) is still supported. While that requires a bit of support code, it seems unnecessary to break applications / dumps that do not use oids, and are explicit about not using them. The biggest user of WITH OID columns was postgres' catalog. This commit changes all 'magic' oid columns to be columns that are normally declared and stored. To reduce unnecessary query breakage all the newly added columns are still named 'oid', even if a table's column naming scheme would indicate 'reloid' or such. This obviously requires adapting a lot code, mostly replacing oid access via HeapTupleGetOid() with access to the underlying Form_pg_*->oid column. The bootstrap process now assigns oids for all oid columns in genbki.pl that do not have an explicit value (starting at the largest oid previously used), only oids assigned later by oids will be above FirstBootstrapObjectId. As the oid column now is a normal column the special bootstrap syntax for oids has been removed. Oids are not automatically assigned during insertion anymore, all backend code explicitly assigns oids with GetNewOidWithIndex(). For the rare case that insertions into the catalog via SQL are called for the new pg_nextoid() function can be used (which only works on catalog tables). The fact that oid columns on system tables are now normal columns means that they will be included in the set of columns expanded by * (i.e. SELECT * FROM pg_class will now include the table's oid, previously it did not). It'd not technically be hard to hide oid column by default, but that'd mean confusing behavior would either have to be carried forward forever, or it'd cause breakage down the line. While it's not unlikely that further adjustments are needed, the scope/invasiveness of the patch makes it worthwhile to get merge this now. It's painful to maintain externally, too complicated to commit after the code code freeze, and a dependency of a number of other patches. Catversion bump, for obvious reasons. Author: Andres Freund, with contributions by John Naylor Discussion: https://postgr.es/m/20180930034810.ywp2c7awz7opzcfr@alap3.anarazel.de
2018-11-21 00:36:57 +01:00
CatalogTupleInsert(dictRel, tup);
address = makeDictionaryDependencies(tup);
/* Post creation hook for new text search dictionary */
InvokeObjectPostCreateHook(TSDictionaryRelationId, dictOid, 0);
heap_freetuple(tup);
table_close(dictRel, RowExclusiveLock);
return address;
}
/*
* ALTER TEXT SEARCH DICTIONARY
*/
ObjectAddress
AlterTSDictionary(AlterTSDictionaryStmt *stmt)
{
HeapTuple tup,
newtup;
Relation rel;
Oid dictId;
ListCell *pl;
List *dictoptions;
Datum opt;
bool isnull;
Datum repl_val[Natts_pg_ts_dict];
bool repl_null[Natts_pg_ts_dict];
bool repl_repl[Natts_pg_ts_dict];
ObjectAddress address;
dictId = get_ts_dict_oid(stmt->dictname, false);
rel = table_open(TSDictionaryRelationId, RowExclusiveLock);
tup = SearchSysCache1(TSDICTOID, ObjectIdGetDatum(dictId));
if (!HeapTupleIsValid(tup))
elog(ERROR, "cache lookup failed for text search dictionary %u",
dictId);
/* must be owner */
if (!object_ownercheck(TSDictionaryRelationId, dictId, GetUserId()))
aclcheck_error(ACLCHECK_NOT_OWNER, OBJECT_TSDICTIONARY,
NameListToString(stmt->dictname));
/* deserialize the existing set of options */
opt = SysCacheGetAttr(TSDICTOID, tup,
Anum_pg_ts_dict_dictinitoption,
&isnull);
if (isnull)
dictoptions = NIL;
else
dictoptions = deserialize_deflist(opt);
/*
* Modify the options list as per specified changes
*/
foreach(pl, stmt->options)
{
DefElem *defel = (DefElem *) lfirst(pl);
ListCell *cell;
/*
* Remove any matches ...
*/
Represent Lists as expansible arrays, not chains of cons-cells. Originally, Postgres Lists were a more or less exact reimplementation of Lisp lists, which consist of chains of separately-allocated cons cells, each having a value and a next-cell link. We'd hacked that once before (commit d0b4399d8) to add a separate List header, but the data was still in cons cells. That makes some operations -- notably list_nth() -- O(N), and it's bulky because of the next-cell pointers and per-cell palloc overhead, and it's very cache-unfriendly if the cons cells end up scattered around rather than being adjacent. In this rewrite, we still have List headers, but the data is in a resizable array of values, with no next-cell links. Now we need at most two palloc's per List, and often only one, since we can allocate some values in the same palloc call as the List header. (Of course, extending an existing List may require repalloc's to enlarge the array. But this involves just O(log N) allocations not O(N).) Of course this is not without downsides. The key difficulty is that addition or deletion of a list entry may now cause other entries to move, which it did not before. For example, that breaks foreach() and sister macros, which historically used a pointer to the current cons-cell as loop state. We can repair those macros transparently by making their actual loop state be an integer list index; the exposed "ListCell *" pointer is no longer state carried across loop iterations, but is just a derived value. (In practice, modern compilers can optimize things back to having just one loop state value, at least for simple cases with inline loop bodies.) In principle, this is a semantics change for cases where the loop body inserts or deletes list entries ahead of the current loop index; but I found no such cases in the Postgres code. The change is not at all transparent for code that doesn't use foreach() but chases lists "by hand" using lnext(). The largest share of such code in the backend is in loops that were maintaining "prev" and "next" variables in addition to the current-cell pointer, in order to delete list cells efficiently using list_delete_cell(). However, we no longer need a previous-cell pointer to delete a list cell efficiently. Keeping a next-cell pointer doesn't work, as explained above, but we can improve matters by changing such code to use a regular foreach() loop and then using the new macro foreach_delete_current() to delete the current cell. (This macro knows how to update the associated foreach loop's state so that no cells will be missed in the traversal.) There remains a nontrivial risk of code assuming that a ListCell * pointer will remain good over an operation that could now move the list contents. To help catch such errors, list.c can be compiled with a new define symbol DEBUG_LIST_MEMORY_USAGE that forcibly moves list contents whenever that could possibly happen. This makes list operations significantly more expensive so it's not normally turned on (though it is on by default if USE_VALGRIND is on). There are two notable API differences from the previous code: * lnext() now requires the List's header pointer in addition to the current cell's address. * list_delete_cell() no longer requires a previous-cell argument. These changes are somewhat unfortunate, but on the other hand code using either function needs inspection to see if it is assuming anything it shouldn't, so it's not all bad. Programmers should be aware of these significant performance changes: * list_nth() and related functions are now O(1); so there's no major access-speed difference between a list and an array. * Inserting or deleting a list element now takes time proportional to the distance to the end of the list, due to moving the array elements. (However, it typically *doesn't* require palloc or pfree, so except in long lists it's probably still faster than before.) Notably, lcons() used to be about the same cost as lappend(), but that's no longer true if the list is long. Code that uses lcons() and list_delete_first() to maintain a stack might usefully be rewritten to push and pop at the end of the list rather than the beginning. * There are now list_insert_nth...() and list_delete_nth...() functions that add or remove a list cell identified by index. These have the data-movement penalty explained above, but there's no search penalty. * list_concat() and variants now copy the second list's data into storage belonging to the first list, so there is no longer any sharing of cells between the input lists. The second argument is now declared "const List *" to reflect that it isn't changed. This patch just does the minimum needed to get the new implementation in place and fix bugs exposed by the regression tests. As suggested by the foregoing, there's a fair amount of followup work remaining to do. Also, the ENABLE_LIST_COMPAT macros are finally removed in this commit. Code using those should have been gone a dozen years ago. Patch by me; thanks to David Rowley, Jesper Pedersen, and others for review. Discussion: https://postgr.es/m/11587.1550975080@sss.pgh.pa.us
2019-07-15 19:41:58 +02:00
foreach(cell, dictoptions)
{
DefElem *oldel = (DefElem *) lfirst(cell);
Avoid unnecessary use of pg_strcasecmp for already-downcased identifiers. We have a lot of code in which option names, which from the user's viewpoint are logically keywords, are passed through the grammar as plain identifiers, and then matched to string literals during command execution. This approach avoids making words into lexer keywords unnecessarily. Some places matched these strings using plain strcmp, some using pg_strcasecmp. But the latter should be unnecessary since identifiers would have been downcased on their way through the parser. Aside from any efficiency concerns (probably not a big factor), the lack of consistency in this area creates a hazard of subtle bugs due to different places coming to different conclusions about whether two option names are the same or different. Hence, standardize on using strcmp() to match any option names that are expected to have been fed through the parser. This does create a user-visible behavioral change, which is that while formerly all of these would work: alter table foo set (fillfactor = 50); alter table foo set (FillFactor = 50); alter table foo set ("fillfactor" = 50); alter table foo set ("FillFactor" = 50); now the last case will fail because that double-quoted identifier is different from the others. However, none of our documentation says that you can use a quoted identifier in such contexts at all, and we should discourage doing so since it would break if we ever decide to parse such constructs as true lexer keywords rather than poor man's substitutes. So this shouldn't create a significant compatibility issue for users. Daniel Gustafsson, reviewed by Michael Paquier, small changes by me Discussion: https://postgr.es/m/29405B24-564E-476B-98C0-677A29805B84@yesql.se
2018-01-27 00:25:02 +01:00
if (strcmp(oldel->defname, defel->defname) == 0)
Represent Lists as expansible arrays, not chains of cons-cells. Originally, Postgres Lists were a more or less exact reimplementation of Lisp lists, which consist of chains of separately-allocated cons cells, each having a value and a next-cell link. We'd hacked that once before (commit d0b4399d8) to add a separate List header, but the data was still in cons cells. That makes some operations -- notably list_nth() -- O(N), and it's bulky because of the next-cell pointers and per-cell palloc overhead, and it's very cache-unfriendly if the cons cells end up scattered around rather than being adjacent. In this rewrite, we still have List headers, but the data is in a resizable array of values, with no next-cell links. Now we need at most two palloc's per List, and often only one, since we can allocate some values in the same palloc call as the List header. (Of course, extending an existing List may require repalloc's to enlarge the array. But this involves just O(log N) allocations not O(N).) Of course this is not without downsides. The key difficulty is that addition or deletion of a list entry may now cause other entries to move, which it did not before. For example, that breaks foreach() and sister macros, which historically used a pointer to the current cons-cell as loop state. We can repair those macros transparently by making their actual loop state be an integer list index; the exposed "ListCell *" pointer is no longer state carried across loop iterations, but is just a derived value. (In practice, modern compilers can optimize things back to having just one loop state value, at least for simple cases with inline loop bodies.) In principle, this is a semantics change for cases where the loop body inserts or deletes list entries ahead of the current loop index; but I found no such cases in the Postgres code. The change is not at all transparent for code that doesn't use foreach() but chases lists "by hand" using lnext(). The largest share of such code in the backend is in loops that were maintaining "prev" and "next" variables in addition to the current-cell pointer, in order to delete list cells efficiently using list_delete_cell(). However, we no longer need a previous-cell pointer to delete a list cell efficiently. Keeping a next-cell pointer doesn't work, as explained above, but we can improve matters by changing such code to use a regular foreach() loop and then using the new macro foreach_delete_current() to delete the current cell. (This macro knows how to update the associated foreach loop's state so that no cells will be missed in the traversal.) There remains a nontrivial risk of code assuming that a ListCell * pointer will remain good over an operation that could now move the list contents. To help catch such errors, list.c can be compiled with a new define symbol DEBUG_LIST_MEMORY_USAGE that forcibly moves list contents whenever that could possibly happen. This makes list operations significantly more expensive so it's not normally turned on (though it is on by default if USE_VALGRIND is on). There are two notable API differences from the previous code: * lnext() now requires the List's header pointer in addition to the current cell's address. * list_delete_cell() no longer requires a previous-cell argument. These changes are somewhat unfortunate, but on the other hand code using either function needs inspection to see if it is assuming anything it shouldn't, so it's not all bad. Programmers should be aware of these significant performance changes: * list_nth() and related functions are now O(1); so there's no major access-speed difference between a list and an array. * Inserting or deleting a list element now takes time proportional to the distance to the end of the list, due to moving the array elements. (However, it typically *doesn't* require palloc or pfree, so except in long lists it's probably still faster than before.) Notably, lcons() used to be about the same cost as lappend(), but that's no longer true if the list is long. Code that uses lcons() and list_delete_first() to maintain a stack might usefully be rewritten to push and pop at the end of the list rather than the beginning. * There are now list_insert_nth...() and list_delete_nth...() functions that add or remove a list cell identified by index. These have the data-movement penalty explained above, but there's no search penalty. * list_concat() and variants now copy the second list's data into storage belonging to the first list, so there is no longer any sharing of cells between the input lists. The second argument is now declared "const List *" to reflect that it isn't changed. This patch just does the minimum needed to get the new implementation in place and fix bugs exposed by the regression tests. As suggested by the foregoing, there's a fair amount of followup work remaining to do. Also, the ENABLE_LIST_COMPAT macros are finally removed in this commit. Code using those should have been gone a dozen years ago. Patch by me; thanks to David Rowley, Jesper Pedersen, and others for review. Discussion: https://postgr.es/m/11587.1550975080@sss.pgh.pa.us
2019-07-15 19:41:58 +02:00
dictoptions = foreach_delete_current(dictoptions, cell);
}
/*
* and add new value if it's got one
*/
if (defel->arg)
dictoptions = lappend(dictoptions, defel);
}
/*
* Validate
*/
verify_dictoptions(((Form_pg_ts_dict) GETSTRUCT(tup))->dicttemplate,
dictoptions);
/*
* Looks good, update
*/
memset(repl_val, 0, sizeof(repl_val));
memset(repl_null, false, sizeof(repl_null));
memset(repl_repl, false, sizeof(repl_repl));
if (dictoptions)
repl_val[Anum_pg_ts_dict_dictinitoption - 1] =
PointerGetDatum(serialize_deflist(dictoptions));
else
repl_null[Anum_pg_ts_dict_dictinitoption - 1] = true;
repl_repl[Anum_pg_ts_dict_dictinitoption - 1] = true;
newtup = heap_modify_tuple(tup, RelationGetDescr(rel),
repl_val, repl_null, repl_repl);
CatalogTupleUpdate(rel, &newtup->t_self, newtup);
InvokeObjectPostAlterHook(TSDictionaryRelationId, dictId, 0);
ObjectAddressSet(address, TSDictionaryRelationId, dictId);
/*
* NOTE: because we only support altering the options, not the template,
* there is no need to update dependencies. This might have to change if
* the options ever reference inside-the-database objects.
*/
heap_freetuple(newtup);
ReleaseSysCache(tup);
table_close(rel, RowExclusiveLock);
return address;
}
/* ---------------------- TS Template commands -----------------------*/
/*
* lookup a template support function and return its OID (as a Datum)
*
* attnum is the pg_ts_template column the function will go into
*/
static Datum
get_ts_template_func(DefElem *defel, int attnum)
{
List *funcName = defGetQualifiedName(defel);
Oid typeId[4];
Oid retTypeId;
int nargs;
Oid procOid;
retTypeId = INTERNALOID;
typeId[0] = INTERNALOID;
typeId[1] = INTERNALOID;
typeId[2] = INTERNALOID;
typeId[3] = INTERNALOID;
switch (attnum)
{
case Anum_pg_ts_template_tmplinit:
nargs = 1;
break;
case Anum_pg_ts_template_tmpllexize:
nargs = 4;
break;
default:
/* should not be here */
elog(ERROR, "unrecognized attribute for text search template: %d",
attnum);
nargs = 0; /* keep compiler quiet */
}
procOid = LookupFuncName(funcName, nargs, typeId, false);
if (get_func_rettype(procOid) != retTypeId)
ereport(ERROR,
(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
errmsg("function %s should return type %s",
func_signature_string(funcName, nargs, NIL, typeId),
format_type_be(retTypeId))));
return ObjectIdGetDatum(procOid);
}
/*
* make pg_depend entries for a new pg_ts_template entry
*/
static ObjectAddress
makeTSTemplateDependencies(HeapTuple tuple)
{
Form_pg_ts_template tmpl = (Form_pg_ts_template) GETSTRUCT(tuple);
ObjectAddress myself,
referenced;
ObjectAddresses *addrs;
ObjectAddressSet(myself, TSTemplateRelationId, tmpl->oid);
/* dependency on extension */
recordDependencyOnCurrentExtension(&myself, false);
addrs = new_object_addresses();
/* dependency on namespace */
ObjectAddressSet(referenced, NamespaceRelationId, tmpl->tmplnamespace);
add_exact_object_address(&referenced, addrs);
/* dependencies on functions */
ObjectAddressSet(referenced, ProcedureRelationId, tmpl->tmpllexize);
add_exact_object_address(&referenced, addrs);
if (OidIsValid(tmpl->tmplinit))
{
referenced.objectId = tmpl->tmplinit;
add_exact_object_address(&referenced, addrs);
}
record_object_address_dependencies(&myself, addrs, DEPENDENCY_NORMAL);
free_object_addresses(addrs);
return myself;
}
/*
* CREATE TEXT SEARCH TEMPLATE
*/
ObjectAddress
DefineTSTemplate(List *names, List *parameters)
{
ListCell *pl;
Relation tmplRel;
HeapTuple tup;
Datum values[Natts_pg_ts_template];
bool nulls[Natts_pg_ts_template];
NameData dname;
int i;
Oid tmplOid;
Oid namespaceoid;
char *tmplname;
ObjectAddress address;
if (!superuser())
ereport(ERROR,
(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
errmsg("must be superuser to create text search templates")));
/* Convert list of names to a name and namespace */
namespaceoid = QualifiedNameGetCreationNamespace(names, &tmplname);
tmplRel = table_open(TSTemplateRelationId, RowExclusiveLock);
Remove WITH OIDS support, change oid catalog column visibility. Previously tables declared WITH OIDS, including a significant fraction of the catalog tables, stored the oid column not as a normal column, but as part of the tuple header. This special column was not shown by default, which was somewhat odd, as it's often (consider e.g. pg_class.oid) one of the more important parts of a row. Neither pg_dump nor COPY included the contents of the oid column by default. The fact that the oid column was not an ordinary column necessitated a significant amount of special case code to support oid columns. That already was painful for the existing, but upcoming work aiming to make table storage pluggable, would have required expanding and duplicating that "specialness" significantly. WITH OIDS has been deprecated since 2005 (commit ff02d0a05280e0). Remove it. Removing includes: - CREATE TABLE and ALTER TABLE syntax for declaring the table to be WITH OIDS has been removed (WITH (oids[ = true]) will error out) - pg_dump does not support dumping tables declared WITH OIDS and will issue a warning when dumping one (and ignore the oid column). - restoring an pg_dump archive with pg_restore will warn when restoring a table with oid contents (and ignore the oid column) - COPY will refuse to load binary dump that includes oids. - pg_upgrade will error out when encountering tables declared WITH OIDS, they have to be altered to remove the oid column first. - Functionality to access the oid of the last inserted row (like plpgsql's RESULT_OID, spi's SPI_lastoid, ...) has been removed. The syntax for declaring a table WITHOUT OIDS (or WITH (oids = false) for CREATE TABLE) is still supported. While that requires a bit of support code, it seems unnecessary to break applications / dumps that do not use oids, and are explicit about not using them. The biggest user of WITH OID columns was postgres' catalog. This commit changes all 'magic' oid columns to be columns that are normally declared and stored. To reduce unnecessary query breakage all the newly added columns are still named 'oid', even if a table's column naming scheme would indicate 'reloid' or such. This obviously requires adapting a lot code, mostly replacing oid access via HeapTupleGetOid() with access to the underlying Form_pg_*->oid column. The bootstrap process now assigns oids for all oid columns in genbki.pl that do not have an explicit value (starting at the largest oid previously used), only oids assigned later by oids will be above FirstBootstrapObjectId. As the oid column now is a normal column the special bootstrap syntax for oids has been removed. Oids are not automatically assigned during insertion anymore, all backend code explicitly assigns oids with GetNewOidWithIndex(). For the rare case that insertions into the catalog via SQL are called for the new pg_nextoid() function can be used (which only works on catalog tables). The fact that oid columns on system tables are now normal columns means that they will be included in the set of columns expanded by * (i.e. SELECT * FROM pg_class will now include the table's oid, previously it did not). It'd not technically be hard to hide oid column by default, but that'd mean confusing behavior would either have to be carried forward forever, or it'd cause breakage down the line. While it's not unlikely that further adjustments are needed, the scope/invasiveness of the patch makes it worthwhile to get merge this now. It's painful to maintain externally, too complicated to commit after the code code freeze, and a dependency of a number of other patches. Catversion bump, for obvious reasons. Author: Andres Freund, with contributions by John Naylor Discussion: https://postgr.es/m/20180930034810.ywp2c7awz7opzcfr@alap3.anarazel.de
2018-11-21 00:36:57 +01:00
for (i = 0; i < Natts_pg_ts_template; i++)
{
nulls[i] = false;
values[i] = ObjectIdGetDatum(InvalidOid);
}
Remove WITH OIDS support, change oid catalog column visibility. Previously tables declared WITH OIDS, including a significant fraction of the catalog tables, stored the oid column not as a normal column, but as part of the tuple header. This special column was not shown by default, which was somewhat odd, as it's often (consider e.g. pg_class.oid) one of the more important parts of a row. Neither pg_dump nor COPY included the contents of the oid column by default. The fact that the oid column was not an ordinary column necessitated a significant amount of special case code to support oid columns. That already was painful for the existing, but upcoming work aiming to make table storage pluggable, would have required expanding and duplicating that "specialness" significantly. WITH OIDS has been deprecated since 2005 (commit ff02d0a05280e0). Remove it. Removing includes: - CREATE TABLE and ALTER TABLE syntax for declaring the table to be WITH OIDS has been removed (WITH (oids[ = true]) will error out) - pg_dump does not support dumping tables declared WITH OIDS and will issue a warning when dumping one (and ignore the oid column). - restoring an pg_dump archive with pg_restore will warn when restoring a table with oid contents (and ignore the oid column) - COPY will refuse to load binary dump that includes oids. - pg_upgrade will error out when encountering tables declared WITH OIDS, they have to be altered to remove the oid column first. - Functionality to access the oid of the last inserted row (like plpgsql's RESULT_OID, spi's SPI_lastoid, ...) has been removed. The syntax for declaring a table WITHOUT OIDS (or WITH (oids = false) for CREATE TABLE) is still supported. While that requires a bit of support code, it seems unnecessary to break applications / dumps that do not use oids, and are explicit about not using them. The biggest user of WITH OID columns was postgres' catalog. This commit changes all 'magic' oid columns to be columns that are normally declared and stored. To reduce unnecessary query breakage all the newly added columns are still named 'oid', even if a table's column naming scheme would indicate 'reloid' or such. This obviously requires adapting a lot code, mostly replacing oid access via HeapTupleGetOid() with access to the underlying Form_pg_*->oid column. The bootstrap process now assigns oids for all oid columns in genbki.pl that do not have an explicit value (starting at the largest oid previously used), only oids assigned later by oids will be above FirstBootstrapObjectId. As the oid column now is a normal column the special bootstrap syntax for oids has been removed. Oids are not automatically assigned during insertion anymore, all backend code explicitly assigns oids with GetNewOidWithIndex(). For the rare case that insertions into the catalog via SQL are called for the new pg_nextoid() function can be used (which only works on catalog tables). The fact that oid columns on system tables are now normal columns means that they will be included in the set of columns expanded by * (i.e. SELECT * FROM pg_class will now include the table's oid, previously it did not). It'd not technically be hard to hide oid column by default, but that'd mean confusing behavior would either have to be carried forward forever, or it'd cause breakage down the line. While it's not unlikely that further adjustments are needed, the scope/invasiveness of the patch makes it worthwhile to get merge this now. It's painful to maintain externally, too complicated to commit after the code code freeze, and a dependency of a number of other patches. Catversion bump, for obvious reasons. Author: Andres Freund, with contributions by John Naylor Discussion: https://postgr.es/m/20180930034810.ywp2c7awz7opzcfr@alap3.anarazel.de
2018-11-21 00:36:57 +01:00
tmplOid = GetNewOidWithIndex(tmplRel, TSTemplateOidIndexId,
Anum_pg_ts_dict_oid);
values[Anum_pg_ts_template_oid - 1] = ObjectIdGetDatum(tmplOid);
namestrcpy(&dname, tmplname);
values[Anum_pg_ts_template_tmplname - 1] = NameGetDatum(&dname);
values[Anum_pg_ts_template_tmplnamespace - 1] = ObjectIdGetDatum(namespaceoid);
/*
* loop over the definition list and extract the information we need.
*/
foreach(pl, parameters)
{
DefElem *defel = (DefElem *) lfirst(pl);
Avoid unnecessary use of pg_strcasecmp for already-downcased identifiers. We have a lot of code in which option names, which from the user's viewpoint are logically keywords, are passed through the grammar as plain identifiers, and then matched to string literals during command execution. This approach avoids making words into lexer keywords unnecessarily. Some places matched these strings using plain strcmp, some using pg_strcasecmp. But the latter should be unnecessary since identifiers would have been downcased on their way through the parser. Aside from any efficiency concerns (probably not a big factor), the lack of consistency in this area creates a hazard of subtle bugs due to different places coming to different conclusions about whether two option names are the same or different. Hence, standardize on using strcmp() to match any option names that are expected to have been fed through the parser. This does create a user-visible behavioral change, which is that while formerly all of these would work: alter table foo set (fillfactor = 50); alter table foo set (FillFactor = 50); alter table foo set ("fillfactor" = 50); alter table foo set ("FillFactor" = 50); now the last case will fail because that double-quoted identifier is different from the others. However, none of our documentation says that you can use a quoted identifier in such contexts at all, and we should discourage doing so since it would break if we ever decide to parse such constructs as true lexer keywords rather than poor man's substitutes. So this shouldn't create a significant compatibility issue for users. Daniel Gustafsson, reviewed by Michael Paquier, small changes by me Discussion: https://postgr.es/m/29405B24-564E-476B-98C0-677A29805B84@yesql.se
2018-01-27 00:25:02 +01:00
if (strcmp(defel->defname, "init") == 0)
{
values[Anum_pg_ts_template_tmplinit - 1] =
get_ts_template_func(defel, Anum_pg_ts_template_tmplinit);
nulls[Anum_pg_ts_template_tmplinit - 1] = false;
}
Avoid unnecessary use of pg_strcasecmp for already-downcased identifiers. We have a lot of code in which option names, which from the user's viewpoint are logically keywords, are passed through the grammar as plain identifiers, and then matched to string literals during command execution. This approach avoids making words into lexer keywords unnecessarily. Some places matched these strings using plain strcmp, some using pg_strcasecmp. But the latter should be unnecessary since identifiers would have been downcased on their way through the parser. Aside from any efficiency concerns (probably not a big factor), the lack of consistency in this area creates a hazard of subtle bugs due to different places coming to different conclusions about whether two option names are the same or different. Hence, standardize on using strcmp() to match any option names that are expected to have been fed through the parser. This does create a user-visible behavioral change, which is that while formerly all of these would work: alter table foo set (fillfactor = 50); alter table foo set (FillFactor = 50); alter table foo set ("fillfactor" = 50); alter table foo set ("FillFactor" = 50); now the last case will fail because that double-quoted identifier is different from the others. However, none of our documentation says that you can use a quoted identifier in such contexts at all, and we should discourage doing so since it would break if we ever decide to parse such constructs as true lexer keywords rather than poor man's substitutes. So this shouldn't create a significant compatibility issue for users. Daniel Gustafsson, reviewed by Michael Paquier, small changes by me Discussion: https://postgr.es/m/29405B24-564E-476B-98C0-677A29805B84@yesql.se
2018-01-27 00:25:02 +01:00
else if (strcmp(defel->defname, "lexize") == 0)
{
values[Anum_pg_ts_template_tmpllexize - 1] =
get_ts_template_func(defel, Anum_pg_ts_template_tmpllexize);
nulls[Anum_pg_ts_template_tmpllexize - 1] = false;
}
else
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("text search template parameter \"%s\" not recognized",
defel->defname)));
}
/*
* Validation
*/
if (!OidIsValid(DatumGetObjectId(values[Anum_pg_ts_template_tmpllexize - 1])))
ereport(ERROR,
(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
errmsg("text search template lexize method is required")));
/*
* Looks good, insert
*/
tup = heap_form_tuple(tmplRel->rd_att, values, nulls);
Remove WITH OIDS support, change oid catalog column visibility. Previously tables declared WITH OIDS, including a significant fraction of the catalog tables, stored the oid column not as a normal column, but as part of the tuple header. This special column was not shown by default, which was somewhat odd, as it's often (consider e.g. pg_class.oid) one of the more important parts of a row. Neither pg_dump nor COPY included the contents of the oid column by default. The fact that the oid column was not an ordinary column necessitated a significant amount of special case code to support oid columns. That already was painful for the existing, but upcoming work aiming to make table storage pluggable, would have required expanding and duplicating that "specialness" significantly. WITH OIDS has been deprecated since 2005 (commit ff02d0a05280e0). Remove it. Removing includes: - CREATE TABLE and ALTER TABLE syntax for declaring the table to be WITH OIDS has been removed (WITH (oids[ = true]) will error out) - pg_dump does not support dumping tables declared WITH OIDS and will issue a warning when dumping one (and ignore the oid column). - restoring an pg_dump archive with pg_restore will warn when restoring a table with oid contents (and ignore the oid column) - COPY will refuse to load binary dump that includes oids. - pg_upgrade will error out when encountering tables declared WITH OIDS, they have to be altered to remove the oid column first. - Functionality to access the oid of the last inserted row (like plpgsql's RESULT_OID, spi's SPI_lastoid, ...) has been removed. The syntax for declaring a table WITHOUT OIDS (or WITH (oids = false) for CREATE TABLE) is still supported. While that requires a bit of support code, it seems unnecessary to break applications / dumps that do not use oids, and are explicit about not using them. The biggest user of WITH OID columns was postgres' catalog. This commit changes all 'magic' oid columns to be columns that are normally declared and stored. To reduce unnecessary query breakage all the newly added columns are still named 'oid', even if a table's column naming scheme would indicate 'reloid' or such. This obviously requires adapting a lot code, mostly replacing oid access via HeapTupleGetOid() with access to the underlying Form_pg_*->oid column. The bootstrap process now assigns oids for all oid columns in genbki.pl that do not have an explicit value (starting at the largest oid previously used), only oids assigned later by oids will be above FirstBootstrapObjectId. As the oid column now is a normal column the special bootstrap syntax for oids has been removed. Oids are not automatically assigned during insertion anymore, all backend code explicitly assigns oids with GetNewOidWithIndex(). For the rare case that insertions into the catalog via SQL are called for the new pg_nextoid() function can be used (which only works on catalog tables). The fact that oid columns on system tables are now normal columns means that they will be included in the set of columns expanded by * (i.e. SELECT * FROM pg_class will now include the table's oid, previously it did not). It'd not technically be hard to hide oid column by default, but that'd mean confusing behavior would either have to be carried forward forever, or it'd cause breakage down the line. While it's not unlikely that further adjustments are needed, the scope/invasiveness of the patch makes it worthwhile to get merge this now. It's painful to maintain externally, too complicated to commit after the code code freeze, and a dependency of a number of other patches. Catversion bump, for obvious reasons. Author: Andres Freund, with contributions by John Naylor Discussion: https://postgr.es/m/20180930034810.ywp2c7awz7opzcfr@alap3.anarazel.de
2018-11-21 00:36:57 +01:00
CatalogTupleInsert(tmplRel, tup);
address = makeTSTemplateDependencies(tup);
/* Post creation hook for new text search template */
InvokeObjectPostCreateHook(TSTemplateRelationId, tmplOid, 0);
heap_freetuple(tup);
table_close(tmplRel, RowExclusiveLock);
return address;
}
/* ---------------------- TS Configuration commands -----------------------*/
/*
* Finds syscache tuple of configuration.
* Returns NULL if no such cfg.
*/
static HeapTuple
GetTSConfigTuple(List *names)
{
HeapTuple tup;
Oid cfgId;
cfgId = get_ts_config_oid(names, true);
if (!OidIsValid(cfgId))
return NULL;
tup = SearchSysCache1(TSCONFIGOID, ObjectIdGetDatum(cfgId));
if (!HeapTupleIsValid(tup)) /* should not happen */
elog(ERROR, "cache lookup failed for text search configuration %u",
cfgId);
return tup;
}
/*
* make pg_depend entries for a new or updated pg_ts_config entry
*
* Pass opened pg_ts_config_map relation if there might be any config map
* entries for the config.
*/
static ObjectAddress
makeConfigurationDependencies(HeapTuple tuple, bool removeOld,
Relation mapRel)
{
Form_pg_ts_config cfg = (Form_pg_ts_config) GETSTRUCT(tuple);
ObjectAddresses *addrs;
ObjectAddress myself,
referenced;
myself.classId = TSConfigRelationId;
Remove WITH OIDS support, change oid catalog column visibility. Previously tables declared WITH OIDS, including a significant fraction of the catalog tables, stored the oid column not as a normal column, but as part of the tuple header. This special column was not shown by default, which was somewhat odd, as it's often (consider e.g. pg_class.oid) one of the more important parts of a row. Neither pg_dump nor COPY included the contents of the oid column by default. The fact that the oid column was not an ordinary column necessitated a significant amount of special case code to support oid columns. That already was painful for the existing, but upcoming work aiming to make table storage pluggable, would have required expanding and duplicating that "specialness" significantly. WITH OIDS has been deprecated since 2005 (commit ff02d0a05280e0). Remove it. Removing includes: - CREATE TABLE and ALTER TABLE syntax for declaring the table to be WITH OIDS has been removed (WITH (oids[ = true]) will error out) - pg_dump does not support dumping tables declared WITH OIDS and will issue a warning when dumping one (and ignore the oid column). - restoring an pg_dump archive with pg_restore will warn when restoring a table with oid contents (and ignore the oid column) - COPY will refuse to load binary dump that includes oids. - pg_upgrade will error out when encountering tables declared WITH OIDS, they have to be altered to remove the oid column first. - Functionality to access the oid of the last inserted row (like plpgsql's RESULT_OID, spi's SPI_lastoid, ...) has been removed. The syntax for declaring a table WITHOUT OIDS (or WITH (oids = false) for CREATE TABLE) is still supported. While that requires a bit of support code, it seems unnecessary to break applications / dumps that do not use oids, and are explicit about not using them. The biggest user of WITH OID columns was postgres' catalog. This commit changes all 'magic' oid columns to be columns that are normally declared and stored. To reduce unnecessary query breakage all the newly added columns are still named 'oid', even if a table's column naming scheme would indicate 'reloid' or such. This obviously requires adapting a lot code, mostly replacing oid access via HeapTupleGetOid() with access to the underlying Form_pg_*->oid column. The bootstrap process now assigns oids for all oid columns in genbki.pl that do not have an explicit value (starting at the largest oid previously used), only oids assigned later by oids will be above FirstBootstrapObjectId. As the oid column now is a normal column the special bootstrap syntax for oids has been removed. Oids are not automatically assigned during insertion anymore, all backend code explicitly assigns oids with GetNewOidWithIndex(). For the rare case that insertions into the catalog via SQL are called for the new pg_nextoid() function can be used (which only works on catalog tables). The fact that oid columns on system tables are now normal columns means that they will be included in the set of columns expanded by * (i.e. SELECT * FROM pg_class will now include the table's oid, previously it did not). It'd not technically be hard to hide oid column by default, but that'd mean confusing behavior would either have to be carried forward forever, or it'd cause breakage down the line. While it's not unlikely that further adjustments are needed, the scope/invasiveness of the patch makes it worthwhile to get merge this now. It's painful to maintain externally, too complicated to commit after the code code freeze, and a dependency of a number of other patches. Catversion bump, for obvious reasons. Author: Andres Freund, with contributions by John Naylor Discussion: https://postgr.es/m/20180930034810.ywp2c7awz7opzcfr@alap3.anarazel.de
2018-11-21 00:36:57 +01:00
myself.objectId = cfg->oid;
myself.objectSubId = 0;
/* for ALTER case, first flush old dependencies, except extension deps */
if (removeOld)
{
deleteDependencyRecordsFor(myself.classId, myself.objectId, true);
deleteSharedDependencyRecordsFor(myself.classId, myself.objectId, 0);
}
/*
* We use an ObjectAddresses list to remove possible duplicate
* dependencies from the config map info. The pg_ts_config items
* shouldn't be duplicates, but might as well fold them all into one call.
*/
addrs = new_object_addresses();
/* dependency on namespace */
referenced.classId = NamespaceRelationId;
referenced.objectId = cfg->cfgnamespace;
referenced.objectSubId = 0;
add_exact_object_address(&referenced, addrs);
/* dependency on owner */
recordDependencyOnOwner(myself.classId, myself.objectId, cfg->cfgowner);
/* dependency on extension */
recordDependencyOnCurrentExtension(&myself, removeOld);
/* dependency on parser */
referenced.classId = TSParserRelationId;
referenced.objectId = cfg->cfgparser;
referenced.objectSubId = 0;
add_exact_object_address(&referenced, addrs);
/* dependencies on dictionaries listed in config map */
if (mapRel)
{
ScanKeyData skey;
SysScanDesc scan;
HeapTuple maptup;
/* CCI to ensure we can see effects of caller's changes */
CommandCounterIncrement();
ScanKeyInit(&skey,
Anum_pg_ts_config_map_mapcfg,
BTEqualStrategyNumber, F_OIDEQ,
ObjectIdGetDatum(myself.objectId));
scan = systable_beginscan(mapRel, TSConfigMapIndexId, true,
NULL, 1, &skey);
while (HeapTupleIsValid((maptup = systable_getnext(scan))))
{
Form_pg_ts_config_map cfgmap = (Form_pg_ts_config_map) GETSTRUCT(maptup);
referenced.classId = TSDictionaryRelationId;
referenced.objectId = cfgmap->mapdict;
referenced.objectSubId = 0;
add_exact_object_address(&referenced, addrs);
}
systable_endscan(scan);
}
/* Record 'em (this includes duplicate elimination) */
record_object_address_dependencies(&myself, addrs, DEPENDENCY_NORMAL);
free_object_addresses(addrs);
return myself;
}
/*
* CREATE TEXT SEARCH CONFIGURATION
*/
ObjectAddress
DefineTSConfiguration(List *names, List *parameters, ObjectAddress *copied)
{
Relation cfgRel;
Relation mapRel = NULL;
HeapTuple tup;
Datum values[Natts_pg_ts_config];
bool nulls[Natts_pg_ts_config];
AclResult aclresult;
Oid namespaceoid;
char *cfgname;
NameData cname;
Oid sourceOid = InvalidOid;
Oid prsOid = InvalidOid;
Oid cfgOid;
ListCell *pl;
ObjectAddress address;
/* Convert list of names to a name and namespace */
namespaceoid = QualifiedNameGetCreationNamespace(names, &cfgname);
/* Check we have creation rights in target namespace */
aclresult = object_aclcheck(NamespaceRelationId, namespaceoid, GetUserId(), ACL_CREATE);
if (aclresult != ACLCHECK_OK)
aclcheck_error(aclresult, OBJECT_SCHEMA,
get_namespace_name(namespaceoid));
/*
* loop over the definition list and extract the information we need.
*/
foreach(pl, parameters)
{
DefElem *defel = (DefElem *) lfirst(pl);
Avoid unnecessary use of pg_strcasecmp for already-downcased identifiers. We have a lot of code in which option names, which from the user's viewpoint are logically keywords, are passed through the grammar as plain identifiers, and then matched to string literals during command execution. This approach avoids making words into lexer keywords unnecessarily. Some places matched these strings using plain strcmp, some using pg_strcasecmp. But the latter should be unnecessary since identifiers would have been downcased on their way through the parser. Aside from any efficiency concerns (probably not a big factor), the lack of consistency in this area creates a hazard of subtle bugs due to different places coming to different conclusions about whether two option names are the same or different. Hence, standardize on using strcmp() to match any option names that are expected to have been fed through the parser. This does create a user-visible behavioral change, which is that while formerly all of these would work: alter table foo set (fillfactor = 50); alter table foo set (FillFactor = 50); alter table foo set ("fillfactor" = 50); alter table foo set ("FillFactor" = 50); now the last case will fail because that double-quoted identifier is different from the others. However, none of our documentation says that you can use a quoted identifier in such contexts at all, and we should discourage doing so since it would break if we ever decide to parse such constructs as true lexer keywords rather than poor man's substitutes. So this shouldn't create a significant compatibility issue for users. Daniel Gustafsson, reviewed by Michael Paquier, small changes by me Discussion: https://postgr.es/m/29405B24-564E-476B-98C0-677A29805B84@yesql.se
2018-01-27 00:25:02 +01:00
if (strcmp(defel->defname, "parser") == 0)
prsOid = get_ts_parser_oid(defGetQualifiedName(defel), false);
Avoid unnecessary use of pg_strcasecmp for already-downcased identifiers. We have a lot of code in which option names, which from the user's viewpoint are logically keywords, are passed through the grammar as plain identifiers, and then matched to string literals during command execution. This approach avoids making words into lexer keywords unnecessarily. Some places matched these strings using plain strcmp, some using pg_strcasecmp. But the latter should be unnecessary since identifiers would have been downcased on their way through the parser. Aside from any efficiency concerns (probably not a big factor), the lack of consistency in this area creates a hazard of subtle bugs due to different places coming to different conclusions about whether two option names are the same or different. Hence, standardize on using strcmp() to match any option names that are expected to have been fed through the parser. This does create a user-visible behavioral change, which is that while formerly all of these would work: alter table foo set (fillfactor = 50); alter table foo set (FillFactor = 50); alter table foo set ("fillfactor" = 50); alter table foo set ("FillFactor" = 50); now the last case will fail because that double-quoted identifier is different from the others. However, none of our documentation says that you can use a quoted identifier in such contexts at all, and we should discourage doing so since it would break if we ever decide to parse such constructs as true lexer keywords rather than poor man's substitutes. So this shouldn't create a significant compatibility issue for users. Daniel Gustafsson, reviewed by Michael Paquier, small changes by me Discussion: https://postgr.es/m/29405B24-564E-476B-98C0-677A29805B84@yesql.se
2018-01-27 00:25:02 +01:00
else if (strcmp(defel->defname, "copy") == 0)
sourceOid = get_ts_config_oid(defGetQualifiedName(defel), false);
else
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("text search configuration parameter \"%s\" not recognized",
defel->defname)));
}
if (OidIsValid(sourceOid) && OidIsValid(prsOid))
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("cannot specify both PARSER and COPY options")));
/* make copied tsconfig available to callers */
if (copied && OidIsValid(sourceOid))
{
ObjectAddressSet(*copied,
TSConfigRelationId,
sourceOid);
}
/*
* Look up source config if given.
*/
if (OidIsValid(sourceOid))
{
Form_pg_ts_config cfg;
tup = SearchSysCache1(TSCONFIGOID, ObjectIdGetDatum(sourceOid));
if (!HeapTupleIsValid(tup))
elog(ERROR, "cache lookup failed for text search configuration %u",
sourceOid);
cfg = (Form_pg_ts_config) GETSTRUCT(tup);
/* use source's parser */
prsOid = cfg->cfgparser;
ReleaseSysCache(tup);
}
/*
* Validation
*/
if (!OidIsValid(prsOid))
ereport(ERROR,
(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
errmsg("text search parser is required")));
cfgRel = table_open(TSConfigRelationId, RowExclusiveLock);
Remove WITH OIDS support, change oid catalog column visibility. Previously tables declared WITH OIDS, including a significant fraction of the catalog tables, stored the oid column not as a normal column, but as part of the tuple header. This special column was not shown by default, which was somewhat odd, as it's often (consider e.g. pg_class.oid) one of the more important parts of a row. Neither pg_dump nor COPY included the contents of the oid column by default. The fact that the oid column was not an ordinary column necessitated a significant amount of special case code to support oid columns. That already was painful for the existing, but upcoming work aiming to make table storage pluggable, would have required expanding and duplicating that "specialness" significantly. WITH OIDS has been deprecated since 2005 (commit ff02d0a05280e0). Remove it. Removing includes: - CREATE TABLE and ALTER TABLE syntax for declaring the table to be WITH OIDS has been removed (WITH (oids[ = true]) will error out) - pg_dump does not support dumping tables declared WITH OIDS and will issue a warning when dumping one (and ignore the oid column). - restoring an pg_dump archive with pg_restore will warn when restoring a table with oid contents (and ignore the oid column) - COPY will refuse to load binary dump that includes oids. - pg_upgrade will error out when encountering tables declared WITH OIDS, they have to be altered to remove the oid column first. - Functionality to access the oid of the last inserted row (like plpgsql's RESULT_OID, spi's SPI_lastoid, ...) has been removed. The syntax for declaring a table WITHOUT OIDS (or WITH (oids = false) for CREATE TABLE) is still supported. While that requires a bit of support code, it seems unnecessary to break applications / dumps that do not use oids, and are explicit about not using them. The biggest user of WITH OID columns was postgres' catalog. This commit changes all 'magic' oid columns to be columns that are normally declared and stored. To reduce unnecessary query breakage all the newly added columns are still named 'oid', even if a table's column naming scheme would indicate 'reloid' or such. This obviously requires adapting a lot code, mostly replacing oid access via HeapTupleGetOid() with access to the underlying Form_pg_*->oid column. The bootstrap process now assigns oids for all oid columns in genbki.pl that do not have an explicit value (starting at the largest oid previously used), only oids assigned later by oids will be above FirstBootstrapObjectId. As the oid column now is a normal column the special bootstrap syntax for oids has been removed. Oids are not automatically assigned during insertion anymore, all backend code explicitly assigns oids with GetNewOidWithIndex(). For the rare case that insertions into the catalog via SQL are called for the new pg_nextoid() function can be used (which only works on catalog tables). The fact that oid columns on system tables are now normal columns means that they will be included in the set of columns expanded by * (i.e. SELECT * FROM pg_class will now include the table's oid, previously it did not). It'd not technically be hard to hide oid column by default, but that'd mean confusing behavior would either have to be carried forward forever, or it'd cause breakage down the line. While it's not unlikely that further adjustments are needed, the scope/invasiveness of the patch makes it worthwhile to get merge this now. It's painful to maintain externally, too complicated to commit after the code code freeze, and a dependency of a number of other patches. Catversion bump, for obvious reasons. Author: Andres Freund, with contributions by John Naylor Discussion: https://postgr.es/m/20180930034810.ywp2c7awz7opzcfr@alap3.anarazel.de
2018-11-21 00:36:57 +01:00
/*
* Looks good, build tuple and insert
*/
memset(values, 0, sizeof(values));
memset(nulls, false, sizeof(nulls));
Remove WITH OIDS support, change oid catalog column visibility. Previously tables declared WITH OIDS, including a significant fraction of the catalog tables, stored the oid column not as a normal column, but as part of the tuple header. This special column was not shown by default, which was somewhat odd, as it's often (consider e.g. pg_class.oid) one of the more important parts of a row. Neither pg_dump nor COPY included the contents of the oid column by default. The fact that the oid column was not an ordinary column necessitated a significant amount of special case code to support oid columns. That already was painful for the existing, but upcoming work aiming to make table storage pluggable, would have required expanding and duplicating that "specialness" significantly. WITH OIDS has been deprecated since 2005 (commit ff02d0a05280e0). Remove it. Removing includes: - CREATE TABLE and ALTER TABLE syntax for declaring the table to be WITH OIDS has been removed (WITH (oids[ = true]) will error out) - pg_dump does not support dumping tables declared WITH OIDS and will issue a warning when dumping one (and ignore the oid column). - restoring an pg_dump archive with pg_restore will warn when restoring a table with oid contents (and ignore the oid column) - COPY will refuse to load binary dump that includes oids. - pg_upgrade will error out when encountering tables declared WITH OIDS, they have to be altered to remove the oid column first. - Functionality to access the oid of the last inserted row (like plpgsql's RESULT_OID, spi's SPI_lastoid, ...) has been removed. The syntax for declaring a table WITHOUT OIDS (or WITH (oids = false) for CREATE TABLE) is still supported. While that requires a bit of support code, it seems unnecessary to break applications / dumps that do not use oids, and are explicit about not using them. The biggest user of WITH OID columns was postgres' catalog. This commit changes all 'magic' oid columns to be columns that are normally declared and stored. To reduce unnecessary query breakage all the newly added columns are still named 'oid', even if a table's column naming scheme would indicate 'reloid' or such. This obviously requires adapting a lot code, mostly replacing oid access via HeapTupleGetOid() with access to the underlying Form_pg_*->oid column. The bootstrap process now assigns oids for all oid columns in genbki.pl that do not have an explicit value (starting at the largest oid previously used), only oids assigned later by oids will be above FirstBootstrapObjectId. As the oid column now is a normal column the special bootstrap syntax for oids has been removed. Oids are not automatically assigned during insertion anymore, all backend code explicitly assigns oids with GetNewOidWithIndex(). For the rare case that insertions into the catalog via SQL are called for the new pg_nextoid() function can be used (which only works on catalog tables). The fact that oid columns on system tables are now normal columns means that they will be included in the set of columns expanded by * (i.e. SELECT * FROM pg_class will now include the table's oid, previously it did not). It'd not technically be hard to hide oid column by default, but that'd mean confusing behavior would either have to be carried forward forever, or it'd cause breakage down the line. While it's not unlikely that further adjustments are needed, the scope/invasiveness of the patch makes it worthwhile to get merge this now. It's painful to maintain externally, too complicated to commit after the code code freeze, and a dependency of a number of other patches. Catversion bump, for obvious reasons. Author: Andres Freund, with contributions by John Naylor Discussion: https://postgr.es/m/20180930034810.ywp2c7awz7opzcfr@alap3.anarazel.de
2018-11-21 00:36:57 +01:00
cfgOid = GetNewOidWithIndex(cfgRel, TSConfigOidIndexId,
Anum_pg_ts_config_oid);
values[Anum_pg_ts_config_oid - 1] = ObjectIdGetDatum(cfgOid);
namestrcpy(&cname, cfgname);
values[Anum_pg_ts_config_cfgname - 1] = NameGetDatum(&cname);
values[Anum_pg_ts_config_cfgnamespace - 1] = ObjectIdGetDatum(namespaceoid);
values[Anum_pg_ts_config_cfgowner - 1] = ObjectIdGetDatum(GetUserId());
values[Anum_pg_ts_config_cfgparser - 1] = ObjectIdGetDatum(prsOid);
tup = heap_form_tuple(cfgRel->rd_att, values, nulls);
Remove WITH OIDS support, change oid catalog column visibility. Previously tables declared WITH OIDS, including a significant fraction of the catalog tables, stored the oid column not as a normal column, but as part of the tuple header. This special column was not shown by default, which was somewhat odd, as it's often (consider e.g. pg_class.oid) one of the more important parts of a row. Neither pg_dump nor COPY included the contents of the oid column by default. The fact that the oid column was not an ordinary column necessitated a significant amount of special case code to support oid columns. That already was painful for the existing, but upcoming work aiming to make table storage pluggable, would have required expanding and duplicating that "specialness" significantly. WITH OIDS has been deprecated since 2005 (commit ff02d0a05280e0). Remove it. Removing includes: - CREATE TABLE and ALTER TABLE syntax for declaring the table to be WITH OIDS has been removed (WITH (oids[ = true]) will error out) - pg_dump does not support dumping tables declared WITH OIDS and will issue a warning when dumping one (and ignore the oid column). - restoring an pg_dump archive with pg_restore will warn when restoring a table with oid contents (and ignore the oid column) - COPY will refuse to load binary dump that includes oids. - pg_upgrade will error out when encountering tables declared WITH OIDS, they have to be altered to remove the oid column first. - Functionality to access the oid of the last inserted row (like plpgsql's RESULT_OID, spi's SPI_lastoid, ...) has been removed. The syntax for declaring a table WITHOUT OIDS (or WITH (oids = false) for CREATE TABLE) is still supported. While that requires a bit of support code, it seems unnecessary to break applications / dumps that do not use oids, and are explicit about not using them. The biggest user of WITH OID columns was postgres' catalog. This commit changes all 'magic' oid columns to be columns that are normally declared and stored. To reduce unnecessary query breakage all the newly added columns are still named 'oid', even if a table's column naming scheme would indicate 'reloid' or such. This obviously requires adapting a lot code, mostly replacing oid access via HeapTupleGetOid() with access to the underlying Form_pg_*->oid column. The bootstrap process now assigns oids for all oid columns in genbki.pl that do not have an explicit value (starting at the largest oid previously used), only oids assigned later by oids will be above FirstBootstrapObjectId. As the oid column now is a normal column the special bootstrap syntax for oids has been removed. Oids are not automatically assigned during insertion anymore, all backend code explicitly assigns oids with GetNewOidWithIndex(). For the rare case that insertions into the catalog via SQL are called for the new pg_nextoid() function can be used (which only works on catalog tables). The fact that oid columns on system tables are now normal columns means that they will be included in the set of columns expanded by * (i.e. SELECT * FROM pg_class will now include the table's oid, previously it did not). It'd not technically be hard to hide oid column by default, but that'd mean confusing behavior would either have to be carried forward forever, or it'd cause breakage down the line. While it's not unlikely that further adjustments are needed, the scope/invasiveness of the patch makes it worthwhile to get merge this now. It's painful to maintain externally, too complicated to commit after the code code freeze, and a dependency of a number of other patches. Catversion bump, for obvious reasons. Author: Andres Freund, with contributions by John Naylor Discussion: https://postgr.es/m/20180930034810.ywp2c7awz7opzcfr@alap3.anarazel.de
2018-11-21 00:36:57 +01:00
CatalogTupleInsert(cfgRel, tup);
if (OidIsValid(sourceOid))
{
/*
* Copy token-dicts map from source config
*/
ScanKeyData skey;
SysScanDesc scan;
HeapTuple maptup;
TupleDesc mapDesc;
TupleTableSlot **slot;
CatalogIndexState indstate;
int max_slots,
slot_init_count,
slot_stored_count;
mapRel = table_open(TSConfigMapRelationId, RowExclusiveLock);
mapDesc = RelationGetDescr(mapRel);
indstate = CatalogOpenIndexes(mapRel);
/*
* Allocate the slots to use, but delay costly initialization until we
* know that they will be used.
*/
max_slots = MAX_CATALOG_MULTI_INSERT_BYTES / sizeof(FormData_pg_ts_config_map);
slot = palloc(sizeof(TupleTableSlot *) * max_slots);
ScanKeyInit(&skey,
Anum_pg_ts_config_map_mapcfg,
BTEqualStrategyNumber, F_OIDEQ,
ObjectIdGetDatum(sourceOid));
scan = systable_beginscan(mapRel, TSConfigMapIndexId, true,
NULL, 1, &skey);
/* number of slots currently storing tuples */
slot_stored_count = 0;
/* number of slots currently initialized */
slot_init_count = 0;
while (HeapTupleIsValid((maptup = systable_getnext(scan))))
{
Form_pg_ts_config_map cfgmap = (Form_pg_ts_config_map) GETSTRUCT(maptup);
if (slot_init_count < max_slots)
{
slot[slot_stored_count] = MakeSingleTupleTableSlot(mapDesc,
&TTSOpsHeapTuple);
slot_init_count++;
}
ExecClearTuple(slot[slot_stored_count]);
memset(slot[slot_stored_count]->tts_isnull, false,
slot[slot_stored_count]->tts_tupleDescriptor->natts * sizeof(bool));
slot[slot_stored_count]->tts_values[Anum_pg_ts_config_map_mapcfg - 1] = cfgOid;
slot[slot_stored_count]->tts_values[Anum_pg_ts_config_map_maptokentype - 1] = cfgmap->maptokentype;
slot[slot_stored_count]->tts_values[Anum_pg_ts_config_map_mapseqno - 1] = cfgmap->mapseqno;
slot[slot_stored_count]->tts_values[Anum_pg_ts_config_map_mapdict - 1] = cfgmap->mapdict;
ExecStoreVirtualTuple(slot[slot_stored_count]);
slot_stored_count++;
/* If slots are full, insert a batch of tuples */
if (slot_stored_count == max_slots)
{
CatalogTuplesMultiInsertWithInfo(mapRel, slot, slot_stored_count,
indstate);
slot_stored_count = 0;
}
}
/* Insert any tuples left in the buffer */
if (slot_stored_count > 0)
CatalogTuplesMultiInsertWithInfo(mapRel, slot, slot_stored_count,
indstate);
for (int i = 0; i < slot_init_count; i++)
ExecDropSingleTupleTableSlot(slot[i]);
systable_endscan(scan);
CatalogCloseIndexes(indstate);
}
address = makeConfigurationDependencies(tup, false, mapRel);
/* Post creation hook for new text search configuration */
InvokeObjectPostCreateHook(TSConfigRelationId, cfgOid, 0);
heap_freetuple(tup);
if (mapRel)
table_close(mapRel, RowExclusiveLock);
table_close(cfgRel, RowExclusiveLock);
return address;
}
/*
* Guts of TS configuration deletion.
*/
void
RemoveTSConfigurationById(Oid cfgId)
{
Relation relCfg,
relMap;
HeapTuple tup;
ScanKeyData skey;
SysScanDesc scan;
/* Remove the pg_ts_config entry */
relCfg = table_open(TSConfigRelationId, RowExclusiveLock);
tup = SearchSysCache1(TSCONFIGOID, ObjectIdGetDatum(cfgId));
if (!HeapTupleIsValid(tup))
elog(ERROR, "cache lookup failed for text search dictionary %u",
cfgId);
CatalogTupleDelete(relCfg, &tup->t_self);
ReleaseSysCache(tup);
table_close(relCfg, RowExclusiveLock);
/* Remove any pg_ts_config_map entries */
relMap = table_open(TSConfigMapRelationId, RowExclusiveLock);
ScanKeyInit(&skey,
Anum_pg_ts_config_map_mapcfg,
BTEqualStrategyNumber, F_OIDEQ,
ObjectIdGetDatum(cfgId));
scan = systable_beginscan(relMap, TSConfigMapIndexId, true,
NULL, 1, &skey);
while (HeapTupleIsValid((tup = systable_getnext(scan))))
{
CatalogTupleDelete(relMap, &tup->t_self);
}
systable_endscan(scan);
table_close(relMap, RowExclusiveLock);
}
/*
* ALTER TEXT SEARCH CONFIGURATION - main entry point
*/
ObjectAddress
AlterTSConfiguration(AlterTSConfigurationStmt *stmt)
{
HeapTuple tup;
Oid cfgId;
Relation relMap;
ObjectAddress address;
/* Find the configuration */
tup = GetTSConfigTuple(stmt->cfgname);
if (!HeapTupleIsValid(tup))
ereport(ERROR,
(errcode(ERRCODE_UNDEFINED_OBJECT),
errmsg("text search configuration \"%s\" does not exist",
NameListToString(stmt->cfgname))));
Remove WITH OIDS support, change oid catalog column visibility. Previously tables declared WITH OIDS, including a significant fraction of the catalog tables, stored the oid column not as a normal column, but as part of the tuple header. This special column was not shown by default, which was somewhat odd, as it's often (consider e.g. pg_class.oid) one of the more important parts of a row. Neither pg_dump nor COPY included the contents of the oid column by default. The fact that the oid column was not an ordinary column necessitated a significant amount of special case code to support oid columns. That already was painful for the existing, but upcoming work aiming to make table storage pluggable, would have required expanding and duplicating that "specialness" significantly. WITH OIDS has been deprecated since 2005 (commit ff02d0a05280e0). Remove it. Removing includes: - CREATE TABLE and ALTER TABLE syntax for declaring the table to be WITH OIDS has been removed (WITH (oids[ = true]) will error out) - pg_dump does not support dumping tables declared WITH OIDS and will issue a warning when dumping one (and ignore the oid column). - restoring an pg_dump archive with pg_restore will warn when restoring a table with oid contents (and ignore the oid column) - COPY will refuse to load binary dump that includes oids. - pg_upgrade will error out when encountering tables declared WITH OIDS, they have to be altered to remove the oid column first. - Functionality to access the oid of the last inserted row (like plpgsql's RESULT_OID, spi's SPI_lastoid, ...) has been removed. The syntax for declaring a table WITHOUT OIDS (or WITH (oids = false) for CREATE TABLE) is still supported. While that requires a bit of support code, it seems unnecessary to break applications / dumps that do not use oids, and are explicit about not using them. The biggest user of WITH OID columns was postgres' catalog. This commit changes all 'magic' oid columns to be columns that are normally declared and stored. To reduce unnecessary query breakage all the newly added columns are still named 'oid', even if a table's column naming scheme would indicate 'reloid' or such. This obviously requires adapting a lot code, mostly replacing oid access via HeapTupleGetOid() with access to the underlying Form_pg_*->oid column. The bootstrap process now assigns oids for all oid columns in genbki.pl that do not have an explicit value (starting at the largest oid previously used), only oids assigned later by oids will be above FirstBootstrapObjectId. As the oid column now is a normal column the special bootstrap syntax for oids has been removed. Oids are not automatically assigned during insertion anymore, all backend code explicitly assigns oids with GetNewOidWithIndex(). For the rare case that insertions into the catalog via SQL are called for the new pg_nextoid() function can be used (which only works on catalog tables). The fact that oid columns on system tables are now normal columns means that they will be included in the set of columns expanded by * (i.e. SELECT * FROM pg_class will now include the table's oid, previously it did not). It'd not technically be hard to hide oid column by default, but that'd mean confusing behavior would either have to be carried forward forever, or it'd cause breakage down the line. While it's not unlikely that further adjustments are needed, the scope/invasiveness of the patch makes it worthwhile to get merge this now. It's painful to maintain externally, too complicated to commit after the code code freeze, and a dependency of a number of other patches. Catversion bump, for obvious reasons. Author: Andres Freund, with contributions by John Naylor Discussion: https://postgr.es/m/20180930034810.ywp2c7awz7opzcfr@alap3.anarazel.de
2018-11-21 00:36:57 +01:00
cfgId = ((Form_pg_ts_config) GETSTRUCT(tup))->oid;
/* must be owner */
if (!object_ownercheck(TSConfigRelationId, cfgId, GetUserId()))
aclcheck_error(ACLCHECK_NOT_OWNER, OBJECT_TSCONFIGURATION,
NameListToString(stmt->cfgname));
relMap = table_open(TSConfigMapRelationId, RowExclusiveLock);
/* Add or drop mappings */
if (stmt->dicts)
MakeConfigurationMapping(stmt, tup, relMap);
else if (stmt->tokentype)
DropConfigurationMapping(stmt, tup, relMap);
/* Update dependencies */
makeConfigurationDependencies(tup, true, relMap);
Remove WITH OIDS support, change oid catalog column visibility. Previously tables declared WITH OIDS, including a significant fraction of the catalog tables, stored the oid column not as a normal column, but as part of the tuple header. This special column was not shown by default, which was somewhat odd, as it's often (consider e.g. pg_class.oid) one of the more important parts of a row. Neither pg_dump nor COPY included the contents of the oid column by default. The fact that the oid column was not an ordinary column necessitated a significant amount of special case code to support oid columns. That already was painful for the existing, but upcoming work aiming to make table storage pluggable, would have required expanding and duplicating that "specialness" significantly. WITH OIDS has been deprecated since 2005 (commit ff02d0a05280e0). Remove it. Removing includes: - CREATE TABLE and ALTER TABLE syntax for declaring the table to be WITH OIDS has been removed (WITH (oids[ = true]) will error out) - pg_dump does not support dumping tables declared WITH OIDS and will issue a warning when dumping one (and ignore the oid column). - restoring an pg_dump archive with pg_restore will warn when restoring a table with oid contents (and ignore the oid column) - COPY will refuse to load binary dump that includes oids. - pg_upgrade will error out when encountering tables declared WITH OIDS, they have to be altered to remove the oid column first. - Functionality to access the oid of the last inserted row (like plpgsql's RESULT_OID, spi's SPI_lastoid, ...) has been removed. The syntax for declaring a table WITHOUT OIDS (or WITH (oids = false) for CREATE TABLE) is still supported. While that requires a bit of support code, it seems unnecessary to break applications / dumps that do not use oids, and are explicit about not using them. The biggest user of WITH OID columns was postgres' catalog. This commit changes all 'magic' oid columns to be columns that are normally declared and stored. To reduce unnecessary query breakage all the newly added columns are still named 'oid', even if a table's column naming scheme would indicate 'reloid' or such. This obviously requires adapting a lot code, mostly replacing oid access via HeapTupleGetOid() with access to the underlying Form_pg_*->oid column. The bootstrap process now assigns oids for all oid columns in genbki.pl that do not have an explicit value (starting at the largest oid previously used), only oids assigned later by oids will be above FirstBootstrapObjectId. As the oid column now is a normal column the special bootstrap syntax for oids has been removed. Oids are not automatically assigned during insertion anymore, all backend code explicitly assigns oids with GetNewOidWithIndex(). For the rare case that insertions into the catalog via SQL are called for the new pg_nextoid() function can be used (which only works on catalog tables). The fact that oid columns on system tables are now normal columns means that they will be included in the set of columns expanded by * (i.e. SELECT * FROM pg_class will now include the table's oid, previously it did not). It'd not technically be hard to hide oid column by default, but that'd mean confusing behavior would either have to be carried forward forever, or it'd cause breakage down the line. While it's not unlikely that further adjustments are needed, the scope/invasiveness of the patch makes it worthwhile to get merge this now. It's painful to maintain externally, too complicated to commit after the code code freeze, and a dependency of a number of other patches. Catversion bump, for obvious reasons. Author: Andres Freund, with contributions by John Naylor Discussion: https://postgr.es/m/20180930034810.ywp2c7awz7opzcfr@alap3.anarazel.de
2018-11-21 00:36:57 +01:00
InvokeObjectPostAlterHook(TSConfigRelationId, cfgId, 0);
ObjectAddressSet(address, TSConfigRelationId, cfgId);
table_close(relMap, RowExclusiveLock);
ReleaseSysCache(tup);
return address;
}
/*
Fix various issues with ALTER TEXT SEARCH CONFIGURATION This commit addresses a set of issues when changing token type mappings in a text search configuration when using duplicated token names: - ADD MAPPING would fail on insertion because of a constraint failure after inserting the same mapping. - ALTER MAPPING with an "overridden" configuration failed with "tuple already updated by self" when the token mappings are removed. - DROP MAPPING failed with "tuple already updated by self", like previously, but in a different code path. The code is refactored so the token names (with their numbers) are handled as a List with unique members rather than an array with numbers, ensuring that no duplicates mess up with the catalog inserts, updates and deletes. The list is generated by getTokenTypes(), with the same error handling as previously while duplicated tokens are discarded from the list used to work on the catalogs. Regression tests are expanded to cover much more ground for the cases fixed by this commit, as there was no coverage for the code touched in this commit. A bit more is done regarding the fact that a token name not supported by a configuration's parser should result in an error even if IF EXISTS is used in a DROP MAPPING clause. This is implied in the code but there was no coverage for that, and it was very easy to miss. These issues exist since at least their introduction in core with 140d4ebcb46e, so backpatch all the way down. Reported-by: Alexander Lakhin Author: Tender Wang, Michael Paquier Discussion: https://postgr.es/m/18310-1eb233c5908189c8@postgresql.org Backpatch-through: 12
2024-01-31 05:15:21 +01:00
* Check whether a token type name is a member of a TSTokenTypeItem list.
*/
Fix various issues with ALTER TEXT SEARCH CONFIGURATION This commit addresses a set of issues when changing token type mappings in a text search configuration when using duplicated token names: - ADD MAPPING would fail on insertion because of a constraint failure after inserting the same mapping. - ALTER MAPPING with an "overridden" configuration failed with "tuple already updated by self" when the token mappings are removed. - DROP MAPPING failed with "tuple already updated by self", like previously, but in a different code path. The code is refactored so the token names (with their numbers) are handled as a List with unique members rather than an array with numbers, ensuring that no duplicates mess up with the catalog inserts, updates and deletes. The list is generated by getTokenTypes(), with the same error handling as previously while duplicated tokens are discarded from the list used to work on the catalogs. Regression tests are expanded to cover much more ground for the cases fixed by this commit, as there was no coverage for the code touched in this commit. A bit more is done regarding the fact that a token name not supported by a configuration's parser should result in an error even if IF EXISTS is used in a DROP MAPPING clause. This is implied in the code but there was no coverage for that, and it was very easy to miss. These issues exist since at least their introduction in core with 140d4ebcb46e, so backpatch all the way down. Reported-by: Alexander Lakhin Author: Tender Wang, Michael Paquier Discussion: https://postgr.es/m/18310-1eb233c5908189c8@postgresql.org Backpatch-through: 12
2024-01-31 05:15:21 +01:00
static bool
tstoken_list_member(char *token_name, List *tokens)
{
ListCell *c;
bool found = false;
foreach(c, tokens)
{
TSTokenTypeItem *ts = (TSTokenTypeItem *) lfirst(c);
if (strcmp(token_name, ts->name) == 0)
{
found = true;
break;
}
}
return found;
}
/*
* Translate a list of token type names to a list of unique TSTokenTypeItem.
*
* Duplicated entries list are removed from tokennames.
*/
static List *
getTokenTypes(Oid prsId, List *tokennames)
{
TSParserCacheEntry *prs = lookup_ts_parser_cache(prsId);
LexDescr *list;
Fix various issues with ALTER TEXT SEARCH CONFIGURATION This commit addresses a set of issues when changing token type mappings in a text search configuration when using duplicated token names: - ADD MAPPING would fail on insertion because of a constraint failure after inserting the same mapping. - ALTER MAPPING with an "overridden" configuration failed with "tuple already updated by self" when the token mappings are removed. - DROP MAPPING failed with "tuple already updated by self", like previously, but in a different code path. The code is refactored so the token names (with their numbers) are handled as a List with unique members rather than an array with numbers, ensuring that no duplicates mess up with the catalog inserts, updates and deletes. The list is generated by getTokenTypes(), with the same error handling as previously while duplicated tokens are discarded from the list used to work on the catalogs. Regression tests are expanded to cover much more ground for the cases fixed by this commit, as there was no coverage for the code touched in this commit. A bit more is done regarding the fact that a token name not supported by a configuration's parser should result in an error even if IF EXISTS is used in a DROP MAPPING clause. This is implied in the code but there was no coverage for that, and it was very easy to miss. These issues exist since at least their introduction in core with 140d4ebcb46e, so backpatch all the way down. Reported-by: Alexander Lakhin Author: Tender Wang, Michael Paquier Discussion: https://postgr.es/m/18310-1eb233c5908189c8@postgresql.org Backpatch-through: 12
2024-01-31 05:15:21 +01:00
List *result = NIL;
int ntoken;
ListCell *tn;
ntoken = list_length(tokennames);
if (ntoken == 0)
Fix various issues with ALTER TEXT SEARCH CONFIGURATION This commit addresses a set of issues when changing token type mappings in a text search configuration when using duplicated token names: - ADD MAPPING would fail on insertion because of a constraint failure after inserting the same mapping. - ALTER MAPPING with an "overridden" configuration failed with "tuple already updated by self" when the token mappings are removed. - DROP MAPPING failed with "tuple already updated by self", like previously, but in a different code path. The code is refactored so the token names (with their numbers) are handled as a List with unique members rather than an array with numbers, ensuring that no duplicates mess up with the catalog inserts, updates and deletes. The list is generated by getTokenTypes(), with the same error handling as previously while duplicated tokens are discarded from the list used to work on the catalogs. Regression tests are expanded to cover much more ground for the cases fixed by this commit, as there was no coverage for the code touched in this commit. A bit more is done regarding the fact that a token name not supported by a configuration's parser should result in an error even if IF EXISTS is used in a DROP MAPPING clause. This is implied in the code but there was no coverage for that, and it was very easy to miss. These issues exist since at least their introduction in core with 140d4ebcb46e, so backpatch all the way down. Reported-by: Alexander Lakhin Author: Tender Wang, Michael Paquier Discussion: https://postgr.es/m/18310-1eb233c5908189c8@postgresql.org Backpatch-through: 12
2024-01-31 05:15:21 +01:00
return NIL;
if (!OidIsValid(prs->lextypeOid))
elog(ERROR, "method lextype isn't defined for text search parser %u",
prsId);
/* lextype takes one dummy argument */
list = (LexDescr *) DatumGetPointer(OidFunctionCall1(prs->lextypeOid,
(Datum) 0));
foreach(tn, tokennames)
{
String *val = lfirst_node(String, tn);
bool found = false;
int j;
Fix various issues with ALTER TEXT SEARCH CONFIGURATION This commit addresses a set of issues when changing token type mappings in a text search configuration when using duplicated token names: - ADD MAPPING would fail on insertion because of a constraint failure after inserting the same mapping. - ALTER MAPPING with an "overridden" configuration failed with "tuple already updated by self" when the token mappings are removed. - DROP MAPPING failed with "tuple already updated by self", like previously, but in a different code path. The code is refactored so the token names (with their numbers) are handled as a List with unique members rather than an array with numbers, ensuring that no duplicates mess up with the catalog inserts, updates and deletes. The list is generated by getTokenTypes(), with the same error handling as previously while duplicated tokens are discarded from the list used to work on the catalogs. Regression tests are expanded to cover much more ground for the cases fixed by this commit, as there was no coverage for the code touched in this commit. A bit more is done regarding the fact that a token name not supported by a configuration's parser should result in an error even if IF EXISTS is used in a DROP MAPPING clause. This is implied in the code but there was no coverage for that, and it was very easy to miss. These issues exist since at least their introduction in core with 140d4ebcb46e, so backpatch all the way down. Reported-by: Alexander Lakhin Author: Tender Wang, Michael Paquier Discussion: https://postgr.es/m/18310-1eb233c5908189c8@postgresql.org Backpatch-through: 12
2024-01-31 05:15:21 +01:00
/* Skip if this token is already in the result */
if (tstoken_list_member(strVal(val), result))
continue;
j = 0;
while (list && list[j].lexid)
{
if (strcmp(strVal(val), list[j].alias) == 0)
{
Fix various issues with ALTER TEXT SEARCH CONFIGURATION This commit addresses a set of issues when changing token type mappings in a text search configuration when using duplicated token names: - ADD MAPPING would fail on insertion because of a constraint failure after inserting the same mapping. - ALTER MAPPING with an "overridden" configuration failed with "tuple already updated by self" when the token mappings are removed. - DROP MAPPING failed with "tuple already updated by self", like previously, but in a different code path. The code is refactored so the token names (with their numbers) are handled as a List with unique members rather than an array with numbers, ensuring that no duplicates mess up with the catalog inserts, updates and deletes. The list is generated by getTokenTypes(), with the same error handling as previously while duplicated tokens are discarded from the list used to work on the catalogs. Regression tests are expanded to cover much more ground for the cases fixed by this commit, as there was no coverage for the code touched in this commit. A bit more is done regarding the fact that a token name not supported by a configuration's parser should result in an error even if IF EXISTS is used in a DROP MAPPING clause. This is implied in the code but there was no coverage for that, and it was very easy to miss. These issues exist since at least their introduction in core with 140d4ebcb46e, so backpatch all the way down. Reported-by: Alexander Lakhin Author: Tender Wang, Michael Paquier Discussion: https://postgr.es/m/18310-1eb233c5908189c8@postgresql.org Backpatch-through: 12
2024-01-31 05:15:21 +01:00
TSTokenTypeItem *ts = (TSTokenTypeItem *) palloc0(sizeof(TSTokenTypeItem));
ts->num = list[j].lexid;
ts->name = pstrdup(strVal(val));
result = lappend(result, ts);
found = true;
break;
}
j++;
}
if (!found)
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("token type \"%s\" does not exist",
strVal(val))));
}
Fix various issues with ALTER TEXT SEARCH CONFIGURATION This commit addresses a set of issues when changing token type mappings in a text search configuration when using duplicated token names: - ADD MAPPING would fail on insertion because of a constraint failure after inserting the same mapping. - ALTER MAPPING with an "overridden" configuration failed with "tuple already updated by self" when the token mappings are removed. - DROP MAPPING failed with "tuple already updated by self", like previously, but in a different code path. The code is refactored so the token names (with their numbers) are handled as a List with unique members rather than an array with numbers, ensuring that no duplicates mess up with the catalog inserts, updates and deletes. The list is generated by getTokenTypes(), with the same error handling as previously while duplicated tokens are discarded from the list used to work on the catalogs. Regression tests are expanded to cover much more ground for the cases fixed by this commit, as there was no coverage for the code touched in this commit. A bit more is done regarding the fact that a token name not supported by a configuration's parser should result in an error even if IF EXISTS is used in a DROP MAPPING clause. This is implied in the code but there was no coverage for that, and it was very easy to miss. These issues exist since at least their introduction in core with 140d4ebcb46e, so backpatch all the way down. Reported-by: Alexander Lakhin Author: Tender Wang, Michael Paquier Discussion: https://postgr.es/m/18310-1eb233c5908189c8@postgresql.org Backpatch-through: 12
2024-01-31 05:15:21 +01:00
return result;
}
/*
* ALTER TEXT SEARCH CONFIGURATION ADD/ALTER MAPPING
*/
static void
MakeConfigurationMapping(AlterTSConfigurationStmt *stmt,
HeapTuple tup, Relation relMap)
{
Remove WITH OIDS support, change oid catalog column visibility. Previously tables declared WITH OIDS, including a significant fraction of the catalog tables, stored the oid column not as a normal column, but as part of the tuple header. This special column was not shown by default, which was somewhat odd, as it's often (consider e.g. pg_class.oid) one of the more important parts of a row. Neither pg_dump nor COPY included the contents of the oid column by default. The fact that the oid column was not an ordinary column necessitated a significant amount of special case code to support oid columns. That already was painful for the existing, but upcoming work aiming to make table storage pluggable, would have required expanding and duplicating that "specialness" significantly. WITH OIDS has been deprecated since 2005 (commit ff02d0a05280e0). Remove it. Removing includes: - CREATE TABLE and ALTER TABLE syntax for declaring the table to be WITH OIDS has been removed (WITH (oids[ = true]) will error out) - pg_dump does not support dumping tables declared WITH OIDS and will issue a warning when dumping one (and ignore the oid column). - restoring an pg_dump archive with pg_restore will warn when restoring a table with oid contents (and ignore the oid column) - COPY will refuse to load binary dump that includes oids. - pg_upgrade will error out when encountering tables declared WITH OIDS, they have to be altered to remove the oid column first. - Functionality to access the oid of the last inserted row (like plpgsql's RESULT_OID, spi's SPI_lastoid, ...) has been removed. The syntax for declaring a table WITHOUT OIDS (or WITH (oids = false) for CREATE TABLE) is still supported. While that requires a bit of support code, it seems unnecessary to break applications / dumps that do not use oids, and are explicit about not using them. The biggest user of WITH OID columns was postgres' catalog. This commit changes all 'magic' oid columns to be columns that are normally declared and stored. To reduce unnecessary query breakage all the newly added columns are still named 'oid', even if a table's column naming scheme would indicate 'reloid' or such. This obviously requires adapting a lot code, mostly replacing oid access via HeapTupleGetOid() with access to the underlying Form_pg_*->oid column. The bootstrap process now assigns oids for all oid columns in genbki.pl that do not have an explicit value (starting at the largest oid previously used), only oids assigned later by oids will be above FirstBootstrapObjectId. As the oid column now is a normal column the special bootstrap syntax for oids has been removed. Oids are not automatically assigned during insertion anymore, all backend code explicitly assigns oids with GetNewOidWithIndex(). For the rare case that insertions into the catalog via SQL are called for the new pg_nextoid() function can be used (which only works on catalog tables). The fact that oid columns on system tables are now normal columns means that they will be included in the set of columns expanded by * (i.e. SELECT * FROM pg_class will now include the table's oid, previously it did not). It'd not technically be hard to hide oid column by default, but that'd mean confusing behavior would either have to be carried forward forever, or it'd cause breakage down the line. While it's not unlikely that further adjustments are needed, the scope/invasiveness of the patch makes it worthwhile to get merge this now. It's painful to maintain externally, too complicated to commit after the code code freeze, and a dependency of a number of other patches. Catversion bump, for obvious reasons. Author: Andres Freund, with contributions by John Naylor Discussion: https://postgr.es/m/20180930034810.ywp2c7awz7opzcfr@alap3.anarazel.de
2018-11-21 00:36:57 +01:00
Form_pg_ts_config tsform;
Oid cfgId;
ScanKeyData skey[2];
SysScanDesc scan;
HeapTuple maptup;
int i;
int j;
Oid prsId;
Fix various issues with ALTER TEXT SEARCH CONFIGURATION This commit addresses a set of issues when changing token type mappings in a text search configuration when using duplicated token names: - ADD MAPPING would fail on insertion because of a constraint failure after inserting the same mapping. - ALTER MAPPING with an "overridden" configuration failed with "tuple already updated by self" when the token mappings are removed. - DROP MAPPING failed with "tuple already updated by self", like previously, but in a different code path. The code is refactored so the token names (with their numbers) are handled as a List with unique members rather than an array with numbers, ensuring that no duplicates mess up with the catalog inserts, updates and deletes. The list is generated by getTokenTypes(), with the same error handling as previously while duplicated tokens are discarded from the list used to work on the catalogs. Regression tests are expanded to cover much more ground for the cases fixed by this commit, as there was no coverage for the code touched in this commit. A bit more is done regarding the fact that a token name not supported by a configuration's parser should result in an error even if IF EXISTS is used in a DROP MAPPING clause. This is implied in the code but there was no coverage for that, and it was very easy to miss. These issues exist since at least their introduction in core with 140d4ebcb46e, so backpatch all the way down. Reported-by: Alexander Lakhin Author: Tender Wang, Michael Paquier Discussion: https://postgr.es/m/18310-1eb233c5908189c8@postgresql.org Backpatch-through: 12
2024-01-31 05:15:21 +01:00
List *tokens = NIL;
int ntoken;
Oid *dictIds;
int ndict;
ListCell *c;
CatalogIndexState indstate;
Remove WITH OIDS support, change oid catalog column visibility. Previously tables declared WITH OIDS, including a significant fraction of the catalog tables, stored the oid column not as a normal column, but as part of the tuple header. This special column was not shown by default, which was somewhat odd, as it's often (consider e.g. pg_class.oid) one of the more important parts of a row. Neither pg_dump nor COPY included the contents of the oid column by default. The fact that the oid column was not an ordinary column necessitated a significant amount of special case code to support oid columns. That already was painful for the existing, but upcoming work aiming to make table storage pluggable, would have required expanding and duplicating that "specialness" significantly. WITH OIDS has been deprecated since 2005 (commit ff02d0a05280e0). Remove it. Removing includes: - CREATE TABLE and ALTER TABLE syntax for declaring the table to be WITH OIDS has been removed (WITH (oids[ = true]) will error out) - pg_dump does not support dumping tables declared WITH OIDS and will issue a warning when dumping one (and ignore the oid column). - restoring an pg_dump archive with pg_restore will warn when restoring a table with oid contents (and ignore the oid column) - COPY will refuse to load binary dump that includes oids. - pg_upgrade will error out when encountering tables declared WITH OIDS, they have to be altered to remove the oid column first. - Functionality to access the oid of the last inserted row (like plpgsql's RESULT_OID, spi's SPI_lastoid, ...) has been removed. The syntax for declaring a table WITHOUT OIDS (or WITH (oids = false) for CREATE TABLE) is still supported. While that requires a bit of support code, it seems unnecessary to break applications / dumps that do not use oids, and are explicit about not using them. The biggest user of WITH OID columns was postgres' catalog. This commit changes all 'magic' oid columns to be columns that are normally declared and stored. To reduce unnecessary query breakage all the newly added columns are still named 'oid', even if a table's column naming scheme would indicate 'reloid' or such. This obviously requires adapting a lot code, mostly replacing oid access via HeapTupleGetOid() with access to the underlying Form_pg_*->oid column. The bootstrap process now assigns oids for all oid columns in genbki.pl that do not have an explicit value (starting at the largest oid previously used), only oids assigned later by oids will be above FirstBootstrapObjectId. As the oid column now is a normal column the special bootstrap syntax for oids has been removed. Oids are not automatically assigned during insertion anymore, all backend code explicitly assigns oids with GetNewOidWithIndex(). For the rare case that insertions into the catalog via SQL are called for the new pg_nextoid() function can be used (which only works on catalog tables). The fact that oid columns on system tables are now normal columns means that they will be included in the set of columns expanded by * (i.e. SELECT * FROM pg_class will now include the table's oid, previously it did not). It'd not technically be hard to hide oid column by default, but that'd mean confusing behavior would either have to be carried forward forever, or it'd cause breakage down the line. While it's not unlikely that further adjustments are needed, the scope/invasiveness of the patch makes it worthwhile to get merge this now. It's painful to maintain externally, too complicated to commit after the code code freeze, and a dependency of a number of other patches. Catversion bump, for obvious reasons. Author: Andres Freund, with contributions by John Naylor Discussion: https://postgr.es/m/20180930034810.ywp2c7awz7opzcfr@alap3.anarazel.de
2018-11-21 00:36:57 +01:00
tsform = (Form_pg_ts_config) GETSTRUCT(tup);
cfgId = tsform->oid;
prsId = tsform->cfgparser;
tokens = getTokenTypes(prsId, stmt->tokentype);
Fix various issues with ALTER TEXT SEARCH CONFIGURATION This commit addresses a set of issues when changing token type mappings in a text search configuration when using duplicated token names: - ADD MAPPING would fail on insertion because of a constraint failure after inserting the same mapping. - ALTER MAPPING with an "overridden" configuration failed with "tuple already updated by self" when the token mappings are removed. - DROP MAPPING failed with "tuple already updated by self", like previously, but in a different code path. The code is refactored so the token names (with their numbers) are handled as a List with unique members rather than an array with numbers, ensuring that no duplicates mess up with the catalog inserts, updates and deletes. The list is generated by getTokenTypes(), with the same error handling as previously while duplicated tokens are discarded from the list used to work on the catalogs. Regression tests are expanded to cover much more ground for the cases fixed by this commit, as there was no coverage for the code touched in this commit. A bit more is done regarding the fact that a token name not supported by a configuration's parser should result in an error even if IF EXISTS is used in a DROP MAPPING clause. This is implied in the code but there was no coverage for that, and it was very easy to miss. These issues exist since at least their introduction in core with 140d4ebcb46e, so backpatch all the way down. Reported-by: Alexander Lakhin Author: Tender Wang, Michael Paquier Discussion: https://postgr.es/m/18310-1eb233c5908189c8@postgresql.org Backpatch-through: 12
2024-01-31 05:15:21 +01:00
ntoken = list_length(tokens);
if (stmt->override)
{
/*
* delete maps for tokens if they exist and command was ALTER
*/
Fix various issues with ALTER TEXT SEARCH CONFIGURATION This commit addresses a set of issues when changing token type mappings in a text search configuration when using duplicated token names: - ADD MAPPING would fail on insertion because of a constraint failure after inserting the same mapping. - ALTER MAPPING with an "overridden" configuration failed with "tuple already updated by self" when the token mappings are removed. - DROP MAPPING failed with "tuple already updated by self", like previously, but in a different code path. The code is refactored so the token names (with their numbers) are handled as a List with unique members rather than an array with numbers, ensuring that no duplicates mess up with the catalog inserts, updates and deletes. The list is generated by getTokenTypes(), with the same error handling as previously while duplicated tokens are discarded from the list used to work on the catalogs. Regression tests are expanded to cover much more ground for the cases fixed by this commit, as there was no coverage for the code touched in this commit. A bit more is done regarding the fact that a token name not supported by a configuration's parser should result in an error even if IF EXISTS is used in a DROP MAPPING clause. This is implied in the code but there was no coverage for that, and it was very easy to miss. These issues exist since at least their introduction in core with 140d4ebcb46e, so backpatch all the way down. Reported-by: Alexander Lakhin Author: Tender Wang, Michael Paquier Discussion: https://postgr.es/m/18310-1eb233c5908189c8@postgresql.org Backpatch-through: 12
2024-01-31 05:15:21 +01:00
foreach(c, tokens)
{
Fix various issues with ALTER TEXT SEARCH CONFIGURATION This commit addresses a set of issues when changing token type mappings in a text search configuration when using duplicated token names: - ADD MAPPING would fail on insertion because of a constraint failure after inserting the same mapping. - ALTER MAPPING with an "overridden" configuration failed with "tuple already updated by self" when the token mappings are removed. - DROP MAPPING failed with "tuple already updated by self", like previously, but in a different code path. The code is refactored so the token names (with their numbers) are handled as a List with unique members rather than an array with numbers, ensuring that no duplicates mess up with the catalog inserts, updates and deletes. The list is generated by getTokenTypes(), with the same error handling as previously while duplicated tokens are discarded from the list used to work on the catalogs. Regression tests are expanded to cover much more ground for the cases fixed by this commit, as there was no coverage for the code touched in this commit. A bit more is done regarding the fact that a token name not supported by a configuration's parser should result in an error even if IF EXISTS is used in a DROP MAPPING clause. This is implied in the code but there was no coverage for that, and it was very easy to miss. These issues exist since at least their introduction in core with 140d4ebcb46e, so backpatch all the way down. Reported-by: Alexander Lakhin Author: Tender Wang, Michael Paquier Discussion: https://postgr.es/m/18310-1eb233c5908189c8@postgresql.org Backpatch-through: 12
2024-01-31 05:15:21 +01:00
TSTokenTypeItem *ts = (TSTokenTypeItem *) lfirst(c);
ScanKeyInit(&skey[0],
Anum_pg_ts_config_map_mapcfg,
BTEqualStrategyNumber, F_OIDEQ,
ObjectIdGetDatum(cfgId));
ScanKeyInit(&skey[1],
Anum_pg_ts_config_map_maptokentype,
BTEqualStrategyNumber, F_INT4EQ,
Fix various issues with ALTER TEXT SEARCH CONFIGURATION This commit addresses a set of issues when changing token type mappings in a text search configuration when using duplicated token names: - ADD MAPPING would fail on insertion because of a constraint failure after inserting the same mapping. - ALTER MAPPING with an "overridden" configuration failed with "tuple already updated by self" when the token mappings are removed. - DROP MAPPING failed with "tuple already updated by self", like previously, but in a different code path. The code is refactored so the token names (with their numbers) are handled as a List with unique members rather than an array with numbers, ensuring that no duplicates mess up with the catalog inserts, updates and deletes. The list is generated by getTokenTypes(), with the same error handling as previously while duplicated tokens are discarded from the list used to work on the catalogs. Regression tests are expanded to cover much more ground for the cases fixed by this commit, as there was no coverage for the code touched in this commit. A bit more is done regarding the fact that a token name not supported by a configuration's parser should result in an error even if IF EXISTS is used in a DROP MAPPING clause. This is implied in the code but there was no coverage for that, and it was very easy to miss. These issues exist since at least their introduction in core with 140d4ebcb46e, so backpatch all the way down. Reported-by: Alexander Lakhin Author: Tender Wang, Michael Paquier Discussion: https://postgr.es/m/18310-1eb233c5908189c8@postgresql.org Backpatch-through: 12
2024-01-31 05:15:21 +01:00
Int32GetDatum(ts->num));
scan = systable_beginscan(relMap, TSConfigMapIndexId, true,
NULL, 2, skey);
while (HeapTupleIsValid((maptup = systable_getnext(scan))))
{
CatalogTupleDelete(relMap, &maptup->t_self);
}
systable_endscan(scan);
}
}
/*
* Convert list of dictionary names to array of dict OIDs
*/
ndict = list_length(stmt->dicts);
dictIds = (Oid *) palloc(sizeof(Oid) * ndict);
i = 0;
foreach(c, stmt->dicts)
{
List *names = (List *) lfirst(c);
dictIds[i] = get_ts_dict_oid(names, false);
i++;
}
indstate = CatalogOpenIndexes(relMap);
if (stmt->replace)
{
/*
* Replace a specific dictionary in existing entries
*/
Oid dictOld = dictIds[0],
dictNew = dictIds[1];
ScanKeyInit(&skey[0],
Anum_pg_ts_config_map_mapcfg,
BTEqualStrategyNumber, F_OIDEQ,
ObjectIdGetDatum(cfgId));
scan = systable_beginscan(relMap, TSConfigMapIndexId, true,
NULL, 1, skey);
while (HeapTupleIsValid((maptup = systable_getnext(scan))))
{
Form_pg_ts_config_map cfgmap = (Form_pg_ts_config_map) GETSTRUCT(maptup);
/*
* check if it's one of target token types
*/
if (tokens)
{
bool tokmatch = false;
Fix various issues with ALTER TEXT SEARCH CONFIGURATION This commit addresses a set of issues when changing token type mappings in a text search configuration when using duplicated token names: - ADD MAPPING would fail on insertion because of a constraint failure after inserting the same mapping. - ALTER MAPPING with an "overridden" configuration failed with "tuple already updated by self" when the token mappings are removed. - DROP MAPPING failed with "tuple already updated by self", like previously, but in a different code path. The code is refactored so the token names (with their numbers) are handled as a List with unique members rather than an array with numbers, ensuring that no duplicates mess up with the catalog inserts, updates and deletes. The list is generated by getTokenTypes(), with the same error handling as previously while duplicated tokens are discarded from the list used to work on the catalogs. Regression tests are expanded to cover much more ground for the cases fixed by this commit, as there was no coverage for the code touched in this commit. A bit more is done regarding the fact that a token name not supported by a configuration's parser should result in an error even if IF EXISTS is used in a DROP MAPPING clause. This is implied in the code but there was no coverage for that, and it was very easy to miss. These issues exist since at least their introduction in core with 140d4ebcb46e, so backpatch all the way down. Reported-by: Alexander Lakhin Author: Tender Wang, Michael Paquier Discussion: https://postgr.es/m/18310-1eb233c5908189c8@postgresql.org Backpatch-through: 12
2024-01-31 05:15:21 +01:00
foreach(c, tokens)
{
Fix various issues with ALTER TEXT SEARCH CONFIGURATION This commit addresses a set of issues when changing token type mappings in a text search configuration when using duplicated token names: - ADD MAPPING would fail on insertion because of a constraint failure after inserting the same mapping. - ALTER MAPPING with an "overridden" configuration failed with "tuple already updated by self" when the token mappings are removed. - DROP MAPPING failed with "tuple already updated by self", like previously, but in a different code path. The code is refactored so the token names (with their numbers) are handled as a List with unique members rather than an array with numbers, ensuring that no duplicates mess up with the catalog inserts, updates and deletes. The list is generated by getTokenTypes(), with the same error handling as previously while duplicated tokens are discarded from the list used to work on the catalogs. Regression tests are expanded to cover much more ground for the cases fixed by this commit, as there was no coverage for the code touched in this commit. A bit more is done regarding the fact that a token name not supported by a configuration's parser should result in an error even if IF EXISTS is used in a DROP MAPPING clause. This is implied in the code but there was no coverage for that, and it was very easy to miss. These issues exist since at least their introduction in core with 140d4ebcb46e, so backpatch all the way down. Reported-by: Alexander Lakhin Author: Tender Wang, Michael Paquier Discussion: https://postgr.es/m/18310-1eb233c5908189c8@postgresql.org Backpatch-through: 12
2024-01-31 05:15:21 +01:00
TSTokenTypeItem *ts = (TSTokenTypeItem *) lfirst(c);
if (cfgmap->maptokentype == ts->num)
{
tokmatch = true;
break;
}
}
if (!tokmatch)
continue;
}
/*
* replace dictionary if match
*/
if (cfgmap->mapdict == dictOld)
{
Datum repl_val[Natts_pg_ts_config_map];
bool repl_null[Natts_pg_ts_config_map];
bool repl_repl[Natts_pg_ts_config_map];
HeapTuple newtup;
memset(repl_val, 0, sizeof(repl_val));
memset(repl_null, false, sizeof(repl_null));
memset(repl_repl, false, sizeof(repl_repl));
repl_val[Anum_pg_ts_config_map_mapdict - 1] = ObjectIdGetDatum(dictNew);
repl_repl[Anum_pg_ts_config_map_mapdict - 1] = true;
newtup = heap_modify_tuple(maptup,
RelationGetDescr(relMap),
repl_val, repl_null, repl_repl);
CatalogTupleUpdateWithInfo(relMap, &newtup->t_self, newtup, indstate);
}
}
systable_endscan(scan);
}
else
{
TupleTableSlot **slot;
int slotCount = 0;
int nslots;
/* Allocate the slots to use and initialize them */
nslots = Min(ntoken * ndict,
MAX_CATALOG_MULTI_INSERT_BYTES / sizeof(FormData_pg_ts_config_map));
slot = palloc(sizeof(TupleTableSlot *) * nslots);
for (i = 0; i < nslots; i++)
slot[i] = MakeSingleTupleTableSlot(RelationGetDescr(relMap),
&TTSOpsHeapTuple);
/*
* Insertion of new entries
*/
Fix various issues with ALTER TEXT SEARCH CONFIGURATION This commit addresses a set of issues when changing token type mappings in a text search configuration when using duplicated token names: - ADD MAPPING would fail on insertion because of a constraint failure after inserting the same mapping. - ALTER MAPPING with an "overridden" configuration failed with "tuple already updated by self" when the token mappings are removed. - DROP MAPPING failed with "tuple already updated by self", like previously, but in a different code path. The code is refactored so the token names (with their numbers) are handled as a List with unique members rather than an array with numbers, ensuring that no duplicates mess up with the catalog inserts, updates and deletes. The list is generated by getTokenTypes(), with the same error handling as previously while duplicated tokens are discarded from the list used to work on the catalogs. Regression tests are expanded to cover much more ground for the cases fixed by this commit, as there was no coverage for the code touched in this commit. A bit more is done regarding the fact that a token name not supported by a configuration's parser should result in an error even if IF EXISTS is used in a DROP MAPPING clause. This is implied in the code but there was no coverage for that, and it was very easy to miss. These issues exist since at least their introduction in core with 140d4ebcb46e, so backpatch all the way down. Reported-by: Alexander Lakhin Author: Tender Wang, Michael Paquier Discussion: https://postgr.es/m/18310-1eb233c5908189c8@postgresql.org Backpatch-through: 12
2024-01-31 05:15:21 +01:00
foreach(c, tokens)
{
Fix various issues with ALTER TEXT SEARCH CONFIGURATION This commit addresses a set of issues when changing token type mappings in a text search configuration when using duplicated token names: - ADD MAPPING would fail on insertion because of a constraint failure after inserting the same mapping. - ALTER MAPPING with an "overridden" configuration failed with "tuple already updated by self" when the token mappings are removed. - DROP MAPPING failed with "tuple already updated by self", like previously, but in a different code path. The code is refactored so the token names (with their numbers) are handled as a List with unique members rather than an array with numbers, ensuring that no duplicates mess up with the catalog inserts, updates and deletes. The list is generated by getTokenTypes(), with the same error handling as previously while duplicated tokens are discarded from the list used to work on the catalogs. Regression tests are expanded to cover much more ground for the cases fixed by this commit, as there was no coverage for the code touched in this commit. A bit more is done regarding the fact that a token name not supported by a configuration's parser should result in an error even if IF EXISTS is used in a DROP MAPPING clause. This is implied in the code but there was no coverage for that, and it was very easy to miss. These issues exist since at least their introduction in core with 140d4ebcb46e, so backpatch all the way down. Reported-by: Alexander Lakhin Author: Tender Wang, Michael Paquier Discussion: https://postgr.es/m/18310-1eb233c5908189c8@postgresql.org Backpatch-through: 12
2024-01-31 05:15:21 +01:00
TSTokenTypeItem *ts = (TSTokenTypeItem *) lfirst(c);
for (j = 0; j < ndict; j++)
{
ExecClearTuple(slot[slotCount]);
memset(slot[slotCount]->tts_isnull, false,
slot[slotCount]->tts_tupleDescriptor->natts * sizeof(bool));
slot[slotCount]->tts_values[Anum_pg_ts_config_map_mapcfg - 1] = ObjectIdGetDatum(cfgId);
Fix various issues with ALTER TEXT SEARCH CONFIGURATION This commit addresses a set of issues when changing token type mappings in a text search configuration when using duplicated token names: - ADD MAPPING would fail on insertion because of a constraint failure after inserting the same mapping. - ALTER MAPPING with an "overridden" configuration failed with "tuple already updated by self" when the token mappings are removed. - DROP MAPPING failed with "tuple already updated by self", like previously, but in a different code path. The code is refactored so the token names (with their numbers) are handled as a List with unique members rather than an array with numbers, ensuring that no duplicates mess up with the catalog inserts, updates and deletes. The list is generated by getTokenTypes(), with the same error handling as previously while duplicated tokens are discarded from the list used to work on the catalogs. Regression tests are expanded to cover much more ground for the cases fixed by this commit, as there was no coverage for the code touched in this commit. A bit more is done regarding the fact that a token name not supported by a configuration's parser should result in an error even if IF EXISTS is used in a DROP MAPPING clause. This is implied in the code but there was no coverage for that, and it was very easy to miss. These issues exist since at least their introduction in core with 140d4ebcb46e, so backpatch all the way down. Reported-by: Alexander Lakhin Author: Tender Wang, Michael Paquier Discussion: https://postgr.es/m/18310-1eb233c5908189c8@postgresql.org Backpatch-through: 12
2024-01-31 05:15:21 +01:00
slot[slotCount]->tts_values[Anum_pg_ts_config_map_maptokentype - 1] = Int32GetDatum(ts->num);
slot[slotCount]->tts_values[Anum_pg_ts_config_map_mapseqno - 1] = Int32GetDatum(j + 1);
slot[slotCount]->tts_values[Anum_pg_ts_config_map_mapdict - 1] = ObjectIdGetDatum(dictIds[j]);
ExecStoreVirtualTuple(slot[slotCount]);
slotCount++;
/* If slots are full, insert a batch of tuples */
if (slotCount == nslots)
{
CatalogTuplesMultiInsertWithInfo(relMap, slot, slotCount,
indstate);
slotCount = 0;
}
}
}
/* Insert any tuples left in the buffer */
if (slotCount > 0)
CatalogTuplesMultiInsertWithInfo(relMap, slot, slotCount,
indstate);
for (i = 0; i < nslots; i++)
ExecDropSingleTupleTableSlot(slot[i]);
}
Allow on-the-fly capture of DDL event details This feature lets user code inspect and take action on DDL events. Whenever a ddl_command_end event trigger is installed, DDL actions executed are saved to a list which can be inspected during execution of a function attached to ddl_command_end. The set-returning function pg_event_trigger_ddl_commands can be used to list actions so captured; it returns data about the type of command executed, as well as the affected object. This is sufficient for many uses of this feature. For the cases where it is not, we also provide a "command" column of a new pseudo-type pg_ddl_command, which is a pointer to a C structure that can be accessed by C code. The struct contains all the info necessary to completely inspect and even reconstruct the executed command. There is no actual deparse code here; that's expected to come later. What we have is enough infrastructure that the deparsing can be done in an external extension. The intention is that we will add some deparsing code in a later release, as an in-core extension. A new test module is included. It's probably insufficient as is, but it should be sufficient as a starting point for a more complete and future-proof approach. Authors: Álvaro Herrera, with some help from Andres Freund, Ian Barwick, Abhijit Menon-Sen. Reviews by Andres Freund, Robert Haas, Amit Kapila, Michael Paquier, Craig Ringer, David Steele. Additional input from Chris Browne, Dimitri Fontaine, Stephen Frost, Petr Jelínek, Tom Lane, Jim Nasby, Steven Singer, Pavel Stěhule. Based on original work by Dimitri Fontaine, though I didn't use his code. Discussion: https://www.postgresql.org/message-id/m2txrsdzxa.fsf@2ndQuadrant.fr https://www.postgresql.org/message-id/20131108153322.GU5809@eldon.alvh.no-ip.org https://www.postgresql.org/message-id/20150215044814.GL3391@alvh.no-ip.org
2015-05-12 00:14:31 +02:00
/* clean up */
CatalogCloseIndexes(indstate);
Allow on-the-fly capture of DDL event details This feature lets user code inspect and take action on DDL events. Whenever a ddl_command_end event trigger is installed, DDL actions executed are saved to a list which can be inspected during execution of a function attached to ddl_command_end. The set-returning function pg_event_trigger_ddl_commands can be used to list actions so captured; it returns data about the type of command executed, as well as the affected object. This is sufficient for many uses of this feature. For the cases where it is not, we also provide a "command" column of a new pseudo-type pg_ddl_command, which is a pointer to a C structure that can be accessed by C code. The struct contains all the info necessary to completely inspect and even reconstruct the executed command. There is no actual deparse code here; that's expected to come later. What we have is enough infrastructure that the deparsing can be done in an external extension. The intention is that we will add some deparsing code in a later release, as an in-core extension. A new test module is included. It's probably insufficient as is, but it should be sufficient as a starting point for a more complete and future-proof approach. Authors: Álvaro Herrera, with some help from Andres Freund, Ian Barwick, Abhijit Menon-Sen. Reviews by Andres Freund, Robert Haas, Amit Kapila, Michael Paquier, Craig Ringer, David Steele. Additional input from Chris Browne, Dimitri Fontaine, Stephen Frost, Petr Jelínek, Tom Lane, Jim Nasby, Steven Singer, Pavel Stěhule. Based on original work by Dimitri Fontaine, though I didn't use his code. Discussion: https://www.postgresql.org/message-id/m2txrsdzxa.fsf@2ndQuadrant.fr https://www.postgresql.org/message-id/20131108153322.GU5809@eldon.alvh.no-ip.org https://www.postgresql.org/message-id/20150215044814.GL3391@alvh.no-ip.org
2015-05-12 00:14:31 +02:00
EventTriggerCollectAlterTSConfig(stmt, cfgId, dictIds, ndict);
}
/*
* ALTER TEXT SEARCH CONFIGURATION DROP MAPPING
*/
static void
DropConfigurationMapping(AlterTSConfigurationStmt *stmt,
HeapTuple tup, Relation relMap)
{
Remove WITH OIDS support, change oid catalog column visibility. Previously tables declared WITH OIDS, including a significant fraction of the catalog tables, stored the oid column not as a normal column, but as part of the tuple header. This special column was not shown by default, which was somewhat odd, as it's often (consider e.g. pg_class.oid) one of the more important parts of a row. Neither pg_dump nor COPY included the contents of the oid column by default. The fact that the oid column was not an ordinary column necessitated a significant amount of special case code to support oid columns. That already was painful for the existing, but upcoming work aiming to make table storage pluggable, would have required expanding and duplicating that "specialness" significantly. WITH OIDS has been deprecated since 2005 (commit ff02d0a05280e0). Remove it. Removing includes: - CREATE TABLE and ALTER TABLE syntax for declaring the table to be WITH OIDS has been removed (WITH (oids[ = true]) will error out) - pg_dump does not support dumping tables declared WITH OIDS and will issue a warning when dumping one (and ignore the oid column). - restoring an pg_dump archive with pg_restore will warn when restoring a table with oid contents (and ignore the oid column) - COPY will refuse to load binary dump that includes oids. - pg_upgrade will error out when encountering tables declared WITH OIDS, they have to be altered to remove the oid column first. - Functionality to access the oid of the last inserted row (like plpgsql's RESULT_OID, spi's SPI_lastoid, ...) has been removed. The syntax for declaring a table WITHOUT OIDS (or WITH (oids = false) for CREATE TABLE) is still supported. While that requires a bit of support code, it seems unnecessary to break applications / dumps that do not use oids, and are explicit about not using them. The biggest user of WITH OID columns was postgres' catalog. This commit changes all 'magic' oid columns to be columns that are normally declared and stored. To reduce unnecessary query breakage all the newly added columns are still named 'oid', even if a table's column naming scheme would indicate 'reloid' or such. This obviously requires adapting a lot code, mostly replacing oid access via HeapTupleGetOid() with access to the underlying Form_pg_*->oid column. The bootstrap process now assigns oids for all oid columns in genbki.pl that do not have an explicit value (starting at the largest oid previously used), only oids assigned later by oids will be above FirstBootstrapObjectId. As the oid column now is a normal column the special bootstrap syntax for oids has been removed. Oids are not automatically assigned during insertion anymore, all backend code explicitly assigns oids with GetNewOidWithIndex(). For the rare case that insertions into the catalog via SQL are called for the new pg_nextoid() function can be used (which only works on catalog tables). The fact that oid columns on system tables are now normal columns means that they will be included in the set of columns expanded by * (i.e. SELECT * FROM pg_class will now include the table's oid, previously it did not). It'd not technically be hard to hide oid column by default, but that'd mean confusing behavior would either have to be carried forward forever, or it'd cause breakage down the line. While it's not unlikely that further adjustments are needed, the scope/invasiveness of the patch makes it worthwhile to get merge this now. It's painful to maintain externally, too complicated to commit after the code code freeze, and a dependency of a number of other patches. Catversion bump, for obvious reasons. Author: Andres Freund, with contributions by John Naylor Discussion: https://postgr.es/m/20180930034810.ywp2c7awz7opzcfr@alap3.anarazel.de
2018-11-21 00:36:57 +01:00
Form_pg_ts_config tsform;
Oid cfgId;
ScanKeyData skey[2];
SysScanDesc scan;
HeapTuple maptup;
Oid prsId;
Fix various issues with ALTER TEXT SEARCH CONFIGURATION This commit addresses a set of issues when changing token type mappings in a text search configuration when using duplicated token names: - ADD MAPPING would fail on insertion because of a constraint failure after inserting the same mapping. - ALTER MAPPING with an "overridden" configuration failed with "tuple already updated by self" when the token mappings are removed. - DROP MAPPING failed with "tuple already updated by self", like previously, but in a different code path. The code is refactored so the token names (with their numbers) are handled as a List with unique members rather than an array with numbers, ensuring that no duplicates mess up with the catalog inserts, updates and deletes. The list is generated by getTokenTypes(), with the same error handling as previously while duplicated tokens are discarded from the list used to work on the catalogs. Regression tests are expanded to cover much more ground for the cases fixed by this commit, as there was no coverage for the code touched in this commit. A bit more is done regarding the fact that a token name not supported by a configuration's parser should result in an error even if IF EXISTS is used in a DROP MAPPING clause. This is implied in the code but there was no coverage for that, and it was very easy to miss. These issues exist since at least their introduction in core with 140d4ebcb46e, so backpatch all the way down. Reported-by: Alexander Lakhin Author: Tender Wang, Michael Paquier Discussion: https://postgr.es/m/18310-1eb233c5908189c8@postgresql.org Backpatch-through: 12
2024-01-31 05:15:21 +01:00
List *tokens = NIL;
ListCell *c;
Remove WITH OIDS support, change oid catalog column visibility. Previously tables declared WITH OIDS, including a significant fraction of the catalog tables, stored the oid column not as a normal column, but as part of the tuple header. This special column was not shown by default, which was somewhat odd, as it's often (consider e.g. pg_class.oid) one of the more important parts of a row. Neither pg_dump nor COPY included the contents of the oid column by default. The fact that the oid column was not an ordinary column necessitated a significant amount of special case code to support oid columns. That already was painful for the existing, but upcoming work aiming to make table storage pluggable, would have required expanding and duplicating that "specialness" significantly. WITH OIDS has been deprecated since 2005 (commit ff02d0a05280e0). Remove it. Removing includes: - CREATE TABLE and ALTER TABLE syntax for declaring the table to be WITH OIDS has been removed (WITH (oids[ = true]) will error out) - pg_dump does not support dumping tables declared WITH OIDS and will issue a warning when dumping one (and ignore the oid column). - restoring an pg_dump archive with pg_restore will warn when restoring a table with oid contents (and ignore the oid column) - COPY will refuse to load binary dump that includes oids. - pg_upgrade will error out when encountering tables declared WITH OIDS, they have to be altered to remove the oid column first. - Functionality to access the oid of the last inserted row (like plpgsql's RESULT_OID, spi's SPI_lastoid, ...) has been removed. The syntax for declaring a table WITHOUT OIDS (or WITH (oids = false) for CREATE TABLE) is still supported. While that requires a bit of support code, it seems unnecessary to break applications / dumps that do not use oids, and are explicit about not using them. The biggest user of WITH OID columns was postgres' catalog. This commit changes all 'magic' oid columns to be columns that are normally declared and stored. To reduce unnecessary query breakage all the newly added columns are still named 'oid', even if a table's column naming scheme would indicate 'reloid' or such. This obviously requires adapting a lot code, mostly replacing oid access via HeapTupleGetOid() with access to the underlying Form_pg_*->oid column. The bootstrap process now assigns oids for all oid columns in genbki.pl that do not have an explicit value (starting at the largest oid previously used), only oids assigned later by oids will be above FirstBootstrapObjectId. As the oid column now is a normal column the special bootstrap syntax for oids has been removed. Oids are not automatically assigned during insertion anymore, all backend code explicitly assigns oids with GetNewOidWithIndex(). For the rare case that insertions into the catalog via SQL are called for the new pg_nextoid() function can be used (which only works on catalog tables). The fact that oid columns on system tables are now normal columns means that they will be included in the set of columns expanded by * (i.e. SELECT * FROM pg_class will now include the table's oid, previously it did not). It'd not technically be hard to hide oid column by default, but that'd mean confusing behavior would either have to be carried forward forever, or it'd cause breakage down the line. While it's not unlikely that further adjustments are needed, the scope/invasiveness of the patch makes it worthwhile to get merge this now. It's painful to maintain externally, too complicated to commit after the code code freeze, and a dependency of a number of other patches. Catversion bump, for obvious reasons. Author: Andres Freund, with contributions by John Naylor Discussion: https://postgr.es/m/20180930034810.ywp2c7awz7opzcfr@alap3.anarazel.de
2018-11-21 00:36:57 +01:00
tsform = (Form_pg_ts_config) GETSTRUCT(tup);
cfgId = tsform->oid;
prsId = tsform->cfgparser;
tokens = getTokenTypes(prsId, stmt->tokentype);
Fix various issues with ALTER TEXT SEARCH CONFIGURATION This commit addresses a set of issues when changing token type mappings in a text search configuration when using duplicated token names: - ADD MAPPING would fail on insertion because of a constraint failure after inserting the same mapping. - ALTER MAPPING with an "overridden" configuration failed with "tuple already updated by self" when the token mappings are removed. - DROP MAPPING failed with "tuple already updated by self", like previously, but in a different code path. The code is refactored so the token names (with their numbers) are handled as a List with unique members rather than an array with numbers, ensuring that no duplicates mess up with the catalog inserts, updates and deletes. The list is generated by getTokenTypes(), with the same error handling as previously while duplicated tokens are discarded from the list used to work on the catalogs. Regression tests are expanded to cover much more ground for the cases fixed by this commit, as there was no coverage for the code touched in this commit. A bit more is done regarding the fact that a token name not supported by a configuration's parser should result in an error even if IF EXISTS is used in a DROP MAPPING clause. This is implied in the code but there was no coverage for that, and it was very easy to miss. These issues exist since at least their introduction in core with 140d4ebcb46e, so backpatch all the way down. Reported-by: Alexander Lakhin Author: Tender Wang, Michael Paquier Discussion: https://postgr.es/m/18310-1eb233c5908189c8@postgresql.org Backpatch-through: 12
2024-01-31 05:15:21 +01:00
foreach(c, tokens)
{
Fix various issues with ALTER TEXT SEARCH CONFIGURATION This commit addresses a set of issues when changing token type mappings in a text search configuration when using duplicated token names: - ADD MAPPING would fail on insertion because of a constraint failure after inserting the same mapping. - ALTER MAPPING with an "overridden" configuration failed with "tuple already updated by self" when the token mappings are removed. - DROP MAPPING failed with "tuple already updated by self", like previously, but in a different code path. The code is refactored so the token names (with their numbers) are handled as a List with unique members rather than an array with numbers, ensuring that no duplicates mess up with the catalog inserts, updates and deletes. The list is generated by getTokenTypes(), with the same error handling as previously while duplicated tokens are discarded from the list used to work on the catalogs. Regression tests are expanded to cover much more ground for the cases fixed by this commit, as there was no coverage for the code touched in this commit. A bit more is done regarding the fact that a token name not supported by a configuration's parser should result in an error even if IF EXISTS is used in a DROP MAPPING clause. This is implied in the code but there was no coverage for that, and it was very easy to miss. These issues exist since at least their introduction in core with 140d4ebcb46e, so backpatch all the way down. Reported-by: Alexander Lakhin Author: Tender Wang, Michael Paquier Discussion: https://postgr.es/m/18310-1eb233c5908189c8@postgresql.org Backpatch-through: 12
2024-01-31 05:15:21 +01:00
TSTokenTypeItem *ts = (TSTokenTypeItem *) lfirst(c);
bool found = false;
ScanKeyInit(&skey[0],
Anum_pg_ts_config_map_mapcfg,
BTEqualStrategyNumber, F_OIDEQ,
ObjectIdGetDatum(cfgId));
ScanKeyInit(&skey[1],
Anum_pg_ts_config_map_maptokentype,
BTEqualStrategyNumber, F_INT4EQ,
Fix various issues with ALTER TEXT SEARCH CONFIGURATION This commit addresses a set of issues when changing token type mappings in a text search configuration when using duplicated token names: - ADD MAPPING would fail on insertion because of a constraint failure after inserting the same mapping. - ALTER MAPPING with an "overridden" configuration failed with "tuple already updated by self" when the token mappings are removed. - DROP MAPPING failed with "tuple already updated by self", like previously, but in a different code path. The code is refactored so the token names (with their numbers) are handled as a List with unique members rather than an array with numbers, ensuring that no duplicates mess up with the catalog inserts, updates and deletes. The list is generated by getTokenTypes(), with the same error handling as previously while duplicated tokens are discarded from the list used to work on the catalogs. Regression tests are expanded to cover much more ground for the cases fixed by this commit, as there was no coverage for the code touched in this commit. A bit more is done regarding the fact that a token name not supported by a configuration's parser should result in an error even if IF EXISTS is used in a DROP MAPPING clause. This is implied in the code but there was no coverage for that, and it was very easy to miss. These issues exist since at least their introduction in core with 140d4ebcb46e, so backpatch all the way down. Reported-by: Alexander Lakhin Author: Tender Wang, Michael Paquier Discussion: https://postgr.es/m/18310-1eb233c5908189c8@postgresql.org Backpatch-through: 12
2024-01-31 05:15:21 +01:00
Int32GetDatum(ts->num));
scan = systable_beginscan(relMap, TSConfigMapIndexId, true,
NULL, 2, skey);
while (HeapTupleIsValid((maptup = systable_getnext(scan))))
{
CatalogTupleDelete(relMap, &maptup->t_self);
found = true;
}
systable_endscan(scan);
if (!found)
{
if (!stmt->missing_ok)
{
ereport(ERROR,
(errcode(ERRCODE_UNDEFINED_OBJECT),
errmsg("mapping for token type \"%s\" does not exist",
Fix various issues with ALTER TEXT SEARCH CONFIGURATION This commit addresses a set of issues when changing token type mappings in a text search configuration when using duplicated token names: - ADD MAPPING would fail on insertion because of a constraint failure after inserting the same mapping. - ALTER MAPPING with an "overridden" configuration failed with "tuple already updated by self" when the token mappings are removed. - DROP MAPPING failed with "tuple already updated by self", like previously, but in a different code path. The code is refactored so the token names (with their numbers) are handled as a List with unique members rather than an array with numbers, ensuring that no duplicates mess up with the catalog inserts, updates and deletes. The list is generated by getTokenTypes(), with the same error handling as previously while duplicated tokens are discarded from the list used to work on the catalogs. Regression tests are expanded to cover much more ground for the cases fixed by this commit, as there was no coverage for the code touched in this commit. A bit more is done regarding the fact that a token name not supported by a configuration's parser should result in an error even if IF EXISTS is used in a DROP MAPPING clause. This is implied in the code but there was no coverage for that, and it was very easy to miss. These issues exist since at least their introduction in core with 140d4ebcb46e, so backpatch all the way down. Reported-by: Alexander Lakhin Author: Tender Wang, Michael Paquier Discussion: https://postgr.es/m/18310-1eb233c5908189c8@postgresql.org Backpatch-through: 12
2024-01-31 05:15:21 +01:00
ts->name)));
}
else
{
ereport(NOTICE,
(errmsg("mapping for token type \"%s\" does not exist, skipping",
Fix various issues with ALTER TEXT SEARCH CONFIGURATION This commit addresses a set of issues when changing token type mappings in a text search configuration when using duplicated token names: - ADD MAPPING would fail on insertion because of a constraint failure after inserting the same mapping. - ALTER MAPPING with an "overridden" configuration failed with "tuple already updated by self" when the token mappings are removed. - DROP MAPPING failed with "tuple already updated by self", like previously, but in a different code path. The code is refactored so the token names (with their numbers) are handled as a List with unique members rather than an array with numbers, ensuring that no duplicates mess up with the catalog inserts, updates and deletes. The list is generated by getTokenTypes(), with the same error handling as previously while duplicated tokens are discarded from the list used to work on the catalogs. Regression tests are expanded to cover much more ground for the cases fixed by this commit, as there was no coverage for the code touched in this commit. A bit more is done regarding the fact that a token name not supported by a configuration's parser should result in an error even if IF EXISTS is used in a DROP MAPPING clause. This is implied in the code but there was no coverage for that, and it was very easy to miss. These issues exist since at least their introduction in core with 140d4ebcb46e, so backpatch all the way down. Reported-by: Alexander Lakhin Author: Tender Wang, Michael Paquier Discussion: https://postgr.es/m/18310-1eb233c5908189c8@postgresql.org Backpatch-through: 12
2024-01-31 05:15:21 +01:00
ts->name)));
}
}
}
Allow on-the-fly capture of DDL event details This feature lets user code inspect and take action on DDL events. Whenever a ddl_command_end event trigger is installed, DDL actions executed are saved to a list which can be inspected during execution of a function attached to ddl_command_end. The set-returning function pg_event_trigger_ddl_commands can be used to list actions so captured; it returns data about the type of command executed, as well as the affected object. This is sufficient for many uses of this feature. For the cases where it is not, we also provide a "command" column of a new pseudo-type pg_ddl_command, which is a pointer to a C structure that can be accessed by C code. The struct contains all the info necessary to completely inspect and even reconstruct the executed command. There is no actual deparse code here; that's expected to come later. What we have is enough infrastructure that the deparsing can be done in an external extension. The intention is that we will add some deparsing code in a later release, as an in-core extension. A new test module is included. It's probably insufficient as is, but it should be sufficient as a starting point for a more complete and future-proof approach. Authors: Álvaro Herrera, with some help from Andres Freund, Ian Barwick, Abhijit Menon-Sen. Reviews by Andres Freund, Robert Haas, Amit Kapila, Michael Paquier, Craig Ringer, David Steele. Additional input from Chris Browne, Dimitri Fontaine, Stephen Frost, Petr Jelínek, Tom Lane, Jim Nasby, Steven Singer, Pavel Stěhule. Based on original work by Dimitri Fontaine, though I didn't use his code. Discussion: https://www.postgresql.org/message-id/m2txrsdzxa.fsf@2ndQuadrant.fr https://www.postgresql.org/message-id/20131108153322.GU5809@eldon.alvh.no-ip.org https://www.postgresql.org/message-id/20150215044814.GL3391@alvh.no-ip.org
2015-05-12 00:14:31 +02:00
EventTriggerCollectAlterTSConfig(stmt, cfgId, NULL, 0);
}
/*
* Serialize dictionary options, producing a TEXT datum from a List of DefElem
*
* This is used to form the value stored in pg_ts_dict.dictinitoption.
* For the convenience of pg_dump, the output is formatted exactly as it
* would need to appear in CREATE TEXT SEARCH DICTIONARY to reproduce the
* same options.
*/
text *
serialize_deflist(List *deflist)
{
text *result;
StringInfoData buf;
ListCell *l;
initStringInfo(&buf);
foreach(l, deflist)
{
DefElem *defel = (DefElem *) lfirst(l);
char *val = defGetString(defel);
appendStringInfo(&buf, "%s = ",
quote_identifier(defel->defname));
/*
* If the value is a T_Integer or T_Float, emit it without quotes,
* otherwise with quotes. This is essential to allow correct
* reconstruction of the node type as well as the value.
*/
if (IsA(defel->arg, Integer) || IsA(defel->arg, Float))
appendStringInfoString(&buf, val);
else
{
/* If backslashes appear, force E syntax to quote them safely */
if (strchr(val, '\\'))
appendStringInfoChar(&buf, ESCAPE_STRING_SYNTAX);
appendStringInfoChar(&buf, '\'');
while (*val)
{
char ch = *val++;
if (SQL_STR_DOUBLE(ch, true))
appendStringInfoChar(&buf, ch);
appendStringInfoChar(&buf, ch);
}
appendStringInfoChar(&buf, '\'');
}
Represent Lists as expansible arrays, not chains of cons-cells. Originally, Postgres Lists were a more or less exact reimplementation of Lisp lists, which consist of chains of separately-allocated cons cells, each having a value and a next-cell link. We'd hacked that once before (commit d0b4399d8) to add a separate List header, but the data was still in cons cells. That makes some operations -- notably list_nth() -- O(N), and it's bulky because of the next-cell pointers and per-cell palloc overhead, and it's very cache-unfriendly if the cons cells end up scattered around rather than being adjacent. In this rewrite, we still have List headers, but the data is in a resizable array of values, with no next-cell links. Now we need at most two palloc's per List, and often only one, since we can allocate some values in the same palloc call as the List header. (Of course, extending an existing List may require repalloc's to enlarge the array. But this involves just O(log N) allocations not O(N).) Of course this is not without downsides. The key difficulty is that addition or deletion of a list entry may now cause other entries to move, which it did not before. For example, that breaks foreach() and sister macros, which historically used a pointer to the current cons-cell as loop state. We can repair those macros transparently by making their actual loop state be an integer list index; the exposed "ListCell *" pointer is no longer state carried across loop iterations, but is just a derived value. (In practice, modern compilers can optimize things back to having just one loop state value, at least for simple cases with inline loop bodies.) In principle, this is a semantics change for cases where the loop body inserts or deletes list entries ahead of the current loop index; but I found no such cases in the Postgres code. The change is not at all transparent for code that doesn't use foreach() but chases lists "by hand" using lnext(). The largest share of such code in the backend is in loops that were maintaining "prev" and "next" variables in addition to the current-cell pointer, in order to delete list cells efficiently using list_delete_cell(). However, we no longer need a previous-cell pointer to delete a list cell efficiently. Keeping a next-cell pointer doesn't work, as explained above, but we can improve matters by changing such code to use a regular foreach() loop and then using the new macro foreach_delete_current() to delete the current cell. (This macro knows how to update the associated foreach loop's state so that no cells will be missed in the traversal.) There remains a nontrivial risk of code assuming that a ListCell * pointer will remain good over an operation that could now move the list contents. To help catch such errors, list.c can be compiled with a new define symbol DEBUG_LIST_MEMORY_USAGE that forcibly moves list contents whenever that could possibly happen. This makes list operations significantly more expensive so it's not normally turned on (though it is on by default if USE_VALGRIND is on). There are two notable API differences from the previous code: * lnext() now requires the List's header pointer in addition to the current cell's address. * list_delete_cell() no longer requires a previous-cell argument. These changes are somewhat unfortunate, but on the other hand code using either function needs inspection to see if it is assuming anything it shouldn't, so it's not all bad. Programmers should be aware of these significant performance changes: * list_nth() and related functions are now O(1); so there's no major access-speed difference between a list and an array. * Inserting or deleting a list element now takes time proportional to the distance to the end of the list, due to moving the array elements. (However, it typically *doesn't* require palloc or pfree, so except in long lists it's probably still faster than before.) Notably, lcons() used to be about the same cost as lappend(), but that's no longer true if the list is long. Code that uses lcons() and list_delete_first() to maintain a stack might usefully be rewritten to push and pop at the end of the list rather than the beginning. * There are now list_insert_nth...() and list_delete_nth...() functions that add or remove a list cell identified by index. These have the data-movement penalty explained above, but there's no search penalty. * list_concat() and variants now copy the second list's data into storage belonging to the first list, so there is no longer any sharing of cells between the input lists. The second argument is now declared "const List *" to reflect that it isn't changed. This patch just does the minimum needed to get the new implementation in place and fix bugs exposed by the regression tests. As suggested by the foregoing, there's a fair amount of followup work remaining to do. Also, the ENABLE_LIST_COMPAT macros are finally removed in this commit. Code using those should have been gone a dozen years ago. Patch by me; thanks to David Rowley, Jesper Pedersen, and others for review. Discussion: https://postgr.es/m/11587.1550975080@sss.pgh.pa.us
2019-07-15 19:41:58 +02:00
if (lnext(deflist, l) != NULL)
appendStringInfoString(&buf, ", ");
}
result = cstring_to_text_with_len(buf.data, buf.len);
pfree(buf.data);
return result;
}
/*
* Deserialize dictionary options, reconstructing a List of DefElem from TEXT
*
* This is also used for prsheadline options, so for backward compatibility
* we need to accept a few things serialize_deflist() will never emit:
* in particular, unquoted and double-quoted strings.
*/
List *
deserialize_deflist(Datum txt)
{
text *in = DatumGetTextPP(txt); /* in case it's toasted */
List *result = NIL;
int len = VARSIZE_ANY_EXHDR(in);
char *ptr,
*endptr,
*workspace,
*wsptr = NULL,
*startvalue = NULL;
typedef enum
{
CS_WAITKEY,
CS_INKEY,
CS_INQKEY,
CS_WAITEQ,
CS_WAITVALUE,
CS_INSQVALUE,
CS_INDQVALUE,
CS_INWVALUE
} ds_state;
ds_state state = CS_WAITKEY;
workspace = (char *) palloc(len + 1); /* certainly enough room */
ptr = VARDATA_ANY(in);
endptr = ptr + len;
for (; ptr < endptr; ptr++)
{
switch (state)
{
case CS_WAITKEY:
if (isspace((unsigned char) *ptr) || *ptr == ',')
continue;
if (*ptr == '"')
{
wsptr = workspace;
state = CS_INQKEY;
}
else
{
wsptr = workspace;
*wsptr++ = *ptr;
state = CS_INKEY;
}
break;
case CS_INKEY:
if (isspace((unsigned char) *ptr))
{
*wsptr++ = '\0';
state = CS_WAITEQ;
}
else if (*ptr == '=')
{
*wsptr++ = '\0';
state = CS_WAITVALUE;
}
else
{
*wsptr++ = *ptr;
}
break;
case CS_INQKEY:
if (*ptr == '"')
{
if (ptr + 1 < endptr && ptr[1] == '"')
{
/* copy only one of the two quotes */
*wsptr++ = *ptr++;
}
else
{
*wsptr++ = '\0';
state = CS_WAITEQ;
}
}
else
{
*wsptr++ = *ptr;
}
break;
case CS_WAITEQ:
if (*ptr == '=')
state = CS_WAITVALUE;
else if (!isspace((unsigned char) *ptr))
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("invalid parameter list format: \"%s\"",
text_to_cstring(in))));
break;
case CS_WAITVALUE:
if (*ptr == '\'')
{
startvalue = wsptr;
state = CS_INSQVALUE;
}
else if (*ptr == 'E' && ptr + 1 < endptr && ptr[1] == '\'')
{
ptr++;
startvalue = wsptr;
state = CS_INSQVALUE;
}
else if (*ptr == '"')
{
startvalue = wsptr;
state = CS_INDQVALUE;
}
else if (!isspace((unsigned char) *ptr))
{
startvalue = wsptr;
*wsptr++ = *ptr;
state = CS_INWVALUE;
}
break;
case CS_INSQVALUE:
if (*ptr == '\'')
{
if (ptr + 1 < endptr && ptr[1] == '\'')
{
/* copy only one of the two quotes */
*wsptr++ = *ptr++;
}
else
{
*wsptr++ = '\0';
result = lappend(result,
buildDefItem(workspace,
startvalue,
true));
state = CS_WAITKEY;
}
}
else if (*ptr == '\\')
{
if (ptr + 1 < endptr && ptr[1] == '\\')
{
/* copy only one of the two backslashes */
*wsptr++ = *ptr++;
}
else
*wsptr++ = *ptr;
}
else
{
*wsptr++ = *ptr;
}
break;
case CS_INDQVALUE:
if (*ptr == '"')
{
if (ptr + 1 < endptr && ptr[1] == '"')
{
/* copy only one of the two quotes */
*wsptr++ = *ptr++;
}
else
{
*wsptr++ = '\0';
result = lappend(result,
buildDefItem(workspace,
startvalue,
true));
state = CS_WAITKEY;
}
}
else
{
*wsptr++ = *ptr;
}
break;
case CS_INWVALUE:
if (*ptr == ',' || isspace((unsigned char) *ptr))
{
*wsptr++ = '\0';
result = lappend(result,
buildDefItem(workspace,
startvalue,
false));
state = CS_WAITKEY;
}
else
{
*wsptr++ = *ptr;
}
break;
default:
elog(ERROR, "unrecognized deserialize_deflist state: %d",
state);
}
}
if (state == CS_INWVALUE)
{
*wsptr++ = '\0';
result = lappend(result,
buildDefItem(workspace,
startvalue,
false));
}
else if (state != CS_WAITKEY)
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("invalid parameter list format: \"%s\"",
text_to_cstring(in))));
pfree(workspace);
return result;
}
/*
* Build one DefElem for deserialize_deflist
*/
static DefElem *
buildDefItem(const char *name, const char *val, bool was_quoted)
{
/* If input was quoted, always emit as string */
if (!was_quoted && val[0] != '\0')
{
int v;
char *endptr;
/* Try to parse as an integer */
errno = 0;
v = strtoint(val, &endptr, 10);
if (errno == 0 && *endptr == '\0')
return makeDefElem(pstrdup(name),
(Node *) makeInteger(v),
-1);
/* Nope, how about as a float? */
errno = 0;
(void) strtod(val, &endptr);
if (errno == 0 && *endptr == '\0')
return makeDefElem(pstrdup(name),
(Node *) makeFloat(pstrdup(val)),
-1);
if (strcmp(val, "true") == 0)
return makeDefElem(pstrdup(name),
(Node *) makeBoolean(true),
-1);
if (strcmp(val, "false") == 0)
return makeDefElem(pstrdup(name),
(Node *) makeBoolean(false),
-1);
}
/* Just make it a string */
return makeDefElem(pstrdup(name),
(Node *) makeString(pstrdup(val)),
-1);
}