Preserve integer and float values accurately in (de)serialize_deflist.

Previously, this code just smashed all types of DefElem values to
strings, cavalierly reasoning that nobody would care.  But in point of
fact, most of the defGetFoo functions do distinguish among different
input syntaxes; for instance defGetBoolean will accept 1 as an integer
but not "1" as a string.  This led to CREATE/ALTER TEXT SEARCH
DICTIONARY accepting 0 and 1 as values for boolean dictionary
properties, only to have the dictionary fail at runtime.

We can upgrade this behavior by teaching serialize_deflist that it
does not need to quote T_Integer or T_Float nodes' values on output,
and then teaching deserialize_deflist to restore unquoted integer or
float values as the appropriate node type.  This should not break
anything using pg_ts_dict.dictinitoption, since that field is just
defined as being something valid to include in CREATE TEXT SEARCH
DICTIONARY.

deserialize_deflist is also used to parse the options arguments
for the ts_headline family of functions, but so far as I can see
this won't cause any problems there either: the only consumer of
that output is prsd_headline which always uses defGetString.
(Really that's a bad idea, but I won't risk changing it here.)

This is surely a bug fix, but given the lack of field complaints
I don't think it's necessary to back-patch.

Discussion: https://postgr.es/m/CAMkU=1xRcs_BUPzR0+V3WndaCAv0E_m3h6aUEJ8NF-sY1nnHsw@mail.gmail.com
This commit is contained in:
Tom Lane 2020-03-10 12:29:59 -04:00
parent 40b3e2c201
commit d01f03a495
5 changed files with 155 additions and 24 deletions

View File

@ -300,8 +300,10 @@ select ts_lexize('intdict', '314532610153');
{314532}
(1 row)
ALTER TEXT SEARCH DICTIONARY intdict (MAXLEN = -214783648);
ALTER TEXT SEARCH DICTIONARY intdict (MAXLEN = -214783648); -- fail
ERROR: maxlen value has to be >= 1
-- This ought to fail, perhaps, but historically it has not:
ALTER TEXT SEARCH DICTIONARY intdict (MAXLEN = 6.7);
select ts_lexize('intdict', '-40865854');
ts_lexize
-----------
@ -327,3 +329,28 @@ select ts_lexize('intdict', '+40865854');
{408658}
(1 row)
ALTER TEXT SEARCH DICTIONARY intdict (REJECTLONG = 1);
select ts_lexize('intdict', '-40865854');
ts_lexize
-----------
{}
(1 row)
select ts_lexize('intdict', '-4086585');
ts_lexize
-----------
{}
(1 row)
select ts_lexize('intdict', '-408658');
ts_lexize
-----------
{408658}
(1 row)
SELECT dictinitoption FROM pg_ts_dict WHERE dictname = 'intdict';
dictinitoption
-----------------------------------------------
maxlen = 6.7, absval = 'true', rejectlong = 1
(1 row)

View File

@ -52,10 +52,18 @@ select ts_lexize('intdict', '313425');
select ts_lexize('intdict', '641439323669');
select ts_lexize('intdict', '314532610153');
ALTER TEXT SEARCH DICTIONARY intdict (MAXLEN = -214783648);
ALTER TEXT SEARCH DICTIONARY intdict (MAXLEN = -214783648); -- fail
-- This ought to fail, perhaps, but historically it has not:
ALTER TEXT SEARCH DICTIONARY intdict (MAXLEN = 6.7);
select ts_lexize('intdict', '-40865854');
select ts_lexize('intdict', '+40865854');
ALTER TEXT SEARCH DICTIONARY intdict (ABSVAL = true);
select ts_lexize('intdict', '-40865854');
select ts_lexize('intdict', '+40865854');
ALTER TEXT SEARCH DICTIONARY intdict (REJECTLONG = 1);
select ts_lexize('intdict', '-40865854');
select ts_lexize('intdict', '-4086585');
select ts_lexize('intdict', '-408658');
SELECT dictinitoption FROM pg_ts_dict WHERE dictname = 'intdict';

View File

@ -36,6 +36,7 @@
#include "commands/alter.h"
#include "commands/defrem.h"
#include "commands/event_trigger.h"
#include "common/string.h"
#include "miscadmin.h"
#include "nodes/makefuncs.h"
#include "parser/parse_func.h"
@ -52,6 +53,8 @@ static void MakeConfigurationMapping(AlterTSConfigurationStmt *stmt,
HeapTuple tup, Relation relMap);
static void DropConfigurationMapping(AlterTSConfigurationStmt *stmt,
HeapTuple tup, Relation relMap);
static DefElem *buildDefItem(const char *name, const char *val,
bool was_quoted);
/* --------------------- TS Parser commands ------------------------ */
@ -1519,9 +1522,6 @@ DropConfigurationMapping(AlterTSConfigurationStmt *stmt,
* For the convenience of pg_dump, the output is formatted exactly as it
* would need to appear in CREATE TEXT SEARCH DICTIONARY to reproduce the
* same options.
*
* Note that we assume that only the textual representation of an option's
* value is interesting --- hence, non-string DefElems get forced to strings.
*/
text *
serialize_deflist(List *deflist)
@ -1539,19 +1539,30 @@ serialize_deflist(List *deflist)
appendStringInfo(&buf, "%s = ",
quote_identifier(defel->defname));
/* If backslashes appear, force E syntax to determine their handling */
if (strchr(val, '\\'))
appendStringInfoChar(&buf, ESCAPE_STRING_SYNTAX);
appendStringInfoChar(&buf, '\'');
while (*val)
{
char ch = *val++;
if (SQL_STR_DOUBLE(ch, true))
/*
* If the value is a T_Integer or T_Float, emit it without quotes,
* otherwise with quotes. This is essential to allow correct
* reconstruction of the node type as well as the value.
*/
if (IsA(defel->arg, Integer) || IsA(defel->arg, Float))
appendStringInfoString(&buf, val);
else
{
/* If backslashes appear, force E syntax to quote them safely */
if (strchr(val, '\\'))
appendStringInfoChar(&buf, ESCAPE_STRING_SYNTAX);
appendStringInfoChar(&buf, '\'');
while (*val)
{
char ch = *val++;
if (SQL_STR_DOUBLE(ch, true))
appendStringInfoChar(&buf, ch);
appendStringInfoChar(&buf, ch);
appendStringInfoChar(&buf, ch);
}
appendStringInfoChar(&buf, '\'');
}
appendStringInfoChar(&buf, '\'');
if (lnext(deflist, l) != NULL)
appendStringInfoString(&buf, ", ");
}
@ -1566,7 +1577,7 @@ serialize_deflist(List *deflist)
*
* This is also used for prsheadline options, so for backward compatibility
* we need to accept a few things serialize_deflist() will never emit:
* in particular, unquoted and double-quoted values.
* in particular, unquoted and double-quoted strings.
*/
List *
deserialize_deflist(Datum txt)
@ -1694,8 +1705,9 @@ deserialize_deflist(Datum txt)
{
*wsptr++ = '\0';
result = lappend(result,
makeDefElem(pstrdup(workspace),
(Node *) makeString(pstrdup(startvalue)), -1));
buildDefItem(workspace,
startvalue,
true));
state = CS_WAITKEY;
}
}
@ -1726,8 +1738,9 @@ deserialize_deflist(Datum txt)
{
*wsptr++ = '\0';
result = lappend(result,
makeDefElem(pstrdup(workspace),
(Node *) makeString(pstrdup(startvalue)), -1));
buildDefItem(workspace,
startvalue,
true));
state = CS_WAITKEY;
}
}
@ -1741,8 +1754,9 @@ deserialize_deflist(Datum txt)
{
*wsptr++ = '\0';
result = lappend(result,
makeDefElem(pstrdup(workspace),
(Node *) makeString(pstrdup(startvalue)), -1));
buildDefItem(workspace,
startvalue,
false));
state = CS_WAITKEY;
}
else
@ -1760,8 +1774,9 @@ deserialize_deflist(Datum txt)
{
*wsptr++ = '\0';
result = lappend(result,
makeDefElem(pstrdup(workspace),
(Node *) makeString(pstrdup(startvalue)), -1));
buildDefItem(workspace,
startvalue,
false));
}
else if (state != CS_WAITKEY)
ereport(ERROR,
@ -1773,3 +1788,36 @@ deserialize_deflist(Datum txt)
return result;
}
/*
* Build one DefElem for deserialize_deflist
*/
static DefElem *
buildDefItem(const char *name, const char *val, bool was_quoted)
{
/* If input was quoted, always emit as string */
if (!was_quoted && val[0] != '\0')
{
int v;
char *endptr;
/* Try to parse as an integer */
errno = 0;
v = strtoint(val, &endptr, 10);
if (errno == 0 && *endptr == '\0')
return makeDefElem(pstrdup(name),
(Node *) makeInteger(v),
-1);
/* Nope, how about as a float? */
errno = 0;
(void) strtod(val, &endptr);
if (errno == 0 && *endptr == '\0')
return makeDefElem(pstrdup(name),
(Node *) makeFloat(pstrdup(val)),
-1);
}
/* Just make it a string */
return makeDefElem(pstrdup(name),
(Node *) makeString(pstrdup(val)),
-1);
}

View File

@ -470,6 +470,41 @@ SELECT ts_lexize('synonym', 'indices');
{index}
(1 row)
-- test altering boolean parameters
SELECT dictinitoption FROM pg_ts_dict WHERE dictname = 'synonym';
dictinitoption
-----------------------------
synonyms = 'synonym_sample'
(1 row)
ALTER TEXT SEARCH DICTIONARY synonym (CaseSensitive = 1);
SELECT ts_lexize('synonym', 'PoStGrEs');
ts_lexize
-----------
(1 row)
SELECT dictinitoption FROM pg_ts_dict WHERE dictname = 'synonym';
dictinitoption
------------------------------------------------
synonyms = 'synonym_sample', casesensitive = 1
(1 row)
ALTER TEXT SEARCH DICTIONARY synonym (CaseSensitive = 2); -- fail
ERROR: casesensitive requires a Boolean value
ALTER TEXT SEARCH DICTIONARY synonym (CaseSensitive = off);
SELECT ts_lexize('synonym', 'PoStGrEs');
ts_lexize
-----------
{pgsql}
(1 row)
SELECT dictinitoption FROM pg_ts_dict WHERE dictname = 'synonym';
dictinitoption
----------------------------------------------------
synonyms = 'synonym_sample', casesensitive = 'off'
(1 row)
-- Create and simple test thesaurus dictionary
-- More tests in configuration checks because ts_lexize()
-- cannot pass more than one word to thesaurus.

View File

@ -148,6 +148,19 @@ SELECT ts_lexize('synonym', 'PoStGrEs');
SELECT ts_lexize('synonym', 'Gogle');
SELECT ts_lexize('synonym', 'indices');
-- test altering boolean parameters
SELECT dictinitoption FROM pg_ts_dict WHERE dictname = 'synonym';
ALTER TEXT SEARCH DICTIONARY synonym (CaseSensitive = 1);
SELECT ts_lexize('synonym', 'PoStGrEs');
SELECT dictinitoption FROM pg_ts_dict WHERE dictname = 'synonym';
ALTER TEXT SEARCH DICTIONARY synonym (CaseSensitive = 2); -- fail
ALTER TEXT SEARCH DICTIONARY synonym (CaseSensitive = off);
SELECT ts_lexize('synonym', 'PoStGrEs');
SELECT dictinitoption FROM pg_ts_dict WHERE dictname = 'synonym';
-- Create and simple test thesaurus dictionary
-- More tests in configuration checks because ts_lexize()
-- cannot pass more than one word to thesaurus.